diff options
Diffstat (limited to 'fs')
168 files changed, 5626 insertions, 4012 deletions
diff --git a/fs/9p/fid.c b/fs/9p/fid.c index 3d681a2c2731..9d9de62592be 100644 --- a/fs/9p/fid.c +++ b/fs/9p/fid.c @@ -39,6 +39,48 @@ void v9fs_fid_add(struct dentry *dentry, struct p9_fid *fid) } /** + * v9fs_fid_find_inode - search for an open fid off of the inode list + * @inode: return a fid pointing to a specific inode + * @uid: return a fid belonging to the specified user + * + */ + +static struct p9_fid *v9fs_fid_find_inode(struct inode *inode, kuid_t uid) +{ + struct hlist_head *h; + struct p9_fid *fid, *ret = NULL; + + p9_debug(P9_DEBUG_VFS, " inode: %p\n", inode); + + spin_lock(&inode->i_lock); + h = (struct hlist_head *)&inode->i_private; + hlist_for_each_entry(fid, h, ilist) { + if (uid_eq(fid->uid, uid)) { + refcount_inc(&fid->count); + ret = fid; + break; + } + } + spin_unlock(&inode->i_lock); + return ret; +} + +/** + * v9fs_open_fid_add - add an open fid to an inode + * @inode: inode that the fid is being added to + * @fid: fid to add + * + */ + +void v9fs_open_fid_add(struct inode *inode, struct p9_fid *fid) +{ + spin_lock(&inode->i_lock); + hlist_add_head(&fid->ilist, (struct hlist_head *)&inode->i_private); + spin_unlock(&inode->i_lock); +} + + +/** * v9fs_fid_find - retrieve a fid that belongs to the specified uid * @dentry: dentry to look for fid in * @uid: return fid that belongs to the specified user @@ -54,13 +96,18 @@ static struct p9_fid *v9fs_fid_find(struct dentry *dentry, kuid_t uid, int any) dentry, dentry, from_kuid(&init_user_ns, uid), any); ret = NULL; + + if (d_inode(dentry)) + ret = v9fs_fid_find_inode(d_inode(dentry), uid); + /* we'll recheck under lock if there's anything to look in */ - if (dentry->d_fsdata) { + if (!ret && dentry->d_fsdata) { struct hlist_head *h = (struct hlist_head *)&dentry->d_fsdata; spin_lock(&dentry->d_lock); hlist_for_each_entry(fid, h, dlist) { if (any || uid_eq(fid->uid, uid)) { ret = fid; + refcount_inc(&ret->count); break; } } @@ -122,7 +169,10 @@ static struct p9_fid 
*v9fs_fid_lookup_with_uid(struct dentry *dentry, fid = v9fs_fid_find(ds, uid, any); if (fid) { /* Found the parent fid do a lookup with that */ - fid = p9_client_walk(fid, 1, &dentry->d_name.name, 1); + struct p9_fid *ofid = fid; + + fid = p9_client_walk(ofid, 1, &dentry->d_name.name, 1); + p9_client_clunk(ofid); goto fid_out; } up_read(&v9ses->rename_sem); @@ -147,8 +197,10 @@ static struct p9_fid *v9fs_fid_lookup_with_uid(struct dentry *dentry, v9fs_fid_add(dentry->d_sb->s_root, fid); } /* If we are root ourself just return that */ - if (dentry->d_sb->s_root == dentry) + if (dentry->d_sb->s_root == dentry) { + refcount_inc(&fid->count); return fid; + } /* * Do a multipath walk with attached root. * When walking parent we need to make sure we @@ -195,6 +247,7 @@ fid_out: fid = ERR_PTR(-ENOENT); } else { __add_fid(dentry, fid); + refcount_inc(&fid->count); spin_unlock(&dentry->d_lock); } } @@ -245,11 +298,13 @@ struct p9_fid *v9fs_fid_lookup(struct dentry *dentry) struct p9_fid *v9fs_writeback_fid(struct dentry *dentry) { int err; - struct p9_fid *fid; + struct p9_fid *fid, *ofid; - fid = clone_fid(v9fs_fid_lookup_with_uid(dentry, GLOBAL_ROOT_UID, 0)); + ofid = v9fs_fid_lookup_with_uid(dentry, GLOBAL_ROOT_UID, 0); + fid = clone_fid(ofid); if (IS_ERR(fid)) goto error_out; + p9_client_clunk(ofid); /* * writeback fid will only be used to write back the * dirty pages. We always request for the open fid in read-write diff --git a/fs/9p/fid.h b/fs/9p/fid.h index 928b1093f511..f7f33509e169 100644 --- a/fs/9p/fid.h +++ b/fs/9p/fid.h @@ -15,12 +15,21 @@ static inline struct p9_fid *v9fs_parent_fid(struct dentry *dentry) } void v9fs_fid_add(struct dentry *dentry, struct p9_fid *fid); struct p9_fid *v9fs_writeback_fid(struct dentry *dentry); +void v9fs_open_fid_add(struct inode *inode, struct p9_fid *fid); static inline struct p9_fid *clone_fid(struct p9_fid *fid) { return IS_ERR(fid) ? 
fid : p9_client_walk(fid, 0, NULL, 1); } static inline struct p9_fid *v9fs_fid_clone(struct dentry *dentry) { - return clone_fid(v9fs_fid_lookup(dentry)); + struct p9_fid *fid, *nfid; + + fid = v9fs_fid_lookup(dentry); + if (!fid || IS_ERR(fid)) + return fid; + + nfid = clone_fid(fid); + p9_client_clunk(fid); + return nfid; } #endif diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c index 7d6f69aefd45..4b4292123b3d 100644 --- a/fs/9p/vfs_dentry.c +++ b/fs/9p/vfs_dentry.c @@ -85,6 +85,8 @@ static int v9fs_lookup_revalidate(struct dentry *dentry, unsigned int flags) retval = v9fs_refresh_inode_dotl(fid, inode); else retval = v9fs_refresh_inode(fid, inode); + p9_client_clunk(fid); + if (retval == -ENOENT) return 0; if (retval < 0) diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c index 674d22bf4f6f..b6a5a0be444d 100644 --- a/fs/9p/vfs_dir.c +++ b/fs/9p/vfs_dir.c @@ -210,8 +210,12 @@ int v9fs_dir_release(struct inode *inode, struct file *filp) fid = filp->private_data; p9_debug(P9_DEBUG_VFS, "inode: %p filp: %p fid: %d\n", inode, filp, fid ? fid->fid : -1); - if (fid) + if (fid) { + spin_lock(&inode->i_lock); + hlist_del(&fid->ilist); + spin_unlock(&inode->i_lock); p9_client_clunk(fid); + } return 0; } diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index be5768949cb1..649f04f112dc 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -46,7 +46,7 @@ int v9fs_file_open(struct inode *inode, struct file *file) int err; struct v9fs_inode *v9inode; struct v9fs_session_info *v9ses; - struct p9_fid *fid; + struct p9_fid *fid, *writeback_fid; int omode; p9_debug(P9_DEBUG_VFS, "inode: %p file: %p\n", inode, file); @@ -85,17 +85,18 @@ int v9fs_file_open(struct inode *inode, struct file *file) * because we want write after unlink usecase * to work. 
*/ - fid = v9fs_writeback_fid(file_dentry(file)); + writeback_fid = v9fs_writeback_fid(file_dentry(file)); if (IS_ERR(fid)) { err = PTR_ERR(fid); mutex_unlock(&v9inode->v_mutex); goto out_error; } - v9inode->writeback_fid = (void *) fid; + v9inode->writeback_fid = (void *) writeback_fid; } mutex_unlock(&v9inode->v_mutex); if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) v9fs_cache_inode_set_cookie(inode, file); + v9fs_open_fid_add(inode, fid); return 0; out_error: p9_client_clunk(file->private_data); diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index ae0c38ad1fcb..4a937fac1acb 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -256,6 +256,7 @@ int v9fs_init_inode(struct v9fs_session_info *v9ses, inode->i_rdev = rdev; inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); inode->i_mapping->a_ops = &v9fs_addr_operations; + inode->i_private = NULL; switch (mode & S_IFMT) { case S_IFIFO: @@ -550,6 +551,7 @@ static int v9fs_remove(struct inode *dir, struct dentry *dentry, int flags) if (v9fs_proto_dotl(v9ses)) retval = p9_client_unlinkat(dfid, dentry->d_name.name, v9fs_at_to_dotl_flags(flags)); + p9_client_clunk(dfid); if (retval == -EOPNOTSUPP) { /* Try the one based on path */ v9fid = v9fs_fid_clone(dentry); @@ -570,6 +572,10 @@ static int v9fs_remove(struct inode *dir, struct dentry *dentry, int flags) v9fs_invalidate_inode_attr(inode); v9fs_invalidate_inode_attr(dir); + + /* invalidate all fids associated with dentry */ + /* NOTE: This will not include open fids */ + dentry->d_op->d_release(dentry); } return retval; } @@ -590,14 +596,12 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir, { int err; const unsigned char *name; - struct p9_fid *dfid, *ofid, *fid; + struct p9_fid *dfid, *ofid = NULL, *fid = NULL; struct inode *inode; p9_debug(P9_DEBUG_VFS, "name %pd\n", dentry); err = 0; - ofid = NULL; - fid = NULL; name = dentry->d_name.name; dfid = v9fs_parent_fid(dentry); if (IS_ERR(dfid)) { @@ -611,12 
+615,14 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir, if (IS_ERR(ofid)) { err = PTR_ERR(ofid); p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); + p9_client_clunk(dfid); return ERR_PTR(err); } err = p9_client_fcreate(ofid, name, perm, mode, extension); if (err < 0) { p9_debug(P9_DEBUG_VFS, "p9_client_fcreate failed %d\n", err); + p9_client_clunk(dfid); goto error; } @@ -628,6 +634,7 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir, p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); fid = NULL; + p9_client_clunk(dfid); goto error; } /* @@ -638,11 +645,13 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir, err = PTR_ERR(inode); p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n", err); + p9_client_clunk(dfid); goto error; } v9fs_fid_add(dentry, fid); d_instantiate(dentry, inode); } + p9_client_clunk(dfid); return ofid; error: if (ofid) @@ -755,6 +764,7 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, */ name = dentry->d_name.name; fid = p9_client_walk(dfid, 1, &name, 1); + p9_client_clunk(dfid); if (fid == ERR_PTR(-ENOENT)) inode = NULL; else if (IS_ERR(fid)) @@ -792,6 +802,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry, struct v9fs_session_info *v9ses; struct p9_fid *fid, *inode_fid; struct dentry *res = NULL; + struct inode *inode; if (d_in_lookup(dentry)) { res = v9fs_vfs_lookup(dir, dentry, 0); @@ -820,7 +831,8 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry, } v9fs_invalidate_inode_attr(dir); - v9inode = V9FS_I(d_inode(dentry)); + inode = d_inode(dentry); + v9inode = V9FS_I(inode); mutex_lock(&v9inode->v_mutex); if ((v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) && !v9inode->writeback_fid && @@ -848,6 +860,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry, file->private_data = fid; if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) v9fs_cache_inode_set_cookie(d_inode(dentry), file); 
+ v9fs_open_fid_add(inode, fid); file->f_mode |= FMODE_CREATED; out: @@ -902,7 +915,7 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *old_inode; struct inode *new_inode; struct v9fs_session_info *v9ses; - struct p9_fid *oldfid; + struct p9_fid *oldfid, *dfid; struct p9_fid *olddirfid; struct p9_fid *newdirfid; struct p9_wstat wstat; @@ -919,13 +932,20 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (IS_ERR(oldfid)) return PTR_ERR(oldfid); - olddirfid = clone_fid(v9fs_parent_fid(old_dentry)); + dfid = v9fs_parent_fid(old_dentry); + olddirfid = clone_fid(dfid); + if (dfid && !IS_ERR(dfid)) + p9_client_clunk(dfid); + if (IS_ERR(olddirfid)) { retval = PTR_ERR(olddirfid); goto done; } - newdirfid = clone_fid(v9fs_parent_fid(new_dentry)); + dfid = v9fs_parent_fid(new_dentry); + newdirfid = clone_fid(dfid); + p9_client_clunk(dfid); + if (IS_ERR(newdirfid)) { retval = PTR_ERR(newdirfid); goto clunk_olddir; @@ -982,6 +1002,7 @@ clunk_olddir: p9_client_clunk(olddirfid); done: + p9_client_clunk(oldfid); return retval; } @@ -1014,6 +1035,7 @@ v9fs_vfs_getattr(const struct path *path, struct kstat *stat, return PTR_ERR(fid); st = p9_client_stat(fid); + p9_client_clunk(fid); if (IS_ERR(st)) return PTR_ERR(st); @@ -1034,7 +1056,7 @@ v9fs_vfs_getattr(const struct path *path, struct kstat *stat, static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr) { - int retval; + int retval, use_dentry = 0; struct v9fs_session_info *v9ses; struct p9_fid *fid = NULL; struct p9_wstat wstat; @@ -1050,8 +1072,10 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr) fid = iattr->ia_file->private_data; WARN_ON(!fid); } - if (!fid) + if (!fid) { fid = v9fs_fid_lookup(dentry); + use_dentry = 1; + } if(IS_ERR(fid)) return PTR_ERR(fid); @@ -1081,6 +1105,10 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr) filemap_write_and_wait(d_inode(dentry)->i_mapping); retval = 
p9_client_wstat(fid, &wstat); + + if (use_dentry) + p9_client_clunk(fid); + if (retval < 0) return retval; @@ -1205,6 +1233,7 @@ static const char *v9fs_vfs_get_link(struct dentry *dentry, return ERR_PTR(-EBADF); st = p9_client_stat(fid); + p9_client_clunk(fid); if (IS_ERR(st)) return ERR_CAST(st); diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 0028eccb665a..823c2eb5f1bf 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -296,6 +296,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, /* instantiate inode and assign the unopened fid to the dentry */ fid = p9_client_walk(dfid, 1, &name, 1); + p9_client_clunk(dfid); if (IS_ERR(fid)) { err = PTR_ERR(fid); p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); @@ -342,6 +343,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, file->private_data = ofid; if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) v9fs_cache_inode_set_cookie(inode, file); + v9fs_open_fid_add(inode, ofid); file->f_mode |= FMODE_CREATED; out: v9fs_put_acl(dacl, pacl); @@ -407,7 +409,6 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, err = p9_client_mkdir_dotl(dfid, name, mode, gid, &qid); if (err < 0) goto error; - fid = p9_client_walk(dfid, 1, &name, 1); if (IS_ERR(fid)) { err = PTR_ERR(fid); @@ -451,6 +452,7 @@ error: if (fid) p9_client_clunk(fid); v9fs_put_acl(dacl, pacl); + p9_client_clunk(dfid); return err; } @@ -478,6 +480,7 @@ v9fs_vfs_getattr_dotl(const struct path *path, struct kstat *stat, */ st = p9_client_getattr_dotl(fid, P9_STATS_ALL); + p9_client_clunk(fid); if (IS_ERR(st)) return PTR_ERR(st); @@ -539,7 +542,7 @@ static int v9fs_mapped_iattr_valid(int iattr_valid) int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr) { - int retval; + int retval, use_dentry = 0; struct p9_fid *fid = NULL; struct p9_iattr_dotl p9attr; struct inode *inode = d_inode(dentry); @@ -564,8 +567,10 @@ int v9fs_vfs_setattr_dotl(struct dentry 
*dentry, struct iattr *iattr) fid = iattr->ia_file->private_data; WARN_ON(!fid); } - if (!fid) + if (!fid) { fid = v9fs_fid_lookup(dentry); + use_dentry = 1; + } if (IS_ERR(fid)) return PTR_ERR(fid); @@ -574,8 +579,11 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr) filemap_write_and_wait(inode->i_mapping); retval = p9_client_setattr(fid, &p9attr); - if (retval < 0) + if (retval < 0) { + if (use_dentry) + p9_client_clunk(fid); return retval; + } if ((iattr->ia_valid & ATTR_SIZE) && iattr->ia_size != i_size_read(inode)) @@ -587,9 +595,15 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr) if (iattr->ia_valid & ATTR_MODE) { /* We also want to update ACL when we update mode bits */ retval = v9fs_acl_chmod(inode, fid); - if (retval < 0) + if (retval < 0) { + if (use_dentry) + p9_client_clunk(fid); return retval; + } } + if (use_dentry) + p9_client_clunk(fid); + return 0; } @@ -741,6 +755,7 @@ error: if (fid) p9_client_clunk(fid); + p9_client_clunk(dfid); return err; } @@ -769,11 +784,15 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir, return PTR_ERR(dfid); oldfid = v9fs_fid_lookup(old_dentry); - if (IS_ERR(oldfid)) + if (IS_ERR(oldfid)) { + p9_client_clunk(dfid); return PTR_ERR(oldfid); + } err = p9_client_link(dfid, oldfid, dentry->d_name.name); + p9_client_clunk(dfid); + p9_client_clunk(oldfid); if (err < 0) { p9_debug(P9_DEBUG_VFS, "p9_client_link failed %d\n", err); return err; @@ -788,6 +807,7 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir, return PTR_ERR(fid); v9fs_refresh_inode_dotl(fid, d_inode(old_dentry)); + p9_client_clunk(fid); } ihold(d_inode(old_dentry)); d_instantiate(dentry, d_inode(old_dentry)); @@ -886,6 +906,8 @@ error: if (fid) p9_client_clunk(fid); v9fs_put_acl(dacl, pacl); + p9_client_clunk(dfid); + return err; } @@ -914,6 +936,7 @@ v9fs_vfs_get_link_dotl(struct dentry *dentry, if (IS_ERR(fid)) return ERR_CAST(fid); retval = p9_client_readlink(fid, &target); + 
p9_client_clunk(fid); if (retval) return ERR_PTR(retval); set_delayed_call(done, kfree_link, target); diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 9a21269b7234..5fce6e30bc5a 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -268,6 +268,7 @@ static int v9fs_statfs(struct dentry *dentry, struct kstatfs *buf) } res = simple_statfs(dentry, buf); done: + p9_client_clunk(fid); return res; } diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c index ac8ff8ca4c11..87217dd0433e 100644 --- a/fs/9p/xattr.c +++ b/fs/9p/xattr.c @@ -71,14 +71,17 @@ ssize_t v9fs_xattr_get(struct dentry *dentry, const char *name, void *buffer, size_t buffer_size) { struct p9_fid *fid; + int ret; p9_debug(P9_DEBUG_VFS, "name = %s value_len = %zu\n", name, buffer_size); fid = v9fs_fid_lookup(dentry); if (IS_ERR(fid)) return PTR_ERR(fid); + ret = v9fs_fid_xattr_get(fid, name, buffer, buffer_size); + p9_client_clunk(fid); - return v9fs_fid_xattr_get(fid, name, buffer, buffer_size); + return ret; } /* @@ -96,8 +99,15 @@ ssize_t v9fs_xattr_get(struct dentry *dentry, const char *name, int v9fs_xattr_set(struct dentry *dentry, const char *name, const void *value, size_t value_len, int flags) { - struct p9_fid *fid = v9fs_fid_lookup(dentry); - return v9fs_fid_xattr_set(fid, name, value, value_len, flags); + int ret; + struct p9_fid *fid; + + fid = v9fs_fid_lookup(dentry); + if (IS_ERR(fid)) + return PTR_ERR(fid); + ret = v9fs_fid_xattr_set(fid, name, value, value_len, flags); + p9_client_clunk(fid); + return ret; } int v9fs_fid_xattr_set(struct p9_fid *fid, const char *name, diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 9068d5578a26..7bd659ad959e 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -350,7 +350,7 @@ static int afs_dir_iterate_block(struct afs_vnode *dvnode, unsigned blkoff) { union afs_xdr_dirent *dire; - unsigned offset, next, curr; + unsigned offset, next, curr, nr_slots; size_t nlen; int tmp; @@ -363,13 +363,12 @@ static int afs_dir_iterate_block(struct afs_vnode *dvnode, offset < 
AFS_DIR_SLOTS_PER_BLOCK; offset = next ) { - next = offset + 1; - /* skip entries marked unused in the bitmap */ if (!(block->hdr.bitmap[offset / 8] & (1 << (offset % 8)))) { _debug("ENT[%zu.%u]: unused", blkoff / sizeof(union afs_xdr_dir_block), offset); + next = offset + 1; if (offset >= curr) ctx->pos = blkoff + next * sizeof(union afs_xdr_dirent); @@ -381,35 +380,39 @@ static int afs_dir_iterate_block(struct afs_vnode *dvnode, nlen = strnlen(dire->u.name, sizeof(*block) - offset * sizeof(union afs_xdr_dirent)); + if (nlen > AFSNAMEMAX - 1) { + _debug("ENT[%zu]: name too long (len %u/%zu)", + blkoff / sizeof(union afs_xdr_dir_block), + offset, nlen); + return afs_bad(dvnode, afs_file_error_dir_name_too_long); + } _debug("ENT[%zu.%u]: %s %zu \"%s\"", blkoff / sizeof(union afs_xdr_dir_block), offset, (offset < curr ? "skip" : "fill"), nlen, dire->u.name); - /* work out where the next possible entry is */ - for (tmp = nlen; tmp > 15; tmp -= sizeof(union afs_xdr_dirent)) { - if (next >= AFS_DIR_SLOTS_PER_BLOCK) { - _debug("ENT[%zu.%u]:" - " %u travelled beyond end dir block" - " (len %u/%zu)", - blkoff / sizeof(union afs_xdr_dir_block), - offset, next, tmp, nlen); - return afs_bad(dvnode, afs_file_error_dir_over_end); - } - if (!(block->hdr.bitmap[next / 8] & - (1 << (next % 8)))) { - _debug("ENT[%zu.%u]:" - " %u unmarked extension (len %u/%zu)", + nr_slots = afs_dir_calc_slots(nlen); + next = offset + nr_slots; + if (next > AFS_DIR_SLOTS_PER_BLOCK) { + _debug("ENT[%zu.%u]:" + " %u extends beyond end dir block" + " (len %zu)", + blkoff / sizeof(union afs_xdr_dir_block), + offset, next, nlen); + return afs_bad(dvnode, afs_file_error_dir_over_end); + } + + /* Check that the name-extension dirents are all allocated */ + for (tmp = 1; tmp < nr_slots; tmp++) { + unsigned int ix = offset + tmp; + if (!(block->hdr.bitmap[ix / 8] & (1 << (ix % 8)))) { + _debug("ENT[%zu.u]:" + " %u unmarked extension (%u/%u)", blkoff / sizeof(union afs_xdr_dir_block), - offset, next, tmp, 
nlen); + offset, tmp, nr_slots); return afs_bad(dvnode, afs_file_error_dir_unmarked_ext); } - - _debug("ENT[%zu.%u]: ext %u/%zu", - blkoff / sizeof(union afs_xdr_dir_block), - next, tmp, nlen); - next++; } /* skip if starts before the current position */ diff --git a/fs/afs/dir_edit.c b/fs/afs/dir_edit.c index 2ffe09abae7f..f4600c1353ad 100644 --- a/fs/afs/dir_edit.c +++ b/fs/afs/dir_edit.c @@ -215,8 +215,7 @@ void afs_edit_dir_add(struct afs_vnode *vnode, } /* Work out how many slots we're going to need. */ - need_slots = round_up(12 + name->len + 1 + 4, AFS_DIR_DIRENT_SIZE); - need_slots /= AFS_DIR_DIRENT_SIZE; + need_slots = afs_dir_calc_slots(name->len); meta_page = kmap(page0); meta = &meta_page->blocks[0]; @@ -393,8 +392,7 @@ void afs_edit_dir_remove(struct afs_vnode *vnode, } /* Work out how many slots we're going to discard. */ - need_slots = round_up(12 + name->len + 1 + 4, AFS_DIR_DIRENT_SIZE); - need_slots /= AFS_DIR_DIRENT_SIZE; + need_slots = afs_dir_calc_slots(name->len); meta_page = kmap(page0); meta = &meta_page->blocks[0]; diff --git a/fs/afs/xdr_fs.h b/fs/afs/xdr_fs.h index 94f1f398eefa..8ca868164507 100644 --- a/fs/afs/xdr_fs.h +++ b/fs/afs/xdr_fs.h @@ -54,10 +54,16 @@ union afs_xdr_dirent { __be16 hash_next; __be32 vnode; __be32 unique; - u8 name[16]; - u8 overflow[4]; /* if any char of the name (inc - * NUL) reaches here, consume - * the next dirent too */ + u8 name[]; + /* When determining the number of dirent slots needed to + * represent a directory entry, name should be assumed to be 16 + * bytes, due to a now-standardised (mis)calculation, but it is + * in fact 20 bytes in size. afs_dir_calc_slots() should be + * used for this. + * + * For names longer than (16 or) 20 bytes, extra slots should + * be annexed to this one using the extended_name format. 
+ */ } u; u8 extended_name[32]; } __packed; @@ -96,4 +102,15 @@ struct afs_xdr_dir_page { union afs_xdr_dir_block blocks[AFS_DIR_BLOCKS_PER_PAGE]; }; +/* + * Calculate the number of dirent slots required for any given name length. + * The calculation is made assuming the part of the name in the first slot is + * 16 bytes, rather than 20, but this miscalculation is now standardised. + */ +static inline unsigned int afs_dir_calc_slots(size_t name_len) +{ + name_len++; /* NUL-terminated */ + return 1 + ((name_len + 15) / AFS_DIR_DIRENT_SIZE); +} + #endif /* XDR_FS_H */ diff --git a/fs/block_dev.c b/fs/block_dev.c index 9e56ee1f2652..3e5b02f6606c 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1,9 +1,8 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * linux/fs/block_dev.c - * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright (C) 2001 Andrea Arcangeli <[email protected]> SuSE + * Copyright (C) 2016 - 2020 Christoph Hellwig */ #include <linux/init.h> @@ -1056,7 +1055,6 @@ static void bd_finish_claiming(struct block_device *bdev, void *holder) /** * bd_abort_claiming - abort claiming of a block device * @bdev: block device of interest - * @whole: whole block device * @holder: holder that has claimed @bdev * * Abort claiming of a block device when the exclusive open failed. This can be @@ -1829,6 +1827,7 @@ const struct file_operations def_blk_fops = { /** * lookup_bdev - lookup a struct block_device by name * @pathname: special file representing the block device + * @dev: return value of the block device's dev_t * * Get a reference to the blockdevice at @pathname in the current * namespace if possible and return it. 
Return ERR_PTR(error) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 98c15ff2e599..840587037b59 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2475,6 +2475,22 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry, return r; } +static void encode_timestamp_and_gids(void **p, + const struct ceph_mds_request *req) +{ + struct ceph_timespec ts; + int i; + + ceph_encode_timespec64(&ts, &req->r_stamp); + ceph_encode_copy(p, &ts, sizeof(ts)); + + /* gid_list */ + ceph_encode_32(p, req->r_cred->group_info->ngroups); + for (i = 0; i < req->r_cred->group_info->ngroups; i++) + ceph_encode_64(p, from_kgid(&init_user_ns, + req->r_cred->group_info->gid[i])); +} + /* * called under mdsc->mutex */ @@ -2491,7 +2507,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, u64 ino1 = 0, ino2 = 0; int pathlen1 = 0, pathlen2 = 0; bool freepath1 = false, freepath2 = false; - int len, i; + int len; u16 releases; void *p, *end; int ret; @@ -2517,17 +2533,10 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, goto out_free1; } - if (legacy) { - /* Old style */ - len = sizeof(*head); - } else { - /* New style: add gid_list and any later fields */ - len = sizeof(struct ceph_mds_request_head) + sizeof(u32) + - (sizeof(u64) * req->r_cred->group_info->ngroups); - } - + len = legacy ? 
sizeof(*head) : sizeof(struct ceph_mds_request_head); len += pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)) + sizeof(struct ceph_timespec); + len += sizeof(u32) + (sizeof(u64) * req->r_cred->group_info->ngroups); /* calculate (max) length for cap releases */ len += sizeof(struct ceph_mds_request_release) * @@ -2548,7 +2557,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, msg->hdr.tid = cpu_to_le64(req->r_tid); /* - * The old ceph_mds_request_header didn't contain a version field, and + * The old ceph_mds_request_head didn't contain a version field, and * one was added when we moved the message version from 3->4. */ if (legacy) { @@ -2609,20 +2618,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, head->num_releases = cpu_to_le16(releases); - /* time stamp */ - { - struct ceph_timespec ts; - ceph_encode_timespec64(&ts, &req->r_stamp); - ceph_encode_copy(&p, &ts, sizeof(ts)); - } - - /* gid list */ - if (!legacy) { - ceph_encode_32(&p, req->r_cred->group_info->ngroups); - for (i = 0; i < req->r_cred->group_info->ngroups; i++) - ceph_encode_64(&p, from_kgid(&init_user_ns, - req->r_cred->group_info->gid[i])); - } + encode_timestamp_and_gids(&p, req); if (WARN_ON_ONCE(p > end)) { ceph_msg_put(msg); @@ -2730,13 +2726,8 @@ static int __prepare_send_request(struct ceph_mds_session *session, /* remove cap/dentry releases from message */ rhead->num_releases = 0; - /* time stamp */ p = msg->front.iov_base + req->r_request_release_offset; - { - struct ceph_timespec ts; - ceph_encode_timespec64(&ts, &req->r_stamp); - ceph_encode_copy(&p, &ts, sizeof(ts)); - } + encode_timestamp_and_gids(&p, req); msg->front.iov_len = p - msg->front.iov_base; msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index 604f65f4b6c5..fe03cbdae959 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig @@ -60,9 +60,9 @@ config CIFS_STATS2 Enabling this option will allow more 
detailed statistics on SMB request timing to be displayed in /proc/fs/cifs/DebugData and also allow optional logging of slow responses to dmesg (depending on the - value of /proc/fs/cifs/cifsFYI, see fs/cifs/README for more details). - These additional statistics may have a minor effect on performance - and memory utilization. + value of /proc/fs/cifs/cifsFYI). See Documentation/admin-guide/cifs/usage.rst + for more details. These additional statistics may have a minor effect + on performance and memory utilization. Unless you are a developer or are doing network performance analysis or tuning, say N. @@ -102,10 +102,10 @@ config CIFS_WEAK_PW_HASH is enabled in the kernel build, LANMAN authentication will not be used automatically. At runtime LANMAN mounts are disabled but can be set to required (or optional) either in - /proc/fs/cifs (see fs/cifs/README for more detail) or via an - option on the mount command. This support is disabled by - default in order to reduce the possibility of a downgrade - attack. + /proc/fs/cifs (see Documentation/admin-guide/cifs/usage.rst for + more detail) or via an option on the mount command. This support + is disabled by default in order to reduce the possibility of a + downgrade attack. If unsure, say N. @@ -190,6 +190,17 @@ config CIFS_DFS_UPCALL servers if their addresses change or for implicit mounts of DFS junction points. If unsure, say Y. +config CIFS_SWN_UPCALL + bool "SWN feature support" + depends on CIFS + help + The Service Witness Protocol (SWN) is used to get notifications + from a highly available server of resource state changes. This + feature enables an upcall mechanism for CIFS which contacts a + userspace daemon to establish the DCE/RPC connection to retrieve + the cluster available interfaces and resource change notifications. + If unsure, say Y. 
+ config CIFS_NFSD_EXPORT bool "Allow nfsd to export CIFS file system" depends on CIFS && BROKEN diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile index cd17d0e50f2a..5213b20843b5 100644 --- a/fs/cifs/Makefile +++ b/fs/cifs/Makefile @@ -8,7 +8,7 @@ obj-$(CONFIG_CIFS) += cifs.o cifs-y := trace.o cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o \ inode.o link.o misc.o netmisc.o smbencrypt.o transport.o asn1.o \ cifs_unicode.o nterr.o cifsencrypt.o \ - readdir.o ioctl.o sess.o export.o smb1ops.o winucase.o \ + readdir.o ioctl.o sess.o export.o smb1ops.o unc.o winucase.o \ smb2ops.o smb2maperror.o smb2transport.o \ smb2misc.o smb2pdu.o smb2inode.o smb2file.o cifsacl.o fs_context.o @@ -18,6 +18,8 @@ cifs-$(CONFIG_CIFS_UPCALL) += cifs_spnego.o cifs-$(CONFIG_CIFS_DFS_UPCALL) += dns_resolve.o cifs_dfs_ref.o dfs_cache.o +cifs-$(CONFIG_CIFS_SWN_UPCALL) += netlink.o cifs_swn.o + cifs-$(CONFIG_CIFS_FSCACHE) += fscache.o cache.o cifs-$(CONFIG_CIFS_SMB_DIRECT) += smbdirect.o diff --git a/fs/cifs/cache.c b/fs/cifs/cache.c index 0f2adecb94f2..488fe0ffc1ef 100644 --- a/fs/cifs/cache.c +++ b/fs/cifs/cache.c @@ -53,30 +53,6 @@ const struct fscache_cookie_def cifs_fscache_server_index_def = { .type = FSCACHE_COOKIE_TYPE_INDEX, }; -char *extract_sharename(const char *treename) -{ - const char *src; - char *delim, *dst; - int len; - - /* skip double chars at the beginning */ - src = treename + 2; - - /* share name is always preceded by '\\' now */ - delim = strchr(src, '\\'); - if (!delim) - return ERR_PTR(-EINVAL); - delim++; - len = strlen(delim); - - /* caller has to free the memory */ - dst = kstrndup(delim, len, GFP_KERNEL); - if (!dst) - return ERR_PTR(-ENOMEM); - - return dst; -} - static enum fscache_checkaux cifs_fscache_super_check_aux(void *cookie_netfs_data, const void *data, diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 53588d7517b4..b231dcf1d1f9 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -23,6 +23,9 @@ #ifdef 
CONFIG_CIFS_SMB_DIRECT #include "smbdirect.h" #endif +#ifdef CONFIG_CIFS_SWN_UPCALL +#include "cifs_swn.h" +#endif void cifs_dump_mem(char *label, void *data, int length) @@ -115,6 +118,10 @@ static void cifs_debug_tcon(struct seq_file *m, struct cifs_tcon *tcon) seq_printf(m, " POSIX Extensions"); if (tcon->ses->server->ops->dump_share_caps) tcon->ses->server->ops->dump_share_caps(m, tcon); +#ifdef CONFIG_CIFS_SWN_UPCALL + if (tcon->use_witness) + seq_puts(m, " Witness"); +#endif if (tcon->need_reconnect) seq_puts(m, "\tDISCONNECTED "); @@ -262,6 +269,9 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) seq_printf(m, ",XATTR"); #endif seq_printf(m, ",ACL"); +#ifdef CONFIG_CIFS_SWN_UPCALL + seq_puts(m, ",WITNESS"); +#endif seq_putc(m, '\n'); seq_printf(m, "CIFSMaxBufSize: %d\n", CIFSMaxBufSize); seq_printf(m, "Active VFS Requests: %d\n", GlobalTotalActiveXid); @@ -462,6 +472,9 @@ skip_rdma: spin_unlock(&cifs_tcp_ses_lock); seq_putc(m, '\n'); +#ifdef CONFIG_CIFS_SWN_UPCALL + cifs_swn_dump(m); +#endif /* BB add code to dump additional info such as TCP session info now */ return 0; } diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index cc3ada12848d..e4c6ae47a796 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -23,6 +23,7 @@ #include "cifs_debug.h" #include "cifs_unicode.h" #include "dfs_cache.h" +#include "fs_context.h" static LIST_HEAD(cifs_dfs_automount_list); @@ -124,7 +125,6 @@ cifs_build_devname(char *nodename, const char *prepath) * @sb_mountdata: parent/root DFS mount options (template) * @fullpath: full path in UNC format * @ref: optional server's referral - * @devname: optional pointer for saving device name * * creates mount options for submount based on template options sb_mountdata * and replacing unc,ip,prefixpath options with ones we've got form ref_unc. 
@@ -134,8 +134,7 @@ cifs_build_devname(char *nodename, const char *prepath) */ char *cifs_compose_mount_options(const char *sb_mountdata, const char *fullpath, - const struct dfs_info3_param *ref, - char **devname) + const struct dfs_info3_param *ref) { int rc; char *name; @@ -232,10 +231,7 @@ char *cifs_compose_mount_options(const char *sb_mountdata, strcat(mountdata, "ip="); strcat(mountdata, srvIP); - if (devname) - *devname = name; - else - kfree(name); + kfree(name); /*cifs_dbg(FYI, "%s: parent mountdata: %s\n", __func__, sb_mountdata);*/ /*cifs_dbg(FYI, "%s: submount mountdata: %s\n", __func__, mountdata );*/ @@ -258,6 +254,7 @@ compose_mount_options_err: * to perform failover in case we failed to connect to the first target in the * referral. * + * @mntpt: directory entry for the path we are trying to automount * @cifs_sb: parent/root superblock * @fullpath: full path in UNC format */ @@ -275,9 +272,13 @@ static struct vfsmount *cifs_dfs_do_mount(struct dentry *mntpt, convert_delimiter(devname, '/'); + /* TODO: change to call fs_context_for_mount(), fill in context directly, call fc_mount */ + + /* See afs_mntpt_do_automount in fs/afs/mntpt.c for an example */ + /* strip first '\' from fullpath */ - mountdata = cifs_compose_mount_options(cifs_sb->mountdata, - fullpath + 1, NULL, NULL); + mountdata = cifs_compose_mount_options(cifs_sb->ctx->mount_options, + fullpath + 1, NULL); if (IS_ERR(mountdata)) { kfree(devname); return (struct vfsmount *)mountdata; diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h index 6e7c4427369d..aa77edc12212 100644 --- a/fs/cifs/cifs_fs_sb.h +++ b/fs/cifs/cifs_fs_sb.h @@ -61,19 +61,9 @@ struct cifs_sb_info { spinlock_t tlink_tree_lock; struct tcon_link *master_tlink; struct nls_table *local_nls; - unsigned int bsize; - unsigned int rsize; - unsigned int wsize; - unsigned long actimeo; /* attribute cache timeout (jiffies) */ + struct smb3_fs_context *ctx; atomic_t active; - kuid_t mnt_uid; - kgid_t mnt_gid; - kuid_t 
mnt_backupuid; - kgid_t mnt_backupgid; - umode_t mnt_file_mode; - umode_t mnt_dir_mode; unsigned int mnt_cifs_flags; - char *mountdata; /* options received at mount time or via DFS refs */ struct delayed_work prune_tlinks; struct rcu_head rcu; diff --git a/fs/cifs/cifs_swn.c b/fs/cifs/cifs_swn.c new file mode 100644 index 000000000000..d35f599aa00e --- /dev/null +++ b/fs/cifs/cifs_swn.c @@ -0,0 +1,695 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Witness Service client for CIFS + * + * Copyright (c) 2020 Samuel Cabrero <[email protected]> + */ + +#include <linux/kref.h> +#include <net/genetlink.h> +#include <uapi/linux/cifs/cifs_netlink.h> + +#include "cifs_swn.h" +#include "cifsglob.h" +#include "cifsproto.h" +#include "fscache.h" +#include "cifs_debug.h" +#include "netlink.h" + +static DEFINE_IDR(cifs_swnreg_idr); +static DEFINE_MUTEX(cifs_swnreg_idr_mutex); + +struct cifs_swn_reg { + int id; + struct kref ref_count; + + const char *net_name; + const char *share_name; + bool net_name_notify; + bool share_name_notify; + bool ip_notify; + + struct cifs_tcon *tcon; +}; + +static int cifs_swn_auth_info_krb(struct cifs_tcon *tcon, struct sk_buff *skb) +{ + int ret; + + ret = nla_put_flag(skb, CIFS_GENL_ATTR_SWN_KRB_AUTH); + if (ret < 0) + return ret; + + return 0; +} + +static int cifs_swn_auth_info_ntlm(struct cifs_tcon *tcon, struct sk_buff *skb) +{ + int ret; + + if (tcon->ses->user_name != NULL) { + ret = nla_put_string(skb, CIFS_GENL_ATTR_SWN_USER_NAME, tcon->ses->user_name); + if (ret < 0) + return ret; + } + + if (tcon->ses->password != NULL) { + ret = nla_put_string(skb, CIFS_GENL_ATTR_SWN_PASSWORD, tcon->ses->password); + if (ret < 0) + return ret; + } + + if (tcon->ses->domainName != NULL) { + ret = nla_put_string(skb, CIFS_GENL_ATTR_SWN_DOMAIN_NAME, tcon->ses->domainName); + if (ret < 0) + return ret; + } + + return 0; +} + +/* + * Sends a register message to the userspace daemon based on the registration. 
+ * The authentication information to connect to the witness service is bundled + * into the message. + */ +static int cifs_swn_send_register_message(struct cifs_swn_reg *swnreg) +{ + struct sk_buff *skb; + struct genlmsghdr *hdr; + enum securityEnum authtype; + struct sockaddr_storage *addr; + int ret; + + skb = genlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (skb == NULL) { + ret = -ENOMEM; + goto fail; + } + + hdr = genlmsg_put(skb, 0, 0, &cifs_genl_family, 0, CIFS_GENL_CMD_SWN_REGISTER); + if (hdr == NULL) { + ret = -ENOMEM; + goto nlmsg_fail; + } + + ret = nla_put_u32(skb, CIFS_GENL_ATTR_SWN_REGISTRATION_ID, swnreg->id); + if (ret < 0) + goto nlmsg_fail; + + ret = nla_put_string(skb, CIFS_GENL_ATTR_SWN_NET_NAME, swnreg->net_name); + if (ret < 0) + goto nlmsg_fail; + + ret = nla_put_string(skb, CIFS_GENL_ATTR_SWN_SHARE_NAME, swnreg->share_name); + if (ret < 0) + goto nlmsg_fail; + + /* + * If there is an address stored use it instead of the server address, because we are + * in the process of reconnecting to it after a share has been moved or we have been + * told to switch to it (client move message). In these cases we unregister from the + * server address and register to the new address when we receive the notification. 
+ */ + if (swnreg->tcon->ses->server->use_swn_dstaddr) + addr = &swnreg->tcon->ses->server->swn_dstaddr; + else + addr = &swnreg->tcon->ses->server->dstaddr; + + ret = nla_put(skb, CIFS_GENL_ATTR_SWN_IP, sizeof(struct sockaddr_storage), addr); + if (ret < 0) + goto nlmsg_fail; + + if (swnreg->net_name_notify) { + ret = nla_put_flag(skb, CIFS_GENL_ATTR_SWN_NET_NAME_NOTIFY); + if (ret < 0) + goto nlmsg_fail; + } + + if (swnreg->share_name_notify) { + ret = nla_put_flag(skb, CIFS_GENL_ATTR_SWN_SHARE_NAME_NOTIFY); + if (ret < 0) + goto nlmsg_fail; + } + + if (swnreg->ip_notify) { + ret = nla_put_flag(skb, CIFS_GENL_ATTR_SWN_IP_NOTIFY); + if (ret < 0) + goto nlmsg_fail; + } + + authtype = cifs_select_sectype(swnreg->tcon->ses->server, swnreg->tcon->ses->sectype); + switch (authtype) { + case Kerberos: + ret = cifs_swn_auth_info_krb(swnreg->tcon, skb); + if (ret < 0) { + cifs_dbg(VFS, "%s: Failed to get kerberos auth info: %d\n", __func__, ret); + goto nlmsg_fail; + } + break; + case LANMAN: + case NTLM: + case NTLMv2: + case RawNTLMSSP: + ret = cifs_swn_auth_info_ntlm(swnreg->tcon, skb); + if (ret < 0) { + cifs_dbg(VFS, "%s: Failed to get NTLM auth info: %d\n", __func__, ret); + goto nlmsg_fail; + } + break; + default: + cifs_dbg(VFS, "%s: secType %d not supported!\n", __func__, authtype); + ret = -EINVAL; + goto nlmsg_fail; + } + + genlmsg_end(skb, hdr); + genlmsg_multicast(&cifs_genl_family, skb, 0, CIFS_GENL_MCGRP_SWN, GFP_ATOMIC); + + cifs_dbg(FYI, "%s: Message to register for network name %s with id %d sent\n", __func__, + swnreg->net_name, swnreg->id); + + return 0; + +nlmsg_fail: + genlmsg_cancel(skb, hdr); + nlmsg_free(skb); +fail: + return ret; +} + +/* + * Sends an uregister message to the userspace daemon based on the registration + */ +static int cifs_swn_send_unregister_message(struct cifs_swn_reg *swnreg) +{ + struct sk_buff *skb; + struct genlmsghdr *hdr; + int ret; + + skb = genlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (skb == NULL) + return 
-ENOMEM; + + hdr = genlmsg_put(skb, 0, 0, &cifs_genl_family, 0, CIFS_GENL_CMD_SWN_UNREGISTER); + if (hdr == NULL) { + ret = -ENOMEM; + goto nlmsg_fail; + } + + ret = nla_put_u32(skb, CIFS_GENL_ATTR_SWN_REGISTRATION_ID, swnreg->id); + if (ret < 0) + goto nlmsg_fail; + + ret = nla_put_string(skb, CIFS_GENL_ATTR_SWN_NET_NAME, swnreg->net_name); + if (ret < 0) + goto nlmsg_fail; + + ret = nla_put_string(skb, CIFS_GENL_ATTR_SWN_SHARE_NAME, swnreg->share_name); + if (ret < 0) + goto nlmsg_fail; + + ret = nla_put(skb, CIFS_GENL_ATTR_SWN_IP, sizeof(struct sockaddr_storage), + &swnreg->tcon->ses->server->dstaddr); + if (ret < 0) + goto nlmsg_fail; + + if (swnreg->net_name_notify) { + ret = nla_put_flag(skb, CIFS_GENL_ATTR_SWN_NET_NAME_NOTIFY); + if (ret < 0) + goto nlmsg_fail; + } + + if (swnreg->share_name_notify) { + ret = nla_put_flag(skb, CIFS_GENL_ATTR_SWN_SHARE_NAME_NOTIFY); + if (ret < 0) + goto nlmsg_fail; + } + + if (swnreg->ip_notify) { + ret = nla_put_flag(skb, CIFS_GENL_ATTR_SWN_IP_NOTIFY); + if (ret < 0) + goto nlmsg_fail; + } + + genlmsg_end(skb, hdr); + genlmsg_multicast(&cifs_genl_family, skb, 0, CIFS_GENL_MCGRP_SWN, GFP_ATOMIC); + + cifs_dbg(FYI, "%s: Message to unregister for network name %s with id %d sent\n", __func__, + swnreg->net_name, swnreg->id); + + return 0; + +nlmsg_fail: + genlmsg_cancel(skb, hdr); + nlmsg_free(skb); + return ret; +} + +/* + * Try to find a matching registration for the tcon's server name and share name. + * Calls to this funciton must be protected by cifs_swnreg_idr_mutex. 
+ * TODO Try to avoid memory allocations + */ +static struct cifs_swn_reg *cifs_find_swn_reg(struct cifs_tcon *tcon) +{ + struct cifs_swn_reg *swnreg; + int id; + const char *share_name; + const char *net_name; + + net_name = extract_hostname(tcon->treeName); + if (IS_ERR(net_name)) { + int ret; + + ret = PTR_ERR(net_name); + cifs_dbg(VFS, "%s: failed to extract host name from target '%s': %d\n", + __func__, tcon->treeName, ret); + return ERR_PTR(-EINVAL); + } + + share_name = extract_sharename(tcon->treeName); + if (IS_ERR(share_name)) { + int ret; + + ret = PTR_ERR(net_name); + cifs_dbg(VFS, "%s: failed to extract share name from target '%s': %d\n", + __func__, tcon->treeName, ret); + kfree(net_name); + return ERR_PTR(-EINVAL); + } + + idr_for_each_entry(&cifs_swnreg_idr, swnreg, id) { + if (strcasecmp(swnreg->net_name, net_name) != 0 + || strcasecmp(swnreg->share_name, share_name) != 0) { + continue; + } + + cifs_dbg(FYI, "Existing swn registration for %s:%s found\n", swnreg->net_name, + swnreg->share_name); + + kfree(net_name); + kfree(share_name); + + return swnreg; + } + + kfree(net_name); + kfree(share_name); + + return ERR_PTR(-EEXIST); +} + +/* + * Get a registration for the tcon's server and share name, allocating a new one if it does not + * exists + */ +static struct cifs_swn_reg *cifs_get_swn_reg(struct cifs_tcon *tcon) +{ + struct cifs_swn_reg *reg = NULL; + int ret; + + mutex_lock(&cifs_swnreg_idr_mutex); + + /* Check if we are already registered for this network and share names */ + reg = cifs_find_swn_reg(tcon); + if (!IS_ERR(reg)) { + kref_get(®->ref_count); + mutex_unlock(&cifs_swnreg_idr_mutex); + return reg; + } else if (PTR_ERR(reg) != -EEXIST) { + mutex_unlock(&cifs_swnreg_idr_mutex); + return reg; + } + + reg = kmalloc(sizeof(struct cifs_swn_reg), GFP_ATOMIC); + if (reg == NULL) { + mutex_unlock(&cifs_swnreg_idr_mutex); + return ERR_PTR(-ENOMEM); + } + + kref_init(®->ref_count); + + reg->id = idr_alloc(&cifs_swnreg_idr, reg, 1, 0, 
GFP_ATOMIC); + if (reg->id < 0) { + cifs_dbg(FYI, "%s: failed to allocate registration id\n", __func__); + ret = reg->id; + goto fail; + } + + reg->net_name = extract_hostname(tcon->treeName); + if (IS_ERR(reg->net_name)) { + ret = PTR_ERR(reg->net_name); + cifs_dbg(VFS, "%s: failed to extract host name from target: %d\n", __func__, ret); + goto fail_idr; + } + + reg->share_name = extract_sharename(tcon->treeName); + if (IS_ERR(reg->share_name)) { + ret = PTR_ERR(reg->share_name); + cifs_dbg(VFS, "%s: failed to extract share name from target: %d\n", __func__, ret); + goto fail_net_name; + } + + reg->net_name_notify = true; + reg->share_name_notify = true; + reg->ip_notify = (tcon->capabilities & SMB2_SHARE_CAP_SCALEOUT); + + reg->tcon = tcon; + + mutex_unlock(&cifs_swnreg_idr_mutex); + + return reg; + +fail_net_name: + kfree(reg->net_name); +fail_idr: + idr_remove(&cifs_swnreg_idr, reg->id); +fail: + kfree(reg); + mutex_unlock(&cifs_swnreg_idr_mutex); + return ERR_PTR(ret); +} + +static void cifs_swn_reg_release(struct kref *ref) +{ + struct cifs_swn_reg *swnreg = container_of(ref, struct cifs_swn_reg, ref_count); + int ret; + + ret = cifs_swn_send_unregister_message(swnreg); + if (ret < 0) + cifs_dbg(VFS, "%s: Failed to send unregister message: %d\n", __func__, ret); + + idr_remove(&cifs_swnreg_idr, swnreg->id); + kfree(swnreg->net_name); + kfree(swnreg->share_name); + kfree(swnreg); +} + +static void cifs_put_swn_reg(struct cifs_swn_reg *swnreg) +{ + mutex_lock(&cifs_swnreg_idr_mutex); + kref_put(&swnreg->ref_count, cifs_swn_reg_release); + mutex_unlock(&cifs_swnreg_idr_mutex); +} + +static int cifs_swn_resource_state_changed(struct cifs_swn_reg *swnreg, const char *name, int state) +{ + int i; + + switch (state) { + case CIFS_SWN_RESOURCE_STATE_UNAVAILABLE: + cifs_dbg(FYI, "%s: resource name '%s' become unavailable\n", __func__, name); + for (i = 0; i < swnreg->tcon->ses->chan_count; i++) { + spin_lock(&GlobalMid_Lock); + if 
(swnreg->tcon->ses->chans[i].server->tcpStatus != CifsExiting) + swnreg->tcon->ses->chans[i].server->tcpStatus = CifsNeedReconnect; + spin_unlock(&GlobalMid_Lock); + } + break; + case CIFS_SWN_RESOURCE_STATE_AVAILABLE: + cifs_dbg(FYI, "%s: resource name '%s' become available\n", __func__, name); + for (i = 0; i < swnreg->tcon->ses->chan_count; i++) { + spin_lock(&GlobalMid_Lock); + if (swnreg->tcon->ses->chans[i].server->tcpStatus != CifsExiting) + swnreg->tcon->ses->chans[i].server->tcpStatus = CifsNeedReconnect; + spin_unlock(&GlobalMid_Lock); + } + break; + case CIFS_SWN_RESOURCE_STATE_UNKNOWN: + cifs_dbg(FYI, "%s: resource name '%s' changed to unknown state\n", __func__, name); + break; + } + return 0; +} + +static bool cifs_sockaddr_equal(struct sockaddr_storage *addr1, struct sockaddr_storage *addr2) +{ + if (addr1->ss_family != addr2->ss_family) + return false; + + if (addr1->ss_family == AF_INET) { + return (memcmp(&((const struct sockaddr_in *)addr1)->sin_addr, + &((const struct sockaddr_in *)addr2)->sin_addr, + sizeof(struct in_addr)) == 0); + } + + if (addr1->ss_family == AF_INET6) { + return (memcmp(&((const struct sockaddr_in6 *)addr1)->sin6_addr, + &((const struct sockaddr_in6 *)addr2)->sin6_addr, + sizeof(struct in6_addr)) == 0); + } + + return false; +} + +static int cifs_swn_store_swn_addr(const struct sockaddr_storage *new, + const struct sockaddr_storage *old, + struct sockaddr_storage *dst) +{ + __be16 port; + + if (old->ss_family == AF_INET) { + struct sockaddr_in *ipv4 = (struct sockaddr_in *)old; + + port = ipv4->sin_port; + } + + if (old->ss_family == AF_INET6) { + struct sockaddr_in6 *ipv6 = (struct sockaddr_in6 *)old; + + port = ipv6->sin6_port; + } + + if (new->ss_family == AF_INET) { + struct sockaddr_in *ipv4 = (struct sockaddr_in *)new; + + ipv4->sin_port = port; + } + + if (new->ss_family == AF_INET6) { + struct sockaddr_in6 *ipv6 = (struct sockaddr_in6 *)new; + + ipv6->sin6_port = port; + } + + *dst = *new; + + return 0; +} + +static 
int cifs_swn_reconnect(struct cifs_tcon *tcon, struct sockaddr_storage *addr) +{ + int ret = 0; + + /* Store the reconnect address */ + mutex_lock(&tcon->ses->server->srv_mutex); + if (cifs_sockaddr_equal(&tcon->ses->server->dstaddr, addr)) + goto unlock; + + ret = cifs_swn_store_swn_addr(addr, &tcon->ses->server->dstaddr, + &tcon->ses->server->swn_dstaddr); + if (ret < 0) { + cifs_dbg(VFS, "%s: failed to store address: %d\n", __func__, ret); + goto unlock; + } + tcon->ses->server->use_swn_dstaddr = true; + + /* + * Unregister to stop receiving notifications for the old IP address. + */ + ret = cifs_swn_unregister(tcon); + if (ret < 0) { + cifs_dbg(VFS, "%s: Failed to unregister for witness notifications: %d\n", + __func__, ret); + goto unlock; + } + + /* + * And register to receive notifications for the new IP address now that we have + * stored the new address. + */ + ret = cifs_swn_register(tcon); + if (ret < 0) { + cifs_dbg(VFS, "%s: Failed to register for witness notifications: %d\n", + __func__, ret); + goto unlock; + } + + spin_lock(&GlobalMid_Lock); + if (tcon->ses->server->tcpStatus != CifsExiting) + tcon->ses->server->tcpStatus = CifsNeedReconnect; + spin_unlock(&GlobalMid_Lock); + +unlock: + mutex_unlock(&tcon->ses->server->srv_mutex); + + return ret; +} + +static int cifs_swn_client_move(struct cifs_swn_reg *swnreg, struct sockaddr_storage *addr) +{ + struct sockaddr_in *ipv4 = (struct sockaddr_in *)addr; + struct sockaddr_in6 *ipv6 = (struct sockaddr_in6 *)addr; + + if (addr->ss_family == AF_INET) + cifs_dbg(FYI, "%s: move to %pI4\n", __func__, &ipv4->sin_addr); + else if (addr->ss_family == AF_INET6) + cifs_dbg(FYI, "%s: move to %pI6\n", __func__, &ipv6->sin6_addr); + + return cifs_swn_reconnect(swnreg->tcon, addr); +} + +int cifs_swn_notify(struct sk_buff *skb, struct genl_info *info) +{ + struct cifs_swn_reg *swnreg; + char name[256]; + int type; + + if (info->attrs[CIFS_GENL_ATTR_SWN_REGISTRATION_ID]) { + int swnreg_id; + + swnreg_id = 
nla_get_u32(info->attrs[CIFS_GENL_ATTR_SWN_REGISTRATION_ID]); + mutex_lock(&cifs_swnreg_idr_mutex); + swnreg = idr_find(&cifs_swnreg_idr, swnreg_id); + mutex_unlock(&cifs_swnreg_idr_mutex); + if (swnreg == NULL) { + cifs_dbg(FYI, "%s: registration id %d not found\n", __func__, swnreg_id); + return -EINVAL; + } + } else { + cifs_dbg(FYI, "%s: missing registration id attribute\n", __func__); + return -EINVAL; + } + + if (info->attrs[CIFS_GENL_ATTR_SWN_NOTIFICATION_TYPE]) { + type = nla_get_u32(info->attrs[CIFS_GENL_ATTR_SWN_NOTIFICATION_TYPE]); + } else { + cifs_dbg(FYI, "%s: missing notification type attribute\n", __func__); + return -EINVAL; + } + + switch (type) { + case CIFS_SWN_NOTIFICATION_RESOURCE_CHANGE: { + int state; + + if (info->attrs[CIFS_GENL_ATTR_SWN_RESOURCE_NAME]) { + nla_strscpy(name, info->attrs[CIFS_GENL_ATTR_SWN_RESOURCE_NAME], + sizeof(name)); + } else { + cifs_dbg(FYI, "%s: missing resource name attribute\n", __func__); + return -EINVAL; + } + if (info->attrs[CIFS_GENL_ATTR_SWN_RESOURCE_STATE]) { + state = nla_get_u32(info->attrs[CIFS_GENL_ATTR_SWN_RESOURCE_STATE]); + } else { + cifs_dbg(FYI, "%s: missing resource state attribute\n", __func__); + return -EINVAL; + } + return cifs_swn_resource_state_changed(swnreg, name, state); + } + case CIFS_SWN_NOTIFICATION_CLIENT_MOVE: { + struct sockaddr_storage addr; + + if (info->attrs[CIFS_GENL_ATTR_SWN_IP]) { + nla_memcpy(&addr, info->attrs[CIFS_GENL_ATTR_SWN_IP], sizeof(addr)); + } else { + cifs_dbg(FYI, "%s: missing IP address attribute\n", __func__); + return -EINVAL; + } + return cifs_swn_client_move(swnreg, &addr); + } + default: + cifs_dbg(FYI, "%s: unknown notification type %d\n", __func__, type); + break; + } + + return 0; +} + +int cifs_swn_register(struct cifs_tcon *tcon) +{ + struct cifs_swn_reg *swnreg; + int ret; + + swnreg = cifs_get_swn_reg(tcon); + if (IS_ERR(swnreg)) + return PTR_ERR(swnreg); + + ret = cifs_swn_send_register_message(swnreg); + if (ret < 0) { + cifs_dbg(VFS, "%s: Failed 
to send swn register message: %d\n", __func__, ret); + /* Do not put the swnreg or return error, the echo task will retry */ + } + + return 0; +} + +int cifs_swn_unregister(struct cifs_tcon *tcon) +{ + struct cifs_swn_reg *swnreg; + + mutex_lock(&cifs_swnreg_idr_mutex); + + swnreg = cifs_find_swn_reg(tcon); + if (IS_ERR(swnreg)) { + mutex_unlock(&cifs_swnreg_idr_mutex); + return PTR_ERR(swnreg); + } + + mutex_unlock(&cifs_swnreg_idr_mutex); + + cifs_put_swn_reg(swnreg); + + return 0; +} + +void cifs_swn_dump(struct seq_file *m) +{ + struct cifs_swn_reg *swnreg; + struct sockaddr_in *sa; + struct sockaddr_in6 *sa6; + int id; + + seq_puts(m, "Witness registrations:"); + + mutex_lock(&cifs_swnreg_idr_mutex); + idr_for_each_entry(&cifs_swnreg_idr, swnreg, id) { + seq_printf(m, "\nId: %u Refs: %u Network name: '%s'%s Share name: '%s'%s Ip address: ", + id, kref_read(&swnreg->ref_count), + swnreg->net_name, swnreg->net_name_notify ? "(y)" : "(n)", + swnreg->share_name, swnreg->share_name_notify ? "(y)" : "(n)"); + switch (swnreg->tcon->ses->server->dstaddr.ss_family) { + case AF_INET: + sa = (struct sockaddr_in *) &swnreg->tcon->ses->server->dstaddr; + seq_printf(m, "%pI4", &sa->sin_addr.s_addr); + break; + case AF_INET6: + sa6 = (struct sockaddr_in6 *) &swnreg->tcon->ses->server->dstaddr; + seq_printf(m, "%pI6", &sa6->sin6_addr.s6_addr); + if (sa6->sin6_scope_id) + seq_printf(m, "%%%u", sa6->sin6_scope_id); + break; + default: + seq_puts(m, "(unknown)"); + } + seq_printf(m, "%s", swnreg->ip_notify ? 
"(y)" : "(n)"); + } + mutex_unlock(&cifs_swnreg_idr_mutex); + seq_puts(m, "\n"); +} + +void cifs_swn_check(void) +{ + struct cifs_swn_reg *swnreg; + int id; + int ret; + + mutex_lock(&cifs_swnreg_idr_mutex); + idr_for_each_entry(&cifs_swnreg_idr, swnreg, id) { + ret = cifs_swn_send_register_message(swnreg); + if (ret < 0) + cifs_dbg(FYI, "%s: Failed to send register message: %d\n", __func__, ret); + } + mutex_unlock(&cifs_swnreg_idr_mutex); +} diff --git a/fs/cifs/cifs_swn.h b/fs/cifs/cifs_swn.h new file mode 100644 index 000000000000..236ecd4959d5 --- /dev/null +++ b/fs/cifs/cifs_swn.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Witness Service client for CIFS + * + * Copyright (c) 2020 Samuel Cabrero <[email protected]> + */ + +#ifndef _CIFS_SWN_H +#define _CIFS_SWN_H + +struct cifs_tcon; +struct sk_buff; +struct genl_info; + +extern int cifs_swn_register(struct cifs_tcon *tcon); + +extern int cifs_swn_unregister(struct cifs_tcon *tcon); + +extern int cifs_swn_notify(struct sk_buff *skb, struct genl_info *info); + +extern void cifs_swn_dump(struct seq_file *m); + +extern void cifs_swn_check(void); + +#endif /* _CIFS_SWN_H */ diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index ef4784e72b1d..562913e2b3f2 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -32,6 +32,7 @@ #include "cifsacl.h" #include "cifsproto.h" #include "cifs_debug.h" +#include "fs_context.h" /* security id for everyone/world system group */ static const struct cifs_sid sid_everyone = { @@ -346,8 +347,8 @@ sid_to_id(struct cifs_sb_info *cifs_sb, struct cifs_sid *psid, struct key *sidkey; char *sidstr; const struct cred *saved_cred; - kuid_t fuid = cifs_sb->mnt_uid; - kgid_t fgid = cifs_sb->mnt_gid; + kuid_t fuid = cifs_sb->ctx->linux_uid; + kgid_t fgid = cifs_sb->ctx->linux_gid; /* * If we have too many subauthorities, then something is really wrong. @@ -448,7 +449,7 @@ out_revert_creds: /* * Note that we return 0 here unconditionally. 
If the mapping - * fails then we just fall back to using the mnt_uid/mnt_gid. + * fails then we just fall back to using the ctx->linux_uid/linux_gid. */ got_valid_id: rc = 0; @@ -557,30 +558,37 @@ static void copy_sec_desc(const struct cifs_ntsd *pntsd, bits to set can be: S_IRWXU, S_IRWXG or S_IRWXO ie 00700 or 00070 or 00007 */ static void access_flags_to_mode(__le32 ace_flags, int type, umode_t *pmode, - umode_t *pbits_to_set) + umode_t *pdenied, umode_t mask) { __u32 flags = le32_to_cpu(ace_flags); - /* the order of ACEs is important. The canonical order is to begin with - DENY entries followed by ALLOW, otherwise an allow entry could be - encountered first, making the subsequent deny entry like "dead code" - which would be superflous since Windows stops when a match is made - for the operation you are trying to perform for your user */ - - /* For deny ACEs we change the mask so that subsequent allow access - control entries do not turn on the bits we are denying */ + /* + * Do not assume "preferred" or "canonical" order. + * The first DENY or ALLOW ACE which matches perfectly is + * the permission to be used. Once allowed or denied, same + * permission in later ACEs do not matter. 
+ */ + + /* If not already allowed, deny these bits */ if (type == ACCESS_DENIED) { - if (flags & GENERIC_ALL) - *pbits_to_set &= ~S_IRWXUGO; - - if ((flags & GENERIC_WRITE) || - ((flags & FILE_WRITE_RIGHTS) == FILE_WRITE_RIGHTS)) - *pbits_to_set &= ~S_IWUGO; - if ((flags & GENERIC_READ) || - ((flags & FILE_READ_RIGHTS) == FILE_READ_RIGHTS)) - *pbits_to_set &= ~S_IRUGO; - if ((flags & GENERIC_EXECUTE) || - ((flags & FILE_EXEC_RIGHTS) == FILE_EXEC_RIGHTS)) - *pbits_to_set &= ~S_IXUGO; + if (flags & GENERIC_ALL && + !(*pmode & mask & 0777)) + *pdenied |= mask & 0777; + + if (((flags & GENERIC_WRITE) || + ((flags & FILE_WRITE_RIGHTS) == FILE_WRITE_RIGHTS)) && + !(*pmode & mask & 0222)) + *pdenied |= mask & 0222; + + if (((flags & GENERIC_READ) || + ((flags & FILE_READ_RIGHTS) == FILE_READ_RIGHTS)) && + !(*pmode & mask & 0444)) + *pdenied |= mask & 0444; + + if (((flags & GENERIC_EXECUTE) || + ((flags & FILE_EXEC_RIGHTS) == FILE_EXEC_RIGHTS)) && + !(*pmode & mask & 0111)) + *pdenied |= mask & 0111; + return; } else if (type != ACCESS_ALLOWED) { cifs_dbg(VFS, "unknown access control type %d\n", type); @@ -588,20 +596,38 @@ static void access_flags_to_mode(__le32 ace_flags, int type, umode_t *pmode, } /* else ACCESS_ALLOWED type */ - if (flags & GENERIC_ALL) { - *pmode |= (S_IRWXUGO & (*pbits_to_set)); + if ((flags & GENERIC_ALL) && + !(*pdenied & mask & 0777)) { + *pmode |= mask & 0777; cifs_dbg(NOISY, "all perms\n"); return; } - if ((flags & GENERIC_WRITE) || - ((flags & FILE_WRITE_RIGHTS) == FILE_WRITE_RIGHTS)) - *pmode |= (S_IWUGO & (*pbits_to_set)); - if ((flags & GENERIC_READ) || - ((flags & FILE_READ_RIGHTS) == FILE_READ_RIGHTS)) - *pmode |= (S_IRUGO & (*pbits_to_set)); - if ((flags & GENERIC_EXECUTE) || - ((flags & FILE_EXEC_RIGHTS) == FILE_EXEC_RIGHTS)) - *pmode |= (S_IXUGO & (*pbits_to_set)); + + if (((flags & GENERIC_WRITE) || + ((flags & FILE_WRITE_RIGHTS) == FILE_WRITE_RIGHTS)) && + !(*pdenied & mask & 0222)) + *pmode |= mask & 0222; + + if (((flags & 
GENERIC_READ) || + ((flags & FILE_READ_RIGHTS) == FILE_READ_RIGHTS)) && + !(*pdenied & mask & 0444)) + *pmode |= mask & 0444; + + if (((flags & GENERIC_EXECUTE) || + ((flags & FILE_EXEC_RIGHTS) == FILE_EXEC_RIGHTS)) && + !(*pdenied & mask & 0111)) + *pmode |= mask & 0111; + + /* If DELETE_CHILD is set only on an owner ACE, set sticky bit */ + if (flags & FILE_DELETE_CHILD) { + if (mask == ACL_OWNER_MASK) { + if (!(*pdenied & 01000)) + *pmode |= 01000; + } else if (!(*pdenied & 01000)) { + *pmode &= ~01000; + *pdenied |= 01000; + } + } cifs_dbg(NOISY, "access flags 0x%x mode now %04o\n", flags, *pmode); return; @@ -638,17 +664,26 @@ static void mode_to_access_flags(umode_t mode, umode_t bits_to_use, } static __u16 fill_ace_for_sid(struct cifs_ace *pntace, - const struct cifs_sid *psid, __u64 nmode, umode_t bits) + const struct cifs_sid *psid, __u64 nmode, + umode_t bits, __u8 access_type, + bool allow_delete_child) { int i; __u16 size = 0; __u32 access_req = 0; - pntace->type = ACCESS_ALLOWED; + pntace->type = access_type; pntace->flags = 0x0; mode_to_access_flags(nmode, bits, &access_req); - if (!access_req) + + if (access_type == ACCESS_ALLOWED && allow_delete_child) + access_req |= FILE_DELETE_CHILD; + + if (access_type == ACCESS_ALLOWED && !access_req) access_req = SET_MINIMUM_RIGHTS; + else if (access_type == ACCESS_DENIED) + access_req &= ~SET_MINIMUM_RIGHTS; + pntace->access_req = cpu_to_le32(access_req); pntace->sid.revision = psid->revision; @@ -716,7 +751,7 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl, if (!pdacl) { /* no DACL in the security descriptor, set all the permissions for user/group/other */ - fattr->cf_mode |= S_IRWXUGO; + fattr->cf_mode |= 0777; return; } @@ -733,16 +768,14 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl, /* reset rwx permissions for user/group/other. Also, if num_aces is 0 i.e. 
DACL has no ACEs, user/group/other have no permissions */ - fattr->cf_mode &= ~(S_IRWXUGO); + fattr->cf_mode &= ~(0777); acl_base = (char *)pdacl; acl_size = sizeof(struct cifs_acl); num_aces = le32_to_cpu(pdacl->num_aces); if (num_aces > 0) { - umode_t user_mask = S_IRWXU; - umode_t group_mask = S_IRWXG; - umode_t other_mask = S_IRWXU | S_IRWXG | S_IRWXO; + umode_t denied_mode = 0; if (num_aces > ULONG_MAX / sizeof(struct cifs_ace *)) return; @@ -768,26 +801,28 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl, fattr->cf_mode |= le32_to_cpu(ppace[i]->sid.sub_auth[2]); break; - } else if (compare_sids(&(ppace[i]->sid), pownersid) == 0) - access_flags_to_mode(ppace[i]->access_req, - ppace[i]->type, - &fattr->cf_mode, - &user_mask); - else if (compare_sids(&(ppace[i]->sid), pgrpsid) == 0) - access_flags_to_mode(ppace[i]->access_req, - ppace[i]->type, - &fattr->cf_mode, - &group_mask); - else if (compare_sids(&(ppace[i]->sid), &sid_everyone) == 0) - access_flags_to_mode(ppace[i]->access_req, - ppace[i]->type, - &fattr->cf_mode, - &other_mask); - else if (compare_sids(&(ppace[i]->sid), &sid_authusers) == 0) - access_flags_to_mode(ppace[i]->access_req, - ppace[i]->type, - &fattr->cf_mode, - &other_mask); + } else { + if (compare_sids(&(ppace[i]->sid), pownersid) == 0) { + access_flags_to_mode(ppace[i]->access_req, + ppace[i]->type, + &fattr->cf_mode, + &denied_mode, + ACL_OWNER_MASK); + } else if (compare_sids(&(ppace[i]->sid), pgrpsid) == 0) { + access_flags_to_mode(ppace[i]->access_req, + ppace[i]->type, + &fattr->cf_mode, + &denied_mode, + ACL_GROUP_MASK); + } else if ((compare_sids(&(ppace[i]->sid), &sid_everyone) == 0) || + (compare_sids(&(ppace[i]->sid), &sid_authusers) == 0)) { + access_flags_to_mode(ppace[i]->access_req, + ppace[i]->type, + &fattr->cf_mode, + &denied_mode, + ACL_EVERYONE_MASK); + } + } /* memcpy((void *)(&(cifscred->aces[i])), @@ -873,32 +908,91 @@ unsigned int setup_special_user_owner_ACE(struct cifs_ace *pntace) } static int 
set_chmod_dacl(struct cifs_acl *pndacl, struct cifs_sid *pownersid, - struct cifs_sid *pgrpsid, __u64 nmode, bool modefromsid) + struct cifs_sid *pgrpsid, __u64 *pnmode, bool modefromsid) { u16 size = 0; u32 num_aces = 0; struct cifs_acl *pnndacl; + __u64 nmode; + __u64 user_mode; + __u64 group_mode; + __u64 other_mode; + __u64 deny_user_mode = 0; + __u64 deny_group_mode = 0; + bool sticky_set = false; pnndacl = (struct cifs_acl *)((char *)pndacl + sizeof(struct cifs_acl)); + nmode = *pnmode; + if (modefromsid) { struct cifs_ace *pntace = (struct cifs_ace *)((char *)pnndacl + size); size += setup_special_mode_ACE(pntace, nmode); num_aces++; + goto set_size; } + /* + * We'll try to keep the mode as requested by the user. + * But in cases where we cannot meaningfully convert that + * into ACL, return back the updated mode, so that it is + * updated in the inode. + */ + + if (!memcmp(pownersid, pgrpsid, sizeof(struct cifs_sid))) { + /* + * Case when owner and group SIDs are the same. + * Set the more restrictive of the two modes. + */ + user_mode = nmode & (nmode << 3) & 0700; + group_mode = nmode & (nmode >> 3) & 0070; + } else { + user_mode = nmode & 0700; + group_mode = nmode & 0070; + } + + other_mode = nmode & 0007; + + /* We need DENY ACE when the perm is more restrictive than the next sets. */ + deny_user_mode = ~(user_mode) & ((group_mode << 3) | (other_mode << 6)) & 0700; + deny_group_mode = ~(group_mode) & (other_mode << 3) & 0070; + + *pnmode = user_mode | group_mode | other_mode | (nmode & ~0777); + + /* This tells if we should allow delete child for group and everyone. */ + if (nmode & 01000) + sticky_set = true; + + if (deny_user_mode) { + size += fill_ace_for_sid((struct cifs_ace *)((char *)pnndacl + size), + pownersid, deny_user_mode, 0700, ACCESS_DENIED, false); + num_aces++; + } + /* Group DENY ACE does not conflict with owner ALLOW ACE. 
Keep in preferred order*/ + if (deny_group_mode && !(deny_group_mode & (user_mode >> 3))) { + size += fill_ace_for_sid((struct cifs_ace *)((char *)pnndacl + size), + pgrpsid, deny_group_mode, 0070, ACCESS_DENIED, false); + num_aces++; + } size += fill_ace_for_sid((struct cifs_ace *) ((char *)pnndacl + size), - pownersid, nmode, S_IRWXU); + pownersid, user_mode, 0700, ACCESS_ALLOWED, true); num_aces++; + /* Group DENY ACE conflicts with owner ALLOW ACE. So keep it after. */ + if (deny_group_mode && (deny_group_mode & (user_mode >> 3))) { + size += fill_ace_for_sid((struct cifs_ace *)((char *)pnndacl + size), + pgrpsid, deny_group_mode, 0070, ACCESS_DENIED, false); + num_aces++; + } size += fill_ace_for_sid((struct cifs_ace *)((char *)pnndacl + size), - pgrpsid, nmode, S_IRWXG); + pgrpsid, group_mode, 0070, ACCESS_ALLOWED, !sticky_set); num_aces++; size += fill_ace_for_sid((struct cifs_ace *)((char *)pnndacl + size), - &sid_everyone, nmode, S_IRWXO); + &sid_everyone, other_mode, 0007, ACCESS_ALLOWED, !sticky_set); num_aces++; +set_size: pndacl->num_aces = cpu_to_le32(num_aces); pndacl->size = cpu_to_le16(size + sizeof(struct cifs_acl)); @@ -1000,7 +1094,7 @@ static int parse_sec_desc(struct cifs_sb_info *cifs_sb, /* Convert permission bits from mode to equivalent CIFS ACL */ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd, - __u32 secdesclen, __u64 nmode, kuid_t uid, kgid_t gid, + __u32 secdesclen, __u64 *pnmode, kuid_t uid, kgid_t gid, bool mode_from_sid, bool id_from_sid, int *aclflag) { int rc = 0; @@ -1012,7 +1106,7 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd, struct cifs_acl *dacl_ptr = NULL; /* no need for SACL ptr */ struct cifs_acl *ndacl_ptr = NULL; /* no need for SACL ptr */ - if (nmode != NO_CHANGE_64) { /* chmod */ + if (pnmode && *pnmode != NO_CHANGE_64) { /* chmod */ owner_sid_ptr = (struct cifs_sid *)((char *)pntsd + le32_to_cpu(pntsd->osidoffset)); group_sid_ptr = (struct cifs_sid *)((char 
*)pntsd + @@ -1026,7 +1120,7 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd, ndacl_ptr->num_aces = 0; rc = set_chmod_dacl(ndacl_ptr, owner_sid_ptr, group_sid_ptr, - nmode, mode_from_sid); + pnmode, mode_from_sid); sidsoffset = ndacloffset + le16_to_cpu(ndacl_ptr->size); /* copy sec desc control portion & owner and group sids */ copy_sec_desc(pntsd, pnntsd, sidsoffset); @@ -1101,7 +1195,8 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd, } struct cifs_ntsd *get_cifs_acl_by_fid(struct cifs_sb_info *cifs_sb, - const struct cifs_fid *cifsfid, u32 *pacllen) + const struct cifs_fid *cifsfid, u32 *pacllen, + u32 __maybe_unused unused) { struct cifs_ntsd *pntsd = NULL; unsigned int xid; @@ -1169,7 +1264,7 @@ static struct cifs_ntsd *get_cifs_acl_by_path(struct cifs_sb_info *cifs_sb, /* Retrieve an ACL from the server */ struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *cifs_sb, struct inode *inode, const char *path, - u32 *pacllen) + u32 *pacllen, u32 info) { struct cifs_ntsd *pntsd = NULL; struct cifsFileInfo *open_file = NULL; @@ -1179,7 +1274,7 @@ struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *cifs_sb, if (!open_file) return get_cifs_acl_by_path(cifs_sb, path, pacllen); - pntsd = get_cifs_acl_by_fid(cifs_sb, &open_file->fid, pacllen); + pntsd = get_cifs_acl_by_fid(cifs_sb, &open_file->fid, pacllen, info); cifsFileInfo_put(open_file); return pntsd; } @@ -1244,6 +1339,7 @@ cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr, int rc = 0; struct tcon_link *tlink = cifs_sb_tlink(cifs_sb); struct smb_version_operations *ops; + const u32 info = 0; cifs_dbg(NOISY, "converting ACL to mode for %s\n", path); @@ -1253,9 +1349,9 @@ cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr, ops = tlink_tcon(tlink)->ses->server->ops; if (pfid && (ops->get_acl_by_fid)) - pntsd = ops->get_acl_by_fid(cifs_sb, pfid, &acllen); + pntsd = ops->get_acl_by_fid(cifs_sb, pfid, &acllen, info); 
else if (ops->get_acl) - pntsd = ops->get_acl(cifs_sb, inode, path, &acllen); + pntsd = ops->get_acl(cifs_sb, inode, path, &acllen, info); else { cifs_put_tlink(tlink); return -EOPNOTSUPP; @@ -1282,7 +1378,7 @@ cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr, /* Convert mode bits to an ACL so we can update the ACL on the server */ int -id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 nmode, +id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode, kuid_t uid, kgid_t gid) { int rc = 0; @@ -1294,6 +1390,7 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 nmode, struct tcon_link *tlink = cifs_sb_tlink(cifs_sb); struct smb_version_operations *ops; bool mode_from_sid, id_from_sid; + const u32 info = 0; if (IS_ERR(tlink)) return PTR_ERR(tlink); @@ -1309,7 +1406,7 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 nmode, return -EOPNOTSUPP; } - pntsd = ops->get_acl(cifs_sb, inode, path, &secdesclen); + pntsd = ops->get_acl(cifs_sb, inode, path, &secdesclen, info); if (IS_ERR(pntsd)) { rc = PTR_ERR(pntsd); cifs_dbg(VFS, "%s: error %d getting sec desc\n", __func__, rc); @@ -1341,7 +1438,7 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 nmode, else id_from_sid = false; - rc = build_sec_desc(pntsd, pnntsd, secdesclen, nmode, uid, gid, + rc = build_sec_desc(pntsd, pnntsd, secdesclen, pnmode, uid, gid, mode_from_sid, id_from_sid, &aclflag); cifs_dbg(NOISY, "build_sec_desc rc: %d\n", rc); diff --git a/fs/cifs/cifsacl.h b/fs/cifs/cifsacl.h index 45665ff87b64..ff7fd0862e28 100644 --- a/fs/cifs/cifsacl.h +++ b/fs/cifs/cifsacl.h @@ -30,6 +30,10 @@ #define WRITE_BIT 0x2 #define EXEC_BIT 0x1 +#define ACL_OWNER_MASK 0700 +#define ACL_GROUP_MASK 0770 +#define ACL_EVERYONE_MASK 0777 + #define UBITSHIFT 6 #define GBITSHIFT 3 diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 9daa256f69d4..51d53e4bdf6b 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c 
@@ -661,6 +661,11 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) unsigned char *tiblob = NULL; /* target info blob */ __le64 rsp_timestamp; + if (nls_cp == NULL) { + cifs_dbg(VFS, "%s called with nls_cp==NULL\n", __func__); + return -EINVAL; + } + if (ses->server->negflavor == CIFS_NEGFLAVOR_EXTENDED) { if (!ses->domainName) { if (ses->domainAuto) { diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 472cb7777e3e..ce0d0037fd0a 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -55,6 +55,10 @@ #ifdef CONFIG_CIFS_DFS_UPCALL #include "dfs_cache.h" #endif +#ifdef CONFIG_CIFS_SWN_UPCALL +#include "netlink.h" +#endif +#include "fs_context.h" /* * DOS dates from 1980/1/1 through 2107/12/31 @@ -214,7 +218,7 @@ cifs_read_super(struct super_block *sb) if (rc) goto out_no_root; /* tune readahead according to rsize */ - sb->s_bdi->ra_pages = cifs_sb->rsize / PAGE_SIZE; + sb->s_bdi->ra_pages = cifs_sb->ctx->rsize / PAGE_SIZE; sb->s_blocksize = CIFS_MAX_MSGSIZE; sb->s_blocksize_bits = 14; /* default 2**14 = CIFS_MAX_MSGSIZE */ @@ -458,16 +462,23 @@ cifs_show_cache_flavor(struct seq_file *s, struct cifs_sb_info *cifs_sb) seq_puts(s, "loose"); } -static void -cifs_show_nls(struct seq_file *s, struct nls_table *cur) +/* + * cifs_show_devname() is used so we show the mount device name with correct + * format (e.g. forward slashes vs. 
back slashes) in /proc/mounts + */ +static int cifs_show_devname(struct seq_file *m, struct dentry *root) { - struct nls_table *def; - - /* Display iocharset= option if it's not default charset */ - def = load_nls_default(); - if (def != cur) - seq_printf(s, ",iocharset=%s", cur->charset); - unload_nls(def); + struct cifs_sb_info *cifs_sb = CIFS_SB(root->d_sb); + char *devname = kstrdup(cifs_sb->ctx->UNC, GFP_KERNEL); + + if (devname == NULL) + seq_puts(m, "none"); + else { + convert_delimiter(devname, '/'); + seq_puts(m, devname); + kfree(devname); + } + return 0; } /* @@ -489,7 +500,7 @@ cifs_show_options(struct seq_file *s, struct dentry *root) if (tcon->no_lease) seq_puts(s, ",nolease"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER) + if (cifs_sb->ctx->multiuser) seq_puts(s, ",multiuser"); else if (tcon->ses->user_name) seq_show_option(s, "username", tcon->ses->user_name); @@ -514,14 +525,14 @@ cifs_show_options(struct seq_file *s, struct dentry *root) } seq_printf(s, ",uid=%u", - from_kuid_munged(&init_user_ns, cifs_sb->mnt_uid)); + from_kuid_munged(&init_user_ns, cifs_sb->ctx->linux_uid)); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID) seq_puts(s, ",forceuid"); else seq_puts(s, ",noforceuid"); seq_printf(s, ",gid=%u", - from_kgid_munged(&init_user_ns, cifs_sb->mnt_gid)); + from_kgid_munged(&init_user_ns, cifs_sb->ctx->linux_gid)); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID) seq_puts(s, ",forcegid"); else @@ -531,11 +542,10 @@ cifs_show_options(struct seq_file *s, struct dentry *root) if (!tcon->unix_ext) seq_printf(s, ",file_mode=0%ho,dir_mode=0%ho", - cifs_sb->mnt_file_mode, - cifs_sb->mnt_dir_mode); - - cifs_show_nls(s, cifs_sb->local_nls); - + cifs_sb->ctx->file_mode, + cifs_sb->ctx->dir_mode); + if (cifs_sb->ctx->iocharset) + seq_printf(s, ",iocharset=%s", cifs_sb->ctx->iocharset); if (tcon->seal) seq_puts(s, ",seal"); else if (tcon->ses->server->ignore_signature) @@ -605,15 +615,15 @@ cifs_show_options(struct seq_file *s, struct 
dentry *root) if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPUID) seq_printf(s, ",backupuid=%u", from_kuid_munged(&init_user_ns, - cifs_sb->mnt_backupuid)); + cifs_sb->ctx->backupuid)); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPGID) seq_printf(s, ",backupgid=%u", from_kgid_munged(&init_user_ns, - cifs_sb->mnt_backupgid)); + cifs_sb->ctx->backupgid)); - seq_printf(s, ",rsize=%u", cifs_sb->rsize); - seq_printf(s, ",wsize=%u", cifs_sb->wsize); - seq_printf(s, ",bsize=%u", cifs_sb->bsize); + seq_printf(s, ",rsize=%u", cifs_sb->ctx->rsize); + seq_printf(s, ",wsize=%u", cifs_sb->ctx->wsize); + seq_printf(s, ",bsize=%u", cifs_sb->ctx->bsize); if (tcon->ses->server->min_offload) seq_printf(s, ",esize=%u", tcon->ses->server->min_offload); seq_printf(s, ",echo_interval=%lu", @@ -628,12 +638,17 @@ cifs_show_options(struct seq_file *s, struct dentry *root) if (tcon->handle_timeout) seq_printf(s, ",handletimeout=%u", tcon->handle_timeout); /* convert actimeo and display it in seconds */ - seq_printf(s, ",actimeo=%lu", cifs_sb->actimeo / HZ); + seq_printf(s, ",actimeo=%lu", cifs_sb->ctx->actimeo / HZ); if (tcon->ses->chan_max > 1) seq_printf(s, ",multichannel,max_channels=%zu", tcon->ses->chan_max); +#ifdef CONFIG_CIFS_SWN_UPCALL + if (tcon->use_witness) + seq_puts(s, ",witness"); +#endif + return 0; } @@ -681,13 +696,6 @@ static int cifs_show_stats(struct seq_file *s, struct dentry *root) } #endif -static int cifs_remount(struct super_block *sb, int *flags, char *data) -{ - sync_filesystem(sb); - *flags |= SB_NODIRATIME; - return 0; -} - static int cifs_drop_inode(struct inode *inode) { struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); @@ -703,13 +711,14 @@ static const struct super_operations cifs_super_ops = { .free_inode = cifs_free_inode, .drop_inode = cifs_drop_inode, .evict_inode = cifs_evict_inode, +/* .show_path = cifs_show_path, */ /* Would we ever need show path? 
*/ + .show_devname = cifs_show_devname, /* .delete_inode = cifs_delete_inode, */ /* Do not need above function unless later we add lazy close of inodes or unless the kernel forgets to call us with the same number of releases (closes) as opens */ .show_options = cifs_show_options, .umount_begin = cifs_umount_begin, - .remount_fs = cifs_remount, #ifdef CONFIG_CIFS_STATS2 .show_stats = cifs_show_stats, #endif @@ -720,7 +729,7 @@ static const struct super_operations cifs_super_ops = { * Return dentry with refcount + 1 on success and NULL otherwise. */ static struct dentry * -cifs_get_root(struct smb_vol *vol, struct super_block *sb) +cifs_get_root(struct smb3_fs_context *ctx, struct super_block *sb) { struct dentry *dentry; struct cifs_sb_info *cifs_sb = CIFS_SB(sb); @@ -731,7 +740,7 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb) if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) return dget(sb->s_root); - full_path = cifs_build_path_to_root(vol, cifs_sb, + full_path = cifs_build_path_to_root(ctx, cifs_sb, cifs_sb_master_tcon(cifs_sb), 0); if (full_path == NULL) return ERR_PTR(-ENOMEM); @@ -777,14 +786,13 @@ static int cifs_set_super(struct super_block *sb, void *data) return set_anon_super(sb, NULL); } -static struct dentry * +struct dentry * cifs_smb3_do_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, bool is_smb3) + int flags, struct smb3_fs_context *old_ctx) { int rc; struct super_block *sb; - struct cifs_sb_info *cifs_sb; - struct smb_vol *volume_info; + struct cifs_sb_info *cifs_sb = NULL; struct cifs_mnt_data mnt_data; struct dentry *root; @@ -793,42 +801,49 @@ cifs_smb3_do_mount(struct file_system_type *fs_type, * If CIFS_DEBUG && cifs_FYI */ if (cifsFYI) - cifs_dbg(FYI, "Devname: %s flags: %d\n", dev_name, flags); + cifs_dbg(FYI, "Devname: %s flags: %d\n", old_ctx->UNC, flags); else - cifs_info("Attempting to mount %s\n", dev_name); - - volume_info = cifs_get_volume_info((char *)data, dev_name, 
is_smb3); - if (IS_ERR(volume_info)) - return ERR_CAST(volume_info); + cifs_info("Attempting to mount %s\n", old_ctx->UNC); cifs_sb = kzalloc(sizeof(struct cifs_sb_info), GFP_KERNEL); if (cifs_sb == NULL) { root = ERR_PTR(-ENOMEM); - goto out_nls; + goto out; } - cifs_sb->mountdata = kstrndup(data, PAGE_SIZE, GFP_KERNEL); - if (cifs_sb->mountdata == NULL) { + cifs_sb->ctx = kzalloc(sizeof(struct smb3_fs_context), GFP_KERNEL); + if (!cifs_sb->ctx) { root = ERR_PTR(-ENOMEM); - goto out_free; + goto out; + } + rc = smb3_fs_context_dup(cifs_sb->ctx, old_ctx); + if (rc) { + root = ERR_PTR(rc); + goto out; + } + + rc = cifs_setup_volume_info(cifs_sb->ctx); + if (rc) { + root = ERR_PTR(rc); + goto out; } - rc = cifs_setup_cifs_sb(volume_info, cifs_sb); + rc = cifs_setup_cifs_sb(cifs_sb); if (rc) { root = ERR_PTR(rc); - goto out_free; + goto out; } - rc = cifs_mount(cifs_sb, volume_info); + rc = cifs_mount(cifs_sb, cifs_sb->ctx); if (rc) { if (!(flags & SB_SILENT)) cifs_dbg(VFS, "cifs_mount failed w/return code = %d\n", rc); root = ERR_PTR(rc); - goto out_free; + goto out; } - mnt_data.vol = volume_info; + mnt_data.ctx = cifs_sb->ctx; mnt_data.cifs_sb = cifs_sb; mnt_data.flags = flags; @@ -839,12 +854,14 @@ cifs_smb3_do_mount(struct file_system_type *fs_type, if (IS_ERR(sb)) { root = ERR_CAST(sb); cifs_umount(cifs_sb); + cifs_sb = NULL; goto out; } if (sb->s_root) { cifs_dbg(FYI, "Use existing superblock\n"); cifs_umount(cifs_sb); + cifs_sb = NULL; } else { rc = cifs_read_super(sb); if (rc) { @@ -855,41 +872,24 @@ cifs_smb3_do_mount(struct file_system_type *fs_type, sb->s_flags |= SB_ACTIVE; } - root = cifs_get_root(volume_info, sb); + root = cifs_get_root(cifs_sb ? 
cifs_sb->ctx : old_ctx, sb); if (IS_ERR(root)) goto out_super; cifs_dbg(FYI, "dentry root is: %p\n", root); - goto out; + return root; out_super: deactivate_locked_super(sb); out: - cifs_cleanup_volume_info(volume_info); + if (cifs_sb) { + kfree(cifs_sb->prepath); + smb3_cleanup_fs_context(cifs_sb->ctx); + kfree(cifs_sb); + } return root; - -out_free: - kfree(cifs_sb->prepath); - kfree(cifs_sb->mountdata); - kfree(cifs_sb); -out_nls: - unload_nls(volume_info->local_nls); - goto out; } -static struct dentry * -smb3_do_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) -{ - return cifs_smb3_do_mount(fs_type, flags, dev_name, data, true); -} - -static struct dentry * -cifs_do_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) -{ - return cifs_smb3_do_mount(fs_type, flags, dev_name, data, false); -} static ssize_t cifs_loose_read_iter(struct kiocb *iocb, struct iov_iter *iter) @@ -1026,7 +1026,8 @@ cifs_setlease(struct file *file, long arg, struct file_lock **lease, void **priv struct file_system_type cifs_fs_type = { .owner = THIS_MODULE, .name = "cifs", - .mount = cifs_do_mount, + .init_fs_context = smb3_init_fs_context, + .parameters = smb3_fs_parameters, .kill_sb = cifs_kill_sb, .fs_flags = FS_RENAME_DOES_D_MOVE, }; @@ -1035,7 +1036,8 @@ MODULE_ALIAS_FS("cifs"); static struct file_system_type smb3_fs_type = { .owner = THIS_MODULE, .name = "smb3", - .mount = smb3_do_mount, + .init_fs_context = smb3_init_fs_context, + .parameters = smb3_fs_parameters, .kill_sb = cifs_kill_sb, .fs_flags = FS_RENAME_DOES_D_MOVE, }; @@ -1617,10 +1619,15 @@ init_cifs(void) if (rc) goto out_destroy_dfs_cache; #endif /* CONFIG_CIFS_UPCALL */ +#ifdef CONFIG_CIFS_SWN_UPCALL + rc = cifs_genl_init(); + if (rc) + goto out_register_key_type; +#endif /* CONFIG_CIFS_SWN_UPCALL */ rc = init_cifs_idmap(); if (rc) - goto out_register_key_type; + goto out_cifs_swn_init; rc = register_filesystem(&cifs_fs_type); if (rc) @@ -1636,7 
+1643,11 @@ init_cifs(void) out_init_cifs_idmap: exit_cifs_idmap(); +out_cifs_swn_init: +#ifdef CONFIG_CIFS_SWN_UPCALL + cifs_genl_exit(); out_register_key_type: +#endif #ifdef CONFIG_CIFS_UPCALL exit_cifs_spnego(); out_destroy_dfs_cache: @@ -1673,6 +1684,9 @@ exit_cifs(void) unregister_filesystem(&smb3_fs_type); cifs_dfs_release_automount_timer(); exit_cifs_idmap(); +#ifdef CONFIG_CIFS_SWN_UPCALL + cifs_genl_exit(); +#endif #ifdef CONFIG_CIFS_UPCALL exit_cifs_spnego(); #endif diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 905d03863721..2307bb0f6147 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -152,9 +152,13 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); extern void cifs_setsize(struct inode *inode, loff_t offset); extern int cifs_truncate_page(struct address_space *mapping, loff_t from); +struct smb3_fs_context; +extern struct dentry *cifs_smb3_do_mount(struct file_system_type *fs_type, + int flags, struct smb3_fs_context *ctx); + #ifdef CONFIG_CIFS_NFSD_EXPORT extern const struct export_operations cifs_export_ops; #endif /* CONFIG_CIFS_NFSD_EXPORT */ -#define CIFS_VERSION "2.29" +#define CIFS_VERSION "2.30" #endif /* _CIFSFS_H */ diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 484ec2d8c5c9..50fcb65920e8 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -202,7 +202,7 @@ struct cifs_ses; struct cifs_tcon; struct dfs_info3_param; struct cifs_fattr; -struct smb_vol; +struct smb3_fs_context; struct cifs_fid; struct cifs_readdata; struct cifs_writedata; @@ -268,9 +268,9 @@ struct smb_version_operations { /* negotiate to the server */ int (*negotiate)(const unsigned int, struct cifs_ses *); /* set negotiated write size */ - unsigned int (*negotiate_wsize)(struct cifs_tcon *, struct smb_vol *); + unsigned int (*negotiate_wsize)(struct cifs_tcon *tcon, struct smb3_fs_context *ctx); /* set negotiated read size */ - unsigned int (*negotiate_rsize)(struct cifs_tcon *, struct smb_vol *); + unsigned 
int (*negotiate_rsize)(struct cifs_tcon *tcon, struct smb3_fs_context *ctx); /* setup smb sessionn */ int (*sess_setup)(const unsigned int, struct cifs_ses *, const struct nls_table *); @@ -456,9 +456,9 @@ struct smb_version_operations { const char *, const void *, const __u16, const struct nls_table *, struct cifs_sb_info *); struct cifs_ntsd * (*get_acl)(struct cifs_sb_info *, struct inode *, - const char *, u32 *); + const char *, u32 *, u32); struct cifs_ntsd * (*get_acl_by_fid)(struct cifs_sb_info *, - const struct cifs_fid *, u32 *); + const struct cifs_fid *, u32 *, u32); int (*set_acl)(struct cifs_ntsd *, __u32, struct inode *, const char *, int); /* writepages retry size */ @@ -530,97 +530,6 @@ struct smb_version_values { #define HEADER_SIZE(server) (server->vals->header_size) #define MAX_HEADER_SIZE(server) (server->vals->max_header_size) -struct smb_vol { - char *username; - char *password; - char *domainname; - char *UNC; - char *iocharset; /* local code page for mapping to and from Unicode */ - char source_rfc1001_name[RFC1001_NAME_LEN_WITH_NULL]; /* clnt nb name */ - char target_rfc1001_name[RFC1001_NAME_LEN_WITH_NULL]; /* srvr nb name */ - kuid_t cred_uid; - kuid_t linux_uid; - kgid_t linux_gid; - kuid_t backupuid; - kgid_t backupgid; - umode_t file_mode; - umode_t dir_mode; - enum securityEnum sectype; /* sectype requested via mnt opts */ - bool sign; /* was signing requested via mnt opts? 
*/ - bool ignore_signature:1; - bool retry:1; - bool intr:1; - bool setuids:1; - bool setuidfromacl:1; - bool override_uid:1; - bool override_gid:1; - bool dynperm:1; - bool noperm:1; - bool nodelete:1; - bool mode_ace:1; - bool no_psx_acl:1; /* set if posix acl support should be disabled */ - bool cifs_acl:1; - bool backupuid_specified; /* mount option backupuid is specified */ - bool backupgid_specified; /* mount option backupgid is specified */ - bool no_xattr:1; /* set if xattr (EA) support should be disabled*/ - bool server_ino:1; /* use inode numbers from server ie UniqueId */ - bool direct_io:1; - bool strict_io:1; /* strict cache behavior */ - bool cache_ro:1; - bool cache_rw:1; - bool remap:1; /* set to remap seven reserved chars in filenames */ - bool sfu_remap:1; /* remap seven reserved chars ala SFU */ - bool posix_paths:1; /* unset to not ask for posix pathnames. */ - bool no_linux_ext:1; - bool linux_ext:1; - bool sfu_emul:1; - bool nullauth:1; /* attempt to authenticate with null user */ - bool nocase:1; /* request case insensitive filenames */ - bool nobrl:1; /* disable sending byte range locks to srv */ - bool nohandlecache:1; /* disable caching dir handles if srvr probs */ - bool mand_lock:1; /* send mandatory not posix byte range lock reqs */ - bool seal:1; /* request transport encryption on share */ - bool nodfs:1; /* Do not request DFS, even if available */ - bool local_lease:1; /* check leases only on local system, not remote */ - bool noblocksnd:1; - bool noautotune:1; - bool nostrictsync:1; /* do not force expensive SMBflush on every sync */ - bool no_lease:1; /* disable requesting leases */ - bool fsc:1; /* enable fscache */ - bool mfsymlinks:1; /* use Minshall+French Symlinks */ - bool multiuser:1; - bool rwpidforward:1; /* pid forward for read/write operations */ - bool nosharesock:1; - bool persistent:1; - bool nopersistent:1; - bool resilient:1; /* noresilient not required since not fored for CA */ - bool domainauto:1; - bool rdma:1; - 
bool multichannel:1; - bool use_client_guid:1; - /* reuse existing guid for multichannel */ - u8 client_guid[SMB2_CLIENT_GUID_SIZE]; - unsigned int bsize; - unsigned int rsize; - unsigned int wsize; - unsigned int min_offload; - bool sockopt_tcp_nodelay:1; - unsigned long actimeo; /* attribute cache timeout (jiffies) */ - struct smb_version_operations *ops; - struct smb_version_values *vals; - char *prepath; - struct sockaddr_storage dstaddr; /* destination address */ - struct sockaddr_storage srcaddr; /* allow binding to a local IP */ - struct nls_table *local_nls; - unsigned int echo_interval; /* echo interval in secs */ - __u64 snapshot_time; /* needed for timewarp tokens */ - __u32 handle_timeout; /* persistent and durable handle timeout in ms */ - unsigned int max_credits; /* smb3 max_credits 10 < credits < 60000 */ - unsigned int max_channels; - __u16 compression; /* compression algorithm 0xFFFF default 0=disabled */ - bool rootfs:1; /* if it's a SMB root file system */ -}; - /** * CIFS superblock mount flags (mnt_cifs_flags) to consider when * trying to reuse existing superblock for a new mount @@ -649,7 +558,7 @@ struct smb_vol { struct cifs_mnt_data { struct cifs_sb_info *cifs_sb; - struct smb_vol *vol; + struct smb3_fs_context *ctx; int flags; }; @@ -778,6 +687,10 @@ struct TCP_Server_Info { int nr_targets; bool noblockcnt; /* use non-blocking connect() */ bool is_channel; /* if a session channel */ +#ifdef CONFIG_CIFS_SWN_UPCALL + bool use_swn_dstaddr; + struct sockaddr_storage swn_dstaddr; +#endif }; struct cifs_credits { @@ -1177,6 +1090,9 @@ struct cifs_tcon { int remap:2; struct list_head ulist; /* cache update list */ #endif +#ifdef CONFIG_CIFS_SWN_UPCALL + bool use_witness:1; /* use witness protocol */ +#endif }; /* diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index 593d826820c3..64fe5a47b5e8 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h @@ -240,6 +240,8 @@ #define SYNCHRONIZE 0x00100000 /* The file handle can waited on to */ /* 
synchronize with the completion */ /* of an input/output request */ +#define SYSTEM_SECURITY 0x01000000 /* The system access control list */ + /* can be read and changed */ #define GENERIC_ALL 0x10000000 #define GENERIC_EXECUTE 0x20000000 #define GENERIC_WRITE 0x40000000 @@ -262,7 +264,7 @@ | WRITE_OWNER | SYNCHRONIZE) #define SET_FILE_WRITE_RIGHTS (FILE_WRITE_DATA | FILE_APPEND_DATA \ | FILE_READ_EA | FILE_WRITE_EA \ - | FILE_DELETE_CHILD | FILE_READ_ATTRIBUTES \ + | FILE_READ_ATTRIBUTES \ | FILE_WRITE_ATTRIBUTES \ | DELETE | READ_CONTROL | WRITE_DAC \ | WRITE_OWNER | SYNCHRONIZE) diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 24c6f36177ba..340ff81ee87b 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -27,8 +27,8 @@ #endif struct statfs; -struct smb_vol; struct smb_rqst; +struct smb3_fs_context; /* ***************************************************************** @@ -72,14 +72,13 @@ extern void exit_cifs_spnego(void); extern char *build_path_from_dentry(struct dentry *); extern char *build_path_from_dentry_optional_prefix(struct dentry *direntry, bool prefix); -extern char *cifs_build_path_to_root(struct smb_vol *vol, +extern char *cifs_build_path_to_root(struct smb3_fs_context *ctx, struct cifs_sb_info *cifs_sb, struct cifs_tcon *tcon, int add_treename); extern char *build_wildcard_path_from_dentry(struct dentry *direntry); extern char *cifs_compose_mount_options(const char *sb_mountdata, - const char *fullpath, const struct dfs_info3_param *ref, - char **devname); + const char *fullpath, const struct dfs_info3_param *ref); /* extern void renew_parental_timestamps(struct dentry *direntry);*/ extern struct mid_q_entry *AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server); @@ -89,6 +88,7 @@ extern void cifs_mid_q_entry_release(struct mid_q_entry *midEntry); extern void cifs_wake_up_task(struct mid_q_entry *mid); extern int cifs_handle_standard(struct TCP_Server_Info *server, struct mid_q_entry *mid); +extern 
int smb3_parse_devname(const char *devname, struct smb3_fs_context *ctx); extern bool cifs_match_ipaddr(struct sockaddr *srcaddr, struct sockaddr *rhs); extern int cifs_discard_remaining_data(struct TCP_Server_Info *server); extern int cifs_call_async(struct TCP_Server_Info *server, @@ -215,12 +215,12 @@ extern int cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr, struct inode *inode, bool get_mode_from_special_sid, const char *path, const struct cifs_fid *pfid); -extern int id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64, - kuid_t, kgid_t); +extern int id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode, + kuid_t uid, kgid_t gid); extern struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *, struct inode *, - const char *, u32 *); + const char *, u32 *, u32); extern struct cifs_ntsd *get_cifs_acl_by_fid(struct cifs_sb_info *, - const struct cifs_fid *, u32 *); + const struct cifs_fid *, u32 *, u32); extern int set_cifs_acl(struct cifs_ntsd *, __u32, struct inode *, const char *, int); extern unsigned int setup_authusers_ACE(struct cifs_ace *pace); @@ -234,13 +234,9 @@ extern int cifs_read_page_from_socket(struct TCP_Server_Info *server, struct page *page, unsigned int page_offset, unsigned int to_read); -extern int cifs_setup_cifs_sb(struct smb_vol *pvolume_info, - struct cifs_sb_info *cifs_sb); +extern int cifs_setup_cifs_sb(struct cifs_sb_info *cifs_sb); extern int cifs_match_super(struct super_block *, void *); -extern void cifs_cleanup_volume_info(struct smb_vol *pvolume_info); -extern struct smb_vol *cifs_get_volume_info(char *mount_data, - const char *devname, bool is_smb3); -extern int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *vol); +extern int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx); extern void cifs_umount(struct cifs_sb_info *); extern void cifs_mark_open_files_invalid(struct cifs_tcon *tcon); extern void cifs_reopen_persistent_handles(struct 
cifs_tcon *tcon); @@ -256,7 +252,7 @@ extern void cifs_add_pending_open_locked(struct cifs_fid *fid, struct tcon_link *tlink, struct cifs_pending_open *open); extern void cifs_del_pending_open(struct cifs_pending_open *open); -extern struct TCP_Server_Info *cifs_get_tcp_session(struct smb_vol *vol); +extern struct TCP_Server_Info *cifs_get_tcp_session(struct smb3_fs_context *ctx); extern void cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect); extern void cifs_put_tcon(struct cifs_tcon *tcon); @@ -332,7 +328,7 @@ extern int parse_dfs_referrals(struct get_dfs_referral_rsp *rsp, u32 rsp_size, const char *searchName, bool is_unicode); extern void reset_cifs_unix_caps(unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, - struct smb_vol *vol); + struct smb3_fs_context *ctx); extern int CIFSSMBQFSInfo(const unsigned int xid, struct cifs_tcon *tcon, struct kstatfs *FSData); extern int SMBOldQFSInfo(const unsigned int xid, struct cifs_tcon *tcon, @@ -553,18 +549,15 @@ extern int SMBencrypt(unsigned char *passwd, const unsigned char *c8, unsigned char *p24); extern int -cifs_setup_volume_info(struct smb_vol *volume_info, char *mount_data, - const char *devname, bool is_smb3); -extern void -cifs_cleanup_volume_info_contents(struct smb_vol *volume_info); +cifs_setup_volume_info(struct smb3_fs_context *ctx); extern struct TCP_Server_Info * -cifs_find_tcp_session(struct smb_vol *vol); +cifs_find_tcp_session(struct smb3_fs_context *ctx); extern void cifs_put_smb_ses(struct cifs_ses *ses); extern struct cifs_ses * -cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info); +cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx); void cifs_readdata_release(struct kref *refcount); int cifs_async_readv(struct cifs_readdata *rdata); @@ -604,9 +597,7 @@ extern void rqst_page_get_length(struct smb_rqst *rqst, unsigned int page, unsigned int *len, unsigned int *offset); struct cifs_chan * 
cifs_ses_find_chan(struct cifs_ses *ses, struct TCP_Server_Info *server); -int cifs_try_adding_channels(struct cifs_ses *ses); -int cifs_ses_add_channel(struct cifs_ses *ses, - struct cifs_server_iface *iface); +int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses); bool is_server_using_iface(struct TCP_Server_Info *server, struct cifs_server_iface *iface); bool is_ses_using_iface(struct cifs_ses *ses, struct cifs_server_iface *iface); @@ -620,6 +611,8 @@ int smb2_parse_query_directory(struct cifs_tcon *tcon, struct kvec *rsp_iov, struct super_block *cifs_get_tcp_super(struct TCP_Server_Info *server); void cifs_put_tcp_super(struct super_block *sb); int update_super_prepath(struct cifs_tcon *tcon, char *prefix); +char *extract_hostname(const char *unc); +char *extract_sharename(const char *unc); #ifdef CONFIG_CIFS_DFS_UPCALL static inline int get_dfs_path(const unsigned int xid, struct cifs_ses *ses, diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 44f9cce57099..b9df85506938 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -62,6 +62,9 @@ #include "dfs_cache.h" #endif #include "fs_context.h" +#ifdef CONFIG_CIFS_SWN_UPCALL +#include "cifs_swn.h" +#endif extern mempool_t *cifs_req_poolp; extern bool disable_legacy_dialects; @@ -73,218 +76,10 @@ extern bool disable_legacy_dialects; /* Drop the connection to not overload the server */ #define NUM_STATUS_IO_TIMEOUT 5 -enum { - /* Mount options that take no arguments */ - Opt_user_xattr, Opt_nouser_xattr, - Opt_forceuid, Opt_noforceuid, - Opt_forcegid, Opt_noforcegid, - Opt_noblocksend, Opt_noautotune, Opt_nolease, - Opt_hard, Opt_soft, Opt_perm, Opt_noperm, Opt_nodelete, - Opt_mapposix, Opt_nomapposix, - Opt_mapchars, Opt_nomapchars, Opt_sfu, - Opt_nosfu, Opt_nodfs, Opt_posixpaths, - Opt_noposixpaths, Opt_nounix, Opt_unix, - Opt_nocase, - Opt_brl, Opt_nobrl, - Opt_handlecache, Opt_nohandlecache, - Opt_forcemandatorylock, Opt_setuidfromacl, Opt_setuids, - Opt_nosetuids, 
Opt_dynperm, Opt_nodynperm, - Opt_nohard, Opt_nosoft, - Opt_nointr, Opt_intr, - Opt_nostrictsync, Opt_strictsync, - Opt_serverino, Opt_noserverino, - Opt_rwpidforward, Opt_cifsacl, Opt_nocifsacl, - Opt_acl, Opt_noacl, Opt_locallease, - Opt_sign, Opt_ignore_signature, Opt_seal, Opt_noac, - Opt_fsc, Opt_mfsymlinks, - Opt_multiuser, Opt_sloppy, Opt_nosharesock, - Opt_persistent, Opt_nopersistent, - Opt_resilient, Opt_noresilient, - Opt_domainauto, Opt_rdma, Opt_modesid, Opt_rootfs, - Opt_multichannel, Opt_nomultichannel, - Opt_compress, - - /* Mount options which take numeric value */ - Opt_backupuid, Opt_backupgid, Opt_uid, - Opt_cruid, Opt_gid, Opt_file_mode, - Opt_dirmode, Opt_port, - Opt_min_enc_offload, - Opt_blocksize, Opt_rsize, Opt_wsize, Opt_actimeo, - Opt_echo_interval, Opt_max_credits, Opt_handletimeout, - Opt_snapshot, Opt_max_channels, - - /* Mount options which take string value */ - Opt_user, Opt_pass, Opt_ip, - Opt_domain, Opt_srcaddr, Opt_iocharset, - Opt_netbiosname, Opt_servern, - Opt_ver, Opt_vers, Opt_sec, Opt_cache, - - /* Mount options to be ignored */ - Opt_ignore, - - /* Options which could be blank */ - Opt_blank_pass, - Opt_blank_user, - Opt_blank_ip, - - Opt_err -}; - -static const match_table_t cifs_mount_option_tokens = { - - { Opt_user_xattr, "user_xattr" }, - { Opt_nouser_xattr, "nouser_xattr" }, - { Opt_forceuid, "forceuid" }, - { Opt_noforceuid, "noforceuid" }, - { Opt_forcegid, "forcegid" }, - { Opt_noforcegid, "noforcegid" }, - { Opt_noblocksend, "noblocksend" }, - { Opt_noautotune, "noautotune" }, - { Opt_nolease, "nolease" }, - { Opt_hard, "hard" }, - { Opt_soft, "soft" }, - { Opt_perm, "perm" }, - { Opt_noperm, "noperm" }, - { Opt_nodelete, "nodelete" }, - { Opt_mapchars, "mapchars" }, /* SFU style */ - { Opt_nomapchars, "nomapchars" }, - { Opt_mapposix, "mapposix" }, /* SFM style */ - { Opt_nomapposix, "nomapposix" }, - { Opt_sfu, "sfu" }, - { Opt_nosfu, "nosfu" }, - { Opt_nodfs, "nodfs" }, - { Opt_posixpaths, "posixpaths" }, - 
{ Opt_noposixpaths, "noposixpaths" }, - { Opt_nounix, "nounix" }, - { Opt_nounix, "nolinux" }, - { Opt_nounix, "noposix" }, - { Opt_unix, "unix" }, - { Opt_unix, "linux" }, - { Opt_unix, "posix" }, - { Opt_nocase, "nocase" }, - { Opt_nocase, "ignorecase" }, - { Opt_brl, "brl" }, - { Opt_nobrl, "nobrl" }, - { Opt_handlecache, "handlecache" }, - { Opt_nohandlecache, "nohandlecache" }, - { Opt_nobrl, "nolock" }, - { Opt_forcemandatorylock, "forcemandatorylock" }, - { Opt_forcemandatorylock, "forcemand" }, - { Opt_setuids, "setuids" }, - { Opt_nosetuids, "nosetuids" }, - { Opt_setuidfromacl, "idsfromsid" }, - { Opt_dynperm, "dynperm" }, - { Opt_nodynperm, "nodynperm" }, - { Opt_nohard, "nohard" }, - { Opt_nosoft, "nosoft" }, - { Opt_nointr, "nointr" }, - { Opt_intr, "intr" }, - { Opt_nostrictsync, "nostrictsync" }, - { Opt_strictsync, "strictsync" }, - { Opt_serverino, "serverino" }, - { Opt_noserverino, "noserverino" }, - { Opt_rwpidforward, "rwpidforward" }, - { Opt_modesid, "modefromsid" }, - { Opt_cifsacl, "cifsacl" }, - { Opt_nocifsacl, "nocifsacl" }, - { Opt_acl, "acl" }, - { Opt_noacl, "noacl" }, - { Opt_locallease, "locallease" }, - { Opt_sign, "sign" }, - { Opt_ignore_signature, "signloosely" }, - { Opt_seal, "seal" }, - { Opt_noac, "noac" }, - { Opt_fsc, "fsc" }, - { Opt_mfsymlinks, "mfsymlinks" }, - { Opt_multiuser, "multiuser" }, - { Opt_sloppy, "sloppy" }, - { Opt_nosharesock, "nosharesock" }, - { Opt_persistent, "persistenthandles"}, - { Opt_nopersistent, "nopersistenthandles"}, - { Opt_resilient, "resilienthandles"}, - { Opt_noresilient, "noresilienthandles"}, - { Opt_domainauto, "domainauto"}, - { Opt_rdma, "rdma"}, - { Opt_multichannel, "multichannel" }, - { Opt_nomultichannel, "nomultichannel" }, - - { Opt_backupuid, "backupuid=%s" }, - { Opt_backupgid, "backupgid=%s" }, - { Opt_uid, "uid=%s" }, - { Opt_cruid, "cruid=%s" }, - { Opt_gid, "gid=%s" }, - { Opt_file_mode, "file_mode=%s" }, - { Opt_dirmode, "dirmode=%s" }, - { Opt_dirmode, "dir_mode=%s" }, 
- { Opt_port, "port=%s" }, - { Opt_min_enc_offload, "esize=%s" }, - { Opt_blocksize, "bsize=%s" }, - { Opt_rsize, "rsize=%s" }, - { Opt_wsize, "wsize=%s" }, - { Opt_actimeo, "actimeo=%s" }, - { Opt_handletimeout, "handletimeout=%s" }, - { Opt_echo_interval, "echo_interval=%s" }, - { Opt_max_credits, "max_credits=%s" }, - { Opt_snapshot, "snapshot=%s" }, - { Opt_max_channels, "max_channels=%s" }, - { Opt_compress, "compress=%s" }, - - { Opt_blank_user, "user=" }, - { Opt_blank_user, "username=" }, - { Opt_user, "user=%s" }, - { Opt_user, "username=%s" }, - { Opt_blank_pass, "pass=" }, - { Opt_blank_pass, "password=" }, - { Opt_pass, "pass=%s" }, - { Opt_pass, "password=%s" }, - { Opt_blank_ip, "ip=" }, - { Opt_blank_ip, "addr=" }, - { Opt_ip, "ip=%s" }, - { Opt_ip, "addr=%s" }, - { Opt_ignore, "unc=%s" }, - { Opt_ignore, "target=%s" }, - { Opt_ignore, "path=%s" }, - { Opt_domain, "dom=%s" }, - { Opt_domain, "domain=%s" }, - { Opt_domain, "workgroup=%s" }, - { Opt_srcaddr, "srcaddr=%s" }, - { Opt_ignore, "prefixpath=%s" }, - { Opt_iocharset, "iocharset=%s" }, - { Opt_netbiosname, "netbiosname=%s" }, - { Opt_servern, "servern=%s" }, - { Opt_ver, "ver=%s" }, - { Opt_vers, "vers=%s" }, - { Opt_sec, "sec=%s" }, - { Opt_cache, "cache=%s" }, - - { Opt_ignore, "cred" }, - { Opt_ignore, "credentials" }, - { Opt_ignore, "cred=%s" }, - { Opt_ignore, "credentials=%s" }, - { Opt_ignore, "guest" }, - { Opt_ignore, "rw" }, - { Opt_ignore, "ro" }, - { Opt_ignore, "suid" }, - { Opt_ignore, "nosuid" }, - { Opt_ignore, "exec" }, - { Opt_ignore, "noexec" }, - { Opt_ignore, "nodev" }, - { Opt_ignore, "noauto" }, - { Opt_ignore, "dev" }, - { Opt_ignore, "mand" }, - { Opt_ignore, "nomand" }, - { Opt_ignore, "relatime" }, - { Opt_ignore, "_netdev" }, - { Opt_rootfs, "rootfs" }, - - { Opt_err, NULL } -}; - static int ip_connect(struct TCP_Server_Info *server); static int generic_ip_connect(struct TCP_Server_Info *server); static void tlink_rb_insert(struct rb_root *root, struct tcon_link 
*new_tlink); static void cifs_prune_tlinks(struct work_struct *work); -static char *extract_hostname(const char *unc); /* * Resolve hostname and set ip addr in tcp ses. Useful for hostnames that may @@ -293,7 +88,7 @@ static char *extract_hostname(const char *unc); * This should be called with server->srv_mutex held. */ #ifdef CONFIG_CIFS_DFS_UPCALL -static int reconn_set_ipaddr(struct TCP_Server_Info *server) +static int reconn_set_ipaddr_from_hostname(struct TCP_Server_Info *server) { int rc; int len; @@ -328,14 +123,7 @@ static int reconn_set_ipaddr(struct TCP_Server_Info *server) return !rc ? -1 : 0; } -#else -static inline int reconn_set_ipaddr(struct TCP_Server_Info *server) -{ - return 0; -} -#endif -#ifdef CONFIG_CIFS_DFS_UPCALL /* These functions must be called with server->srv_mutex held */ static void reconn_set_next_dfs_target(struct TCP_Server_Info *server, struct cifs_sb_info *cifs_sb, @@ -343,6 +131,7 @@ static void reconn_set_next_dfs_target(struct TCP_Server_Info *server, struct dfs_cache_tgt_iterator **tgt_it) { const char *name; + int rc; if (!cifs_sb || !cifs_sb->origin_fullpath) return; @@ -366,6 +155,13 @@ static void reconn_set_next_dfs_target(struct TCP_Server_Info *server, cifs_dbg(FYI, "%s: failed to extract hostname from target: %ld\n", __func__, PTR_ERR(server->hostname)); + return; + } + + rc = reconn_set_ipaddr_from_hostname(server); + if (rc) { + cifs_dbg(FYI, "%s: failed to resolve hostname: %d\n", + __func__, rc); } } @@ -517,19 +313,25 @@ cifs_reconnect(struct TCP_Server_Info *server) try_to_freeze(); mutex_lock(&server->srv_mutex); + +#ifdef CONFIG_CIFS_SWN_UPCALL + if (server->use_swn_dstaddr) { + server->dstaddr = server->swn_dstaddr; + } else { +#endif + #ifdef CONFIG_CIFS_DFS_UPCALL - /* - * Set up next DFS target server (if any) for reconnect. If DFS - * feature is disabled, then we will retry last server we - * connected to before. 
- */ - reconn_set_next_dfs_target(server, cifs_sb, &tgt_list, &tgt_it); + /* + * Set up next DFS target server (if any) for reconnect. If DFS + * feature is disabled, then we will retry last server we + * connected to before. + */ + reconn_set_next_dfs_target(server, cifs_sb, &tgt_list, &tgt_it); #endif - rc = reconn_set_ipaddr(server); - if (rc) { - cifs_dbg(FYI, "%s: failed to resolve hostname: %d\n", - __func__, rc); + +#ifdef CONFIG_CIFS_SWN_UPCALL } +#endif if (cifs_rdma_enabled(server)) rc = smbd_reconnect(server); @@ -546,6 +348,9 @@ cifs_reconnect(struct TCP_Server_Info *server) if (server->tcpStatus != CifsExiting) server->tcpStatus = CifsNeedNegotiate; spin_unlock(&GlobalMid_Lock); +#ifdef CONFIG_CIFS_SWN_UPCALL + server->use_swn_dstaddr = false; +#endif mutex_unlock(&server->srv_mutex); } } while (server->tcpStatus == CifsNeedReconnect); @@ -610,6 +415,11 @@ cifs_echo_request(struct work_struct *work) cifs_dbg(FYI, "Unable to send echo request to server: %s\n", server->hostname); +#ifdef CONFIG_CIFS_SWN_UPCALL + /* Check witness registrations */ + cifs_swn_check(); +#endif + requeue_echo: queue_delayed_work(cifsiod_wq, &server->echo, server->echo_interval); } @@ -1036,6 +846,7 @@ static void smb2_add_credits_from_hdr(char *buffer, struct TCP_Server_Info *server) { struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buffer; + int scredits = server->credits; /* * SMB1 does not use credits. 
@@ -1048,6 +859,13 @@ smb2_add_credits_from_hdr(char *buffer, struct TCP_Server_Info *server) server->credits += le16_to_cpu(shdr->CreditRequest); spin_unlock(&server->req_lock); wake_up(&server->request_q); + + trace_smb3_add_credits(server->CurrentMid, + server->hostname, scredits, + le16_to_cpu(shdr->CreditRequest)); + cifs_server_dbg(FYI, "%s: added %u credits total=%d\n", + __func__, le16_to_cpu(shdr->CreditRequest), + scredits); } } @@ -1232,1051 +1050,9 @@ next_pdu: module_put_and_exit(0); } -/* extract the host portion of the UNC string */ -static char * -extract_hostname(const char *unc) -{ - const char *src; - char *dst, *delim; - unsigned int len; - - /* skip double chars at beginning of string */ - /* BB: check validity of these bytes? */ - if (strlen(unc) < 3) - return ERR_PTR(-EINVAL); - for (src = unc; *src && *src == '\\'; src++) - ; - if (!*src) - return ERR_PTR(-EINVAL); - - /* delimiter between hostname and sharename is always '\\' now */ - delim = strchr(src, '\\'); - if (!delim) - return ERR_PTR(-EINVAL); - - len = delim - src; - dst = kmalloc((len + 1), GFP_KERNEL); - if (dst == NULL) - return ERR_PTR(-ENOMEM); - - memcpy(dst, src, len); - dst[len] = '\0'; - - return dst; -} - -static int get_option_ul(substring_t args[], unsigned long *option) -{ - int rc; - char *string; - - string = match_strdup(args); - if (string == NULL) - return -ENOMEM; - rc = kstrtoul(string, 0, option); - kfree(string); - - return rc; -} - -static int get_option_uid(substring_t args[], kuid_t *result) -{ - unsigned long value; - kuid_t uid; - int rc; - - rc = get_option_ul(args, &value); - if (rc) - return rc; - - uid = make_kuid(current_user_ns(), value); - if (!uid_valid(uid)) - return -EINVAL; - - *result = uid; - return 0; -} - -static int get_option_gid(substring_t args[], kgid_t *result) -{ - unsigned long value; - kgid_t gid; - int rc; - - rc = get_option_ul(args, &value); - if (rc) - return rc; - - gid = make_kgid(current_user_ns(), value); - if 
(!gid_valid(gid)) - return -EINVAL; - - *result = gid; - return 0; -} - -/* - * Parse a devname into substrings and populate the vol->UNC and vol->prepath - * fields with the result. Returns 0 on success and an error otherwise. - */ -static int -cifs_parse_devname(const char *devname, struct smb_vol *vol) -{ - char *pos; - const char *delims = "/\\"; - size_t len; - - if (unlikely(!devname || !*devname)) { - cifs_dbg(VFS, "Device name not specified\n"); - return -EINVAL; - } - - /* make sure we have a valid UNC double delimiter prefix */ - len = strspn(devname, delims); - if (len != 2) - return -EINVAL; - - /* find delimiter between host and sharename */ - pos = strpbrk(devname + 2, delims); - if (!pos) - return -EINVAL; - - /* skip past delimiter */ - ++pos; - - /* now go until next delimiter or end of string */ - len = strcspn(pos, delims); - - /* move "pos" up to delimiter or NULL */ - pos += len; - vol->UNC = kstrndup(devname, pos - devname, GFP_KERNEL); - if (!vol->UNC) - return -ENOMEM; - - convert_delimiter(vol->UNC, '\\'); - - /* skip any delimiter */ - if (*pos == '/' || *pos == '\\') - pos++; - - /* If pos is NULL then no prepath */ - if (!*pos) - return 0; - - vol->prepath = kstrdup(pos, GFP_KERNEL); - if (!vol->prepath) - return -ENOMEM; - - return 0; -} - -static int -cifs_parse_mount_options(const char *mountdata, const char *devname, - struct smb_vol *vol, bool is_smb3) -{ - char *data, *end; - char *mountdata_copy = NULL, *options; - unsigned int temp_len, i, j; - char separator[2]; - short int override_uid = -1; - short int override_gid = -1; - bool uid_specified = false; - bool gid_specified = false; - bool sloppy = false; - char *invalid = NULL; - char *nodename = utsname()->nodename; - char *string = NULL; - char *tmp_end, *value; - char delim; - bool got_ip = false; - bool got_version = false; - unsigned short port = 0; - struct sockaddr *dstaddr = (struct sockaddr *)&vol->dstaddr; - - separator[0] = ','; - separator[1] = 0; - delim = 
separator[0]; - - /* ensure we always start with zeroed-out smb_vol */ - memset(vol, 0, sizeof(*vol)); - - /* - * does not have to be perfect mapping since field is - * informational, only used for servers that do not support - * port 445 and it can be overridden at mount time - */ - memset(vol->source_rfc1001_name, 0x20, RFC1001_NAME_LEN); - for (i = 0; i < strnlen(nodename, RFC1001_NAME_LEN); i++) - vol->source_rfc1001_name[i] = toupper(nodename[i]); - - vol->source_rfc1001_name[RFC1001_NAME_LEN] = 0; - /* null target name indicates to use *SMBSERVR default called name - if we end up sending RFC1001 session initialize */ - vol->target_rfc1001_name[0] = 0; - vol->cred_uid = current_uid(); - vol->linux_uid = current_uid(); - vol->linux_gid = current_gid(); - vol->bsize = 1024 * 1024; /* can improve cp performance significantly */ - /* - * default to SFM style remapping of seven reserved characters - * unless user overrides it or we negotiate CIFS POSIX where - * it is unnecessary. Can not simultaneously use more than one mapping - * since then readdir could list files that open could not open - */ - vol->remap = true; - - /* default to only allowing write access to owner of the mount */ - vol->dir_mode = vol->file_mode = S_IRUGO | S_IXUGO | S_IWUSR; - - /* vol->retry default is 0 (i.e. "soft" limited retry not hard retry) */ - /* default is always to request posix paths. */ - vol->posix_paths = 1; - /* default to using server inode numbers where available */ - vol->server_ino = 1; - - /* default is to use strict cifs caching semantics */ - vol->strict_io = true; - - vol->actimeo = CIFS_DEF_ACTIMEO; - - /* Most clients set timeout to 0, allows server to use its default */ - vol->handle_timeout = 0; /* See MS-SMB2 spec section 2.2.14.2.12 */ - - /* offer SMB2.1 and later (SMB3 etc). 
Secure and widely accepted */ - vol->ops = &smb30_operations; - vol->vals = &smbdefault_values; - - vol->echo_interval = SMB_ECHO_INTERVAL_DEFAULT; - - /* default to no multichannel (single server connection) */ - vol->multichannel = false; - vol->max_channels = 1; - - if (!mountdata) - goto cifs_parse_mount_err; - - mountdata_copy = kstrndup(mountdata, PAGE_SIZE, GFP_KERNEL); - if (!mountdata_copy) - goto cifs_parse_mount_err; - - options = mountdata_copy; - end = options + strlen(options); - - if (strncmp(options, "sep=", 4) == 0) { - if (options[4] != 0) { - separator[0] = options[4]; - options += 5; - } else { - cifs_dbg(FYI, "Null separator not allowed\n"); - } - } - vol->backupuid_specified = false; /* no backup intent for a user */ - vol->backupgid_specified = false; /* no backup intent for a group */ - - switch (cifs_parse_devname(devname, vol)) { - case 0: - break; - case -ENOMEM: - cifs_dbg(VFS, "Unable to allocate memory for devname\n"); - goto cifs_parse_mount_err; - case -EINVAL: - cifs_dbg(VFS, "Malformed UNC in devname\n"); - goto cifs_parse_mount_err; - default: - cifs_dbg(VFS, "Unknown error parsing devname\n"); - goto cifs_parse_mount_err; - } - - while ((data = strsep(&options, separator)) != NULL) { - substring_t args[MAX_OPT_ARGS]; - unsigned long option; - int token; - - if (!*data) - continue; - - token = match_token(data, cifs_mount_option_tokens, args); - - switch (token) { - - /* Ingnore the following */ - case Opt_ignore: - break; - - /* Boolean values */ - case Opt_user_xattr: - vol->no_xattr = 0; - break; - case Opt_nouser_xattr: - vol->no_xattr = 1; - break; - case Opt_forceuid: - override_uid = 1; - break; - case Opt_noforceuid: - override_uid = 0; - break; - case Opt_forcegid: - override_gid = 1; - break; - case Opt_noforcegid: - override_gid = 0; - break; - case Opt_noblocksend: - vol->noblocksnd = 1; - break; - case Opt_noautotune: - vol->noautotune = 1; - break; - case Opt_nolease: - vol->no_lease = 1; - break; - case Opt_hard: - 
vol->retry = 1; - break; - case Opt_soft: - vol->retry = 0; - break; - case Opt_perm: - vol->noperm = 0; - break; - case Opt_noperm: - vol->noperm = 1; - break; - case Opt_nodelete: - vol->nodelete = 1; - break; - case Opt_mapchars: - vol->sfu_remap = true; - vol->remap = false; /* disable SFM mapping */ - break; - case Opt_nomapchars: - vol->sfu_remap = false; - break; - case Opt_mapposix: - vol->remap = true; - vol->sfu_remap = false; /* disable SFU mapping */ - break; - case Opt_nomapposix: - vol->remap = false; - break; - case Opt_sfu: - vol->sfu_emul = 1; - break; - case Opt_nosfu: - vol->sfu_emul = 0; - break; - case Opt_nodfs: - vol->nodfs = 1; - break; - case Opt_rootfs: -#ifdef CONFIG_CIFS_ROOT - vol->rootfs = true; -#endif - break; - case Opt_posixpaths: - vol->posix_paths = 1; - break; - case Opt_noposixpaths: - vol->posix_paths = 0; - break; - case Opt_nounix: - if (vol->linux_ext) - cifs_dbg(VFS, - "conflicting unix mount options\n"); - vol->no_linux_ext = 1; - break; - case Opt_unix: - if (vol->no_linux_ext) - cifs_dbg(VFS, - "conflicting unix mount options\n"); - vol->linux_ext = 1; - break; - case Opt_nocase: - vol->nocase = 1; - break; - case Opt_brl: - vol->nobrl = 0; - break; - case Opt_nobrl: - vol->nobrl = 1; - /* - * turn off mandatory locking in mode - * if remote locking is turned off since the - * local vfs will do advisory - */ - if (vol->file_mode == - (S_IALLUGO & ~(S_ISUID | S_IXGRP))) - vol->file_mode = S_IALLUGO; - break; - case Opt_nohandlecache: - vol->nohandlecache = 1; - break; - case Opt_handlecache: - vol->nohandlecache = 0; - break; - case Opt_forcemandatorylock: - vol->mand_lock = 1; - break; - case Opt_setuids: - vol->setuids = 1; - break; - case Opt_nosetuids: - vol->setuids = 0; - break; - case Opt_setuidfromacl: - vol->setuidfromacl = 1; - break; - case Opt_dynperm: - vol->dynperm = true; - break; - case Opt_nodynperm: - vol->dynperm = false; - break; - case Opt_nohard: - vol->retry = 0; - break; - case Opt_nosoft: - 
vol->retry = 1; - break; - case Opt_nointr: - vol->intr = 0; - break; - case Opt_intr: - vol->intr = 1; - break; - case Opt_nostrictsync: - vol->nostrictsync = 1; - break; - case Opt_strictsync: - vol->nostrictsync = 0; - break; - case Opt_serverino: - vol->server_ino = 1; - break; - case Opt_noserverino: - vol->server_ino = 0; - break; - case Opt_rwpidforward: - vol->rwpidforward = 1; - break; - case Opt_modesid: - vol->mode_ace = 1; - break; - case Opt_cifsacl: - vol->cifs_acl = 1; - break; - case Opt_nocifsacl: - vol->cifs_acl = 0; - break; - case Opt_acl: - vol->no_psx_acl = 0; - break; - case Opt_noacl: - vol->no_psx_acl = 1; - break; - case Opt_locallease: - vol->local_lease = 1; - break; - case Opt_sign: - vol->sign = true; - break; - case Opt_ignore_signature: - vol->sign = true; - vol->ignore_signature = true; - break; - case Opt_seal: - /* we do not do the following in secFlags because seal - * is a per tree connection (mount) not a per socket - * or per-smb connection option in the protocol - * vol->secFlg |= CIFSSEC_MUST_SEAL; - */ - vol->seal = 1; - break; - case Opt_noac: - pr_warn("Mount option noac not supported. 
Instead set /proc/fs/cifs/LookupCacheEnabled to 0\n"); - break; - case Opt_fsc: -#ifndef CONFIG_CIFS_FSCACHE - cifs_dbg(VFS, "FS-Cache support needs CONFIG_CIFS_FSCACHE kernel config option set\n"); - goto cifs_parse_mount_err; -#endif - vol->fsc = true; - break; - case Opt_mfsymlinks: - vol->mfsymlinks = true; - break; - case Opt_multiuser: - vol->multiuser = true; - break; - case Opt_sloppy: - sloppy = true; - break; - case Opt_nosharesock: - vol->nosharesock = true; - break; - case Opt_nopersistent: - vol->nopersistent = true; - if (vol->persistent) { - cifs_dbg(VFS, - "persistenthandles mount options conflict\n"); - goto cifs_parse_mount_err; - } - break; - case Opt_persistent: - vol->persistent = true; - if ((vol->nopersistent) || (vol->resilient)) { - cifs_dbg(VFS, - "persistenthandles mount options conflict\n"); - goto cifs_parse_mount_err; - } - break; - case Opt_resilient: - vol->resilient = true; - if (vol->persistent) { - cifs_dbg(VFS, - "persistenthandles mount options conflict\n"); - goto cifs_parse_mount_err; - } - break; - case Opt_noresilient: - vol->resilient = false; /* already the default */ - break; - case Opt_domainauto: - vol->domainauto = true; - break; - case Opt_rdma: - vol->rdma = true; - break; - case Opt_multichannel: - vol->multichannel = true; - /* if number of channels not specified, default to 2 */ - if (vol->max_channels < 2) - vol->max_channels = 2; - break; - case Opt_nomultichannel: - vol->multichannel = false; - vol->max_channels = 1; - break; - case Opt_compress: - vol->compression = UNKNOWN_TYPE; - cifs_dbg(VFS, - "SMB3 compression support is experimental\n"); - break; - - /* Numeric Values */ - case Opt_backupuid: - if (get_option_uid(args, &vol->backupuid)) { - cifs_dbg(VFS, "%s: Invalid backupuid value\n", - __func__); - goto cifs_parse_mount_err; - } - vol->backupuid_specified = true; - break; - case Opt_backupgid: - if (get_option_gid(args, &vol->backupgid)) { - cifs_dbg(VFS, "%s: Invalid backupgid value\n", - __func__); 
- goto cifs_parse_mount_err; - } - vol->backupgid_specified = true; - break; - case Opt_uid: - if (get_option_uid(args, &vol->linux_uid)) { - cifs_dbg(VFS, "%s: Invalid uid value\n", - __func__); - goto cifs_parse_mount_err; - } - uid_specified = true; - break; - case Opt_cruid: - if (get_option_uid(args, &vol->cred_uid)) { - cifs_dbg(VFS, "%s: Invalid cruid value\n", - __func__); - goto cifs_parse_mount_err; - } - break; - case Opt_gid: - if (get_option_gid(args, &vol->linux_gid)) { - cifs_dbg(VFS, "%s: Invalid gid value\n", - __func__); - goto cifs_parse_mount_err; - } - gid_specified = true; - break; - case Opt_file_mode: - if (get_option_ul(args, &option)) { - cifs_dbg(VFS, "%s: Invalid file_mode value\n", - __func__); - goto cifs_parse_mount_err; - } - vol->file_mode = option; - break; - case Opt_dirmode: - if (get_option_ul(args, &option)) { - cifs_dbg(VFS, "%s: Invalid dir_mode value\n", - __func__); - goto cifs_parse_mount_err; - } - vol->dir_mode = option; - break; - case Opt_port: - if (get_option_ul(args, &option) || - option > USHRT_MAX) { - cifs_dbg(VFS, "%s: Invalid port value\n", - __func__); - goto cifs_parse_mount_err; - } - port = (unsigned short)option; - break; - case Opt_min_enc_offload: - if (get_option_ul(args, &option)) { - cifs_dbg(VFS, "Invalid minimum encrypted read offload size (esize)\n"); - goto cifs_parse_mount_err; - } - vol->min_offload = option; - break; - case Opt_blocksize: - if (get_option_ul(args, &option)) { - cifs_dbg(VFS, "%s: Invalid blocksize value\n", - __func__); - goto cifs_parse_mount_err; - } - /* - * inode blocksize realistically should never need to be - * less than 16K or greater than 16M and default is 1MB. - * Note that small inode block sizes (e.g. 
64K) can lead - * to very poor performance of common tools like cp and scp - */ - if ((option < CIFS_MAX_MSGSIZE) || - (option > (4 * SMB3_DEFAULT_IOSIZE))) { - cifs_dbg(VFS, "%s: Invalid blocksize\n", - __func__); - goto cifs_parse_mount_err; - } - vol->bsize = option; - break; - case Opt_rsize: - if (get_option_ul(args, &option)) { - cifs_dbg(VFS, "%s: Invalid rsize value\n", - __func__); - goto cifs_parse_mount_err; - } - vol->rsize = option; - break; - case Opt_wsize: - if (get_option_ul(args, &option)) { - cifs_dbg(VFS, "%s: Invalid wsize value\n", - __func__); - goto cifs_parse_mount_err; - } - vol->wsize = option; - break; - case Opt_actimeo: - if (get_option_ul(args, &option)) { - cifs_dbg(VFS, "%s: Invalid actimeo value\n", - __func__); - goto cifs_parse_mount_err; - } - vol->actimeo = HZ * option; - if (vol->actimeo > CIFS_MAX_ACTIMEO) { - cifs_dbg(VFS, "attribute cache timeout too large\n"); - goto cifs_parse_mount_err; - } - break; - case Opt_handletimeout: - if (get_option_ul(args, &option)) { - cifs_dbg(VFS, "%s: Invalid handletimeout value\n", - __func__); - goto cifs_parse_mount_err; - } - vol->handle_timeout = option; - if (vol->handle_timeout > SMB3_MAX_HANDLE_TIMEOUT) { - cifs_dbg(VFS, "Invalid handle cache timeout, longer than 16 minutes\n"); - goto cifs_parse_mount_err; - } - break; - case Opt_echo_interval: - if (get_option_ul(args, &option)) { - cifs_dbg(VFS, "%s: Invalid echo interval value\n", - __func__); - goto cifs_parse_mount_err; - } - vol->echo_interval = option; - break; - case Opt_snapshot: - if (get_option_ul(args, &option)) { - cifs_dbg(VFS, "%s: Invalid snapshot time\n", - __func__); - goto cifs_parse_mount_err; - } - vol->snapshot_time = option; - break; - case Opt_max_credits: - if (get_option_ul(args, &option) || (option < 20) || - (option > 60000)) { - cifs_dbg(VFS, "%s: Invalid max_credits value\n", - __func__); - goto cifs_parse_mount_err; - } - vol->max_credits = option; - break; - case Opt_max_channels: - if 
(get_option_ul(args, &option) || option < 1 || - option > CIFS_MAX_CHANNELS) { - cifs_dbg(VFS, "%s: Invalid max_channels value, needs to be 1-%d\n", - __func__, CIFS_MAX_CHANNELS); - goto cifs_parse_mount_err; - } - vol->max_channels = option; - break; - - /* String Arguments */ - - case Opt_blank_user: - /* null user, ie. anonymous authentication */ - vol->nullauth = 1; - vol->username = NULL; - break; - case Opt_user: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - - if (strnlen(string, CIFS_MAX_USERNAME_LEN) > - CIFS_MAX_USERNAME_LEN) { - pr_warn("username too long\n"); - goto cifs_parse_mount_err; - } - - kfree(vol->username); - vol->username = kstrdup(string, GFP_KERNEL); - if (!vol->username) - goto cifs_parse_mount_err; - break; - case Opt_blank_pass: - /* passwords have to be handled differently - * to allow the character used for deliminator - * to be passed within them - */ - - /* - * Check if this is a case where the password - * starts with a delimiter - */ - tmp_end = strchr(data, '='); - tmp_end++; - if (!(tmp_end < end && tmp_end[1] == delim)) { - /* No it is not. 
Set the password to NULL */ - kfree_sensitive(vol->password); - vol->password = NULL; - break; - } - fallthrough; /* to Opt_pass below */ - case Opt_pass: - /* Obtain the value string */ - value = strchr(data, '='); - value++; - - /* Set tmp_end to end of the string */ - tmp_end = (char *) value + strlen(value); - - /* Check if following character is the deliminator - * If yes, we have encountered a double deliminator - * reset the NULL character to the deliminator - */ - if (tmp_end < end && tmp_end[1] == delim) { - tmp_end[0] = delim; - - /* Keep iterating until we get to a single - * deliminator OR the end - */ - while ((tmp_end = strchr(tmp_end, delim)) - != NULL && (tmp_end[1] == delim)) { - tmp_end = (char *) &tmp_end[2]; - } - - /* Reset var options to point to next element */ - if (tmp_end) { - tmp_end[0] = '\0'; - options = (char *) &tmp_end[1]; - } else - /* Reached the end of the mount option - * string */ - options = end; - } - - kfree_sensitive(vol->password); - /* Now build new password string */ - temp_len = strlen(value); - vol->password = kzalloc(temp_len+1, GFP_KERNEL); - if (vol->password == NULL) { - pr_warn("no memory for password\n"); - goto cifs_parse_mount_err; - } - - for (i = 0, j = 0; i < temp_len; i++, j++) { - vol->password[j] = value[i]; - if ((value[i] == delim) && - value[i+1] == delim) - /* skip the second deliminator */ - i++; - } - vol->password[j] = '\0'; - break; - case Opt_blank_ip: - /* FIXME: should this be an error instead? 
*/ - got_ip = false; - break; - case Opt_ip: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - - if (!cifs_convert_address(dstaddr, string, - strlen(string))) { - pr_err("bad ip= option (%s)\n", string); - goto cifs_parse_mount_err; - } - got_ip = true; - break; - case Opt_domain: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - - if (strnlen(string, CIFS_MAX_DOMAINNAME_LEN) - == CIFS_MAX_DOMAINNAME_LEN) { - pr_warn("domain name too long\n"); - goto cifs_parse_mount_err; - } - - kfree(vol->domainname); - vol->domainname = kstrdup(string, GFP_KERNEL); - if (!vol->domainname) { - pr_warn("no memory for domainname\n"); - goto cifs_parse_mount_err; - } - cifs_dbg(FYI, "Domain name set\n"); - break; - case Opt_srcaddr: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - - if (!cifs_convert_address( - (struct sockaddr *)&vol->srcaddr, - string, strlen(string))) { - pr_warn("Could not parse srcaddr: %s\n", - string); - goto cifs_parse_mount_err; - } - break; - case Opt_iocharset: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - - if (strnlen(string, 1024) >= 65) { - pr_warn("iocharset name too long\n"); - goto cifs_parse_mount_err; - } - - if (strncasecmp(string, "default", 7) != 0) { - kfree(vol->iocharset); - vol->iocharset = kstrdup(string, - GFP_KERNEL); - if (!vol->iocharset) { - pr_warn("no memory for charset\n"); - goto cifs_parse_mount_err; - } - } - /* if iocharset not set then load_nls_default - * is used by caller - */ - cifs_dbg(FYI, "iocharset set to %s\n", string); - break; - case Opt_netbiosname: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - - memset(vol->source_rfc1001_name, 0x20, - RFC1001_NAME_LEN); - /* - * FIXME: are there cases in which a comma can - * be valid in workstation netbios name (and - * need special handling)? 
- */ - for (i = 0; i < RFC1001_NAME_LEN; i++) { - /* don't ucase netbiosname for user */ - if (string[i] == 0) - break; - vol->source_rfc1001_name[i] = string[i]; - } - /* The string has 16th byte zero still from - * set at top of the function - */ - if (i == RFC1001_NAME_LEN && string[i] != 0) - pr_warn("netbiosname longer than 15 truncated\n"); - break; - case Opt_servern: - /* servernetbiosname specified override *SMBSERVER */ - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - - /* last byte, type, is 0x20 for servr type */ - memset(vol->target_rfc1001_name, 0x20, - RFC1001_NAME_LEN_WITH_NULL); - - /* BB are there cases in which a comma can be - valid in this workstation netbios name - (and need special handling)? */ - - /* user or mount helper must uppercase the - netbios name */ - for (i = 0; i < 15; i++) { - if (string[i] == 0) - break; - vol->target_rfc1001_name[i] = string[i]; - } - /* The string has 16th byte zero still from - set at top of the function */ - if (i == RFC1001_NAME_LEN && string[i] != 0) - pr_warn("server netbiosname longer than 15 truncated\n"); - break; - case Opt_ver: - /* version of mount userspace tools, not dialect */ - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - - /* If interface changes in mount.cifs bump to new ver */ - if (strncasecmp(string, "1", 1) == 0) { - if (strlen(string) > 1) { - pr_warn("Bad mount helper ver=%s. 
Did you want SMB1 (CIFS) dialect and mean to type vers=1.0 instead?\n", - string); - goto cifs_parse_mount_err; - } - /* This is the default */ - break; - } - /* For all other value, error */ - pr_warn("Invalid mount helper version specified\n"); - goto cifs_parse_mount_err; - case Opt_vers: - /* protocol version (dialect) */ - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - - if (cifs_parse_smb_version(string, vol, is_smb3) != 0) - goto cifs_parse_mount_err; - got_version = true; - break; - case Opt_sec: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - - if (cifs_parse_security_flavors(string, vol) != 0) - goto cifs_parse_mount_err; - break; - case Opt_cache: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - - if (cifs_parse_cache_flavor(string, vol) != 0) - goto cifs_parse_mount_err; - break; - default: - /* - * An option we don't recognize. Save it off for later - * if we haven't already found one - */ - if (!invalid) - invalid = data; - break; - } - /* Free up any allocated string */ - kfree(string); - string = NULL; - } - - if (!sloppy && invalid) { - pr_err("Unknown mount option \"%s\"\n", invalid); - goto cifs_parse_mount_err; - } - - if (vol->rdma && vol->vals->protocol_id < SMB30_PROT_ID) { - cifs_dbg(VFS, "SMB Direct requires Version >=3.0\n"); - goto cifs_parse_mount_err; - } - -#ifndef CONFIG_KEYS - /* Muliuser mounts require CONFIG_KEYS support */ - if (vol->multiuser) { - cifs_dbg(VFS, "Multiuser mounts require kernels with CONFIG_KEYS enabled\n"); - goto cifs_parse_mount_err; - } -#endif - if (!vol->UNC) { - cifs_dbg(VFS, "CIFS mount error: No usable UNC path provided in device string!\n"); - goto cifs_parse_mount_err; - } - - /* make sure UNC has a share name */ - if (!strchr(vol->UNC + 3, '\\')) { - cifs_dbg(VFS, "Malformed UNC. Unable to find share name.\n"); - goto cifs_parse_mount_err; - } - - if (!got_ip) { - int len; - const char *slash; - - /* No ip= option specified? 
Try to get it from UNC */ - /* Use the address part of the UNC. */ - slash = strchr(&vol->UNC[2], '\\'); - len = slash - &vol->UNC[2]; - if (!cifs_convert_address(dstaddr, &vol->UNC[2], len)) { - pr_err("Unable to determine destination address\n"); - goto cifs_parse_mount_err; - } - } - - /* set the port that we got earlier */ - cifs_set_port(dstaddr, port); - - if (uid_specified) - vol->override_uid = override_uid; - else if (override_uid == 1) - pr_notice("ignoring forceuid mount option specified with no uid= option\n"); - - if (gid_specified) - vol->override_gid = override_gid; - else if (override_gid == 1) - pr_notice("ignoring forcegid mount option specified with no gid= option\n"); - - if (got_version == false) - pr_warn_once("No dialect specified on mount. Default has changed to a more secure dialect, SMB2.1 or later (e.g. SMB3.1.1), from CIFS (SMB1). To use the less secure SMB1 dialect to access old servers which do not support SMB3.1.1 (or even SMB3 or SMB2.1) specify vers=1.0 on mount.\n"); - - kfree(mountdata_copy); - return 0; - -out_nomem: - pr_warn("Could not allocate temporary buffer\n"); -cifs_parse_mount_err: - kfree(string); - kfree(mountdata_copy); - return 1; -} - -/** Returns true if srcaddr isn't specified and rhs isn't - * specified, or if srcaddr is specified and - * matches the IP address of the rhs argument. 
+/** + * Returns true if srcaddr isn't specified and rhs isn't specified, or + * if srcaddr is specified and matches the IP address of the rhs argument */ bool cifs_match_ipaddr(struct sockaddr *srcaddr, struct sockaddr *rhs) @@ -2377,14 +1153,14 @@ match_address(struct TCP_Server_Info *server, struct sockaddr *addr, } static bool -match_security(struct TCP_Server_Info *server, struct smb_vol *vol) +match_security(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) { /* - * The select_sectype function should either return the vol->sectype + * The select_sectype function should either return the ctx->sectype * that was specified, or "Unspecified" if that sectype was not * compatible with the given NEGOTIATE request. */ - if (server->ops->select_sectype(server, vol->sectype) + if (server->ops->select_sectype(server, ctx->sectype) == Unspecified) return false; @@ -2393,60 +1169,60 @@ match_security(struct TCP_Server_Info *server, struct smb_vol *vol) * global_secflags at this point since if MUST_SIGN is set then * the server->sign had better be too. 
*/ - if (vol->sign && !server->sign) + if (ctx->sign && !server->sign) return false; return true; } -static int match_server(struct TCP_Server_Info *server, struct smb_vol *vol) +static int match_server(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) { - struct sockaddr *addr = (struct sockaddr *)&vol->dstaddr; + struct sockaddr *addr = (struct sockaddr *)&ctx->dstaddr; - if (vol->nosharesock) + if (ctx->nosharesock) return 0; /* If multidialect negotiation see if existing sessions match one */ - if (strcmp(vol->vals->version_string, SMB3ANY_VERSION_STRING) == 0) { + if (strcmp(ctx->vals->version_string, SMB3ANY_VERSION_STRING) == 0) { if (server->vals->protocol_id < SMB30_PROT_ID) return 0; - } else if (strcmp(vol->vals->version_string, + } else if (strcmp(ctx->vals->version_string, SMBDEFAULT_VERSION_STRING) == 0) { if (server->vals->protocol_id < SMB21_PROT_ID) return 0; - } else if ((server->vals != vol->vals) || (server->ops != vol->ops)) + } else if ((server->vals != ctx->vals) || (server->ops != ctx->ops)) return 0; if (!net_eq(cifs_net_ns(server), current->nsproxy->net_ns)) return 0; if (!match_address(server, addr, - (struct sockaddr *)&vol->srcaddr)) + (struct sockaddr *)&ctx->srcaddr)) return 0; if (!match_port(server, addr)) return 0; - if (!match_security(server, vol)) + if (!match_security(server, ctx)) return 0; - if (server->echo_interval != vol->echo_interval * HZ) + if (server->echo_interval != ctx->echo_interval * HZ) return 0; - if (server->rdma != vol->rdma) + if (server->rdma != ctx->rdma) return 0; - if (server->ignore_signature != vol->ignore_signature) + if (server->ignore_signature != ctx->ignore_signature) return 0; - if (server->min_offload != vol->min_offload) + if (server->min_offload != ctx->min_offload) return 0; return 1; } struct TCP_Server_Info * -cifs_find_tcp_session(struct smb_vol *vol) +cifs_find_tcp_session(struct smb3_fs_context *ctx) { struct TCP_Server_Info *server; @@ -2456,7 +1232,7 @@ 
cifs_find_tcp_session(struct smb_vol *vol) * Skip ses channels since they're only handled in lower layers * (e.g. cifs_send_recv). */ - if (server->is_channel || !match_server(server, vol)) + if (server->is_channel || !match_server(server, ctx)) continue; ++server->srv_count; @@ -2514,15 +1290,15 @@ cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect) } struct TCP_Server_Info * -cifs_get_tcp_session(struct smb_vol *volume_info) +cifs_get_tcp_session(struct smb3_fs_context *ctx) { struct TCP_Server_Info *tcp_ses = NULL; int rc; - cifs_dbg(FYI, "UNC: %s\n", volume_info->UNC); + cifs_dbg(FYI, "UNC: %s\n", ctx->UNC); /* see if we already have a matching tcp_ses */ - tcp_ses = cifs_find_tcp_session(volume_info); + tcp_ses = cifs_find_tcp_session(ctx); if (tcp_ses) return tcp_ses; @@ -2532,20 +1308,20 @@ cifs_get_tcp_session(struct smb_vol *volume_info) goto out_err; } - tcp_ses->ops = volume_info->ops; - tcp_ses->vals = volume_info->vals; + tcp_ses->ops = ctx->ops; + tcp_ses->vals = ctx->vals; cifs_set_net_ns(tcp_ses, get_net(current->nsproxy->net_ns)); - tcp_ses->hostname = extract_hostname(volume_info->UNC); + tcp_ses->hostname = extract_hostname(ctx->UNC); if (IS_ERR(tcp_ses->hostname)) { rc = PTR_ERR(tcp_ses->hostname); goto out_err_crypto_release; } - tcp_ses->noblockcnt = volume_info->rootfs; - tcp_ses->noblocksnd = volume_info->noblocksnd || volume_info->rootfs; - tcp_ses->noautotune = volume_info->noautotune; - tcp_ses->tcp_nodelay = volume_info->sockopt_tcp_nodelay; - tcp_ses->rdma = volume_info->rdma; + tcp_ses->noblockcnt = ctx->rootfs; + tcp_ses->noblocksnd = ctx->noblocksnd || ctx->rootfs; + tcp_ses->noautotune = ctx->noautotune; + tcp_ses->tcp_nodelay = ctx->sockopt_tcp_nodelay; + tcp_ses->rdma = ctx->rdma; tcp_ses->in_flight = 0; tcp_ses->max_in_flight = 0; tcp_ses->credits = 1; @@ -2554,26 +1330,26 @@ cifs_get_tcp_session(struct smb_vol *volume_info) INIT_LIST_HEAD(&tcp_ses->pending_mid_q); mutex_init(&tcp_ses->srv_mutex); 
memcpy(tcp_ses->workstation_RFC1001_name, - volume_info->source_rfc1001_name, RFC1001_NAME_LEN_WITH_NULL); + ctx->source_rfc1001_name, RFC1001_NAME_LEN_WITH_NULL); memcpy(tcp_ses->server_RFC1001_name, - volume_info->target_rfc1001_name, RFC1001_NAME_LEN_WITH_NULL); + ctx->target_rfc1001_name, RFC1001_NAME_LEN_WITH_NULL); tcp_ses->session_estab = false; tcp_ses->sequence_number = 0; tcp_ses->reconnect_instance = 1; tcp_ses->lstrp = jiffies; - tcp_ses->compress_algorithm = cpu_to_le16(volume_info->compression); + tcp_ses->compress_algorithm = cpu_to_le16(ctx->compression); spin_lock_init(&tcp_ses->req_lock); INIT_LIST_HEAD(&tcp_ses->tcp_ses_list); INIT_LIST_HEAD(&tcp_ses->smb_ses_list); INIT_DELAYED_WORK(&tcp_ses->echo, cifs_echo_request); INIT_DELAYED_WORK(&tcp_ses->reconnect, smb2_reconnect_server); mutex_init(&tcp_ses->reconnect_mutex); - memcpy(&tcp_ses->srcaddr, &volume_info->srcaddr, + memcpy(&tcp_ses->srcaddr, &ctx->srcaddr, sizeof(tcp_ses->srcaddr)); - memcpy(&tcp_ses->dstaddr, &volume_info->dstaddr, + memcpy(&tcp_ses->dstaddr, &ctx->dstaddr, sizeof(tcp_ses->dstaddr)); - if (volume_info->use_client_guid) - memcpy(tcp_ses->client_guid, volume_info->client_guid, + if (ctx->use_client_guid) + memcpy(tcp_ses->client_guid, ctx->client_guid, SMB2_CLIENT_GUID_SIZE); else generate_random_uuid(tcp_ses->client_guid); @@ -2585,9 +1361,9 @@ cifs_get_tcp_session(struct smb_vol *volume_info) tcp_ses->tcpStatus = CifsNew; ++tcp_ses->srv_count; - if (volume_info->echo_interval >= SMB_ECHO_INTERVAL_MIN && - volume_info->echo_interval <= SMB_ECHO_INTERVAL_MAX) - tcp_ses->echo_interval = volume_info->echo_interval * HZ; + if (ctx->echo_interval >= SMB_ECHO_INTERVAL_MIN && + ctx->echo_interval <= SMB_ECHO_INTERVAL_MAX) + tcp_ses->echo_interval = ctx->echo_interval * HZ; else tcp_ses->echo_interval = SMB_ECHO_INTERVAL_DEFAULT * HZ; if (tcp_ses->rdma) { @@ -2597,7 +1373,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info) goto out_err_crypto_release; #endif tcp_ses->smbd_conn = 
smbd_get_connection( - tcp_ses, (struct sockaddr *)&volume_info->dstaddr); + tcp_ses, (struct sockaddr *)&ctx->dstaddr); if (tcp_ses->smbd_conn) { cifs_dbg(VFS, "RDMA transport established\n"); rc = 0; @@ -2626,11 +1402,11 @@ smbd_connected: module_put(THIS_MODULE); goto out_err_crypto_release; } - tcp_ses->min_offload = volume_info->min_offload; + tcp_ses->min_offload = ctx->min_offload; tcp_ses->tcpStatus = CifsNeedNegotiate; tcp_ses->nr_targets = 1; - tcp_ses->ignore_signature = volume_info->ignore_signature; + tcp_ses->ignore_signature = ctx->ignore_signature; /* thread spawned, put it on the list */ spin_lock(&cifs_tcp_ses_lock); list_add(&tcp_ses->tcp_ses_list, &cifs_tcp_ses_list); @@ -2659,41 +1435,41 @@ out_err: return ERR_PTR(rc); } -static int match_session(struct cifs_ses *ses, struct smb_vol *vol) +static int match_session(struct cifs_ses *ses, struct smb3_fs_context *ctx) { - if (vol->sectype != Unspecified && - vol->sectype != ses->sectype) + if (ctx->sectype != Unspecified && + ctx->sectype != ses->sectype) return 0; /* * If an existing session is limited to less channels than * requested, it should not be reused */ - if (ses->chan_max < vol->max_channels) + if (ses->chan_max < ctx->max_channels) return 0; switch (ses->sectype) { case Kerberos: - if (!uid_eq(vol->cred_uid, ses->cred_uid)) + if (!uid_eq(ctx->cred_uid, ses->cred_uid)) return 0; break; default: /* NULL username means anonymous session */ if (ses->user_name == NULL) { - if (!vol->nullauth) + if (!ctx->nullauth) return 0; break; } /* anything else takes username/password */ if (strncmp(ses->user_name, - vol->username ? vol->username : "", + ctx->username ? ctx->username : "", CIFS_MAX_USERNAME_LEN)) return 0; - if ((vol->username && strlen(vol->username) != 0) && + if ((ctx->username && strlen(ctx->username) != 0) && ses->password != NULL && strncmp(ses->password, - vol->password ? vol->password : "", + ctx->password ? 
ctx->password : "", CIFS_MAX_PASSWORD_LEN)) return 0; } @@ -2707,11 +1483,10 @@ static int match_session(struct cifs_ses *ses, struct smb_vol *vol) * tcon_ipc. The IPC tcon has the same lifetime as the session. */ static int -cifs_setup_ipc(struct cifs_ses *ses, struct smb_vol *volume_info) +cifs_setup_ipc(struct cifs_ses *ses, struct smb3_fs_context *ctx) { int rc = 0, xid; struct cifs_tcon *tcon; - struct nls_table *nls_codepage; char unc[SERVER_NAME_LENGTH + sizeof("//x/IPC$")] = {0}; bool seal = false; struct TCP_Server_Info *server = ses->server; @@ -2720,7 +1495,7 @@ cifs_setup_ipc(struct cifs_ses *ses, struct smb_vol *volume_info) * If the mount request that resulted in the creation of the * session requires encryption, force IPC to be encrypted too. */ - if (volume_info->seal) { + if (ctx->seal) { if (server->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION) seal = true; else { @@ -2736,14 +1511,11 @@ cifs_setup_ipc(struct cifs_ses *ses, struct smb_vol *volume_info) scnprintf(unc, sizeof(unc), "\\\\%s\\IPC$", server->hostname); - /* cannot fail */ - nls_codepage = load_nls_default(); - xid = get_xid(); tcon->ses = ses; tcon->ipc = true; tcon->seal = seal; - rc = server->ops->tree_connect(xid, ses, unc, tcon, nls_codepage); + rc = server->ops->tree_connect(xid, ses, unc, tcon, ctx->local_nls); free_xid(xid); if (rc) { @@ -2756,7 +1528,6 @@ cifs_setup_ipc(struct cifs_ses *ses, struct smb_vol *volume_info) ses->tcon_ipc = tcon; out: - unload_nls(nls_codepage); return rc; } @@ -2789,7 +1560,7 @@ cifs_free_ipc(struct cifs_ses *ses) } static struct cifs_ses * -cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb_vol *vol) +cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) { struct cifs_ses *ses; @@ -2797,7 +1568,7 @@ cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb_vol *vol) list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { if (ses->status == CifsExiting) continue; - if (!match_session(ses, vol)) + if 
(!match_session(ses, ctx)) continue; ++ses->ses_count; spin_unlock(&cifs_tcp_ses_lock); @@ -2861,7 +1632,7 @@ void cifs_put_smb_ses(struct cifs_ses *ses) /* Populate username and pw fields from keyring if possible */ static int -cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses) +cifs_set_cifscreds(struct smb3_fs_context *ctx, struct cifs_ses *ses) { int rc = 0; int is_domain = 0; @@ -2941,32 +1712,32 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses) goto out_key_put; } - vol->username = kstrndup(payload, len, GFP_KERNEL); - if (!vol->username) { + ctx->username = kstrndup(payload, len, GFP_KERNEL); + if (!ctx->username) { cifs_dbg(FYI, "Unable to allocate %zd bytes for username\n", len); rc = -ENOMEM; goto out_key_put; } - cifs_dbg(FYI, "%s: username=%s\n", __func__, vol->username); + cifs_dbg(FYI, "%s: username=%s\n", __func__, ctx->username); len = key->datalen - (len + 1); if (len > CIFS_MAX_PASSWORD_LEN || len <= 0) { cifs_dbg(FYI, "Bad len for password search (len=%zd)\n", len); rc = -EINVAL; - kfree(vol->username); - vol->username = NULL; + kfree(ctx->username); + ctx->username = NULL; goto out_key_put; } ++delim; - vol->password = kstrndup(delim, len, GFP_KERNEL); - if (!vol->password) { + ctx->password = kstrndup(delim, len, GFP_KERNEL); + if (!ctx->password) { cifs_dbg(FYI, "Unable to allocate %zd bytes for password\n", len); rc = -ENOMEM; - kfree(vol->username); - vol->username = NULL; + kfree(ctx->username); + ctx->username = NULL; goto out_key_put; } @@ -2975,17 +1746,17 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses) * for the request. 
*/ if (is_domain && ses->domainName) { - vol->domainname = kstrndup(ses->domainName, + ctx->domainname = kstrndup(ses->domainName, strlen(ses->domainName), GFP_KERNEL); - if (!vol->domainname) { + if (!ctx->domainname) { cifs_dbg(FYI, "Unable to allocate %zd bytes for domain\n", len); rc = -ENOMEM; - kfree(vol->username); - vol->username = NULL; - kfree_sensitive(vol->password); - vol->password = NULL; + kfree(ctx->username); + ctx->username = NULL; + kfree_sensitive(ctx->password); + ctx->password = NULL; goto out_key_put; } } @@ -3000,7 +1771,7 @@ out_err: } #else /* ! CONFIG_KEYS */ static inline int -cifs_set_cifscreds(struct smb_vol *vol __attribute__((unused)), +cifs_set_cifscreds(struct smb3_fs_context *ctx __attribute__((unused)), struct cifs_ses *ses __attribute__((unused))) { return -ENOSYS; @@ -3008,14 +1779,14 @@ cifs_set_cifscreds(struct smb_vol *vol __attribute__((unused)), #endif /* CONFIG_KEYS */ /** - * cifs_get_smb_ses - get a session matching @volume_info data from @server + * cifs_get_smb_ses - get a session matching @ctx data from @server * * This function assumes it is being called from cifs_mount() where we * already got a server reference (server refcount +1). See * cifs_get_tcon() for refcount explanations. 
*/ struct cifs_ses * -cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info) +cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) { int rc = -ENOMEM; unsigned int xid; @@ -3025,7 +1796,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info) xid = get_xid(); - ses = cifs_find_smb_ses(server, volume_info); + ses = cifs_find_smb_ses(server, ctx); if (ses) { cifs_dbg(FYI, "Existing smb sess found (status=%d)\n", ses->status); @@ -3042,7 +1813,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info) if (ses->need_reconnect) { cifs_dbg(FYI, "Session needs reconnect\n"); rc = cifs_setup_session(xid, ses, - volume_info->local_nls); + ctx->local_nls); if (rc) { mutex_unlock(&ses->session_mutex); /* problem -- put our reference */ @@ -3071,40 +1842,40 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info) else sprintf(ses->serverName, "%pI4", &addr->sin_addr); - if (volume_info->username) { - ses->user_name = kstrdup(volume_info->username, GFP_KERNEL); + if (ctx->username) { + ses->user_name = kstrdup(ctx->username, GFP_KERNEL); if (!ses->user_name) goto get_ses_fail; } - /* volume_info->password freed at unmount */ - if (volume_info->password) { - ses->password = kstrdup(volume_info->password, GFP_KERNEL); + /* ctx->password freed at unmount */ + if (ctx->password) { + ses->password = kstrdup(ctx->password, GFP_KERNEL); if (!ses->password) goto get_ses_fail; } - if (volume_info->domainname) { - ses->domainName = kstrdup(volume_info->domainname, GFP_KERNEL); + if (ctx->domainname) { + ses->domainName = kstrdup(ctx->domainname, GFP_KERNEL); if (!ses->domainName) goto get_ses_fail; } - if (volume_info->domainauto) - ses->domainAuto = volume_info->domainauto; - ses->cred_uid = volume_info->cred_uid; - ses->linux_uid = volume_info->linux_uid; + if (ctx->domainauto) + ses->domainAuto = ctx->domainauto; + ses->cred_uid = ctx->cred_uid; + ses->linux_uid = 
ctx->linux_uid; - ses->sectype = volume_info->sectype; - ses->sign = volume_info->sign; + ses->sectype = ctx->sectype; + ses->sign = ctx->sign; mutex_lock(&ses->session_mutex); /* add server as first channel */ ses->chans[0].server = server; ses->chan_count = 1; - ses->chan_max = volume_info->multichannel ? volume_info->max_channels:1; + ses->chan_max = ctx->multichannel ? ctx->max_channels:1; rc = cifs_negotiate_protocol(xid, ses); if (!rc) - rc = cifs_setup_session(xid, ses, volume_info->local_nls); + rc = cifs_setup_session(xid, ses, ctx->local_nls); /* each channel uses a different signing key */ memcpy(ses->chans[0].signkey, ses->smb3signingkey, @@ -3121,7 +1892,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info) free_xid(xid); - cifs_setup_ipc(ses, volume_info); + cifs_setup_ipc(ses, ctx); return ses; @@ -3131,27 +1902,27 @@ get_ses_fail: return ERR_PTR(rc); } -static int match_tcon(struct cifs_tcon *tcon, struct smb_vol *volume_info) +static int match_tcon(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) { if (tcon->tidStatus == CifsExiting) return 0; - if (strncmp(tcon->treeName, volume_info->UNC, MAX_TREE_SIZE)) + if (strncmp(tcon->treeName, ctx->UNC, MAX_TREE_SIZE)) return 0; - if (tcon->seal != volume_info->seal) + if (tcon->seal != ctx->seal) return 0; - if (tcon->snapshot_time != volume_info->snapshot_time) + if (tcon->snapshot_time != ctx->snapshot_time) return 0; - if (tcon->handle_timeout != volume_info->handle_timeout) + if (tcon->handle_timeout != ctx->handle_timeout) return 0; - if (tcon->no_lease != volume_info->no_lease) + if (tcon->no_lease != ctx->no_lease) return 0; - if (tcon->nodelete != volume_info->nodelete) + if (tcon->nodelete != ctx->nodelete) return 0; return 1; } static struct cifs_tcon * -cifs_find_tcon(struct cifs_ses *ses, struct smb_vol *volume_info) +cifs_find_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx) { struct list_head *tmp; struct cifs_tcon *tcon; @@ -3163,7 +1934,7 @@ 
cifs_find_tcon(struct cifs_ses *ses, struct smb_vol *volume_info) if (tcon->dfs_path) continue; #endif - if (!match_tcon(tcon, volume_info)) + if (!match_tcon(tcon, ctx)) continue; ++tcon->tc_count; spin_unlock(&cifs_tcp_ses_lock); @@ -3194,6 +1965,18 @@ cifs_put_tcon(struct cifs_tcon *tcon) return; } +#ifdef CONFIG_CIFS_SWN_UPCALL + if (tcon->use_witness) { + int rc; + + rc = cifs_swn_unregister(tcon); + if (rc < 0) { + cifs_dbg(VFS, "%s: Failed to unregister for witness notifications: %d\n", + __func__, rc); + } + } +#endif + list_del_init(&tcon->tcon_list); spin_unlock(&cifs_tcp_ses_lock); @@ -3208,7 +1991,7 @@ cifs_put_tcon(struct cifs_tcon *tcon) } /** - * cifs_get_tcon - get a tcon matching @volume_info data from @ses + * cifs_get_tcon - get a tcon matching @ctx data from @ses * * - tcon refcount is the number of mount points using the tcon. * - ses refcount is the number of tcon using the session. @@ -3228,12 +2011,12 @@ cifs_put_tcon(struct cifs_tcon *tcon) * decrement the ses refcount. 
*/ static struct cifs_tcon * -cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info) +cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx) { int rc, xid; struct cifs_tcon *tcon; - tcon = cifs_find_tcon(ses, volume_info); + tcon = cifs_find_tcon(ses, ctx); if (tcon) { /* * tcon has refcount already incremented but we need to @@ -3255,36 +2038,36 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info) goto out_fail; } - if (volume_info->snapshot_time) { + if (ctx->snapshot_time) { if (ses->server->vals->protocol_id == 0) { cifs_dbg(VFS, "Use SMB2 or later for snapshot mount option\n"); rc = -EOPNOTSUPP; goto out_fail; } else - tcon->snapshot_time = volume_info->snapshot_time; + tcon->snapshot_time = ctx->snapshot_time; } - if (volume_info->handle_timeout) { + if (ctx->handle_timeout) { if (ses->server->vals->protocol_id == 0) { cifs_dbg(VFS, "Use SMB2.1 or later for handle timeout option\n"); rc = -EOPNOTSUPP; goto out_fail; } else - tcon->handle_timeout = volume_info->handle_timeout; + tcon->handle_timeout = ctx->handle_timeout; } tcon->ses = ses; - if (volume_info->password) { - tcon->password = kstrdup(volume_info->password, GFP_KERNEL); + if (ctx->password) { + tcon->password = kstrdup(ctx->password, GFP_KERNEL); if (!tcon->password) { rc = -ENOMEM; goto out_fail; } } - if (volume_info->seal) { + if (ctx->seal) { if (ses->server->vals->protocol_id == 0) { cifs_dbg(VFS, "SMB3 or later required for encryption\n"); @@ -3300,7 +2083,7 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info) } } - if (volume_info->linux_ext) { + if (ctx->linux_ext) { if (ses->server->posix_ext_supported) { tcon->posix_extensions = true; pr_warn_once("SMB3.11 POSIX Extensions are experimental\n"); @@ -3316,8 +2099,8 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info) * SetFS as we do on SessSetup and reconnect? 
*/ xid = get_xid(); - rc = ses->server->ops->tree_connect(xid, ses, volume_info->UNC, tcon, - volume_info->local_nls); + rc = ses->server->ops->tree_connect(xid, ses, ctx->UNC, tcon, + ctx->local_nls); free_xid(xid); cifs_dbg(FYI, "Tcon rc = %d\n", rc); if (rc) @@ -3325,7 +2108,7 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info) tcon->use_persistent = false; /* check if SMB2 or later, CIFS does not support persistent handles */ - if (volume_info->persistent) { + if (ctx->persistent) { if (ses->server->vals->protocol_id == 0) { cifs_dbg(VFS, "SMB3 or later required for persistent handles\n"); @@ -3342,10 +2125,10 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info) } } else if ((tcon->capabilities & SMB2_SHARE_CAP_CONTINUOUS_AVAILABILITY) && (ses->server->capabilities & SMB2_GLOBAL_CAP_PERSISTENT_HANDLES) - && (volume_info->nopersistent == false)) { + && (ctx->nopersistent == false)) { cifs_dbg(FYI, "enabling persistent handles\n"); tcon->use_persistent = true; - } else if (volume_info->resilient) { + } else if (ctx->resilient) { if (ses->server->vals->protocol_id == 0) { cifs_dbg(VFS, "SMB2.1 or later required for resilient handles\n"); @@ -3354,23 +2137,52 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info) } tcon->use_resilient = true; } +#ifdef CONFIG_CIFS_SWN_UPCALL + tcon->use_witness = false; + if (ctx->witness) { + if (ses->server->vals->protocol_id >= SMB30_PROT_ID) { + if (tcon->capabilities & SMB2_SHARE_CAP_CLUSTER) { + /* + * Set witness in use flag in first place + * to retry registration in the echo task + */ + tcon->use_witness = true; + /* And try to register immediately */ + rc = cifs_swn_register(tcon); + if (rc < 0) { + cifs_dbg(VFS, "Failed to register for witness notifications: %d\n", rc); + goto out_fail; + } + } else { + /* TODO: try to extend for non-cluster uses (eg multichannel) */ + cifs_dbg(VFS, "witness requested on mount but no CLUSTER capability on share\n"); + rc = -EOPNOTSUPP; + goto 
out_fail; + } + } else { + cifs_dbg(VFS, "SMB3 or later required for witness option\n"); + rc = -EOPNOTSUPP; + goto out_fail; + } + } +#endif /* If the user really knows what they are doing they can override */ if (tcon->share_flags & SMB2_SHAREFLAG_NO_CACHING) { - if (volume_info->cache_ro) + if (ctx->cache_ro) cifs_dbg(VFS, "cache=ro requested on mount but NO_CACHING flag set on share\n"); - else if (volume_info->cache_rw) + else if (ctx->cache_rw) cifs_dbg(VFS, "cache=singleclient requested on mount but NO_CACHING flag set on share\n"); } - if (volume_info->no_lease) { + if (ctx->no_lease) { if (ses->server->vals->protocol_id == 0) { cifs_dbg(VFS, "SMB2 or later required for nolease option\n"); rc = -EOPNOTSUPP; goto out_fail; } else - tcon->no_lease = volume_info->no_lease; + tcon->no_lease = ctx->no_lease; } /* @@ -3378,14 +2190,14 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info) * resources mounted more than once to the same server share the last * value passed in for the retry flag is used. */ - tcon->retry = volume_info->retry; - tcon->nocase = volume_info->nocase; + tcon->retry = ctx->retry; + tcon->nocase = ctx->nocase; if (ses->server->capabilities & SMB2_GLOBAL_CAP_DIRECTORY_LEASING) - tcon->nohandlecache = volume_info->nohandlecache; + tcon->nohandlecache = ctx->nohandlecache; else tcon->nohandlecache = 1; - tcon->nodelete = volume_info->nodelete; - tcon->local_lease = volume_info->local_lease; + tcon->nodelete = ctx->nodelete; + tcon->local_lease = ctx->local_lease; INIT_LIST_HEAD(&tcon->pending_opens); spin_lock(&cifs_tcp_ses_lock); @@ -3440,23 +2252,24 @@ compare_mount_options(struct super_block *sb, struct cifs_mnt_data *mnt_data) * We want to share sb only if we don't specify an r/wsize or * specified r/wsize is greater than or equal to existing one. 
*/ - if (new->wsize && new->wsize < old->wsize) + if (new->ctx->wsize && new->ctx->wsize < old->ctx->wsize) return 0; - if (new->rsize && new->rsize < old->rsize) + if (new->ctx->rsize && new->ctx->rsize < old->ctx->rsize) return 0; - if (!uid_eq(old->mnt_uid, new->mnt_uid) || !gid_eq(old->mnt_gid, new->mnt_gid)) + if (!uid_eq(old->ctx->linux_uid, new->ctx->linux_uid) || + !gid_eq(old->ctx->linux_gid, new->ctx->linux_gid)) return 0; - if (old->mnt_file_mode != new->mnt_file_mode || - old->mnt_dir_mode != new->mnt_dir_mode) + if (old->ctx->file_mode != new->ctx->file_mode || + old->ctx->dir_mode != new->ctx->dir_mode) return 0; if (strcmp(old->local_nls->charset, new->local_nls->charset)) return 0; - if (old->actimeo != new->actimeo) + if (old->ctx->actimeo != new->ctx->actimeo) return 0; return 1; @@ -3484,7 +2297,7 @@ int cifs_match_super(struct super_block *sb, void *data) { struct cifs_mnt_data *mnt_data = (struct cifs_mnt_data *)data; - struct smb_vol *volume_info; + struct smb3_fs_context *ctx; struct cifs_sb_info *cifs_sb; struct TCP_Server_Info *tcp_srv; struct cifs_ses *ses; @@ -3503,11 +2316,11 @@ cifs_match_super(struct super_block *sb, void *data) ses = tcon->ses; tcp_srv = ses->server; - volume_info = mnt_data->vol; + ctx = mnt_data->ctx; - if (!match_server(tcp_srv, volume_info) || - !match_session(ses, volume_info) || - !match_tcon(tcon, volume_info) || + if (!match_server(tcp_srv, ctx) || + !match_session(ses, ctx) || + !match_tcon(tcon, ctx) || !match_prepath(sb, mnt_data)) { rc = 0; goto out; @@ -3792,9 +2605,10 @@ ip_connect(struct TCP_Server_Info *server) } void reset_cifs_unix_caps(unsigned int xid, struct cifs_tcon *tcon, - struct cifs_sb_info *cifs_sb, struct smb_vol *vol_info) + struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx) { - /* if we are reconnecting then should we check to see if + /* + * If we are reconnecting then should we check to see if * any requested capabilities changed locally e.g. 
via * remount but we can not do much about it here * if they have (even if we could detect it by the following) @@ -3802,15 +2616,16 @@ void reset_cifs_unix_caps(unsigned int xid, struct cifs_tcon *tcon, * or if we change to make all sb to same share the same * sb as NFS - then we only have one backpointer to sb. * What if we wanted to mount the server share twice once with - * and once without posixacls or posix paths? */ + * and once without posixacls or posix paths? + */ __u64 saved_cap = le64_to_cpu(tcon->fsUnixInfo.Capability); - if (vol_info && vol_info->no_linux_ext) { + if (ctx && ctx->no_linux_ext) { tcon->fsUnixInfo.Capability = 0; tcon->unix_ext = 0; /* Unix Extensions disabled */ cifs_dbg(FYI, "Linux protocol extensions disabled\n"); return; - } else if (vol_info) + } else if (ctx) tcon->unix_ext = 1; /* Unix Extensions supported */ if (tcon->unix_ext == 0) { @@ -3821,11 +2636,15 @@ void reset_cifs_unix_caps(unsigned int xid, struct cifs_tcon *tcon, if (!CIFSSMBQFSUnixInfo(xid, tcon)) { __u64 cap = le64_to_cpu(tcon->fsUnixInfo.Capability); cifs_dbg(FYI, "unix caps which server supports %lld\n", cap); - /* check for reconnect case in which we do not - want to change the mount behavior if we can avoid it */ - if (vol_info == NULL) { - /* turn off POSIX ACL and PATHNAMES if not set - originally at mount time */ + /* + * check for reconnect case in which we do not + * want to change the mount behavior if we can avoid it + */ + if (ctx == NULL) { + /* + * turn off POSIX ACL and PATHNAMES if not set + * originally at mount time + */ if ((saved_cap & CIFS_UNIX_POSIX_ACL_CAP) == 0) cap &= ~CIFS_UNIX_POSIX_ACL_CAP; if ((saved_cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) == 0) { @@ -3842,7 +2661,7 @@ void reset_cifs_unix_caps(unsigned int xid, struct cifs_tcon *tcon, cifs_dbg(VFS, "per-share encryption not supported yet\n"); cap &= CIFS_UNIX_CAP_MASK; - if (vol_info && vol_info->no_psx_acl) + if (ctx && ctx->no_psx_acl) cap &= ~CIFS_UNIX_POSIX_ACL_CAP; else if 
(CIFS_UNIX_POSIX_ACL_CAP & cap) { cifs_dbg(FYI, "negotiated posix acl support\n"); @@ -3851,7 +2670,7 @@ void reset_cifs_unix_caps(unsigned int xid, struct cifs_tcon *tcon, CIFS_MOUNT_POSIXACL; } - if (vol_info && vol_info->posix_paths == 0) + if (ctx && ctx->posix_paths == 0) cap &= ~CIFS_UNIX_POSIX_PATHNAMES_CAP; else if (cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) { cifs_dbg(FYI, "negotiate posix pathnames\n"); @@ -3882,129 +2701,59 @@ void reset_cifs_unix_caps(unsigned int xid, struct cifs_tcon *tcon, cifs_dbg(FYI, "mandatory transport encryption cap\n"); #endif /* CIFS_DEBUG2 */ if (CIFSSMBSetFSUnixInfo(xid, tcon, cap)) { - if (vol_info == NULL) { + if (ctx == NULL) cifs_dbg(FYI, "resetting capabilities failed\n"); - } else + else cifs_dbg(VFS, "Negotiating Unix capabilities with the server failed. Consider mounting with the Unix Extensions disabled if problems are found by specifying the nounix mount option.\n"); } } } -int cifs_setup_cifs_sb(struct smb_vol *pvolume_info, - struct cifs_sb_info *cifs_sb) +int cifs_setup_cifs_sb(struct cifs_sb_info *cifs_sb) { + struct smb3_fs_context *ctx = cifs_sb->ctx; + INIT_DELAYED_WORK(&cifs_sb->prune_tlinks, cifs_prune_tlinks); spin_lock_init(&cifs_sb->tlink_tree_lock); cifs_sb->tlink_tree = RB_ROOT; - cifs_sb->bsize = pvolume_info->bsize; - /* - * Temporarily set r/wsize for matching superblock. If we end up using - * new sb then client will later negotiate it downward if needed. 
- */ - cifs_sb->rsize = pvolume_info->rsize; - cifs_sb->wsize = pvolume_info->wsize; - - cifs_sb->mnt_uid = pvolume_info->linux_uid; - cifs_sb->mnt_gid = pvolume_info->linux_gid; - cifs_sb->mnt_file_mode = pvolume_info->file_mode; - cifs_sb->mnt_dir_mode = pvolume_info->dir_mode; cifs_dbg(FYI, "file mode: %04ho dir mode: %04ho\n", - cifs_sb->mnt_file_mode, cifs_sb->mnt_dir_mode); - - cifs_sb->actimeo = pvolume_info->actimeo; - cifs_sb->local_nls = pvolume_info->local_nls; - - if (pvolume_info->nodfs) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_DFS; - if (pvolume_info->noperm) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_PERM; - if (pvolume_info->setuids) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_SET_UID; - if (pvolume_info->setuidfromacl) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_UID_FROM_ACL; - if (pvolume_info->server_ino) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_SERVER_INUM; - if (pvolume_info->remap) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_MAP_SFM_CHR; - if (pvolume_info->sfu_remap) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_MAP_SPECIAL_CHR; - if (pvolume_info->no_xattr) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_XATTR; - if (pvolume_info->sfu_emul) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_UNX_EMUL; - if (pvolume_info->nobrl) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_BRL; - if (pvolume_info->nohandlecache) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_HANDLE_CACHE; - if (pvolume_info->nostrictsync) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NOSSYNC; - if (pvolume_info->mand_lock) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NOPOSIXBRL; - if (pvolume_info->rwpidforward) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_RWPIDFORWARD; - if (pvolume_info->mode_ace) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_MODE_FROM_SID; - if (pvolume_info->cifs_acl) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_CIFS_ACL; - if (pvolume_info->backupuid_specified) { - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_CIFS_BACKUPUID; - cifs_sb->mnt_backupuid = pvolume_info->backupuid; - } - if (pvolume_info->backupgid_specified) { - 
cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_CIFS_BACKUPGID; - cifs_sb->mnt_backupgid = pvolume_info->backupgid; - } - if (pvolume_info->override_uid) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_OVERR_UID; - if (pvolume_info->override_gid) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_OVERR_GID; - if (pvolume_info->dynperm) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DYNPERM; - if (pvolume_info->fsc) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_FSCACHE; - if (pvolume_info->multiuser) - cifs_sb->mnt_cifs_flags |= (CIFS_MOUNT_MULTIUSER | - CIFS_MOUNT_NO_PERM); - if (pvolume_info->strict_io) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_STRICT_IO; - if (pvolume_info->direct_io) { - cifs_dbg(FYI, "mounting share using direct i/o\n"); - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DIRECT_IO; + ctx->file_mode, ctx->dir_mode); + + /* this is needed for ASCII cp to Unicode converts */ + if (ctx->iocharset == NULL) { + /* load_nls_default cannot return null */ + cifs_sb->local_nls = load_nls_default(); + } else { + cifs_sb->local_nls = load_nls(ctx->iocharset); + if (cifs_sb->local_nls == NULL) { + cifs_dbg(VFS, "CIFS mount error: iocharset %s not found\n", + ctx->iocharset); + return -ELIBACC; + } } - if (pvolume_info->cache_ro) { + ctx->local_nls = cifs_sb->local_nls; + + smb3_update_mnt_flags(cifs_sb); + + if (ctx->direct_io) + cifs_dbg(FYI, "mounting share using direct i/o\n"); + if (ctx->cache_ro) { cifs_dbg(VFS, "mounting share with read only caching. Ensure that the share will not be modified while in use.\n"); cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_RO_CACHE; - } else if (pvolume_info->cache_rw) { + } else if (ctx->cache_rw) { cifs_dbg(VFS, "mounting share in single client RW caching mode. 
Ensure that no other systems will be accessing the share.\n"); cifs_sb->mnt_cifs_flags |= (CIFS_MOUNT_RO_CACHE | CIFS_MOUNT_RW_CACHE); } - if (pvolume_info->mfsymlinks) { - if (pvolume_info->sfu_emul) { - /* - * Our SFU ("Services for Unix" emulation does not allow - * creating symlinks but does allow reading existing SFU - * symlinks (it does allow both creating and reading SFU - * style mknod and FIFOs though). When "mfsymlinks" and - * "sfu" are both enabled at the same time, it allows - * reading both types of symlinks, but will only create - * them with mfsymlinks format. This allows better - * Apple compatibility (probably better for Samba too) - * while still recognizing old Windows style symlinks. - */ - cifs_dbg(VFS, "mount options mfsymlinks and sfu both enabled\n"); - } - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_MF_SYMLINKS; - } - if ((pvolume_info->cifs_acl) && (pvolume_info->dynperm)) + if ((ctx->cifs_acl) && (ctx->dynperm)) cifs_dbg(VFS, "mount option dynperm ignored if cifsacl mount option supported\n"); - if (pvolume_info->prepath) { - cifs_sb->prepath = kstrdup(pvolume_info->prepath, GFP_KERNEL); + if (ctx->prepath) { + cifs_sb->prepath = kstrdup(ctx->prepath, GFP_KERNEL); if (cifs_sb->prepath == NULL) return -ENOMEM; } @@ -4012,26 +2761,6 @@ int cifs_setup_cifs_sb(struct smb_vol *pvolume_info, return 0; } -void -cifs_cleanup_volume_info_contents(struct smb_vol *volume_info) -{ - kfree(volume_info->username); - kfree_sensitive(volume_info->password); - kfree(volume_info->UNC); - kfree(volume_info->domainname); - kfree(volume_info->iocharset); - kfree(volume_info->prepath); -} - -void -cifs_cleanup_volume_info(struct smb_vol *volume_info) -{ - if (!volume_info) - return; - cifs_cleanup_volume_info_contents(volume_info); - kfree(volume_info); -} - /* Release all succeed connections */ static inline void mount_put_conns(struct cifs_sb_info *cifs_sb, unsigned int xid, @@ -4051,7 +2780,7 @@ static inline void mount_put_conns(struct cifs_sb_info *cifs_sb, } 
/* Get connections for tcp, ses and tcon */ -static int mount_get_conns(struct smb_vol *vol, struct cifs_sb_info *cifs_sb, +static int mount_get_conns(struct smb3_fs_context *ctx, struct cifs_sb_info *cifs_sb, unsigned int *xid, struct TCP_Server_Info **nserver, struct cifs_ses **nses, struct cifs_tcon **ntcon) @@ -4068,7 +2797,7 @@ static int mount_get_conns(struct smb_vol *vol, struct cifs_sb_info *cifs_sb, *xid = get_xid(); /* get a reference to a tcp session */ - server = cifs_get_tcp_session(vol); + server = cifs_get_tcp_session(ctx); if (IS_ERR(server)) { rc = PTR_ERR(server); return rc; @@ -4076,13 +2805,13 @@ static int mount_get_conns(struct smb_vol *vol, struct cifs_sb_info *cifs_sb, *nserver = server; - if ((vol->max_credits < 20) || (vol->max_credits > 60000)) + if ((ctx->max_credits < 20) || (ctx->max_credits > 60000)) server->max_credits = SMB2_MAX_CREDITS_AVAILABLE; else - server->max_credits = vol->max_credits; + server->max_credits = ctx->max_credits; /* get a reference to a SMB session */ - ses = cifs_get_smb_ses(server, vol); + ses = cifs_get_smb_ses(server, ctx); if (IS_ERR(ses)) { rc = PTR_ERR(ses); return rc; @@ -4090,14 +2819,14 @@ static int mount_get_conns(struct smb_vol *vol, struct cifs_sb_info *cifs_sb, *nses = ses; - if ((vol->persistent == true) && (!(ses->server->capabilities & + if ((ctx->persistent == true) && (!(ses->server->capabilities & SMB2_GLOBAL_CAP_PERSISTENT_HANDLES))) { cifs_server_dbg(VFS, "persistent handles not supported by server\n"); return -EOPNOTSUPP; } /* search for existing tcon to this server share */ - tcon = cifs_get_tcon(ses, vol); + tcon = cifs_get_tcon(ses, ctx); if (IS_ERR(tcon)) { rc = PTR_ERR(tcon); return rc; @@ -4115,7 +2844,7 @@ static int mount_get_conns(struct smb_vol *vol, struct cifs_sb_info *cifs_sb, * reset of caps checks mount to see if unix extensions disabled * for just this mount. 
*/ - reset_cifs_unix_caps(*xid, tcon, cifs_sb, vol); + reset_cifs_unix_caps(*xid, tcon, cifs_sb, ctx); if ((tcon->ses->server->tcpStatus == CifsNeedReconnect) && (le64_to_cpu(tcon->fsUnixInfo.Capability) & CIFS_UNIX_TRANSPORT_ENCRYPTION_MANDATORY_CAP)) @@ -4137,8 +2866,17 @@ static int mount_get_conns(struct smb_vol *vol, struct cifs_sb_info *cifs_sb, } } - cifs_sb->wsize = server->ops->negotiate_wsize(tcon, vol); - cifs_sb->rsize = server->ops->negotiate_rsize(tcon, vol); + /* + * Clamp the rsize/wsize mount arguments if they are too big for the server + * and set the rsize/wsize to the negotiated values if not passed in by + * the user on mount + */ + if ((cifs_sb->ctx->wsize == 0) || + (cifs_sb->ctx->wsize > server->ops->negotiate_wsize(tcon, ctx))) + cifs_sb->ctx->wsize = server->ops->negotiate_wsize(tcon, ctx); + if ((cifs_sb->ctx->rsize == 0) || + (cifs_sb->ctx->rsize > server->ops->negotiate_rsize(tcon, ctx))) + cifs_sb->ctx->rsize = server->ops->negotiate_rsize(tcon, ctx); return 0; } @@ -4175,13 +2913,13 @@ static int mount_setup_tlink(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses, * exiting connection (tcon) */ static char * -build_unc_path_to_root(const struct smb_vol *vol, +build_unc_path_to_root(const struct smb3_fs_context *ctx, const struct cifs_sb_info *cifs_sb, bool useppath) { char *full_path, *pos; - unsigned int pplen = useppath && vol->prepath ? - strlen(vol->prepath) + 1 : 0; - unsigned int unc_len = strnlen(vol->UNC, MAX_TREE_SIZE + 1); + unsigned int pplen = useppath && ctx->prepath ? 
+ strlen(ctx->prepath) + 1 : 0; + unsigned int unc_len = strnlen(ctx->UNC, MAX_TREE_SIZE + 1); if (unc_len > MAX_TREE_SIZE) return ERR_PTR(-EINVAL); @@ -4190,12 +2928,12 @@ build_unc_path_to_root(const struct smb_vol *vol, if (full_path == NULL) return ERR_PTR(-ENOMEM); - memcpy(full_path, vol->UNC, unc_len); + memcpy(full_path, ctx->UNC, unc_len); pos = full_path + unc_len; if (pplen) { *pos = CIFS_DIR_SEP(cifs_sb); - memcpy(pos + 1, vol->prepath, pplen); + memcpy(pos + 1, ctx->prepath, pplen); pos += pplen; } @@ -4208,8 +2946,7 @@ build_unc_path_to_root(const struct smb_vol *vol, /** * expand_dfs_referral - Perform a dfs referral query and update the cifs_sb * - * - * If a referral is found, cifs_sb->mountdata will be (re-)allocated + * If a referral is found, cifs_sb->ctx->mount_options will be (re-)allocated * to a string containing updated options for the submount. Otherwise it * will be left untouched. * @@ -4218,7 +2955,7 @@ build_unc_path_to_root(const struct smb_vol *vol, */ static int expand_dfs_referral(const unsigned int xid, struct cifs_ses *ses, - struct smb_vol *volume_info, struct cifs_sb_info *cifs_sb, + struct smb3_fs_context *ctx, struct cifs_sb_info *cifs_sb, char *ref_path) { int rc; @@ -4228,31 +2965,26 @@ expand_dfs_referral(const unsigned int xid, struct cifs_ses *ses, if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS) return -EREMOTE; - full_path = build_unc_path_to_root(volume_info, cifs_sb, true); + full_path = build_unc_path_to_root(ctx, cifs_sb, true); if (IS_ERR(full_path)) return PTR_ERR(full_path); rc = dfs_cache_find(xid, ses, cifs_sb->local_nls, cifs_remap(cifs_sb), ref_path, &referral, NULL); if (!rc) { - char *fake_devname = NULL; - - mdata = cifs_compose_mount_options(cifs_sb->mountdata, - full_path + 1, &referral, - &fake_devname); + mdata = cifs_compose_mount_options(cifs_sb->ctx->mount_options, + full_path + 1, &referral); free_dfs_info_param(&referral); if (IS_ERR(mdata)) { rc = PTR_ERR(mdata); mdata = NULL; } else { - 
cifs_cleanup_volume_info_contents(volume_info); - rc = cifs_setup_volume_info(volume_info, mdata, - fake_devname, false); + smb3_cleanup_fs_context_contents(ctx); + rc = cifs_setup_volume_info(ctx); } - kfree(fake_devname); - kfree(cifs_sb->mountdata); - cifs_sb->mountdata = mdata; + kfree(cifs_sb->ctx->mount_options); + cifs_sb->ctx->mount_options = mdata; } kfree(full_path); return rc; @@ -4270,7 +3002,7 @@ static inline int get_next_dfs_tgt(const char *path, } static int update_vol_info(const struct dfs_cache_tgt_iterator *tgt_it, - struct smb_vol *fake_vol, struct smb_vol *vol) + struct smb3_fs_context *fake_ctx, struct smb3_fs_context *ctx) { const char *tgt = dfs_cache_get_tgt_name(tgt_it); int len = strlen(tgt) + 2; @@ -4281,29 +3013,29 @@ static int update_vol_info(const struct dfs_cache_tgt_iterator *tgt_it, return -ENOMEM; scnprintf(new_unc, len, "\\%s", tgt); - kfree(vol->UNC); - vol->UNC = new_unc; + kfree(ctx->UNC); + ctx->UNC = new_unc; - if (fake_vol->prepath) { - kfree(vol->prepath); - vol->prepath = fake_vol->prepath; - fake_vol->prepath = NULL; + if (fake_ctx->prepath) { + kfree(ctx->prepath); + ctx->prepath = fake_ctx->prepath; + fake_ctx->prepath = NULL; } - memcpy(&vol->dstaddr, &fake_vol->dstaddr, sizeof(vol->dstaddr)); + memcpy(&ctx->dstaddr, &fake_ctx->dstaddr, sizeof(ctx->dstaddr)); return 0; } static int setup_dfs_tgt_conn(const char *path, const char *full_path, const struct dfs_cache_tgt_iterator *tgt_it, - struct cifs_sb_info *cifs_sb, struct smb_vol *vol, unsigned int *xid, - struct TCP_Server_Info **server, struct cifs_ses **ses, - struct cifs_tcon **tcon) + struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx, + unsigned int *xid, struct TCP_Server_Info **server, + struct cifs_ses **ses, struct cifs_tcon **tcon) { int rc; struct dfs_info3_param ref = {0}; - char *mdata = NULL, *fake_devname = NULL; - struct smb_vol fake_vol = {NULL}; + char *mdata = NULL; + struct smb3_fs_context fake_ctx = {NULL}; cifs_dbg(FYI, "%s: dfs path: 
%s\n", __func__, path); @@ -4311,45 +3043,43 @@ static int setup_dfs_tgt_conn(const char *path, const char *full_path, if (rc) return rc; - mdata = cifs_compose_mount_options(cifs_sb->mountdata, full_path + 1, &ref, &fake_devname); + mdata = cifs_compose_mount_options(cifs_sb->ctx->mount_options, + full_path + 1, &ref); free_dfs_info_param(&ref); if (IS_ERR(mdata)) { rc = PTR_ERR(mdata); mdata = NULL; - } else { - cifs_dbg(FYI, "%s: fake_devname: %s\n", __func__, fake_devname); - rc = cifs_setup_volume_info(&fake_vol, mdata, fake_devname, - false); - } + } else + rc = cifs_setup_volume_info(&fake_ctx); + kfree(mdata); - kfree(fake_devname); if (!rc) { /* - * We use a 'fake_vol' here because we need pass it down to the + * We use a 'fake_ctx' here because we need pass it down to the * mount_{get,put} functions to test connection against new DFS * targets. */ mount_put_conns(cifs_sb, *xid, *server, *ses, *tcon); - rc = mount_get_conns(&fake_vol, cifs_sb, xid, server, ses, + rc = mount_get_conns(&fake_ctx, cifs_sb, xid, server, ses, tcon); if (!rc || (*server && *ses)) { /* * We were able to connect to new target server. - * Update current volume info with new target server. + * Update current context with new target server. 
*/ - rc = update_vol_info(tgt_it, &fake_vol, vol); + rc = update_vol_info(tgt_it, &fake_ctx, ctx); } } - cifs_cleanup_volume_info_contents(&fake_vol); + smb3_cleanup_fs_context_contents(&fake_ctx); return rc; } static int do_dfs_failover(const char *path, const char *full_path, struct cifs_sb_info *cifs_sb, - struct smb_vol *vol, struct cifs_ses *root_ses, unsigned int *xid, - struct TCP_Server_Info **server, struct cifs_ses **ses, - struct cifs_tcon **tcon) + struct smb3_fs_context *ctx, struct cifs_ses *root_ses, + unsigned int *xid, struct TCP_Server_Info **server, + struct cifs_ses **ses, struct cifs_tcon **tcon) { int rc; struct dfs_cache_tgt_list tgt_list; @@ -4368,7 +3098,7 @@ static int do_dfs_failover(const char *path, const char *full_path, struct cifs_ if (rc) break; /* Connect to next DFS target */ - rc = setup_dfs_tgt_conn(path, full_path, tgt_it, cifs_sb, vol, xid, server, ses, + rc = setup_dfs_tgt_conn(path, full_path, tgt_it, cifs_sb, ctx, xid, server, ses, tcon); if (!rc || (*server && *ses)) break; @@ -4388,22 +3118,21 @@ static int do_dfs_failover(const char *path, const char *full_path, struct cifs_ } #endif +/* TODO: all callers to this are broken. We are not parsing mount_options here + * we should pass a clone of the original context? 
+ */ int -cifs_setup_volume_info(struct smb_vol *volume_info, char *mount_data, - const char *devname, bool is_smb3) +cifs_setup_volume_info(struct smb3_fs_context *ctx) { int rc = 0; - if (cifs_parse_mount_options(mount_data, devname, volume_info, is_smb3)) - return -EINVAL; - - if (volume_info->nullauth) { + if (ctx->nullauth) { cifs_dbg(FYI, "Anonymous login\n"); - kfree(volume_info->username); - volume_info->username = NULL; - } else if (volume_info->username) { + kfree(ctx->username); + ctx->username = NULL; + } else if (ctx->username) { /* BB fixme parse for domain name here */ - cifs_dbg(FYI, "Username: %s\n", volume_info->username); + cifs_dbg(FYI, "Username: %s\n", ctx->username); } else { cifs_dbg(VFS, "No username specified\n"); /* In userspace mount helper we can get user name from alternate @@ -4411,41 +3140,9 @@ cifs_setup_volume_info(struct smb_vol *volume_info, char *mount_data, return -EINVAL; } - /* this is needed for ASCII cp to Unicode converts */ - if (volume_info->iocharset == NULL) { - /* load_nls_default cannot return null */ - volume_info->local_nls = load_nls_default(); - } else { - volume_info->local_nls = load_nls(volume_info->iocharset); - if (volume_info->local_nls == NULL) { - cifs_dbg(VFS, "CIFS mount error: iocharset %s not found\n", - volume_info->iocharset); - return -ELIBACC; - } - } - return rc; } -struct smb_vol * -cifs_get_volume_info(char *mount_data, const char *devname, bool is_smb3) -{ - int rc; - struct smb_vol *volume_info; - - volume_info = kmalloc(sizeof(struct smb_vol), GFP_KERNEL); - if (!volume_info) - return ERR_PTR(-ENOMEM); - - rc = cifs_setup_volume_info(volume_info, mount_data, devname, is_smb3); - if (rc) { - cifs_cleanup_volume_info(volume_info); - volume_info = ERR_PTR(rc); - } - - return volume_info; -} - static int cifs_are_all_path_components_accessible(struct TCP_Server_Info *server, unsigned int xid, @@ -4497,7 +3194,7 @@ cifs_are_all_path_components_accessible(struct TCP_Server_Info *server, * Check if 
path is remote (e.g. a DFS share). Return -EREMOTE if it is, * otherwise 0. */ -static int is_path_remote(struct cifs_sb_info *cifs_sb, struct smb_vol *vol, +static int is_path_remote(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx, const unsigned int xid, struct TCP_Server_Info *server, struct cifs_tcon *tcon) @@ -4511,7 +3208,7 @@ static int is_path_remote(struct cifs_sb_info *cifs_sb, struct smb_vol *vol, /* * cifs_build_path_to_root works only when we have a valid tcon */ - full_path = cifs_build_path_to_root(vol, cifs_sb, tcon, + full_path = cifs_build_path_to_root(ctx, cifs_sb, tcon, tcon->Flags & SMB_SHARE_IS_IN_DFS); if (full_path == NULL) return -ENOMEM; @@ -4560,7 +3257,7 @@ static void put_root_ses(struct cifs_ses *ses) } /* Check if a path component is remote and then update @dfs_path accordingly */ -static int check_dfs_prepath(struct cifs_sb_info *cifs_sb, struct smb_vol *vol, +static int check_dfs_prepath(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx, const unsigned int xid, struct TCP_Server_Info *server, struct cifs_tcon *tcon, char **dfs_path) { @@ -4571,7 +3268,7 @@ static int check_dfs_prepath(struct cifs_sb_info *cifs_sb, struct smb_vol *vol, int added_treename = tcon->Flags & SMB_SHARE_IS_IN_DFS; int skip = added_treename; - path = cifs_build_path_to_root(vol, cifs_sb, tcon, added_treename); + path = cifs_build_path_to_root(ctx, cifs_sb, tcon, added_treename); if (!path) return -ENOMEM; @@ -4602,17 +3299,17 @@ static int check_dfs_prepath(struct cifs_sb_info *cifs_sb, struct smb_vol *vol, *s = 0; rc = server->ops->is_path_accessible(xid, tcon, cifs_sb, path); if (rc && rc == -EREMOTE) { - struct smb_vol v = {NULL}; + struct smb3_fs_context v = {NULL}; /* if @path contains a tree name, skip it in the prefix path */ if (added_treename) { - rc = cifs_parse_devname(path, &v); + rc = smb3_parse_devname(path, &v); if (rc) break; rc = -EREMOTE; npath = build_unc_path_to_root(&v, cifs_sb, true); - 
cifs_cleanup_volume_info_contents(&v); + smb3_cleanup_fs_context_contents(&v); } else { - v.UNC = vol->UNC; + v.UNC = ctx->UNC; v.prepath = path + 1; npath = build_unc_path_to_root(&v, cifs_sb, true); } @@ -4630,7 +3327,7 @@ static int check_dfs_prepath(struct cifs_sb_info *cifs_sb, struct smb_vol *vol, return rc; } -int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *vol) +int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx) { int rc = 0; unsigned int xid; @@ -4642,7 +3339,7 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *vol) char *oldmnt = NULL; char *mntdata = NULL; - rc = mount_get_conns(vol, cifs_sb, &xid, &server, &ses, &tcon); + rc = mount_get_conns(ctx, cifs_sb, &xid, &server, &ses, &tcon); /* * Unconditionally try to get an DFS referral (even cached) to determine whether it is an * DFS mount. @@ -4650,26 +3347,27 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *vol) * Skip prefix path to provide support for DFS referrals from w2k8 servers which don't seem * to respond with PATH_NOT_COVERED to requests that include the prefix. */ - if (dfs_cache_find(xid, ses, cifs_sb->local_nls, cifs_remap(cifs_sb), vol->UNC + 1, NULL, + if (dfs_cache_find(xid, ses, cifs_sb->local_nls, cifs_remap(cifs_sb), ctx->UNC + 1, NULL, NULL)) { /* No DFS referral was returned. Looks like a regular share. 
*/ if (rc) goto error; /* Check if it is fully accessible and then mount it */ - rc = is_path_remote(cifs_sb, vol, xid, server, tcon); + rc = is_path_remote(cifs_sb, ctx, xid, server, tcon); if (!rc) goto out; if (rc != -EREMOTE) goto error; } /* Save mount options */ - mntdata = kstrndup(cifs_sb->mountdata, strlen(cifs_sb->mountdata), GFP_KERNEL); + mntdata = kstrndup(cifs_sb->ctx->mount_options, + strlen(cifs_sb->ctx->mount_options), GFP_KERNEL); if (!mntdata) { rc = -ENOMEM; goto error; } /* Get path of DFS root */ - ref_path = build_unc_path_to_root(vol, cifs_sb, false); + ref_path = build_unc_path_to_root(ctx, cifs_sb, false); if (IS_ERR(ref_path)) { rc = PTR_ERR(ref_path); ref_path = NULL; @@ -4680,25 +3378,25 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *vol) do { /* Save full path of last DFS path we used to resolve final target server */ kfree(full_path); - full_path = build_unc_path_to_root(vol, cifs_sb, !!count); + full_path = build_unc_path_to_root(ctx, cifs_sb, !!count); if (IS_ERR(full_path)) { rc = PTR_ERR(full_path); full_path = NULL; break; } /* Chase referral */ - oldmnt = cifs_sb->mountdata; - rc = expand_dfs_referral(xid, root_ses, vol, cifs_sb, ref_path + 1); + oldmnt = cifs_sb->ctx->mount_options; + rc = expand_dfs_referral(xid, root_ses, ctx, cifs_sb, ref_path + 1); if (rc) break; /* Connect to new DFS target only if we were redirected */ - if (oldmnt != cifs_sb->mountdata) { + if (oldmnt != cifs_sb->ctx->mount_options) { mount_put_conns(cifs_sb, xid, server, ses, tcon); - rc = mount_get_conns(vol, cifs_sb, &xid, &server, &ses, &tcon); + rc = mount_get_conns(ctx, cifs_sb, &xid, &server, &ses, &tcon); } if (rc && !server && !ses) { /* Failed to connect. Try to connect to other targets in the referral. 
*/ - rc = do_dfs_failover(ref_path + 1, full_path, cifs_sb, vol, root_ses, &xid, + rc = do_dfs_failover(ref_path + 1, full_path, cifs_sb, ctx, root_ses, &xid, &server, &ses, &tcon); } if (rc == -EACCES || rc == -EOPNOTSUPP || !server || !ses) @@ -4711,7 +3409,7 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *vol) set_root_ses(cifs_sb, ses, &root_ses); } /* Check for remaining path components and then continue chasing them (-EREMOTE) */ - rc = check_dfs_prepath(cifs_sb, vol, xid, server, tcon, &ref_path); + rc = check_dfs_prepath(cifs_sb, ctx, xid, server, tcon, &ref_path); /* Prevent recursion on broken link referrals */ if (rc == -EREMOTE && ++count > MAX_NESTED_LINKS) rc = -ELOOP; @@ -4742,8 +3440,8 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *vol) tcon->remap = cifs_remap(cifs_sb); spin_unlock(&cifs_tcp_ses_lock); - /* Add original volume information for DFS cache to be used when refreshing referrals */ - rc = dfs_cache_add_vol(mntdata, vol, cifs_sb->origin_fullpath); + /* Add original context for DFS cache to be used when refreshing referrals */ + rc = dfs_cache_add_vol(mntdata, ctx, cifs_sb->origin_fullpath); if (rc) goto error; /* @@ -4758,12 +3456,12 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *vol) */ cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH; kfree(cifs_sb->prepath); - cifs_sb->prepath = vol->prepath; - vol->prepath = NULL; + cifs_sb->prepath = ctx->prepath; + ctx->prepath = NULL; out: free_xid(xid); - cifs_try_adding_channels(ses); + cifs_try_adding_channels(cifs_sb, ses); return mount_setup_tlink(cifs_sb, ses, tcon); error: @@ -4776,7 +3474,7 @@ error: return rc; } #else -int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *vol) +int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx) { int rc = 0; unsigned int xid; @@ -4784,12 +3482,12 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *vol) struct cifs_tcon *tcon; struct TCP_Server_Info *server; - rc 
= mount_get_conns(vol, cifs_sb, &xid, &server, &ses, &tcon); + rc = mount_get_conns(ctx, cifs_sb, &xid, &server, &ses, &tcon); if (rc) goto error; if (tcon) { - rc = is_path_remote(cifs_sb, vol, xid, server, tcon); + rc = is_path_remote(cifs_sb, ctx, xid, server, tcon); if (rc == -EREMOTE) rc = -EOPNOTSUPP; if (rc) @@ -4970,9 +3668,11 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses, static void delayed_free(struct rcu_head *p) { - struct cifs_sb_info *sbi = container_of(p, struct cifs_sb_info, rcu); - unload_nls(sbi->local_nls); - kfree(sbi); + struct cifs_sb_info *cifs_sb = container_of(p, struct cifs_sb_info, rcu); + + unload_nls(cifs_sb->local_nls); + smb3_cleanup_fs_context(cifs_sb->ctx); + kfree(cifs_sb); } void @@ -4997,7 +3697,6 @@ cifs_umount(struct cifs_sb_info *cifs_sb) } spin_unlock(&cifs_sb->tlink_tree_lock); - kfree(cifs_sb->mountdata); kfree(cifs_sb->prepath); #ifdef CONFIG_CIFS_DFS_UPCALL dfs_cache_del_vol(cifs_sb->origin_fullpath); @@ -5066,15 +3765,15 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses, } static int -cifs_set_vol_auth(struct smb_vol *vol, struct cifs_ses *ses) +cifs_set_vol_auth(struct smb3_fs_context *ctx, struct cifs_ses *ses) { - vol->sectype = ses->sectype; + ctx->sectype = ses->sectype; /* krb5 is special, since we don't need username or pw */ - if (vol->sectype == Kerberos) + if (ctx->sectype == Kerberos) return 0; - return cifs_set_cifscreds(vol, ses); + return cifs_set_cifscreds(ctx, ses); } static struct cifs_tcon * @@ -5084,31 +3783,34 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid) struct cifs_tcon *master_tcon = cifs_sb_master_tcon(cifs_sb); struct cifs_ses *ses; struct cifs_tcon *tcon = NULL; - struct smb_vol *vol_info; + struct smb3_fs_context *ctx; - vol_info = kzalloc(sizeof(*vol_info), GFP_KERNEL); - if (vol_info == NULL) + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (ctx == NULL) return ERR_PTR(-ENOMEM); - vol_info->local_nls = cifs_sb->local_nls; - vol_info->linux_uid 
= fsuid; - vol_info->cred_uid = fsuid; - vol_info->UNC = master_tcon->treeName; - vol_info->retry = master_tcon->retry; - vol_info->nocase = master_tcon->nocase; - vol_info->nohandlecache = master_tcon->nohandlecache; - vol_info->local_lease = master_tcon->local_lease; - vol_info->no_lease = master_tcon->no_lease; - vol_info->resilient = master_tcon->use_resilient; - vol_info->persistent = master_tcon->use_persistent; - vol_info->handle_timeout = master_tcon->handle_timeout; - vol_info->no_linux_ext = !master_tcon->unix_ext; - vol_info->linux_ext = master_tcon->posix_extensions; - vol_info->sectype = master_tcon->ses->sectype; - vol_info->sign = master_tcon->ses->sign; - vol_info->seal = master_tcon->seal; - - rc = cifs_set_vol_auth(vol_info, master_tcon->ses); + ctx->local_nls = cifs_sb->local_nls; + ctx->linux_uid = fsuid; + ctx->cred_uid = fsuid; + ctx->UNC = master_tcon->treeName; + ctx->retry = master_tcon->retry; + ctx->nocase = master_tcon->nocase; + ctx->nohandlecache = master_tcon->nohandlecache; + ctx->local_lease = master_tcon->local_lease; + ctx->no_lease = master_tcon->no_lease; + ctx->resilient = master_tcon->use_resilient; + ctx->persistent = master_tcon->use_persistent; + ctx->handle_timeout = master_tcon->handle_timeout; + ctx->no_linux_ext = !master_tcon->unix_ext; + ctx->linux_ext = master_tcon->posix_extensions; + ctx->sectype = master_tcon->ses->sectype; + ctx->sign = master_tcon->ses->sign; + ctx->seal = master_tcon->seal; +#ifdef CONFIG_CIFS_SWN_UPCALL + ctx->witness = master_tcon->use_witness; +#endif + + rc = cifs_set_vol_auth(ctx, master_tcon->ses); if (rc) { tcon = ERR_PTR(rc); goto out; @@ -5119,26 +3821,26 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid) ++master_tcon->ses->server->srv_count; spin_unlock(&cifs_tcp_ses_lock); - ses = cifs_get_smb_ses(master_tcon->ses->server, vol_info); + ses = cifs_get_smb_ses(master_tcon->ses->server, ctx); if (IS_ERR(ses)) { tcon = (struct cifs_tcon *)ses; 
cifs_put_tcp_session(master_tcon->ses->server, 0); goto out; } - tcon = cifs_get_tcon(ses, vol_info); + tcon = cifs_get_tcon(ses, ctx); if (IS_ERR(tcon)) { cifs_put_smb_ses(ses); goto out; } if (cap_unix(ses)) - reset_cifs_unix_caps(0, tcon, NULL, vol_info); + reset_cifs_unix_caps(0, tcon, NULL, ctx); out: - kfree(vol_info->username); - kfree_sensitive(vol_info->password); - kfree(vol_info); + kfree(ctx->username); + kfree_sensitive(ctx->password); + kfree(ctx); return tcon; } diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c index 6ee849698962..6ad6ba5f6ebe 100644 --- a/fs/cifs/dfs_cache.c +++ b/fs/cifs/dfs_cache.c @@ -18,6 +18,7 @@ #include "cifs_debug.h" #include "cifs_unicode.h" #include "smb2glob.h" +#include "fs_context.h" #include "dfs_cache.h" @@ -48,8 +49,8 @@ struct cache_entry { struct vol_info { char *fullpath; - spinlock_t smb_vol_lock; - struct smb_vol smb_vol; + spinlock_t ctx_lock; + struct smb3_fs_context ctx; char *mntdata; struct list_head list; struct list_head rlist; @@ -586,7 +587,7 @@ static void __vol_release(struct vol_info *vi) { kfree(vi->fullpath); kfree(vi->mntdata); - cifs_cleanup_volume_info_contents(&vi->smb_vol); + smb3_cleanup_fs_context_contents(&vi->ctx); kfree(vi); } @@ -1140,80 +1141,22 @@ out_unlock: return rc; } -static int dup_vol(struct smb_vol *vol, struct smb_vol *new) -{ - memcpy(new, vol, sizeof(*new)); - - if (vol->username) { - new->username = kstrndup(vol->username, strlen(vol->username), - GFP_KERNEL); - if (!new->username) - return -ENOMEM; - } - if (vol->password) { - new->password = kstrndup(vol->password, strlen(vol->password), - GFP_KERNEL); - if (!new->password) - goto err_free_username; - } - if (vol->UNC) { - cifs_dbg(FYI, "%s: vol->UNC: %s\n", __func__, vol->UNC); - new->UNC = kstrndup(vol->UNC, strlen(vol->UNC), GFP_KERNEL); - if (!new->UNC) - goto err_free_password; - } - if (vol->domainname) { - new->domainname = kstrndup(vol->domainname, - strlen(vol->domainname), GFP_KERNEL); - if 
(!new->domainname) - goto err_free_unc; - } - if (vol->iocharset) { - new->iocharset = kstrndup(vol->iocharset, - strlen(vol->iocharset), GFP_KERNEL); - if (!new->iocharset) - goto err_free_domainname; - } - if (vol->prepath) { - cifs_dbg(FYI, "%s: vol->prepath: %s\n", __func__, vol->prepath); - new->prepath = kstrndup(vol->prepath, strlen(vol->prepath), - GFP_KERNEL); - if (!new->prepath) - goto err_free_iocharset; - } - - return 0; - -err_free_iocharset: - kfree(new->iocharset); -err_free_domainname: - kfree(new->domainname); -err_free_unc: - kfree(new->UNC); -err_free_password: - kfree_sensitive(new->password); -err_free_username: - kfree(new->username); - kfree(new); - return -ENOMEM; -} - /** - * dfs_cache_add_vol - add a cifs volume during mount() that will be handled by + * dfs_cache_add_vol - add a cifs context during mount() that will be handled by * DFS cache refresh worker. * * @mntdata: mount data. - * @vol: cifs volume. + * @ctx: cifs context. * @fullpath: origin full path. * - * Return zero if volume was set up correctly, otherwise non-zero. + * Return zero if context was set up correctly, otherwise non-zero. 
*/ -int dfs_cache_add_vol(char *mntdata, struct smb_vol *vol, const char *fullpath) +int dfs_cache_add_vol(char *mntdata, struct smb3_fs_context *ctx, const char *fullpath) { int rc; struct vol_info *vi; - if (!vol || !fullpath || !mntdata) + if (!ctx || !fullpath || !mntdata) return -EINVAL; cifs_dbg(FYI, "%s: fullpath: %s\n", __func__, fullpath); @@ -1228,12 +1171,12 @@ int dfs_cache_add_vol(char *mntdata, struct smb_vol *vol, const char *fullpath) goto err_free_vi; } - rc = dup_vol(vol, &vi->smb_vol); + rc = smb3_fs_context_dup(&vi->ctx, ctx); if (rc) goto err_free_fullpath; vi->mntdata = mntdata; - spin_lock_init(&vi->smb_vol_lock); + spin_lock_init(&vi->ctx_lock); kref_init(&vi->refcnt); spin_lock(&vol_list_lock); @@ -1289,10 +1232,10 @@ int dfs_cache_update_vol(const char *fullpath, struct TCP_Server_Info *server) spin_unlock(&vol_list_lock); cifs_dbg(FYI, "%s: updating volume info\n", __func__); - spin_lock(&vi->smb_vol_lock); - memcpy(&vi->smb_vol.dstaddr, &server->dstaddr, - sizeof(vi->smb_vol.dstaddr)); - spin_unlock(&vi->smb_vol_lock); + spin_lock(&vi->ctx_lock); + memcpy(&vi->ctx.dstaddr, &server->dstaddr, + sizeof(vi->ctx.dstaddr)); + spin_unlock(&vi->ctx_lock); kref_put(&vi->refcnt, vol_release); @@ -1445,11 +1388,11 @@ static inline void put_tcp_server(struct TCP_Server_Info *server) cifs_put_tcp_session(server, 0); } -static struct TCP_Server_Info *get_tcp_server(struct smb_vol *vol) +static struct TCP_Server_Info *get_tcp_server(struct smb3_fs_context *ctx) { struct TCP_Server_Info *server; - server = cifs_find_tcp_session(vol); + server = cifs_find_tcp_session(ctx); if (IS_ERR_OR_NULL(server)) return NULL; @@ -1473,10 +1416,10 @@ static struct cifs_ses *find_root_ses(struct vol_info *vi, int rc; struct cache_entry *ce; struct dfs_info3_param ref = {0}; - char *mdata = NULL, *devname = NULL; + char *mdata = NULL; struct TCP_Server_Info *server; struct cifs_ses *ses; - struct smb_vol vol = {NULL}; + struct smb3_fs_context ctx = {NULL}; rpath = 
get_dfs_root(path); if (IS_ERR(rpath)) @@ -1500,8 +1443,7 @@ static struct cifs_ses *find_root_ses(struct vol_info *vi, up_read(&htable_rw_lock); - mdata = cifs_compose_mount_options(vi->mntdata, rpath, &ref, - &devname); + mdata = cifs_compose_mount_options(vi->mntdata, rpath, &ref); free_dfs_info_param(&ref); if (IS_ERR(mdata)) { @@ -1510,24 +1452,23 @@ static struct cifs_ses *find_root_ses(struct vol_info *vi, goto out; } - rc = cifs_setup_volume_info(&vol, mdata, devname, false); - kfree(devname); + rc = cifs_setup_volume_info(&ctx); if (rc) { ses = ERR_PTR(rc); goto out; } - server = get_tcp_server(&vol); + server = get_tcp_server(&ctx); if (!server) { ses = ERR_PTR(-EHOSTDOWN); goto out; } - ses = cifs_get_smb_ses(server, &vol); + ses = cifs_get_smb_ses(server, &ctx); out: - cifs_cleanup_volume_info_contents(&vol); + smb3_cleanup_fs_context_contents(&ctx); kfree(mdata); kfree(rpath); @@ -1619,7 +1560,7 @@ static void refresh_cache_worker(struct work_struct *work) */ spin_lock(&vol_list_lock); list_for_each_entry(vi, &vol_list, list) { - server = get_tcp_server(&vi->smb_vol); + server = get_tcp_server(&vi->ctx); if (!server) continue; @@ -1631,9 +1572,9 @@ static void refresh_cache_worker(struct work_struct *work) /* Walk through all TCONs and refresh any expired cache entry */ list_for_each_entry_safe(vi, nvi, &vols, rlist) { - spin_lock(&vi->smb_vol_lock); - server = get_tcp_server(&vi->smb_vol); - spin_unlock(&vi->smb_vol_lock); + spin_lock(&vi->ctx_lock); + server = get_tcp_server(&vi->ctx); + spin_unlock(&vi->ctx_lock); if (!server) goto next_vol; diff --git a/fs/cifs/dfs_cache.h b/fs/cifs/dfs_cache.h index 3d7c05194536..1afc4f590c47 100644 --- a/fs/cifs/dfs_cache.h +++ b/fs/cifs/dfs_cache.h @@ -44,7 +44,7 @@ dfs_cache_noreq_update_tgthint(const char *path, extern int dfs_cache_get_tgt_referral(const char *path, const struct dfs_cache_tgt_iterator *it, struct dfs_info3_param *ref); -extern int dfs_cache_add_vol(char *mntdata, struct smb_vol *vol, +extern 
int dfs_cache_add_vol(char *mntdata, struct smb3_fs_context *ctx, const char *fullpath); extern int dfs_cache_update_vol(const char *fullpath, struct TCP_Server_Info *server); diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 398c1eef7190..68900f1629bf 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -33,6 +33,7 @@ #include "cifs_debug.h" #include "cifs_fs_sb.h" #include "cifs_unicode.h" +#include "fs_context.h" static void renew_parental_timestamps(struct dentry *direntry) @@ -46,10 +47,10 @@ renew_parental_timestamps(struct dentry *direntry) } char * -cifs_build_path_to_root(struct smb_vol *vol, struct cifs_sb_info *cifs_sb, +cifs_build_path_to_root(struct smb3_fs_context *ctx, struct cifs_sb_info *cifs_sb, struct cifs_tcon *tcon, int add_treename) { - int pplen = vol->prepath ? strlen(vol->prepath) + 1 : 0; + int pplen = ctx->prepath ? strlen(ctx->prepath) + 1 : 0; int dfsplen; char *full_path = NULL; @@ -71,7 +72,7 @@ cifs_build_path_to_root(struct smb_vol *vol, struct cifs_sb_info *cifs_sb, if (dfsplen) memcpy(full_path, tcon->treeName, dfsplen); full_path[dfsplen] = CIFS_DIR_SEP(cifs_sb); - memcpy(full_path + dfsplen + 1, vol->prepath, pplen); + memcpy(full_path + dfsplen + 1, ctx->prepath, pplen); convert_delimiter(full_path, CIFS_DIR_SEP(cifs_sb)); return full_path; } diff --git a/fs/cifs/file.c b/fs/cifs/file.c index be46fab4c96d..6d001905c8e5 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -44,6 +44,7 @@ #include "cifs_fs_sb.h" #include "fscache.h" #include "smbdirect.h" +#include "fs_context.h" static inline int cifs_convert_flags(unsigned int flags) { @@ -416,6 +417,8 @@ static void cifsFileInfo_put_work(struct work_struct *work) * cifsFileInfo_put - release a reference of file priv data * * Always potentially wait for oplock handler. See _cifsFileInfo_put(). 
+ * + * @cifs_file: cifs/smb3 specific info (eg refcounts) for an open file */ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) { @@ -431,8 +434,11 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) * * If @wait_for_oplock_handler is true and we are releasing the last * reference, wait for any running oplock break handler of the file - * and cancel any pending one. If calling this function from the - * oplock break handler, you need to pass false. + * and cancel any pending one. + * + * @cifs_file: cifs/smb3 specific info (eg refcounts) for an open file + * @wait_oplock_handler: must be false if called from oplock_break_handler + * @offload: not offloaded on close and oplock breaks * */ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, @@ -566,7 +572,7 @@ int cifs_open(struct inode *inode, struct file *file) le64_to_cpu(tcon->fsUnixInfo.Capability))) { /* can not refresh inode info since size could be stale */ rc = cifs_posix_open(full_path, &inode, inode->i_sb, - cifs_sb->mnt_file_mode /* ignored */, + cifs_sb->ctx->file_mode /* ignored */, file->f_flags, &oplock, &fid.netfid, xid); if (rc == 0) { cifs_dbg(FYI, "posix open succeeded\n"); @@ -735,7 +741,7 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) ~(O_CREAT | O_EXCL | O_TRUNC); rc = cifs_posix_open(full_path, NULL, inode->i_sb, - cifs_sb->mnt_file_mode /* ignored */, + cifs_sb->ctx->file_mode /* ignored */, oflags, &oplock, &cfile->fid.netfid, xid); if (rc == 0) { cifs_dbg(FYI, "posix reopen succeeded\n"); @@ -2330,7 +2336,7 @@ static int cifs_writepages(struct address_space *mapping, * If wsize is smaller than the page cache size, default to writing * one page at a time via cifs_writepage */ - if (cifs_sb->wsize < PAGE_SIZE) + if (cifs_sb->ctx->wsize < PAGE_SIZE) return generic_writepages(mapping, wbc); xid = get_xid(); @@ -2363,7 +2369,7 @@ retry: if (rc) get_file_rc = rc; - rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize, + rc = server->ops->wait_mtu_credits(server, 
cifs_sb->ctx->wsize, &wsize, credits); if (rc != 0) { done = true; @@ -2905,7 +2911,7 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from, break; } - rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize, + rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize, &wsize, credits); if (rc) break; @@ -3636,7 +3642,7 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file, break; } - rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize, + rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize, &rsize, credits); if (rc) break; @@ -4022,7 +4028,7 @@ cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset) cifs_sb = CIFS_FILE_SB(file); /* FIXME: set up handlers for larger reads and/or convert to async */ - rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize); + rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize); if (file->private_data == NULL) { rc = -EBADF; @@ -4407,7 +4413,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, break; } - rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize, + rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize, &rsize, credits); if (rc) break; diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c index ad6c2fed4055..0afccbbed2e6 100644 --- a/fs/cifs/fs_context.c +++ b/fs/cifs/fs_context.c @@ -6,8 +6,32 @@ * David Howells <[email protected]> */ +/* +#include <linux/module.h> +#include <linux/nsproxy.h> +#include <linux/slab.h> +#include <linux/magic.h> +#include <linux/security.h> +#include <net/net_namespace.h> +*/ + +#include <linux/ctype.h> +#include <linux/fs_context.h> +#include <linux/fs_parser.h> +#include <linux/fs.h> +#include <linux/mount.h> +#include <linux/parser.h> +#include <linux/utsname.h> +#include "cifsfs.h" +#include "cifspdu.h" #include "cifsglob.h" +#include "cifsproto.h" +#include "cifs_unicode.h" #include "cifs_debug.h" +#include "cifs_fs_sb.h" +#include 
"ntlmssp.h" +#include "nterr.h" +#include "rfc1002pdu.h" #include "fs_context.h" static const match_table_t cifs_smb_version_tokens = { @@ -24,77 +48,6 @@ static const match_table_t cifs_smb_version_tokens = { { Smb_version_err, NULL } }; -int -cifs_parse_smb_version(char *value, struct smb_vol *vol, bool is_smb3) -{ - substring_t args[MAX_OPT_ARGS]; - - switch (match_token(value, cifs_smb_version_tokens, args)) { -#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY - case Smb_1: - if (disable_legacy_dialects) { - cifs_dbg(VFS, "mount with legacy dialect disabled\n"); - return 1; - } - if (is_smb3) { - cifs_dbg(VFS, "vers=1.0 (cifs) not permitted when mounting with smb3\n"); - return 1; - } - cifs_dbg(VFS, "Use of the less secure dialect vers=1.0 is not recommended unless required for access to very old servers\n"); - vol->ops = &smb1_operations; - vol->vals = &smb1_values; - break; - case Smb_20: - if (disable_legacy_dialects) { - cifs_dbg(VFS, "mount with legacy dialect disabled\n"); - return 1; - } - if (is_smb3) { - cifs_dbg(VFS, "vers=2.0 not permitted when mounting with smb3\n"); - return 1; - } - vol->ops = &smb20_operations; - vol->vals = &smb20_values; - break; -#else - case Smb_1: - cifs_dbg(VFS, "vers=1.0 (cifs) mount not permitted when legacy dialects disabled\n"); - return 1; - case Smb_20: - cifs_dbg(VFS, "vers=2.0 mount not permitted when legacy dialects disabled\n"); - return 1; -#endif /* CIFS_ALLOW_INSECURE_LEGACY */ - case Smb_21: - vol->ops = &smb21_operations; - vol->vals = &smb21_values; - break; - case Smb_30: - vol->ops = &smb30_operations; - vol->vals = &smb30_values; - break; - case Smb_302: - vol->ops = &smb30_operations; /* currently identical with 3.0 */ - vol->vals = &smb302_values; - break; - case Smb_311: - vol->ops = &smb311_operations; - vol->vals = &smb311_values; - break; - case Smb_3any: - vol->ops = &smb30_operations; /* currently identical with 3.0 */ - vol->vals = &smb3any_values; - break; - case Smb_default: - vol->ops = 
&smb30_operations; /* currently identical with 3.0 */ - vol->vals = &smbdefault_values; - break; - default: - cifs_dbg(VFS, "Unknown vers= option specified: %s\n", value); - return 1; - } - return 0; -} - static const match_table_t cifs_secflavor_tokens = { { Opt_sec_krb5, "krb5" }, { Opt_sec_krb5i, "krb5i" }, @@ -112,7 +65,123 @@ static const match_table_t cifs_secflavor_tokens = { { Opt_sec_err, NULL } }; -int cifs_parse_security_flavors(char *value, struct smb_vol *vol) +const struct fs_parameter_spec smb3_fs_parameters[] = { + /* Mount options that take no arguments */ + fsparam_flag_no("user_xattr", Opt_user_xattr), + fsparam_flag_no("forceuid", Opt_forceuid), + fsparam_flag_no("multichannel", Opt_multichannel), + fsparam_flag_no("forcegid", Opt_forcegid), + fsparam_flag("noblocksend", Opt_noblocksend), + fsparam_flag("noautotune", Opt_noautotune), + fsparam_flag("nolease", Opt_nolease), + fsparam_flag_no("hard", Opt_hard), + fsparam_flag_no("soft", Opt_soft), + fsparam_flag_no("perm", Opt_perm), + fsparam_flag("nodelete", Opt_nodelete), + fsparam_flag_no("mapposix", Opt_mapposix), + fsparam_flag("mapchars", Opt_mapchars), + fsparam_flag("nomapchars", Opt_nomapchars), + fsparam_flag_no("sfu", Opt_sfu), + fsparam_flag("nodfs", Opt_nodfs), + fsparam_flag_no("posixpaths", Opt_posixpaths), + fsparam_flag_no("unix", Opt_unix), + fsparam_flag_no("linux", Opt_unix), + fsparam_flag_no("posix", Opt_unix), + fsparam_flag("nocase", Opt_nocase), + fsparam_flag("ignorecase", Opt_nocase), + fsparam_flag_no("brl", Opt_brl), + fsparam_flag_no("handlecache", Opt_handlecache), + fsparam_flag("forcemandatorylock", Opt_forcemandatorylock), + fsparam_flag("forcemand", Opt_forcemandatorylock), + fsparam_flag("setuidfromacl", Opt_setuidfromacl), + fsparam_flag("idsfromsid", Opt_setuidfromacl), + fsparam_flag_no("setuids", Opt_setuids), + fsparam_flag_no("dynperm", Opt_dynperm), + fsparam_flag_no("intr", Opt_intr), + fsparam_flag_no("strictsync", Opt_strictsync), + 
fsparam_flag_no("serverino", Opt_serverino), + fsparam_flag("rwpidforward", Opt_rwpidforward), + fsparam_flag("cifsacl", Opt_cifsacl), + fsparam_flag_no("acl", Opt_acl), + fsparam_flag("locallease", Opt_locallease), + fsparam_flag("sign", Opt_sign), + fsparam_flag("ignore_signature", Opt_ignore_signature), + fsparam_flag("signloosely", Opt_ignore_signature), + fsparam_flag("seal", Opt_seal), + fsparam_flag("noac", Opt_noac), + fsparam_flag("fsc", Opt_fsc), + fsparam_flag("mfsymlinks", Opt_mfsymlinks), + fsparam_flag("multiuser", Opt_multiuser), + fsparam_flag("sloppy", Opt_sloppy), + fsparam_flag("nosharesock", Opt_nosharesock), + fsparam_flag_no("persistenthandles", Opt_persistent), + fsparam_flag_no("resilienthandles", Opt_resilient), + fsparam_flag("domainauto", Opt_domainauto), + fsparam_flag("rdma", Opt_rdma), + fsparam_flag("modesid", Opt_modesid), + fsparam_flag("modefromsid", Opt_modesid), + fsparam_flag("rootfs", Opt_rootfs), + fsparam_flag("compress", Opt_compress), + fsparam_flag("witness", Opt_witness), + + /* Mount options which take numeric value */ + fsparam_u32("backupuid", Opt_backupuid), + fsparam_u32("backupgid", Opt_backupgid), + fsparam_u32("uid", Opt_uid), + fsparam_u32("cruid", Opt_cruid), + fsparam_u32("gid", Opt_gid), + fsparam_u32("file_mode", Opt_file_mode), + fsparam_u32("dirmode", Opt_dirmode), + fsparam_u32("dir_mode", Opt_dirmode), + fsparam_u32("port", Opt_port), + fsparam_u32("min_enc_offload", Opt_min_enc_offload), + fsparam_u32("esize", Opt_min_enc_offload), + fsparam_u32("bsize", Opt_blocksize), + fsparam_u32("rsize", Opt_rsize), + fsparam_u32("wsize", Opt_wsize), + fsparam_u32("actimeo", Opt_actimeo), + fsparam_u32("echo_interval", Opt_echo_interval), + fsparam_u32("max_credits", Opt_max_credits), + fsparam_u32("handletimeout", Opt_handletimeout), + fsparam_u32("snapshot", Opt_snapshot), + fsparam_u32("max_channels", Opt_max_channels), + + /* Mount options which take string value */ + fsparam_string("source", Opt_source), + 
fsparam_string("unc", Opt_source), + fsparam_string("user", Opt_user), + fsparam_string("username", Opt_user), + fsparam_string("pass", Opt_pass), + fsparam_string("password", Opt_pass), + fsparam_string("ip", Opt_ip), + fsparam_string("addr", Opt_ip), + fsparam_string("domain", Opt_domain), + fsparam_string("dom", Opt_domain), + fsparam_string("srcaddr", Opt_srcaddr), + fsparam_string("iocharset", Opt_iocharset), + fsparam_string("netbiosname", Opt_netbiosname), + fsparam_string("servern", Opt_servern), + fsparam_string("ver", Opt_ver), + fsparam_string("vers", Opt_vers), + fsparam_string("sec", Opt_sec), + fsparam_string("cache", Opt_cache), + + /* Arguments that should be ignored */ + fsparam_flag("guest", Opt_ignore), + fsparam_flag("noatime", Opt_ignore), + fsparam_flag("relatime", Opt_ignore), + fsparam_flag("_netdev", Opt_ignore), + fsparam_flag_no("suid", Opt_ignore), + fsparam_flag_no("exec", Opt_ignore), + fsparam_flag_no("dev", Opt_ignore), + fsparam_flag_no("mand", Opt_ignore), + fsparam_string("cred", Opt_ignore), + fsparam_string("credentials", Opt_ignore), + {} +}; + +int +cifs_parse_security_flavors(char *value, struct smb3_fs_context *ctx) { substring_t args[MAX_OPT_ARGS]; @@ -121,44 +190,44 @@ int cifs_parse_security_flavors(char *value, struct smb_vol *vol) * With mount options, the last one should win. Reset any existing * settings back to default. 
*/ - vol->sectype = Unspecified; - vol->sign = false; + ctx->sectype = Unspecified; + ctx->sign = false; switch (match_token(value, cifs_secflavor_tokens, args)) { case Opt_sec_krb5p: cifs_dbg(VFS, "sec=krb5p is not supported!\n"); return 1; case Opt_sec_krb5i: - vol->sign = true; + ctx->sign = true; fallthrough; case Opt_sec_krb5: - vol->sectype = Kerberos; + ctx->sectype = Kerberos; break; case Opt_sec_ntlmsspi: - vol->sign = true; + ctx->sign = true; fallthrough; case Opt_sec_ntlmssp: - vol->sectype = RawNTLMSSP; + ctx->sectype = RawNTLMSSP; break; case Opt_sec_ntlmi: - vol->sign = true; + ctx->sign = true; fallthrough; case Opt_ntlm: - vol->sectype = NTLM; + ctx->sectype = NTLM; break; case Opt_sec_ntlmv2i: - vol->sign = true; + ctx->sign = true; fallthrough; case Opt_sec_ntlmv2: - vol->sectype = NTLMv2; + ctx->sectype = NTLMv2; break; #ifdef CONFIG_CIFS_WEAK_PW_HASH case Opt_sec_lanman: - vol->sectype = LANMAN; + ctx->sectype = LANMAN; break; #endif case Opt_sec_none: - vol->nullauth = 1; + ctx->nullauth = 1; break; default: cifs_dbg(VFS, "bad security option: %s\n", value); @@ -178,40 +247,40 @@ static const match_table_t cifs_cacheflavor_tokens = { }; int -cifs_parse_cache_flavor(char *value, struct smb_vol *vol) +cifs_parse_cache_flavor(char *value, struct smb3_fs_context *ctx) { substring_t args[MAX_OPT_ARGS]; switch (match_token(value, cifs_cacheflavor_tokens, args)) { case Opt_cache_loose: - vol->direct_io = false; - vol->strict_io = false; - vol->cache_ro = false; - vol->cache_rw = false; + ctx->direct_io = false; + ctx->strict_io = false; + ctx->cache_ro = false; + ctx->cache_rw = false; break; case Opt_cache_strict: - vol->direct_io = false; - vol->strict_io = true; - vol->cache_ro = false; - vol->cache_rw = false; + ctx->direct_io = false; + ctx->strict_io = true; + ctx->cache_ro = false; + ctx->cache_rw = false; break; case Opt_cache_none: - vol->direct_io = true; - vol->strict_io = false; - vol->cache_ro = false; - vol->cache_rw = false; + 
ctx->direct_io = true; + ctx->strict_io = false; + ctx->cache_ro = false; + ctx->cache_rw = false; break; case Opt_cache_ro: - vol->direct_io = false; - vol->strict_io = false; - vol->cache_ro = true; - vol->cache_rw = false; + ctx->direct_io = false; + ctx->strict_io = false; + ctx->cache_ro = true; + ctx->cache_rw = false; break; case Opt_cache_rw: - vol->direct_io = false; - vol->strict_io = false; - vol->cache_ro = false; - vol->cache_rw = true; + ctx->direct_io = false; + ctx->strict_io = false; + ctx->cache_ro = false; + ctx->cache_rw = true; break; default: cifs_dbg(VFS, "bad cache= option: %s\n", value); @@ -219,3 +288,1253 @@ cifs_parse_cache_flavor(char *value, struct smb_vol *vol) } return 0; } + +#define DUP_CTX_STR(field) \ +do { \ + if (ctx->field) { \ + new_ctx->field = kstrdup(ctx->field, GFP_ATOMIC); \ + if (new_ctx->field == NULL) { \ + smb3_cleanup_fs_context_contents(new_ctx); \ + return -ENOMEM; \ + } \ + } \ +} while (0) + +int +smb3_fs_context_dup(struct smb3_fs_context *new_ctx, struct smb3_fs_context *ctx) +{ + int rc = 0; + + memcpy(new_ctx, ctx, sizeof(*ctx)); + new_ctx->prepath = NULL; + new_ctx->mount_options = NULL; + new_ctx->nodename = NULL; + new_ctx->username = NULL; + new_ctx->password = NULL; + new_ctx->domainname = NULL; + new_ctx->UNC = NULL; + new_ctx->iocharset = NULL; + + /* + * Make sure to stay in sync with smb3_cleanup_fs_context_contents() + */ + DUP_CTX_STR(prepath); + DUP_CTX_STR(mount_options); + DUP_CTX_STR(username); + DUP_CTX_STR(password); + DUP_CTX_STR(UNC); + DUP_CTX_STR(domainname); + DUP_CTX_STR(nodename); + DUP_CTX_STR(iocharset); + + return rc; +} + +static int +cifs_parse_smb_version(char *value, struct smb3_fs_context *ctx, bool is_smb3) +{ + substring_t args[MAX_OPT_ARGS]; + + switch (match_token(value, cifs_smb_version_tokens, args)) { +#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY + case Smb_1: + if (disable_legacy_dialects) { + cifs_dbg(VFS, "mount with legacy dialect disabled\n"); + return 1; + } + if 
(is_smb3) { + cifs_dbg(VFS, "vers=1.0 (cifs) not permitted when mounting with smb3\n"); + return 1; + } + cifs_dbg(VFS, "Use of the less secure dialect vers=1.0 is not recommended unless required for access to very old servers\n"); + ctx->ops = &smb1_operations; + ctx->vals = &smb1_values; + break; + case Smb_20: + if (disable_legacy_dialects) { + cifs_dbg(VFS, "mount with legacy dialect disabled\n"); + return 1; + } + if (is_smb3) { + cifs_dbg(VFS, "vers=2.0 not permitted when mounting with smb3\n"); + return 1; + } + ctx->ops = &smb20_operations; + ctx->vals = &smb20_values; + break; +#else + case Smb_1: + cifs_dbg(VFS, "vers=1.0 (cifs) mount not permitted when legacy dialects disabled\n"); + return 1; + case Smb_20: + cifs_dbg(VFS, "vers=2.0 mount not permitted when legacy dialects disabled\n"); + return 1; +#endif /* CIFS_ALLOW_INSECURE_LEGACY */ + case Smb_21: + ctx->ops = &smb21_operations; + ctx->vals = &smb21_values; + break; + case Smb_30: + ctx->ops = &smb30_operations; + ctx->vals = &smb30_values; + break; + case Smb_302: + ctx->ops = &smb30_operations; /* currently identical with 3.0 */ + ctx->vals = &smb302_values; + break; + case Smb_311: + ctx->ops = &smb311_operations; + ctx->vals = &smb311_values; + break; + case Smb_3any: + ctx->ops = &smb30_operations; /* currently identical with 3.0 */ + ctx->vals = &smb3any_values; + break; + case Smb_default: + ctx->ops = &smb30_operations; /* currently identical with 3.0 */ + ctx->vals = &smbdefault_values; + break; + default: + cifs_dbg(VFS, "Unknown vers= option specified: %s\n", value); + return 1; + } + return 0; +} + +/* + * Parse a devname into substrings and populate the ctx->UNC and ctx->prepath + * fields with the result. Returns 0 on success and an error otherwise + * (e.g. 
ENOMEM or EINVAL) + */ +int +smb3_parse_devname(const char *devname, struct smb3_fs_context *ctx) +{ + char *pos; + const char *delims = "/\\"; + size_t len; + + if (unlikely(!devname || !*devname)) { + cifs_dbg(VFS, "Device name not specified\n"); + return -EINVAL; + } + + /* make sure we have a valid UNC double delimiter prefix */ + len = strspn(devname, delims); + if (len != 2) + return -EINVAL; + + /* find delimiter between host and sharename */ + pos = strpbrk(devname + 2, delims); + if (!pos) + return -EINVAL; + + /* skip past delimiter */ + ++pos; + + /* now go until next delimiter or end of string */ + len = strcspn(pos, delims); + + /* move "pos" up to delimiter or NULL */ + pos += len; + ctx->UNC = kstrndup(devname, pos - devname, GFP_KERNEL); + if (!ctx->UNC) + return -ENOMEM; + + convert_delimiter(ctx->UNC, '\\'); + + /* skip any delimiter */ + if (*pos == '/' || *pos == '\\') + pos++; + + /* If pos is NULL then no prepath */ + if (!*pos) + return 0; + + ctx->prepath = kstrdup(pos, GFP_KERNEL); + if (!ctx->prepath) + return -ENOMEM; + + return 0; +} + +static void smb3_fs_context_free(struct fs_context *fc); +static int smb3_fs_context_parse_param(struct fs_context *fc, + struct fs_parameter *param); +static int smb3_fs_context_parse_monolithic(struct fs_context *fc, + void *data); +static int smb3_get_tree(struct fs_context *fc); +static int smb3_reconfigure(struct fs_context *fc); + +static const struct fs_context_operations smb3_fs_context_ops = { + .free = smb3_fs_context_free, + .parse_param = smb3_fs_context_parse_param, + .parse_monolithic = smb3_fs_context_parse_monolithic, + .get_tree = smb3_get_tree, + .reconfigure = smb3_reconfigure, +}; + +/* + * Parse a monolithic block of data from sys_mount(). + * smb3_fs_context_parse_monolithic - Parse key[=val][,key[=val]]* mount data + * @ctx: The superblock configuration to fill in. + * @data: The data to parse + * + * Parse a blob of data that's in key[=val][,key[=val]]* form. 
This can be + * called from the ->monolithic_mount_data() fs_context operation. + * + * Returns 0 on success or the error returned by the ->parse_option() fs_context + * operation on failure. + */ +static int smb3_fs_context_parse_monolithic(struct fs_context *fc, + void *data) +{ + struct smb3_fs_context *ctx = smb3_fc2context(fc); + char *options = data, *key; + int ret = 0; + + if (!options) + return 0; + + ctx->mount_options = kstrdup(data, GFP_KERNEL); + if (ctx->mount_options == NULL) + return -ENOMEM; + + ret = security_sb_eat_lsm_opts(options, &fc->security); + if (ret) + return ret; + + /* BB Need to add support for sep= here TBD */ + while ((key = strsep(&options, ",")) != NULL) { + if (*key) { + size_t v_len = 0; + char *value = strchr(key, '='); + + if (value) { + if (value == key) + continue; + *value++ = 0; + v_len = strlen(value); + } + ret = vfs_parse_fs_string(fc, key, value, v_len); + if (ret < 0) + break; + } + } + + return ret; +} + +/* + * Validate the preparsed information in the config. + */ +static int smb3_fs_context_validate(struct fs_context *fc) +{ + struct smb3_fs_context *ctx = smb3_fc2context(fc); + + if (ctx->rdma && ctx->vals->protocol_id < SMB30_PROT_ID) { + cifs_dbg(VFS, "SMB Direct requires Version >=3.0\n"); + return -1; + } + +#ifndef CONFIG_KEYS + /* Muliuser mounts require CONFIG_KEYS support */ + if (ctx->multiuser) { + cifs_dbg(VFS, "Multiuser mounts require kernels with CONFIG_KEYS enabled\n"); + return -1; + } +#endif + + if (ctx->got_version == false) + pr_warn_once("No dialect specified on mount. Default has changed to a more secure dialect, SMB2.1 or later (e.g. SMB3.1.1), from CIFS (SMB1). 
To use the less secure SMB1 dialect to access old servers which do not support SMB3.1.1 (or even SMB3 or SMB2.1) specify vers=1.0 on mount.\n"); + + + if (!ctx->UNC) { + cifs_dbg(VFS, "CIFS mount error: No usable UNC path provided in device string!\n"); + return -1; + } + + /* make sure UNC has a share name */ + if (strlen(ctx->UNC) < 3 || !strchr(ctx->UNC + 3, '\\')) { + cifs_dbg(VFS, "Malformed UNC. Unable to find share name.\n"); + return -1; + } + + if (!ctx->got_ip) { + int len; + const char *slash; + + /* No ip= option specified? Try to get it from UNC */ + /* Use the address part of the UNC. */ + slash = strchr(&ctx->UNC[2], '\\'); + len = slash - &ctx->UNC[2]; + if (!cifs_convert_address((struct sockaddr *)&ctx->dstaddr, + &ctx->UNC[2], len)) { + pr_err("Unable to determine destination address\n"); + return -1; + } + } + + /* set the port that we got earlier */ + cifs_set_port((struct sockaddr *)&ctx->dstaddr, ctx->port); + + if (ctx->override_uid && !ctx->uid_specified) { + ctx->override_uid = 0; + pr_notice("ignoring forceuid mount option specified with no uid= option\n"); + } + + if (ctx->override_gid && !ctx->gid_specified) { + ctx->override_gid = 0; + pr_notice("ignoring forcegid mount option specified with no gid= option\n"); + } + + return 0; +} + +static int smb3_get_tree_common(struct fs_context *fc) +{ + struct smb3_fs_context *ctx = smb3_fc2context(fc); + struct dentry *root; + int rc = 0; + + root = cifs_smb3_do_mount(fc->fs_type, 0, ctx); + if (IS_ERR(root)) + return PTR_ERR(root); + + fc->root = root; + + return rc; +} + +/* + * Create an SMB3 superblock from the parameters passed. 
+ */ +static int smb3_get_tree(struct fs_context *fc) +{ + int err = smb3_fs_context_validate(fc); + + if (err) + return err; + return smb3_get_tree_common(fc); +} + +static void smb3_fs_context_free(struct fs_context *fc) +{ + struct smb3_fs_context *ctx = smb3_fc2context(fc); + + smb3_cleanup_fs_context(ctx); +} + +/* + * Compare the old and new proposed context during reconfigure + * and check if the changes are compatible. + */ +static int smb3_verify_reconfigure_ctx(struct smb3_fs_context *new_ctx, + struct smb3_fs_context *old_ctx) +{ + if (new_ctx->posix_paths != old_ctx->posix_paths) { + cifs_dbg(VFS, "can not change posixpaths during remount\n"); + return -EINVAL; + } + if (new_ctx->sectype != old_ctx->sectype) { + cifs_dbg(VFS, "can not change sec during remount\n"); + return -EINVAL; + } + if (new_ctx->multiuser != old_ctx->multiuser) { + cifs_dbg(VFS, "can not change multiuser during remount\n"); + return -EINVAL; + } + if (new_ctx->UNC && + (!old_ctx->UNC || strcmp(new_ctx->UNC, old_ctx->UNC))) { + cifs_dbg(VFS, "can not change UNC during remount\n"); + return -EINVAL; + } + if (new_ctx->username && + (!old_ctx->username || strcmp(new_ctx->username, old_ctx->username))) { + cifs_dbg(VFS, "can not change username during remount\n"); + return -EINVAL; + } + if (new_ctx->password && + (!old_ctx->password || strcmp(new_ctx->password, old_ctx->password))) { + cifs_dbg(VFS, "can not change password during remount\n"); + return -EINVAL; + } + if (new_ctx->domainname && + (!old_ctx->domainname || strcmp(new_ctx->domainname, old_ctx->domainname))) { + cifs_dbg(VFS, "can not change domainname during remount\n"); + return -EINVAL; + } + if (new_ctx->nodename && + (!old_ctx->nodename || strcmp(new_ctx->nodename, old_ctx->nodename))) { + cifs_dbg(VFS, "can not change nodename during remount\n"); + return -EINVAL; + } + if (new_ctx->iocharset && + (!old_ctx->iocharset || strcmp(new_ctx->iocharset, old_ctx->iocharset))) { + cifs_dbg(VFS, "can not change iocharset 
during remount\n"); + return -EINVAL; + } + + return 0; +} + +#define STEAL_STRING(cifs_sb, ctx, field) \ +do { \ + kfree(ctx->field); \ + ctx->field = cifs_sb->ctx->field; \ + cifs_sb->ctx->field = NULL; \ +} while (0) + +static int smb3_reconfigure(struct fs_context *fc) +{ + struct smb3_fs_context *ctx = smb3_fc2context(fc); + struct dentry *root = fc->root; + struct cifs_sb_info *cifs_sb = CIFS_SB(root->d_sb); + int rc; + + rc = smb3_verify_reconfigure_ctx(ctx, cifs_sb->ctx); + if (rc) + return rc; + + /* + * We can not change UNC/username/password/domainname/nodename/iocharset + * during reconnect so ignore what we have in the new context and + * just use what we already have in cifs_sb->ctx. + */ + STEAL_STRING(cifs_sb, ctx, UNC); + STEAL_STRING(cifs_sb, ctx, username); + STEAL_STRING(cifs_sb, ctx, password); + STEAL_STRING(cifs_sb, ctx, domainname); + STEAL_STRING(cifs_sb, ctx, nodename); + STEAL_STRING(cifs_sb, ctx, iocharset); + + /* if rsize or wsize not passed in on remount, use previous values */ + if (ctx->rsize == 0) + ctx->rsize = cifs_sb->ctx->rsize; + if (ctx->wsize == 0) + ctx->wsize = cifs_sb->ctx->wsize; + + + smb3_cleanup_fs_context_contents(cifs_sb->ctx); + rc = smb3_fs_context_dup(cifs_sb->ctx, ctx); + smb3_update_mnt_flags(cifs_sb); + + return rc; +} + +static int smb3_fs_context_parse_param(struct fs_context *fc, + struct fs_parameter *param) +{ + struct fs_parse_result result; + struct smb3_fs_context *ctx = smb3_fc2context(fc); + int i, opt; + bool is_smb3 = !strcmp(fc->fs_type->name, "smb3"); + bool skip_parsing = false; + + cifs_dbg(FYI, "CIFS: parsing cifs mount option '%s'\n", param->key); + + /* + * fs_parse can not handle string options with an empty value so + * we will need special handling of them. 
+ */ + if (param->type == fs_value_is_string && param->string[0] == 0) { + if (!strcmp("pass", param->key) || !strcmp("password", param->key)) { + skip_parsing = true; + opt = Opt_pass; + } else if (!strcmp("user", param->key) || !strcmp("username", param->key)) { + skip_parsing = true; + opt = Opt_user; + } + } + + if (!skip_parsing) { + opt = fs_parse(fc, smb3_fs_parameters, param, &result); + if (opt < 0) + return ctx->sloppy ? 1 : opt; + } + + switch (opt) { + case Opt_compress: + ctx->compression = UNKNOWN_TYPE; + cifs_dbg(VFS, + "SMB3 compression support is experimental\n"); + break; + case Opt_nodfs: + ctx->nodfs = 1; + break; + case Opt_hard: + if (result.negated) + ctx->retry = 0; + else + ctx->retry = 1; + break; + case Opt_soft: + if (result.negated) + ctx->retry = 1; + else + ctx->retry = 0; + break; + case Opt_mapposix: + if (result.negated) + ctx->remap = false; + else { + ctx->remap = true; + ctx->sfu_remap = false; /* disable SFU mapping */ + } + break; + case Opt_user_xattr: + if (result.negated) + ctx->no_xattr = 1; + else + ctx->no_xattr = 0; + break; + case Opt_forceuid: + if (result.negated) + ctx->override_uid = 0; + else + ctx->override_uid = 1; + break; + case Opt_forcegid: + if (result.negated) + ctx->override_gid = 0; + else + ctx->override_gid = 1; + break; + case Opt_perm: + if (result.negated) + ctx->noperm = 1; + else + ctx->noperm = 0; + break; + case Opt_dynperm: + if (result.negated) + ctx->dynperm = 0; + else + ctx->dynperm = 1; + break; + case Opt_sfu: + if (result.negated) + ctx->sfu_emul = 0; + else + ctx->sfu_emul = 1; + break; + case Opt_noblocksend: + ctx->noblocksnd = 1; + break; + case Opt_noautotune: + ctx->noautotune = 1; + break; + case Opt_nolease: + ctx->no_lease = 1; + break; + case Opt_nodelete: + ctx->nodelete = 1; + break; + case Opt_multichannel: + if (result.negated) { + ctx->multichannel = false; + ctx->max_channels = 1; + } else { + ctx->multichannel = true; + /* if number of channels not specified, default to 
2 */ + if (ctx->max_channels < 2) + ctx->max_channels = 2; + } + break; + case Opt_uid: + ctx->linux_uid.val = result.uint_32; + ctx->uid_specified = true; + break; + case Opt_cruid: + ctx->cred_uid.val = result.uint_32; + break; + case Opt_backupgid: + ctx->backupgid.val = result.uint_32; + ctx->backupgid_specified = true; + break; + case Opt_gid: + ctx->linux_gid.val = result.uint_32; + ctx->gid_specified = true; + break; + case Opt_port: + ctx->port = result.uint_32; + break; + case Opt_file_mode: + ctx->file_mode = result.uint_32; + break; + case Opt_dirmode: + ctx->dir_mode = result.uint_32; + break; + case Opt_min_enc_offload: + ctx->min_offload = result.uint_32; + break; + case Opt_blocksize: + /* + * inode blocksize realistically should never need to be + * less than 16K or greater than 16M and default is 1MB. + * Note that small inode block sizes (e.g. 64K) can lead + * to very poor performance of common tools like cp and scp + */ + if ((result.uint_32 < CIFS_MAX_MSGSIZE) || + (result.uint_32 > (4 * SMB3_DEFAULT_IOSIZE))) { + cifs_dbg(VFS, "%s: Invalid blocksize\n", + __func__); + goto cifs_parse_mount_err; + } + ctx->bsize = result.uint_32; + ctx->got_bsize = true; + break; + case Opt_rsize: + ctx->rsize = result.uint_32; + ctx->got_rsize = true; + break; + case Opt_wsize: + ctx->wsize = result.uint_32; + ctx->got_wsize = true; + break; + case Opt_actimeo: + ctx->actimeo = HZ * result.uint_32; + if (ctx->actimeo > CIFS_MAX_ACTIMEO) { + cifs_dbg(VFS, "attribute cache timeout too large\n"); + goto cifs_parse_mount_err; + } + break; + case Opt_echo_interval: + ctx->echo_interval = result.uint_32; + break; + case Opt_snapshot: + ctx->snapshot_time = result.uint_32; + break; + case Opt_max_credits: + if (result.uint_32 < 20 || result.uint_32 > 60000) { + cifs_dbg(VFS, "%s: Invalid max_credits value\n", + __func__); + goto cifs_parse_mount_err; + } + ctx->max_credits = result.uint_32; + break; + case Opt_max_channels: + if (result.uint_32 < 1 || result.uint_32 
> CIFS_MAX_CHANNELS) { + cifs_dbg(VFS, "%s: Invalid max_channels value, needs to be 1-%d\n", + __func__, CIFS_MAX_CHANNELS); + goto cifs_parse_mount_err; + } + ctx->max_channels = result.uint_32; + break; + case Opt_handletimeout: + ctx->handle_timeout = result.uint_32; + if (ctx->handle_timeout > SMB3_MAX_HANDLE_TIMEOUT) { + cifs_dbg(VFS, "Invalid handle cache timeout, longer than 16 minutes\n"); + goto cifs_parse_mount_err; + } + break; + case Opt_source: + kfree(ctx->UNC); + ctx->UNC = NULL; + switch (smb3_parse_devname(param->string, ctx)) { + case 0: + break; + case -ENOMEM: + cifs_dbg(VFS, "Unable to allocate memory for devname\n"); + goto cifs_parse_mount_err; + case -EINVAL: + cifs_dbg(VFS, "Malformed UNC in devname\n"); + goto cifs_parse_mount_err; + default: + cifs_dbg(VFS, "Unknown error parsing devname\n"); + goto cifs_parse_mount_err; + } + fc->source = kstrdup(param->string, GFP_KERNEL); + if (fc->source == NULL) { + cifs_dbg(VFS, "OOM when copying UNC string\n"); + goto cifs_parse_mount_err; + } + break; + case Opt_user: + kfree(ctx->username); + ctx->username = NULL; + if (strlen(param->string) == 0) { + /* null user, ie. 
anonymous authentication */ + ctx->nullauth = 1; + break; + } + + if (strnlen(param->string, CIFS_MAX_USERNAME_LEN) > + CIFS_MAX_USERNAME_LEN) { + pr_warn("username too long\n"); + goto cifs_parse_mount_err; + } + ctx->username = kstrdup(param->string, GFP_KERNEL); + if (ctx->username == NULL) { + cifs_dbg(VFS, "OOM when copying username string\n"); + goto cifs_parse_mount_err; + } + break; + case Opt_pass: + kfree(ctx->password); + ctx->password = NULL; + if (strlen(param->string) == 0) + break; + + ctx->password = kstrdup(param->string, GFP_KERNEL); + if (ctx->password == NULL) { + cifs_dbg(VFS, "OOM when copying password string\n"); + goto cifs_parse_mount_err; + } + break; + case Opt_ip: + if (strlen(param->string) == 0) { + ctx->got_ip = false; + break; + } + if (!cifs_convert_address((struct sockaddr *)&ctx->dstaddr, + param->string, + strlen(param->string))) { + pr_err("bad ip= option (%s)\n", param->string); + goto cifs_parse_mount_err; + } + ctx->got_ip = true; + break; + case Opt_domain: + if (strnlen(param->string, CIFS_MAX_DOMAINNAME_LEN) + == CIFS_MAX_DOMAINNAME_LEN) { + pr_warn("domain name too long\n"); + goto cifs_parse_mount_err; + } + + kfree(ctx->domainname); + ctx->domainname = kstrdup(param->string, GFP_KERNEL); + if (ctx->domainname == NULL) { + cifs_dbg(VFS, "OOM when copying domainname string\n"); + goto cifs_parse_mount_err; + } + cifs_dbg(FYI, "Domain name set\n"); + break; + case Opt_srcaddr: + if (!cifs_convert_address( + (struct sockaddr *)&ctx->srcaddr, + param->string, strlen(param->string))) { + pr_warn("Could not parse srcaddr: %s\n", + param->string); + goto cifs_parse_mount_err; + } + break; + case Opt_iocharset: + if (strnlen(param->string, 1024) >= 65) { + pr_warn("iocharset name too long\n"); + goto cifs_parse_mount_err; + } + + if (strncasecmp(param->string, "default", 7) != 0) { + kfree(ctx->iocharset); + ctx->iocharset = kstrdup(param->string, GFP_KERNEL); + if (ctx->iocharset == NULL) { + cifs_dbg(VFS, "OOM when copying 
iocharset string\n"); + goto cifs_parse_mount_err; + } + } + /* if iocharset not set then load_nls_default + * is used by caller + */ + cifs_dbg(FYI, "iocharset set to %s\n", ctx->iocharset); + break; + case Opt_netbiosname: + memset(ctx->source_rfc1001_name, 0x20, + RFC1001_NAME_LEN); + /* + * FIXME: are there cases in which a comma can + * be valid in workstation netbios name (and + * need special handling)? + */ + for (i = 0; i < RFC1001_NAME_LEN; i++) { + /* don't ucase netbiosname for user */ + if (param->string[i] == 0) + break; + ctx->source_rfc1001_name[i] = param->string[i]; + } + /* The string has 16th byte zero still from + * set at top of the function + */ + if (i == RFC1001_NAME_LEN && param->string[i] != 0) + pr_warn("netbiosname longer than 15 truncated\n"); + break; + case Opt_servern: + /* last byte, type, is 0x20 for servr type */ + memset(ctx->target_rfc1001_name, 0x20, + RFC1001_NAME_LEN_WITH_NULL); + /* + * BB are there cases in which a comma can be valid in this + * workstation netbios name (and need special handling)? + */ + + /* user or mount helper must uppercase the netbios name */ + for (i = 0; i < 15; i++) { + if (param->string[i] == 0) + break; + ctx->target_rfc1001_name[i] = param->string[i]; + } + + /* The string has 16th byte zero still from set at top of function */ + if (i == RFC1001_NAME_LEN && param->string[i] != 0) + pr_warn("server netbiosname longer than 15 truncated\n"); + break; + case Opt_ver: + /* version of mount userspace tools, not dialect */ + /* If interface changes in mount.cifs bump to new ver */ + if (strncasecmp(param->string, "1", 1) == 0) { + if (strlen(param->string) > 1) { + pr_warn("Bad mount helper ver=%s. 
Did you want SMB1 (CIFS) dialect and mean to type vers=1.0 instead?\n", + param->string); + goto cifs_parse_mount_err; + } + /* This is the default */ + break; + } + /* For all other value, error */ + pr_warn("Invalid mount helper version specified\n"); + goto cifs_parse_mount_err; + case Opt_vers: + /* protocol version (dialect) */ + if (cifs_parse_smb_version(param->string, ctx, is_smb3) != 0) + goto cifs_parse_mount_err; + ctx->got_version = true; + break; + case Opt_sec: + if (cifs_parse_security_flavors(param->string, ctx) != 0) + goto cifs_parse_mount_err; + break; + case Opt_cache: + if (cifs_parse_cache_flavor(param->string, ctx) != 0) + goto cifs_parse_mount_err; + break; + case Opt_witness: +#ifndef CONFIG_CIFS_SWN_UPCALL + cifs_dbg(VFS, "Witness support needs CONFIG_CIFS_SWN_UPCALL config option\n"); + goto cifs_parse_mount_err; +#endif + ctx->witness = true; + pr_warn_once("Witness protocol support is experimental\n"); + break; + case Opt_rootfs: +#ifdef CONFIG_CIFS_ROOT + ctx->rootfs = true; +#endif + break; + case Opt_posixpaths: + if (result.negated) + ctx->posix_paths = 0; + else + ctx->posix_paths = 1; + break; + case Opt_unix: + if (result.negated) + ctx->linux_ext = 0; + else + ctx->no_linux_ext = 1; + break; + case Opt_nocase: + ctx->nocase = 1; + break; + case Opt_brl: + if (result.negated) { + /* + * turn off mandatory locking in mode + * if remote locking is turned off since the + * local vfs will do advisory + */ + if (ctx->file_mode == + (S_IALLUGO & ~(S_ISUID | S_IXGRP))) + ctx->file_mode = S_IALLUGO; + ctx->nobrl = 1; + } else + ctx->nobrl = 0; + break; + case Opt_handlecache: + if (result.negated) + ctx->nohandlecache = 1; + else + ctx->nohandlecache = 0; + break; + case Opt_forcemandatorylock: + ctx->mand_lock = 1; + break; + case Opt_setuids: + ctx->setuids = result.negated; + break; + case Opt_intr: + ctx->intr = !result.negated; + break; + case Opt_setuidfromacl: + ctx->setuidfromacl = 1; + break; + case Opt_strictsync: + 
ctx->nostrictsync = result.negated; + break; + case Opt_serverino: + ctx->server_ino = !result.negated; + break; + case Opt_rwpidforward: + ctx->rwpidforward = 1; + break; + case Opt_modesid: + ctx->mode_ace = 1; + break; + case Opt_cifsacl: + ctx->cifs_acl = !result.negated; + break; + case Opt_acl: + ctx->no_psx_acl = result.negated; + break; + case Opt_locallease: + ctx->local_lease = 1; + break; + case Opt_sign: + ctx->sign = true; + break; + case Opt_ignore_signature: + ctx->sign = true; + ctx->ignore_signature = true; + break; + case Opt_seal: + /* we do not do the following in secFlags because seal + * is a per tree connection (mount) not a per socket + * or per-smb connection option in the protocol + * vol->secFlg |= CIFSSEC_MUST_SEAL; + */ + ctx->seal = 1; + break; + case Opt_noac: + pr_warn("Mount option noac not supported. Instead set /proc/fs/cifs/LookupCacheEnabled to 0\n"); + break; + case Opt_fsc: +#ifndef CONFIG_CIFS_FSCACHE + cifs_dbg(VFS, "FS-Cache support needs CONFIG_CIFS_FSCACHE kernel config option set\n"); + goto cifs_parse_mount_err; +#endif + ctx->fsc = true; + break; + case Opt_mfsymlinks: + ctx->mfsymlinks = true; + break; + case Opt_multiuser: + ctx->multiuser = true; + break; + case Opt_sloppy: + ctx->sloppy = true; + break; + case Opt_nosharesock: + ctx->nosharesock = true; + break; + case Opt_persistent: + if (result.negated) { + ctx->nopersistent = true; + if (ctx->persistent) { + cifs_dbg(VFS, + "persistenthandles mount options conflict\n"); + goto cifs_parse_mount_err; + } + } else { + ctx->persistent = true; + if ((ctx->nopersistent) || (ctx->resilient)) { + cifs_dbg(VFS, + "persistenthandles mount options conflict\n"); + goto cifs_parse_mount_err; + } + } + break; + case Opt_resilient: + if (result.negated) { + ctx->resilient = false; /* already the default */ + } else { + ctx->resilient = true; + if (ctx->persistent) { + cifs_dbg(VFS, + "persistenthandles mount options conflict\n"); + goto cifs_parse_mount_err; + } + } + break; 
+ case Opt_domainauto: + ctx->domainauto = true; + break; + case Opt_rdma: + ctx->rdma = true; + break; + } + /* case Opt_ignore: - is ignored as expected ... */ + + return 0; + + cifs_parse_mount_err: + return 1; +} + +int smb3_init_fs_context(struct fs_context *fc) +{ + struct smb3_fs_context *ctx; + char *nodename = utsname()->nodename; + int i; + + ctx = kzalloc(sizeof(struct smb3_fs_context), GFP_KERNEL); + if (unlikely(!ctx)) + return -ENOMEM; + + /* + * does not have to be perfect mapping since field is + * informational, only used for servers that do not support + * port 445 and it can be overridden at mount time + */ + memset(ctx->source_rfc1001_name, 0x20, RFC1001_NAME_LEN); + for (i = 0; i < strnlen(nodename, RFC1001_NAME_LEN); i++) + ctx->source_rfc1001_name[i] = toupper(nodename[i]); + + ctx->source_rfc1001_name[RFC1001_NAME_LEN] = 0; + /* + * null target name indicates to use *SMBSERVR default called name + * if we end up sending RFC1001 session initialize + */ + ctx->target_rfc1001_name[0] = 0; + ctx->cred_uid = current_uid(); + ctx->linux_uid = current_uid(); + ctx->linux_gid = current_gid(); + ctx->bsize = 1024 * 1024; /* can improve cp performance significantly */ + + /* + * default to SFM style remapping of seven reserved characters + * unless user overrides it or we negotiate CIFS POSIX where + * it is unnecessary. Can not simultaneously use more than one mapping + * since then readdir could list files that open could not open + */ + ctx->remap = true; + + /* default to only allowing write access to owner of the mount */ + ctx->dir_mode = ctx->file_mode = S_IRUGO | S_IXUGO | S_IWUSR; + + /* ctx->retry default is 0 (i.e. "soft" limited retry not hard retry) */ + /* default is always to request posix paths. 
*/ + ctx->posix_paths = 1; + /* default to using server inode numbers where available */ + ctx->server_ino = 1; + + /* default is to use strict cifs caching semantics */ + ctx->strict_io = true; + + ctx->actimeo = CIFS_DEF_ACTIMEO; + + /* Most clients set timeout to 0, allows server to use its default */ + ctx->handle_timeout = 0; /* See MS-SMB2 spec section 2.2.14.2.12 */ + + /* offer SMB2.1 and later (SMB3 etc). Secure and widely accepted */ + ctx->ops = &smb30_operations; + ctx->vals = &smbdefault_values; + + ctx->echo_interval = SMB_ECHO_INTERVAL_DEFAULT; + + /* default to no multichannel (single server connection) */ + ctx->multichannel = false; + ctx->max_channels = 1; + + ctx->backupuid_specified = false; /* no backup intent for a user */ + ctx->backupgid_specified = false; /* no backup intent for a group */ + +/* + * short int override_uid = -1; + * short int override_gid = -1; + * char *nodename = strdup(utsname()->nodename); + * struct sockaddr *dstaddr = (struct sockaddr *)&vol->dstaddr; + */ + + fc->fs_private = ctx; + fc->ops = &smb3_fs_context_ops; + return 0; +} + +void +smb3_cleanup_fs_context_contents(struct smb3_fs_context *ctx) +{ + if (ctx == NULL) + return; + + /* + * Make sure this stays in sync with smb3_fs_context_dup() + */ + kfree(ctx->mount_options); + ctx->mount_options = NULL; + kfree(ctx->username); + ctx->username = NULL; + kfree_sensitive(ctx->password); + ctx->password = NULL; + kfree(ctx->UNC); + ctx->UNC = NULL; + kfree(ctx->domainname); + ctx->domainname = NULL; + kfree(ctx->nodename); + ctx->nodename = NULL; + kfree(ctx->iocharset); + ctx->iocharset = NULL; + kfree(ctx->prepath); + ctx->prepath = NULL; +} + +void +smb3_cleanup_fs_context(struct smb3_fs_context *ctx) +{ + if (!ctx) + return; + smb3_cleanup_fs_context_contents(ctx); + kfree(ctx); +} + +void smb3_update_mnt_flags(struct cifs_sb_info *cifs_sb) +{ + struct smb3_fs_context *ctx = cifs_sb->ctx; + + if (ctx->nodfs) + cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_DFS; + else 
+ cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_NO_DFS; + + if (ctx->noperm) + cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_PERM; + else + cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_NO_PERM; + + if (ctx->setuids) + cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_SET_UID; + else + cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_SET_UID; + + if (ctx->setuidfromacl) + cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_UID_FROM_ACL; + else + cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_UID_FROM_ACL; + + if (ctx->server_ino) + cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_SERVER_INUM; + else + cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_SERVER_INUM; + + if (ctx->remap) + cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_MAP_SFM_CHR; + else + cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_MAP_SFM_CHR; + + if (ctx->sfu_remap) + cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_MAP_SPECIAL_CHR; + else + cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_MAP_SPECIAL_CHR; + + if (ctx->no_xattr) + cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_XATTR; + else + cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_NO_XATTR; + + if (ctx->sfu_emul) + cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_UNX_EMUL; + else + cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_UNX_EMUL; + + if (ctx->nobrl) + cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_BRL; + else + cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_NO_BRL; + + if (ctx->nohandlecache) + cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_HANDLE_CACHE; + else + cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_NO_HANDLE_CACHE; + + if (ctx->nostrictsync) + cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NOSSYNC; + else + cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_NOSSYNC; + + if (ctx->mand_lock) + cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NOPOSIXBRL; + else + cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_NOPOSIXBRL; + + if (ctx->rwpidforward) + cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_RWPIDFORWARD; + else + cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_RWPIDFORWARD; + + if (ctx->mode_ace) + cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_MODE_FROM_SID; + else + cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_MODE_FROM_SID; + + if (ctx->cifs_acl) + 
cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_CIFS_ACL; + else + cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_CIFS_ACL; + + if (ctx->backupuid_specified) + cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_CIFS_BACKUPUID; + else + cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_CIFS_BACKUPUID; + + if (ctx->backupgid_specified) + cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_CIFS_BACKUPGID; + else + cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_CIFS_BACKUPGID; + + if (ctx->override_uid) + cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_OVERR_UID; + else + cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_OVERR_UID; + + if (ctx->override_gid) + cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_OVERR_GID; + else + cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_OVERR_GID; + + if (ctx->dynperm) + cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DYNPERM; + else + cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_DYNPERM; + + if (ctx->fsc) + cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_FSCACHE; + else + cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_FSCACHE; + + if (ctx->multiuser) + cifs_sb->mnt_cifs_flags |= (CIFS_MOUNT_MULTIUSER | + CIFS_MOUNT_NO_PERM); + else + cifs_sb->mnt_cifs_flags &= ~(CIFS_MOUNT_MULTIUSER | + CIFS_MOUNT_NO_PERM); + + if (ctx->strict_io) + cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_STRICT_IO; + else + cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_STRICT_IO; + + if (ctx->direct_io) + cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DIRECT_IO; + else + cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_DIRECT_IO; + + if (ctx->mfsymlinks) + cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_MF_SYMLINKS; + else + cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_MF_SYMLINKS; + if (ctx->mfsymlinks) { + if (ctx->sfu_emul) { + /* + * Our SFU ("Services for Unix" emulation does not allow + * creating symlinks but does allow reading existing SFU + * symlinks (it does allow both creating and reading SFU + * style mknod and FIFOs though). When "mfsymlinks" and + * "sfu" are both enabled at the same time, it allows + * reading both types of symlinks, but will only create + * them with mfsymlinks format. 
This allows better + * Apple compatibility (probably better for Samba too) + * while still recognizing old Windows style symlinks. + */ + cifs_dbg(VFS, "mount options mfsymlinks and sfu both enabled\n"); + } + } + + return; +} diff --git a/fs/cifs/fs_context.h b/fs/cifs/fs_context.h index 886208a1b0ef..3358b33abcd0 100644 --- a/fs/cifs/fs_context.h +++ b/fs/cifs/fs_context.h @@ -9,8 +9,11 @@ #ifndef _FS_CONTEXT_H #define _FS_CONTEXT_H -#include <linux/parser.h> #include "cifsglob.h" +#include <linux/parser.h> +#include <linux/fs_parser.h> + +#define cifs_invalf(fc, fmt, ...) invalf(fc, fmt, ## __VA_ARGS__) enum smb_version { Smb_1 = 1, @@ -24,8 +27,6 @@ enum smb_version { Smb_version_err }; -int cifs_parse_smb_version(char *value, struct smb_vol *vol, bool is_smb3); - enum { Opt_cache_loose, Opt_cache_strict, @@ -35,8 +36,6 @@ enum { Opt_cache_err }; -int cifs_parse_cache_flavor(char *value, struct smb_vol *vol); - enum cifs_sec_param { Opt_sec_krb5, Opt_sec_krb5i, @@ -53,6 +52,220 @@ enum cifs_sec_param { Opt_sec_err }; -int cifs_parse_security_flavors(char *value, struct smb_vol *vol); +enum cifs_param { + /* Mount options that take no arguments */ + Opt_user_xattr, + Opt_forceuid, + Opt_forcegid, + Opt_noblocksend, + Opt_noautotune, + Opt_nolease, + Opt_hard, + Opt_soft, + Opt_perm, + Opt_nodelete, + Opt_mapposix, + Opt_mapchars, + Opt_nomapchars, + Opt_sfu, + Opt_nodfs, + Opt_posixpaths, + Opt_unix, + Opt_nocase, + Opt_brl, + Opt_handlecache, + Opt_forcemandatorylock, + Opt_setuidfromacl, + Opt_setuids, + Opt_dynperm, + Opt_intr, + Opt_strictsync, + Opt_serverino, + Opt_rwpidforward, + Opt_cifsacl, + Opt_acl, + Opt_locallease, + Opt_sign, + Opt_ignore_signature, + Opt_seal, + Opt_noac, + Opt_fsc, + Opt_mfsymlinks, + Opt_multiuser, + Opt_sloppy, + Opt_nosharesock, + Opt_persistent, + Opt_resilient, + Opt_domainauto, + Opt_rdma, + Opt_modesid, + Opt_rootfs, + Opt_multichannel, + Opt_compress, + Opt_witness, + + /* Mount options which take numeric value */ + 
Opt_backupuid, + Opt_backupgid, + Opt_uid, + Opt_cruid, + Opt_gid, + Opt_port, + Opt_file_mode, + Opt_dirmode, + Opt_min_enc_offload, + Opt_blocksize, + Opt_rsize, + Opt_wsize, + Opt_actimeo, + Opt_echo_interval, + Opt_max_credits, + Opt_snapshot, + Opt_max_channels, + Opt_handletimeout, + + /* Mount options which take string value */ + Opt_source, + Opt_user, + Opt_pass, + Opt_ip, + Opt_domain, + Opt_srcaddr, + Opt_iocharset, + Opt_netbiosname, + Opt_servern, + Opt_ver, + Opt_vers, + Opt_sec, + Opt_cache, + + /* Mount options to be ignored */ + Opt_ignore, + + Opt_err +}; + +struct smb3_fs_context { + bool uid_specified; + bool gid_specified; + bool sloppy; + bool got_ip; + bool got_version; + bool got_rsize; + bool got_wsize; + bool got_bsize; + unsigned short port; + + char *username; + char *password; + char *domainname; + char *UNC; + char *nodename; + char *iocharset; /* local code page for mapping to and from Unicode */ + char source_rfc1001_name[RFC1001_NAME_LEN_WITH_NULL]; /* clnt nb name */ + char target_rfc1001_name[RFC1001_NAME_LEN_WITH_NULL]; /* srvr nb name */ + kuid_t cred_uid; + kuid_t linux_uid; + kgid_t linux_gid; + kuid_t backupuid; + kgid_t backupgid; + umode_t file_mode; + umode_t dir_mode; + enum securityEnum sectype; /* sectype requested via mnt opts */ + bool sign; /* was signing requested via mnt opts? 
*/ + bool ignore_signature:1; + bool retry:1; + bool intr:1; + bool setuids:1; + bool setuidfromacl:1; + bool override_uid:1; + bool override_gid:1; + bool dynperm:1; + bool noperm:1; + bool nodelete:1; + bool mode_ace:1; + bool no_psx_acl:1; /* set if posix acl support should be disabled */ + bool cifs_acl:1; + bool backupuid_specified; /* mount option backupuid is specified */ + bool backupgid_specified; /* mount option backupgid is specified */ + bool no_xattr:1; /* set if xattr (EA) support should be disabled*/ + bool server_ino:1; /* use inode numbers from server ie UniqueId */ + bool direct_io:1; + bool strict_io:1; /* strict cache behavior */ + bool cache_ro:1; + bool cache_rw:1; + bool remap:1; /* set to remap seven reserved chars in filenames */ + bool sfu_remap:1; /* remap seven reserved chars ala SFU */ + bool posix_paths:1; /* unset to not ask for posix pathnames. */ + bool no_linux_ext:1; + bool linux_ext:1; + bool sfu_emul:1; + bool nullauth:1; /* attempt to authenticate with null user */ + bool nocase:1; /* request case insensitive filenames */ + bool nobrl:1; /* disable sending byte range locks to srv */ + bool nohandlecache:1; /* disable caching dir handles if srvr probs */ + bool mand_lock:1; /* send mandatory not posix byte range lock reqs */ + bool seal:1; /* request transport encryption on share */ + bool nodfs:1; /* Do not request DFS, even if available */ + bool local_lease:1; /* check leases only on local system, not remote */ + bool noblocksnd:1; + bool noautotune:1; + bool nostrictsync:1; /* do not force expensive SMBflush on every sync */ + bool no_lease:1; /* disable requesting leases */ + bool fsc:1; /* enable fscache */ + bool mfsymlinks:1; /* use Minshall+French Symlinks */ + bool multiuser:1; + bool rwpidforward:1; /* pid forward for read/write operations */ + bool nosharesock:1; + bool persistent:1; + bool nopersistent:1; + bool resilient:1; /* noresilient not required since not fored for CA */ + bool domainauto:1; + bool rdma:1; + 
bool multichannel:1; + bool use_client_guid:1; + /* reuse existing guid for multichannel */ + u8 client_guid[SMB2_CLIENT_GUID_SIZE]; + unsigned int bsize; + unsigned int rsize; + unsigned int wsize; + unsigned int min_offload; + bool sockopt_tcp_nodelay:1; + unsigned long actimeo; /* attribute cache timeout (jiffies) */ + struct smb_version_operations *ops; + struct smb_version_values *vals; + char *prepath; + struct sockaddr_storage dstaddr; /* destination address */ + struct sockaddr_storage srcaddr; /* allow binding to a local IP */ + struct nls_table *local_nls; /* This is a copy of the pointer in cifs_sb */ + unsigned int echo_interval; /* echo interval in secs */ + __u64 snapshot_time; /* needed for timewarp tokens */ + __u32 handle_timeout; /* persistent and durable handle timeout in ms */ + unsigned int max_credits; /* smb3 max_credits 10 < credits < 60000 */ + unsigned int max_channels; + __u16 compression; /* compression algorithm 0xFFFF default 0=disabled */ + bool rootfs:1; /* if it's a SMB root file system */ + bool witness:1; /* use witness protocol */ + + char *mount_options; +}; + +extern const struct fs_parameter_spec smb3_fs_parameters[]; + +extern int cifs_parse_cache_flavor(char *value, + struct smb3_fs_context *ctx); +extern int cifs_parse_security_flavors(char *value, + struct smb3_fs_context *ctx); +extern int smb3_init_fs_context(struct fs_context *fc); +extern void smb3_cleanup_fs_context_contents(struct smb3_fs_context *ctx); +extern void smb3_cleanup_fs_context(struct smb3_fs_context *ctx); + +static inline struct smb3_fs_context *smb3_fc2context(const struct fs_context *fc) +{ + return fc->fs_private; +} + +extern int smb3_fs_context_dup(struct smb3_fs_context *new_ctx, struct smb3_fs_context *ctx); +extern void smb3_update_mnt_flags(struct cifs_sb_info *cifs_sb); #endif diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c index da688185403c..20d24af33ee2 100644 --- a/fs/cifs/fscache.c +++ b/fs/cifs/fscache.c @@ -22,6 +22,7 @@ #include 
"cifsglob.h" #include "cifs_debug.h" #include "cifs_fs_sb.h" +#include "cifsproto.h" /* * Key layout of CIFS server cache index object diff --git a/fs/cifs/fscache.h b/fs/cifs/fscache.h index 1091633d2adb..e811f2dd7619 100644 --- a/fs/cifs/fscache.h +++ b/fs/cifs/fscache.h @@ -57,7 +57,6 @@ extern const struct fscache_cookie_def cifs_fscache_inode_object_def; extern int cifs_fscache_register(void); extern void cifs_fscache_unregister(void); -extern char *extract_sharename(const char *); /* * fscache.c diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 9ee5f304592f..a83b3a8ffaac 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -37,6 +37,7 @@ #include "cifs_fs_sb.h" #include "cifs_unicode.h" #include "fscache.h" +#include "fs_context.h" static void cifs_set_ops(struct inode *inode) @@ -294,7 +295,7 @@ cifs_unix_basic_to_fattr(struct cifs_fattr *fattr, FILE_UNIX_BASIC_INFO *info, break; } - fattr->cf_uid = cifs_sb->mnt_uid; + fattr->cf_uid = cifs_sb->ctx->linux_uid; if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID)) { u64 id = le64_to_cpu(info->Uid); if (id < ((uid_t)-1)) { @@ -304,7 +305,7 @@ cifs_unix_basic_to_fattr(struct cifs_fattr *fattr, FILE_UNIX_BASIC_INFO *info, } } - fattr->cf_gid = cifs_sb->mnt_gid; + fattr->cf_gid = cifs_sb->ctx->linux_gid; if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID)) { u64 id = le64_to_cpu(info->Gid); if (id < ((gid_t)-1)) { @@ -333,8 +334,8 @@ cifs_create_dfs_fattr(struct cifs_fattr *fattr, struct super_block *sb) memset(fattr, 0, sizeof(*fattr)); fattr->cf_mode = S_IFDIR | S_IXUGO | S_IRWXU; - fattr->cf_uid = cifs_sb->mnt_uid; - fattr->cf_gid = cifs_sb->mnt_gid; + fattr->cf_uid = cifs_sb->ctx->linux_uid; + fattr->cf_gid = cifs_sb->ctx->linux_gid; ktime_get_coarse_real_ts64(&fattr->cf_mtime); fattr->cf_atime = fattr->cf_ctime = fattr->cf_mtime; fattr->cf_nlink = 2; @@ -644,8 +645,8 @@ smb311_posix_info_to_fattr(struct cifs_fattr *fattr, struct smb311_posix_qinfo * } /* else if reparse point ... 
TODO: add support for FIFO and blk dev; special file types */ - fattr->cf_uid = cifs_sb->mnt_uid; /* TODO: map uid and gid from SID */ - fattr->cf_gid = cifs_sb->mnt_gid; + fattr->cf_uid = cifs_sb->ctx->linux_uid; /* TODO: map uid and gid from SID */ + fattr->cf_gid = cifs_sb->ctx->linux_gid; cifs_dbg(FYI, "POSIX query info: mode 0x%x uniqueid 0x%llx nlink %d\n", fattr->cf_mode, fattr->cf_uniqueid, fattr->cf_nlink); @@ -685,25 +686,25 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info, fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks); if (reparse_tag == IO_REPARSE_TAG_LX_SYMLINK) { - fattr->cf_mode |= S_IFLNK | cifs_sb->mnt_file_mode; + fattr->cf_mode |= S_IFLNK | cifs_sb->ctx->file_mode; fattr->cf_dtype = DT_LNK; } else if (reparse_tag == IO_REPARSE_TAG_LX_FIFO) { - fattr->cf_mode |= S_IFIFO | cifs_sb->mnt_file_mode; + fattr->cf_mode |= S_IFIFO | cifs_sb->ctx->file_mode; fattr->cf_dtype = DT_FIFO; } else if (reparse_tag == IO_REPARSE_TAG_AF_UNIX) { - fattr->cf_mode |= S_IFSOCK | cifs_sb->mnt_file_mode; + fattr->cf_mode |= S_IFSOCK | cifs_sb->ctx->file_mode; fattr->cf_dtype = DT_SOCK; } else if (reparse_tag == IO_REPARSE_TAG_LX_CHR) { - fattr->cf_mode |= S_IFCHR | cifs_sb->mnt_file_mode; + fattr->cf_mode |= S_IFCHR | cifs_sb->ctx->file_mode; fattr->cf_dtype = DT_CHR; } else if (reparse_tag == IO_REPARSE_TAG_LX_BLK) { - fattr->cf_mode |= S_IFBLK | cifs_sb->mnt_file_mode; + fattr->cf_mode |= S_IFBLK | cifs_sb->ctx->file_mode; fattr->cf_dtype = DT_BLK; } else if (symlink) { /* TODO add more reparse tag checks */ fattr->cf_mode = S_IFLNK; fattr->cf_dtype = DT_LNK; } else if (fattr->cf_cifsattrs & ATTR_DIRECTORY) { - fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode; + fattr->cf_mode = S_IFDIR | cifs_sb->ctx->dir_mode; fattr->cf_dtype = DT_DIR; /* * Server can return wrong NumberOfLinks value for directories @@ -712,7 +713,7 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info, if (!tcon->unix_ext) fattr->cf_flags |= 
CIFS_FATTR_UNKNOWN_NLINK; } else { - fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode; + fattr->cf_mode = S_IFREG | cifs_sb->ctx->file_mode; fattr->cf_dtype = DT_REG; /* clear write bits if ATTR_READONLY is set */ @@ -731,8 +732,8 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info, } } - fattr->cf_uid = cifs_sb->mnt_uid; - fattr->cf_gid = cifs_sb->mnt_gid; + fattr->cf_uid = cifs_sb->ctx->linux_uid; + fattr->cf_gid = cifs_sb->ctx->linux_gid; } static int @@ -771,6 +772,7 @@ cifs_get_file_info(struct file *filp) */ rc = 0; CIFS_I(inode)->time = 0; + goto cgfi_exit; default: goto cgfi_exit; } @@ -803,11 +805,15 @@ static __u64 simple_hashstr(const char *str) * cifs_backup_query_path_info - SMB1 fallback code to get ino * * Fallback code to get file metadata when we don't have access to - * @full_path (EACCES) and have backup creds. + * full_path (EACCES) and have backup creds. * - * @data will be set to search info result buffer - * @resp_buf will be set to cifs resp buf and needs to be freed with - * cifs_buf_release() when done with @data. 
+ * @xid: transaction id used to identify original request in logs + * @tcon: information about the server share we have mounted + * @sb: the superblock stores info such as disk space available + * @full_path: name of the file we are getting the metadata for + * @resp_buf: will be set to cifs resp buf and needs to be freed with + * cifs_buf_release() when done with @data + * @data: will be set to search info result buffer */ static int cifs_backup_query_path_info(int xid, @@ -1386,8 +1392,8 @@ iget_no_retry: set_nlink(inode, 2); inode->i_op = &cifs_ipc_inode_ops; inode->i_fop = &simple_dir_operations; - inode->i_uid = cifs_sb->mnt_uid; - inode->i_gid = cifs_sb->mnt_gid; + inode->i_uid = cifs_sb->ctx->linux_uid; + inode->i_gid = cifs_sb->ctx->linux_gid; spin_unlock(&inode->i_lock); } else if (rc) { iget_failed(inode); @@ -2192,11 +2198,11 @@ cifs_inode_needs_reval(struct inode *inode) if (!lookupCacheEnabled) return true; - if (!cifs_sb->actimeo) + if (!cifs_sb->ctx->actimeo) return true; if (!time_in_range(jiffies, cifs_i->time, - cifs_i->time + cifs_sb->actimeo)) + cifs_i->time + cifs_sb->ctx->actimeo)) return true; /* hardlinked files w/ noserverino get "special" treatment */ @@ -2228,7 +2234,9 @@ cifs_invalidate_mapping(struct inode *inode) /** * cifs_wait_bit_killable - helper for functions that are sleeping on bit locks - * @word: long word containing the bit lock + * + * @key: currently unused + * @mode: the task state to sleep in */ static int cifs_wait_bit_killable(struct wait_bit_key *key, int mode) @@ -2401,7 +2409,7 @@ int cifs_getattr(const struct path *path, struct kstat *stat, } generic_fillattr(inode, stat); - stat->blksize = cifs_sb->bsize; + stat->blksize = cifs_sb->ctx->bsize; stat->ino = CIFS_I(inode)->uniqueid; /* old CIFS Unix Extensions doesn't return create time */ @@ -2812,7 +2820,8 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) || (cifs_sb->mnt_cifs_flags & 
CIFS_MOUNT_MODE_FROM_SID)) { if (uid_valid(uid) || gid_valid(gid)) { - rc = id_mode_to_cifs_acl(inode, full_path, NO_CHANGE_64, + mode = NO_CHANGE_64; + rc = id_mode_to_cifs_acl(inode, full_path, &mode, uid, gid); if (rc) { cifs_dbg(FYI, "%s: Setting id failed with error: %d\n", @@ -2833,13 +2842,20 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) rc = 0; if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) || (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MODE_FROM_SID)) { - rc = id_mode_to_cifs_acl(inode, full_path, mode, + rc = id_mode_to_cifs_acl(inode, full_path, &mode, INVALID_UID, INVALID_GID); if (rc) { cifs_dbg(FYI, "%s: Setting ACL failed with error: %d\n", __func__, rc); goto cifs_setattr_exit; } + + /* + * In case of CIFS_MOUNT_CIFS_ACL, we cannot support all modes. + * Pick up the actual mode bits that were set. + */ + if (mode != attrs->ia_mode) + attrs->ia_mode = mode; } else if (((mode & S_IWUGO) == 0) && (cifsInode->cifsAttrs & ATTR_READONLY) == 0) { @@ -2862,10 +2878,10 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) attrs->ia_mode &= ~(S_IALLUGO); if (S_ISDIR(inode->i_mode)) attrs->ia_mode |= - cifs_sb->mnt_dir_mode; + cifs_sb->ctx->dir_mode; else attrs->ia_mode |= - cifs_sb->mnt_file_mode; + cifs_sb->ctx->file_mode; } } else if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)) { /* ignore mode change - ATTR_READONLY hasn't changed */ diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 1c14cf01dbef..82e176720ca6 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -35,6 +35,7 @@ #ifdef CONFIG_CIFS_DFS_UPCALL #include "dns_resolve.h" #endif +#include "fs_context.h" extern mempool_t *cifs_sm_req_poolp; extern mempool_t *cifs_req_poolp; @@ -632,11 +633,11 @@ bool backup_cred(struct cifs_sb_info *cifs_sb) { if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPUID) { - if (uid_eq(cifs_sb->mnt_backupuid, current_fsuid())) + if (uid_eq(cifs_sb->ctx->backupuid, current_fsuid())) return true; } if (cifs_sb->mnt_cifs_flags & 
CIFS_MOUNT_CIFS_BACKUPGID) { - if (in_group_p(cifs_sb->mnt_backupgid)) + if (in_group_p(cifs_sb->ctx->backupgid)) return true; } diff --git a/fs/cifs/netlink.c b/fs/cifs/netlink.c new file mode 100644 index 000000000000..5aaabe4cc0a7 --- /dev/null +++ b/fs/cifs/netlink.c @@ -0,0 +1,89 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Netlink routines for CIFS + * + * Copyright (c) 2020 Samuel Cabrero <[email protected]> + */ + +#include <net/genetlink.h> +#include <uapi/linux/cifs/cifs_netlink.h> + +#include "netlink.h" +#include "cifsglob.h" +#include "cifs_debug.h" +#include "cifs_swn.h" + +static const struct nla_policy cifs_genl_policy[CIFS_GENL_ATTR_MAX + 1] = { + [CIFS_GENL_ATTR_SWN_REGISTRATION_ID] = { .type = NLA_U32 }, + [CIFS_GENL_ATTR_SWN_NET_NAME] = { .type = NLA_STRING }, + [CIFS_GENL_ATTR_SWN_SHARE_NAME] = { .type = NLA_STRING }, + [CIFS_GENL_ATTR_SWN_IP] = { .len = sizeof(struct sockaddr_storage) }, + [CIFS_GENL_ATTR_SWN_NET_NAME_NOTIFY] = { .type = NLA_FLAG }, + [CIFS_GENL_ATTR_SWN_SHARE_NAME_NOTIFY] = { .type = NLA_FLAG }, + [CIFS_GENL_ATTR_SWN_IP_NOTIFY] = { .type = NLA_FLAG }, + [CIFS_GENL_ATTR_SWN_KRB_AUTH] = { .type = NLA_FLAG }, + [CIFS_GENL_ATTR_SWN_USER_NAME] = { .type = NLA_STRING }, + [CIFS_GENL_ATTR_SWN_PASSWORD] = { .type = NLA_STRING }, + [CIFS_GENL_ATTR_SWN_DOMAIN_NAME] = { .type = NLA_STRING }, + [CIFS_GENL_ATTR_SWN_NOTIFICATION_TYPE] = { .type = NLA_U32 }, + [CIFS_GENL_ATTR_SWN_RESOURCE_STATE] = { .type = NLA_U32 }, + [CIFS_GENL_ATTR_SWN_RESOURCE_NAME] = { .type = NLA_STRING}, +}; + +static struct genl_ops cifs_genl_ops[] = { + { + .cmd = CIFS_GENL_CMD_SWN_NOTIFY, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = cifs_swn_notify, + }, +}; + +static const struct genl_multicast_group cifs_genl_mcgrps[] = { + [CIFS_GENL_MCGRP_SWN] = { .name = CIFS_GENL_MCGRP_SWN_NAME }, +}; + +struct genl_family cifs_genl_family = { + .name = CIFS_GENL_NAME, + .version = CIFS_GENL_VERSION, + .hdrsize = 0, + .maxattr = 
CIFS_GENL_ATTR_MAX, + .module = THIS_MODULE, + .policy = cifs_genl_policy, + .ops = cifs_genl_ops, + .n_ops = ARRAY_SIZE(cifs_genl_ops), + .mcgrps = cifs_genl_mcgrps, + .n_mcgrps = ARRAY_SIZE(cifs_genl_mcgrps), +}; + +/** + * cifs_genl_init - Register generic netlink family + * + * Return zero if initialized successfully, otherwise non-zero. + */ +int cifs_genl_init(void) +{ + int ret; + + ret = genl_register_family(&cifs_genl_family); + if (ret < 0) { + cifs_dbg(VFS, "%s: failed to register netlink family\n", + __func__); + return ret; + } + + return 0; +} + +/** + * cifs_genl_exit - Unregister generic netlink family + */ +void cifs_genl_exit(void) +{ + int ret; + + ret = genl_unregister_family(&cifs_genl_family); + if (ret < 0) { + cifs_dbg(VFS, "%s: failed to unregister netlink family\n", + __func__); + } +} diff --git a/fs/cifs/netlink.h b/fs/cifs/netlink.h new file mode 100644 index 000000000000..e2fa8ed24c54 --- /dev/null +++ b/fs/cifs/netlink.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Netlink routines for CIFS + * + * Copyright (c) 2020 Samuel Cabrero <[email protected]> + */ + +#ifndef _CIFS_NETLINK_H +#define _CIFS_NETLINK_H + +extern struct genl_family cifs_genl_family; + +extern int cifs_genl_init(void); +extern void cifs_genl_exit(void); + +#endif /* _CIFS_NETLINK_H */ diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 799be3a5d25e..80bf4c6f4c7b 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -33,6 +33,7 @@ #include "cifs_fs_sb.h" #include "cifsfs.h" #include "smb2proto.h" +#include "fs_context.h" /* * To be safe - for UCS to UTF-8 with strings loaded with the rare long @@ -165,8 +166,8 @@ static bool reparse_file_needs_reval(const struct cifs_fattr *fattr) static void cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb) { - fattr->cf_uid = cifs_sb->mnt_uid; - fattr->cf_gid = cifs_sb->mnt_gid; + fattr->cf_uid = cifs_sb->ctx->linux_uid; + fattr->cf_gid = cifs_sb->ctx->linux_gid; /* * The 
IO_REPARSE_TAG_LX_ tags originally were used by WSL but they @@ -177,25 +178,25 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb) * reasonably map some of them to directories vs. files vs. symlinks */ if (fattr->cf_cifsattrs & ATTR_DIRECTORY) { - fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode; + fattr->cf_mode = S_IFDIR | cifs_sb->ctx->dir_mode; fattr->cf_dtype = DT_DIR; } else if (fattr->cf_cifstag == IO_REPARSE_TAG_LX_SYMLINK) { - fattr->cf_mode |= S_IFLNK | cifs_sb->mnt_file_mode; + fattr->cf_mode |= S_IFLNK | cifs_sb->ctx->file_mode; fattr->cf_dtype = DT_LNK; } else if (fattr->cf_cifstag == IO_REPARSE_TAG_LX_FIFO) { - fattr->cf_mode |= S_IFIFO | cifs_sb->mnt_file_mode; + fattr->cf_mode |= S_IFIFO | cifs_sb->ctx->file_mode; fattr->cf_dtype = DT_FIFO; } else if (fattr->cf_cifstag == IO_REPARSE_TAG_AF_UNIX) { - fattr->cf_mode |= S_IFSOCK | cifs_sb->mnt_file_mode; + fattr->cf_mode |= S_IFSOCK | cifs_sb->ctx->file_mode; fattr->cf_dtype = DT_SOCK; } else if (fattr->cf_cifstag == IO_REPARSE_TAG_LX_CHR) { - fattr->cf_mode |= S_IFCHR | cifs_sb->mnt_file_mode; + fattr->cf_mode |= S_IFCHR | cifs_sb->ctx->file_mode; fattr->cf_dtype = DT_CHR; } else if (fattr->cf_cifstag == IO_REPARSE_TAG_LX_BLK) { - fattr->cf_mode |= S_IFBLK | cifs_sb->mnt_file_mode; + fattr->cf_mode |= S_IFBLK | cifs_sb->ctx->file_mode; fattr->cf_dtype = DT_BLK; } else { /* TODO: should we mark some other reparse points (like DFSR) as directories? 
*/ - fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode; + fattr->cf_mode = S_IFREG | cifs_sb->ctx->file_mode; fattr->cf_dtype = DT_REG; } diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index de564368a887..213465718fa8 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -32,6 +32,11 @@ #include <linux/slab.h> #include "cifs_spnego.h" #include "smb2proto.h" +#include "fs_context.h" + +static int +cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses, + struct cifs_server_iface *iface); bool is_server_using_iface(struct TCP_Server_Info *server, @@ -70,7 +75,7 @@ bool is_ses_using_iface(struct cifs_ses *ses, struct cifs_server_iface *iface) } /* returns number of channels added */ -int cifs_try_adding_channels(struct cifs_ses *ses) +int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses) { int old_chan_count = ses->chan_count; int left = ses->chan_max - ses->chan_count; @@ -133,7 +138,7 @@ int cifs_try_adding_channels(struct cifs_ses *ses) continue; } - rc = cifs_ses_add_channel(ses, iface); + rc = cifs_ses_add_channel(cifs_sb, ses, iface); if (rc) { cifs_dbg(FYI, "failed to open extra channel on iface#%d rc=%d\n", i, rc); @@ -166,11 +171,12 @@ cifs_ses_find_chan(struct cifs_ses *ses, struct TCP_Server_Info *server) return NULL; } -int -cifs_ses_add_channel(struct cifs_ses *ses, struct cifs_server_iface *iface) +static int +cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses, + struct cifs_server_iface *iface) { struct cifs_chan *chan; - struct smb_vol vol = {NULL}; + struct smb3_fs_context ctx = {NULL}; static const char unc_fmt[] = "\\%s\\foo"; char unc[sizeof(unc_fmt)+SERVER_NAME_LEN_WITH_NULL] = {0}; struct sockaddr_in *ipv4 = (struct sockaddr_in *)&iface->sockaddr; @@ -188,67 +194,62 @@ cifs_ses_add_channel(struct cifs_ses *ses, struct cifs_server_iface *iface) &ipv6->sin6_addr); /* - * Setup a smb_vol with mostly the same info as the existing + * Setup a ctx with mostly the same info as the existing * 
session and overwrite it with the requested iface data. * * We need to setup at least the fields used for negprot and * sesssetup. * - * We only need the volume here, so we can reuse memory from + * We only need the ctx here, so we can reuse memory from * the session and server without caring about memory * management. */ /* Always make new connection for now (TODO?) */ - vol.nosharesock = true; + ctx.nosharesock = true; /* Auth */ - vol.domainauto = ses->domainAuto; - vol.domainname = ses->domainName; - vol.username = ses->user_name; - vol.password = ses->password; - vol.sectype = ses->sectype; - vol.sign = ses->sign; + ctx.domainauto = ses->domainAuto; + ctx.domainname = ses->domainName; + ctx.username = ses->user_name; + ctx.password = ses->password; + ctx.sectype = ses->sectype; + ctx.sign = ses->sign; /* UNC and paths */ /* XXX: Use ses->server->hostname? */ sprintf(unc, unc_fmt, ses->serverName); - vol.UNC = unc; - vol.prepath = ""; + ctx.UNC = unc; + ctx.prepath = ""; /* Reuse same version as master connection */ - vol.vals = ses->server->vals; - vol.ops = ses->server->ops; + ctx.vals = ses->server->vals; + ctx.ops = ses->server->ops; - vol.noblocksnd = ses->server->noblocksnd; - vol.noautotune = ses->server->noautotune; - vol.sockopt_tcp_nodelay = ses->server->tcp_nodelay; - vol.echo_interval = ses->server->echo_interval / HZ; + ctx.noblocksnd = ses->server->noblocksnd; + ctx.noautotune = ses->server->noautotune; + ctx.sockopt_tcp_nodelay = ses->server->tcp_nodelay; + ctx.echo_interval = ses->server->echo_interval / HZ; /* * This will be used for encoding/decoding user/domain/pw * during sess setup auth. - * - * XXX: We use the default for simplicity but the proper way - * would be to use the one that ses used, which is not - * stored. This might break when dealing with non-ascii - * strings. 
*/ - vol.local_nls = load_nls_default(); + ctx.local_nls = cifs_sb->local_nls; /* Use RDMA if possible */ - vol.rdma = iface->rdma_capable; - memcpy(&vol.dstaddr, &iface->sockaddr, sizeof(struct sockaddr_storage)); + ctx.rdma = iface->rdma_capable; + memcpy(&ctx.dstaddr, &iface->sockaddr, sizeof(struct sockaddr_storage)); /* reuse master con client guid */ - memcpy(&vol.client_guid, ses->server->client_guid, + memcpy(&ctx.client_guid, ses->server->client_guid, SMB2_CLIENT_GUID_SIZE); - vol.use_client_guid = true; + ctx.use_client_guid = true; mutex_lock(&ses->session_mutex); chan = ses->binding_chan = &ses->chans[ses->chan_count]; - chan->server = cifs_get_tcp_session(&vol); + chan->server = cifs_get_tcp_session(&ctx); if (IS_ERR(chan->server)) { rc = PTR_ERR(chan->server); chan->server = NULL; @@ -274,7 +275,7 @@ cifs_ses_add_channel(struct cifs_ses *ses, struct cifs_server_iface *iface) if (rc) goto out; - rc = cifs_setup_session(xid, ses, vol.local_nls); + rc = cifs_setup_session(xid, ses, cifs_sb->local_nls); if (rc) goto out; @@ -297,7 +298,6 @@ out: if (rc && chan->server) cifs_put_tcp_session(chan->server, 0); - unload_nls(vol.local_nls); return rc; } @@ -812,6 +812,7 @@ cifs_select_sectype(struct TCP_Server_Info *server, enum securityEnum requested) return NTLMv2; if (global_secflags & CIFSSEC_MAY_NTLM) return NTLM; + break; default: break; } diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 80287c26cfac..e31b939e628c 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -12,6 +12,7 @@ #include "cifs_debug.h" #include "cifspdu.h" #include "cifs_unicode.h" +#include "fs_context.h" /* * An NT cancel request header looks just like the original request except: @@ -428,15 +429,15 @@ cifs_negotiate(const unsigned int xid, struct cifs_ses *ses) } static unsigned int -cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info) +cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) { __u64 unix_cap = 
le64_to_cpu(tcon->fsUnixInfo.Capability); struct TCP_Server_Info *server = tcon->ses->server; unsigned int wsize; /* start with specified wsize, or default */ - if (volume_info->wsize) - wsize = volume_info->wsize; + if (ctx->wsize) + wsize = ctx->wsize; else if (tcon->unix_ext && (unix_cap & CIFS_UNIX_LARGE_WRITE_CAP)) wsize = CIFS_DEFAULT_IOSIZE; else @@ -463,7 +464,7 @@ cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info) } static unsigned int -cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info) +cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) { __u64 unix_cap = le64_to_cpu(tcon->fsUnixInfo.Capability); struct TCP_Server_Info *server = tcon->ses->server; @@ -488,7 +489,7 @@ cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info) else defsize = server->maxBuf - sizeof(READ_RSP); - rsize = volume_info->rsize ? volume_info->rsize : defsize; + rsize = ctx->rsize ? ctx->rsize : defsize; /* * no CAP_LARGE_READ_X? 
Then MS-CIFS states that we must limit this to @@ -1005,7 +1006,7 @@ cifs_is_read_op(__u32 oplock) static unsigned int cifs_wp_retry_size(struct inode *inode) { - return CIFS_SB(inode->i_sb)->wsize; + return CIFS_SB(inode->i_sb)->ctx->wsize; } static bool diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index d88e2683626e..60d4bd1eae2b 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -94,6 +94,8 @@ static const __le16 smb2_rsp_struct_sizes[NUMBER_OF_SMB2_COMMANDS] = { /* SMB2_OPLOCK_BREAK */ cpu_to_le16(24) }; +#define SMB311_NEGPROT_BASE_SIZE (sizeof(struct smb2_sync_hdr) + sizeof(struct smb2_negotiate_rsp)) + static __u32 get_neg_ctxt_len(struct smb2_sync_hdr *hdr, __u32 len, __u32 non_ctxlen) { @@ -107,13 +109,28 @@ static __u32 get_neg_ctxt_len(struct smb2_sync_hdr *hdr, __u32 len, (pneg_rsp->DialectRevision != cpu_to_le16(SMB311_PROT_ID))) return 0; - /* Make sure that negotiate contexts start after gss security blob */ + /* + * if SPNEGO blob present (ie the RFC2478 GSS info which indicates + * which security mechanisms the server supports) make sure that + * the negotiate contexts start after it + */ nc_offset = le32_to_cpu(pneg_rsp->NegotiateContextOffset); - if (nc_offset < non_ctxlen) { - pr_warn_once("Invalid negotiate context offset\n"); + /* + * non_ctxlen is at least shdr->StructureSize + pdu->StructureSize2 + * and the latter is 1 byte bigger than the fix-sized area of the + * NEGOTIATE response + */ + if (nc_offset + 1 < non_ctxlen) { + pr_warn_once("Invalid negotiate context offset %d\n", nc_offset); return 0; - } - size_of_pad_before_neg_ctxts = nc_offset - non_ctxlen; + } else if (nc_offset + 1 == non_ctxlen) { + cifs_dbg(FYI, "no SPNEGO security blob in negprot rsp\n"); + size_of_pad_before_neg_ctxts = 0; + } else if (non_ctxlen == SMB311_NEGPROT_BASE_SIZE) + /* has padding, but no SPNEGO blob */ + size_of_pad_before_neg_ctxts = nc_offset - non_ctxlen + 1; + else + size_of_pad_before_neg_ctxts = nc_offset - non_ctxlen; /* Verify 
that at least minimal negotiate contexts fit within frame */ if (len < nc_offset + (neg_count * sizeof(struct smb2_neg_context))) { @@ -859,6 +876,10 @@ smb2_handle_cancelled_mid(char *buffer, struct TCP_Server_Info *server) * * Assumes @iov does not contain the rfc1002 length and iov[0] has the * SMB2 header. + * + * @ses: server session structure + * @iov: array containing the SMB request we will send to the server + * @nvec: number of array entries for the iov */ int smb311_update_preauth_hash(struct cifs_ses *ses, struct kvec *iov, int nvec) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 3d914d7d0d11..f19274857292 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -24,6 +24,7 @@ #include "smb2glob.h" #include "cifs_ioctl.h" #include "smbdirect.h" +#include "fs_context.h" /* Change credits for different ops and return the total number of credits */ static int @@ -99,9 +100,10 @@ smb2_add_credits(struct TCP_Server_Info *server, spin_unlock(&server->req_lock); wake_up(&server->request_q); - if (reconnect_detected) + if (reconnect_detected) { cifs_dbg(FYI, "trying to put %d credits from the old server instance %d\n", add, instance); + } if (server->tcpStatus == CifsNeedReconnect || server->tcpStatus == CifsExiting) @@ -123,7 +125,7 @@ smb2_add_credits(struct TCP_Server_Info *server, default: trace_smb3_add_credits(server->CurrentMid, server->hostname, rc, add); - cifs_dbg(FYI, "add %u credits total=%d\n", add, rc); + cifs_dbg(FYI, "%s: added %u credits total=%d\n", __func__, add, rc); } } @@ -135,6 +137,11 @@ smb2_set_credits(struct TCP_Server_Info *server, const int val) if (val == 1) server->reconnect_instance++; spin_unlock(&server->req_lock); + + trace_smb3_set_credits(server->CurrentMid, + server->hostname, val, val); + cifs_dbg(FYI, "%s: set %u credits\n", __func__, val); + /* don't log while holding the lock */ if (val == 1) cifs_dbg(FYI, "set credits to 1 due to smb2 reconnect\n"); @@ -201,6 +208,7 @@ smb2_wait_mtu_credits(struct 
TCP_Server_Info *server, unsigned int size, DIV_ROUND_UP(*num, SMB2_MAX_BUFFER_SIZE); credits->instance = server->reconnect_instance; server->credits -= credits->value; + scredits = server->credits; server->in_flight++; if (server->in_flight > server->max_in_flight) server->max_in_flight = server->in_flight; @@ -208,6 +216,12 @@ smb2_wait_mtu_credits(struct TCP_Server_Info *server, unsigned int size, } } spin_unlock(&server->req_lock); + + trace_smb3_add_credits(server->CurrentMid, + server->hostname, scredits, -(credits->value)); + cifs_dbg(FYI, "%s: removed %u credits total=%d\n", + __func__, credits->value, scredits); + return rc; } @@ -217,13 +231,17 @@ smb2_adjust_credits(struct TCP_Server_Info *server, const unsigned int payload_size) { int new_val = DIV_ROUND_UP(payload_size, SMB2_MAX_BUFFER_SIZE); + int scredits; if (!credits->value || credits->value == new_val) return 0; if (credits->value < new_val) { - WARN_ONCE(1, "request has less credits (%d) than required (%d)", - credits->value, new_val); + trace_smb3_too_many_credits(server->CurrentMid, + server->hostname, 0, credits->value - new_val); + cifs_server_dbg(VFS, "request has less credits (%d) than required (%d)", + credits->value, new_val); + return -ENOTSUPP; } @@ -231,15 +249,24 @@ smb2_adjust_credits(struct TCP_Server_Info *server, if (server->reconnect_instance != credits->instance) { spin_unlock(&server->req_lock); + trace_smb3_reconnect_detected(server->CurrentMid, + server->hostname, 0, 0); cifs_server_dbg(VFS, "trying to return %d credits to old session\n", credits->value - new_val); return -EAGAIN; } server->credits += credits->value - new_val; + scredits = server->credits; spin_unlock(&server->req_lock); wake_up(&server->request_q); credits->value = new_val; + + trace_smb3_add_credits(server->CurrentMid, + server->hostname, scredits, credits->value - new_val); + cifs_dbg(FYI, "%s: adjust added %u credits total=%d\n", + __func__, credits->value - new_val, scredits); + return 0; } @@ -339,13 
+366,13 @@ smb2_negotiate(const unsigned int xid, struct cifs_ses *ses) } static unsigned int -smb2_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info) +smb2_negotiate_wsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) { struct TCP_Server_Info *server = tcon->ses->server; unsigned int wsize; /* start with specified wsize, or default */ - wsize = volume_info->wsize ? volume_info->wsize : CIFS_DEFAULT_IOSIZE; + wsize = ctx->wsize ? ctx->wsize : CIFS_DEFAULT_IOSIZE; wsize = min_t(unsigned int, wsize, server->max_write); if (!(server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU)) wsize = min_t(unsigned int, wsize, SMB2_MAX_BUFFER_SIZE); @@ -354,13 +381,13 @@ smb2_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info) } static unsigned int -smb3_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info) +smb3_negotiate_wsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) { struct TCP_Server_Info *server = tcon->ses->server; unsigned int wsize; /* start with specified wsize, or default */ - wsize = volume_info->wsize ? volume_info->wsize : SMB3_DEFAULT_IOSIZE; + wsize = ctx->wsize ? ctx->wsize : SMB3_DEFAULT_IOSIZE; wsize = min_t(unsigned int, wsize, server->max_write); #ifdef CONFIG_CIFS_SMB_DIRECT if (server->rdma) { @@ -386,13 +413,13 @@ smb3_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info) } static unsigned int -smb2_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info) +smb2_negotiate_rsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) { struct TCP_Server_Info *server = tcon->ses->server; unsigned int rsize; /* start with specified rsize, or default */ - rsize = volume_info->rsize ? volume_info->rsize : CIFS_DEFAULT_IOSIZE; + rsize = ctx->rsize ? 
ctx->rsize : CIFS_DEFAULT_IOSIZE; rsize = min_t(unsigned int, rsize, server->max_read); if (!(server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU)) @@ -402,13 +429,13 @@ smb2_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info) } static unsigned int -smb3_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info) +smb3_negotiate_rsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) { struct TCP_Server_Info *server = tcon->ses->server; unsigned int rsize; /* start with specified rsize, or default */ - rsize = volume_info->rsize ? volume_info->rsize : SMB3_DEFAULT_IOSIZE; + rsize = ctx->rsize ? ctx->rsize : SMB3_DEFAULT_IOSIZE; rsize = min_t(unsigned int, rsize, server->max_read); #ifdef CONFIG_CIFS_SMB_DIRECT if (server->rdma) { @@ -477,7 +504,8 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf, goto out; } - if (bytes_left || p->Next) + /* Azure rounds the buffer size up 8, to a 16 byte boundary */ + if ((bytes_left > 8) || p->Next) cifs_dbg(VFS, "%s: incomplete interface info\n", __func__); @@ -2341,6 +2369,7 @@ static bool smb2_is_status_pending(char *buf, struct TCP_Server_Info *server) { struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buf; + int scredits; if (shdr->Status != STATUS_PENDING) return false; @@ -2348,8 +2377,14 @@ smb2_is_status_pending(char *buf, struct TCP_Server_Info *server) if (shdr->CreditRequest) { spin_lock(&server->req_lock); server->credits += le16_to_cpu(shdr->CreditRequest); + scredits = server->credits; spin_unlock(&server->req_lock); wake_up(&server->request_q); + + trace_smb3_add_credits(server->CurrentMid, + server->hostname, scredits, le16_to_cpu(shdr->CreditRequest)); + cifs_dbg(FYI, "%s: status pending add %u credits total=%d\n", + __func__, le16_to_cpu(shdr->CreditRequest), scredits); } return true; @@ -3179,7 +3214,7 @@ smb2_query_reparse_tag(const unsigned int xid, struct cifs_tcon *tcon, static struct cifs_ntsd * get_smb2_acl_by_fid(struct cifs_sb_info *cifs_sb, - const 
struct cifs_fid *cifsfid, u32 *pacllen) + const struct cifs_fid *cifsfid, u32 *pacllen, u32 info) { struct cifs_ntsd *pntsd = NULL; unsigned int xid; @@ -3193,7 +3228,8 @@ get_smb2_acl_by_fid(struct cifs_sb_info *cifs_sb, cifs_dbg(FYI, "trying to get acl\n"); rc = SMB2_query_acl(xid, tlink_tcon(tlink), cifsfid->persistent_fid, - cifsfid->volatile_fid, (void **)&pntsd, pacllen); + cifsfid->volatile_fid, (void **)&pntsd, pacllen, + info); free_xid(xid); cifs_put_tlink(tlink); @@ -3207,7 +3243,7 @@ get_smb2_acl_by_fid(struct cifs_sb_info *cifs_sb, static struct cifs_ntsd * get_smb2_acl_by_path(struct cifs_sb_info *cifs_sb, - const char *path, u32 *pacllen) + const char *path, u32 *pacllen, u32 info) { struct cifs_ntsd *pntsd = NULL; u8 oplock = SMB2_OPLOCK_LEVEL_NONE; @@ -3245,12 +3281,16 @@ get_smb2_acl_by_path(struct cifs_sb_info *cifs_sb, oparms.fid = &fid; oparms.reconnect = false; + if (info & SACL_SECINFO) + oparms.desired_access |= SYSTEM_SECURITY; + rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL, NULL, NULL); kfree(utf16_path); if (!rc) { rc = SMB2_query_acl(xid, tlink_tcon(tlink), fid.persistent_fid, - fid.volatile_fid, (void **)&pntsd, pacllen); + fid.volatile_fid, (void **)&pntsd, pacllen, + info); SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); } @@ -3284,10 +3324,12 @@ set_smb2_acl(struct cifs_ntsd *pnntsd, __u32 acllen, tcon = tlink_tcon(tlink); xid = get_xid(); - if (aclflag == CIFS_ACL_OWNER || aclflag == CIFS_ACL_GROUP) - access_flags = WRITE_OWNER; - else - access_flags = WRITE_DAC; + if (aclflag & CIFS_ACL_OWNER || aclflag & CIFS_ACL_GROUP) + access_flags |= WRITE_OWNER; + if (aclflag & CIFS_ACL_SACL) + access_flags |= SYSTEM_SECURITY; + if (aclflag & CIFS_ACL_DACL) + access_flags |= WRITE_DAC; utf16_path = cifs_convert_path_to_utf16(path, cifs_sb); if (!utf16_path) { @@ -3321,18 +3363,18 @@ set_smb2_acl(struct cifs_ntsd *pnntsd, __u32 acllen, /* Retrieve an ACL from the server */ static struct cifs_ntsd * 
get_smb2_acl(struct cifs_sb_info *cifs_sb, - struct inode *inode, const char *path, - u32 *pacllen) + struct inode *inode, const char *path, + u32 *pacllen, u32 info) { struct cifs_ntsd *pntsd = NULL; struct cifsFileInfo *open_file = NULL; - if (inode) + if (inode && !(info & SACL_SECINFO)) open_file = find_readable_file(CIFS_I(inode), true); - if (!open_file) - return get_smb2_acl_by_path(cifs_sb, path, pacllen); + if (!open_file || (info & SACL_SECINFO)) + return get_smb2_acl_by_path(cifs_sb, path, pacllen, info); - pntsd = get_smb2_acl_by_fid(cifs_sb, &open_file->fid, pacllen); + pntsd = get_smb2_acl_by_fid(cifs_sb, &open_file->fid, pacllen, info); cifsFileInfo_put(open_file); return pntsd; } @@ -3949,7 +3991,7 @@ smb3_parse_lease_buf(void *buf, unsigned int *epoch, char *lease_key) static unsigned int smb2_wp_retry_size(struct inode *inode) { - return min_t(unsigned int, CIFS_SB(inode->i_sb)->wsize, + return min_t(unsigned int, CIFS_SB(inode->i_sb)->ctx->wsize, SMB2_MAX_BUFFER_SIZE); } diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index acb72705062d..067eb44c7baa 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -427,8 +427,8 @@ build_preauth_ctxt(struct smb2_preauth_neg_context *pneg_ctxt) pneg_ctxt->ContextType = SMB2_PREAUTH_INTEGRITY_CAPABILITIES; pneg_ctxt->DataLength = cpu_to_le16(38); pneg_ctxt->HashAlgorithmCount = cpu_to_le16(1); - pneg_ctxt->SaltLength = cpu_to_le16(SMB311_SALT_SIZE); - get_random_bytes(pneg_ctxt->Salt, SMB311_SALT_SIZE); + pneg_ctxt->SaltLength = cpu_to_le16(SMB311_LINUX_CLIENT_SALT_SIZE); + get_random_bytes(pneg_ctxt->Salt, SMB311_LINUX_CLIENT_SALT_SIZE); pneg_ctxt->HashAlgorithms = SMB2_PREAUTH_INTEGRITY_SHA512; } @@ -566,6 +566,9 @@ static void decode_preauth_context(struct smb2_preauth_neg_context *ctxt) if (len < MIN_PREAUTH_CTXT_DATA_LEN) { pr_warn_once("server sent bad preauth context\n"); return; + } else if (len < MIN_PREAUTH_CTXT_DATA_LEN + le16_to_cpu(ctxt->SaltLength)) { + pr_warn_once("server sent invalid 
SaltLength\n"); + return; } if (le16_to_cpu(ctxt->HashAlgorithmCount) != 1) pr_warn_once("Invalid SMB3 hash algorithm count\n"); @@ -3476,10 +3479,11 @@ SMB311_posix_query_info(const unsigned int xid, struct cifs_tcon *tcon, int SMB2_query_acl(const unsigned int xid, struct cifs_tcon *tcon, - u64 persistent_fid, u64 volatile_fid, - void **data, u32 *plen) + u64 persistent_fid, u64 volatile_fid, + void **data, u32 *plen, u32 extra_info) { - __u32 additional_info = OWNER_SECINFO | GROUP_SECINFO | DACL_SECINFO; + __u32 additional_info = OWNER_SECINFO | GROUP_SECINFO | DACL_SECINFO | + extra_info; *plen = 0; return query_info(xid, tcon, persistent_fid, volatile_fid, diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index fa57b03ca98c..204a622b89ed 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -333,12 +333,20 @@ struct smb2_neg_context { /* Followed by array of data */ } __packed; -#define SMB311_SALT_SIZE 32 +#define SMB311_LINUX_CLIENT_SALT_SIZE 32 /* Hash Algorithm Types */ #define SMB2_PREAUTH_INTEGRITY_SHA512 cpu_to_le16(0x0001) #define SMB2_PREAUTH_HASH_SIZE 64 -#define MIN_PREAUTH_CTXT_DATA_LEN (SMB311_SALT_SIZE + 6) +/* + * SaltLength that the server send can be zero, so the only three required + * fields (all __le16) end up six bytes total, so the minimum context data len + * in the response is six bytes which accounts for + * + * HashAlgorithmCount, SaltLength, and 1 HashAlgorithm. 
+ */ +#define MIN_PREAUTH_CTXT_DATA_LEN 6 + struct smb2_preauth_neg_context { __le16 ContextType; /* 1 */ __le16 DataLength; @@ -346,7 +354,7 @@ struct smb2_preauth_neg_context { __le16 HashAlgorithmCount; /* 1 */ __le16 SaltLength; __le16 HashAlgorithms; /* HashAlgorithms[0] since only one defined */ - __u8 Salt[SMB311_SALT_SIZE]; + __u8 Salt[SMB311_LINUX_CLIENT_SALT_SIZE]; } __packed; /* Encryption Algorithms Ciphers */ diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index d4110447ee3a..9565e27681a5 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -200,8 +200,8 @@ extern int SMB2_query_info_init(struct cifs_tcon *tcon, size_t input_len, void *input); extern void SMB2_query_info_free(struct smb_rqst *rqst); extern int SMB2_query_acl(const unsigned int xid, struct cifs_tcon *tcon, - u64 persistent_file_id, u64 volatile_file_id, - void **data, unsigned int *plen); + u64 persistent_file_id, u64 volatile_file_id, + void **data, unsigned int *plen, u32 info); extern int SMB2_get_srv_num(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, u64 volatile_fid, __le64 *uniqueid); diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c index b029ed31ef91..10dfe5006792 100644 --- a/fs/cifs/smbdirect.c +++ b/fs/cifs/smbdirect.c @@ -246,6 +246,7 @@ smbd_qp_async_error_upcall(struct ib_event *event, void *context) case IB_EVENT_CQ_ERR: case IB_EVENT_QP_FATAL: smbd_disconnect_rdma_connection(info); + break; default: break; diff --git a/fs/cifs/trace.h b/fs/cifs/trace.h index 90e0fab69bb8..c3d1a584f251 100644 --- a/fs/cifs/trace.h +++ b/fs/cifs/trace.h @@ -909,8 +909,12 @@ DEFINE_EVENT(smb3_credit_class, smb3_##name, \ TP_ARGS(currmid, hostname, credits, credits_to_add)) DEFINE_SMB3_CREDIT_EVENT(reconnect_with_invalid_credits); +DEFINE_SMB3_CREDIT_EVENT(reconnect_detected); DEFINE_SMB3_CREDIT_EVENT(credit_timeout); +DEFINE_SMB3_CREDIT_EVENT(insufficient_credits); +DEFINE_SMB3_CREDIT_EVENT(too_many_credits); DEFINE_SMB3_CREDIT_EVENT(add_credits); 
+DEFINE_SMB3_CREDIT_EVENT(set_credits); #endif /* _CIFS_TRACE_H */ diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 36b2ece43403..e9abb41aa89b 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -527,6 +527,7 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits, int *credits; int optype; long int t; + int scredits = server->credits; if (timeout < 0) t = MAX_JIFFY_OFFSET; @@ -624,12 +625,18 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits, /* update # of requests on the wire to server */ if ((flags & CIFS_TIMEOUT_MASK) != CIFS_BLOCKING_OP) { *credits -= num_credits; + scredits = *credits; server->in_flight += num_credits; if (server->in_flight > server->max_in_flight) server->max_in_flight = server->in_flight; *instance = server->reconnect_instance; } spin_unlock(&server->req_lock); + + trace_smb3_add_credits(server->CurrentMid, + server->hostname, scredits, -(num_credits)); + cifs_dbg(FYI, "%s: remove %u credits total=%d\n", + __func__, num_credits, scredits); break; } } @@ -649,10 +656,14 @@ wait_for_compound_request(struct TCP_Server_Info *server, int num, const int flags, unsigned int *instance) { int *credits; + int scredits, sin_flight; credits = server->ops->get_credits_field(server, flags & CIFS_OP_MASK); spin_lock(&server->req_lock); + scredits = *credits; + sin_flight = server->in_flight; + if (*credits < num) { /* * Return immediately if not too many requests in flight since @@ -660,6 +671,10 @@ wait_for_compound_request(struct TCP_Server_Info *server, int num, */ if (server->in_flight < num - *credits) { spin_unlock(&server->req_lock); + trace_smb3_insufficient_credits(server->CurrentMid, + server->hostname, scredits, sin_flight); + cifs_dbg(FYI, "%s: %d requests in flight, needed %d total=%d\n", + __func__, sin_flight, num, scredits); return -ENOTSUPP; } } diff --git a/fs/cifs/unc.c b/fs/cifs/unc.c new file mode 100644 index 000000000000..394aa00cea40 --- /dev/null +++ 
b/fs/cifs/unc.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2020, Microsoft Corporation. + * + * Author(s): Steve French <[email protected]> + * Suresh Jayaraman <[email protected]> + * Jeff Layton <[email protected]> + */ + +#include <linux/fs.h> +#include <linux/slab.h> +#include <linux/inet.h> +#include <linux/ctype.h> +#include "cifsglob.h" +#include "cifsproto.h" + +/* extract the host portion of the UNC string */ +char *extract_hostname(const char *unc) +{ + const char *src; + char *dst, *delim; + unsigned int len; + + /* skip double chars at beginning of string */ + /* BB: check validity of these bytes? */ + if (strlen(unc) < 3) + return ERR_PTR(-EINVAL); + for (src = unc; *src && *src == '\\'; src++) + ; + if (!*src) + return ERR_PTR(-EINVAL); + + /* delimiter between hostname and sharename is always '\\' now */ + delim = strchr(src, '\\'); + if (!delim) + return ERR_PTR(-EINVAL); + + len = delim - src; + dst = kmalloc((len + 1), GFP_KERNEL); + if (dst == NULL) + return ERR_PTR(-ENOMEM); + + memcpy(dst, src, len); + dst[len] = '\0'; + + return dst; +} + +char *extract_sharename(const char *unc) +{ + const char *src; + char *delim, *dst; + int len; + + /* skip double chars at the beginning */ + src = unc + 2; + + /* share name is always preceded by '\\' now */ + delim = strchr(src, '\\'); + if (!delim) + return ERR_PTR(-EINVAL); + delim++; + len = strlen(delim); + + /* caller has to free the memory */ + dst = kstrndup(delim, len, GFP_KERNEL); + if (!dst) + return ERR_PTR(-ENOMEM); + + return dst; +} diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c index b8299173ea7e..6b658a1172ef 100644 --- a/fs/cifs/xattr.c +++ b/fs/cifs/xattr.c @@ -34,6 +34,7 @@ #define MAX_EA_VALUE_SIZE CIFSMaxBufSize #define CIFS_XATTR_CIFS_ACL "system.cifs_acl" /* DACL only */ #define CIFS_XATTR_CIFS_NTSD "system.cifs_ntsd" /* owner plus DACL */ +#define CIFS_XATTR_CIFS_NTSD_FULL "system.cifs_ntsd_full" /* owner/DACL/SACL */ #define 
CIFS_XATTR_ATTRIB "cifs.dosattrib" /* full name: user.cifs.dosattrib */ #define CIFS_XATTR_CREATETIME "cifs.creationtime" /* user.cifs.creationtime */ /* @@ -43,12 +44,13 @@ */ #define SMB3_XATTR_CIFS_ACL "system.smb3_acl" /* DACL only */ #define SMB3_XATTR_CIFS_NTSD "system.smb3_ntsd" /* owner plus DACL */ +#define SMB3_XATTR_CIFS_NTSD_FULL "system.smb3_ntsd_full" /* owner/DACL/SACL */ #define SMB3_XATTR_ATTRIB "smb3.dosattrib" /* full name: user.smb3.dosattrib */ #define SMB3_XATTR_CREATETIME "smb3.creationtime" /* user.smb3.creationtime */ /* BB need to add server (Samba e.g) support for security and trusted prefix */ enum { XATTR_USER, XATTR_CIFS_ACL, XATTR_ACL_ACCESS, XATTR_ACL_DEFAULT, - XATTR_CIFS_NTSD }; + XATTR_CIFS_NTSD, XATTR_CIFS_NTSD_FULL }; static int cifs_attrib_set(unsigned int xid, struct cifs_tcon *pTcon, struct inode *inode, char *full_path, @@ -164,7 +166,8 @@ static int cifs_xattr_set(const struct xattr_handler *handler, break; case XATTR_CIFS_ACL: - case XATTR_CIFS_NTSD: { + case XATTR_CIFS_NTSD: + case XATTR_CIFS_NTSD_FULL: { struct cifs_ntsd *pacl; if (!value) @@ -174,23 +177,27 @@ static int cifs_xattr_set(const struct xattr_handler *handler, rc = -ENOMEM; } else { memcpy(pacl, value, size); - if (value && - pTcon->ses->server->ops->set_acl) { + if (pTcon->ses->server->ops->set_acl) { + int aclflags = 0; rc = 0; - if (handler->flags == XATTR_CIFS_NTSD) { - /* set owner and DACL */ - rc = pTcon->ses->server->ops->set_acl( - pacl, size, inode, - full_path, - CIFS_ACL_OWNER); - } - if (rc == 0) { - /* set DACL */ - rc = pTcon->ses->server->ops->set_acl( - pacl, size, inode, - full_path, - CIFS_ACL_DACL); + + switch (handler->flags) { + case XATTR_CIFS_NTSD_FULL: + aclflags = (CIFS_ACL_OWNER | + CIFS_ACL_DACL | + CIFS_ACL_SACL); + break; + case XATTR_CIFS_NTSD: + aclflags = (CIFS_ACL_OWNER | + CIFS_ACL_DACL); + break; + case XATTR_CIFS_ACL: + default: + aclflags = CIFS_ACL_DACL; } + + rc = pTcon->ses->server->ops->set_acl(pacl, + size, inode, 
full_path, aclflags); } else { rc = -EOPNOTSUPP; } @@ -327,16 +334,25 @@ static int cifs_xattr_get(const struct xattr_handler *handler, break; case XATTR_CIFS_ACL: - case XATTR_CIFS_NTSD: { - /* the whole ntsd is fetched regardless */ - u32 acllen; + case XATTR_CIFS_NTSD: + case XATTR_CIFS_NTSD_FULL: { + /* + * fetch owner, DACL, and SACL if asked for full descriptor, + * fetch owner and DACL otherwise + */ + u32 acllen, extra_info; struct cifs_ntsd *pacl; if (pTcon->ses->server->ops->get_acl == NULL) goto out; /* rc already EOPNOTSUPP */ + if (handler->flags == XATTR_CIFS_NTSD_FULL) { + extra_info = SACL_SECINFO; + } else { + extra_info = 0; + } pacl = pTcon->ses->server->ops->get_acl(cifs_sb, - inode, full_path, &acllen); + inode, full_path, &acllen, extra_info); if (IS_ERR(pacl)) { rc = PTR_ERR(pacl); cifs_dbg(VFS, "%s: error %zd getting sec desc\n", @@ -486,6 +502,27 @@ static const struct xattr_handler smb3_ntsd_xattr_handler = { .set = cifs_xattr_set, }; +static const struct xattr_handler cifs_cifs_ntsd_full_xattr_handler = { + .name = CIFS_XATTR_CIFS_NTSD_FULL, + .flags = XATTR_CIFS_NTSD_FULL, + .get = cifs_xattr_get, + .set = cifs_xattr_set, +}; + +/* + * Although this is just an alias for the above, need to move away from + * confusing users and using the 20 year old term 'cifs' when it is no + * longer secure and was replaced by SMB2/SMB3 a long time ago, and + * SMB3 and later are highly secure. 
+ */ +static const struct xattr_handler smb3_ntsd_full_xattr_handler = { + .name = SMB3_XATTR_CIFS_NTSD_FULL, + .flags = XATTR_CIFS_NTSD_FULL, + .get = cifs_xattr_get, + .set = cifs_xattr_set, +}; + + static const struct xattr_handler cifs_posix_acl_access_xattr_handler = { .name = XATTR_NAME_POSIX_ACL_ACCESS, .flags = XATTR_ACL_ACCESS, @@ -507,6 +544,8 @@ const struct xattr_handler *cifs_xattr_handlers[] = { &smb3_acl_xattr_handler, /* alias for above since avoiding "cifs" */ &cifs_cifs_ntsd_xattr_handler, &smb3_ntsd_xattr_handler, /* alias for above since avoiding "cifs" */ + &cifs_cifs_ntsd_full_xattr_handler, + &smb3_ntsd_full_xattr_handler, /* alias for above since avoiding "cifs" */ &cifs_posix_acl_access_xattr_handler, &cifs_posix_acl_default_xattr_handler, NULL diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index b0983e2a4e2c..b839dd1b459f 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -267,6 +267,7 @@ static void configfs_remove_dirent(struct dentry *dentry) * configfs_create_dir - create a directory for an config_item. * @item: config_itemwe're creating directory for. * @dentry: config_item's dentry. + * @frag: config_item's fragment. * * Note: user-created entries won't be allowed under this new directory * until it is validated by configfs_dir_set_ready() diff --git a/fs/dcache.c b/fs/dcache.c index ea0485861d93..97e81a844a96 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -793,10 +793,17 @@ static inline bool fast_dput(struct dentry *dentry) * a reference to the dentry and change that, but * our work is done - we can leave the dentry * around with a zero refcount. + * + * Nevertheless, there are two cases that we should kill + * the dentry anyway. + * 1. free disconnected dentries as soon as their refcount + * reached zero. + * 2. free dentries if they should not be cached. 
*/ smp_rmb(); d_flags = READ_ONCE(dentry->d_flags); - d_flags &= DCACHE_REFERENCED | DCACHE_LRU_LIST | DCACHE_DISCONNECTED; + d_flags &= DCACHE_REFERENCED | DCACHE_LRU_LIST | + DCACHE_DISCONNECTED | DCACHE_DONTCACHE; /* Nothing to do? Dropping the reference was all we needed? */ if (d_flags == (DCACHE_REFERENCED | DCACHE_LRU_LIST) && !d_unhashed(dentry)) diff --git a/fs/eventfd.c b/fs/eventfd.c index df466ef81ddd..e265b6dd4f34 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -182,11 +182,14 @@ static __poll_t eventfd_poll(struct file *file, poll_table *wait) return events; } -static void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt) +void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt) { + lockdep_assert_held(&ctx->wqh.lock); + *cnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count; ctx->count -= *cnt; } +EXPORT_SYMBOL_GPL(eventfd_ctx_do_read); /** * eventfd_ctx_remove_wait_queue - Read the current counter and removes wait queue. diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 10b81e69db74..a829af074eb5 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -389,19 +389,24 @@ static bool ep_busy_loop_end(void *p, unsigned long start_time) * * we must do our busy polling with irqs enabled */ -static void ep_busy_loop(struct eventpoll *ep, int nonblock) +static bool ep_busy_loop(struct eventpoll *ep, int nonblock) { unsigned int napi_id = READ_ONCE(ep->napi_id); - if ((napi_id >= MIN_NAPI_ID) && net_busy_loop_on()) + if ((napi_id >= MIN_NAPI_ID) && net_busy_loop_on()) { napi_busy_loop(napi_id, nonblock ? NULL : ep_busy_loop_end, ep, false, BUSY_POLL_BUDGET); -} - -static inline void ep_reset_busy_poll_napi_id(struct eventpoll *ep) -{ - if (ep->napi_id) + if (ep_events_available(ep)) + return true; + /* + * Busy poll timed out. Drop NAPI ID for now, we can add + * it back in when we have moved a socket with a valid NAPI + * ID onto the ready list. 
+ */ ep->napi_id = 0; + return false; + } + return false; } /* @@ -441,12 +446,9 @@ static inline void ep_set_busy_poll_napi_id(struct epitem *epi) #else -static inline void ep_busy_loop(struct eventpoll *ep, int nonblock) -{ -} - -static inline void ep_reset_busy_poll_napi_id(struct eventpoll *ep) +static inline bool ep_busy_loop(struct eventpoll *ep, int nonblock) { + return false; } static inline void ep_set_busy_poll_napi_id(struct epitem *epi) @@ -1625,6 +1627,14 @@ static int ep_send_events(struct eventpoll *ep, poll_table pt; int res = 0; + /* + * Always short-circuit for fatal signals to allow threads to make a + * timely exit without the chance of finding more events available and + * fetching repeatedly. + */ + if (fatal_signal_pending(current)) + return -EINTR; + init_poll_funcptr(&pt, NULL); mutex_lock(&ep->mtx); @@ -1702,15 +1712,25 @@ static int ep_send_events(struct eventpoll *ep, return res; } -static inline struct timespec64 ep_set_mstimeout(long ms) +static struct timespec64 *ep_timeout_to_timespec(struct timespec64 *to, long ms) { - struct timespec64 now, ts = { - .tv_sec = ms / MSEC_PER_SEC, - .tv_nsec = NSEC_PER_MSEC * (ms % MSEC_PER_SEC), - }; + struct timespec64 now; + + if (ms < 0) + return NULL; + + if (!ms) { + to->tv_sec = 0; + to->tv_nsec = 0; + return to; + } + + to->tv_sec = ms / MSEC_PER_SEC; + to->tv_nsec = NSEC_PER_MSEC * (ms % MSEC_PER_SEC); ktime_get_ts64(&now); - return timespec64_add_safe(now, ts); + *to = timespec64_add_safe(now, *to); + return to; } /** @@ -1722,8 +1742,8 @@ static inline struct timespec64 ep_set_mstimeout(long ms) * stored. * @maxevents: Size (in terms of number of events) of the caller event buffer. * @timeout: Maximum timeout for the ready events fetch operation, in - * milliseconds. If the @timeout is zero, the function will not block, - * while if the @timeout is less than zero, the function will block + * timespec. 
If the timeout is zero, the function will not block, + * while if the @timeout ptr is NULL, the function will block * until at least one event has been retrieved (or an error * occurred). * @@ -1731,55 +1751,59 @@ static inline struct timespec64 ep_set_mstimeout(long ms) * error code, in case of error. */ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, - int maxevents, long timeout) + int maxevents, struct timespec64 *timeout) { - int res = 0, eavail, timed_out = 0; + int res, eavail, timed_out = 0; u64 slack = 0; wait_queue_entry_t wait; ktime_t expires, *to = NULL; lockdep_assert_irqs_enabled(); - if (timeout > 0) { - struct timespec64 end_time = ep_set_mstimeout(timeout); - - slack = select_estimate_accuracy(&end_time); + if (timeout && (timeout->tv_sec | timeout->tv_nsec)) { + slack = select_estimate_accuracy(timeout); to = &expires; - *to = timespec64_to_ktime(end_time); - } else if (timeout == 0) { + *to = timespec64_to_ktime(*timeout); + } else if (timeout) { /* * Avoid the unnecessary trip to the wait queue loop, if the - * caller specified a non blocking operation. We still need - * lock because we could race and not see an epi being added - * to the ready list while in irq callback. Thus incorrectly - * returning 0 back to userspace. + * caller specified a non blocking operation. */ timed_out = 1; - - write_lock_irq(&ep->lock); - eavail = ep_events_available(ep); - write_unlock_irq(&ep->lock); - - goto send_events; } -fetch_events: + /* + * This call is racy: We may or may not see events that are being added + * to the ready list under the lock (e.g., in IRQ callbacks). For, cases + * with a non-zero timeout, this thread will check the ready list under + * lock and will added to the wait queue. For, cases with a zero + * timeout, the user by definition should not care and will have to + * recheck again. + */ + eavail = ep_events_available(ep); + + while (1) { + if (eavail) { + /* + * Try to transfer events to user space. 
In case we get + * 0 events and there's still timeout left over, we go + * trying again in search of more luck. + */ + res = ep_send_events(ep, events, maxevents); + if (res) + return res; + } - if (!ep_events_available(ep)) - ep_busy_loop(ep, timed_out); + if (timed_out) + return 0; - eavail = ep_events_available(ep); - if (eavail) - goto send_events; + eavail = ep_busy_loop(ep, timed_out); + if (eavail) + continue; - /* - * Busy poll timed out. Drop NAPI ID for now, we can add - * it back in when we have moved a socket with a valid NAPI - * ID onto the ready list. - */ - ep_reset_busy_poll_napi_id(ep); + if (signal_pending(current)) + return -EINTR; - do { /* * Internally init_wait() uses autoremove_wake_function(), * thus wait entry is removed from the wait queue on each @@ -1809,55 +1833,38 @@ fetch_events: * important. */ eavail = ep_events_available(ep); - if (!eavail) { - if (signal_pending(current)) - res = -EINTR; - else - __add_wait_queue_exclusive(&ep->wq, &wait); - } - write_unlock_irq(&ep->lock); - - if (eavail || res) - break; - - if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS)) { - timed_out = 1; - break; - } - - /* We were woken up, thus go and try to harvest some events */ - eavail = 1; - - } while (0); + if (!eavail) + __add_wait_queue_exclusive(&ep->wq, &wait); - __set_current_state(TASK_RUNNING); - - if (!list_empty_careful(&wait.entry)) { - write_lock_irq(&ep->lock); - __remove_wait_queue(&ep->wq, &wait); write_unlock_irq(&ep->lock); - } -send_events: - if (fatal_signal_pending(current)) { + if (!eavail) + timed_out = !schedule_hrtimeout_range(to, slack, + HRTIMER_MODE_ABS); + __set_current_state(TASK_RUNNING); + /* - * Always short-circuit for fatal signals to allow - * threads to make a timely exit without the chance of - * finding more events available and fetching - * repeatedly. + * We were woken up, thus go and try to harvest some events. 
+ * If timed out and still on the wait queue, recheck eavail + * carefully under lock, below. */ - res = -EINTR; - } - /* - * Try to transfer events to user space. In case we get 0 events and - * there's still timeout left over, we go trying again in search of - * more luck. - */ - if (!res && eavail && - !(res = ep_send_events(ep, events, maxevents)) && !timed_out) - goto fetch_events; + eavail = 1; - return res; + if (!list_empty_careful(&wait.entry)) { + write_lock_irq(&ep->lock); + /* + * If the thread timed out and is not on the wait queue, + * it means that the thread was woken up after its + * timeout expired before it could reacquire the lock. + * Thus, when wait.entry is empty, it needs to harvest + * events. + */ + if (timed_out) + eavail = list_empty(&wait.entry); + __remove_wait_queue(&ep->wq, &wait); + write_unlock_irq(&ep->lock); + } + } } /** @@ -2176,7 +2183,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, * part of the user space epoll_wait(2). */ static int do_epoll_wait(int epfd, struct epoll_event __user *events, - int maxevents, int timeout) + int maxevents, struct timespec64 *to) { int error; struct fd f; @@ -2210,7 +2217,7 @@ static int do_epoll_wait(int epfd, struct epoll_event __user *events, ep = f.file->private_data; /* Time to fish for events ... */ - error = ep_poll(ep, events, maxevents, timeout); + error = ep_poll(ep, events, maxevents, to); error_fput: fdput(f); @@ -2220,16 +2227,19 @@ error_fput: SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events, int, maxevents, int, timeout) { - return do_epoll_wait(epfd, events, maxevents, timeout); + struct timespec64 to; + + return do_epoll_wait(epfd, events, maxevents, + ep_timeout_to_timespec(&to, timeout)); } /* * Implement the event wait interface for the eventpoll file. It is the kernel * part of the user space epoll_pwait(2). 
*/ -SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events, - int, maxevents, int, timeout, const sigset_t __user *, sigmask, - size_t, sigsetsize) +static int do_epoll_pwait(int epfd, struct epoll_event __user *events, + int maxevents, struct timespec64 *to, + const sigset_t __user *sigmask, size_t sigsetsize) { int error; @@ -2241,18 +2251,47 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events, if (error) return error; - error = do_epoll_wait(epfd, events, maxevents, timeout); + error = do_epoll_wait(epfd, events, maxevents, to); + restore_saved_sigmask_unless(error == -EINTR); return error; } +SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events, + int, maxevents, int, timeout, const sigset_t __user *, sigmask, + size_t, sigsetsize) +{ + struct timespec64 to; + + return do_epoll_pwait(epfd, events, maxevents, + ep_timeout_to_timespec(&to, timeout), + sigmask, sigsetsize); +} + +SYSCALL_DEFINE6(epoll_pwait2, int, epfd, struct epoll_event __user *, events, + int, maxevents, const struct __kernel_timespec __user *, timeout, + const sigset_t __user *, sigmask, size_t, sigsetsize) +{ + struct timespec64 ts, *to = NULL; + + if (timeout) { + if (get_timespec64(&ts, timeout)) + return -EFAULT; + to = &ts; + if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) + return -EINVAL; + } + + return do_epoll_pwait(epfd, events, maxevents, to, + sigmask, sigsetsize); +} + #ifdef CONFIG_COMPAT -COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd, - struct epoll_event __user *, events, - int, maxevents, int, timeout, - const compat_sigset_t __user *, sigmask, - compat_size_t, sigsetsize) +static int do_compat_epoll_pwait(int epfd, struct epoll_event __user *events, + int maxevents, struct timespec64 *timeout, + const compat_sigset_t __user *sigmask, + compat_size_t sigsetsize) { long err; @@ -2265,10 +2304,46 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd, return err; err = do_epoll_wait(epfd, events, maxevents, 
timeout); + restore_saved_sigmask_unless(err == -EINTR); return err; } + +COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd, + struct epoll_event __user *, events, + int, maxevents, int, timeout, + const compat_sigset_t __user *, sigmask, + compat_size_t, sigsetsize) +{ + struct timespec64 to; + + return do_compat_epoll_pwait(epfd, events, maxevents, + ep_timeout_to_timespec(&to, timeout), + sigmask, sigsetsize); +} + +COMPAT_SYSCALL_DEFINE6(epoll_pwait2, int, epfd, + struct epoll_event __user *, events, + int, maxevents, + const struct __kernel_timespec __user *, timeout, + const compat_sigset_t __user *, sigmask, + compat_size_t, sigsetsize) +{ + struct timespec64 ts, *to = NULL; + + if (timeout) { + if (get_timespec64(&ts, timeout)) + return -EFAULT; + to = &ts; + if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) + return -EINVAL; + } + + return do_compat_epoll_pwait(epfd, events, maxevents, to, + sigmask, sigsetsize); +} + #endif static int __init eventpoll_init(void) diff --git a/fs/exfat/nls.c b/fs/exfat/nls.c index 675d0e7058c5..314d5407a1be 100644 --- a/fs/exfat/nls.c +++ b/fs/exfat/nls.c @@ -659,7 +659,7 @@ static int exfat_load_upcase_table(struct super_block *sb, unsigned char skip = false; unsigned short *upcase_table; - upcase_table = kcalloc(UTBL_COUNT, sizeof(unsigned short), GFP_KERNEL); + upcase_table = kvcalloc(UTBL_COUNT, sizeof(unsigned short), GFP_KERNEL); if (!upcase_table) return -ENOMEM; @@ -715,7 +715,7 @@ static int exfat_load_default_upcase_table(struct super_block *sb) unsigned short uni = 0, *upcase_table; unsigned int index = 0; - upcase_table = kcalloc(UTBL_COUNT, sizeof(unsigned short), GFP_KERNEL); + upcase_table = kvcalloc(UTBL_COUNT, sizeof(unsigned short), GFP_KERNEL); if (!upcase_table) return -ENOMEM; @@ -803,5 +803,5 @@ load_default: void exfat_free_upcase_table(struct exfat_sb_info *sbi) { - kfree(sbi->vol_utbl); + kvfree(sbi->vol_utbl); } diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 1d640b145637..f45f9feebe59 
100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -185,7 +185,7 @@ static int ext4_init_block_bitmap(struct super_block *sb, struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_fsblk_t start, tmp; - J_ASSERT_BH(bh, buffer_locked(bh)); + ASSERT(buffer_locked(bh)); /* If checksum is bad mark all blocks used to prevent allocation * essentially implementing a per-group read-only flag. */ diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index 8e6ca23ed172..4666b55b736e 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c @@ -176,12 +176,10 @@ static int ext4_protect_reserved_inode(struct super_block *sb, err = add_system_zone(system_blks, map.m_pblk, n, ino); if (err < 0) { if (err == -EFSCORRUPTED) { - __ext4_error(sb, __func__, __LINE__, - -err, map.m_pblk, - "blocks %llu-%llu from inode %u overlap system zone", - map.m_pblk, - map.m_pblk + map.m_len - 1, - ino); + EXT4_ERROR_INODE_ERR(inode, -err, + "blocks %llu-%llu from inode overlap system zone", + map.m_pblk, + map.m_pblk + map.m_len - 1); } break; } @@ -206,7 +204,7 @@ static void ext4_destroy_system_zone(struct rcu_head *rcu) * * The update of system_blks pointer in this function is protected by * sb->s_umount semaphore. However we have to be careful as we can be - * racing with ext4_data_block_valid() calls reading system_blks rbtree + * racing with ext4_inode_block_valid() calls reading system_blks rbtree * protected only by RCU. That's why we first build the rbtree and then * swap it in place. */ @@ -258,7 +256,7 @@ int ext4_setup_system_zone(struct super_block *sb) /* * System blks rbtree complete, announce it once to prevent racing - * with ext4_data_block_valid() accessing the rbtree at the same + * with ext4_inode_block_valid() accessing the rbtree at the same * time. */ rcu_assign_pointer(sbi->s_system_blks, system_blks); @@ -278,7 +276,7 @@ err: * * The update of system_blks pointer in this function is protected by * sb->s_umount semaphore. 
However we have to be careful as we can be - * racing with ext4_data_block_valid() calls reading system_blks rbtree + * racing with ext4_inode_block_valid() calls reading system_blks rbtree * protected only by RCU. So we first clear the system_blks pointer and * then free the rbtree only after RCU grace period expires. */ diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index c64ea8f59ea7..2866d249f3d2 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -98,6 +98,16 @@ #define ext_debug(ino, fmt, ...) no_printk(fmt, ##__VA_ARGS__) #endif +#define ASSERT(assert) \ +do { \ + if (unlikely(!(assert))) { \ + printk(KERN_EMERG \ + "Assertion failure in %s() at %s:%d: '%s'\n", \ + __func__, __FILE__, __LINE__, #assert); \ + BUG(); \ + } \ +} while (0) + /* data type for block offset of block group */ typedef int ext4_grpblk_t; @@ -1619,6 +1629,27 @@ struct ext4_sb_info { errseq_t s_bdev_wb_err; spinlock_t s_bdev_wb_lock; + /* Information about errors that happened during this mount */ + spinlock_t s_error_lock; + int s_add_error_count; + int s_first_error_code; + __u32 s_first_error_line; + __u32 s_first_error_ino; + __u64 s_first_error_block; + const char *s_first_error_func; + time64_t s_first_error_time; + int s_last_error_code; + __u32 s_last_error_line; + __u32 s_last_error_ino; + __u64 s_last_error_block; + const char *s_last_error_func; + time64_t s_last_error_time; + /* + * If we are in a context where we cannot update error information in + * the on-disk superblock, we queue this work to do it. 
+ */ + struct work_struct s_error_work; + /* Ext4 fast commit stuff */ atomic_t s_fc_subtid; atomic_t s_fc_ineligible_updates; @@ -1858,7 +1889,6 @@ static inline bool ext4_verity_in_progress(struct inode *inode) #define EXT4_GOOD_OLD_REV 0 /* The good old (original) format */ #define EXT4_DYNAMIC_REV 1 /* V2 format w/ dynamic inode sizes */ -#define EXT4_CURRENT_REV EXT4_GOOD_OLD_REV #define EXT4_MAX_SUPP_REV EXT4_DYNAMIC_REV #define EXT4_GOOD_OLD_INODE_SIZE 128 @@ -2952,9 +2982,9 @@ extern void ext4_mark_group_bitmap_corrupted(struct super_block *sb, ext4_group_t block_group, unsigned int flags); -extern __printf(6, 7) -void __ext4_error(struct super_block *, const char *, unsigned int, int, __u64, - const char *, ...); +extern __printf(7, 8) +void __ext4_error(struct super_block *, const char *, unsigned int, bool, + int, __u64, const char *, ...); extern __printf(6, 7) void __ext4_error_inode(struct inode *, const char *, unsigned int, ext4_fsblk_t, int, const char *, ...); @@ -2963,9 +2993,6 @@ void __ext4_error_file(struct file *, const char *, unsigned int, ext4_fsblk_t, const char *, ...); extern void __ext4_std_error(struct super_block *, const char *, unsigned int, int); -extern __printf(5, 6) -void __ext4_abort(struct super_block *, const char *, unsigned int, int, - const char *, ...); extern __printf(4, 5) void __ext4_warning(struct super_block *, const char *, unsigned int, const char *, ...); @@ -2995,6 +3022,9 @@ void __ext4_grp_locked_error(const char *, unsigned int, #define EXT4_ERROR_FILE(file, block, fmt, a...) \ ext4_error_file((file), __func__, __LINE__, (block), (fmt), ## a) +#define ext4_abort(sb, err, fmt, a...) \ + __ext4_error((sb), __func__, __LINE__, true, (err), 0, (fmt), ## a) + #ifdef CONFIG_PRINTK #define ext4_error_inode(inode, func, line, block, fmt, ...) \ @@ -3005,11 +3035,11 @@ void __ext4_grp_locked_error(const char *, unsigned int, #define ext4_error_file(file, func, line, block, fmt, ...) 
\ __ext4_error_file(file, func, line, block, fmt, ##__VA_ARGS__) #define ext4_error(sb, fmt, ...) \ - __ext4_error((sb), __func__, __LINE__, 0, 0, (fmt), ##__VA_ARGS__) + __ext4_error((sb), __func__, __LINE__, false, 0, 0, (fmt), \ + ##__VA_ARGS__) #define ext4_error_err(sb, err, fmt, ...) \ - __ext4_error((sb), __func__, __LINE__, (err), 0, (fmt), ##__VA_ARGS__) -#define ext4_abort(sb, err, fmt, ...) \ - __ext4_abort((sb), __func__, __LINE__, (err), (fmt), ##__VA_ARGS__) + __ext4_error((sb), __func__, __LINE__, false, (err), 0, (fmt), \ + ##__VA_ARGS__) #define ext4_warning(sb, fmt, ...) \ __ext4_warning(sb, __func__, __LINE__, fmt, ##__VA_ARGS__) #define ext4_warning_inode(inode, fmt, ...) \ @@ -3042,17 +3072,12 @@ do { \ #define ext4_error(sb, fmt, ...) \ do { \ no_printk(fmt, ##__VA_ARGS__); \ - __ext4_error(sb, "", 0, 0, 0, " "); \ + __ext4_error(sb, "", 0, false, 0, 0, " "); \ } while (0) #define ext4_error_err(sb, err, fmt, ...) \ do { \ no_printk(fmt, ##__VA_ARGS__); \ - __ext4_error(sb, "", 0, err, 0, " "); \ -} while (0) -#define ext4_abort(sb, err, fmt, ...) \ -do { \ - no_printk(fmt, ##__VA_ARGS__); \ - __ext4_abort(sb, "", 0, err, " "); \ + __ext4_error(sb, "", 0, false, err, 0, " "); \ } while (0) #define ext4_warning(sb, fmt, ...) 
\ do { \ @@ -3361,6 +3386,21 @@ static inline void ext4_unlock_group(struct super_block *sb, spin_unlock(ext4_group_lock_ptr(sb, group)); } +#ifdef CONFIG_QUOTA +static inline bool ext4_quota_capable(struct super_block *sb) +{ + return (test_opt(sb, QUOTA) || ext4_has_feature_quota(sb)); +} + +static inline bool ext4_is_quota_journalled(struct super_block *sb) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + + return (ext4_has_feature_quota(sb) || + sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]); +} +#endif + /* * Block validity checking */ @@ -3609,7 +3649,6 @@ extern void ext4_io_submit(struct ext4_io_submit *io); extern int ext4_bio_write_page(struct ext4_io_submit *io, struct page *page, int len, - struct writeback_control *wbc, bool keep_towrite); extern struct ext4_io_end_vec *ext4_alloc_io_end_vec(ext4_io_end_t *io_end); extern struct ext4_io_end_vec *ext4_last_io_end_vec(ext4_io_end_t *io_end); diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index 0fd0c42a4f7d..1a0a827a7f34 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -296,8 +296,8 @@ int __ext4_forget(const char *where, unsigned int line, handle_t *handle, if (err) { ext4_journal_abort_handle(where, line, __func__, bh, handle, err); - __ext4_abort(inode->i_sb, where, line, -err, - "error %d when attempting revoke", err); + __ext4_error(inode->i_sb, where, line, true, -err, 0, + "error %d when attempting revoke", err); } BUFFER_TRACE(bh, "exit"); return err; diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index 00dc668e052b..a124c68b0c75 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -86,17 +86,14 @@ #ifdef CONFIG_QUOTA /* Amount of blocks needed for quota update - we know that the structure was * allocated so we need to update only data block */ -#define EXT4_QUOTA_TRANS_BLOCKS(sb) ((test_opt(sb, QUOTA) ||\ - ext4_has_feature_quota(sb)) ? 1 : 0) +#define EXT4_QUOTA_TRANS_BLOCKS(sb) ((ext4_quota_capable(sb)) ? 
1 : 0) /* Amount of blocks needed for quota insert/delete - we do some block writes * but inode, sb and group updates are done only once */ -#define EXT4_QUOTA_INIT_BLOCKS(sb) ((test_opt(sb, QUOTA) ||\ - ext4_has_feature_quota(sb)) ?\ +#define EXT4_QUOTA_INIT_BLOCKS(sb) ((ext4_quota_capable(sb)) ?\ (DQUOT_INIT_ALLOC*(EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)\ +3+DQUOT_INIT_REWRITE) : 0) -#define EXT4_QUOTA_DEL_BLOCKS(sb) ((test_opt(sb, QUOTA) ||\ - ext4_has_feature_quota(sb)) ?\ +#define EXT4_QUOTA_DEL_BLOCKS(sb) ((ext4_quota_capable(sb)) ?\ (DQUOT_DEL_ALLOC*(EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)\ +3+DQUOT_DEL_REWRITE) : 0) #else diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 17d7096b3212..3960b7ec3ab7 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -5815,8 +5815,8 @@ int ext4_ext_replay_update_ex(struct inode *inode, ext4_lblk_t start, int ret; path = ext4_find_extent(inode, start, NULL, 0); - if (!path) - return -EINVAL; + if (IS_ERR(path)) + return PTR_ERR(path); ex = path[path->p_depth].p_ext; if (!ex) { ret = -EFSCORRUPTED; @@ -5988,7 +5988,6 @@ int ext4_ext_replay_set_iblocks(struct inode *inode) kfree(path); break; } - ex = path2[path2->p_depth].p_ext; for (i = 0; i <= max(path->p_depth, path2->p_depth); i++) { cmp1 = cmp2 = 0; if (i <= path->p_depth) diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index f2033e13a273..4fcc21c25e79 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -103,8 +103,69 @@ * * Replay code should thus check for all the valid tails in the FC area. * + * Fast Commit Replay Idempotence + * ------------------------------ + * + * Fast commits tags are idempotent in nature provided the recovery code follows + * certain rules. The guiding principle that the commit path follows while + * committing is that it stores the result of a particular operation instead of + * storing the procedure. + * + * Let's consider this rename operation: 'mv /a /b'. Let's assume dirent '/a' + * was associated with inode 10. 
During fast commit, instead of storing this + * operation as a procedure "rename a to b", we store the resulting file system + * state as a "series" of outcomes: + * + * - Link dirent b to inode 10 + * - Unlink dirent a + * - Inode <10> with valid refcount + * + * Now when recovery code runs, it needs "enforce" this state on the file + * system. This is what guarantees idempotence of fast commit replay. + * + * Let's take an example of a procedure that is not idempotent and see how fast + * commits make it idempotent. Consider following sequence of operations: + * + * rm A; mv B A; read A + * (x) (y) (z) + * + * (x), (y) and (z) are the points at which we can crash. If we store this + * sequence of operations as is then the replay is not idempotent. Let's say + * while in replay, we crash at (z). During the second replay, file A (which was + * actually created as a result of "mv B A" operation) would get deleted. Thus, + * file named A would be absent when we try to read A. So, this sequence of + * operations is not idempotent. However, as mentioned above, instead of storing + * the procedure fast commits store the outcome of each procedure. Thus the fast + * commit log for above procedure would be as follows: + * + * (Let's assume dirent A was linked to inode 10 and dirent B was linked to + * inode 11 before the replay) + * + * [Unlink A] [Link A to inode 11] [Unlink B] [Inode 11] + * (w) (x) (y) (z) + * + * If we crash at (z), we will have file A linked to inode 11. During the second + * replay, we will remove file A (inode 11). But we will create it back and make + * it point to inode 11. We won't find B, so we'll just skip that step. At this + * point, the refcount for inode 11 is not reliable, but that gets fixed by the + * replay of last inode 11 tag. Crashes at points (w), (x) and (y) get handled + * similarly. Thus, by converting a non-idempotent procedure into a series of + * idempotent outcomes, fast commits ensured idempotence during the replay. 
+ * * TODOs * ----- + * + * 0) Fast commit replay path hardening: Fast commit replay code should use + * journal handles to make sure all the updates it does during the replay + * path are atomic. With that if we crash during fast commit replay, after + * trying to do recovery again, we will find a file system where fast commit + * area is invalid (because new full commit would be found). In order to deal + * with that, fast commit replay code should ensure that the "FC_REPLAY" + * superblock state is persisted before starting the replay, so that after + * the crash, fast commit recovery code can look at that flag and perform + * fast commit recovery even if that area is invalidated by later full + * commits. + * * 1) Make fast commit atomic updates more fine grained. Today, a fast commit * eligible update must be protected within ext4_fc_start_update() and * ext4_fc_stop_update(). These routines are called at much higher @@ -1220,18 +1281,6 @@ static void ext4_fc_cleanup(journal_t *journal, int full) /* Ext4 Replay Path Routines */ -/* Get length of a particular tlv */ -static inline int ext4_fc_tag_len(struct ext4_fc_tl *tl) -{ - return le16_to_cpu(tl->fc_len); -} - -/* Get a pointer to "value" of a tlv */ -static inline u8 *ext4_fc_tag_val(struct ext4_fc_tl *tl) -{ - return (u8 *)tl + sizeof(*tl); -} - /* Helper struct for dentry replay routines */ struct dentry_info_args { int parent_ino, dname_len, ino, inode_len; @@ -1770,32 +1819,6 @@ ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl) return 0; } -static inline const char *tag2str(u16 tag) -{ - switch (tag) { - case EXT4_FC_TAG_LINK: - return "TAG_ADD_ENTRY"; - case EXT4_FC_TAG_UNLINK: - return "TAG_DEL_ENTRY"; - case EXT4_FC_TAG_ADD_RANGE: - return "TAG_ADD_RANGE"; - case EXT4_FC_TAG_CREAT: - return "TAG_CREAT_DENTRY"; - case EXT4_FC_TAG_DEL_RANGE: - return "TAG_DEL_RANGE"; - case EXT4_FC_TAG_INODE: - return "TAG_INODE"; - case EXT4_FC_TAG_PAD: - return "TAG_PAD"; - case 
EXT4_FC_TAG_TAIL: - return "TAG_TAIL"; - case EXT4_FC_TAG_HEAD: - return "TAG_HEAD"; - default: - return "TAG_ERROR"; - } -} - static void ext4_fc_set_bitmaps_and_counters(struct super_block *sb) { struct ext4_fc_replay_state *state; diff --git a/fs/ext4/fast_commit.h b/fs/ext4/fast_commit.h index 3a6e5a1fa1b8..b77f70f55a62 100644 --- a/fs/ext4/fast_commit.h +++ b/fs/ext4/fast_commit.h @@ -3,6 +3,11 @@ #ifndef __FAST_COMMIT_H__ #define __FAST_COMMIT_H__ +/* + * Note this file is present in e2fsprogs/lib/ext2fs/fast_commit.h and + * linux/fs/ext4/fast_commit.h. These file should always be byte identical. + */ + /* Fast commit tags */ #define EXT4_FC_TAG_ADD_RANGE 0x0001 #define EXT4_FC_TAG_DEL_RANGE 0x0002 @@ -50,7 +55,7 @@ struct ext4_fc_del_range { struct ext4_fc_dentry_info { __le32 fc_parent_ino; __le32 fc_ino; - u8 fc_dname[0]; + __u8 fc_dname[0]; }; /* Value structure for EXT4_FC_TAG_INODE and EXT4_FC_TAG_INODE_PARTIAL. */ @@ -66,19 +71,6 @@ struct ext4_fc_tail { }; /* - * In memory list of dentry updates that are performed on the file - * system used by fast commit code. - */ -struct ext4_fc_dentry_update { - int fcd_op; /* Type of update create / unlink / link */ - int fcd_parent; /* Parent inode number */ - int fcd_ino; /* Inode number */ - struct qstr fcd_name; /* Dirent name */ - unsigned char fcd_iname[DNAME_INLINE_LEN]; /* Dirent name string */ - struct list_head fcd_list; -}; - -/* * Fast commit reason codes */ enum { @@ -107,6 +99,20 @@ enum { EXT4_FC_REASON_MAX }; +#ifdef __KERNEL__ +/* + * In memory list of dentry updates that are performed on the file + * system used by fast commit code. 
+ */ +struct ext4_fc_dentry_update { + int fcd_op; /* Type of update create / unlink / link */ + int fcd_parent; /* Parent inode number */ + int fcd_ino; /* Inode number */ + struct qstr fcd_name; /* Dirent name */ + unsigned char fcd_iname[DNAME_INLINE_LEN]; /* Dirent name string */ + struct list_head fcd_list; +}; + struct ext4_fc_stats { unsigned int fc_ineligible_reason_count[EXT4_FC_REASON_MAX]; unsigned long fc_num_commits; @@ -145,13 +151,51 @@ struct ext4_fc_replay_state { }; #define region_last(__region) (((__region)->lblk) + ((__region)->len) - 1) +#endif #define fc_for_each_tl(__start, __end, __tl) \ - for (tl = (struct ext4_fc_tl *)start; \ - (u8 *)tl < (u8 *)end; \ - tl = (struct ext4_fc_tl *)((u8 *)tl + \ + for (tl = (struct ext4_fc_tl *)(__start); \ + (__u8 *)tl < (__u8 *)(__end); \ + tl = (struct ext4_fc_tl *)((__u8 *)tl + \ sizeof(struct ext4_fc_tl) + \ + le16_to_cpu(tl->fc_len))) +static inline const char *tag2str(__u16 tag) +{ + switch (tag) { + case EXT4_FC_TAG_LINK: + return "ADD_ENTRY"; + case EXT4_FC_TAG_UNLINK: + return "DEL_ENTRY"; + case EXT4_FC_TAG_ADD_RANGE: + return "ADD_RANGE"; + case EXT4_FC_TAG_CREAT: + return "CREAT_DENTRY"; + case EXT4_FC_TAG_DEL_RANGE: + return "DEL_RANGE"; + case EXT4_FC_TAG_INODE: + return "INODE"; + case EXT4_FC_TAG_PAD: + return "PAD"; + case EXT4_FC_TAG_TAIL: + return "TAIL"; + case EXT4_FC_TAG_HEAD: + return "HEAD"; + default: + return "ERROR"; + } +} + +/* Get length of a particular tlv */ +static inline int ext4_fc_tag_len(struct ext4_fc_tl *tl) +{ + return le16_to_cpu(tl->fc_len); +} + +/* Get a pointer to "value" of a tlv */ +static inline __u8 *ext4_fc_tag_val(struct ext4_fc_tl *tl) +{ + return (__u8 *)tl + sizeof(*tl); +} #endif /* __FAST_COMMIT_H__ */ diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index a42ca95840f2..113bfb023a4a 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -136,7 +136,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) if 
(unlikely(ext4_forced_shutdown(sbi))) return -EIO; - J_ASSERT(ext4_journal_current_handle() == NULL); + ASSERT(ext4_journal_current_handle() == NULL); trace_ext4_sync_file_enter(file, datasync); diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 05efa682bc2f..1223a18c3ff9 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -534,8 +534,8 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, ext4_fsblk_t first_block = 0; trace_ext4_ind_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); - J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))); - J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0); + ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))); + ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0); depth = ext4_block_to_path(inode, map->m_lblk, offsets, &blocks_to_boundary); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 0d8385aea898..27946882d4ce 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -175,6 +175,7 @@ void ext4_evict_inode(struct inode *inode) */ int extra_credits = 6; struct ext4_xattr_inode_array *ea_inode_array = NULL; + bool freeze_protected = false; trace_ext4_evict_inode(inode); @@ -232,9 +233,14 @@ void ext4_evict_inode(struct inode *inode) /* * Protect us against freezing - iput() caller didn't have to have any - * protection against it + * protection against it. When we are in a running transaction though, + * we are already protected against freezing and we cannot grab further + * protection due to lock ordering constraints. */ - sb_start_intwrite(inode->i_sb); + if (!ext4_journal_current_handle()) { + sb_start_intwrite(inode->i_sb); + freeze_protected = true; + } if (!IS_NOQUOTA(inode)) extra_credits += EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb); @@ -253,7 +259,8 @@ void ext4_evict_inode(struct inode *inode) * cleaned up. 
*/ ext4_orphan_del(NULL, inode); - sb_end_intwrite(inode->i_sb); + if (freeze_protected) + sb_end_intwrite(inode->i_sb); goto no_delete; } @@ -294,7 +301,8 @@ void ext4_evict_inode(struct inode *inode) stop_handle: ext4_journal_stop(handle); ext4_orphan_del(NULL, inode); - sb_end_intwrite(inode->i_sb); + if (freeze_protected) + sb_end_intwrite(inode->i_sb); ext4_xattr_inode_array_free(ea_inode_array); goto no_delete; } @@ -323,7 +331,8 @@ stop_handle: else ext4_free_inode(handle, inode); ext4_journal_stop(handle); - sb_end_intwrite(inode->i_sb); + if (freeze_protected) + sb_end_intwrite(inode->i_sb); ext4_xattr_inode_array_free(ea_inode_array); return; no_delete: @@ -830,8 +839,8 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, int create = map_flags & EXT4_GET_BLOCKS_CREATE; int err; - J_ASSERT((EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) - || handle != NULL || create == 0); + ASSERT((EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) + || handle != NULL || create == 0); map.m_lblk = block; map.m_len = 1; @@ -846,9 +855,9 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, if (unlikely(!bh)) return ERR_PTR(-ENOMEM); if (map.m_flags & EXT4_MAP_NEW) { - J_ASSERT(create != 0); - J_ASSERT((EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) - || (handle != NULL)); + ASSERT(create != 0); + ASSERT((EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) + || (handle != NULL)); /* * Now that we do not always journal data, we should @@ -2055,7 +2064,7 @@ static int ext4_writepage(struct page *page, unlock_page(page); return -ENOMEM; } - ret = ext4_bio_write_page(&io_submit, page, len, wbc, keep_towrite); + ret = ext4_bio_write_page(&io_submit, page, len, keep_towrite); ext4_io_submit(&io_submit); /* Drop io_end reference we got from init */ ext4_put_io_end_defer(io_submit.io_end); @@ -2089,7 +2098,7 @@ static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page) len = size & ~PAGE_MASK; else len = 
PAGE_SIZE; - err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc, false); + err = ext4_bio_write_page(&mpd->io_submit, page, len, false); if (!err) mpd->wbc->nr_to_write--; mpd->first_page++; @@ -4610,7 +4619,7 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))) { if (flags & EXT4_IGET_HANDLE) return ERR_PTR(-ESTALE); - __ext4_error(sb, function, line, EFSCORRUPTED, 0, + __ext4_error(sb, function, line, false, EFSCORRUPTED, 0, "inode #%lu: comm %s: iget: illegal inode #", ino, current->comm); return ERR_PTR(-EFSCORRUPTED); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 24af9ed5c3e5..99bf091fee10 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -822,24 +822,6 @@ void ext4_mb_generate_buddy(struct super_block *sb, spin_unlock(&sbi->s_bal_lock); } -static void mb_regenerate_buddy(struct ext4_buddy *e4b) -{ - int count; - int order = 1; - void *buddy; - - while ((buddy = mb_find_buddy(e4b, order++, &count))) { - ext4_set_bits(buddy, 0, count); - } - e4b->bd_info->bb_fragments = 0; - memset(e4b->bd_info->bb_counters, 0, - sizeof(*e4b->bd_info->bb_counters) * - (e4b->bd_sb->s_blocksize_bits + 2)); - - ext4_mb_generate_buddy(e4b->bd_sb, e4b->bd_buddy, - e4b->bd_bitmap, e4b->bd_group); -} - /* The buddy information is attached the buddy cache inode * for convenience. The information regarding each group * is loaded via ext4_mb_load_buddy. 
The information involve @@ -1307,22 +1289,18 @@ static void ext4_mb_unload_buddy(struct ext4_buddy *e4b) static int mb_find_order_for_block(struct ext4_buddy *e4b, int block) { - int order = 1; - int bb_incr = 1 << (e4b->bd_blkbits - 1); + int order = 1, max; void *bb; BUG_ON(e4b->bd_bitmap == e4b->bd_buddy); BUG_ON(block >= (1 << (e4b->bd_blkbits + 3))); - bb = e4b->bd_buddy; while (order <= e4b->bd_blkbits + 1) { - block = block >> 1; - if (!mb_test_bit(block, bb)) { + bb = mb_find_buddy(e4b, order, &max); + if (!mb_test_bit(block >> order, bb)) { /* this block is part of buddy of order 'order' */ return order; } - bb += bb_incr; - bb_incr >>= 1; order++; } return 0; @@ -1512,7 +1490,6 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, sb, e4b->bd_group, EXT4_GROUP_INFO_BBITMAP_CORRUPT); } - mb_regenerate_buddy(e4b); goto done; } @@ -2395,9 +2372,9 @@ repeat: nr = sbi->s_mb_prefetch; if (ext4_has_feature_flex_bg(sb)) { - nr = (group / sbi->s_mb_prefetch) * - sbi->s_mb_prefetch; - nr = nr + sbi->s_mb_prefetch - group; + nr = 1 << sbi->s_log_groups_per_flex; + nr -= group & (nr - 1); + nr = min(nr, sbi->s_mb_prefetch); } prefetch_grp = ext4_mb_prefetch(sb, group, nr, &prefetch_ios); @@ -2733,7 +2710,8 @@ static int ext4_mb_init_backend(struct super_block *sb) if (ext4_has_feature_flex_bg(sb)) { /* a single flex group is supposed to be read by a single IO */ - sbi->s_mb_prefetch = 1 << sbi->s_es->s_log_groups_per_flex; + sbi->s_mb_prefetch = min(1 << sbi->s_es->s_log_groups_per_flex, + BLK_MAX_SEGMENT_SIZE >> (sb->s_blocksize_bits - 9)); sbi->s_mb_prefetch *= 8; /* 8 prefetch IOs in flight at most */ } else { sbi->s_mb_prefetch = 32; @@ -5126,6 +5104,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, ext4_group_first_block_no(sb, group) + EXT4_C2B(sbi, cluster), "Block already on to-be-freed list"); + kmem_cache_free(ext4_free_data_cachep, new_entry); return 0; } } diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 
326fe402e495..b17a082b7db1 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -182,10 +182,6 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode, return bh; } -#ifndef assert -#define assert(test) J_ASSERT(test) -#endif - #ifdef DX_DEBUG #define dxtrace(command) command #else @@ -843,7 +839,7 @@ dx_probe(struct ext4_filename *fname, struct inode *dir, break; } } - assert (at == p - 1); + ASSERT(at == p - 1); } at = p - 1; @@ -1259,8 +1255,8 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block) struct dx_entry *old = frame->at, *new = old + 1; int count = dx_get_count(entries); - assert(count < dx_get_limit(entries)); - assert(old < entries + count); + ASSERT(count < dx_get_limit(entries)); + ASSERT(old < entries + count); memmove(new + 1, new, (char *)(entries + count) - (char *)(new)); dx_set_hash(new, hash); dx_set_block(new, block); @@ -2959,7 +2955,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) * hold i_mutex, or the inode can not be referenced from outside, * so i_nlink should not be bumped due to race */ - J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); BUFFER_TRACE(sbi->s_sbh, "get_write_access"); diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index defd2e10dfd1..03a44a0de86a 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -111,9 +111,6 @@ static void ext4_finish_bio(struct bio *bio) unsigned under_io = 0; unsigned long flags; - if (!page) - continue; - if (fscrypt_is_bounce_page(page)) { bounce_page = page; page = fscrypt_pagecache_page(bounce_page); @@ -438,7 +435,6 @@ submit_and_retry: int ext4_bio_write_page(struct ext4_io_submit *io, struct page *page, int len, - struct writeback_control *wbc, bool keep_towrite) { struct page *bounce_page = NULL; @@ -448,6 +444,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io, int ret = 0; int nr_submitted = 
0; int nr_to_submit = 0; + struct writeback_control *wbc = io->io_wbc; BUG_ON(!PageLocked(page)); BUG_ON(PageWriteback(page)); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 830c196ec069..21121787c874 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -404,10 +404,8 @@ void ext4_itable_unused_set(struct super_block *sb, bg->bg_itable_unused_hi = cpu_to_le16(count >> 16); } -static void __ext4_update_tstamp(__le32 *lo, __u8 *hi) +static void __ext4_update_tstamp(__le32 *lo, __u8 *hi, time64_t now) { - time64_t now = ktime_get_real_seconds(); - now = clamp_val(now, 0, (1ull << 40) - 1); *lo = cpu_to_le32(lower_32_bits(now)); @@ -419,108 +417,11 @@ static time64_t __ext4_get_tstamp(__le32 *lo, __u8 *hi) return ((time64_t)(*hi) << 32) + le32_to_cpu(*lo); } #define ext4_update_tstamp(es, tstamp) \ - __ext4_update_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi) + __ext4_update_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi, \ + ktime_get_real_seconds()) #define ext4_get_tstamp(es, tstamp) \ __ext4_get_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi) -static void __save_error_info(struct super_block *sb, int error, - __u32 ino, __u64 block, - const char *func, unsigned int line) -{ - struct ext4_super_block *es = EXT4_SB(sb)->s_es; - int err; - - EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; - if (bdev_read_only(sb->s_bdev)) - return; - es->s_state |= cpu_to_le16(EXT4_ERROR_FS); - ext4_update_tstamp(es, s_last_error_time); - strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func)); - es->s_last_error_line = cpu_to_le32(line); - es->s_last_error_ino = cpu_to_le32(ino); - es->s_last_error_block = cpu_to_le64(block); - switch (error) { - case EIO: - err = EXT4_ERR_EIO; - break; - case ENOMEM: - err = EXT4_ERR_ENOMEM; - break; - case EFSBADCRC: - err = EXT4_ERR_EFSBADCRC; - break; - case 0: - case EFSCORRUPTED: - err = EXT4_ERR_EFSCORRUPTED; - break; - case ENOSPC: - err = EXT4_ERR_ENOSPC; - break; - case ENOKEY: - err = EXT4_ERR_ENOKEY; - break; - case EROFS: - err = 
EXT4_ERR_EROFS; - break; - case EFBIG: - err = EXT4_ERR_EFBIG; - break; - case EEXIST: - err = EXT4_ERR_EEXIST; - break; - case ERANGE: - err = EXT4_ERR_ERANGE; - break; - case EOVERFLOW: - err = EXT4_ERR_EOVERFLOW; - break; - case EBUSY: - err = EXT4_ERR_EBUSY; - break; - case ENOTDIR: - err = EXT4_ERR_ENOTDIR; - break; - case ENOTEMPTY: - err = EXT4_ERR_ENOTEMPTY; - break; - case ESHUTDOWN: - err = EXT4_ERR_ESHUTDOWN; - break; - case EFAULT: - err = EXT4_ERR_EFAULT; - break; - default: - err = EXT4_ERR_UNKNOWN; - } - es->s_last_error_errcode = err; - if (!es->s_first_error_time) { - es->s_first_error_time = es->s_last_error_time; - es->s_first_error_time_hi = es->s_last_error_time_hi; - strncpy(es->s_first_error_func, func, - sizeof(es->s_first_error_func)); - es->s_first_error_line = cpu_to_le32(line); - es->s_first_error_ino = es->s_last_error_ino; - es->s_first_error_block = es->s_last_error_block; - es->s_first_error_errcode = es->s_last_error_errcode; - } - /* - * Start the daily error reporting function if it hasn't been - * started already - */ - if (!es->s_error_count) - mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ); - le32_add_cpu(&es->s_error_count, 1); -} - -static void save_error_info(struct super_block *sb, int error, - __u32 ino, __u64 block, - const char *func, unsigned int line) -{ - __save_error_info(sb, error, ino, block, func, line); - if (!bdev_read_only(sb->s_bdev)) - ext4_commit_super(sb, 1); -} - /* * The del_gendisk() function uninitializes the disk-specific data * structures, including the bdi structure, without telling anyone @@ -649,6 +550,83 @@ static bool system_going_down(void) || system_state == SYSTEM_RESTART; } +struct ext4_err_translation { + int code; + int errno; +}; + +#define EXT4_ERR_TRANSLATE(err) { .code = EXT4_ERR_##err, .errno = err } + +static struct ext4_err_translation err_translation[] = { + EXT4_ERR_TRANSLATE(EIO), + EXT4_ERR_TRANSLATE(ENOMEM), + EXT4_ERR_TRANSLATE(EFSBADCRC), + 
EXT4_ERR_TRANSLATE(EFSCORRUPTED), + EXT4_ERR_TRANSLATE(ENOSPC), + EXT4_ERR_TRANSLATE(ENOKEY), + EXT4_ERR_TRANSLATE(EROFS), + EXT4_ERR_TRANSLATE(EFBIG), + EXT4_ERR_TRANSLATE(EEXIST), + EXT4_ERR_TRANSLATE(ERANGE), + EXT4_ERR_TRANSLATE(EOVERFLOW), + EXT4_ERR_TRANSLATE(EBUSY), + EXT4_ERR_TRANSLATE(ENOTDIR), + EXT4_ERR_TRANSLATE(ENOTEMPTY), + EXT4_ERR_TRANSLATE(ESHUTDOWN), + EXT4_ERR_TRANSLATE(EFAULT), +}; + +static int ext4_errno_to_code(int errno) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(err_translation); i++) + if (err_translation[i].errno == errno) + return err_translation[i].code; + return EXT4_ERR_UNKNOWN; +} + +static void __save_error_info(struct super_block *sb, int error, + __u32 ino, __u64 block, + const char *func, unsigned int line) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + + EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; + if (bdev_read_only(sb->s_bdev)) + return; + /* We default to EFSCORRUPTED error... */ + if (error == 0) + error = EFSCORRUPTED; + + spin_lock(&sbi->s_error_lock); + sbi->s_add_error_count++; + sbi->s_last_error_code = error; + sbi->s_last_error_line = line; + sbi->s_last_error_ino = ino; + sbi->s_last_error_block = block; + sbi->s_last_error_func = func; + sbi->s_last_error_time = ktime_get_real_seconds(); + if (!sbi->s_first_error_time) { + sbi->s_first_error_code = error; + sbi->s_first_error_line = line; + sbi->s_first_error_ino = ino; + sbi->s_first_error_block = block; + sbi->s_first_error_func = func; + sbi->s_first_error_time = sbi->s_last_error_time; + } + spin_unlock(&sbi->s_error_lock); +} + +static void save_error_info(struct super_block *sb, int error, + __u32 ino, __u64 block, + const char *func, unsigned int line) +{ + __save_error_info(sb, error, ino, block, func, line); + if (!bdev_read_only(sb->s_bdev)) + ext4_commit_super(sb, 1); +} + /* Deal with the reporting of failure conditions on a filesystem such as * inconsistencies detected or read IO failures. 
* @@ -662,40 +640,50 @@ static bool system_going_down(void) * We'll just use the jbd2_journal_abort() error code to record an error in * the journal instead. On recovery, the journal will complain about * that error until we've noted it down and cleared it. + * + * If force_ro is set, we unconditionally force the filesystem into an + * ABORT|READONLY state, unless the error response on the fs has been set to + * panic in which case we take the easy way out and panic immediately. This is + * used to deal with unrecoverable failures such as journal IO errors or ENOMEM + * at a critical moment in log management. */ - -static void ext4_handle_error(struct super_block *sb) +static void ext4_handle_error(struct super_block *sb, bool force_ro) { + journal_t *journal = EXT4_SB(sb)->s_journal; + if (test_opt(sb, WARN_ON_ERROR)) WARN_ON_ONCE(1); - if (sb_rdonly(sb)) + if (sb_rdonly(sb) || (!force_ro && test_opt(sb, ERRORS_CONT))) return; - if (!test_opt(sb, ERRORS_CONT)) { - journal_t *journal = EXT4_SB(sb)->s_journal; - - ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED); - if (journal) - jbd2_journal_abort(journal, -EIO); - } + ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED); + if (journal) + jbd2_journal_abort(journal, -EIO); /* * We force ERRORS_RO behavior when system is rebooting. Otherwise we * could panic during 'reboot -f' as the underlying device got already * disabled. 
*/ - if (test_opt(sb, ERRORS_RO) || system_going_down()) { - ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); - /* - * Make sure updated value of ->s_mount_flags will be visible - * before ->s_flags update - */ - smp_wmb(); - sb->s_flags |= SB_RDONLY; - } else if (test_opt(sb, ERRORS_PANIC)) { + if (test_opt(sb, ERRORS_PANIC) && !system_going_down()) { panic("EXT4-fs (device %s): panic forced after error\n", sb->s_id); } + ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); + /* + * Make sure updated value of ->s_mount_flags will be visible before + * ->s_flags update + */ + smp_wmb(); + sb->s_flags |= SB_RDONLY; +} + +static void flush_stashed_error_work(struct work_struct *work) +{ + struct ext4_sb_info *sbi = container_of(work, struct ext4_sb_info, + s_error_work); + + ext4_commit_super(sbi->s_sb, 1); } #define ext4_error_ratelimit(sb) \ @@ -703,7 +691,7 @@ static void ext4_handle_error(struct super_block *sb) "EXT4-fs error") void __ext4_error(struct super_block *sb, const char *function, - unsigned int line, int error, __u64 block, + unsigned int line, bool force_ro, int error, __u64 block, const char *fmt, ...) 
{ struct va_format vaf; @@ -723,7 +711,7 @@ void __ext4_error(struct super_block *sb, const char *function, va_end(args); } save_error_info(sb, error, 0, block, function, line); - ext4_handle_error(sb); + ext4_handle_error(sb, force_ro); } void __ext4_error_inode(struct inode *inode, const char *function, @@ -755,7 +743,7 @@ void __ext4_error_inode(struct inode *inode, const char *function, } save_error_info(inode->i_sb, error, inode->i_ino, block, function, line); - ext4_handle_error(inode->i_sb); + ext4_handle_error(inode->i_sb, false); } void __ext4_error_file(struct file *file, const char *function, @@ -794,7 +782,7 @@ void __ext4_error_file(struct file *file, const char *function, } save_error_info(inode->i_sb, EFSCORRUPTED, inode->i_ino, block, function, line); - ext4_handle_error(inode->i_sb); + ext4_handle_error(inode->i_sb, false); } const char *ext4_decode_error(struct super_block *sb, int errno, @@ -862,51 +850,7 @@ void __ext4_std_error(struct super_block *sb, const char *function, } save_error_info(sb, -errno, 0, 0, function, line); - ext4_handle_error(sb); -} - -/* - * ext4_abort is a much stronger failure handler than ext4_error. The - * abort function may be used to deal with unrecoverable failures such - * as journal IO errors or ENOMEM at a critical moment in log management. - * - * We unconditionally force the filesystem into an ABORT|READONLY state, - * unless the error response on the fs has been set to panic in which - * case we take the easy way out and panic immediately. - */ - -void __ext4_abort(struct super_block *sb, const char *function, - unsigned int line, int error, const char *fmt, ...) 
-{ - struct va_format vaf; - va_list args; - - if (unlikely(ext4_forced_shutdown(EXT4_SB(sb)))) - return; - - save_error_info(sb, error, 0, 0, function, line); - va_start(args, fmt); - vaf.fmt = fmt; - vaf.va = &args; - printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: %pV\n", - sb->s_id, function, line, &vaf); - va_end(args); - - if (sb_rdonly(sb) == 0) { - ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED); - if (EXT4_SB(sb)->s_journal) - jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); - - ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); - /* - * Make sure updated value of ->s_mount_flags will be visible - * before ->s_flags update - */ - smp_wmb(); - sb->s_flags |= SB_RDONLY; - } - if (test_opt(sb, ERRORS_PANIC) && !system_going_down()) - panic("EXT4-fs panic from previous error\n"); + ext4_handle_error(sb, false); } void __ext4_msg(struct super_block *sb, @@ -982,8 +926,6 @@ __acquires(bitlock) return; trace_ext4_error(sb, function, line); - __save_error_info(sb, EFSCORRUPTED, ino, block, function, line); - if (ext4_error_ratelimit(sb)) { va_start(args, fmt); vaf.fmt = fmt; @@ -999,17 +941,16 @@ __acquires(bitlock) va_end(args); } - if (test_opt(sb, WARN_ON_ERROR)) - WARN_ON_ONCE(1); - if (test_opt(sb, ERRORS_CONT)) { - ext4_commit_super(sb, 0); + if (test_opt(sb, WARN_ON_ERROR)) + WARN_ON_ONCE(1); + __save_error_info(sb, EFSCORRUPTED, ino, block, function, line); + schedule_work(&EXT4_SB(sb)->s_error_work); return; } - ext4_unlock_group(sb, grp); - ext4_commit_super(sb, 1); - ext4_handle_error(sb); + save_error_info(sb, EFSCORRUPTED, ino, block, function, line); + ext4_handle_error(sb, false); /* * We only get here in the ERRORS_RO case; relocking the group * may be dangerous, but nothing bad will happen since the @@ -1181,6 +1122,7 @@ static void ext4_put_super(struct super_block *sb) ext4_unregister_li_request(sb); ext4_quota_off_umount(sb); + flush_work(&sbi->s_error_work); destroy_workqueue(sbi->rsv_conversion_wq); /* @@ -1240,7 +1182,7 @@ static 
void ext4_put_super(struct super_block *sb) * in-memory list had better be clean by this point. */ if (!list_empty(&sbi->s_orphan)) dump_orphan_list(sb, sbi); - J_ASSERT(list_empty(&sbi->s_orphan)); + ASSERT(list_empty(&sbi->s_orphan)); sync_blockdev(sb->s_bdev); invalidate_bdev(sb->s_bdev); @@ -4005,6 +3947,21 @@ static void ext4_set_resv_clusters(struct super_block *sb) atomic64_set(&sbi->s_resv_clusters, resv_clusters); } +static const char *ext4_quota_mode(struct super_block *sb) +{ +#ifdef CONFIG_QUOTA + if (!ext4_quota_capable(sb)) + return "none"; + + if (EXT4_SB(sb)->s_journal && ext4_is_quota_journalled(sb)) + return "journalled"; + else + return "writeback"; +#else + return "disabled"; +#endif +} + static int ext4_fill_super(struct super_block *sb, void *data, int silent) { struct dax_device *dax_dev = fs_dax_get_by_bdev(sb->s_bdev); @@ -4073,7 +4030,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if (IS_ERR(bh)) { ext4_msg(sb, KERN_ERR, "unable to read superblock"); ret = PTR_ERR(bh); - bh = NULL; goto out_fail; } /* @@ -4187,19 +4143,26 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) */ sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT; - blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); - - if (blocksize == PAGE_SIZE) - set_opt(sb, DIOREAD_NOLOCK); - - if (blocksize < EXT4_MIN_BLOCK_SIZE || - blocksize > EXT4_MAX_BLOCK_SIZE) { + if (le32_to_cpu(es->s_log_block_size) > + (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) { + ext4_msg(sb, KERN_ERR, + "Invalid log block size: %u", + le32_to_cpu(es->s_log_block_size)); + goto failed_mount; + } + if (le32_to_cpu(es->s_log_cluster_size) > + (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) { ext4_msg(sb, KERN_ERR, - "Unsupported filesystem blocksize %d (%d log_block_size)", - blocksize, le32_to_cpu(es->s_log_block_size)); + "Invalid log cluster size: %u", + le32_to_cpu(es->s_log_cluster_size)); goto failed_mount; } + blocksize = 
EXT4_MIN_BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); + + if (blocksize == PAGE_SIZE) + set_opt(sb, DIOREAD_NOLOCK); + if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) { sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE; sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO; @@ -4417,21 +4380,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if (!ext4_feature_set_ok(sb, (sb_rdonly(sb)))) goto failed_mount; - if (le32_to_cpu(es->s_log_block_size) > - (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) { - ext4_msg(sb, KERN_ERR, - "Invalid log block size: %u", - le32_to_cpu(es->s_log_block_size)); - goto failed_mount; - } - if (le32_to_cpu(es->s_log_cluster_size) > - (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) { - ext4_msg(sb, KERN_ERR, - "Invalid log cluster size: %u", - le32_to_cpu(es->s_log_cluster_size)); - goto failed_mount; - } - if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (blocksize / 4)) { ext4_msg(sb, KERN_ERR, "Number of reserved GDT blocks insanely large: %d", @@ -4702,7 +4650,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) "can't read group descriptor %d", i); db_count = i; ret = PTR_ERR(bh); - bh = NULL; goto failed_mount2; } rcu_read_lock(); @@ -4717,6 +4664,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } timer_setup(&sbi->s_err_report, print_daily_error_info, 0); + spin_lock_init(&sbi->s_error_lock); + INIT_WORK(&sbi->s_error_work, flush_stashed_error_work); /* Register extent status tree shrinker */ if (ext4_es_register_shrinker(sbi)) @@ -4872,6 +4821,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) "requested data journaling mode"); goto failed_mount_wq; } + break; default: break; } @@ -5000,13 +4950,11 @@ no_journal: block = ext4_count_free_clusters(sb); ext4_free_blocks_count_set(sbi->s_es, EXT4_C2B(sbi, block)); - ext4_superblock_csum_set(sb); err = percpu_counter_init(&sbi->s_freeclusters_counter, block, 
GFP_KERNEL); if (!err) { unsigned long freei = ext4_count_free_inodes(sb); sbi->s_es->s_free_inodes_count = cpu_to_le32(freei); - ext4_superblock_csum_set(sb); err = percpu_counter_init(&sbi->s_freeinodes_counter, freei, GFP_KERNEL); } @@ -5086,10 +5034,11 @@ no_journal: if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs mount")) ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. " - "Opts: %.*s%s%s", descr, + "Opts: %.*s%s%s. Quota mode: %s.", descr, (int) sizeof(sbi->s_es->s_mount_opts), sbi->s_es->s_mount_opts, - *sbi->s_es->s_mount_opts ? "; " : "", orig_data); + *sbi->s_es->s_mount_opts ? "; " : "", orig_data, + ext4_quota_mode(sb)); if (es->s_error_count) mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */ @@ -5154,6 +5103,7 @@ failed_mount3a: ext4_es_unregister_shrinker(sbi); failed_mount3: del_timer_sync(&sbi->s_err_report); + flush_work(&sbi->s_error_work); if (sbi->s_mmp_tsk) kthread_stop(sbi->s_mmp_tsk); failed_mount2: @@ -5480,6 +5430,7 @@ err_out: static int ext4_commit_super(struct super_block *sb, int sync) { + struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = EXT4_SB(sb)->s_es; struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; int error = 0; @@ -5511,6 +5462,46 @@ static int ext4_commit_super(struct super_block *sb, int sync) es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive( &EXT4_SB(sb)->s_freeinodes_counter)); + /* Copy error information to the on-disk superblock */ + spin_lock(&sbi->s_error_lock); + if (sbi->s_add_error_count > 0) { + es->s_state |= cpu_to_le16(EXT4_ERROR_FS); + if (!es->s_first_error_time && !es->s_first_error_time_hi) { + __ext4_update_tstamp(&es->s_first_error_time, + &es->s_first_error_time_hi, + sbi->s_first_error_time); + strncpy(es->s_first_error_func, sbi->s_first_error_func, + sizeof(es->s_first_error_func)); + es->s_first_error_line = + cpu_to_le32(sbi->s_first_error_line); + es->s_first_error_ino = + cpu_to_le32(sbi->s_first_error_ino); + es->s_first_error_block = + 
cpu_to_le64(sbi->s_first_error_block); + es->s_first_error_errcode = + ext4_errno_to_code(sbi->s_first_error_code); + } + __ext4_update_tstamp(&es->s_last_error_time, + &es->s_last_error_time_hi, + sbi->s_last_error_time); + strncpy(es->s_last_error_func, sbi->s_last_error_func, + sizeof(es->s_last_error_func)); + es->s_last_error_line = cpu_to_le32(sbi->s_last_error_line); + es->s_last_error_ino = cpu_to_le32(sbi->s_last_error_ino); + es->s_last_error_block = cpu_to_le64(sbi->s_last_error_block); + es->s_last_error_errcode = + ext4_errno_to_code(sbi->s_last_error_code); + /* + * Start the daily error reporting function if it hasn't been + * started already + */ + if (!es->s_error_count) + mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ); + le32_add_cpu(&es->s_error_count, sbi->s_add_error_count); + sbi->s_add_error_count = 0; + } + spin_unlock(&sbi->s_error_lock); + BUFFER_TRACE(sbh, "marking dirty"); ext4_superblock_csum_set(sb); if (sync) @@ -5864,6 +5855,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); } + /* Flush outstanding errors before changing fs state */ + flush_work(&sbi->s_error_work); + if ((bool)(*flags & SB_RDONLY) != sb_rdonly(sb)) { if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED)) { err = -EROFS; @@ -6022,7 +6016,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) */ *flags = (*flags & ~vfs_flags) | (sb->s_flags & vfs_flags); - ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data); + ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s. Quota mode: %s.", + orig_data, ext4_quota_mode(sb)); kfree(orig_data); return 0; @@ -6201,11 +6196,8 @@ static int ext4_release_dquot(struct dquot *dquot) static int ext4_mark_dquot_dirty(struct dquot *dquot) { struct super_block *sb = dquot->dq_sb; - struct ext4_sb_info *sbi = EXT4_SB(sb); - /* Are we journaling quotas? 
*/ - if (ext4_has_feature_quota(sb) || - sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { + if (ext4_is_quota_journalled(sb)) { dquot_mark_dquot_dirty(dquot); return ext4_write_dquot(dquot); } else { diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 6127e94ea4f5..4e3b1f8c2e81 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1927,7 +1927,6 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, } else { /* Allocate a buffer where we construct the new block. */ s->base = kzalloc(sb->s_blocksize, GFP_NOFS); - /* assert(header == s->base) */ error = -ENOMEM; if (s->base == NULL) goto cleanup; diff --git a/fs/file.c b/fs/file.c index 8434e0afecc7..dab120b71e44 100644 --- a/fs/file.c +++ b/fs/file.c @@ -21,7 +21,6 @@ #include <linux/rcupdate.h> #include <linux/close_range.h> #include <net/sock.h> -#include <linux/io_uring.h> unsigned int sysctl_nr_open __read_mostly = 1024*1024; unsigned int sysctl_nr_open_min = BITS_PER_LONG; @@ -428,7 +427,6 @@ void exit_files(struct task_struct *tsk) struct files_struct * files = tsk->files; if (files) { - io_uring_files_cancel(files); task_lock(tsk); tsk->files = NULL; task_unlock(tsk); @@ -694,8 +692,10 @@ int __close_range(unsigned fd, unsigned max_fd, unsigned int flags) * If the requested range is greater than the current maximum, * we're closing everything so only copy all file descriptors * beneath the lowest file descriptor. + * If the caller requested all fds to be made cloexec copy all + * of the file descriptors since they still want to use them. 
*/ - if (max_fd >= cur_max) + if (!(flags & CLOSE_RANGE_CLOEXEC) && (max_fd >= cur_max)) max_unshare_fds = fd; ret = unshare_fd(CLONE_FILES, max_unshare_fds, &fds); diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 35a6fd103761..d87a5bc3607b 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -857,12 +857,6 @@ static void delete_work_func(struct work_struct *work) clear_bit(GLF_PENDING_DELETE, &gl->gl_flags); spin_unlock(&gl->gl_lockref.lock); - /* If someone's using this glock to create a new dinode, the block must - have been freed by another node, then re-used, in which case our - iopen callback is too late after the fact. Ignore it. */ - if (test_bit(GLF_INODE_CREATING, &gl->gl_flags)) - goto out; - if (test_bit(GLF_DEMOTE, &gl->gl_flags)) { /* * If we can evict the inode, give the remote node trying to @@ -2112,8 +2106,6 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl) *p++ = 'o'; if (test_bit(GLF_BLOCKING, gflags)) *p++ = 'b'; - if (test_bit(GLF_INODE_CREATING, gflags)) - *p++ = 'c'; if (test_bit(GLF_PENDING_DELETE, gflags)) *p++ = 'P'; if (test_bit(GLF_FREEING, gflags)) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index f8858d995b24..8e1ab8ed4abc 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -348,7 +348,6 @@ enum { GLF_LRU = 13, GLF_OBJECT = 14, /* Used only for tracing */ GLF_BLOCKING = 15, - GLF_INODE_CREATING = 16, /* Inode creation occurring */ GLF_PENDING_DELETE = 17, GLF_FREEING = 18, /* Wait for glock to be freed */ }; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 65ae4fc28ede..c1b77e8d6b1c 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -36,6 +36,10 @@ #include "super.h" #include "glops.h" +static const struct inode_operations gfs2_file_iops; +static const struct inode_operations gfs2_dir_iops; +static const struct inode_operations gfs2_symlink_iops; + static int iget_test(struct inode *inode, void *opaque) { u64 no_addr = *(u64 *)opaque; @@ -605,7 +609,7 @@ static int gfs2_create_inode(struct 
inode *dir, struct dentry *dentry, struct inode *inode = NULL; struct gfs2_inode *dip = GFS2_I(dir), *ip; struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); - struct gfs2_glock *io_gl = NULL; + struct gfs2_glock *io_gl; int error, free_vfs_inode = 1; u32 aflags = 0; unsigned blocks = 1; @@ -746,8 +750,6 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, init_dinode(dip, ip, symname); gfs2_trans_end(sdp); - BUG_ON(test_and_set_bit(GLF_INODE_CREATING, &io_gl->gl_flags)); - error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh); if (error) goto fail_gunlock2; @@ -793,7 +795,6 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, gfs2_glock_dq_uninit(ghs); gfs2_qa_put(ip); gfs2_glock_dq_uninit(ghs + 1); - clear_bit(GLF_INODE_CREATING, &io_gl->gl_flags); gfs2_glock_put(io_gl); gfs2_qa_put(dip); return error; @@ -802,7 +803,6 @@ fail_gunlock3: glock_clear_object(io_gl, ip); gfs2_glock_dq_uninit(&ip->i_iopen_gh); fail_gunlock2: - clear_bit(GLF_INODE_CREATING, &io_gl->gl_flags); glock_clear_object(io_gl, ip); gfs2_glock_put(io_gl); fail_free_inode: @@ -2136,7 +2136,7 @@ static int gfs2_update_time(struct inode *inode, struct timespec64 *time, return generic_update_time(inode, time, flags); } -const struct inode_operations gfs2_file_iops = { +static const struct inode_operations gfs2_file_iops = { .permission = gfs2_permission, .setattr = gfs2_setattr, .getattr = gfs2_getattr, @@ -2147,7 +2147,7 @@ const struct inode_operations gfs2_file_iops = { .update_time = gfs2_update_time, }; -const struct inode_operations gfs2_dir_iops = { +static const struct inode_operations gfs2_dir_iops = { .create = gfs2_create, .lookup = gfs2_lookup, .link = gfs2_link, @@ -2168,7 +2168,7 @@ const struct inode_operations gfs2_dir_iops = { .atomic_open = gfs2_atomic_open, }; -const struct inode_operations gfs2_symlink_iops = { +static const struct inode_operations gfs2_symlink_iops = { .get_link = gfs2_get_link, .permission = 
gfs2_permission, .setattr = gfs2_setattr, diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index b52ecf4ffe63..8073b8d2c7fa 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h @@ -107,9 +107,6 @@ extern int gfs2_open_common(struct inode *inode, struct file *file); extern loff_t gfs2_seek_data(struct file *file, loff_t offset); extern loff_t gfs2_seek_hole(struct file *file, loff_t offset); -extern const struct inode_operations gfs2_file_iops; -extern const struct inode_operations gfs2_dir_iops; -extern const struct inode_operations gfs2_symlink_iops; extern const struct file_operations gfs2_file_fops_nolock; extern const struct file_operations gfs2_dir_fops_nolock; diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index b3d951ab8068..2f56acc41c04 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -353,7 +353,6 @@ int gfs2_statfs_sync(struct super_block *sb, int type) struct buffer_head *m_bh, *l_bh; int error; - sb_start_write(sb); error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, GL_NOCACHE, &gh); if (error) @@ -392,7 +391,6 @@ out_bh: out_unlock: gfs2_glock_dq_uninit(&gh); out: - sb_end_write(sb); return error; } diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index 0fba3bf64189..a374397f4273 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -137,7 +137,7 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp) gfs2_glock_dq(&sdp->sd_jinode_gh); if (test_bit(SDF_FS_FROZEN, &sdp->sd_flags)) { /* Make sure gfs2_unfreeze works if partially-frozen */ - flush_workqueue(gfs2_freeze_wq); + flush_work(&sdp->sd_freeze_work); atomic_set(&sdp->sd_freeze_state, SFS_FROZEN); thaw_super(sdp->sd_vfs); } else { diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h index d7562981b3a0..a4443dd8a94b 100644 --- a/fs/gfs2/util.h +++ b/fs/gfs2/util.h @@ -151,7 +151,7 @@ extern int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, bool verbose); #define gfs2_io_error(sdp) \ -gfs2_io_error_i((sdp), __func__, __FILE__, __LINE__); +gfs2_io_error_i((sdp), __func__, __FILE__, 
__LINE__) void gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh, @@ -159,10 +159,10 @@ void gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh, bool withdraw); #define gfs2_io_error_bh_wd(sdp, bh) \ -gfs2_io_error_bh_i((sdp), (bh), __func__, __FILE__, __LINE__, true); +gfs2_io_error_bh_i((sdp), (bh), __func__, __FILE__, __LINE__, true) #define gfs2_io_error_bh(sdp, bh) \ -gfs2_io_error_bh_i((sdp), (bh), __func__, __FILE__, __LINE__, false); +gfs2_io_error_bh_i((sdp), (bh), __func__, __FILE__, __LINE__, false) extern struct kmem_cache *gfs2_glock_cachep; diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index c070c0d8e3e9..aea35459d390 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -315,7 +315,7 @@ retry: if (mode & FMODE_WRITE) r = w = 1; - name = dentry_name(file->f_path.dentry); + name = dentry_name(d_real(file->f_path.dentry, file->f_inode)); if (name == NULL) return -ENOMEM; diff --git a/fs/inode.c b/fs/inode.c index cb008acf0efd..6442d97d9a4a 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1624,7 +1624,9 @@ static void iput_final(struct inode *inode) else drop = generic_drop_inode(inode); - if (!drop && (sb->s_flags & SB_ACTIVE)) { + if (!drop && + !(inode->i_state & I_DONTCACHE) && + (sb->s_flags & SB_ACTIVE)) { inode_add_lru(inode); spin_unlock(&inode->i_lock); return; diff --git a/fs/io-wq.c b/fs/io-wq.c index f72d53848dcb..a564f36e260c 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -36,8 +36,7 @@ enum { enum { IO_WQ_BIT_EXIT = 0, /* wq exiting */ - IO_WQ_BIT_CANCEL = 1, /* cancel work on list */ - IO_WQ_BIT_ERROR = 2, /* error on setup */ + IO_WQ_BIT_ERROR = 1, /* error on setup */ }; enum { @@ -561,12 +560,6 @@ get_next: next_hashed = wq_next_work(work); io_impersonate_work(worker, work); - /* - * OK to set IO_WQ_WORK_CANCEL even for uncancellable - * work, the worker function will do the right thing. 
- */ - if (test_bit(IO_WQ_BIT_CANCEL, &wq->state)) - work->flags |= IO_WQ_WORK_CANCEL; old_work = work; linked = wq->do_work(work); @@ -732,12 +725,6 @@ static inline bool io_wqe_need_worker(struct io_wqe *wqe, int index) return acct->nr_workers < acct->max_workers; } -static bool io_wqe_worker_send_sig(struct io_worker *worker, void *data) -{ - send_sig(SIGINT, worker->task, 1); - return false; -} - /* * Iterate the passed in list and call the specific function for each * worker that isn't exiting @@ -938,21 +925,6 @@ void io_wq_hash_work(struct io_wq_work *work, void *val) work->flags |= (IO_WQ_WORK_HASHED | (bit << IO_WQ_HASH_SHIFT)); } -void io_wq_cancel_all(struct io_wq *wq) -{ - int node; - - set_bit(IO_WQ_BIT_CANCEL, &wq->state); - - rcu_read_lock(); - for_each_node(node) { - struct io_wqe *wqe = wq->wqes[node]; - - io_wq_for_each_worker(wqe, io_wqe_worker_send_sig, NULL); - } - rcu_read_unlock(); -} - struct io_cb_cancel_data { work_cancel_fn *fn; void *data; diff --git a/fs/io-wq.h b/fs/io-wq.h index 069496c6d4f9..b158f8addcf3 100644 --- a/fs/io-wq.h +++ b/fs/io-wq.h @@ -59,6 +59,7 @@ static inline void wq_list_add_tail(struct io_wq_work_node *node, list->last->next = node; list->last = node; } + node->next = NULL; } static inline void wq_list_cut(struct io_wq_work_list *list, @@ -128,8 +129,6 @@ static inline bool io_wq_is_hashed(struct io_wq_work *work) return work->flags & IO_WQ_WORK_HASHED; } -void io_wq_cancel_all(struct io_wq *wq); - typedef bool (work_cancel_fn)(struct io_wq_work *, void *); enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel, diff --git a/fs/io_uring.c b/fs/io_uring.c index 6f9392c35eef..ca46f314640b 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -992,6 +992,10 @@ enum io_mem_account { ACCT_PINNED, }; +static void destroy_fixed_file_ref_node(struct fixed_file_ref_node *ref_node); +static struct fixed_file_ref_node *alloc_fixed_file_ref_node( + struct io_ring_ctx *ctx); + static void __io_complete_rw(struct 
io_kiocb *req, long res, long res2, struct io_comp_state *cs); static void io_cqring_fill_event(struct io_kiocb *req, long res); @@ -1501,6 +1505,13 @@ static bool io_grab_identity(struct io_kiocb *req) spin_unlock_irq(&ctx->inflight_lock); req->work.flags |= IO_WQ_WORK_FILES; } + if (!(req->work.flags & IO_WQ_WORK_MM) && + (def->work_flags & IO_WQ_WORK_MM)) { + if (id->mm != current->mm) + return false; + mmgrab(id->mm); + req->work.flags |= IO_WQ_WORK_MM; + } return true; } @@ -1525,13 +1536,6 @@ static void io_prep_async_work(struct io_kiocb *req) req->work.flags |= IO_WQ_WORK_UNBOUND; } - /* ->mm can never change on us */ - if (!(req->work.flags & IO_WQ_WORK_MM) && - (def->work_flags & IO_WQ_WORK_MM)) { - mmgrab(id->mm); - req->work.flags |= IO_WQ_WORK_MM; - } - /* if we fail grabbing identity, we must COW, regrab, and retry */ if (io_grab_identity(req)) return; @@ -1693,6 +1697,11 @@ static inline bool io_should_trigger_evfd(struct io_ring_ctx *ctx) return io_wq_current_is_worker(); } +static inline unsigned __io_cqring_events(struct io_ring_ctx *ctx) +{ + return ctx->cached_cq_tail - READ_ONCE(ctx->rings->cq.head); +} + static void io_cqring_ev_posted(struct io_ring_ctx *ctx) { if (waitqueue_active(&ctx->wait)) @@ -1703,15 +1712,6 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx) eventfd_signal(ctx->cq_ev_fd, 1); } -static void io_cqring_mark_overflow(struct io_ring_ctx *ctx) -{ - if (list_empty(&ctx->cq_overflow_list)) { - clear_bit(0, &ctx->sq_check_overflow); - clear_bit(0, &ctx->cq_check_overflow); - ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW; - } -} - /* Returns true if there are no backlogged entries after the flush */ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, struct task_struct *tsk, @@ -1721,23 +1721,13 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, struct io_kiocb *req, *tmp; struct io_uring_cqe *cqe; unsigned long flags; + bool all_flushed; LIST_HEAD(list); - if (!force) { - 
if (list_empty_careful(&ctx->cq_overflow_list)) - return true; - if ((ctx->cached_cq_tail - READ_ONCE(rings->cq.head) == - rings->cq_ring_entries)) - return false; - } + if (!force && __io_cqring_events(ctx) == rings->cq_ring_entries) + return false; spin_lock_irqsave(&ctx->completion_lock, flags); - - /* if force is set, the ring is going away. always drop after that */ - if (force) - ctx->cq_overflow_flushed = 1; - - cqe = NULL; list_for_each_entry_safe(req, tmp, &ctx->cq_overflow_list, compl.list) { if (!io_match_task(req, tsk, files)) continue; @@ -1758,9 +1748,14 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, } } - io_commit_cqring(ctx); - io_cqring_mark_overflow(ctx); + all_flushed = list_empty(&ctx->cq_overflow_list); + if (all_flushed) { + clear_bit(0, &ctx->sq_check_overflow); + clear_bit(0, &ctx->cq_check_overflow); + ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW; + } + io_commit_cqring(ctx); spin_unlock_irqrestore(&ctx->completion_lock, flags); io_cqring_ev_posted(ctx); @@ -1770,7 +1765,7 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, io_put_req(req); } - return cqe != NULL; + return all_flushed; } static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags) @@ -2320,8 +2315,6 @@ static void io_double_put_req(struct io_kiocb *req) static unsigned io_cqring_events(struct io_ring_ctx *ctx, bool noflush) { - struct io_rings *rings = ctx->rings; - if (test_bit(0, &ctx->cq_check_overflow)) { /* * noflush == true is from the waitqueue handler, just ensure @@ -2336,7 +2329,7 @@ static unsigned io_cqring_events(struct io_ring_ctx *ctx, bool noflush) /* See comment at the top of this file */ smp_rmb(); - return ctx->cached_cq_tail - READ_ONCE(rings->cq.head); + return __io_cqring_events(ctx); } static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx) @@ -3136,9 +3129,7 @@ static ssize_t io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov, iov[0].iov_len = 
kbuf->len; return 0; } - if (!req->rw.len) - return 0; - else if (req->rw.len > 1) + if (req->rw.len != 1) return -EINVAL; #ifdef CONFIG_COMPAT @@ -3784,6 +3775,8 @@ static int io_shutdown(struct io_kiocb *req, bool force_nonblock) return -ENOTSOCK; ret = __sys_shutdown_sock(sock, req->shutdown.how); + if (ret < 0) + req_set_fail_links(req); io_req_complete(req, ret); return 0; #else @@ -6107,15 +6100,15 @@ static void io_req_drop_files(struct io_kiocb *req) struct io_uring_task *tctx = req->task->io_uring; unsigned long flags; + put_files_struct(req->work.identity->files); + put_nsproxy(req->work.identity->nsproxy); spin_lock_irqsave(&ctx->inflight_lock, flags); list_del(&req->inflight_entry); - if (atomic_read(&tctx->in_idle)) - wake_up(&tctx->wait); spin_unlock_irqrestore(&ctx->inflight_lock, flags); req->flags &= ~REQ_F_INFLIGHT; - put_files_struct(req->work.identity->files); - put_nsproxy(req->work.identity->nsproxy); req->work.flags &= ~IO_WQ_WORK_FILES; + if (atomic_read(&tctx->in_idle)) + wake_up(&tctx->wait); } static void __io_clean_op(struct io_kiocb *req) @@ -6343,19 +6336,28 @@ static struct io_wq_work *io_wq_submit_work(struct io_wq_work *work) } if (ret) { + struct io_ring_ctx *lock_ctx = NULL; + + if (req->ctx->flags & IORING_SETUP_IOPOLL) + lock_ctx = req->ctx; + /* - * io_iopoll_complete() does not hold completion_lock to complete - * polled io, so here for polled io, just mark it done and still let - * io_iopoll_complete() complete it. + * io_iopoll_complete() does not hold completion_lock to + * complete polled io, so here for polled io, we can not call + * io_req_complete() directly, otherwise there maybe concurrent + * access to cqring, defer_list, etc, which is not safe. Given + * that io_iopoll_complete() is always called under uring_lock, + * so here for polled io, we also get uring_lock to complete + * it. 
*/ - if (req->ctx->flags & IORING_SETUP_IOPOLL) { - struct kiocb *kiocb = &req->rw.kiocb; + if (lock_ctx) + mutex_lock(&lock_ctx->uring_lock); - kiocb_done(kiocb, ret, NULL); - } else { - req_set_fail_links(req); - io_req_complete(req, ret); - } + req_set_fail_links(req); + io_req_complete(req, ret); + + if (lock_ctx) + mutex_unlock(&lock_ctx->uring_lock); } return io_steal_work(req); @@ -6824,8 +6826,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr) /* if we have a backlog and couldn't flush it all, return BUSY */ if (test_bit(0, &ctx->sq_check_overflow)) { - if (!list_empty(&ctx->cq_overflow_list) && - !io_cqring_overflow_flush(ctx, false, NULL, NULL)) + if (!io_cqring_overflow_flush(ctx, false, NULL, NULL)) return -EBUSY; } @@ -7234,14 +7235,28 @@ static void io_file_ref_kill(struct percpu_ref *ref) complete(&data->done); } +static void io_sqe_files_set_node(struct fixed_file_data *file_data, + struct fixed_file_ref_node *ref_node) +{ + spin_lock_bh(&file_data->lock); + file_data->node = ref_node; + list_add_tail(&ref_node->node, &file_data->ref_list); + spin_unlock_bh(&file_data->lock); + percpu_ref_get(&file_data->refs); +} + static int io_sqe_files_unregister(struct io_ring_ctx *ctx) { struct fixed_file_data *data = ctx->file_data; - struct fixed_file_ref_node *ref_node = NULL; + struct fixed_file_ref_node *backup_node, *ref_node = NULL; unsigned nr_tables, i; + int ret; if (!data) return -ENXIO; + backup_node = alloc_fixed_file_ref_node(ctx); + if (!backup_node) + return -ENOMEM; spin_lock_bh(&data->lock); ref_node = data->node; @@ -7253,7 +7268,18 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx) /* wait for all refs nodes to complete */ flush_delayed_work(&ctx->file_put_work); - wait_for_completion(&data->done); + do { + ret = wait_for_completion_interruptible(&data->done); + if (!ret) + break; + ret = io_run_task_work_sig(); + if (ret < 0) { + percpu_ref_resurrect(&data->refs); + reinit_completion(&data->done); + 
io_sqe_files_set_node(data, backup_node); + return ret; + } + } while (1); __io_sqe_files_unregister(ctx); nr_tables = DIV_ROUND_UP(ctx->nr_user_files, IORING_MAX_FILES_TABLE); @@ -7264,6 +7290,7 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx) kfree(data); ctx->file_data = NULL; ctx->nr_user_files = 0; + destroy_fixed_file_ref_node(backup_node); return 0; } @@ -7761,11 +7788,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, return PTR_ERR(ref_node); } - file_data->node = ref_node; - spin_lock_bh(&file_data->lock); - list_add_tail(&ref_node->node, &file_data->ref_list); - spin_unlock_bh(&file_data->lock); - percpu_ref_get(&file_data->refs); + io_sqe_files_set_node(file_data, ref_node); return ret; out_fput: for (i = 0; i < ctx->nr_user_files; i++) { @@ -7921,11 +7944,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx, if (needs_switch) { percpu_ref_kill(&data->node->refs); - spin_lock_bh(&data->lock); - list_add_tail(&ref_node->node, &data->ref_list); - data->node = ref_node; - spin_unlock_bh(&data->lock); - percpu_ref_get(&ctx->file_data->refs); + io_sqe_files_set_node(data, ref_node); } else destroy_fixed_file_ref_node(ref_node); @@ -8155,10 +8174,13 @@ static void io_unaccount_mem(struct io_ring_ctx *ctx, unsigned long nr_pages, __io_unaccount_mem(ctx->user, nr_pages); if (ctx->mm_account) { - if (acct == ACCT_LOCKED) + if (acct == ACCT_LOCKED) { + mmap_write_lock(ctx->mm_account); ctx->mm_account->locked_vm -= nr_pages; - else if (acct == ACCT_PINNED) + mmap_write_unlock(ctx->mm_account); + }else if (acct == ACCT_PINNED) { atomic64_sub(nr_pages, &ctx->mm_account->pinned_vm); + } } } @@ -8174,10 +8196,13 @@ static int io_account_mem(struct io_ring_ctx *ctx, unsigned long nr_pages, } if (ctx->mm_account) { - if (acct == ACCT_LOCKED) + if (acct == ACCT_LOCKED) { + mmap_write_lock(ctx->mm_account); ctx->mm_account->locked_vm += nr_pages; - else if (acct == ACCT_PINNED) + mmap_write_unlock(ctx->mm_account); + } 
else if (acct == ACCT_PINNED) { atomic64_add(nr_pages, &ctx->mm_account->pinned_vm); + } } return 0; @@ -8643,10 +8668,19 @@ static void io_ring_exit_work(struct work_struct *work) io_ring_ctx_free(ctx); } +static bool io_cancel_ctx_cb(struct io_wq_work *work, void *data) +{ + struct io_kiocb *req = container_of(work, struct io_kiocb, work); + + return req->ctx == data; +} + static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) { mutex_lock(&ctx->uring_lock); percpu_ref_kill(&ctx->refs); + /* if force is set, the ring is going away. always drop after that */ + ctx->cq_overflow_flushed = 1; if (ctx->rings) io_cqring_overflow_flush(ctx, true, NULL, NULL); mutex_unlock(&ctx->uring_lock); @@ -8655,7 +8689,7 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) io_poll_remove_all(ctx, NULL, NULL); if (ctx->io_wq) - io_wq_cancel_all(ctx->io_wq); + io_wq_cancel_cb(ctx->io_wq, io_cancel_ctx_cb, ctx, true); /* if we failed setting up the ctx, we might not have any rings */ io_iopoll_try_reap_events(ctx); @@ -8798,9 +8832,9 @@ static void __io_uring_cancel_task_requests(struct io_ring_ctx *ctx, ret |= io_poll_remove_all(ctx, task, NULL); ret |= io_kill_timeouts(ctx, task, NULL); + ret |= io_run_task_work(); if (!ret) break; - io_run_task_work(); cond_resched(); } } @@ -8849,10 +8883,9 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx, static int io_uring_add_task_file(struct io_ring_ctx *ctx, struct file *file) { struct io_uring_task *tctx = current->io_uring; + int ret; if (unlikely(!tctx)) { - int ret; - ret = io_uring_alloc_task_context(current); if (unlikely(ret)) return ret; @@ -8863,7 +8896,12 @@ static int io_uring_add_task_file(struct io_ring_ctx *ctx, struct file *file) if (!old) { get_file(file); - xa_store(&tctx->xa, (unsigned long)file, file, GFP_KERNEL); + ret = xa_err(xa_store(&tctx->xa, (unsigned long)file, + file, GFP_KERNEL)); + if (ret) { + fput(file); + return ret; + } } tctx->last = file; } @@ -8986,9 +9024,9 @@ 
void __io_uring_task_cancel(void) if (inflight != tctx_inflight(tctx)) continue; schedule(); + finish_wait(&tctx->wait, &wait); } while (1); - finish_wait(&tctx->wait, &wait); atomic_dec(&tctx->in_idle); } @@ -9156,10 +9194,13 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, */ ret = 0; if (ctx->flags & IORING_SETUP_SQPOLL) { - io_ring_submit_lock(ctx, (ctx->flags & IORING_SETUP_IOPOLL)); - if (!list_empty_careful(&ctx->cq_overflow_list)) + if (!list_empty_careful(&ctx->cq_overflow_list)) { + bool needs_lock = ctx->flags & IORING_SETUP_IOPOLL; + + io_ring_submit_lock(ctx, needs_lock); io_cqring_overflow_flush(ctx, false, NULL, NULL); - io_ring_submit_unlock(ctx, (ctx->flags & IORING_SETUP_IOPOLL)); + io_ring_submit_unlock(ctx, needs_lock); + } if (flags & IORING_ENTER_SQ_WAKEUP) wake_up(&ctx->sq_data->wait); if (flags & IORING_ENTER_SQ_WAIT) @@ -9369,55 +9410,52 @@ static int io_allocate_scq_urings(struct io_ring_ctx *ctx, return 0; } +static int io_uring_install_fd(struct io_ring_ctx *ctx, struct file *file) +{ + int ret, fd; + + fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC); + if (fd < 0) + return fd; + + ret = io_uring_add_task_file(ctx, file); + if (ret) { + put_unused_fd(fd); + return ret; + } + fd_install(fd, file); + return fd; +} + /* * Allocate an anonymous fd, this is what constitutes the application * visible backing of an io_uring instance. The application mmaps this * fd to gain access to the SQ/CQ ring details. If UNIX sockets are enabled, * we have to tie this fd to a socket for file garbage collection purposes. 
*/ -static int io_uring_get_fd(struct io_ring_ctx *ctx) +static struct file *io_uring_get_file(struct io_ring_ctx *ctx) { struct file *file; +#if defined(CONFIG_UNIX) int ret; - int fd; -#if defined(CONFIG_UNIX) ret = sock_create_kern(&init_net, PF_UNIX, SOCK_RAW, IPPROTO_IP, &ctx->ring_sock); if (ret) - return ret; + return ERR_PTR(ret); #endif - ret = get_unused_fd_flags(O_RDWR | O_CLOEXEC); - if (ret < 0) - goto err; - fd = ret; - file = anon_inode_getfile("[io_uring]", &io_uring_fops, ctx, O_RDWR | O_CLOEXEC); +#if defined(CONFIG_UNIX) if (IS_ERR(file)) { - put_unused_fd(fd); - ret = PTR_ERR(file); - goto err; + sock_release(ctx->ring_sock); + ctx->ring_sock = NULL; + } else { + ctx->ring_sock->file = file; } - -#if defined(CONFIG_UNIX) - ctx->ring_sock->file = file; #endif - ret = io_uring_add_task_file(ctx, file); - if (ret) { - fput(file); - put_unused_fd(fd); - goto err; - } - fd_install(fd, file); - return fd; -err: -#if defined(CONFIG_UNIX) - sock_release(ctx->ring_sock); - ctx->ring_sock = NULL; -#endif - return ret; + return file; } static int io_uring_create(unsigned entries, struct io_uring_params *p, @@ -9425,6 +9463,7 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p, { struct user_struct *user = NULL; struct io_ring_ctx *ctx; + struct file *file; bool limit_mem; int ret; @@ -9572,13 +9611,22 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p, goto err; } + file = io_uring_get_file(ctx); + if (IS_ERR(file)) { + ret = PTR_ERR(file); + goto err; + } + /* * Install ring fd as the very last thing, so we don't risk someone * having closed it before we finish setup */ - ret = io_uring_get_fd(ctx); - if (ret < 0) - goto err; + ret = io_uring_install_fd(ctx, file); + if (ret < 0) { + /* fput will clean it up */ + fput(file); + return ret; + } trace_io_uring_create(ret, ctx, p->sq_entries, p->cq_entries, p->flags); return ret; diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 188f79d76988..2dc944442802 
100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1869,9 +1869,7 @@ static int load_superblock(journal_t *journal) if (jbd2_has_feature_fast_commit(journal)) { journal->j_fc_last = be32_to_cpu(sb->s_maxlen); - num_fc_blocks = be32_to_cpu(sb->s_num_fc_blks); - if (!num_fc_blocks) - num_fc_blocks = JBD2_MIN_FC_BLOCKS; + num_fc_blocks = jbd2_journal_get_num_fc_blks(sb); if (journal->j_last - num_fc_blocks >= JBD2_MIN_JOURNAL_BLOCKS) journal->j_last = journal->j_fc_last - num_fc_blocks; journal->j_fc_first = journal->j_last + 1; @@ -2102,9 +2100,7 @@ jbd2_journal_initialize_fast_commit(journal_t *journal) journal_superblock_t *sb = journal->j_superblock; unsigned long long num_fc_blks; - num_fc_blks = be32_to_cpu(sb->s_num_fc_blks); - if (num_fc_blks == 0) - num_fc_blks = JBD2_MIN_FC_BLOCKS; + num_fc_blks = jbd2_journal_get_num_fc_blks(sb); if (journal->j_last - num_fc_blks < JBD2_MIN_JOURNAL_BLOCKS) return -ENOSPC; diff --git a/fs/jffs2/debug.h b/fs/jffs2/debug.h index 4fd9be4cbc98..40e203b6e5c1 100644 --- a/fs/jffs2/debug.h +++ b/fs/jffs2/debug.h @@ -13,6 +13,7 @@ #ifndef _JFFS2_DEBUG_H_ #define _JFFS2_DEBUG_H_ +#include <linux/printk.h> #include <linux/sched.h> #ifndef CONFIG_JFFS2_FS_DEBUG @@ -99,73 +100,73 @@ do { \ #ifdef JFFS2_DBG_READINODE_MESSAGES #define dbg_readinode(fmt, ...) JFFS2_DEBUG(fmt, ##__VA_ARGS__) #else -#define dbg_readinode(fmt, ...) +#define dbg_readinode(fmt, ...) no_printk(fmt, ##__VA_ARGS__) #endif #ifdef JFFS2_DBG_READINODE2_MESSAGES #define dbg_readinode2(fmt, ...) JFFS2_DEBUG(fmt, ##__VA_ARGS__) #else -#define dbg_readinode2(fmt, ...) +#define dbg_readinode2(fmt, ...) no_printk(fmt, ##__VA_ARGS__) #endif /* Fragtree build debugging messages */ #ifdef JFFS2_DBG_FRAGTREE_MESSAGES #define dbg_fragtree(fmt, ...) JFFS2_DEBUG(fmt, ##__VA_ARGS__) #else -#define dbg_fragtree(fmt, ...) +#define dbg_fragtree(fmt, ...) no_printk(fmt, ##__VA_ARGS__) #endif #ifdef JFFS2_DBG_FRAGTREE2_MESSAGES #define dbg_fragtree2(fmt, ...) 
JFFS2_DEBUG(fmt, ##__VA_ARGS__) #else -#define dbg_fragtree2(fmt, ...) +#define dbg_fragtree2(fmt, ...) no_printk(fmt, ##__VA_ARGS__) #endif /* Directory entry list manilulation debugging messages */ #ifdef JFFS2_DBG_DENTLIST_MESSAGES #define dbg_dentlist(fmt, ...) JFFS2_DEBUG(fmt, ##__VA_ARGS__) #else -#define dbg_dentlist(fmt, ...) +#define dbg_dentlist(fmt, ...) no_printk(fmt, ##__VA_ARGS__) #endif /* Print the messages about manipulating node_refs */ #ifdef JFFS2_DBG_NODEREF_MESSAGES #define dbg_noderef(fmt, ...) JFFS2_DEBUG(fmt, ##__VA_ARGS__) #else -#define dbg_noderef(fmt, ...) +#define dbg_noderef(fmt, ...) no_printk(fmt, ##__VA_ARGS__) #endif /* Manipulations with the list of inodes (JFFS2 inocache) */ #ifdef JFFS2_DBG_INOCACHE_MESSAGES #define dbg_inocache(fmt, ...) JFFS2_DEBUG(fmt, ##__VA_ARGS__) #else -#define dbg_inocache(fmt, ...) +#define dbg_inocache(fmt, ...) no_printk(fmt, ##__VA_ARGS__) #endif /* Summary debugging messages */ #ifdef JFFS2_DBG_SUMMARY_MESSAGES #define dbg_summary(fmt, ...) JFFS2_DEBUG(fmt, ##__VA_ARGS__) #else -#define dbg_summary(fmt, ...) +#define dbg_summary(fmt, ...) no_printk(fmt, ##__VA_ARGS__) #endif /* File system build messages */ #ifdef JFFS2_DBG_FSBUILD_MESSAGES #define dbg_fsbuild(fmt, ...) JFFS2_DEBUG(fmt, ##__VA_ARGS__) #else -#define dbg_fsbuild(fmt, ...) +#define dbg_fsbuild(fmt, ...) no_printk(fmt, ##__VA_ARGS__) #endif /* Watch the object allocations */ #ifdef JFFS2_DBG_MEMALLOC_MESSAGES #define dbg_memalloc(fmt, ...) JFFS2_DEBUG(fmt, ##__VA_ARGS__) #else -#define dbg_memalloc(fmt, ...) +#define dbg_memalloc(fmt, ...) no_printk(fmt, ##__VA_ARGS__) #endif /* Watch the XATTR subsystem */ #ifdef JFFS2_DBG_XATTR_MESSAGES #define dbg_xattr(fmt, ...) JFFS2_DEBUG(fmt, ##__VA_ARGS__) #else -#define dbg_xattr(fmt, ...) +#define dbg_xattr(fmt, ...) 
no_printk(fmt, ##__VA_ARGS__) #endif /* "Sanity" checks */ diff --git a/fs/jffs2/jffs2_fs_sb.h b/fs/jffs2/jffs2_fs_sb.h index 778275f48a87..5a7091746f68 100644 --- a/fs/jffs2/jffs2_fs_sb.h +++ b/fs/jffs2/jffs2_fs_sb.h @@ -38,6 +38,7 @@ struct jffs2_mount_opts { * users. This is implemented simply by means of not allowing the * latter users to write to the file system if the amount if the * available space is less then 'rp_size'. */ + bool set_rp_size; unsigned int rp_size; }; diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h index 8ff4d1a1e774..2e98fa277dab 100644 --- a/fs/jffs2/nodelist.h +++ b/fs/jffs2/nodelist.h @@ -349,14 +349,14 @@ static inline struct jffs2_node_frag *frag_last(struct rb_root *root) #define frag_parent(frag) rb_entry(rb_parent(&(frag)->rb), struct jffs2_node_frag, rb) #define frag_left(frag) rb_entry((frag)->rb.rb_left, struct jffs2_node_frag, rb) #define frag_right(frag) rb_entry((frag)->rb.rb_right, struct jffs2_node_frag, rb) -#define frag_erase(frag, list) rb_erase(&frag->rb, list); +#define frag_erase(frag, list) rb_erase(&frag->rb, list) #define tn_next(tn) rb_entry(rb_next(&(tn)->rb), struct jffs2_tmp_dnode_info, rb) #define tn_prev(tn) rb_entry(rb_prev(&(tn)->rb), struct jffs2_tmp_dnode_info, rb) #define tn_parent(tn) rb_entry(rb_parent(&(tn)->rb), struct jffs2_tmp_dnode_info, rb) #define tn_left(tn) rb_entry((tn)->rb.rb_left, struct jffs2_tmp_dnode_info, rb) #define tn_right(tn) rb_entry((tn)->rb.rb_right, struct jffs2_tmp_dnode_info, rb) -#define tn_erase(tn, list) rb_erase(&tn->rb, list); +#define tn_erase(tn, list) rb_erase(&tn->rb, list) #define tn_last(list) rb_entry(rb_last(list), struct jffs2_tmp_dnode_info, rb) #define tn_first(list) rb_entry(rb_first(list), struct jffs2_tmp_dnode_info, rb) diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c index 2f6f0b140c05..03b4f99614be 100644 --- a/fs/jffs2/readinode.c +++ b/fs/jffs2/readinode.c @@ -672,6 +672,22 @@ static inline int read_direntry(struct jffs2_sb_info *c, 
struct jffs2_raw_node_r jffs2_free_full_dirent(fd); return -EIO; } + +#ifdef CONFIG_JFFS2_SUMMARY + /* + * we use CONFIG_JFFS2_SUMMARY because without it, we + * have checked it while mounting + */ + crc = crc32(0, fd->name, rd->nsize); + if (unlikely(crc != je32_to_cpu(rd->name_crc))) { + JFFS2_NOTICE("name CRC failed on dirent node at" + "%#08x: read %#08x,calculated %#08x\n", + ref_offset(ref), je32_to_cpu(rd->node_crc), crc); + jffs2_mark_node_obsolete(c, ref); + jffs2_free_full_dirent(fd); + return 0; + } +#endif } fd->nhash = full_name_hash(NULL, fd->name, rd->nsize); diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 05d7878dfad1..81ca58c10b72 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -88,7 +88,7 @@ static int jffs2_show_options(struct seq_file *s, struct dentry *root) if (opts->override_compr) seq_printf(s, ",compr=%s", jffs2_compr_name(opts->compr)); - if (opts->rp_size) + if (opts->set_rp_size) seq_printf(s, ",rp_size=%u", opts->rp_size / 1024); return 0; @@ -202,11 +202,8 @@ static int jffs2_parse_param(struct fs_context *fc, struct fs_parameter *param) case Opt_rp_size: if (result.uint_32 > UINT_MAX / 1024) return invalf(fc, "jffs2: rp_size unrepresentable"); - opt = result.uint_32 * 1024; - if (opt > c->mtd->size) - return invalf(fc, "jffs2: Too large reserve pool specified, max is %llu KB", - c->mtd->size / 1024); - c->mount_opts.rp_size = opt; + c->mount_opts.rp_size = result.uint_32 * 1024; + c->mount_opts.set_rp_size = true; break; default: return -EINVAL; @@ -215,11 +212,30 @@ static int jffs2_parse_param(struct fs_context *fc, struct fs_parameter *param) return 0; } +static inline void jffs2_update_mount_opts(struct fs_context *fc) +{ + struct jffs2_sb_info *new_c = fc->s_fs_info; + struct jffs2_sb_info *c = JFFS2_SB_INFO(fc->root->d_sb); + + mutex_lock(&c->alloc_sem); + if (new_c->mount_opts.override_compr) { + c->mount_opts.override_compr = new_c->mount_opts.override_compr; + c->mount_opts.compr = new_c->mount_opts.compr; + } 
+ if (new_c->mount_opts.set_rp_size) { + c->mount_opts.set_rp_size = new_c->mount_opts.set_rp_size; + c->mount_opts.rp_size = new_c->mount_opts.rp_size; + } + mutex_unlock(&c->alloc_sem); +} + static int jffs2_reconfigure(struct fs_context *fc) { struct super_block *sb = fc->root->d_sb; sync_filesystem(sb); + jffs2_update_mount_opts(fc); + return jffs2_do_remount_fs(sb, fc); } @@ -249,6 +265,10 @@ static int jffs2_fill_super(struct super_block *sb, struct fs_context *fc) c->mtd = sb->s_mtd; c->os_priv = sb; + if (c->mount_opts.rp_size > c->mtd->size) + return invalf(fc, "jffs2: Too large reserve pool specified, max is %llu KB", + c->mtd->size / 1024); + /* Initialize JFFS2 superblock locks, the further initialization will * be done later */ mutex_init(&c->alloc_sem); diff --git a/fs/namei.c b/fs/namei.c index 03d0e11e4f36..78443a85480a 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2114,8 +2114,10 @@ static int link_path_walk(const char *name, struct nameidata *nd) return PTR_ERR(name); while (*name=='/') name++; - if (!*name) + if (!*name) { + nd->dir_mode = 0; // short-circuit the 'hardening' idiocy return 0; + } /* At this point we know we have a real path component. 
*/ for(;;) { diff --git a/fs/namespace.c b/fs/namespace.c index 2b681f65ca04..d2db7dfe232b 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -156,10 +156,10 @@ static inline void mnt_add_count(struct mount *mnt, int n) /* * vfsmount lock must be held for write */ -unsigned int mnt_get_count(struct mount *mnt) +int mnt_get_count(struct mount *mnt) { #ifdef CONFIG_SMP - unsigned int count = 0; + int count = 0; int cpu; for_each_possible_cpu(cpu) { @@ -1139,6 +1139,7 @@ static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput); static void mntput_no_expire(struct mount *mnt) { LIST_HEAD(list); + int count; rcu_read_lock(); if (likely(READ_ONCE(mnt->mnt_ns))) { @@ -1162,7 +1163,9 @@ static void mntput_no_expire(struct mount *mnt) */ smp_mb(); mnt_add_count(mnt, -1); - if (mnt_get_count(mnt)) { + count = mnt_get_count(mnt); + if (count != 0) { + WARN_ON(count < 0); rcu_read_unlock(); unlock_mount_hash(); return; diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index af375e049aae..ec8ae4257975 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -663,6 +663,8 @@ const struct file_operations orangefs_file_operations = { .unlocked_ioctl = orangefs_ioctl, .mmap = orangefs_file_mmap, .open = generic_file_open, + .splice_read = generic_file_splice_read, + .splice_write = iter_file_splice_write, .flush = orangefs_flush, .release = orangefs_file_release, .fsync = orangefs_fsync, diff --git a/fs/pnode.h b/fs/pnode.h index 49a058c73e4c..26f74e092bd9 100644 --- a/fs/pnode.h +++ b/fs/pnode.h @@ -44,7 +44,7 @@ int propagate_mount_busy(struct mount *, int); void propagate_mount_unlock(struct mount *); void mnt_release_group_id(struct mount *); int get_dominating_id(struct mount *mnt, const struct path *root); -unsigned int mnt_get_count(struct mount *mnt); +int mnt_get_count(struct mount *mnt); void mnt_set_mountpoint(struct mount *, struct mountpoint *, struct mount *); void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, diff --git 
a/fs/proc_namespace.c b/fs/proc_namespace.c index e59d4bb3a89e..eafb75755fa3 100644 --- a/fs/proc_namespace.c +++ b/fs/proc_namespace.c @@ -320,7 +320,8 @@ static int mountstats_open(struct inode *inode, struct file *file) const struct file_operations proc_mounts_operations = { .open = mounts_open, - .read = seq_read, + .read_iter = seq_read_iter, + .splice_read = generic_file_splice_read, .llseek = seq_lseek, .release = mounts_release, .poll = mounts_poll, @@ -328,7 +329,8 @@ const struct file_operations proc_mounts_operations = { const struct file_operations proc_mountinfo_operations = { .open = mountinfo_open, - .read = seq_read, + .read_iter = seq_read_iter, + .splice_read = generic_file_splice_read, .llseek = seq_lseek, .release = mounts_release, .poll = mounts_poll, @@ -336,7 +338,8 @@ const struct file_operations proc_mountinfo_operations = { const struct file_operations proc_mountstats_operations = { .open = mountstats_open, - .read = seq_read, + .read_iter = seq_read_iter, + .splice_read = generic_file_splice_read, .llseek = seq_lseek, .release = mounts_release, }; diff --git a/fs/ubifs/auth.c b/fs/ubifs/auth.c index 0886d835f597..51a7c8c2c3f0 100644 --- a/fs/ubifs/auth.c +++ b/fs/ubifs/auth.c @@ -337,8 +337,10 @@ int ubifs_init_authentication(struct ubifs_info *c) c->authenticated = true; c->log_hash = ubifs_hash_get_desc(c); - if (IS_ERR(c->log_hash)) + if (IS_ERR(c->log_hash)) { + err = PTR_ERR(c->log_hash); goto out_free_hmac; + } err = 0; diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c index b5cdac9b0368..c4fc1047fc07 100644 --- a/fs/ubifs/commit.c +++ b/fs/ubifs/commit.c @@ -701,13 +701,13 @@ out: out_dump: ubifs_err(c, "dumping index node (iip=%d)", i->iip); - ubifs_dump_node(c, idx); + ubifs_dump_node(c, idx, ubifs_idx_node_sz(c, c->fanout)); list_del(&i->list); kfree(i); if (!list_empty(&list)) { i = list_entry(list.prev, struct idx_node, list); ubifs_err(c, "dumping parent index node"); - ubifs_dump_node(c, &i->idx); + ubifs_dump_node(c, 
&i->idx, ubifs_idx_node_sz(c, c->fanout)); } out_free: while (!list_empty(&list)) { diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index ebff43f8009c..1bbb9fe661b1 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -291,9 +291,9 @@ void ubifs_dump_inode(struct ubifs_info *c, const struct inode *inode) kfree(pdent); } -void ubifs_dump_node(const struct ubifs_info *c, const void *node) +void ubifs_dump_node(const struct ubifs_info *c, const void *node, int node_len) { - int i, n; + int i, n, type, safe_len, max_node_len, min_node_len; union ubifs_key key; const struct ubifs_ch *ch = node; char key_buf[DBG_KEY_BUF_LEN]; @@ -306,10 +306,40 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node) return; } + /* Skip dumping unknown type node */ + type = ch->node_type; + if (type < 0 || type >= UBIFS_NODE_TYPES_CNT) { + pr_err("node type %d was not recognized\n", type); + return; + } + spin_lock(&dbg_lock); dump_ch(node); - switch (ch->node_type) { + if (c->ranges[type].max_len == 0) { + max_node_len = min_node_len = c->ranges[type].len; + } else { + max_node_len = c->ranges[type].max_len; + min_node_len = c->ranges[type].min_len; + } + safe_len = le32_to_cpu(ch->len); + safe_len = safe_len > 0 ? safe_len : 0; + safe_len = min3(safe_len, max_node_len, node_len); + if (safe_len < min_node_len) { + pr_err("node len(%d) is too short for %s, left %d bytes:\n", + safe_len, dbg_ntype(type), + safe_len > UBIFS_CH_SZ ? 
+ safe_len - (int)UBIFS_CH_SZ : 0); + if (safe_len > UBIFS_CH_SZ) + print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 32, 1, + (void *)node + UBIFS_CH_SZ, + safe_len - UBIFS_CH_SZ, 0); + goto out_unlock; + } + if (safe_len != le32_to_cpu(ch->len)) + pr_err("\ttruncated node length %d\n", safe_len); + + switch (type) { case UBIFS_PAD_NODE: { const struct ubifs_pad_node *pad = node; @@ -453,7 +483,8 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node) pr_err("\tnlen %d\n", nlen); pr_err("\tname "); - if (nlen > UBIFS_MAX_NLEN) + if (nlen > UBIFS_MAX_NLEN || + nlen > safe_len - UBIFS_DENT_NODE_SZ) pr_err("(bad name length, not printing, bad or corrupted node)"); else { for (i = 0; i < nlen && dent->name[i]; i++) @@ -467,7 +498,6 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node) case UBIFS_DATA_NODE: { const struct ubifs_data_node *dn = node; - int dlen = le32_to_cpu(ch->len) - UBIFS_DATA_NODE_SZ; key_read(c, &dn->key, &key); pr_err("\tkey %s\n", @@ -475,10 +505,13 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node) pr_err("\tsize %u\n", le32_to_cpu(dn->size)); pr_err("\tcompr_typ %d\n", (int)le16_to_cpu(dn->compr_type)); - pr_err("\tdata size %d\n", dlen); - pr_err("\tdata:\n"); + pr_err("\tdata size %u\n", + le32_to_cpu(ch->len) - (unsigned int)UBIFS_DATA_NODE_SZ); + pr_err("\tdata (length = %d):\n", + safe_len - (int)UBIFS_DATA_NODE_SZ); print_hex_dump(KERN_ERR, "\t", DUMP_PREFIX_OFFSET, 32, 1, - (void *)&dn->data, dlen, 0); + (void *)&dn->data, + safe_len - (int)UBIFS_DATA_NODE_SZ, 0); break; } case UBIFS_TRUN_NODE: @@ -495,13 +528,16 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node) case UBIFS_IDX_NODE: { const struct ubifs_idx_node *idx = node; + int max_child_cnt = (safe_len - UBIFS_IDX_NODE_SZ) / + (ubifs_idx_node_sz(c, 1) - + UBIFS_IDX_NODE_SZ); - n = le16_to_cpu(idx->child_cnt); - pr_err("\tchild_cnt %d\n", n); + n = min_t(int, le16_to_cpu(idx->child_cnt), max_child_cnt); + 
pr_err("\tchild_cnt %d\n", (int)le16_to_cpu(idx->child_cnt)); pr_err("\tlevel %d\n", (int)le16_to_cpu(idx->level)); pr_err("\tBranches:\n"); - for (i = 0; i < n && i < c->fanout - 1; i++) { + for (i = 0; i < n && i < c->fanout; i++) { const struct ubifs_branch *br; br = ubifs_idx_branch(c, idx, i); @@ -525,7 +561,7 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node) le64_to_cpu(orph->cmt_no) & LLONG_MAX); pr_err("\tlast node flag %llu\n", (unsigned long long)(le64_to_cpu(orph->cmt_no)) >> 63); - n = (le32_to_cpu(ch->len) - UBIFS_ORPH_NODE_SZ) >> 3; + n = (safe_len - UBIFS_ORPH_NODE_SZ) >> 3; pr_err("\t%d orphan inode numbers:\n", n); for (i = 0; i < n; i++) pr_err("\t ino %llu\n", @@ -537,9 +573,10 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node) break; } default: - pr_err("node type %d was not recognized\n", - (int)ch->node_type); + pr_err("node type %d was not recognized\n", type); } + +out_unlock: spin_unlock(&dbg_lock); } @@ -764,7 +801,7 @@ void ubifs_dump_lpt_info(struct ubifs_info *c) pr_err("\tnnode_sz: %d\n", c->nnode_sz); pr_err("\tltab_sz: %d\n", c->ltab_sz); pr_err("\tlsave_sz: %d\n", c->lsave_sz); - pr_err("\tbig_lpt: %d\n", c->big_lpt); + pr_err("\tbig_lpt: %u\n", c->big_lpt); pr_err("\tlpt_hght: %d\n", c->lpt_hght); pr_err("\tpnode_cnt: %d\n", c->pnode_cnt); pr_err("\tnnode_cnt: %d\n", c->nnode_cnt); @@ -791,22 +828,6 @@ void ubifs_dump_lpt_info(struct ubifs_info *c) spin_unlock(&dbg_lock); } -void ubifs_dump_sleb(const struct ubifs_info *c, - const struct ubifs_scan_leb *sleb, int offs) -{ - struct ubifs_scan_node *snod; - - pr_err("(pid %d) start dumping scanned data from LEB %d:%d\n", - current->pid, sleb->lnum, offs); - - list_for_each_entry(snod, &sleb->nodes, list) { - cond_resched(); - pr_err("Dumping node at LEB %d:%d len %d\n", - sleb->lnum, snod->offs, snod->len); - ubifs_dump_node(c, snod->node); - } -} - void ubifs_dump_leb(const struct ubifs_info *c, int lnum) { struct ubifs_scan_leb *sleb; @@ 
-834,7 +855,7 @@ void ubifs_dump_leb(const struct ubifs_info *c, int lnum) cond_resched(); pr_err("Dumping node at LEB %d:%d len %d\n", lnum, snod->offs, snod->len); - ubifs_dump_node(c, snod->node); + ubifs_dump_node(c, snod->node, c->leb_size - snod->offs); } pr_err("(pid %d) finish dumping LEB %d\n", current->pid, lnum); @@ -1012,7 +1033,7 @@ void dbg_save_space_info(struct ubifs_info *c) * * This function compares current flash space information with the information * which was saved when the 'dbg_save_space_info()' function was called. - * Returns zero if the information has not changed, and %-EINVAL it it has + * Returns zero if the information has not changed, and %-EINVAL if it has * changed. */ int dbg_check_space_info(struct ubifs_info *c) @@ -1212,7 +1233,7 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1, ubifs_err(c, "but it should have key %s according to tnc", dbg_snprintf_key(c, &zbr1->key, key_buf, DBG_KEY_BUF_LEN)); - ubifs_dump_node(c, dent1); + ubifs_dump_node(c, dent1, UBIFS_MAX_DENT_NODE_SZ); goto out_free; } @@ -1224,7 +1245,7 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1, ubifs_err(c, "but it should have key %s according to tnc", dbg_snprintf_key(c, &zbr2->key, key_buf, DBG_KEY_BUF_LEN)); - ubifs_dump_node(c, dent2); + ubifs_dump_node(c, dent2, UBIFS_MAX_DENT_NODE_SZ); goto out_free; } @@ -1243,9 +1264,9 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1, dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); ubifs_msg(c, "first node at %d:%d\n", zbr1->lnum, zbr1->offs); - ubifs_dump_node(c, dent1); + ubifs_dump_node(c, dent1, UBIFS_MAX_DENT_NODE_SZ); ubifs_msg(c, "second node at %d:%d\n", zbr2->lnum, zbr2->offs); - ubifs_dump_node(c, dent2); + ubifs_dump_node(c, dent2, UBIFS_MAX_DENT_NODE_SZ); out_free: kfree(dent2); @@ -2110,7 +2131,7 @@ out: out_dump: ubifs_msg(c, "dump of node at LEB %d:%d", zbr->lnum, zbr->offs); - ubifs_dump_node(c, 
node); + ubifs_dump_node(c, node, zbr->len); out_free: kfree(node); return err; @@ -2243,7 +2264,7 @@ out_dump: ubifs_msg(c, "dump of the inode %lu sitting in LEB %d:%d", (unsigned long)fscki->inum, zbr->lnum, zbr->offs); - ubifs_dump_node(c, ino); + ubifs_dump_node(c, ino, zbr->len); kfree(ino); return -EINVAL; } @@ -2314,12 +2335,12 @@ int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head) if (sa->type != UBIFS_DATA_NODE) { ubifs_err(c, "bad node type %d", sa->type); - ubifs_dump_node(c, sa->node); + ubifs_dump_node(c, sa->node, c->leb_size - sa->offs); return -EINVAL; } if (sb->type != UBIFS_DATA_NODE) { ubifs_err(c, "bad node type %d", sb->type); - ubifs_dump_node(c, sb->node); + ubifs_dump_node(c, sb->node, c->leb_size - sb->offs); return -EINVAL; } @@ -2350,8 +2371,8 @@ int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head) return 0; error_dump: - ubifs_dump_node(c, sa->node); - ubifs_dump_node(c, sb->node); + ubifs_dump_node(c, sa->node, c->leb_size - sa->offs); + ubifs_dump_node(c, sb->node, c->leb_size - sb->offs); return -EINVAL; } @@ -2382,13 +2403,13 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head) if (sa->type != UBIFS_INO_NODE && sa->type != UBIFS_DENT_NODE && sa->type != UBIFS_XENT_NODE) { ubifs_err(c, "bad node type %d", sa->type); - ubifs_dump_node(c, sa->node); + ubifs_dump_node(c, sa->node, c->leb_size - sa->offs); return -EINVAL; } if (sb->type != UBIFS_INO_NODE && sb->type != UBIFS_DENT_NODE && sb->type != UBIFS_XENT_NODE) { ubifs_err(c, "bad node type %d", sb->type); - ubifs_dump_node(c, sb->node); + ubifs_dump_node(c, sb->node, c->leb_size - sb->offs); return -EINVAL; } @@ -2438,11 +2459,10 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head) error_dump: ubifs_msg(c, "dumping first node"); - ubifs_dump_node(c, sa->node); + ubifs_dump_node(c, sa->node, c->leb_size - sa->offs); ubifs_msg(c, "dumping second node"); - ubifs_dump_node(c, 
sb->node); + ubifs_dump_node(c, sb->node, c->leb_size - sb->offs); return -EINVAL; - return 0; } static inline int chance(unsigned int n, unsigned int out_of) diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h index 7763639a426b..ed966108da80 100644 --- a/fs/ubifs/debug.h +++ b/fs/ubifs/debug.h @@ -242,7 +242,8 @@ const char *dbg_get_key_dump(const struct ubifs_info *c, const char *dbg_snprintf_key(const struct ubifs_info *c, const union ubifs_key *key, char *buffer, int len); void ubifs_dump_inode(struct ubifs_info *c, const struct inode *inode); -void ubifs_dump_node(const struct ubifs_info *c, const void *node); +void ubifs_dump_node(const struct ubifs_info *c, const void *node, + int node_len); void ubifs_dump_budget_req(const struct ubifs_budget_req *req); void ubifs_dump_lstats(const struct ubifs_lp_stats *lst); void ubifs_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi); @@ -251,8 +252,6 @@ void ubifs_dump_lprop(const struct ubifs_info *c, void ubifs_dump_lprops(struct ubifs_info *c); void ubifs_dump_lpt_info(struct ubifs_info *c); void ubifs_dump_leb(const struct ubifs_info *c, int lnum); -void ubifs_dump_sleb(const struct ubifs_info *c, - const struct ubifs_scan_leb *sleb, int offs); void ubifs_dump_znode(const struct ubifs_info *c, const struct ubifs_znode *znode); void ubifs_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 7949d7c9aa8c..9a6b8660425a 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -844,7 +844,7 @@ out_fname: * * This function checks if directory @dir is empty. Returns zero if the * directory is empty, %-ENOTEMPTY if it is not, and other negative error codes - * in case of of errors. + * in case of errors. 
*/ int ubifs_check_dir_empty(struct inode *dir) { @@ -1632,9 +1632,7 @@ const struct inode_operations ubifs_dir_inode_operations = { .rename = ubifs_rename, .setattr = ubifs_setattr, .getattr = ubifs_getattr, -#ifdef CONFIG_UBIFS_FS_XATTR .listxattr = ubifs_listxattr, -#endif .update_time = ubifs_update_time, .tmpfile = ubifs_tmpfile, }; diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index b77d1637bbbc..2bc7780d2963 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -92,7 +92,7 @@ static int read_block(struct inode *inode, void *addr, unsigned int block, dump: ubifs_err(c, "bad data node (block %u, inode %lu)", block, inode->i_ino); - ubifs_dump_node(c, dn); + ubifs_dump_node(c, dn, UBIFS_MAX_DATA_NODE_SZ); return -EINVAL; } @@ -205,7 +205,7 @@ static void release_new_page_budget(struct ubifs_info *c) * @c: UBIFS file-system description object * * This is a helper function which releases budget corresponding to the budget - * of changing one one page of data which already exists on the flash media. + * of changing one page of data which already exists on the flash media. */ static void release_existing_page_budget(struct ubifs_info *c) { @@ -1645,9 +1645,7 @@ const struct address_space_operations ubifs_file_address_operations = { const struct inode_operations ubifs_file_inode_operations = { .setattr = ubifs_setattr, .getattr = ubifs_getattr, -#ifdef CONFIG_UBIFS_FS_XATTR .listxattr = ubifs_listxattr, -#endif .update_time = ubifs_update_time, }; @@ -1655,9 +1653,7 @@ const struct inode_operations ubifs_symlink_inode_operations = { .get_link = ubifs_get_link, .setattr = ubifs_setattr, .getattr = ubifs_getattr, -#ifdef CONFIG_UBIFS_FS_XATTR .listxattr = ubifs_listxattr, -#endif .update_time = ubifs_update_time, }; diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c index 7e4bfaf2871f..00b61dba62b7 100644 --- a/fs/ubifs/io.c +++ b/fs/ubifs/io.c @@ -198,6 +198,7 @@ int ubifs_is_mapped(const struct ubifs_info *c, int lnum) * ubifs_check_node - check node. 
* @c: UBIFS file-system description object * @buf: node to check + * @len: node length * @lnum: logical eraseblock number * @offs: offset within the logical eraseblock * @quiet: print no messages @@ -222,10 +223,10 @@ int ubifs_is_mapped(const struct ubifs_info *c, int lnum) * This function returns zero in case of success and %-EUCLEAN in case of bad * CRC or magic. */ -int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, - int offs, int quiet, int must_chk_crc) +int ubifs_check_node(const struct ubifs_info *c, const void *buf, int len, + int lnum, int offs, int quiet, int must_chk_crc) { - int err = -EINVAL, type, node_len, dump_node = 1; + int err = -EINVAL, type, node_len; uint32_t crc, node_crc, magic; const struct ubifs_ch *ch = buf; @@ -278,22 +279,10 @@ int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, out_len: if (!quiet) ubifs_err(c, "bad node length %d", node_len); - if (type == UBIFS_DATA_NODE && node_len > UBIFS_DATA_NODE_SZ) - dump_node = 0; out: if (!quiet) { ubifs_err(c, "bad node at LEB %d:%d", lnum, offs); - if (dump_node) { - ubifs_dump_node(c, buf); - } else { - int safe_len = min3(node_len, c->leb_size - offs, - (int)UBIFS_MAX_DATA_NODE_SZ); - pr_err("\tprevent out-of-bounds memory access\n"); - pr_err("\ttruncated data node length %d\n", safe_len); - pr_err("\tcorrupted data node:\n"); - print_hex_dump(KERN_ERR, "\t", DUMP_PREFIX_OFFSET, 32, 1, - buf, safe_len, 0); - } + ubifs_dump_node(c, buf, len); dump_stack(); } return err; @@ -319,7 +308,7 @@ void ubifs_pad(const struct ubifs_info *c, void *buf, int pad) { uint32_t crc; - ubifs_assert(c, pad >= 0 && !(pad & 7)); + ubifs_assert(c, pad >= 0); if (pad >= UBIFS_PAD_NODE_SZ) { struct ubifs_ch *ch = buf; @@ -730,7 +719,7 @@ out_timers: int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) { struct ubifs_info *c = wbuf->c; - int err, written, n, aligned_len = ALIGN(len, 8); + int err, n, written = 0, aligned_len = ALIGN(len, 8); 
dbg_io("%d bytes (%s) to jhead %s wbuf at LEB %d:%d", len, dbg_ntype(((struct ubifs_ch *)buf)->node_type), @@ -764,6 +753,10 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) * write-buffer. */ memcpy(wbuf->buf + wbuf->used, buf, len); + if (aligned_len > len) { + ubifs_assert(c, aligned_len - len < 8); + ubifs_pad(c, wbuf->buf + wbuf->used + len, aligned_len - len); + } if (aligned_len == wbuf->avail) { dbg_io("flush jhead %s wbuf to LEB %d:%d", @@ -793,8 +786,6 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) goto exit; } - written = 0; - if (wbuf->used) { /* * The node is large enough and does not fit entirely within @@ -856,13 +847,18 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) } spin_lock(&wbuf->lock); - if (aligned_len) + if (aligned_len) { /* * And now we have what's left and what does not take whole * max. write unit, so write it to the write-buffer and we are * done. */ memcpy(wbuf->buf, buf + written, len); + if (aligned_len > len) { + ubifs_assert(c, aligned_len - len < 8); + ubifs_pad(c, wbuf->buf + len, aligned_len - len); + } + } if (c->leb_size - wbuf->offs >= c->max_write_size) wbuf->size = c->max_write_size; @@ -890,7 +886,7 @@ exit: out: ubifs_err(c, "cannot write %d bytes to LEB %d:%d, error %d", len, wbuf->lnum, wbuf->offs, err); - ubifs_dump_node(c, buf); + ubifs_dump_node(c, buf, written + len); dump_stack(); ubifs_dump_leb(c, wbuf->lnum); return err; @@ -933,7 +929,7 @@ int ubifs_write_node_hmac(struct ubifs_info *c, void *buf, int len, int lnum, err = ubifs_leb_write(c, lnum, buf, offs, buf_len); if (err) - ubifs_dump_node(c, buf); + ubifs_dump_node(c, buf, len); return err; } @@ -1016,7 +1012,7 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len, goto out; } - err = ubifs_check_node(c, buf, lnum, offs, 0, 0); + err = ubifs_check_node(c, buf, len, lnum, offs, 0, 0); if (err) { ubifs_err(c, "expected node type %d", type); return 
err; @@ -1032,7 +1028,7 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len, out: ubifs_err(c, "bad node at LEB %d:%d", lnum, offs); - ubifs_dump_node(c, buf); + ubifs_dump_node(c, buf, len); dump_stack(); return -EINVAL; } @@ -1046,7 +1042,7 @@ out: * @lnum: logical eraseblock number * @offs: offset within the logical eraseblock * - * This function reads a node of known type and and length, checks it and + * This function reads a node of known type and length, checks it and * stores in @buf. Returns zero in case of success, %-EUCLEAN if CRC mismatched * and a negative error code in case of failure. */ @@ -1072,7 +1068,7 @@ int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len, goto out; } - err = ubifs_check_node(c, buf, lnum, offs, 0, 0); + err = ubifs_check_node(c, buf, len, lnum, offs, 0, 0); if (err) { ubifs_errc(c, "expected node type %d", type); return err; @@ -1090,7 +1086,7 @@ out: ubifs_errc(c, "bad node at LEB %d:%d, LEB mapping status %d", lnum, offs, ubi_is_mapped(c->ubi, lnum)); if (!c->probing) { - ubifs_dump_node(c, buf); + ubifs_dump_node(c, buf, len); dump_stack(); } return -EINVAL; diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index 091c2ad8f211..03410ae0813a 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -1559,7 +1559,8 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode, if (dn_len <= 0 || dn_len > UBIFS_BLOCK_SIZE) { ubifs_err(c, "bad data node (block %u, inode %lu)", blk, inode->i_ino); - ubifs_dump_node(c, dn); + ubifs_dump_node(c, dn, sz - UBIFS_INO_NODE_SZ - + UBIFS_TRUN_NODE_SZ); goto out_free; } diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c index 6e0a153b7194..778a22bf9a92 100644 --- a/fs/ubifs/lpt.c +++ b/fs/ubifs/lpt.c @@ -851,7 +851,7 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first, dbg_lp("lsave_sz %d", c->lsave_sz); dbg_lp("lsave_cnt %d", c->lsave_cnt); dbg_lp("lpt_hght %d", c->lpt_hght); - dbg_lp("big_lpt 
%d", c->big_lpt); + dbg_lp("big_lpt %u", c->big_lpt); dbg_lp("LPT root is at %d:%d", c->lpt_lnum, c->lpt_offs); dbg_lp("LPT head is at %d:%d", c->nhead_lnum, c->nhead_offs); dbg_lp("LPT ltab is at %d:%d", c->ltab_lnum, c->ltab_offs); @@ -1824,7 +1824,7 @@ static int lpt_init_rd(struct ubifs_info *c) dbg_lp("lsave_sz %d", c->lsave_sz); dbg_lp("lsave_cnt %d", c->lsave_cnt); dbg_lp("lpt_hght %d", c->lpt_hght); - dbg_lp("big_lpt %d", c->big_lpt); + dbg_lp("big_lpt %u", c->big_lpt); dbg_lp("LPT root is at %d:%d", c->lpt_lnum, c->lpt_offs); dbg_lp("LPT head is at %d:%d", c->nhead_lnum, c->nhead_offs); dbg_lp("LPT ltab is at %d:%d", c->ltab_lnum, c->ltab_offs); diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c index 911d0555b9f2..0df9a3dd0aaa 100644 --- a/fs/ubifs/master.c +++ b/fs/ubifs/master.c @@ -314,7 +314,7 @@ static int validate_master(const struct ubifs_info *c) out: ubifs_err(c, "bad master node at offset %d error %d", c->mst_offs, err); - ubifs_dump_node(c, c->mst_node); + ubifs_dump_node(c, c->mst_node, c->mst_node_alsz); return -EINVAL; } @@ -392,7 +392,7 @@ int ubifs_read_master(struct ubifs_info *c) if (c->leb_cnt < old_leb_cnt || c->leb_cnt < UBIFS_MIN_LEB_CNT) { ubifs_err(c, "bad leb_cnt on master node"); - ubifs_dump_node(c, c->mst_node); + ubifs_dump_node(c, c->mst_node, c->mst_node_alsz); return -EINVAL; } diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c index 0fb61956146d..4909321d84cf 100644 --- a/fs/ubifs/orphan.c +++ b/fs/ubifs/orphan.c @@ -646,7 +646,8 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb, if (snod->type != UBIFS_ORPH_NODE) { ubifs_err(c, "invalid node type %d in orphan area at %d:%d", snod->type, sleb->lnum, snod->offs); - ubifs_dump_node(c, snod->node); + ubifs_dump_node(c, snod->node, + c->leb_size - snod->offs); err = -EINVAL; goto out_free; } @@ -674,7 +675,8 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb, if (!first) { ubifs_err(c, "out of order commit number %llu 
in orphan node at %d:%d", cmt_no, sleb->lnum, snod->offs); - ubifs_dump_node(c, snod->node); + ubifs_dump_node(c, snod->node, + c->leb_size - snod->offs); err = -EINVAL; goto out_free; } diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index f116f7b3f9e5..f0d51dd21c9e 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -352,11 +352,11 @@ out_free: ubifs_err(c, "failed to recover master node"); if (mst1) { ubifs_err(c, "dumping first master node"); - ubifs_dump_node(c, mst1); + ubifs_dump_node(c, mst1, c->leb_size - ((void *)mst1 - buf1)); } if (mst2) { ubifs_err(c, "dumping second master node"); - ubifs_dump_node(c, mst2); + ubifs_dump_node(c, mst2, c->leb_size - ((void *)mst2 - buf2)); } vfree(buf2); vfree(buf1); @@ -469,7 +469,7 @@ static int no_more_nodes(const struct ubifs_info *c, void *buf, int len, * The area after the common header size is not empty, so the common * header must be intact. Check it. */ - if (ubifs_check_node(c, buf, lnum, offs, 1, 0) != -EUCLEAN) { + if (ubifs_check_node(c, buf, len, lnum, offs, 1, 0) != -EUCLEAN) { dbg_rcvry("unexpected bad common header at %d:%d", lnum, offs); return 0; } diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index 2f8d8f4f411a..79801c9a5b87 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c @@ -574,7 +574,7 @@ static int authenticate_sleb_hash(struct ubifs_info *c, struct shash_desc *log_h * @c: UBIFS file-system description object * @sleb: the scan LEB to authenticate * @log_hash: - * @is_last: if true, this is is the last LEB + * @is_last: if true, this is the last LEB * * This function iterates over the buds of a single LEB authenticating all buds * with the authentication nodes on this LEB. 
Authentication nodes are written @@ -827,7 +827,7 @@ out: out_dump: ubifs_err(c, "bad node is at LEB %d:%d", lnum, snod->offs); - ubifs_dump_node(c, snod->node); + ubifs_dump_node(c, snod->node, c->leb_size - snod->offs); ubifs_scan_destroy(sleb); return -EINVAL; } @@ -1123,7 +1123,7 @@ out: out_dump: ubifs_err(c, "log error detected while replaying the log at LEB %d:%d", lnum, offs + snod->offs); - ubifs_dump_node(c, snod->node); + ubifs_dump_node(c, snod->node, c->leb_size - snod->offs); ubifs_scan_destroy(sleb); return -EINVAL; } diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c index c0d3e4008d23..c160f718c288 100644 --- a/fs/ubifs/sb.c +++ b/fs/ubifs/sb.c @@ -503,7 +503,7 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup) failed: ubifs_err(c, "bad superblock, error %d", err); - ubifs_dump_node(c, sup); + ubifs_dump_node(c, sup, ALIGN(UBIFS_SB_NODE_SZ, c->min_io_size)); return -EINVAL; } diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c index c69cdb5e65bc..84a9157dcc32 100644 --- a/fs/ubifs/scan.c +++ b/fs/ubifs/scan.c @@ -76,7 +76,7 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum, dbg_scan("scanning %s at LEB %d:%d", dbg_ntype(ch->node_type), lnum, offs); - if (ubifs_check_node(c, buf, lnum, offs, quiet, 1)) + if (ubifs_check_node(c, buf, len, lnum, offs, quiet, 1)) return SCANNED_A_CORRUPT_NODE; if (ch->node_type == UBIFS_PAD_NODE) { @@ -90,7 +90,7 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum, if (!quiet) { ubifs_err(c, "bad pad node at LEB %d:%d", lnum, offs); - ubifs_dump_node(c, pad); + ubifs_dump_node(c, pad, len); } return SCANNED_A_BAD_PAD_NODE; } diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index cb3acfb7dd1f..138b9426c6c1 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -253,7 +253,7 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum) out_invalid: ubifs_err(c, "inode %lu validation failed, error %d", inode->i_ino, err); - 
ubifs_dump_node(c, ino); + ubifs_dump_node(c, ino, UBIFS_MAX_INO_NODE_SZ); ubifs_dump_inode(c, inode); err = -EINVAL; out_ino: @@ -1572,7 +1572,7 @@ static int mount_ubifs(struct ubifs_info *c) dbg_gen("main area LEBs: %d (%d - %d)", c->main_lebs, c->main_first, c->leb_cnt - 1); dbg_gen("index LEBs: %d", c->lst.idx_lebs); - dbg_gen("total index bytes: %lld (%lld KiB, %lld MiB)", + dbg_gen("total index bytes: %llu (%llu KiB, %llu MiB)", c->bi.old_idx_sz, c->bi.old_idx_sz >> 10, c->bi.old_idx_sz >> 20); dbg_gen("key hash type: %d", c->key_hash_type); @@ -2207,9 +2207,7 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) if (c->max_inode_sz > MAX_LFS_FILESIZE) sb->s_maxbytes = c->max_inode_sz = MAX_LFS_FILESIZE; sb->s_op = &ubifs_super_operations; -#ifdef CONFIG_UBIFS_FS_XATTR sb->s_xattr = ubifs_xattr_handlers; -#endif fscrypt_set_ops(sb, &ubifs_crypt_operations); mutex_lock(&c->umount_mutex); diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index 894f1ab14616..488f3da7a6c6 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c @@ -316,7 +316,7 @@ static int lnc_add(struct ubifs_info *c, struct ubifs_zbranch *zbr, err = ubifs_validate_entry(c, dent); if (err) { dump_stack(); - ubifs_dump_node(c, dent); + ubifs_dump_node(c, dent, zbr->len); return err; } @@ -349,7 +349,7 @@ static int lnc_add_directly(struct ubifs_info *c, struct ubifs_zbranch *zbr, err = ubifs_validate_entry(c, node); if (err) { dump_stack(); - ubifs_dump_node(c, node); + ubifs_dump_node(c, node, zbr->len); return err; } @@ -377,7 +377,7 @@ static void lnc_free(struct ubifs_zbranch *zbr) * * This function reads a "hashed" node defined by @zbr from the leaf node cache * (in it is there) or from the hash media, in which case the node is also - * added to LNC. Returns zero in case of success or a negative negative error + * added to LNC. Returns zero in case of success or a negative error * code in case of failure. 
*/ static int tnc_read_hashed_node(struct ubifs_info *c, struct ubifs_zbranch *zbr, @@ -1699,7 +1699,7 @@ static int validate_data_node(struct ubifs_info *c, void *buf, goto out_err; } - err = ubifs_check_node(c, buf, zbr->lnum, zbr->offs, 0, 0); + err = ubifs_check_node(c, buf, zbr->len, zbr->lnum, zbr->offs, 0, 0); if (err) { ubifs_err(c, "expected node type %d", UBIFS_DATA_NODE); goto out; @@ -1733,7 +1733,7 @@ out_err: err = -EINVAL; out: ubifs_err(c, "bad node at LEB %d:%d", zbr->lnum, zbr->offs); - ubifs_dump_node(c, buf); + ubifs_dump_node(c, buf, zbr->len); dump_stack(); return err; } diff --git a/fs/ubifs/tnc_misc.c b/fs/ubifs/tnc_misc.c index ccaf94ea5be3..4d686e34e64d 100644 --- a/fs/ubifs/tnc_misc.c +++ b/fs/ubifs/tnc_misc.c @@ -390,7 +390,7 @@ static int read_znode(struct ubifs_info *c, struct ubifs_zbranch *zzbr, out_dump: ubifs_err(c, "bad indexing node at LEB %d:%d, error %d", lnum, offs, err); - ubifs_dump_node(c, idx); + ubifs_dump_node(c, idx, c->max_idx_node_sz); kfree(idx); return -EINVAL; } @@ -455,8 +455,7 @@ out: * @node: node is returned here * * This function reads a node defined by @zbr from the flash media. Returns - * zero in case of success or a negative negative error code in case of - * failure. + * zero in case of success or a negative error code in case of failure. 
*/ int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr, void *node) @@ -489,7 +488,7 @@ int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr, zbr->lnum, zbr->offs); dbg_tnck(key, "looked for key "); dbg_tnck(&key1, "but found node's key "); - ubifs_dump_node(c, node); + ubifs_dump_node(c, node, zbr->len); return -EINVAL; } diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 4ffd832e3b93..fc2cdde3b549 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -1719,8 +1719,8 @@ int ubifs_write_node(struct ubifs_info *c, void *node, int len, int lnum, int offs); int ubifs_write_node_hmac(struct ubifs_info *c, void *buf, int len, int lnum, int offs, int hmac_offs); -int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, - int offs, int quiet, int must_chk_crc); +int ubifs_check_node(const struct ubifs_info *c, const void *buf, int len, + int lnum, int offs, int quiet, int must_chk_crc); void ubifs_init_node(struct ubifs_info *c, void *buf, int len, int pad); void ubifs_crc_node(struct ubifs_info *c, void *buf, int len); void ubifs_prepare_node(struct ubifs_info *c, void *buf, int len, int pad); @@ -2000,17 +2000,19 @@ int ubifs_getattr(const struct path *path, struct kstat *stat, int ubifs_check_dir_empty(struct inode *dir); /* xattr.c */ -extern const struct xattr_handler *ubifs_xattr_handlers[]; -ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size); int ubifs_xattr_set(struct inode *host, const char *name, const void *value, size_t size, int flags, bool check_lock); ssize_t ubifs_xattr_get(struct inode *host, const char *name, void *buf, size_t size); #ifdef CONFIG_UBIFS_FS_XATTR +extern const struct xattr_handler *ubifs_xattr_handlers[]; +ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size); void ubifs_evict_xattr_inode(struct ubifs_info *c, ino_t xattr_inum); int ubifs_purge_xattrs(struct inode *host); #else +#define ubifs_listxattr NULL +#define 
ubifs_xattr_handlers NULL static inline void ubifs_evict_xattr_inode(struct ubifs_info *c, ino_t xattr_inum) { } static inline int ubifs_purge_xattrs(struct inode *host) diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 15640015be9d..7cb9f064ac64 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -690,9 +690,9 @@ xfs_alloc_read_agfl( xfs_mount_t *mp, /* mount point structure */ xfs_trans_t *tp, /* transaction pointer */ xfs_agnumber_t agno, /* allocation group number */ - xfs_buf_t **bpp) /* buffer for the ag free block array */ + struct xfs_buf **bpp) /* buffer for the ag free block array */ { - xfs_buf_t *bp; /* return value */ + struct xfs_buf *bp; /* return value */ int error; ASSERT(agno != NULLAGNUMBER); @@ -2647,12 +2647,12 @@ out_no_agbp: int /* error */ xfs_alloc_get_freelist( xfs_trans_t *tp, /* transaction pointer */ - xfs_buf_t *agbp, /* buffer containing the agf structure */ + struct xfs_buf *agbp, /* buffer containing the agf structure */ xfs_agblock_t *bnop, /* block address retrieved from freelist */ int btreeblk) /* destination is a AGF btree */ { struct xfs_agf *agf = agbp->b_addr; - xfs_buf_t *agflbp;/* buffer for a.g. freelist structure */ + struct xfs_buf *agflbp;/* buffer for a.g. freelist structure */ xfs_agblock_t bno; /* block number returned */ __be32 *agfl_bno; int error; @@ -2711,7 +2711,7 @@ xfs_alloc_get_freelist( void xfs_alloc_log_agf( xfs_trans_t *tp, /* transaction pointer */ - xfs_buf_t *bp, /* buffer for a.g. freelist header */ + struct xfs_buf *bp, /* buffer for a.g. freelist header */ int fields) /* mask of fields to be logged (XFS_AGF_...) */ { int first; /* first byte offset */ @@ -2757,7 +2757,7 @@ xfs_alloc_pagf_init( xfs_agnumber_t agno, /* allocation group number */ int flags) /* XFS_ALLOC_FLAGS_... 
*/ { - xfs_buf_t *bp; + struct xfs_buf *bp; int error; error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp); @@ -2772,8 +2772,8 @@ xfs_alloc_pagf_init( int /* error */ xfs_alloc_put_freelist( xfs_trans_t *tp, /* transaction pointer */ - xfs_buf_t *agbp, /* buffer for a.g. freelist header */ - xfs_buf_t *agflbp,/* buffer for a.g. free block array */ + struct xfs_buf *agbp, /* buffer for a.g. freelist header */ + struct xfs_buf *agflbp,/* buffer for a.g. free block array */ xfs_agblock_t bno, /* block being freed */ int btreeblk) /* block came from a AGF btree */ { diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index d9a692484eae..bc446418e227 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -321,7 +321,7 @@ xfs_bmap_check_leaf_extents( struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); struct xfs_btree_block *block; /* current btree block */ xfs_fsblock_t bno; /* block # of "block" */ - xfs_buf_t *bp; /* buffer for "block" */ + struct xfs_buf *bp; /* buffer for "block" */ int error; /* error return value */ xfs_extnum_t i=0, j; /* index into the extents list */ int level; /* btree level, for checking */ @@ -592,7 +592,7 @@ xfs_bmap_btree_to_extents( struct xfs_btree_block *rblock = ifp->if_broot; struct xfs_btree_block *cblock;/* child btree block */ xfs_fsblock_t cbno; /* child block number */ - xfs_buf_t *cbp; /* child block's buffer */ + struct xfs_buf *cbp; /* child block's buffer */ int error; /* error return value */ __be64 *pp; /* ptr to block address */ struct xfs_owner_info oinfo; @@ -830,7 +830,7 @@ xfs_bmap_local_to_extents( int flags; /* logging flags returned */ struct xfs_ifork *ifp; /* inode fork pointer */ xfs_alloc_arg_t args; /* allocation arguments */ - xfs_buf_t *bp; /* buffer for extent block */ + struct xfs_buf *bp; /* buffer for extent block */ struct xfs_bmbt_irec rec; struct xfs_iext_cursor icur; @@ -6226,23 +6226,17 @@ xfs_bmap_validate_extent( struct xfs_bmbt_irec *irec) { struct xfs_mount *mp = 
ip->i_mount; - xfs_fsblock_t endfsb; - bool isrt; - isrt = XFS_IS_REALTIME_INODE(ip); - endfsb = irec->br_startblock + irec->br_blockcount - 1; - if (isrt && whichfork == XFS_DATA_FORK) { - if (!xfs_verify_rtbno(mp, irec->br_startblock)) - return __this_address; - if (!xfs_verify_rtbno(mp, endfsb)) + if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount)) + return __this_address; + + if (XFS_IS_REALTIME_INODE(ip) && whichfork == XFS_DATA_FORK) { + if (!xfs_verify_rtext(mp, irec->br_startblock, + irec->br_blockcount)) return __this_address; } else { - if (!xfs_verify_fsbno(mp, irec->br_startblock)) - return __this_address; - if (!xfs_verify_fsbno(mp, endfsb)) - return __this_address; - if (XFS_FSB_TO_AGNO(mp, irec->br_startblock) != - XFS_FSB_TO_AGNO(mp, endfsb)) + if (!xfs_verify_fsbext(mp, irec->br_startblock, + irec->br_blockcount)) return __this_address; } if (irec->br_state != XFS_EXT_NORM && whichfork != XFS_DATA_FORK) diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index ecec604e6e4d..976659190d27 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -639,8 +639,6 @@ xfs_bmbt_change_owner( ASSERT(XFS_IFORK_PTR(ip, whichfork)->if_format == XFS_DINODE_FMT_BTREE); cur = xfs_bmbt_init_cursor(ip->i_mount, tp, ip, whichfork); - if (!cur) - return -ENOMEM; cur->bc_ino.flags |= XFS_BTCUR_BMBT_INVALID_OWNER; error = xfs_btree_change_owner(cur, new_owner, buffer_list); diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 2d25bab68764..c4d7a9241dc3 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -397,7 +397,7 @@ xfs_btree_dup_cursor( xfs_btree_cur_t *cur, /* input cursor */ xfs_btree_cur_t **ncur) /* output cursor */ { - xfs_buf_t *bp; /* btree block's buffer pointer */ + struct xfs_buf *bp; /* btree block's buffer pointer */ int error; /* error return value */ int i; /* level number of btree block */ xfs_mount_t *mp; /* mount structure for filesystem */ 
@@ -701,7 +701,7 @@ xfs_btree_firstrec( int level) /* level to change */ { struct xfs_btree_block *block; /* generic btree block pointer */ - xfs_buf_t *bp; /* buffer containing block */ + struct xfs_buf *bp; /* buffer containing block */ /* * Get the block pointer for this level. @@ -731,7 +731,7 @@ xfs_btree_lastrec( int level) /* level to change */ { struct xfs_btree_block *block; /* generic btree block pointer */ - xfs_buf_t *bp; /* buffer containing block */ + struct xfs_buf *bp; /* buffer containing block */ /* * Get the block pointer for this level. @@ -993,7 +993,7 @@ STATIC void xfs_btree_setbuf( xfs_btree_cur_t *cur, /* btree cursor */ int lev, /* level in btree */ - xfs_buf_t *bp) /* new buffer to set */ + struct xfs_buf *bp) /* new buffer to set */ { struct xfs_btree_block *b; /* btree block */ @@ -1636,7 +1636,7 @@ xfs_btree_decrement( int *stat) /* success/failure */ { struct xfs_btree_block *block; - xfs_buf_t *bp; + struct xfs_buf *bp; int error; /* error return value */ int lev; union xfs_btree_ptr ptr; @@ -4070,7 +4070,7 @@ xfs_btree_delrec( * surviving block, and log it. 
*/ xfs_btree_set_numrecs(left, lrecs + rrecs); - xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB), + xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB); xfs_btree_set_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB); xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB); diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index dd764da08f6f..630388b72dbe 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -468,11 +468,13 @@ xfs_sb_has_ro_compat_feature( #define XFS_SB_FEAT_INCOMPAT_SPINODES (1 << 1) /* sparse inode chunks */ #define XFS_SB_FEAT_INCOMPAT_META_UUID (1 << 2) /* metadata UUID */ #define XFS_SB_FEAT_INCOMPAT_BIGTIME (1 << 3) /* large timestamps */ +#define XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR (1 << 4) /* needs xfs_repair */ #define XFS_SB_FEAT_INCOMPAT_ALL \ (XFS_SB_FEAT_INCOMPAT_FTYPE| \ XFS_SB_FEAT_INCOMPAT_SPINODES| \ XFS_SB_FEAT_INCOMPAT_META_UUID| \ - XFS_SB_FEAT_INCOMPAT_BIGTIME) + XFS_SB_FEAT_INCOMPAT_BIGTIME| \ + XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR) #define XFS_SB_FEAT_INCOMPAT_UNKNOWN ~XFS_SB_FEAT_INCOMPAT_ALL static inline bool @@ -584,6 +586,12 @@ static inline bool xfs_sb_version_hasinobtcounts(struct xfs_sb *sbp) (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_INOBTCNT); } +static inline bool xfs_sb_version_needsrepair(struct xfs_sb *sbp) +{ + return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 && + (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR); +} + /* * end of superblock version macros */ @@ -625,7 +633,6 @@ xfs_is_quota_inode(struct xfs_sb *sbp, xfs_ino_t ino) #define XFS_B_TO_FSB(mp,b) \ ((((uint64_t)(b)) + (mp)->m_blockmask) >> (mp)->m_sb.sb_blocklog) #define XFS_B_TO_FSBT(mp,b) (((uint64_t)(b)) >> (mp)->m_sb.sb_blocklog) -#define XFS_B_FSB_OFFSET(mp,b) ((b) & (mp)->m_blockmask) /* * Allocation group header diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 974e71bc4a3a..69b228fce81a 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ 
b/fs/xfs/libxfs/xfs_ialloc.c @@ -607,13 +607,13 @@ error: /* * Allocate new inodes in the allocation group specified by agbp. - * Return 0 for success, else error code. + * Returns 0 if inodes were allocated in this AG; 1 if there was no space + * in this AG; or the usual negative error code. */ STATIC int xfs_ialloc_ag_alloc( struct xfs_trans *tp, - struct xfs_buf *agbp, - int *alloc) + struct xfs_buf *agbp) { struct xfs_agi *agi; struct xfs_alloc_arg args; @@ -795,10 +795,9 @@ sparse_alloc: allocmask = (1 << (newlen / XFS_INODES_PER_HOLEMASK_BIT)) - 1; } - if (args.fsbno == NULLFSBLOCK) { - *alloc = 0; - return 0; - } + if (args.fsbno == NULLFSBLOCK) + return 1; + ASSERT(args.len == args.minlen); /* @@ -903,7 +902,6 @@ sparse_alloc: */ xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, (long)newlen); xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, (long)newlen); - *alloc = 1; return 0; } @@ -1570,7 +1568,7 @@ xfs_dialloc_ag_update_inobt( * The caller selected an AG for us, and made sure that free inodes are * available. */ -STATIC int +int xfs_dialloc_ag( struct xfs_trans *tp, struct xfs_buf *agbp, @@ -1682,65 +1680,78 @@ error_cur: return error; } +static int +xfs_dialloc_roll( + struct xfs_trans **tpp, + struct xfs_buf *agibp) +{ + struct xfs_trans *tp = *tpp; + struct xfs_dquot_acct *dqinfo; + int error; + + /* + * Hold to on to the agibp across the commit so no other allocation can + * come in and take the free inodes we just allocated for our caller. + */ + xfs_trans_bhold(tp, agibp); + + /* + * We want the quota changes to be associated with the next transaction, + * NOT this one. So, detach the dqinfo from this and attach it to the + * next transaction. + */ + dqinfo = tp->t_dqinfo; + tp->t_dqinfo = NULL; + + error = xfs_trans_roll(&tp); + + /* Re-attach the quota info that we detached from prev trx. */ + tp->t_dqinfo = dqinfo; + + *tpp = tp; + if (error) + return error; + xfs_trans_bjoin(tp, agibp); + return 0; +} + /* - * Allocate an inode on disk. 
- * - * Mode is used to tell whether the new inode will need space, and whether it - * is a directory. + * Select and prepare an AG for inode allocation. * - * This function is designed to be called twice if it has to do an allocation - * to make more free inodes. On the first call, *IO_agbp should be set to NULL. - * If an inode is available without having to performn an allocation, an inode - * number is returned. In this case, *IO_agbp is set to NULL. If an allocation - * needs to be done, xfs_dialloc returns the current AGI buffer in *IO_agbp. - * The caller should then commit the current transaction, allocate a - * new transaction, and call xfs_dialloc() again, passing in the previous value - * of *IO_agbp. IO_agbp should be held across the transactions. Since the AGI - * buffer is locked across the two calls, the second call is guaranteed to have - * a free inode available. + * Mode is used to tell whether the new inode is a directory and hence where to + * locate it. * - * Once we successfully pick an inode its number is returned and the on-disk - * data structures are updated. The inode itself is not read in, since doing so - * would break ordering constraints with xfs_reclaim. + * This function will ensure that the selected AG has free inodes available to + * allocate from. The selected AGI will be returned locked to the caller, and it + * will allocate more free inodes if required. If no free inodes are found or + * can be allocated, no AGI will be returned. 
*/ int -xfs_dialloc( - struct xfs_trans *tp, +xfs_dialloc_select_ag( + struct xfs_trans **tpp, xfs_ino_t parent, umode_t mode, - struct xfs_buf **IO_agbp, - xfs_ino_t *inop) + struct xfs_buf **IO_agbp) { - struct xfs_mount *mp = tp->t_mountp; + struct xfs_mount *mp = (*tpp)->t_mountp; struct xfs_buf *agbp; xfs_agnumber_t agno; int error; - int ialloced; - int noroom = 0; + bool noroom = false; xfs_agnumber_t start_agno; struct xfs_perag *pag; struct xfs_ino_geometry *igeo = M_IGEO(mp); - int okalloc = 1; + bool okalloc = true; - if (*IO_agbp) { - /* - * If the caller passes in a pointer to the AGI buffer, - * continue where we left off before. In this case, we - * know that the allocation group has free inodes. - */ - agbp = *IO_agbp; - goto out_alloc; - } + *IO_agbp = NULL; /* * We do not have an agbp, so select an initial allocation * group for inode allocation. */ - start_agno = xfs_ialloc_ag_select(tp, parent, mode); - if (start_agno == NULLAGNUMBER) { - *inop = NULLFSINO; + start_agno = xfs_ialloc_ag_select(*tpp, parent, mode); + if (start_agno == NULLAGNUMBER) return 0; - } /* * If we have already hit the ceiling of inode blocks then clear @@ -1753,8 +1764,8 @@ xfs_dialloc( if (igeo->maxicount && percpu_counter_read_positive(&mp->m_icount) + igeo->ialloc_inos > igeo->maxicount) { - noroom = 1; - okalloc = 0; + noroom = true; + okalloc = false; } /* @@ -1771,9 +1782,9 @@ xfs_dialloc( } if (!pag->pagi_init) { - error = xfs_ialloc_pagi_init(mp, tp, agno); + error = xfs_ialloc_pagi_init(mp, *tpp, agno); if (error) - goto out_error; + break; } /* @@ -1786,64 +1797,59 @@ xfs_dialloc( * Then read in the AGI buffer and recheck with the AGI buffer * lock held. 
*/ - error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); + error = xfs_ialloc_read_agi(mp, *tpp, agno, &agbp); if (error) - goto out_error; + break; if (pag->pagi_freecount) { xfs_perag_put(pag); - goto out_alloc; + goto found_ag; } if (!okalloc) goto nextag_relse_buffer; + error = xfs_ialloc_ag_alloc(*tpp, agbp); + if (error < 0) { + xfs_trans_brelse(*tpp, agbp); - error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced); - if (error) { - xfs_trans_brelse(tp, agbp); - - if (error != -ENOSPC) - goto out_error; - - xfs_perag_put(pag); - *inop = NULLFSINO; - return 0; + if (error == -ENOSPC) + error = 0; + break; } - if (ialloced) { + if (error == 0) { /* - * We successfully allocated some inodes, return - * the current context to the caller so that it - * can commit the current transaction and call - * us again where we left off. + * We successfully allocated space for an inode cluster + * in this AG. Roll the transaction so that we can + * allocate one of the new inodes. */ ASSERT(pag->pagi_freecount > 0); xfs_perag_put(pag); - *IO_agbp = agbp; - *inop = NULLFSINO; - return 0; + error = xfs_dialloc_roll(tpp, agbp); + if (error) { + xfs_buf_relse(agbp); + return error; + } + goto found_ag; } nextag_relse_buffer: - xfs_trans_brelse(tp, agbp); + xfs_trans_brelse(*tpp, agbp); nextag: xfs_perag_put(pag); if (++agno == mp->m_sb.sb_agcount) agno = 0; - if (agno == start_agno) { - *inop = NULLFSINO; + if (agno == start_agno) return noroom ? 
-ENOSPC : 0; - } } -out_alloc: - *IO_agbp = NULL; - return xfs_dialloc_ag(tp, agbp, parent, inop); -out_error: xfs_perag_put(pag); return error; +found_ag: + *IO_agbp = agbp; + return 0; } /* @@ -2453,7 +2459,7 @@ out_map: void xfs_ialloc_log_agi( xfs_trans_t *tp, /* transaction pointer */ - xfs_buf_t *bp, /* allocation group header buffer */ + struct xfs_buf *bp, /* allocation group header buffer */ int fields) /* bitmask of fields to log */ { int first; /* first byte number */ @@ -2674,7 +2680,7 @@ xfs_ialloc_pagi_init( xfs_trans_t *tp, /* transaction pointer */ xfs_agnumber_t agno) /* allocation group number */ { - xfs_buf_t *bp = NULL; + struct xfs_buf *bp = NULL; int error; error = xfs_ialloc_read_agi(mp, tp, agno, &bp); diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h index 72b3468b97b1..3511086a7ae1 100644 --- a/fs/xfs/libxfs/xfs_ialloc.h +++ b/fs/xfs/libxfs/xfs_ialloc.h @@ -37,30 +37,26 @@ xfs_make_iptr(struct xfs_mount *mp, struct xfs_buf *b, int o) * Mode is used to tell whether the new inode will need space, and whether * it is a directory. * - * To work within the constraint of one allocation per transaction, - * xfs_dialloc() is designed to be called twice if it has to do an - * allocation to make more free inodes. If an inode is - * available without an allocation, agbp would be set to the current - * agbp and alloc_done set to false. - * If an allocation needed to be done, agbp would be set to the - * inode header of the allocation group and alloc_done set to true. - * The caller should then commit the current transaction and allocate a new - * transaction. xfs_dialloc() should then be called again with - * the agbp value returned from the previous call. - * - * Once we successfully pick an inode its number is returned and the - * on-disk data structures are updated. The inode itself is not read - * in, since doing so would break ordering constraints with xfs_reclaim. 
- * - * *agbp should be set to NULL on the first call, *alloc_done set to FALSE. + * There are two phases to inode allocation: selecting an AG and ensuring + * that it contains free inodes, followed by allocating one of the free + * inodes. xfs_dialloc_select_ag() does the former and returns a locked AGI + * to the caller, ensuring that followup call to xfs_dialloc_ag() will + * have free inodes to allocate from. xfs_dialloc_ag() will return the inode + * number of the free inode we allocated. */ int /* error */ -xfs_dialloc( - struct xfs_trans *tp, /* transaction pointer */ +xfs_dialloc_select_ag( + struct xfs_trans **tpp, /* double pointer of transaction */ xfs_ino_t parent, /* parent inode (directory) */ umode_t mode, /* mode bits for new inode */ - struct xfs_buf **agbp, /* buf for a.g. inode header */ - xfs_ino_t *inop); /* inode number allocated */ + struct xfs_buf **IO_agbp); + +int +xfs_dialloc_ag( + struct xfs_trans *tp, + struct xfs_buf *agbp, + xfs_ino_t parent, + xfs_ino_t *inop); /* * Free disk inode. 
Carefully avoids touching the incore inode, all diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index cc919a2ee870..4c5831646bd9 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -672,11 +672,6 @@ xfs_inobt_cur( return error; cur = xfs_inobt_init_cursor(mp, tp, *agi_bpp, agno, which); - if (!cur) { - xfs_trans_brelse(tp, *agi_bpp); - *agi_bpp = NULL; - return -ENOMEM; - } *curpp = cur; return 0; } diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index c667c63f2cb0..4d7410e49db4 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -547,10 +547,6 @@ xfs_dinode_verify( if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags & XFS_DIFLAG_REALTIME)) return __this_address; - /* don't let reflink and dax mix */ - if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags2 & XFS_DIFLAG2_DAX)) - return __this_address; - /* COW extent size hint validation */ fa = xfs_inode_validate_cowextsize(mp, be32_to_cpu(dip->di_cowextsize), mode, flags, flags2); diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 2076627243b0..2037b9f23069 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -1179,10 +1179,6 @@ xfs_refcount_finish_one( return error; rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno); - if (!rcur) { - error = -ENOMEM; - goto out_cur; - } rcur->bc_ag.refc.nr_ops = nr_ops; rcur->bc_ag.refc.shape_changes = shape_changes; } @@ -1217,11 +1213,6 @@ xfs_refcount_finish_one( trace_xfs_refcount_finish_one_leftover(mp, agno, type, bno, blockcount, new_agbno, *new_len); return error; - -out_cur: - xfs_trans_brelse(tp, agbp); - - return error; } /* diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c index 2668ebe02865..10e0cf9949a2 100644 --- a/fs/xfs/libxfs/xfs_rmap.c +++ b/fs/xfs/libxfs/xfs_rmap.c @@ -2404,10 +2404,6 @@ xfs_rmap_finish_one( return -EFSCORRUPTED; rcur = xfs_rmapbt_init_cursor(mp, tp, agbp, agno); 
- if (!rcur) { - error = -ENOMEM; - goto out_cur; - } } *pcur = rcur; @@ -2446,11 +2442,6 @@ xfs_rmap_finish_one( error = -EFSCORRUPTED; } return error; - -out_cur: - xfs_trans_brelse(tp, agbp); - - return error; } /* diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c index 6c1aba16113c..fe3a49575ff3 100644 --- a/fs/xfs/libxfs/xfs_rtbitmap.c +++ b/fs/xfs/libxfs/xfs_rtbitmap.c @@ -56,9 +56,9 @@ xfs_rtbuf_get( xfs_trans_t *tp, /* transaction pointer */ xfs_rtblock_t block, /* block number in bitmap or summary */ int issum, /* is summary not bitmap */ - xfs_buf_t **bpp) /* output: buffer for the block */ + struct xfs_buf **bpp) /* output: buffer for the block */ { - xfs_buf_t *bp; /* block buffer, result */ + struct xfs_buf *bp; /* block buffer, result */ xfs_inode_t *ip; /* bitmap or summary inode */ xfs_bmbt_irec_t map; int nmap = 1; @@ -101,7 +101,7 @@ xfs_rtfind_back( xfs_rtword_t *b; /* current word in buffer */ int bit; /* bit number in the word */ xfs_rtblock_t block; /* bitmap block number */ - xfs_buf_t *bp; /* buf for the block */ + struct xfs_buf *bp; /* buf for the block */ xfs_rtword_t *bufp; /* starting word in buffer */ int error; /* error value */ xfs_rtblock_t firstbit; /* first useful bit in the word */ @@ -276,7 +276,7 @@ xfs_rtfind_forw( xfs_rtword_t *b; /* current word in buffer */ int bit; /* bit number in the word */ xfs_rtblock_t block; /* bitmap block number */ - xfs_buf_t *bp; /* buf for the block */ + struct xfs_buf *bp; /* buf for the block */ xfs_rtword_t *bufp; /* starting word in buffer */ int error; /* error value */ xfs_rtblock_t i; /* current bit number rel. 
to start */ @@ -447,11 +447,11 @@ xfs_rtmodify_summary_int( int log, /* log2 of extent size */ xfs_rtblock_t bbno, /* bitmap block number */ int delta, /* change to make to summary info */ - xfs_buf_t **rbpp, /* in/out: summary block buffer */ + struct xfs_buf **rbpp, /* in/out: summary block buffer */ xfs_fsblock_t *rsb, /* in/out: summary block number */ xfs_suminfo_t *sum) /* out: summary info for this block */ { - xfs_buf_t *bp; /* buffer for the summary block */ + struct xfs_buf *bp; /* buffer for the summary block */ int error; /* error value */ xfs_fsblock_t sb; /* summary fsblock */ int so; /* index into the summary file */ @@ -517,7 +517,7 @@ xfs_rtmodify_summary( int log, /* log2 of extent size */ xfs_rtblock_t bbno, /* bitmap block number */ int delta, /* change to make to summary info */ - xfs_buf_t **rbpp, /* in/out: summary block buffer */ + struct xfs_buf **rbpp, /* in/out: summary block buffer */ xfs_fsblock_t *rsb) /* in/out: summary block number */ { return xfs_rtmodify_summary_int(mp, tp, log, bbno, @@ -539,7 +539,7 @@ xfs_rtmodify_range( xfs_rtword_t *b; /* current word in buffer */ int bit; /* bit number in the word */ xfs_rtblock_t block; /* bitmap block number */ - xfs_buf_t *bp; /* buf for the block */ + struct xfs_buf *bp; /* buf for the block */ xfs_rtword_t *bufp; /* starting word in buffer */ int error; /* error value */ xfs_rtword_t *first; /* first used word in the buffer */ @@ -690,7 +690,7 @@ xfs_rtfree_range( xfs_trans_t *tp, /* transaction pointer */ xfs_rtblock_t start, /* starting block to free */ xfs_extlen_t len, /* length to free */ - xfs_buf_t **rbpp, /* in/out: summary block buffer */ + struct xfs_buf **rbpp, /* in/out: summary block buffer */ xfs_fsblock_t *rsb) /* in/out: summary block number */ { xfs_rtblock_t end; /* end of the freed extent */ @@ -773,7 +773,7 @@ xfs_rtcheck_range( xfs_rtword_t *b; /* current word in buffer */ int bit; /* bit number in the word */ xfs_rtblock_t block; /* bitmap block number */ - 
xfs_buf_t *bp; /* buf for the block */ + struct xfs_buf *bp; /* buf for the block */ xfs_rtword_t *bufp; /* starting word in buffer */ int error; /* error value */ xfs_rtblock_t i; /* current bit number rel. to start */ @@ -969,7 +969,7 @@ xfs_rtfree_extent( int error; /* error value */ xfs_mount_t *mp; /* file system mount structure */ xfs_fsblock_t sb; /* summary file block number */ - xfs_buf_t *sumbp = NULL; /* summary file block buffer */ + struct xfs_buf *sumbp = NULL; /* summary file block buffer */ mp = tp->t_mountp; diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 5aeafa59ed27..bbda117e5d85 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -360,21 +360,18 @@ xfs_validate_sb_common( } } - if (sbp->sb_unit) { - if (!xfs_sb_version_hasdalign(sbp) || - sbp->sb_unit > sbp->sb_width || - (sbp->sb_width % sbp->sb_unit) != 0) { - xfs_notice(mp, "SB stripe unit sanity check failed"); - return -EFSCORRUPTED; - } - } else if (xfs_sb_version_hasdalign(sbp)) { + /* + * Either (sb_unit and !hasdalign) or (!sb_unit and hasdalign) + * would imply the image is corrupted. + */ + if (!!sbp->sb_unit ^ xfs_sb_version_hasdalign(sbp)) { xfs_notice(mp, "SB stripe alignment sanity check failed"); return -EFSCORRUPTED; - } else if (sbp->sb_width) { - xfs_notice(mp, "SB stripe width sanity check failed"); - return -EFSCORRUPTED; } + if (!xfs_validate_stripe_geometry(mp, XFS_FSB_TO_B(mp, sbp->sb_unit), + XFS_FSB_TO_B(mp, sbp->sb_width), 0, false)) + return -EFSCORRUPTED; if (xfs_sb_version_hascrc(&mp->m_sb) && sbp->sb_blocksize < XFS_MIN_CRC_BLOCKSIZE) { @@ -383,17 +380,6 @@ xfs_validate_sb_common( } /* - * Until this is fixed only page-sized or smaller data blocks work. - */ - if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) { - xfs_warn(mp, - "File system with blocksize %d bytes. " - "Only pagesize (%ld) or less will currently work.", - sbp->sb_blocksize, PAGE_SIZE); - return -ENOSYS; - } - - /* * Currently only very few inode sizes are supported. 
*/ switch (sbp->sb_inodesize) { @@ -408,22 +394,6 @@ xfs_validate_sb_common( return -ENOSYS; } - if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) || - xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) { - xfs_warn(mp, - "file system too large to be mounted on this system."); - return -EFBIG; - } - - /* - * Don't touch the filesystem if a user tool thinks it owns the primary - * superblock. mkfs doesn't clear the flag from secondary supers, so - * we don't check them at all. - */ - if (XFS_BUF_ADDR(bp) == XFS_SB_DADDR && sbp->sb_inprogress) { - xfs_warn(mp, "Offline file system operation in progress!"); - return -EFSCORRUPTED; - } return 0; } @@ -1233,3 +1203,61 @@ xfs_sb_get_secondary( *bpp = bp; return 0; } + +/* + * sunit, swidth, sectorsize(optional with 0) should be all in bytes, + * so users won't be confused by values in error messages. + */ +bool +xfs_validate_stripe_geometry( + struct xfs_mount *mp, + __s64 sunit, + __s64 swidth, + int sectorsize, + bool silent) +{ + if (swidth > INT_MAX) { + if (!silent) + xfs_notice(mp, +"stripe width (%lld) is too large", swidth); + return false; + } + + if (sunit > swidth) { + if (!silent) + xfs_notice(mp, +"stripe unit (%lld) is larger than the stripe width (%lld)", sunit, swidth); + return false; + } + + if (sectorsize && (int)sunit % sectorsize) { + if (!silent) + xfs_notice(mp, +"stripe unit (%lld) must be a multiple of the sector size (%d)", + sunit, sectorsize); + return false; + } + + if (sunit && !swidth) { + if (!silent) + xfs_notice(mp, +"invalid stripe unit (%lld) and stripe width of 0", sunit); + return false; + } + + if (!sunit && swidth) { + if (!silent) + xfs_notice(mp, +"invalid stripe width (%lld) and stripe unit of 0", swidth); + return false; + } + + if (sunit && (int)swidth % (int)sunit) { + if (!silent) + xfs_notice(mp, +"stripe width (%lld) must be a multiple of the stripe unit (%lld)", + swidth, sunit); + return false; + } + return true; +} diff --git a/fs/xfs/libxfs/xfs_sb.h 
b/fs/xfs/libxfs/xfs_sb.h index 92465a9a5162..f79f9dc632b6 100644 --- a/fs/xfs/libxfs/xfs_sb.h +++ b/fs/xfs/libxfs/xfs_sb.h @@ -42,4 +42,7 @@ extern int xfs_sb_get_secondary(struct xfs_mount *mp, struct xfs_trans *tp, xfs_agnumber_t agno, struct xfs_buf **bpp); +extern bool xfs_validate_stripe_geometry(struct xfs_mount *mp, + __s64 sunit, __s64 swidth, int sectorsize, bool silent); + #endif /* __XFS_SB_H__ */ diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h index c795ae47b3c9..8c61a461bf7b 100644 --- a/fs/xfs/libxfs/xfs_shared.h +++ b/fs/xfs/libxfs/xfs_shared.h @@ -62,7 +62,6 @@ void xfs_log_get_max_trans_res(struct xfs_mount *mp, #define XFS_TRANS_SB_DIRTY 0x02 /* superblock is modified */ #define XFS_TRANS_PERM_LOG_RES 0x04 /* xact took a permanent log res */ #define XFS_TRANS_SYNC 0x08 /* make commit synchronous */ -#define XFS_TRANS_DQ_DIRTY 0x10 /* at least one dquot in trx dirty */ #define XFS_TRANS_RESERVE 0x20 /* OK to use reserved data blocks */ #define XFS_TRANS_NO_WRITECOUNT 0x40 /* do not elevate SB writecount */ #define XFS_TRANS_RES_FDBLKS 0x80 /* reserve newly freed blocks */ diff --git a/fs/xfs/libxfs/xfs_types.c b/fs/xfs/libxfs/xfs_types.c index 4f595546a639..b254fbeaaa50 100644 --- a/fs/xfs/libxfs/xfs_types.c +++ b/fs/xfs/libxfs/xfs_types.c @@ -61,6 +61,29 @@ xfs_verify_fsbno( return xfs_verify_agbno(mp, agno, XFS_FSB_TO_AGBNO(mp, fsbno)); } +/* + * Verify that a data device extent is fully contained inside the filesystem, + * does not cross an AG boundary, and does not point at static metadata. + */ +bool +xfs_verify_fsbext( + struct xfs_mount *mp, + xfs_fsblock_t fsbno, + xfs_fsblock_t len) +{ + if (fsbno + len <= fsbno) + return false; + + if (!xfs_verify_fsbno(mp, fsbno)) + return false; + + if (!xfs_verify_fsbno(mp, fsbno + len - 1)) + return false; + + return XFS_FSB_TO_AGNO(mp, fsbno) == + XFS_FSB_TO_AGNO(mp, fsbno + len - 1); +} + /* Calculate the first and last possible inode number in an AG. 
*/ void xfs_agino_range( @@ -175,6 +198,22 @@ xfs_verify_rtbno( return rtbno < mp->m_sb.sb_rblocks; } +/* Verify that a realtime device extent is fully contained inside the volume. */ +bool +xfs_verify_rtext( + struct xfs_mount *mp, + xfs_rtblock_t rtbno, + xfs_rtblock_t len) +{ + if (rtbno + len <= rtbno) + return false; + + if (!xfs_verify_rtbno(mp, rtbno)) + return false; + + return xfs_verify_rtbno(mp, rtbno + len - 1); +} + /* Calculate the range of valid icount values. */ void xfs_icount_range( @@ -219,3 +258,28 @@ xfs_verify_dablk( return dabno <= max_dablk; } + +/* Check that a file block offset does not exceed the maximum. */ +bool +xfs_verify_fileoff( + struct xfs_mount *mp, + xfs_fileoff_t off) +{ + return off <= XFS_MAX_FILEOFF; +} + +/* Check that a range of file block offsets do not exceed the maximum. */ +bool +xfs_verify_fileext( + struct xfs_mount *mp, + xfs_fileoff_t off, + xfs_fileoff_t len) +{ + if (off + len <= off) + return false; + + if (!xfs_verify_fileoff(mp, off)) + return false; + + return xfs_verify_fileoff(mp, off + len - 1); +} diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h index 397d94775440..064bd6e8c922 100644 --- a/fs/xfs/libxfs/xfs_types.h +++ b/fs/xfs/libxfs/xfs_types.h @@ -184,6 +184,8 @@ xfs_agblock_t xfs_ag_block_count(struct xfs_mount *mp, xfs_agnumber_t agno); bool xfs_verify_agbno(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno); bool xfs_verify_fsbno(struct xfs_mount *mp, xfs_fsblock_t fsbno); +bool xfs_verify_fsbext(struct xfs_mount *mp, xfs_fsblock_t fsbno, + xfs_fsblock_t len); void xfs_agino_range(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agino_t *first, xfs_agino_t *last); @@ -195,9 +197,14 @@ bool xfs_verify_ino(struct xfs_mount *mp, xfs_ino_t ino); bool xfs_internal_inum(struct xfs_mount *mp, xfs_ino_t ino); bool xfs_verify_dir_ino(struct xfs_mount *mp, xfs_ino_t ino); bool xfs_verify_rtbno(struct xfs_mount *mp, xfs_rtblock_t rtbno); +bool xfs_verify_rtext(struct xfs_mount 
*mp, xfs_rtblock_t rtbno, + xfs_rtblock_t len); bool xfs_verify_icount(struct xfs_mount *mp, unsigned long long icount); bool xfs_verify_dablk(struct xfs_mount *mp, xfs_fileoff_t off); void xfs_icount_range(struct xfs_mount *mp, unsigned long long *min, unsigned long long *max); +bool xfs_verify_fileoff(struct xfs_mount *mp, xfs_fileoff_t off); +bool xfs_verify_fileext(struct xfs_mount *mp, xfs_fileoff_t off, + xfs_fileoff_t len); #endif /* __XFS_TYPES_H__ */ diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c index 401f71579ce6..23690f824ffa 100644 --- a/fs/xfs/scrub/agheader_repair.c +++ b/fs/xfs/scrub/agheader_repair.c @@ -829,8 +829,6 @@ xrep_agi_calc_from_btrees( cur = xfs_inobt_init_cursor(mp, sc->tp, agi_bp, sc->sa.agno, XFS_BTNUM_FINO); - if (error) - goto err; error = xfs_btree_count_blocks(cur, &blocks); if (error) goto err; diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c index fed56d213a3f..33559c3a4bc3 100644 --- a/fs/xfs/scrub/bmap.c +++ b/fs/xfs/scrub/bmap.c @@ -319,7 +319,6 @@ xchk_bmap_iextent( struct xfs_bmbt_irec *irec) { struct xfs_mount *mp = info->sc->mp; - xfs_filblks_t end; int error = 0; /* @@ -330,6 +329,10 @@ xchk_bmap_iextent( xchk_fblock_set_corrupt(info->sc, info->whichfork, irec->br_startoff); + if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount)) + xchk_fblock_set_corrupt(info->sc, info->whichfork, + irec->br_startoff); + xchk_bmap_dirattr_extent(ip, info, irec); /* There should never be a "hole" extent in either extent list. 
*/ @@ -349,20 +352,12 @@ xchk_bmap_iextent( if (irec->br_blockcount > MAXEXTLEN) xchk_fblock_set_corrupt(info->sc, info->whichfork, irec->br_startoff); - if (irec->br_startblock + irec->br_blockcount <= irec->br_startblock) - xchk_fblock_set_corrupt(info->sc, info->whichfork, - irec->br_startoff); - end = irec->br_startblock + irec->br_blockcount - 1; if (info->is_rt && - (!xfs_verify_rtbno(mp, irec->br_startblock) || - !xfs_verify_rtbno(mp, end))) + !xfs_verify_rtext(mp, irec->br_startblock, irec->br_blockcount)) xchk_fblock_set_corrupt(info->sc, info->whichfork, irec->br_startoff); if (!info->is_rt && - (!xfs_verify_fsbno(mp, irec->br_startblock) || - !xfs_verify_fsbno(mp, end) || - XFS_FSB_TO_AGNO(mp, irec->br_startblock) != - XFS_FSB_TO_AGNO(mp, end))) + !xfs_verify_fsbext(mp, irec->br_startblock, irec->br_blockcount)) xchk_fblock_set_corrupt(info->sc, info->whichfork, irec->br_startoff); @@ -563,10 +558,6 @@ xchk_bmap_check_ag_rmaps( return error; cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, agf, agno); - if (!cur) { - error = -ENOMEM; - goto out_agf; - } sbcri.sc = sc; sbcri.whichfork = whichfork; @@ -575,7 +566,6 @@ xchk_bmap_check_ag_rmaps( error = 0; xfs_btree_del_cursor(cur, error); -out_agf: xfs_trans_brelse(sc->tp, agf); return error; } diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c index 18876056e5e0..8ea6d4aa3f55 100644 --- a/fs/xfs/scrub/common.c +++ b/fs/xfs/scrub/common.c @@ -466,8 +466,6 @@ xchk_ag_btcur_init( /* Set up a bnobt cursor for cross-referencing. */ sa->bno_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp, agno, XFS_BTNUM_BNO); - if (!sa->bno_cur) - goto err; } if (sa->agf_bp && @@ -475,8 +473,6 @@ xchk_ag_btcur_init( /* Set up a cntbt cursor for cross-referencing. */ sa->cnt_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp, agno, XFS_BTNUM_CNT); - if (!sa->cnt_cur) - goto err; } /* Set up a inobt cursor for cross-referencing. 
*/ @@ -484,8 +480,6 @@ xchk_ag_btcur_init( xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_INO)) { sa->ino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp, agno, XFS_BTNUM_INO); - if (!sa->ino_cur) - goto err; } /* Set up a finobt cursor for cross-referencing. */ @@ -493,8 +487,6 @@ xchk_ag_btcur_init( xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_FINO)) { sa->fino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp, agno, XFS_BTNUM_FINO); - if (!sa->fino_cur) - goto err; } /* Set up a rmapbt cursor for cross-referencing. */ @@ -502,8 +494,6 @@ xchk_ag_btcur_init( xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_RMAP)) { sa->rmap_cur = xfs_rmapbt_init_cursor(mp, sc->tp, sa->agf_bp, agno); - if (!sa->rmap_cur) - goto err; } /* Set up a refcountbt cursor for cross-referencing. */ @@ -511,13 +501,9 @@ xchk_ag_btcur_init( xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_REFC)) { sa->refc_cur = xfs_refcountbt_init_cursor(mp, sc->tp, sa->agf_bp, agno); - if (!sa->refc_cur) - goto err; } return 0; -err: - return -ENOMEM; } /* Release the AG header context and btree cursors. */ diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c index b045e95c2ea7..178b3455a170 100644 --- a/fs/xfs/scrub/dir.c +++ b/fs/xfs/scrub/dir.c @@ -66,8 +66,18 @@ xchk_dir_check_ftype( * eofblocks cleanup (which allocates what would be a nested * transaction), we can't use DONTCACHE here because DONTCACHE * inodes can trigger immediate inactive cleanup of the inode. + * + * If _iget returns -EINVAL or -ENOENT then the child inode number is + * garbage and the directory is corrupt. If the _iget returns + * -EFSCORRUPTED or -EFSBADCRC then the child is corrupt which is a + * cross referencing error. Any other error is an operational error. 
*/ error = xfs_iget(mp, sdc->sc->tp, inum, 0, 0, &ip); + if (error == -EINVAL || error == -ENOENT) { + error = -EFSCORRUPTED; + xchk_fblock_process_error(sdc->sc, XFS_DATA_FORK, 0, &error); + goto out; + } if (!xchk_fblock_xref_process_error(sdc->sc, XFS_DATA_FORK, offset, &error)) goto out; @@ -105,6 +115,7 @@ xchk_dir_actor( struct xfs_name xname; xfs_ino_t lookup_ino; xfs_dablk_t offset; + bool checked_ftype = false; int error = 0; sdc = container_of(dir_iter, struct xchk_dir_ctx, dir_iter); @@ -133,6 +144,7 @@ xchk_dir_actor( if (xfs_sb_version_hasftype(&mp->m_sb) && type != DT_DIR) xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset); + checked_ftype = true; if (ino != ip->i_ino) xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset); @@ -144,6 +156,7 @@ xchk_dir_actor( if (xfs_sb_version_hasftype(&mp->m_sb) && type != DT_DIR) xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset); + checked_ftype = true; if (ip->i_ino == mp->m_sb.sb_rootino && ino != ip->i_ino) xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset); @@ -167,9 +180,11 @@ xchk_dir_actor( } /* Verify the file type. This function absorbs error codes. 
*/ - error = xchk_dir_check_ftype(sdc, offset, lookup_ino, type); - if (error) - goto out; + if (!checked_ftype) { + error = xchk_dir_check_ftype(sdc, offset, lookup_ino, type); + if (error) + goto out; + } out: /* * A negative error code returned here is supposed to cause the diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c index bb25ff1b770d..faf65eb5bd31 100644 --- a/fs/xfs/scrub/inode.c +++ b/fs/xfs/scrub/inode.c @@ -185,10 +185,6 @@ xchk_inode_flags2( if ((flags & XFS_DIFLAG_REALTIME) && (flags2 & XFS_DIFLAG2_REFLINK)) goto bad; - /* dax and reflink make no sense, currently */ - if ((flags2 & XFS_DIFLAG2_DAX) && (flags2 & XFS_DIFLAG2_REFLINK)) - goto bad; - /* no bigtime iflag without the bigtime feature */ if (xfs_dinode_has_bigtime(dip) && !xfs_sb_version_hasbigtime(&mp->m_sb)) diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c index 855aa8bcab64..66c35f6dfc24 100644 --- a/fs/xfs/scrub/parent.c +++ b/fs/xfs/scrub/parent.c @@ -164,13 +164,13 @@ xchk_parent_validate( * can't use DONTCACHE here because DONTCACHE inodes can trigger * immediate inactive cleanup of the inode. * - * If _iget returns -EINVAL then the parent inode number is garbage - * and the directory is corrupt. If the _iget returns -EFSCORRUPTED - * or -EFSBADCRC then the parent is corrupt which is a cross - * referencing error. Any other error is an operational error. + * If _iget returns -EINVAL or -ENOENT then the parent inode number is + * garbage and the directory is corrupt. If the _iget returns + * -EFSCORRUPTED or -EFSBADCRC then the parent is corrupt which is a + * cross referencing error. Any other error is an operational error. 
*/ error = xfs_iget(mp, sc->tp, dnum, XFS_IGET_UNTRUSTED, 0, &dp); - if (error == -EINVAL) { + if (error == -EINVAL || error == -ENOENT) { error = -EFSCORRUPTED; xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error); goto out; diff --git a/fs/xfs/scrub/rtbitmap.c b/fs/xfs/scrub/rtbitmap.c index 76e4ffe0315b..d409ca592178 100644 --- a/fs/xfs/scrub/rtbitmap.c +++ b/fs/xfs/scrub/rtbitmap.c @@ -52,9 +52,7 @@ xchk_rtbitmap_rec( startblock = rec->ar_startext * tp->t_mountp->m_sb.sb_rextsize; blockcount = rec->ar_extcount * tp->t_mountp->m_sb.sb_rextsize; - if (startblock + blockcount <= startblock || - !xfs_verify_rtbno(sc->mp, startblock) || - !xfs_verify_rtbno(sc->mp, startblock + blockcount - 1)) + if (!xfs_verify_rtext(sc->mp, startblock, blockcount)) xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); return 0; } diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index c544951a0c07..779cb73b3d00 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -16,6 +16,7 @@ #include "xfs_acl.h" #include "xfs_da_format.h" #include "xfs_da_btree.h" +#include "xfs_trans.h" #include <linux/posix_acl_xattr.h> @@ -212,21 +213,28 @@ __xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) } static int -xfs_set_mode(struct inode *inode, umode_t mode) +xfs_acl_set_mode( + struct inode *inode, + umode_t mode) { - int error = 0; - - if (mode != inode->i_mode) { - struct iattr iattr; + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + struct xfs_trans *tp; + int error; - iattr.ia_valid = ATTR_MODE | ATTR_CTIME; - iattr.ia_mode = mode; - iattr.ia_ctime = current_time(inode); + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp); + if (error) + return error; - error = xfs_setattr_nonsize(XFS_I(inode), &iattr, XFS_ATTR_NOACL); - } + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + inode->i_mode = mode; + inode->i_ctime = current_time(inode); + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - return error; + if 
(mp->m_flags & XFS_MOUNT_WSYNC) + xfs_trans_set_sync(tp); + return xfs_trans_commit(tp); } int @@ -251,18 +259,14 @@ xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) } set_acl: - error = __xfs_set_acl(inode, acl, type); - if (error) - return error; - /* * We set the mode after successfully updating the ACL xattr because the * xattr update can fail at ENOSPC and we don't want to change the mode * if the ACL update hasn't been applied. */ - if (set_mode) - error = xfs_set_mode(inode, mode); - + error = __xfs_set_acl(inode, acl, type); + if (!error && set_mode && mode != inode->i_mode) + error = xfs_acl_set_mode(inode, mode); return error; } diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c index 9e16a4d0f97c..93e4d8ae6e92 100644 --- a/fs/xfs/xfs_bmap_item.c +++ b/fs/xfs/xfs_bmap_item.c @@ -417,6 +417,40 @@ const struct xfs_defer_op_type xfs_bmap_update_defer_type = { .cancel_item = xfs_bmap_update_cancel_item, }; +/* Is this recovered BUI ok? */ +static inline bool +xfs_bui_validate( + struct xfs_mount *mp, + struct xfs_bui_log_item *buip) +{ + struct xfs_map_extent *bmap; + + /* Only one mapping operation per BUI... */ + if (buip->bui_format.bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) + return false; + + bmap = &buip->bui_format.bui_extents[0]; + + if (bmap->me_flags & ~XFS_BMAP_EXTENT_FLAGS) + return false; + + switch (bmap->me_flags & XFS_BMAP_EXTENT_TYPE_MASK) { + case XFS_BMAP_MAP: + case XFS_BMAP_UNMAP: + break; + default: + return false; + } + + if (!xfs_verify_ino(mp, bmap->me_owner)) + return false; + + if (!xfs_verify_fileext(mp, bmap->me_startoff, bmap->me_len)) + return false; + + return xfs_verify_fsbext(mp, bmap->me_startblock, bmap->me_len); +} + /* * Process a bmap update intent item that was recovered from the log. * We need to update some inode's bmbt. 
@@ -433,47 +467,24 @@ xfs_bui_item_recover( struct xfs_mount *mp = lip->li_mountp; struct xfs_map_extent *bmap; struct xfs_bud_log_item *budp; - xfs_fsblock_t startblock_fsb; - xfs_fsblock_t inode_fsb; xfs_filblks_t count; xfs_exntst_t state; unsigned int bui_type; int whichfork; int error = 0; - /* Only one mapping operation per BUI... */ - if (buip->bui_format.bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) + if (!xfs_bui_validate(mp, buip)) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + &buip->bui_format, sizeof(buip->bui_format)); return -EFSCORRUPTED; + } - /* - * First check the validity of the extent described by the - * BUI. If anything is bad, then toss the BUI. - */ bmap = &buip->bui_format.bui_extents[0]; - startblock_fsb = XFS_BB_TO_FSB(mp, - XFS_FSB_TO_DADDR(mp, bmap->me_startblock)); - inode_fsb = XFS_BB_TO_FSB(mp, XFS_FSB_TO_DADDR(mp, - XFS_INO_TO_FSB(mp, bmap->me_owner))); state = (bmap->me_flags & XFS_BMAP_EXTENT_UNWRITTEN) ? XFS_EXT_UNWRITTEN : XFS_EXT_NORM; whichfork = (bmap->me_flags & XFS_BMAP_EXTENT_ATTR_FORK) ? XFS_ATTR_FORK : XFS_DATA_FORK; bui_type = bmap->me_flags & XFS_BMAP_EXTENT_TYPE_MASK; - switch (bui_type) { - case XFS_BMAP_MAP: - case XFS_BMAP_UNMAP: - break; - default: - return -EFSCORRUPTED; - } - if (startblock_fsb == 0 || - bmap->me_len == 0 || - inode_fsb == 0 || - startblock_fsb >= mp->m_sb.sb_dblocks || - bmap->me_len >= mp->m_sb.sb_agblocks || - inode_fsb >= mp->m_sb.sb_dblocks || - (bmap->me_flags & ~XFS_BMAP_EXTENT_FLAGS)) - return -EFSCORRUPTED; /* Grab the inode. 
*/ error = xfs_iget(mp, NULL, bmap->me_owner, 0, 0, &ip); diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 4e4cf91f4f9f..f8400bbd6473 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -278,7 +278,7 @@ _xfs_buf_alloc( */ STATIC int _xfs_buf_get_pages( - xfs_buf_t *bp, + struct xfs_buf *bp, int page_count) { /* Make sure that we have a page list */ @@ -302,7 +302,7 @@ _xfs_buf_get_pages( */ STATIC void _xfs_buf_free_pages( - xfs_buf_t *bp) + struct xfs_buf *bp) { if (bp->b_pages != bp->b_page_array) { kmem_free(bp->b_pages); @@ -319,7 +319,7 @@ _xfs_buf_free_pages( */ static void xfs_buf_free( - xfs_buf_t *bp) + struct xfs_buf *bp) { trace_xfs_buf_free(bp, _RET_IP_); @@ -352,7 +352,7 @@ xfs_buf_free( */ STATIC int xfs_buf_allocate_memory( - xfs_buf_t *bp, + struct xfs_buf *bp, uint flags) { size_t size; @@ -463,7 +463,7 @@ out_free_pages: */ STATIC int _xfs_buf_map_pages( - xfs_buf_t *bp, + struct xfs_buf *bp, uint flags) { ASSERT(bp->b_flags & _XBF_PAGES); @@ -590,7 +590,7 @@ xfs_buf_find( struct xfs_buf **found_bp) { struct xfs_perag *pag; - xfs_buf_t *bp; + struct xfs_buf *bp; struct xfs_buf_map cmap = { .bm_bn = map[0].bm_bn }; xfs_daddr_t eofs; int i; @@ -762,7 +762,7 @@ found: int _xfs_buf_read( - xfs_buf_t *bp, + struct xfs_buf *bp, xfs_buf_flags_t flags) { ASSERT(!(flags & XBF_WRITE)); @@ -1005,7 +1005,7 @@ xfs_buf_get_uncached( */ void xfs_buf_hold( - xfs_buf_t *bp) + struct xfs_buf *bp) { trace_xfs_buf_hold(bp, _RET_IP_); atomic_inc(&bp->b_hold); @@ -1017,7 +1017,7 @@ xfs_buf_hold( */ void xfs_buf_rele( - xfs_buf_t *bp) + struct xfs_buf *bp) { struct xfs_perag *pag = bp->b_pag; bool release; @@ -1161,7 +1161,7 @@ xfs_buf_unlock( STATIC void xfs_buf_wait_unpin( - xfs_buf_t *bp) + struct xfs_buf *bp) { DECLARE_WAITQUEUE (wait, current); @@ -1373,7 +1373,7 @@ xfs_buf_ioend_work( struct work_struct *work) { struct xfs_buf *bp = - container_of(work, xfs_buf_t, b_ioend_work); + container_of(work, struct xfs_buf, b_ioend_work); xfs_buf_ioend(bp); } @@ 
-1388,7 +1388,7 @@ xfs_buf_ioend_async( void __xfs_buf_ioerror( - xfs_buf_t *bp, + struct xfs_buf *bp, int error, xfs_failaddr_t failaddr) { diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index bfd2907e7bc4..5d91a31298a4 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -124,7 +124,7 @@ struct xfs_buf_ops { xfs_failaddr_t (*verify_struct)(struct xfs_buf *bp); }; -typedef struct xfs_buf { +struct xfs_buf { /* * first cacheline holds all the fields needed for an uncontended cache * hit to be fully processed. The semaphore straddles the cacheline @@ -190,7 +190,7 @@ typedef struct xfs_buf { int b_last_error; const struct xfs_buf_ops *b_ops; -} xfs_buf_t; +}; /* Finding and Reading Buffers */ struct xfs_buf *xfs_buf_incore(struct xfs_buftarg *target, @@ -253,16 +253,16 @@ int _xfs_buf_read(struct xfs_buf *bp, xfs_buf_flags_t flags); void xfs_buf_hold(struct xfs_buf *bp); /* Releasing Buffers */ -extern void xfs_buf_rele(xfs_buf_t *); +extern void xfs_buf_rele(struct xfs_buf *); /* Locking and Unlocking Buffers */ -extern int xfs_buf_trylock(xfs_buf_t *); -extern void xfs_buf_lock(xfs_buf_t *); -extern void xfs_buf_unlock(xfs_buf_t *); +extern int xfs_buf_trylock(struct xfs_buf *); +extern void xfs_buf_lock(struct xfs_buf *); +extern void xfs_buf_unlock(struct xfs_buf *); #define xfs_buf_islocked(bp) \ ((bp)->b_sema.count <= 0) -static inline void xfs_buf_relse(xfs_buf_t *bp) +static inline void xfs_buf_relse(struct xfs_buf *bp) { xfs_buf_unlock(bp); xfs_buf_rele(bp); diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 0356f2e340a1..dc0be2a639cc 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -412,7 +412,7 @@ xfs_buf_item_unpin( int remove) { struct xfs_buf_log_item *bip = BUF_ITEM(lip); - xfs_buf_t *bp = bip->bli_buf; + struct xfs_buf *bp = bip->bli_buf; int stale = bip->bli_flags & XFS_BLI_STALE; int freed; @@ -942,7 +942,7 @@ xfs_buf_item_free( */ void xfs_buf_item_relse( - xfs_buf_t *bp) + struct xfs_buf *bp) { struct 
xfs_buf_log_item *bip = bp->b_log_item; diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 6c11bfc3d452..93223ebb3372 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -578,6 +578,15 @@ const struct xfs_defer_op_type xfs_agfl_free_defer_type = { .cancel_item = xfs_extent_free_cancel_item, }; +/* Is this recovered EFI ok? */ +static inline bool +xfs_efi_validate_ext( + struct xfs_mount *mp, + struct xfs_extent *extp) +{ + return xfs_verify_fsbext(mp, extp->ext_start, extp->ext_len); +} + /* * Process an extent free intent item that was recovered from * the log. We need to free the extents that it describes. @@ -592,7 +601,6 @@ xfs_efi_item_recover( struct xfs_efd_log_item *efdp; struct xfs_trans *tp; struct xfs_extent *extp; - xfs_fsblock_t startblock_fsb; int i; int error = 0; @@ -602,14 +610,13 @@ xfs_efi_item_recover( * just toss the EFI. */ for (i = 0; i < efip->efi_format.efi_nextents; i++) { - extp = &efip->efi_format.efi_extents[i]; - startblock_fsb = XFS_BB_TO_FSB(mp, - XFS_FSB_TO_DADDR(mp, extp->ext_start)); - if (startblock_fsb == 0 || - extp->ext_len == 0 || - startblock_fsb >= mp->m_sb.sb_dblocks || - extp->ext_len >= mp->m_sb.sb_agblocks) + if (!xfs_efi_validate_ext(mp, + &efip->efi_format.efi_extents[i])) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + &efip->efi_format, + sizeof(efip->efi_format)); return -EFSCORRUPTED; + } } error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp); diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index b7c5783a031c..959ce91a3755 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -28,7 +28,7 @@ xfs_growfs_data_private( xfs_mount_t *mp, /* mount point for filesystem */ xfs_growfs_data_t *in) /* growfs data input struct */ { - xfs_buf_t *bp; + struct xfs_buf *bp; int error; xfs_agnumber_t nagcount; xfs_agnumber_t nagimax = 0; diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 2bfbcf28b1bd..b7352bc4c815 100644 --- 
a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -761,68 +761,26 @@ xfs_inode_inherit_flags2( } /* - * Allocate an inode on disk and return a copy of its in-core version. - * The in-core inode is locked exclusively. Set mode, nlink, and rdev - * appropriately within the inode. The uid and gid for the inode are - * set according to the contents of the given cred structure. - * - * Use xfs_dialloc() to allocate the on-disk inode. If xfs_dialloc() - * has a free inode available, call xfs_iget() to obtain the in-core - * version of the allocated inode. Finally, fill in the inode and - * log its initial contents. In this case, ialloc_context would be - * set to NULL. - * - * If xfs_dialloc() does not have an available inode, it will replenish - * its supply by doing an allocation. Since we can only do one - * allocation within a transaction without deadlocks, we must commit - * the current transaction before returning the inode itself. - * In this case, therefore, we will set ialloc_context and return. - * The caller should then commit the current transaction, start a new - * transaction, and call xfs_ialloc() again to actually get the inode. - * - * To ensure that some other process does not grab the inode that - * was allocated during the first call to xfs_ialloc(), this routine - * also returns the [locked] bp pointing to the head of the freelist - * as ialloc_context. The caller should hold this buffer across - * the commit and pass it back into this routine on the second call. - * - * If we are allocating quota inodes, we do not have a parent inode - * to attach to or associate with (i.e. pip == NULL) because they - * are not linked into the directory structure - they are attached - * directly to the superblock - and so have no parent. + * Initialise a newly allocated inode and return the in-core inode to the + * caller locked exclusively. 
*/ static int -xfs_ialloc( - xfs_trans_t *tp, - xfs_inode_t *pip, - umode_t mode, - xfs_nlink_t nlink, - dev_t rdev, - prid_t prid, - xfs_buf_t **ialloc_context, - xfs_inode_t **ipp) -{ - struct xfs_mount *mp = tp->t_mountp; - xfs_ino_t ino; - xfs_inode_t *ip; - uint flags; - int error; - struct timespec64 tv; - struct inode *inode; - - /* - * Call the space management code to pick - * the on-disk inode to be allocated. - */ - error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, - ialloc_context, &ino); - if (error) - return error; - if (*ialloc_context || ino == NULLFSINO) { - *ipp = NULL; - return 0; - } - ASSERT(*ialloc_context == NULL); +xfs_init_new_inode( + struct xfs_trans *tp, + struct xfs_inode *pip, + xfs_ino_t ino, + umode_t mode, + xfs_nlink_t nlink, + dev_t rdev, + prid_t prid, + struct xfs_inode **ipp) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_inode *ip; + unsigned int flags; + int error; + struct timespec64 tv; + struct inode *inode; /* * Protect against obviously corrupt allocation btree records. Later @@ -837,14 +795,13 @@ xfs_ialloc( } /* - * Get the in-core inode with the lock held exclusively. - * This is because we're setting fields here we need - * to prevent others from looking at until we're done. + * Get the in-core inode with the lock held exclusively to prevent + * others from looking at until we're done. */ - error = xfs_iget(mp, tp, ino, XFS_IGET_CREATE, - XFS_ILOCK_EXCL, &ip); + error = xfs_iget(mp, tp, ino, XFS_IGET_CREATE, XFS_ILOCK_EXCL, &ip); if (error) return error; + ASSERT(ip != NULL); inode = VFS_I(ip); inode->i_mode = mode; @@ -932,143 +889,51 @@ xfs_ialloc( } /* - * Allocates a new inode from disk and return a pointer to the - * incore copy. This routine will internally commit the current - * transaction and allocate a new one if the Space Manager needed - * to do an allocation to replenish the inode free-list. - * - * This routine is designed to be called from xfs_create and - * xfs_create_dir. 
+ * Allocates a new inode from disk and return a pointer to the incore copy. This + * routine will internally commit the current transaction and allocate a new one + * if we needed to allocate more on-disk free inodes to perform the requested + * operation. * + * If we are allocating quota inodes, we do not have a parent inode to attach to + * or associate with (i.e. dp == NULL) because they are not linked into the + * directory structure - they are attached directly to the superblock - and so + * have no parent. */ int xfs_dir_ialloc( - xfs_trans_t **tpp, /* input: current transaction; - output: may be a new transaction. */ - xfs_inode_t *dp, /* directory within whose allocate - the inode. */ - umode_t mode, - xfs_nlink_t nlink, - dev_t rdev, - prid_t prid, /* project id */ - xfs_inode_t **ipp) /* pointer to inode; it will be - locked. */ -{ - xfs_trans_t *tp; - xfs_inode_t *ip; - xfs_buf_t *ialloc_context = NULL; - int code; - void *dqinfo; - uint tflags; - - tp = *tpp; - ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); + struct xfs_trans **tpp, + struct xfs_inode *dp, + umode_t mode, + xfs_nlink_t nlink, + dev_t rdev, + prid_t prid, + struct xfs_inode **ipp) +{ + struct xfs_buf *agibp; + xfs_ino_t parent_ino = dp ? dp->i_ino : 0; + xfs_ino_t ino; + int error; - /* - * xfs_ialloc will return a pointer to an incore inode if - * the Space Manager has an available inode on the free - * list. Otherwise, it will do an allocation and replenish - * the freelist. Since we can only do one allocation per - * transaction without deadlocks, we will need to commit the - * current transaction and start a new one. We will then - * need to call xfs_ialloc again to get the inode. - * - * If xfs_ialloc did an allocation to replenish the freelist, - * it returns the bp containing the head of the freelist as - * ialloc_context. We will hold a lock on it across the - * transaction commit so that no other process can steal - * the inode(s) that we've just allocated. 
- */ - code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, &ialloc_context, - &ip); + ASSERT((*tpp)->t_flags & XFS_TRANS_PERM_LOG_RES); /* - * Return an error if we were unable to allocate a new inode. - * This should only happen if we run out of space on disk or - * encounter a disk error. + * Call the space management code to pick the on-disk inode to be + * allocated. */ - if (code) { - *ipp = NULL; - return code; - } - if (!ialloc_context && !ip) { - *ipp = NULL; - return -ENOSPC; - } - - /* - * If the AGI buffer is non-NULL, then we were unable to get an - * inode in one operation. We need to commit the current - * transaction and call xfs_ialloc() again. It is guaranteed - * to succeed the second time. - */ - if (ialloc_context) { - /* - * Normally, xfs_trans_commit releases all the locks. - * We call bhold to hang on to the ialloc_context across - * the commit. Holding this buffer prevents any other - * processes from doing any allocations in this - * allocation group. - */ - xfs_trans_bhold(tp, ialloc_context); - - /* - * We want the quota changes to be associated with the next - * transaction, NOT this one. So, detach the dqinfo from this - * and attach it to the next transaction. - */ - dqinfo = NULL; - tflags = 0; - if (tp->t_dqinfo) { - dqinfo = (void *)tp->t_dqinfo; - tp->t_dqinfo = NULL; - tflags = tp->t_flags & XFS_TRANS_DQ_DIRTY; - tp->t_flags &= ~(XFS_TRANS_DQ_DIRTY); - } - - code = xfs_trans_roll(&tp); - - /* - * Re-attach the quota info that we detached from prev trx. - */ - if (dqinfo) { - tp->t_dqinfo = dqinfo; - tp->t_flags |= tflags; - } - - if (code) { - xfs_buf_relse(ialloc_context); - *tpp = tp; - *ipp = NULL; - return code; - } - xfs_trans_bjoin(tp, ialloc_context); - - /* - * Call ialloc again. Since we've locked out all - * other allocations in this allocation group, - * this call should always succeed. 
- */ - code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, - &ialloc_context, &ip); - - /* - * If we get an error at this point, return to the caller - * so that the current transaction can be aborted. - */ - if (code) { - *tpp = tp; - *ipp = NULL; - return code; - } - ASSERT(!ialloc_context && ip); + error = xfs_dialloc_select_ag(tpp, parent_ino, mode, &agibp); + if (error) + return error; - } + if (!agibp) + return -ENOSPC; - *ipp = ip; - *tpp = tp; + /* Allocate an inode from the selected AG */ + error = xfs_dialloc_ag(*tpp, agibp, parent_ino, &ino); + if (error) + return error; + ASSERT(ino != NULLFSINO); - return 0; + return xfs_init_new_inode(*tpp, dp, ino, mode, nlink, rdev, prid, ipp); } /* @@ -1521,7 +1386,7 @@ xfs_itruncate_extents_flags( * the page cache can't scale that far. */ first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size); - if (first_unmap_block >= XFS_MAX_FILEOFF) { + if (!xfs_verify_fileoff(mp, first_unmap_block)) { WARN_ON_ONCE(first_unmap_block > XFS_MAX_FILEOFF); return 0; } diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 751a3d1d7d84..eca333f5f715 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -407,9 +407,9 @@ void xfs_lock_two_inodes(struct xfs_inode *ip0, uint ip0_mode, xfs_extlen_t xfs_get_extsz_hint(struct xfs_inode *ip); xfs_extlen_t xfs_get_cowextsz_hint(struct xfs_inode *ip); -int xfs_dir_ialloc(struct xfs_trans **, struct xfs_inode *, umode_t, - xfs_nlink_t, dev_t, prid_t, - struct xfs_inode **); +int xfs_dir_ialloc(struct xfs_trans **tpp, struct xfs_inode *dp, umode_t mode, + xfs_nlink_t nlink, dev_t dev, prid_t prid, + struct xfs_inode **ipp); static inline int xfs_itruncate_extents( diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 1414ab79eacf..67c8dc9de8aa 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -206,10 +206,8 @@ xfs_generic_create( xfs_finish_inode_setup(ip); out_free_acl: - if (default_acl) - posix_acl_release(default_acl); - if (acl) - posix_acl_release(acl); + 
posix_acl_release(default_acl); + posix_acl_release(acl); return error; out_cleanup_inode: @@ -648,11 +646,10 @@ xfs_vn_change_ok( * Caution: The caller of this function is responsible for calling * setattr_prepare() or otherwise verifying the change is fine. */ -int +static int xfs_setattr_nonsize( struct xfs_inode *ip, - struct iattr *iattr, - int flags) + struct iattr *iattr) { xfs_mount_t *mp = ip->i_mount; struct inode *inode = VFS_I(ip); @@ -809,7 +806,7 @@ xfs_setattr_nonsize( * to attr_set. No previous user of the generic * Posix ACL code seems to care about this issue either. */ - if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) { + if (mask & ATTR_MODE) { error = posix_acl_chmod(inode, inode->i_mode); if (error) return error; @@ -826,22 +823,6 @@ out_dqrele: return error; } -int -xfs_vn_setattr_nonsize( - struct dentry *dentry, - struct iattr *iattr) -{ - struct xfs_inode *ip = XFS_I(d_inode(dentry)); - int error; - - trace_xfs_setattr(ip); - - error = xfs_vn_change_ok(dentry, iattr); - if (error) - return error; - return xfs_setattr_nonsize(ip, iattr, 0); -} - /* * Truncate file. Must have write permission and not be a directory. * @@ -881,7 +862,7 @@ xfs_setattr_size( * Use the regular setattr path to update the timestamps. 
*/ iattr->ia_valid &= ~ATTR_SIZE; - return xfs_setattr_nonsize(ip, iattr, 0); + return xfs_setattr_nonsize(ip, iattr); } /* @@ -1069,11 +1050,11 @@ xfs_vn_setattr( struct dentry *dentry, struct iattr *iattr) { + struct inode *inode = d_inode(dentry); + struct xfs_inode *ip = XFS_I(inode); int error; if (iattr->ia_valid & ATTR_SIZE) { - struct inode *inode = d_inode(dentry); - struct xfs_inode *ip = XFS_I(inode); uint iolock; xfs_ilock(ip, XFS_MMAPLOCK_EXCL); @@ -1088,7 +1069,11 @@ xfs_vn_setattr( error = xfs_vn_setattr_size(dentry, iattr); xfs_iunlock(ip, XFS_MMAPLOCK_EXCL); } else { - error = xfs_vn_setattr_nonsize(dentry, iattr); + trace_xfs_setattr(ip); + + error = xfs_vn_change_ok(dentry, iattr); + if (!error) + error = xfs_setattr_nonsize(ip, iattr); } return error; diff --git a/fs/xfs/xfs_iops.h b/fs/xfs/xfs_iops.h index 4d24ff309f59..99ca745c1071 100644 --- a/fs/xfs/xfs_iops.h +++ b/fs/xfs/xfs_iops.h @@ -13,15 +13,7 @@ extern const struct file_operations xfs_dir_file_operations; extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size); -/* - * Internal setattr interfaces. - */ -#define XFS_ATTR_NOACL 0x01 /* Don't call posix_acl_chmod */ - extern void xfs_setattr_time(struct xfs_inode *ip, struct iattr *iattr); -extern int xfs_setattr_nonsize(struct xfs_inode *ip, struct iattr *vap, - int flags); -extern int xfs_vn_setattr_nonsize(struct dentry *dentry, struct iattr *vap); extern int xfs_vn_setattr_size(struct dentry *dentry, struct iattr *vap); #endif /* __XFS_IOPS_H__ */ diff --git a/fs/xfs/xfs_iwalk.c b/fs/xfs/xfs_iwalk.c index 2a45138831e3..eae3aff9bc97 100644 --- a/fs/xfs/xfs_iwalk.c +++ b/fs/xfs/xfs_iwalk.c @@ -363,7 +363,7 @@ xfs_iwalk_run_callbacks( /* Delete cursor but remember the last record we cached... 
*/ xfs_iwalk_del_inobt(tp, curpp, agi_bpp, 0); irec = &iwag->recs[iwag->nr_recs - 1]; - ASSERT(next_agino == irec->ir_startino + XFS_INODES_PER_CHUNK); + ASSERT(next_agino >= irec->ir_startino + XFS_INODES_PER_CHUNK); error = xfs_iwalk_ag_recs(iwag); if (error) diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 87886b7f77da..97f31308de03 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -2559,8 +2559,11 @@ xlog_recover_process_intents( spin_unlock(&ailp->ail_lock); error = lip->li_ops->iop_recover(lip, &capture_list); spin_lock(&ailp->ail_lock); - if (error) + if (error) { + trace_xlog_intent_recovery_failed(log->l_mp, error, + lip->li_ops->iop_recover); break; + } } xfs_trans_ail_cursor_done(&cur); @@ -2628,7 +2631,7 @@ xlog_recover_clear_agi_bucket( { xfs_trans_t *tp; xfs_agi_t *agi; - xfs_buf_t *agibp; + struct xfs_buf *agibp; int offset; int error; @@ -2746,7 +2749,7 @@ xlog_recover_process_iunlinks( xfs_mount_t *mp; xfs_agnumber_t agno; xfs_agi_t *agi; - xfs_buf_t *agibp; + struct xfs_buf *agibp; xfs_agino_t agino; int bucket; int error; @@ -3498,8 +3501,8 @@ xlog_recover_check_summary( struct xlog *log) { xfs_mount_t *mp; - xfs_buf_t *agfbp; - xfs_buf_t *agibp; + struct xfs_buf *agfbp; + struct xfs_buf *agibp; xfs_agnumber_t agno; uint64_t freeblks; uint64_t itotal; diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index b2a9abee8b2b..c134eb4aeaa8 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -737,15 +737,15 @@ xfs_qm_destroy_quotainfo( */ STATIC int xfs_qm_qino_alloc( - xfs_mount_t *mp, - xfs_inode_t **ip, - uint flags) + struct xfs_mount *mp, + struct xfs_inode **ipp, + unsigned int flags) { - xfs_trans_t *tp; - int error; - bool need_alloc = true; + struct xfs_trans *tp; + int error; + bool need_alloc = true; - *ip = NULL; + *ipp = NULL; /* * With superblock that doesn't have separate pquotino, we * share an inode between gquota and pquota. 
If the on-disk @@ -771,7 +771,7 @@ xfs_qm_qino_alloc( return -EFSCORRUPTED; } if (ino != NULLFSINO) { - error = xfs_iget(mp, NULL, ino, 0, 0, ip); + error = xfs_iget(mp, NULL, ino, 0, 0, ipp); if (error) return error; mp->m_sb.sb_gquotino = NULLFSINO; @@ -787,7 +787,7 @@ xfs_qm_qino_alloc( return error; if (need_alloc) { - error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, ip); + error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, ipp); if (error) { xfs_trans_cancel(tp); return error; @@ -812,11 +812,11 @@ xfs_qm_qino_alloc( mp->m_sb.sb_qflags = mp->m_qflags & XFS_ALL_QUOTA_ACCT; } if (flags & XFS_QMOPT_UQUOTA) - mp->m_sb.sb_uquotino = (*ip)->i_ino; + mp->m_sb.sb_uquotino = (*ipp)->i_ino; else if (flags & XFS_QMOPT_GQUOTA) - mp->m_sb.sb_gquotino = (*ip)->i_ino; + mp->m_sb.sb_gquotino = (*ipp)->i_ino; else - mp->m_sb.sb_pquotino = (*ip)->i_ino; + mp->m_sb.sb_pquotino = (*ipp)->i_ino; spin_unlock(&mp->m_sb_lock); xfs_log_sb(tp); @@ -826,7 +826,7 @@ xfs_qm_qino_alloc( xfs_alert(mp, "%s failed (error %d)!", __func__, error); } if (need_alloc) - xfs_finish_inode_setup(*ip); + xfs_finish_inode_setup(*ipp); return error; } diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c index 7529eb63ce94..07ebccbbf4df 100644 --- a/fs/xfs/xfs_refcount_item.c +++ b/fs/xfs/xfs_refcount_item.c @@ -417,6 +417,31 @@ const struct xfs_defer_op_type xfs_refcount_update_defer_type = { .cancel_item = xfs_refcount_update_cancel_item, }; +/* Is this recovered CUI ok? 
*/ +static inline bool +xfs_cui_validate_phys( + struct xfs_mount *mp, + struct xfs_phys_extent *refc) +{ + if (!xfs_sb_version_hasreflink(&mp->m_sb)) + return false; + + if (refc->pe_flags & ~XFS_REFCOUNT_EXTENT_FLAGS) + return false; + + switch (refc->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK) { + case XFS_REFCOUNT_INCREASE: + case XFS_REFCOUNT_DECREASE: + case XFS_REFCOUNT_ALLOC_COW: + case XFS_REFCOUNT_FREE_COW: + break; + default: + return false; + } + + return xfs_verify_fsbext(mp, refc->pe_startblock, refc->pe_len); +} + /* * Process a refcount update intent item that was recovered from the log. * We need to update the refcountbt. @@ -433,11 +458,9 @@ xfs_cui_item_recover( struct xfs_trans *tp; struct xfs_btree_cur *rcur = NULL; struct xfs_mount *mp = lip->li_mountp; - xfs_fsblock_t startblock_fsb; xfs_fsblock_t new_fsb; xfs_extlen_t new_len; unsigned int refc_type; - bool op_ok; bool requeue_only = false; enum xfs_refcount_intent_type type; int i; @@ -449,26 +472,13 @@ xfs_cui_item_recover( * just toss the CUI. 
*/ for (i = 0; i < cuip->cui_format.cui_nextents; i++) { - refc = &cuip->cui_format.cui_extents[i]; - startblock_fsb = XFS_BB_TO_FSB(mp, - XFS_FSB_TO_DADDR(mp, refc->pe_startblock)); - switch (refc->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK) { - case XFS_REFCOUNT_INCREASE: - case XFS_REFCOUNT_DECREASE: - case XFS_REFCOUNT_ALLOC_COW: - case XFS_REFCOUNT_FREE_COW: - op_ok = true; - break; - default: - op_ok = false; - break; - } - if (!op_ok || startblock_fsb == 0 || - refc->pe_len == 0 || - startblock_fsb >= mp->m_sb.sb_dblocks || - refc->pe_len >= mp->m_sb.sb_agblocks || - (refc->pe_flags & ~XFS_REFCOUNT_EXTENT_FLAGS)) + if (!xfs_cui_validate_phys(mp, + &cuip->cui_format.cui_extents[i])) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + &cuip->cui_format, + sizeof(cuip->cui_format)); return -EFSCORRUPTED; + } } /* diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c index 7adc996ca6e3..49cebd68b672 100644 --- a/fs/xfs/xfs_rmap_item.c +++ b/fs/xfs/xfs_rmap_item.c @@ -460,6 +460,42 @@ const struct xfs_defer_op_type xfs_rmap_update_defer_type = { .cancel_item = xfs_rmap_update_cancel_item, }; +/* Is this recovered RUI ok? 
*/ +static inline bool +xfs_rui_validate_map( + struct xfs_mount *mp, + struct xfs_map_extent *rmap) +{ + if (!xfs_sb_version_hasrmapbt(&mp->m_sb)) + return false; + + if (rmap->me_flags & ~XFS_RMAP_EXTENT_FLAGS) + return false; + + switch (rmap->me_flags & XFS_RMAP_EXTENT_TYPE_MASK) { + case XFS_RMAP_EXTENT_MAP: + case XFS_RMAP_EXTENT_MAP_SHARED: + case XFS_RMAP_EXTENT_UNMAP: + case XFS_RMAP_EXTENT_UNMAP_SHARED: + case XFS_RMAP_EXTENT_CONVERT: + case XFS_RMAP_EXTENT_CONVERT_SHARED: + case XFS_RMAP_EXTENT_ALLOC: + case XFS_RMAP_EXTENT_FREE: + break; + default: + return false; + } + + if (!XFS_RMAP_NON_INODE_OWNER(rmap->me_owner) && + !xfs_verify_ino(mp, rmap->me_owner)) + return false; + + if (!xfs_verify_fileext(mp, rmap->me_startoff, rmap->me_len)) + return false; + + return xfs_verify_fsbext(mp, rmap->me_startblock, rmap->me_len); +} + /* * Process an rmap update intent item that was recovered from the log. * We need to update the rmapbt. @@ -475,10 +511,8 @@ xfs_rui_item_recover( struct xfs_trans *tp; struct xfs_btree_cur *rcur = NULL; struct xfs_mount *mp = lip->li_mountp; - xfs_fsblock_t startblock_fsb; enum xfs_rmap_intent_type type; xfs_exntst_t state; - bool op_ok; int i; int whichfork; int error = 0; @@ -489,30 +523,13 @@ xfs_rui_item_recover( * just toss the RUI. 
*/ for (i = 0; i < ruip->rui_format.rui_nextents; i++) { - rmap = &ruip->rui_format.rui_extents[i]; - startblock_fsb = XFS_BB_TO_FSB(mp, - XFS_FSB_TO_DADDR(mp, rmap->me_startblock)); - switch (rmap->me_flags & XFS_RMAP_EXTENT_TYPE_MASK) { - case XFS_RMAP_EXTENT_MAP: - case XFS_RMAP_EXTENT_MAP_SHARED: - case XFS_RMAP_EXTENT_UNMAP: - case XFS_RMAP_EXTENT_UNMAP_SHARED: - case XFS_RMAP_EXTENT_CONVERT: - case XFS_RMAP_EXTENT_CONVERT_SHARED: - case XFS_RMAP_EXTENT_ALLOC: - case XFS_RMAP_EXTENT_FREE: - op_ok = true; - break; - default: - op_ok = false; - break; - } - if (!op_ok || startblock_fsb == 0 || - rmap->me_len == 0 || - startblock_fsb >= mp->m_sb.sb_dblocks || - rmap->me_len >= mp->m_sb.sb_agblocks || - (rmap->me_flags & ~XFS_RMAP_EXTENT_FLAGS)) + if (!xfs_rui_validate_map(mp, + &ruip->rui_format.rui_extents[i])) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + &ruip->rui_format, + sizeof(ruip->rui_format)); return -EFSCORRUPTED; + } } error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index ede1baf31413..b4999fb01ff7 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -32,7 +32,7 @@ xfs_rtget_summary( xfs_trans_t *tp, /* transaction pointer */ int log, /* log2 of extent size */ xfs_rtblock_t bbno, /* bitmap block number */ - xfs_buf_t **rbpp, /* in/out: summary block buffer */ + struct xfs_buf **rbpp, /* in/out: summary block buffer */ xfs_fsblock_t *rsb, /* in/out: summary block number */ xfs_suminfo_t *sum) /* out: summary info for this block */ { @@ -50,7 +50,7 @@ xfs_rtany_summary( int low, /* low log2 extent size */ int high, /* high log2 extent size */ xfs_rtblock_t bbno, /* bitmap block number */ - xfs_buf_t **rbpp, /* in/out: summary block buffer */ + struct xfs_buf **rbpp, /* in/out: summary block buffer */ xfs_fsblock_t *rsb, /* in/out: summary block number */ int *stat) /* out: any good extents here? 
*/ { @@ -104,7 +104,7 @@ xfs_rtcopy_summary( xfs_trans_t *tp) /* transaction pointer */ { xfs_rtblock_t bbno; /* bitmap block number */ - xfs_buf_t *bp; /* summary buffer */ + struct xfs_buf *bp; /* summary buffer */ int error; /* error return value */ int log; /* summary level number (log length) */ xfs_suminfo_t sum; /* summary data */ @@ -144,7 +144,7 @@ xfs_rtallocate_range( xfs_trans_t *tp, /* transaction pointer */ xfs_rtblock_t start, /* start block to allocate */ xfs_extlen_t len, /* length to allocate */ - xfs_buf_t **rbpp, /* in/out: summary block buffer */ + struct xfs_buf **rbpp, /* in/out: summary block buffer */ xfs_fsblock_t *rsb) /* in/out: summary block number */ { xfs_rtblock_t end; /* end of the allocated extent */ @@ -226,7 +226,7 @@ xfs_rtallocate_extent_block( xfs_extlen_t maxlen, /* maximum length to allocate */ xfs_extlen_t *len, /* out: actual length allocated */ xfs_rtblock_t *nextp, /* out: next block to try */ - xfs_buf_t **rbpp, /* in/out: summary block buffer */ + struct xfs_buf **rbpp, /* in/out: summary block buffer */ xfs_fsblock_t *rsb, /* in/out: summary block number */ xfs_extlen_t prod, /* extent product factor */ xfs_rtblock_t *rtblock) /* out: start block allocated */ @@ -345,7 +345,7 @@ xfs_rtallocate_extent_exact( xfs_extlen_t minlen, /* minimum length to allocate */ xfs_extlen_t maxlen, /* maximum length to allocate */ xfs_extlen_t *len, /* out: actual length allocated */ - xfs_buf_t **rbpp, /* in/out: summary block buffer */ + struct xfs_buf **rbpp, /* in/out: summary block buffer */ xfs_fsblock_t *rsb, /* in/out: summary block number */ xfs_extlen_t prod, /* extent product factor */ xfs_rtblock_t *rtblock) /* out: start block allocated */ @@ -424,7 +424,7 @@ xfs_rtallocate_extent_near( xfs_extlen_t minlen, /* minimum length to allocate */ xfs_extlen_t maxlen, /* maximum length to allocate */ xfs_extlen_t *len, /* out: actual length allocated */ - xfs_buf_t **rbpp, /* in/out: summary block buffer */ + struct xfs_buf 
**rbpp, /* in/out: summary block buffer */ xfs_fsblock_t *rsb, /* in/out: summary block number */ xfs_extlen_t prod, /* extent product factor */ xfs_rtblock_t *rtblock) /* out: start block allocated */ @@ -626,7 +626,7 @@ xfs_rtallocate_extent_size( xfs_extlen_t minlen, /* minimum length to allocate */ xfs_extlen_t maxlen, /* maximum length to allocate */ xfs_extlen_t *len, /* out: actual length allocated */ - xfs_buf_t **rbpp, /* in/out: summary block buffer */ + struct xfs_buf **rbpp, /* in/out: summary block buffer */ xfs_fsblock_t *rsb, /* in/out: summary block number */ xfs_extlen_t prod, /* extent product factor */ xfs_rtblock_t *rtblock) /* out: start block allocated */ @@ -900,7 +900,7 @@ xfs_growfs_rt( xfs_growfs_rt_t *in) /* growfs rt input struct */ { xfs_rtblock_t bmbno; /* bitmap block number */ - xfs_buf_t *bp; /* temporary buffer */ + struct xfs_buf *bp; /* temporary buffer */ int error; /* error return value */ xfs_mount_t *nmp; /* new (fake) mount structure */ xfs_rfsblock_t nrblocks; /* new number of realtime blocks */ @@ -1151,7 +1151,7 @@ xfs_rtallocate_extent( int error; /* error value */ xfs_rtblock_t r; /* result allocated block */ xfs_fsblock_t sb; /* summary file block number */ - xfs_buf_t *sumbp; /* summary file block buffer */ + struct xfs_buf *sumbp; /* summary file block buffer */ ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL)); ASSERT(minlen > 0 && minlen <= maxlen); diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h index 93e77b221355..ed885620589c 100644 --- a/fs/xfs/xfs_rtalloc.h +++ b/fs/xfs/xfs_rtalloc.h @@ -115,10 +115,10 @@ int xfs_rtmodify_range(struct xfs_mount *mp, struct xfs_trans *tp, xfs_rtblock_t start, xfs_extlen_t len, int val); int xfs_rtmodify_summary_int(struct xfs_mount *mp, struct xfs_trans *tp, int log, xfs_rtblock_t bbno, int delta, - xfs_buf_t **rbpp, xfs_fsblock_t *rsb, + struct xfs_buf **rbpp, xfs_fsblock_t *rsb, xfs_suminfo_t *sum); int xfs_rtmodify_summary(struct xfs_mount *mp, struct xfs_trans 
*tp, int log, - xfs_rtblock_t bbno, int delta, xfs_buf_t **rbpp, + xfs_rtblock_t bbno, int delta, struct xfs_buf **rbpp, xfs_fsblock_t *rsb); int xfs_rtfree_range(struct xfs_mount *mp, struct xfs_trans *tp, xfs_rtblock_t start, xfs_extlen_t len, diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index e3e229e52512..813be879a5e5 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -199,10 +199,12 @@ xfs_fs_show_options( seq_printf(m, ",swidth=%d", (int)XFS_FSB_TO_BB(mp, mp->m_swidth)); - if (mp->m_qflags & (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD)) - seq_puts(m, ",usrquota"); - else if (mp->m_qflags & XFS_UQUOTA_ACCT) - seq_puts(m, ",uqnoenforce"); + if (mp->m_qflags & XFS_UQUOTA_ACCT) { + if (mp->m_qflags & XFS_UQUOTA_ENFD) + seq_puts(m, ",usrquota"); + else + seq_puts(m, ",uqnoenforce"); + } if (mp->m_qflags & XFS_PQUOTA_ACCT) { if (mp->m_qflags & XFS_PQUOTA_ENFD) @@ -1159,7 +1161,7 @@ suffix_kstrtoint( * NOTE: mp->m_super is NULL here! */ static int -xfs_fc_parse_param( +xfs_fs_parse_param( struct fs_context *fc, struct fs_parameter *param) { @@ -1317,7 +1319,7 @@ xfs_fc_parse_param( } static int -xfs_fc_validate_params( +xfs_fs_validate_params( struct xfs_mount *mp) { /* @@ -1386,7 +1388,7 @@ xfs_fc_validate_params( } static int -xfs_fc_fill_super( +xfs_fs_fill_super( struct super_block *sb, struct fs_context *fc) { @@ -1396,7 +1398,7 @@ xfs_fc_fill_super( mp->m_super = sb; - error = xfs_fc_validate_params(mp); + error = xfs_fs_validate_params(mp); if (error) goto out_free_names; @@ -1467,6 +1469,45 @@ xfs_fc_fill_super( #endif } + /* Filesystem claims it needs repair, so refuse the mount. */ + if (xfs_sb_version_needsrepair(&mp->m_sb)) { + xfs_warn(mp, "Filesystem needs repair. Please run xfs_repair."); + error = -EFSCORRUPTED; + goto out_free_sb; + } + + /* + * Don't touch the filesystem if a user tool thinks it owns the primary + * superblock. mkfs doesn't clear the flag from secondary supers, so + * we don't check them at all. 
+ */ + if (mp->m_sb.sb_inprogress) { + xfs_warn(mp, "Offline file system operation in progress!"); + error = -EFSCORRUPTED; + goto out_free_sb; + } + + /* + * Until this is fixed only page-sized or smaller data blocks work. + */ + if (mp->m_sb.sb_blocksize > PAGE_SIZE) { + xfs_warn(mp, + "File system with blocksize %d bytes. " + "Only pagesize (%ld) or less will currently work.", + mp->m_sb.sb_blocksize, PAGE_SIZE); + error = -ENOSYS; + goto out_free_sb; + } + + /* Ensure this filesystem fits in the page cache limits */ + if (xfs_sb_validate_fsb_count(&mp->m_sb, mp->m_sb.sb_dblocks) || + xfs_sb_validate_fsb_count(&mp->m_sb, mp->m_sb.sb_rblocks)) { + xfs_warn(mp, + "file system too large to be mounted on this system."); + error = -EFBIG; + goto out_free_sb; + } + /* * XFS block mappings use 54 bits to store the logical block offset. * This should suffice to handle the maximum file size that the VFS @@ -1478,7 +1519,7 @@ xfs_fc_fill_super( * Avoid integer overflow by comparing the maximum bmbt offset to the * maximum pagecache offset in units of fs blocks. */ - if (XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE) > XFS_MAX_FILEOFF) { + if (!xfs_verify_fileoff(mp, XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE))) { xfs_warn(mp, "MAX_LFS_FILESIZE block offset (%llu) exceeds extent map maximum (%llu)!", XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE), @@ -1621,10 +1662,10 @@ xfs_fc_fill_super( } static int -xfs_fc_get_tree( +xfs_fs_get_tree( struct fs_context *fc) { - return get_tree_bdev(fc, xfs_fc_fill_super); + return get_tree_bdev(fc, xfs_fs_fill_super); } static int @@ -1743,7 +1784,7 @@ xfs_remount_ro( * silently ignore all options that we can't actually change. 
*/ static int -xfs_fc_reconfigure( +xfs_fs_reconfigure( struct fs_context *fc) { struct xfs_mount *mp = XFS_M(fc->root->d_sb); @@ -1756,7 +1797,7 @@ xfs_fc_reconfigure( if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5) fc->sb_flags |= SB_I_VERSION; - error = xfs_fc_validate_params(new_mp); + error = xfs_fs_validate_params(new_mp); if (error) return error; @@ -1793,7 +1834,7 @@ xfs_fc_reconfigure( return 0; } -static void xfs_fc_free( +static void xfs_fs_free( struct fs_context *fc) { struct xfs_mount *mp = fc->s_fs_info; @@ -1809,10 +1850,10 @@ static void xfs_fc_free( } static const struct fs_context_operations xfs_context_ops = { - .parse_param = xfs_fc_parse_param, - .get_tree = xfs_fc_get_tree, - .reconfigure = xfs_fc_reconfigure, - .free = xfs_fc_free, + .parse_param = xfs_fs_parse_param, + .get_tree = xfs_fs_get_tree, + .reconfigure = xfs_fs_reconfigure, + .free = xfs_fs_free, }; static int xfs_init_fs_context( diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index 8e88a7ca387e..1f43fd7f3209 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c @@ -154,7 +154,7 @@ xfs_symlink( const char *cur_chunk; int byte_cnt; int n; - xfs_buf_t *bp; + struct xfs_buf *bp; prid_t prid; struct xfs_dquot *udqp = NULL; struct xfs_dquot *gdqp = NULL; @@ -365,7 +365,7 @@ STATIC int xfs_inactive_symlink_rmt( struct xfs_inode *ip) { - xfs_buf_t *bp; + struct xfs_buf *bp; int done; int error; int i; diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 86951652d3ed..5a263ae3d4f0 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -103,6 +103,24 @@ DEFINE_ATTR_LIST_EVENT(xfs_attr_list_notfound); DEFINE_ATTR_LIST_EVENT(xfs_attr_leaf_list); DEFINE_ATTR_LIST_EVENT(xfs_attr_node_list); +TRACE_EVENT(xlog_intent_recovery_failed, + TP_PROTO(struct xfs_mount *mp, int error, void *function), + TP_ARGS(mp, error, function), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(int, error) + __field(void *, function) + ), + TP_fast_assign( + __entry->dev = 
mp->m_super->s_dev; + __entry->error = error; + __entry->function = function; + ), + TP_printk("dev %d:%d error %d function %pS", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->error, __entry->function) +); + DECLARE_EVENT_CLASS(xfs_perag_class, TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount, unsigned long caller_ip), diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index c94e71f741b6..e72730f85af1 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -465,7 +465,7 @@ xfs_trans_apply_sb_deltas( xfs_trans_t *tp) { xfs_dsb_t *sbp; - xfs_buf_t *bp; + struct xfs_buf *bp; int whole = 0; bp = xfs_trans_getsb(tp); diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 42d63b830cb9..9aced0a00003 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -121,7 +121,7 @@ xfs_trans_get_buf_map( xfs_buf_flags_t flags, struct xfs_buf **bpp) { - xfs_buf_t *bp; + struct xfs_buf *bp; struct xfs_buf_log_item *bip; int error; @@ -401,7 +401,7 @@ xfs_trans_brelse( void xfs_trans_bhold( xfs_trans_t *tp, - xfs_buf_t *bp) + struct xfs_buf *bp) { struct xfs_buf_log_item *bip = bp->b_log_item; @@ -422,7 +422,7 @@ xfs_trans_bhold( void xfs_trans_bhold_release( xfs_trans_t *tp, - xfs_buf_t *bp) + struct xfs_buf *bp) { struct xfs_buf_log_item *bip = bp->b_log_item; @@ -538,7 +538,7 @@ xfs_trans_log_buf( void xfs_trans_binval( xfs_trans_t *tp, - xfs_buf_t *bp) + struct xfs_buf *bp) { struct xfs_buf_log_item *bip = bp->b_log_item; int i; @@ -593,7 +593,7 @@ xfs_trans_binval( void xfs_trans_inode_buf( xfs_trans_t *tp, - xfs_buf_t *bp) + struct xfs_buf *bp) { struct xfs_buf_log_item *bip = bp->b_log_item; @@ -618,7 +618,7 @@ xfs_trans_inode_buf( void xfs_trans_stale_inode_buf( xfs_trans_t *tp, - xfs_buf_t *bp) + struct xfs_buf *bp) { struct xfs_buf_log_item *bip = bp->b_log_item; @@ -643,7 +643,7 @@ xfs_trans_stale_inode_buf( void xfs_trans_inode_alloc_buf( xfs_trans_t *tp, - xfs_buf_t *bp) + struct xfs_buf *bp) { struct xfs_buf_log_item 
*bip = bp->b_log_item; @@ -737,7 +737,7 @@ xfs_trans_buf_copy_type( void xfs_trans_dquot_buf( xfs_trans_t *tp, - xfs_buf_t *bp, + struct xfs_buf *bp, uint type) { struct xfs_buf_log_item *bip = bp->b_log_item; diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c index fe45b0c3970c..28b8ac701919 100644 --- a/fs/xfs/xfs_trans_dquot.c +++ b/fs/xfs/xfs_trans_dquot.c @@ -84,13 +84,6 @@ xfs_trans_dup_dqinfo( xfs_trans_alloc_dqinfo(ntp); - /* - * Because the quota blk reservation is carried forward, - * it is also necessary to carry forward the DQ_DIRTY flag. - */ - if (otp->t_flags & XFS_TRANS_DQ_DIRTY) - ntp->t_flags |= XFS_TRANS_DQ_DIRTY; - for (j = 0; j < XFS_QM_TRANS_DQTYPES; j++) { oqa = otp->t_dqinfo->dqs[j]; nqa = ntp->t_dqinfo->dqs[j]; @@ -143,9 +136,6 @@ xfs_trans_mod_dquot_byino( xfs_is_quota_inode(&mp->m_sb, ip->i_ino)) return; - if (tp->t_dqinfo == NULL) - xfs_trans_alloc_dqinfo(tp); - if (XFS_IS_UQUOTA_ON(mp) && ip->i_udquot) (void) xfs_trans_mod_dquot(tp, ip->i_udquot, field, delta); if (XFS_IS_GQUOTA_ON(mp) && ip->i_gdquot) @@ -204,6 +194,9 @@ xfs_trans_mod_dquot( ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp)); qtrx = NULL; + if (!delta) + return; + if (tp->t_dqinfo == NULL) xfs_trans_alloc_dqinfo(tp); /* @@ -215,10 +208,8 @@ xfs_trans_mod_dquot( if (qtrx->qt_dquot == NULL) qtrx->qt_dquot = dqp; - if (delta) { - trace_xfs_trans_mod_dquot_before(qtrx); - trace_xfs_trans_mod_dquot(tp, dqp, field, delta); - } + trace_xfs_trans_mod_dquot_before(qtrx); + trace_xfs_trans_mod_dquot(tp, dqp, field, delta); switch (field) { /* regular disk blk reservation */ @@ -271,10 +262,7 @@ xfs_trans_mod_dquot( ASSERT(0); } - if (delta) - trace_xfs_trans_mod_dquot_after(qtrx); - - tp->t_flags |= XFS_TRANS_DQ_DIRTY; + trace_xfs_trans_mod_dquot_after(qtrx); } @@ -351,7 +339,7 @@ xfs_trans_apply_dquot_deltas( int64_t totalbdelta; int64_t totalrtbdelta; - if (!(tp->t_flags & XFS_TRANS_DQ_DIRTY)) + if (!tp->t_dqinfo) return; ASSERT(tp->t_dqinfo); @@ -493,7 +481,7 @@ 
xfs_trans_unreserve_and_mod_dquots( struct xfs_dqtrx *qtrx, *qa; bool locked; - if (!tp->t_dqinfo || !(tp->t_flags & XFS_TRANS_DQ_DIRTY)) + if (!tp->t_dqinfo) return; for (j = 0; j < XFS_QM_TRANS_DQTYPES; j++) { @@ -698,16 +686,10 @@ xfs_trans_dqresv( * because we don't have the luxury of a transaction envelope then. */ if (tp) { - ASSERT(tp->t_dqinfo); ASSERT(flags & XFS_QMOPT_RESBLK_MASK); - if (nblks != 0) - xfs_trans_mod_dquot(tp, dqp, - flags & XFS_QMOPT_RESBLK_MASK, - nblks); - if (ninos != 0) - xfs_trans_mod_dquot(tp, dqp, - XFS_TRANS_DQ_RES_INOS, - ninos); + xfs_trans_mod_dquot(tp, dqp, flags & XFS_QMOPT_RESBLK_MASK, + nblks); + xfs_trans_mod_dquot(tp, dqp, XFS_TRANS_DQ_RES_INOS, ninos); } ASSERT(dqp->q_blk.reserved >= dqp->q_blk.count); ASSERT(dqp->q_rtb.reserved >= dqp->q_rtb.count); @@ -752,9 +734,6 @@ xfs_trans_reserve_quota_bydquots( if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) return 0; - if (tp && tp->t_dqinfo == NULL) - xfs_trans_alloc_dqinfo(tp); - ASSERT(flags & XFS_QMOPT_RESBLK_MASK); if (udqp) { |