diff options
Diffstat (limited to 'fs')
98 files changed, 3505 insertions, 2939 deletions
diff --git a/fs/9p/fid.c b/fs/9p/fid.c index 23cf9b2fbfe4..805151114e96 100644 --- a/fs/9p/fid.c +++ b/fs/9p/fid.c @@ -11,7 +11,6 @@ #include <linux/fs.h> #include <linux/slab.h> #include <linux/sched.h> -#include <linux/idr.h> #include <net/9p/9p.h> #include <net/9p/client.h> diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index 0129de2ea31a..3a9c4517265f 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -14,7 +14,6 @@ #include <linux/sched.h> #include <linux/cred.h> #include <linux/parser.h> -#include <linux/idr.h> #include <linux/slab.h> #include <linux/seq_file.h> #include <net/9p/9p.h> diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index a19891015f19..97599edbc300 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -14,7 +14,6 @@ #include <linux/string.h> #include <linux/inet.h> #include <linux/pagemap.h> -#include <linux/idr.h> #include <linux/sched.h> #include <linux/swap.h> #include <linux/uio.h> diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c index f89f01734587..65fa2df5e49b 100644 --- a/fs/9p/vfs_dentry.c +++ b/fs/9p/vfs_dentry.c @@ -15,7 +15,6 @@ #include <linux/string.h> #include <linux/inet.h> #include <linux/namei.h> -#include <linux/idr.h> #include <linux/sched.h> #include <linux/slab.h> #include <net/9p/9p.h> diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c index 3bb95adc9619..59b0e8948f78 100644 --- a/fs/9p/vfs_dir.c +++ b/fs/9p/vfs_dir.c @@ -14,7 +14,6 @@ #include <linux/string.h> #include <linux/sched.h> #include <linux/inet.h> -#include <linux/idr.h> #include <linux/slab.h> #include <linux/uio.h> #include <linux/fscache.h> diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index aec43ba83799..b740017634ef 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -18,7 +18,6 @@ #include <linux/pagemap.h> #include <linux/utsname.h> #include <linux/uaccess.h> -#include <linux/idr.h> #include <linux/uio.h> #include <linux/slab.h> #include <net/9p/9p.h> diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 4d1a4a8d9277..27a04a226d97 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -17,7 +17,6 @@ #include <linux/string.h> #include <linux/inet.h> #include <linux/namei.h> -#include <linux/idr.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/xattr.h> diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 03c1743c4aff..f806b3f11649 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -15,7 +15,6 @@ #include <linux/string.h> #include <linux/inet.h> #include <linux/namei.h> -#include <linux/idr.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/xattr.h> diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 2d9ee073d12c..266c4693e20c 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -15,7 +15,6 @@ #include <linux/inet.h> #include <linux/pagemap.h> #include <linux/mount.h> -#include <linux/idr.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/statfs.h> diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 104df2964225..b7c1f8c84b38 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -77,6 +77,7 @@ const struct address_space_operations afs_dir_aops = { .dirty_folio = afs_dir_dirty_folio, .release_folio = afs_dir_release_folio, .invalidate_folio = afs_dir_invalidate_folio, + .migrate_folio = filemap_migrate_folio, }; const struct dentry_operations afs_fs_dentry_operations = { diff --git a/fs/afs/file.c b/fs/afs/file.c index 2eeab57df133..68d6d5dc608d 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -58,14 +58,15 @@ const struct address_space_operations afs_file_aops = { .invalidate_folio = afs_invalidate_folio, .write_begin = afs_write_begin, .write_end = afs_write_end, - .writepage = afs_writepage, .writepages = afs_writepages, + .migrate_folio = filemap_migrate_folio, }; const struct address_space_operations afs_symlink_aops = { .read_folio = afs_symlink_read_folio, .release_folio = afs_release_folio, .invalidate_folio = afs_invalidate_folio, + .migrate_folio = filemap_migrate_folio, }; static const struct vm_operations_struct afs_vm_ops = { diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c index 3ac5fcf98d0d..daaf3810cc92 100644 --- a/fs/afs/fs_probe.c +++ b/fs/afs/fs_probe.c @@ -366,12 +366,15 @@ void afs_fs_probe_dispatcher(struct work_struct *work) unsigned long nowj, timer_at, poll_at; bool first_pass = true, set_timer = false; - if (!net->live) + if (!net->live) { + afs_dec_servers_outstanding(net); return; + } _enter(""); if (list_empty(&net->fs_probe_fast) && list_empty(&net->fs_probe_slow)) { + afs_dec_servers_outstanding(net); _leave(" [none]"); return; } diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 9ba7b68375c9..fd8567b98e2b 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -972,13 +972,6 @@ extern void afs_merge_fs_addr4(struct afs_addr_list *, __be32, u16); extern void afs_merge_fs_addr6(struct afs_addr_list *, __be32 *, u16); /* - * cache.c - */ -#ifdef CONFIG_AFS_FSCACHE -extern struct fscache_netfs afs_cache_netfs; -#endif - -/* * callback.c */ extern void afs_invalidate_mmap_work(struct work_struct *); @@ -1391,7 +1384,6 @@ extern void afs_put_permits(struct afs_permits *); extern void afs_clear_permits(struct afs_vnode *); extern void afs_cache_permit(struct afs_vnode *, struct key *, unsigned int, struct afs_status_cb *); -extern void afs_zap_permits(struct rcu_head *); extern struct key *afs_request_key(struct afs_cell *); extern struct key *afs_request_key_rcu(struct afs_cell *); extern int afs_check_permit(struct afs_vnode *, struct key *, afs_access_t *); diff --git a/fs/afs/volume.c b/fs/afs/volume.c index f4937029dcd7..29d483c80281 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c @@ -70,11 +70,7 @@ static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params, { struct afs_server_list *slist; struct afs_volume *volume; - int ret = -ENOMEM, nr_servers = 0, i; - - for (i = 0; i < vldb->nr_servers; i++) - if (vldb->fs_mask[i] & type_mask) - nr_servers++; + int ret = -ENOMEM; volume = kzalloc(sizeof(struct afs_volume), GFP_KERNEL); if (!volume) diff --git a/fs/afs/write.c b/fs/afs/write.c index 08fd456dde67..19df10d63323 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -14,6 +14,11 @@ #include <linux/netfs.h> #include "internal.h" +static int afs_writepages_region(struct address_space *mapping, + struct writeback_control *wbc, + loff_t start, loff_t end, loff_t *_next, + bool max_one_loop); + static void afs_write_to_cache(struct afs_vnode *vnode, loff_t start, size_t len, loff_t i_size, bool caching); @@ -39,6 +44,25 @@ static void afs_folio_start_fscache(bool caching, struct folio *folio) #endif /* + * Flush out a conflicting write. This may extend the write to the surrounding + * pages if also dirty and contiguous to the conflicting region.. + */ +static int afs_flush_conflicting_write(struct address_space *mapping, + struct folio *folio) +{ + struct writeback_control wbc = { + .sync_mode = WB_SYNC_ALL, + .nr_to_write = LONG_MAX, + .range_start = folio_pos(folio), + .range_end = LLONG_MAX, + }; + loff_t next; + + return afs_writepages_region(mapping, &wbc, folio_pos(folio), LLONG_MAX, + &next, true); +} + +/* * prepare to perform part of a write to a page */ int afs_write_begin(struct file *file, struct address_space *mapping, @@ -80,7 +104,8 @@ try_again: if (folio_test_writeback(folio)) { trace_afs_folio_dirty(vnode, tracepoint_string("alrdy"), folio); - goto flush_conflicting_write; + folio_unlock(folio); + goto wait_for_writeback; } /* If the file is being filled locally, allow inter-write * spaces to be merged into writes. If it's not, only write @@ -99,8 +124,15 @@ try_again: * flush the page out. */ flush_conflicting_write: - _debug("flush conflict"); - ret = folio_write_one(folio); + trace_afs_folio_dirty(vnode, tracepoint_string("confl"), folio); + folio_unlock(folio); + + ret = afs_flush_conflicting_write(mapping, folio); + if (ret < 0) + goto error; + +wait_for_writeback: + ret = folio_wait_writeback_killable(folio); if (ret < 0) goto error; @@ -664,39 +696,12 @@ static ssize_t afs_write_back_from_locked_folio(struct address_space *mapping, } /* - * write a page back to the server - * - the caller locked the page for us - */ -int afs_writepage(struct page *subpage, struct writeback_control *wbc) -{ - struct folio *folio = page_folio(subpage); - ssize_t ret; - loff_t start; - - _enter("{%lx},", folio_index(folio)); - -#ifdef CONFIG_AFS_FSCACHE - folio_wait_fscache(folio); -#endif - - start = folio_index(folio) * PAGE_SIZE; - ret = afs_write_back_from_locked_folio(folio_mapping(folio), wbc, - folio, start, LLONG_MAX - start); - if (ret < 0) { - _leave(" = %zd", ret); - return ret; - } - - _leave(" = 0"); - return 0; -} - -/* * write a region of pages back to the server */ static int afs_writepages_region(struct address_space *mapping, struct writeback_control *wbc, - loff_t start, loff_t end, loff_t *_next) + loff_t start, loff_t end, loff_t *_next, + bool max_one_loop) { struct folio *folio; struct page *head_page; @@ -775,6 +780,9 @@ static int afs_writepages_region(struct address_space *mapping, start += ret; + if (max_one_loop) + break; + cond_resched(); } while (wbc->nr_to_write > 0); @@ -806,24 +814,27 @@ int afs_writepages(struct address_space *mapping, if (wbc->range_cyclic) { start = mapping->writeback_index * PAGE_SIZE; - ret = afs_writepages_region(mapping, wbc, start, LLONG_MAX, &next); + ret = afs_writepages_region(mapping, wbc, start, LLONG_MAX, + &next, false); if (ret == 0) { mapping->writeback_index = next / PAGE_SIZE; if (start > 0 && wbc->nr_to_write > 0) { ret = afs_writepages_region(mapping, wbc, 0, - start, &next); + start, &next, false); if (ret == 0) mapping->writeback_index = next / PAGE_SIZE; } } } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) { - ret = afs_writepages_region(mapping, wbc, 0, LLONG_MAX, &next); + ret = afs_writepages_region(mapping, wbc, 0, LLONG_MAX, + &next, false); if (wbc->nr_to_write > 0 && ret == 0) mapping->writeback_index = next / PAGE_SIZE; } else { ret = afs_writepages_region(mapping, wbc, - wbc->range_start, wbc->range_end, &next); + wbc->range_start, wbc->range_end, + &next, false); } up_read(&vnode->validate_lock); diff --git a/fs/char_dev.c b/fs/char_dev.c index ba0ded7842a7..13deb45f1ec6 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -483,17 +483,24 @@ int cdev_add(struct cdev *p, dev_t dev, unsigned count) p->dev = dev; p->count = count; - if (WARN_ON(dev == WHITEOUT_DEV)) - return -EBUSY; + if (WARN_ON(dev == WHITEOUT_DEV)) { + error = -EBUSY; + goto err; + } error = kobj_map(cdev_map, dev, count, NULL, exact_match, exact_lock, p); if (error) - return error; + goto err; kobject_get(p->kobj.parent); return 0; + +err: + kfree_const(p->kobj.name); + p->kobj.name = NULL; + return error; } /** @@ -547,7 +554,7 @@ int cdev_device_add(struct cdev *cdev, struct device *dev) } rc = device_add(dev); - if (rc) + if (rc && dev->devt) cdev_del(cdev); return rc; diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile index 7c9785973f49..304a7f6cc13a 100644 --- a/fs/cifs/Makefile +++ b/fs/cifs/Makefile @@ -21,7 +21,7 @@ cifs-$(CONFIG_CIFS_XATTR) += xattr.o cifs-$(CONFIG_CIFS_UPCALL) += cifs_spnego.o -cifs-$(CONFIG_CIFS_DFS_UPCALL) += cifs_dfs_ref.o dfs_cache.o +cifs-$(CONFIG_CIFS_DFS_UPCALL) += cifs_dfs_ref.o dfs_cache.o dfs.o cifs-$(CONFIG_CIFS_SWN_UPCALL) += netlink.o cifs_swn.o diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 90850da390ae..56b23def4c95 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -372,6 +372,14 @@ skip_rdma: seq_printf(m, "\nIn Send: %d In MaxReq Wait: %d", atomic_read(&server->in_send), atomic_read(&server->num_waiters)); + if (IS_ENABLED(CONFIG_CIFS_DFS_UPCALL)) { + if (server->origin_fullpath) + seq_printf(m, "\nDFS origin full path: %s", + server->origin_fullpath); + if (server->leaf_fullpath) + seq_printf(m, "\nDFS leaf full path: %s", + server->leaf_fullpath); + } seq_printf(m, "\n\n\tSessions: "); i = 0; diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index b0864da9ef43..2b1a8d55b4ec 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -21,8 +21,7 @@ #include "cifsfs.h" #include "dns_resolve.h" #include "cifs_debug.h" -#include "cifs_unicode.h" -#include "dfs_cache.h" +#include "dfs.h" #include "fs_context.h" static LIST_HEAD(cifs_dfs_automount_list); @@ -60,7 +59,7 @@ void cifs_dfs_release_automount_timer(void) * Returns pointer to the built string, or a ERR_PTR. Caller is responsible * for freeing the returned string. */ -static char * +char * cifs_build_devname(char *nodename, const char *prepath) { size_t pplen; @@ -119,200 +118,34 @@ cifs_build_devname(char *nodename, const char *prepath) return dev; } - -/** - * cifs_compose_mount_options - creates mount options for referral - * @sb_mountdata: parent/root DFS mount options (template) - * @fullpath: full path in UNC format - * @ref: optional server's referral - * @devname: return the built cifs device name if passed pointer not NULL - * creates mount options for submount based on template options sb_mountdata - * and replacing unc,ip,prefixpath options with ones we've got form ref_unc. - * - * Returns: pointer to new mount options or ERR_PTR. - * Caller is responsible for freeing returned value if it is not error. - */ -char *cifs_compose_mount_options(const char *sb_mountdata, - const char *fullpath, - const struct dfs_info3_param *ref, - char **devname) +static int set_dest_addr(struct smb3_fs_context *ctx, const char *full_path) { + struct sockaddr *addr = (struct sockaddr *)&ctx->dstaddr; int rc; - char *name; - char *mountdata = NULL; - const char *prepath = NULL; - int md_len; - char *tkn_e; - char *srvIP = NULL; - char sep = ','; - int off, noff; - - if (sb_mountdata == NULL) - return ERR_PTR(-EINVAL); - - if (ref) { - if (WARN_ON_ONCE(!ref->node_name || ref->path_consumed < 0)) - return ERR_PTR(-EINVAL); - - if (strlen(fullpath) - ref->path_consumed) { - prepath = fullpath + ref->path_consumed; - /* skip initial delimiter */ - if (*prepath == '/' || *prepath == '\\') - prepath++; - } - - name = cifs_build_devname(ref->node_name, prepath); - if (IS_ERR(name)) { - rc = PTR_ERR(name); - name = NULL; - goto compose_mount_options_err; - } - } else { - name = cifs_build_devname((char *)fullpath, NULL); - if (IS_ERR(name)) { - rc = PTR_ERR(name); - name = NULL; - goto compose_mount_options_err; - } - } - - rc = dns_resolve_server_name_to_ip(name, &srvIP, NULL); - if (rc < 0) { - cifs_dbg(FYI, "%s: Failed to resolve server part of %s to IP: %d\n", - __func__, name, rc); - goto compose_mount_options_err; - } - - /* - * In most cases, we'll be building a shorter string than the original, - * but we do have to assume that the address in the ip= option may be - * much longer than the original. Add the max length of an address - * string to the length of the original string to allow for worst case. - */ - md_len = strlen(sb_mountdata) + INET6_ADDRSTRLEN; - mountdata = kzalloc(md_len + sizeof("ip=") + 1, GFP_KERNEL); - if (mountdata == NULL) { - rc = -ENOMEM; - goto compose_mount_options_err; - } - - /* copy all options except of unc,ip,prefixpath */ - off = 0; - if (strncmp(sb_mountdata, "sep=", 4) == 0) { - sep = sb_mountdata[4]; - strncpy(mountdata, sb_mountdata, 5); - off += 5; - } - - do { - tkn_e = strchr(sb_mountdata + off, sep); - if (tkn_e == NULL) - noff = strlen(sb_mountdata + off); - else - noff = tkn_e - (sb_mountdata + off) + 1; - - if (strncasecmp(sb_mountdata + off, "cruid=", 6) == 0) { - off += noff; - continue; - } - if (strncasecmp(sb_mountdata + off, "unc=", 4) == 0) { - off += noff; - continue; - } - if (strncasecmp(sb_mountdata + off, "ip=", 3) == 0) { - off += noff; - continue; - } - if (strncasecmp(sb_mountdata + off, "prefixpath=", 11) == 0) { - off += noff; - continue; - } - strncat(mountdata, sb_mountdata + off, noff); - off += noff; - } while (tkn_e); - strcat(mountdata, sb_mountdata + off); - mountdata[md_len] = '\0'; - - /* copy new IP and ref share name */ - if (mountdata[strlen(mountdata) - 1] != sep) - strncat(mountdata, &sep, 1); - strcat(mountdata, "ip="); - strcat(mountdata, srvIP); - - if (devname) - *devname = name; - else - kfree(name); - - /*cifs_dbg(FYI, "%s: parent mountdata: %s\n", __func__, sb_mountdata);*/ - /*cifs_dbg(FYI, "%s: submount mountdata: %s\n", __func__, mountdata );*/ -compose_mount_options_out: - kfree(srvIP); - return mountdata; - -compose_mount_options_err: - kfree(mountdata); - mountdata = ERR_PTR(rc); - kfree(name); - goto compose_mount_options_out; -} - -/** - * cifs_dfs_do_mount - mounts specified path using DFS full path - * - * Always pass down @fullpath to smb3_do_mount() so we can use the root server - * to perform failover in case we failed to connect to the first target in the - * referral. - * - * @mntpt: directory entry for the path we are trying to automount - * @cifs_sb: parent/root superblock - * @fullpath: full path in UNC format - */ -static struct vfsmount *cifs_dfs_do_mount(struct dentry *mntpt, - struct cifs_sb_info *cifs_sb, - const char *fullpath) -{ - struct vfsmount *mnt; - char *mountdata; - char *devname; - - devname = kstrdup(fullpath, GFP_KERNEL); - if (!devname) - return ERR_PTR(-ENOMEM); - - convert_delimiter(devname, '/'); - - /* TODO: change to call fs_context_for_mount(), fill in context directly, call fc_mount */ - - /* See afs_mntpt_do_automount in fs/afs/mntpt.c for an example */ - - /* strip first '\' from fullpath */ - mountdata = cifs_compose_mount_options(cifs_sb->ctx->mount_options, - fullpath + 1, NULL, NULL); - if (IS_ERR(mountdata)) { - kfree(devname); - return (struct vfsmount *)mountdata; - } - - mnt = vfs_submount(mntpt, &cifs_fs_type, devname, mountdata); - kfree(mountdata); - kfree(devname); - return mnt; + rc = dns_resolve_server_name_to_ip(full_path, addr, NULL); + if (!rc) + cifs_set_port(addr, ctx->port); + return rc; } /* * Create a vfsmount that we can automount */ -static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt) +static struct vfsmount *cifs_dfs_do_automount(struct path *path) { + int rc; + struct dentry *mntpt = path->dentry; + struct fs_context *fc; struct cifs_sb_info *cifs_sb; - void *page; + void *page = NULL; + struct smb3_fs_context *ctx, *cur_ctx; + struct smb3_fs_context tmp; char *full_path; struct vfsmount *mnt; - cifs_dbg(FYI, "in %s\n", __func__); - BUG_ON(IS_ROOT(mntpt)); + if (IS_ROOT(mntpt)) + return ERR_PTR(-ESTALE); /* * The MSDFS spec states that paths in DFS referral requests and @@ -321,29 +154,53 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt) * gives us the latter, so we must adjust the result. */ cifs_sb = CIFS_SB(mntpt->d_sb); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS) { - mnt = ERR_PTR(-EREMOTE); - goto cdda_exit; - } + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS) + return ERR_PTR(-EREMOTE); + + cur_ctx = cifs_sb->ctx; + + fc = fs_context_for_submount(path->mnt->mnt_sb->s_type, mntpt); + if (IS_ERR(fc)) + return ERR_CAST(fc); + + ctx = smb3_fc2context(fc); page = alloc_dentry_path(); - /* always use tree name prefix */ - full_path = build_path_from_dentry_optional_prefix(mntpt, page, true); + full_path = dfs_get_automount_devname(mntpt, page); if (IS_ERR(full_path)) { mnt = ERR_CAST(full_path); - goto free_full_path; + goto out; } - convert_delimiter(full_path, '\\'); + convert_delimiter(full_path, '/'); cifs_dbg(FYI, "%s: full_path: %s\n", __func__, full_path); - mnt = cifs_dfs_do_mount(mntpt, cifs_sb, full_path); - cifs_dbg(FYI, "%s: cifs_dfs_do_mount:%s , mnt:%p\n", __func__, full_path + 1, mnt); + tmp = *cur_ctx; + tmp.source = full_path; + tmp.leaf_fullpath = NULL; + tmp.UNC = tmp.prepath = NULL; + + rc = smb3_fs_context_dup(ctx, &tmp); + if (rc) { + mnt = ERR_PTR(rc); + goto out; + } + + rc = set_dest_addr(ctx, full_path); + if (rc) { + mnt = ERR_PTR(rc); + goto out; + } + + rc = smb3_parse_devname(full_path, ctx); + if (!rc) + mnt = fc_mount(fc); + else + mnt = ERR_PTR(rc); -free_full_path: +out: + put_fs_context(fc); free_dentry_path(page); -cdda_exit: - cifs_dbg(FYI, "leaving %s\n" , __func__); return mnt; } @@ -354,9 +211,9 @@ struct vfsmount *cifs_dfs_d_automount(struct path *path) { struct vfsmount *newmnt; - cifs_dbg(FYI, "in %s\n", __func__); + cifs_dbg(FYI, "%s: %pd\n", __func__, path->dentry); - newmnt = cifs_dfs_do_automount(path->dentry); + newmnt = cifs_dfs_do_automount(path); if (IS_ERR(newmnt)) { cifs_dbg(FYI, "leaving %s [automount failed]\n" , __func__); return newmnt; diff --git a/fs/cifs/cifs_ioctl.h b/fs/cifs/cifs_ioctl.h index d86d78d5bfdc..332588e77c31 100644 --- a/fs/cifs/cifs_ioctl.h +++ b/fs/cifs/cifs_ioctl.h @@ -108,7 +108,7 @@ struct smb3_notify_info { #define CIFS_IOC_NOTIFY _IOW(CIFS_IOCTL_MAGIC, 9, struct smb3_notify) #define CIFS_DUMP_FULL_KEY _IOWR(CIFS_IOCTL_MAGIC, 10, struct smb3_full_key_debug_info) #define CIFS_IOC_NOTIFY_INFO _IOWR(CIFS_IOCTL_MAGIC, 11, struct smb3_notify_info) -#define CIFS_IOC_SHUTDOWN _IOR ('X', 125, __u32) +#define CIFS_IOC_SHUTDOWN _IOR('X', 125, __u32) /* * Flags for going down operation diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 040267ed8a64..10e00c624922 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -678,9 +678,15 @@ cifs_show_options(struct seq_file *s, struct dentry *root) seq_printf(s, ",echo_interval=%lu", tcon->ses->server->echo_interval / HZ); - /* Only display max_credits if it was overridden on mount */ + /* Only display the following if overridden on mount */ if (tcon->ses->server->max_credits != SMB2_MAX_CREDITS_AVAILABLE) seq_printf(s, ",max_credits=%u", tcon->ses->server->max_credits); + if (tcon->ses->server->tcp_nodelay) + seq_puts(s, ",tcpnodelay"); + if (tcon->ses->server->noautotune) + seq_puts(s, ",noautotune"); + if (tcon->ses->server->noblocksnd) + seq_puts(s, ",noblocksend"); if (tcon->snapshot_time) seq_printf(s, ",snapshot=%llu", tcon->snapshot_time); @@ -890,12 +896,6 @@ cifs_smb3_do_mount(struct file_system_type *fs_type, goto out; } - rc = cifs_setup_volume_info(cifs_sb->ctx, NULL, NULL); - if (rc) { - root = ERR_PTR(rc); - goto out; - } - rc = cifs_setup_cifs_sb(cifs_sb); if (rc) { root = ERR_PTR(rc); diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 388b745a978e..63a0ac2b9355 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -105,8 +105,8 @@ extern int cifs_lock(struct file *, int, struct file_lock *); extern int cifs_fsync(struct file *, loff_t, loff_t, int); extern int cifs_strict_fsync(struct file *, loff_t, loff_t, int); extern int cifs_flush(struct file *, fl_owner_t id); -extern int cifs_file_mmap(struct file * , struct vm_area_struct *); -extern int cifs_file_strict_mmap(struct file * , struct vm_area_struct *); +extern int cifs_file_mmap(struct file *file, struct vm_area_struct *vma); +extern int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma); extern const struct file_operations cifs_dir_ops; extern int cifs_dir_open(struct inode *inode, struct file *file); extern int cifs_readdir(struct file *file, struct dir_context *ctx); @@ -153,6 +153,6 @@ extern const struct export_operations cifs_export_ops; #endif /* CONFIG_CIFS_NFSD_EXPORT */ /* when changing internal version - update following two lines at same time */ -#define SMB3_PRODUCT_BUILD 40 -#define CIFS_VERSION "2.40" +#define SMB3_PRODUCT_BUILD 41 +#define CIFS_VERSION "2.41" #endif /* _CIFSFS_H */ diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 1420acf987f0..cfdd5bf701a1 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -13,6 +13,8 @@ #include <linux/in6.h> #include <linux/inet.h> #include <linux/slab.h> +#include <linux/scatterlist.h> +#include <linux/mm.h> #include <linux/mempool.h> #include <linux/workqueue.h> #include <linux/utsname.h> @@ -21,7 +23,6 @@ #include "cifs_fs_sb.h" #include "cifsacl.h" #include <crypto/internal/hash.h> -#include <linux/scatterlist.h> #include <uapi/linux/cifs/cifs_mount.h> #include "../smbfs_common/smb2pdu.h" #include "smb2pdu.h" @@ -106,6 +107,8 @@ #define CIFS_MAX_WORKSTATION_LEN (__NEW_UTS_LEN + 1) /* reasonable max for client */ +#define CIFS_DFS_ROOT_SES(ses) ((ses)->dfs_root_ses ?: (ses)) + /* * CIFS vfs client Status information (based on what we know.) */ @@ -737,8 +740,6 @@ struct TCP_Server_Info { bool use_swn_dstaddr; struct sockaddr_storage swn_dstaddr; #endif -#ifdef CONFIG_CIFS_DFS_UPCALL - bool is_dfs_conn; /* if a dfs connection */ struct mutex refpath_lock; /* protects leaf_fullpath */ /* * Canonical DFS full paths that were used to chase referrals in mount and reconnect. @@ -752,7 +753,6 @@ struct TCP_Server_Info { * format: \\HOST\SHARE\[OPTIONAL PATH] */ char *origin_fullpath, *leaf_fullpath, *current_fullpath; -#endif }; static inline bool is_smb1(struct TCP_Server_Info *server) @@ -785,6 +785,7 @@ static inline unsigned int in_flight(struct TCP_Server_Info *server) { unsigned int num; + spin_lock(&server->req_lock); num = server->in_flight; spin_unlock(&server->req_lock); @@ -795,6 +796,7 @@ static inline bool has_credits(struct TCP_Server_Info *server, int *credits, int num_credits) { int num; + spin_lock(&server->req_lock); num = *credits; spin_unlock(&server->req_lock); @@ -1025,7 +1027,7 @@ struct cifs_ses { struct TCP_Server_Info *server; /* pointer to server info */ int ses_count; /* reference counter */ enum ses_status_enum ses_status; /* updates protected by cifs_tcp_ses_lock */ - unsigned overrideSecFlg; /* if non-zero override global sec flags */ + unsigned int overrideSecFlg; /* if non-zero override global sec flags */ char *serverOS; /* name of operating system underlying server */ char *serverNOS; /* name of network operating system of server */ char *serverDomain; /* security realm of server */ @@ -1099,6 +1101,7 @@ struct cifs_ses { */ unsigned long chans_need_reconnect; /* ========= end: protected by chan_lock ======== */ + struct cifs_ses *dfs_root_ses; }; static inline bool @@ -1381,7 +1384,7 @@ struct cifsFileInfo { __u32 pid; /* process id who opened file */ struct cifs_fid fid; /* file id from remote */ struct list_head rlist; /* reconnect list */ - /* BB add lock scope info here if needed */ ; + /* BB add lock scope info here if needed */ /* lock scope id (0 if none) */ struct dentry *dentry; struct tcon_link *tlink; @@ -1757,6 +1760,18 @@ struct file_list { struct cifsFileInfo *cfile; }; +struct cifs_mount_ctx { + struct cifs_sb_info *cifs_sb; + struct smb3_fs_context *fs_ctx; + unsigned int xid; + struct TCP_Server_Info *server; + struct cifs_ses *ses; + struct cifs_tcon *tcon; + struct cifs_ses *root_ses; + uuid_t mount_id; + char *origin_fullpath, *leaf_fullpath; +}; + static inline void free_dfs_info_param(struct dfs_info3_param *param) { if (param) { @@ -1769,6 +1784,7 @@ static inline void free_dfs_info_array(struct dfs_info3_param *param, int number_of_items) { int i; + if ((number_of_items == 0) || (param == NULL)) return; for (i = 0; i < number_of_items; i++) { @@ -2137,4 +2153,70 @@ static inline void move_cifs_info_to_smb2(struct smb2_file_all_info *dst, const dst->FileNameLength = src->FileNameLength; } +static inline unsigned int cifs_get_num_sgs(const struct smb_rqst *rqst, + int num_rqst, + const u8 *sig) +{ + unsigned int len, skip; + unsigned int nents = 0; + unsigned long addr; + int i, j; + + /* Assumes the first rqst has a transform header as the first iov. + * I.e. + * rqst[0].rq_iov[0] is transform header + * rqst[0].rq_iov[1+] data to be encrypted/decrypted + * rqst[1+].rq_iov[0+] data to be encrypted/decrypted + */ + for (i = 0; i < num_rqst; i++) { + /* + * The first rqst has a transform header where the + * first 20 bytes are not part of the encrypted blob. + */ + for (j = 0; j < rqst[i].rq_nvec; j++) { + struct kvec *iov = &rqst[i].rq_iov[j]; + + skip = (i == 0) && (j == 0) ? 20 : 0; + addr = (unsigned long)iov->iov_base + skip; + if (unlikely(is_vmalloc_addr((void *)addr))) { + len = iov->iov_len - skip; + nents += DIV_ROUND_UP(offset_in_page(addr) + len, + PAGE_SIZE); + } else { + nents++; + } + } + nents += rqst[i].rq_npages; + } + nents += DIV_ROUND_UP(offset_in_page(sig) + SMB2_SIGNATURE_SIZE, PAGE_SIZE); + return nents; +} + +/* We can not use the normal sg_set_buf() as we will sometimes pass a + * stack object as buf. + */ +static inline struct scatterlist *cifs_sg_set_buf(struct scatterlist *sg, + const void *buf, + unsigned int buflen) +{ + unsigned long addr = (unsigned long)buf; + unsigned int off = offset_in_page(addr); + + addr &= PAGE_MASK; + if (unlikely(is_vmalloc_addr((void *)addr))) { + do { + unsigned int len = min_t(unsigned int, buflen, PAGE_SIZE - off); + + sg_set_page(sg++, vmalloc_to_page((void *)addr), len, off); + + off = 0; + addr += PAGE_SIZE; + buflen -= len; + } while (buflen); + } else { + sg_set_page(sg++, virt_to_page(addr), buflen, off); + } + return sg; +} + #endif /* _CIFS_GLOB_H */ diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index d1abaeea974a..623caece2b10 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h @@ -1429,7 +1429,7 @@ typedef struct smb_com_transaction_change_notify_req { __u8 WatchTree; /* 1 = Monitor subdirectories */ __u8 Reserved2; __le16 ByteCount; -/* __u8 Pad[3];*/ +/* __u8 Pad[3];*/ /* __u8 Data[1];*/ } __attribute__((packed)) TRANSACT_CHANGE_NOTIFY_REQ; @@ -1752,8 +1752,7 @@ struct smb_com_transaction2_sfi_rsp { struct smb_hdr hdr; /* wct = 10 + SetupCount */ struct trans2_resp t2; __u16 ByteCount; - __u16 Reserved2; /* parameter word reserved - - present for infolevels > 100 */ + __u16 Reserved2; /* parameter word reserved - present for infolevels > 100 */ } __attribute__((packed)); struct smb_t2_qfi_req { @@ -1768,8 +1767,7 @@ struct smb_t2_qfi_rsp { struct smb_hdr hdr; /* wct = 10 + SetupCount */ struct trans2_resp t2; __u16 ByteCount; - __u16 Reserved2; /* parameter word reserved - - present for infolevels > 100 */ + __u16 Reserved2; /* parameter word reserved - present for infolevels > 100 */ } __attribute__((packed)); /* @@ -2146,13 +2144,11 @@ typedef struct { #define CIFS_UNIX_POSIX_PATH_OPS_CAP 0x00000020 /* Allow new POSIX path based calls including posix open and posix unlink */ -#define CIFS_UNIX_LARGE_READ_CAP 0x00000040 /* support reads >128K (up - to 0xFFFF00 */ +#define CIFS_UNIX_LARGE_READ_CAP 0x00000040 /* support reads >128K (up to 0xFFFF00 */ #define CIFS_UNIX_LARGE_WRITE_CAP 0x00000080 #define CIFS_UNIX_TRANSPORT_ENCRYPTION_CAP 0x00000100 /* can do SPNEGO crypt */ #define CIFS_UNIX_TRANSPORT_ENCRYPTION_MANDATORY_CAP 0x00000200 /* must do */ -#define CIFS_UNIX_PROXY_CAP 0x00000400 /* Proxy cap: 0xACE ioctl and - QFS PROXY call */ +#define CIFS_UNIX_PROXY_CAP 0x00000400 /* Proxy cap: 0xACE ioctl and QFS PROXY call */ #ifdef CONFIG_CIFS_POSIX /* presumably don't need the 0x20 POSIX_PATH_OPS_CAP since we never send LockingX instead of posix locking call on unix sess (and we do not expect @@ -2368,8 +2364,7 @@ typedef struct { struct file_allocation_info { __le64 AllocationSize; /* Note old Samba srvr rounds this up too much */ -} __attribute__((packed)); /* size used on disk, for level 0x103 for set, - 0x105 for query */ +} __packed; /* size used on disk, for level 0x103 for set, 0x105 for query */ struct file_end_of_file_info { __le64 FileSize; /* offset to end of file */ @@ -2409,8 +2404,7 @@ struct cifs_posix_acl { /* access conrol list (ACL) */ __le16 access_entry_count; /* access ACL - count of entries */ __le16 default_entry_count; /* default ACL - count of entries */ struct cifs_posix_ace ace_array[]; - /* followed by - struct cifs_posix_ace default_ace_arraay[] */ + /* followed by struct cifs_posix_ace default_ace_array[] */ } __attribute__((packed)); /* level 0x204 */ /* types of access control entries already defined in posix_acl.h */ @@ -2429,17 +2423,17 @@ struct cifs_posix_acl { /* access conrol list (ACL) */ /* end of POSIX ACL definitions */ /* POSIX Open Flags */ -#define SMB_O_RDONLY 0x1 -#define SMB_O_WRONLY 0x2 -#define SMB_O_RDWR 0x4 -#define SMB_O_CREAT 0x10 -#define SMB_O_EXCL 0x20 -#define SMB_O_TRUNC 0x40 -#define SMB_O_APPEND 0x80 -#define SMB_O_SYNC 0x100 -#define SMB_O_DIRECTORY 0x200 -#define SMB_O_NOFOLLOW 0x400 -#define SMB_O_DIRECT 0x800 +#define SMB_O_RDONLY 0x1 +#define SMB_O_WRONLY 0x2 +#define SMB_O_RDWR 0x4 +#define SMB_O_CREAT 0x10 +#define SMB_O_EXCL 0x20 +#define SMB_O_TRUNC 0x40 +#define SMB_O_APPEND 0x80 +#define SMB_O_SYNC 0x100 +#define SMB_O_DIRECTORY 0x200 +#define SMB_O_NOFOLLOW 0x400 +#define SMB_O_DIRECT 0x800 typedef struct { __le32 OpenFlags; /* same as NT CreateX */ @@ -2716,15 +2710,13 @@ typedef struct file_xattr_info { __u32 xattr_value_len; char xattr_name[]; /* followed by xattr_value[xattr_value_len], no pad */ -} __attribute__((packed)) FILE_XATTR_INFO; /* extended attribute info - level 0x205 */ +} __packed FILE_XATTR_INFO; /* extended attribute info level 0x205 */ /* flags for lsattr and chflags commands removed arein uapi/linux/fs.h */ typedef struct file_chattr_info { __le64 mask; /* list of all possible attribute bits */ __le64 mode; /* list of actual attribute bits on this inode */ -} __attribute__((packed)) FILE_CHATTR_INFO; /* ext attributes - (chattr, chflags) level 0x206 */ -#endif /* POSIX */ +} __packed FILE_CHATTR_INFO; /* ext attributes (chattr, chflags) level 0x206 */ +#endif /* POSIX */ #endif /* _CIFSPDU_H */ diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index f50f96e4ec30..1207b39686fb 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -57,6 +57,9 @@ extern void exit_cifs_idmap(void); extern int init_cifs_spnego(void); extern void exit_cifs_spnego(void); extern const char *build_path_from_dentry(struct dentry *, void *); +char *__build_path_from_dentry_optional_prefix(struct dentry *direntry, void *page, + const char *tree, int tree_len, + bool prefix); extern char *build_path_from_dentry_optional_prefix(struct dentry *direntry, void *page, bool prefix); static inline void *alloc_dentry_path(void) @@ -75,9 +78,7 @@ extern char *cifs_build_path_to_root(struct smb3_fs_context *ctx, struct cifs_tcon *tcon, int add_treename); extern char *build_wildcard_path_from_dentry(struct dentry *direntry); -extern char *cifs_compose_mount_options(const char *sb_mountdata, - const char *fullpath, const struct dfs_info3_param *ref, - char **devname); +char *cifs_build_devname(char *nodename, const char *prepath); extern void delete_mid(struct mid_q_entry *mid); extern void release_mid(struct mid_q_entry *mid); extern void cifs_wake_up_task(struct mid_q_entry *mid); @@ -124,7 +125,7 @@ extern int SendReceive2(const unsigned int /* xid */ , struct cifs_ses *, struct kvec * /* resp vec */); extern int SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *ptcon, - struct smb_hdr *in_buf , + struct smb_hdr *in_buf, struct smb_hdr *out_buf, int *bytes_returned); void @@ -244,6 +245,10 @@ extern int cifs_read_page_from_socket(struct TCP_Server_Info *server, unsigned int page_offset, unsigned int to_read); extern int cifs_setup_cifs_sb(struct cifs_sb_info *cifs_sb); +void cifs_mount_put_conns(struct cifs_mount_ctx *mnt_ctx); +int cifs_mount_get_session(struct cifs_mount_ctx *mnt_ctx); +int cifs_is_path_remote(struct cifs_mount_ctx *mnt_ctx); +int cifs_mount_get_tcon(struct cifs_mount_ctx *mnt_ctx); extern int cifs_match_super(struct super_block *, void *); extern int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx); extern void cifs_umount(struct cifs_sb_info *); @@ -561,9 +566,6 @@ extern int check_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, extern int E_md4hash(const unsigned char *passwd, unsigned char *p16, const struct nls_table *codepage); -extern int -cifs_setup_volume_info(struct smb3_fs_context *ctx, const char *mntopts, const char *devname); - extern struct TCP_Server_Info * cifs_find_tcp_session(struct smb3_fs_context *ctx); @@ -604,8 +606,8 @@ int setup_aio_ctx_iter(struct cifs_aio_ctx *ctx, struct iov_iter *iter, int rw); int cifs_alloc_hash(const char *name, struct shash_desc **sdesc); void cifs_free_hash(struct shash_desc **sdesc); -extern void rqst_page_get_length(struct smb_rqst *rqst, unsigned int page, - unsigned int *len, unsigned int *offset); +void rqst_page_get_length(const struct smb_rqst *rqst, unsigned int page, + unsigned int *len, unsigned int *offset); struct cifs_chan * cifs_ses_find_chan(struct cifs_ses *ses, struct TCP_Server_Info *server); int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index e80252a83225..d371259d6808 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -46,6 +46,7 @@ #include "smbdirect.h" #include "dns_resolve.h" #ifdef CONFIG_CIFS_DFS_UPCALL +#include "dfs.h" #include "dfs_cache.h" #endif #include "fs_context.h" @@ -61,20 +62,6 @@ extern bool disable_legacy_dialects; /* Drop the connection to not overload the server */ #define NUM_STATUS_IO_TIMEOUT 5 -struct mount_ctx { - struct cifs_sb_info *cifs_sb; - struct smb3_fs_context *fs_ctx; - unsigned int xid; - struct TCP_Server_Info *server; - struct cifs_ses *ses; - struct cifs_tcon *tcon; -#ifdef CONFIG_CIFS_DFS_UPCALL - struct cifs_ses *root_ses; - uuid_t mount_id; - char *origin_fullpath, *leaf_fullpath; -#endif -}; - static int ip_connect(struct TCP_Server_Info *server); static int generic_ip_connect(struct TCP_Server_Info *server); static void tlink_rb_insert(struct rb_root *root, struct tcon_link *new_tlink); @@ -90,7 +77,8 @@ static int reconn_set_ipaddr_from_hostname(struct TCP_Server_Info *server) { int rc; int len; - char *unc, *ipaddr = NULL; + char *unc; + struct sockaddr_storage ss; time64_t expiry, now; unsigned long ttl = SMB_DNS_RESOLVE_INTERVAL_DEFAULT; @@ -110,7 +98,11 @@ static int reconn_set_ipaddr_from_hostname(struct TCP_Server_Info *server) } scnprintf(unc, len, "\\\\%s", server->hostname); - rc = dns_resolve_server_name_to_ip(unc, &ipaddr, &expiry); + spin_lock(&server->srv_lock); + ss = server->dstaddr; + spin_unlock(&server->srv_lock); + + rc = dns_resolve_server_name_to_ip(unc, (struct sockaddr *)&ss, &expiry); kfree(unc); if (rc < 0) { @@ -120,22 +112,13 @@ static int reconn_set_ipaddr_from_hostname(struct TCP_Server_Info *server) } spin_lock(&server->srv_lock); - rc = cifs_convert_address((struct sockaddr *)&server->dstaddr, ipaddr, - strlen(ipaddr)); + memcpy(&server->dstaddr, &ss, sizeof(server->dstaddr)); spin_unlock(&server->srv_lock); - kfree(ipaddr); - /* rc == 1 means success here */ - if (rc) { - now = ktime_get_real_seconds(); - if (expiry && expiry > now) - /* - * To make sure we don't use the cached entry, retry 1s - * after expiry. - */ - ttl = max_t(unsigned long, expiry - now, SMB_DNS_RESOLVE_INTERVAL_MIN) + 1; - } - rc = !rc ? -1 : 0; + now = ktime_get_real_seconds(); + if (expiry && expiry > now) + /* To make sure we don't use the cached entry, retry 1s */ + ttl = max_t(unsigned long, expiry - now, SMB_DNS_RESOLVE_INTERVAL_MIN) + 1; requeue_resolve: cifs_dbg(FYI, "%s: next dns resolution scheduled for %lu seconds in the future\n", @@ -279,8 +262,10 @@ cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server, tcon->need_reconnect = true; tcon->status = TID_NEED_RECON; } - if (ses->tcon_ipc) + if (ses->tcon_ipc) { ses->tcon_ipc->need_reconnect = true; + ses->tcon_ipc->status = TID_NEED_RECON; + } next_session: spin_unlock(&ses->chan_lock); @@ -546,9 +531,7 @@ static int reconnect_dfs_server(struct TCP_Server_Info *server) mod_delayed_work(cifsiod_wq, &server->reconnect, 0); } while (server->tcpStatus == CifsNeedReconnect); - if (target_hint) - dfs_cache_noreq_update_tgthint(refpath, target_hint); - + dfs_cache_noreq_update_tgthint(refpath, target_hint); dfs_cache_free_tgts(&tl); /* Need to set up echo worker again once connection has been established */ @@ -563,16 +546,8 @@ static int reconnect_dfs_server(struct TCP_Server_Info *server) int cifs_reconnect(struct TCP_Server_Info *server, bool mark_smb_session) { - /* If tcp session is not an dfs connection, then reconnect to last target server */ - spin_lock(&server->srv_lock); - if (!server->is_dfs_conn) { - spin_unlock(&server->srv_lock); - return __cifs_reconnect(server, mark_smb_session); - } - spin_unlock(&server->srv_lock); - mutex_lock(&server->refpath_lock); - if (!server->origin_fullpath || !server->leaf_fullpath) { + if (!server->leaf_fullpath) { mutex_unlock(&server->refpath_lock); return __cifs_reconnect(server, mark_smb_session); } @@ -1384,9 +1359,7 @@ match_port(struct TCP_Server_Info *server, struct sockaddr *addr) return port == *sport; } -static bool -match_address(struct TCP_Server_Info *server, struct sockaddr *addr, - struct sockaddr *srcaddr) +static bool match_server_address(struct TCP_Server_Info *server, struct sockaddr *addr) { switch (addr->sa_family) { case AF_INET: { @@ -1415,9 +1388,6 @@ match_address(struct TCP_Server_Info *server, struct sockaddr *addr, return false; /* don't expect to be here */ } - if (!cifs_match_ipaddr(srcaddr, (struct sockaddr *)&server->srcaddr)) - return false; - return true; } @@ -1444,8 +1414,23 @@ match_security(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) return true; } +static bool dfs_src_pathname_equal(const char *s1, const char *s2) +{ + if (strlen(s1) != strlen(s2)) + return false; + for (; *s1; s1++, s2++) { + if (*s1 == '/' || *s1 == '\\') { + if (*s2 != '/' && *s2 != '\\') + return false; + } else if (tolower(*s1) != tolower(*s2)) + return false; + } + return true; +} + /* this function must be called with srv_lock held */ -static int match_server(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) +static int match_server(struct TCP_Server_Info *server, struct smb3_fs_context *ctx, + bool dfs_super_cmp) { struct sockaddr *addr = (struct sockaddr *)&ctx->dstaddr; @@ -1470,15 +1455,30 @@ static int match_server(struct TCP_Server_Info *server, struct smb3_fs_context * if (!net_eq(cifs_net_ns(server), current->nsproxy->net_ns)) return 0; - if (strcasecmp(server->hostname, ctx->server_hostname)) - return 0; - - if (!match_address(server, addr, - (struct sockaddr *)&ctx->srcaddr)) - return 0; - - if (!match_port(server, addr)) + if (!cifs_match_ipaddr((struct sockaddr *)&ctx->srcaddr, + (struct sockaddr *)&server->srcaddr)) return 0; + /* + * When matching DFS superblocks, we only check for original source pathname as the + * currently connected target might be different than the one parsed earlier in i.e. + * mount.cifs(8). + */ + if (dfs_super_cmp) { + if (!ctx->source || !server->origin_fullpath || + !dfs_src_pathname_equal(server->origin_fullpath, ctx->source)) + return 0; + } else { + /* Skip addr, hostname and port matching for DFS connections */ + if (server->leaf_fullpath) { + if (!ctx->leaf_fullpath || + strcasecmp(server->leaf_fullpath, ctx->leaf_fullpath)) + return 0; + } else if (strcasecmp(server->hostname, ctx->server_hostname) || + !match_server_address(server, addr) || + !match_port(server, addr)) { + return 0; + } + } if (!match_security(server, ctx)) return 0; @@ -1506,23 +1506,11 @@ cifs_find_tcp_session(struct smb3_fs_context *ctx) spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) { spin_lock(&server->srv_lock); -#ifdef CONFIG_CIFS_DFS_UPCALL - /* - * DFS failover implementation in cifs_reconnect() requires unique tcp sessions for - * DFS connections to do failover properly, so avoid sharing them with regular - * shares or even links that may connect to same server but having completely - * different failover targets. - */ - if (server->is_dfs_conn) { - spin_unlock(&server->srv_lock); - continue; - } -#endif /* * Skip ses channels since they're only handled in lower layers * (e.g. cifs_send_recv). */ - if (CIFS_SERVER_IS_CHAN(server) || !match_server(server, ctx)) { + if (CIFS_SERVER_IS_CHAN(server) || !match_server(server, ctx, false)) { spin_unlock(&server->srv_lock); continue; } @@ -1617,6 +1605,15 @@ cifs_get_tcp_session(struct smb3_fs_context *ctx, goto out_err; } + if (ctx->leaf_fullpath) { + tcp_ses->leaf_fullpath = kstrdup(ctx->leaf_fullpath, GFP_KERNEL); + if (!tcp_ses->leaf_fullpath) { + rc = -ENOMEM; + goto out_err; + } + tcp_ses->current_fullpath = tcp_ses->leaf_fullpath; + } + if (ctx->nosharesock) tcp_ses->nosharesock = true; @@ -1765,6 +1762,7 @@ out_err: if (CIFS_SERVER_IS_CHAN(tcp_ses)) cifs_put_tcp_session(tcp_ses->primary_server, false); kfree(tcp_ses->hostname); + kfree(tcp_ses->leaf_fullpath); if (tcp_ses->ssocket) sock_release(tcp_ses->ssocket); kfree(tcp_ses); @@ -1871,6 +1869,9 @@ cifs_setup_ipc(struct cifs_ses *ses, struct smb3_fs_context *ctx) cifs_dbg(FYI, "IPC tcon rc=%d ipc tid=0x%x\n", rc, tcon->tid); + spin_lock(&tcon->tc_lock); + tcon->status = TID_GOOD; + spin_unlock(&tcon->tc_lock); ses->tcon_ipc = tcon; out: return rc; @@ -2157,7 +2158,7 @@ cifs_set_cifscreds(struct smb3_fs_context *ctx __attribute__((unused)), struct cifs_ses * cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) { - int rc = -ENOMEM; + int rc = 0; unsigned int xid; struct cifs_ses *ses; struct sockaddr_in *addr = (struct sockaddr_in *)&server->dstaddr; @@ -2206,6 +2207,8 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) return ses; } + rc = -ENOMEM; + cifs_dbg(FYI, "Existing smb sess not found\n"); ses = sesInfoAlloc(); if (ses == NULL) @@ -2278,10 +2281,10 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) list_add(&ses->smb_ses_list, &server->smb_ses_list); spin_unlock(&cifs_tcp_ses_lock); - free_xid(xid); - cifs_setup_ipc(ses, ctx); + free_xid(xid); + return ses; get_ses_fail: @@ -2291,11 +2294,12 @@ get_ses_fail: } /* this function must be called with tc_lock held */ -static int match_tcon(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) +static int match_tcon(struct cifs_tcon *tcon, struct smb3_fs_context *ctx, bool dfs_super_cmp) { if (tcon->status == TID_EXITING) return 0; - if (strncmp(tcon->tree_name, ctx->UNC, MAX_TREE_SIZE)) + /* Skip UNC validation when matching DFS superblocks */ + if (!dfs_super_cmp && strncmp(tcon->tree_name, ctx->UNC, MAX_TREE_SIZE)) return 0; if (tcon->seal != ctx->seal) return 0; @@ -2318,7 +2322,7 @@ cifs_find_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx) spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { spin_lock(&tcon->tc_lock); - if (!match_tcon(tcon, ctx)) { + if (!match_tcon(tcon, ctx, false)) { spin_unlock(&tcon->tc_lock); continue; } @@ -2600,6 +2604,7 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx) tcon->nodelete = ctx->nodelete; tcon->local_lease = ctx->local_lease; INIT_LIST_HEAD(&tcon->pending_opens); + tcon->status = TID_GOOD; /* schedule query interfaces poll */ INIT_DELAYED_WORK(&tcon->query_interfaces, @@ -2712,6 +2717,7 @@ cifs_match_super(struct super_block *sb, void *data) struct cifs_ses *ses; struct cifs_tcon *tcon; struct tcon_link *tlink; + bool dfs_super_cmp; int rc = 0; spin_lock(&cifs_tcp_ses_lock); @@ -2726,14 +2732,16 @@ cifs_match_super(struct super_block *sb, void *data) ses = tcon->ses; tcp_srv = ses->server; + dfs_super_cmp = IS_ENABLED(CONFIG_CIFS_DFS_UPCALL) && tcp_srv->origin_fullpath; + ctx = mnt_data->ctx; spin_lock(&tcp_srv->srv_lock); spin_lock(&ses->ses_lock); spin_lock(&tcon->tc_lock); - if (!match_server(tcp_srv, ctx) || + if (!match_server(tcp_srv, ctx, dfs_super_cmp) || !match_session(ses, ctx) || - !match_tcon(tcon, ctx) || + !match_tcon(tcon, ctx, dfs_super_cmp) || !match_prepath(sb, mnt_data)) { rc = 0; goto out; @@ -2944,6 +2952,7 @@ generic_ip_connect(struct TCP_Server_Info *server) cifs_dbg(FYI, "Socket created\n"); server->ssocket = socket; socket->sk->sk_allocation = GFP_NOFS; + socket->sk->sk_use_task_frag = false; if (sfamily == AF_INET6) cifs_reclassify_socket6(socket); else @@ -3190,7 +3199,7 @@ int cifs_setup_cifs_sb(struct cifs_sb_info *cifs_sb) } /* Release all succeed connections */ -static inline void mount_put_conns(struct mount_ctx *mnt_ctx) +void cifs_mount_put_conns(struct cifs_mount_ctx *mnt_ctx) { int rc = 0; @@ -3204,19 +3213,22 @@ static inline void mount_put_conns(struct mount_ctx *mnt_ctx) free_xid(mnt_ctx->xid); } -/* Get connections for tcp, ses and tcon */ -static int mount_get_conns(struct mount_ctx *mnt_ctx) +int cifs_mount_get_session(struct cifs_mount_ctx *mnt_ctx) { - int rc = 0; struct TCP_Server_Info *server = NULL; + struct smb3_fs_context *ctx; struct cifs_ses *ses = NULL; - struct cifs_tcon *tcon = NULL; - struct smb3_fs_context *ctx = mnt_ctx->fs_ctx; - struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb; unsigned int xid; + int rc = 0; xid = get_xid(); + if (WARN_ON_ONCE(!mnt_ctx || !mnt_ctx->fs_ctx)) { + rc = -EINVAL; + goto out; + } + ctx = mnt_ctx->fs_ctx; + /* get a reference to a tcp session */ server = cifs_get_tcp_session(ctx, NULL); if (IS_ERR(server)) { @@ -3237,11 +3249,36 @@ static int mount_get_conns(struct mount_ctx *mnt_ctx) SMB2_GLOBAL_CAP_PERSISTENT_HANDLES))) { cifs_server_dbg(VFS, "persistent handles not supported by server\n"); rc = -EOPNOTSUPP; + } + +out: + mnt_ctx->xid = xid; + mnt_ctx->server = server; + mnt_ctx->ses = ses; + mnt_ctx->tcon = NULL; + + return rc; +} + +int cifs_mount_get_tcon(struct cifs_mount_ctx *mnt_ctx) +{ + struct TCP_Server_Info *server; + struct cifs_sb_info *cifs_sb; + struct smb3_fs_context *ctx; + struct cifs_tcon *tcon = NULL; + int rc = 0; + + if (WARN_ON_ONCE(!mnt_ctx || !mnt_ctx->server || !mnt_ctx->ses || !mnt_ctx->fs_ctx || + !mnt_ctx->cifs_sb)) { + rc = -EINVAL; goto out; } + server = mnt_ctx->server; + ctx = mnt_ctx->fs_ctx; + cifs_sb = mnt_ctx->cifs_sb; /* search for existing tcon to this server share */ - tcon = cifs_get_tcon(ses, ctx); + tcon = cifs_get_tcon(mnt_ctx->ses, ctx); if (IS_ERR(tcon)) { rc = PTR_ERR(tcon); tcon = NULL; @@ -3259,7 +3296,7 @@ static int mount_get_conns(struct mount_ctx *mnt_ctx) * reset of caps checks mount to see if unix extensions disabled * for just this mount. */ - reset_cifs_unix_caps(xid, tcon, cifs_sb, ctx); + reset_cifs_unix_caps(mnt_ctx->xid, tcon, cifs_sb, ctx); spin_lock(&tcon->ses->server->srv_lock); if ((tcon->ses->server->tcpStatus == CifsNeedReconnect) && (le64_to_cpu(tcon->fsUnixInfo.Capability) & @@ -3275,7 +3312,7 @@ static int mount_get_conns(struct mount_ctx *mnt_ctx) /* do not care if a following call succeed - informational */ if (!tcon->pipe && server->ops->qfs_tcon) { - server->ops->qfs_tcon(xid, tcon, cifs_sb); + server->ops->qfs_tcon(mnt_ctx->xid, tcon, cifs_sb); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RO_CACHE) { if (tcon->fsDevInfo.DeviceCharacteristics & cpu_to_le32(FILE_READ_ONLY_DEVICE)) @@ -3308,11 +3345,7 @@ static int mount_get_conns(struct mount_ctx *mnt_ctx) cifs_fscache_get_super_cookie(tcon); out: - mnt_ctx->server = server; - mnt_ctx->ses = ses; mnt_ctx->tcon = tcon; - mnt_ctx->xid = xid; - return rc; } @@ -3342,146 +3375,6 @@ static int mount_setup_tlink(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses, return 0; } -#ifdef CONFIG_CIFS_DFS_UPCALL -/* Get unique dfs connections */ -static int mount_get_dfs_conns(struct mount_ctx *mnt_ctx) -{ - int rc; - - mnt_ctx->fs_ctx->nosharesock = true; - rc = mount_get_conns(mnt_ctx); - if (mnt_ctx->server) { - cifs_dbg(FYI, "%s: marking tcp session as a dfs connection\n", __func__); - spin_lock(&mnt_ctx->server->srv_lock); - mnt_ctx->server->is_dfs_conn = true; - spin_unlock(&mnt_ctx->server->srv_lock); - } - return rc; -} - -/* - * cifs_build_path_to_root returns full path to root when we do not have an - * existing connection (tcon) - */ -static char * -build_unc_path_to_root(const struct smb3_fs_context *ctx, - const struct cifs_sb_info *cifs_sb, bool useppath) -{ - char *full_path, *pos; - unsigned int pplen = useppath && ctx->prepath ? - strlen(ctx->prepath) + 1 : 0; - unsigned int unc_len = strnlen(ctx->UNC, MAX_TREE_SIZE + 1); - - if (unc_len > MAX_TREE_SIZE) - return ERR_PTR(-EINVAL); - - full_path = kmalloc(unc_len + pplen + 1, GFP_KERNEL); - if (full_path == NULL) - return ERR_PTR(-ENOMEM); - - memcpy(full_path, ctx->UNC, unc_len); - pos = full_path + unc_len; - - if (pplen) { - *pos = CIFS_DIR_SEP(cifs_sb); - memcpy(pos + 1, ctx->prepath, pplen); - pos += pplen; - } - - *pos = '\0'; /* add trailing null */ - convert_delimiter(full_path, CIFS_DIR_SEP(cifs_sb)); - cifs_dbg(FYI, "%s: full_path=%s\n", __func__, full_path); - return full_path; -} - -/* - * expand_dfs_referral - Update cifs_sb from dfs referral path - * - * cifs_sb->ctx->mount_options will be (re-)allocated to a string containing updated options for the - * submount. Otherwise it will be left untouched. - */ -static int expand_dfs_referral(struct mount_ctx *mnt_ctx, const char *full_path, - struct dfs_info3_param *referral) -{ - int rc; - struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb; - struct smb3_fs_context *ctx = mnt_ctx->fs_ctx; - char *fake_devname = NULL, *mdata = NULL; - - mdata = cifs_compose_mount_options(cifs_sb->ctx->mount_options, full_path + 1, referral, - &fake_devname); - if (IS_ERR(mdata)) { - rc = PTR_ERR(mdata); - mdata = NULL; - } else { - /* - * We can not clear out the whole structure since we no longer have an explicit - * function to parse a mount-string. Instead we need to clear out the individual - * fields that are no longer valid. - */ - kfree(ctx->prepath); - ctx->prepath = NULL; - rc = cifs_setup_volume_info(ctx, mdata, fake_devname); - } - kfree(fake_devname); - kfree(cifs_sb->ctx->mount_options); - cifs_sb->ctx->mount_options = mdata; - - return rc; -} -#endif - -/* TODO: all callers to this are broken. We are not parsing mount_options here - * we should pass a clone of the original context? - */ -int -cifs_setup_volume_info(struct smb3_fs_context *ctx, const char *mntopts, const char *devname) -{ - int rc; - - if (devname) { - cifs_dbg(FYI, "%s: devname=%s\n", __func__, devname); - rc = smb3_parse_devname(devname, ctx); - if (rc) { - cifs_dbg(VFS, "%s: failed to parse %s: %d\n", __func__, devname, rc); - return rc; - } - } - - if (mntopts) { - char *ip; - - rc = smb3_parse_opt(mntopts, "ip", &ip); - if (rc) { - cifs_dbg(VFS, "%s: failed to parse ip options: %d\n", __func__, rc); - return rc; - } - - rc = cifs_convert_address((struct sockaddr *)&ctx->dstaddr, ip, strlen(ip)); - kfree(ip); - if (!rc) { - cifs_dbg(VFS, "%s: failed to convert ip address\n", __func__); - return -EINVAL; - } - } - - if (ctx->nullauth) { - cifs_dbg(FYI, "Anonymous login\n"); - kfree(ctx->username); - ctx->username = NULL; - } else if (ctx->username) { - /* BB fixme parse for domain name here */ - cifs_dbg(FYI, "Username: %s\n", ctx->username); - } else { - cifs_dbg(VFS, "No username specified\n"); - /* In userspace mount helper we can get user name from alternate - locations such as env variables and files on disk */ - return -EINVAL; - } - - return 0; -} - static int cifs_are_all_path_components_accessible(struct TCP_Server_Info *server, unsigned int xid, @@ -3534,7 +3427,7 @@ cifs_are_all_path_components_accessible(struct TCP_Server_Info *server, * * Return -EREMOTE if it is, otherwise 0 or -errno. */ -static int is_path_remote(struct mount_ctx *mnt_ctx) +int cifs_is_path_remote(struct cifs_mount_ctx *mnt_ctx) { int rc; struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb; @@ -3543,9 +3436,6 @@ static int is_path_remote(struct mount_ctx *mnt_ctx) struct cifs_tcon *tcon = mnt_ctx->tcon; struct smb3_fs_context *ctx = mnt_ctx->fs_ctx; char *full_path; -#ifdef CONFIG_CIFS_DFS_UPCALL - bool nodfs = cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS; -#endif if (!server->ops->is_path_accessible) return -EOPNOTSUPP; @@ -3562,19 +3452,6 @@ static int is_path_remote(struct mount_ctx *mnt_ctx) rc = server->ops->is_path_accessible(xid, tcon, cifs_sb, full_path); -#ifdef CONFIG_CIFS_DFS_UPCALL - if (nodfs) { - if (rc == -EREMOTE) - rc = -EOPNOTSUPP; - goto out; - } - - /* path *might* exist with non-ASCII characters in DFS root - * try again with full path (only if nodfs is not set) */ - if (rc == -ENOENT && is_tcon_dfs(tcon)) - rc = cifs_dfs_query_info_nonascii_quirk(xid, tcon, cifs_sb, - full_path); -#endif if (rc != 0 && rc != -EREMOTE) goto out; @@ -3594,251 +3471,19 @@ out: } #ifdef CONFIG_CIFS_DFS_UPCALL -static void set_root_ses(struct mount_ctx *mnt_ctx) -{ - if (mnt_ctx->ses) { - spin_lock(&cifs_tcp_ses_lock); - mnt_ctx->ses->ses_count++; - spin_unlock(&cifs_tcp_ses_lock); - dfs_cache_add_refsrv_session(&mnt_ctx->mount_id, mnt_ctx->ses); - } - mnt_ctx->root_ses = mnt_ctx->ses; -} - -static int is_dfs_mount(struct mount_ctx *mnt_ctx, bool *isdfs, struct dfs_cache_tgt_list *root_tl) -{ - int rc; - struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb; - struct smb3_fs_context *ctx = mnt_ctx->fs_ctx; - - *isdfs = true; - - rc = mount_get_conns(mnt_ctx); - /* - * If called with 'nodfs' mount option, then skip DFS resolving. Otherwise unconditionally - * try to get an DFS referral (even cached) to determine whether it is an DFS mount. - * - * Skip prefix path to provide support for DFS referrals from w2k8 servers which don't seem - * to respond with PATH_NOT_COVERED to requests that include the prefix. - */ - if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS) || - dfs_cache_find(mnt_ctx->xid, mnt_ctx->ses, cifs_sb->local_nls, cifs_remap(cifs_sb), - ctx->UNC + 1, NULL, root_tl)) { - if (rc) - return rc; - /* Check if it is fully accessible and then mount it */ - rc = is_path_remote(mnt_ctx); - if (!rc) - *isdfs = false; - else if (rc != -EREMOTE) - return rc; - } - return 0; -} - -static int connect_dfs_target(struct mount_ctx *mnt_ctx, const char *full_path, - const char *ref_path, struct dfs_cache_tgt_iterator *tit) -{ - int rc; - struct dfs_info3_param ref = {}; - struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb; - char *oldmnt = cifs_sb->ctx->mount_options; - - cifs_dbg(FYI, "%s: full_path=%s ref_path=%s target=%s\n", __func__, full_path, ref_path, - dfs_cache_get_tgt_name(tit)); - - rc = dfs_cache_get_tgt_referral(ref_path, tit, &ref); - if (rc) - goto out; - - rc = expand_dfs_referral(mnt_ctx, full_path, &ref); - if (rc) - goto out; - - /* Connect to new target only if we were redirected (e.g. mount options changed) */ - if (oldmnt != cifs_sb->ctx->mount_options) { - mount_put_conns(mnt_ctx); - rc = mount_get_dfs_conns(mnt_ctx); - } - if (!rc) { - if (cifs_is_referral_server(mnt_ctx->tcon, &ref)) - set_root_ses(mnt_ctx); - rc = dfs_cache_update_tgthint(mnt_ctx->xid, mnt_ctx->root_ses, cifs_sb->local_nls, - cifs_remap(cifs_sb), ref_path, tit); - } - -out: - free_dfs_info_param(&ref); - return rc; -} - -static int connect_dfs_root(struct mount_ctx *mnt_ctx, struct dfs_cache_tgt_list *root_tl) -{ - int rc; - char *full_path; - struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb; - struct smb3_fs_context *ctx = mnt_ctx->fs_ctx; - struct dfs_cache_tgt_iterator *tit; - - /* Put initial connections as they might be shared with other mounts. We need unique dfs - * connections per mount to properly failover, so mount_get_dfs_conns() must be used from - * now on. - */ - mount_put_conns(mnt_ctx); - mount_get_dfs_conns(mnt_ctx); - set_root_ses(mnt_ctx); - - full_path = build_unc_path_to_root(ctx, cifs_sb, true); - if (IS_ERR(full_path)) - return PTR_ERR(full_path); - - mnt_ctx->origin_fullpath = dfs_cache_canonical_path(ctx->UNC, cifs_sb->local_nls, - cifs_remap(cifs_sb)); - if (IS_ERR(mnt_ctx->origin_fullpath)) { - rc = PTR_ERR(mnt_ctx->origin_fullpath); - mnt_ctx->origin_fullpath = NULL; - goto out; - } - - /* Try all dfs root targets */ - for (rc = -ENOENT, tit = dfs_cache_get_tgt_iterator(root_tl); - tit; tit = dfs_cache_get_next_tgt(root_tl, tit)) { - rc = connect_dfs_target(mnt_ctx, full_path, mnt_ctx->origin_fullpath + 1, tit); - if (!rc) { - mnt_ctx->leaf_fullpath = kstrdup(mnt_ctx->origin_fullpath, GFP_KERNEL); - if (!mnt_ctx->leaf_fullpath) - rc = -ENOMEM; - break; - } - } - -out: - kfree(full_path); - return rc; -} - -static int __follow_dfs_link(struct mount_ctx *mnt_ctx) -{ - int rc; - struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb; - struct smb3_fs_context *ctx = mnt_ctx->fs_ctx; - char *full_path; - struct dfs_cache_tgt_list tl = DFS_CACHE_TGT_LIST_INIT(tl); - struct dfs_cache_tgt_iterator *tit; - - full_path = build_unc_path_to_root(ctx, cifs_sb, true); - if (IS_ERR(full_path)) - return PTR_ERR(full_path); - - kfree(mnt_ctx->leaf_fullpath); - mnt_ctx->leaf_fullpath = dfs_cache_canonical_path(full_path, cifs_sb->local_nls, - cifs_remap(cifs_sb)); - if (IS_ERR(mnt_ctx->leaf_fullpath)) { - rc = PTR_ERR(mnt_ctx->leaf_fullpath); - mnt_ctx->leaf_fullpath = NULL; - goto out; - } - - /* Get referral from dfs link */ - rc = dfs_cache_find(mnt_ctx->xid, mnt_ctx->root_ses, cifs_sb->local_nls, - cifs_remap(cifs_sb), mnt_ctx->leaf_fullpath + 1, NULL, &tl); - if (rc) - goto out; - - /* Try all dfs link targets. If an I/O fails from currently connected DFS target with an - * error other than STATUS_PATH_NOT_COVERED (-EREMOTE), then retry it from other targets as - * specified in MS-DFSC "3.1.5.2 I/O Operation to Target Fails with an Error Other Than - * STATUS_PATH_NOT_COVERED." - */ - for (rc = -ENOENT, tit = dfs_cache_get_tgt_iterator(&tl); - tit; tit = dfs_cache_get_next_tgt(&tl, tit)) { - rc = connect_dfs_target(mnt_ctx, full_path, mnt_ctx->leaf_fullpath + 1, tit); - if (!rc) { - rc = is_path_remote(mnt_ctx); - if (!rc || rc == -EREMOTE) - break; - } - } - -out: - kfree(full_path); - dfs_cache_free_tgts(&tl); - return rc; -} - -static int follow_dfs_link(struct mount_ctx *mnt_ctx) -{ - int rc; - struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb; - struct smb3_fs_context *ctx = mnt_ctx->fs_ctx; - char *full_path; - int num_links = 0; - - full_path = build_unc_path_to_root(ctx, cifs_sb, true); - if (IS_ERR(full_path)) - return PTR_ERR(full_path); - - kfree(mnt_ctx->origin_fullpath); - mnt_ctx->origin_fullpath = dfs_cache_canonical_path(full_path, cifs_sb->local_nls, - cifs_remap(cifs_sb)); - kfree(full_path); - - if (IS_ERR(mnt_ctx->origin_fullpath)) { - rc = PTR_ERR(mnt_ctx->origin_fullpath); - mnt_ctx->origin_fullpath = NULL; - return rc; - } - - do { - rc = __follow_dfs_link(mnt_ctx); - if (!rc || rc != -EREMOTE) - break; - } while (rc = -ELOOP, ++num_links < MAX_NESTED_LINKS); - - return rc; -} - -/* Set up DFS referral paths for failover */ -static void setup_server_referral_paths(struct mount_ctx *mnt_ctx) -{ - struct TCP_Server_Info *server = mnt_ctx->server; - - mutex_lock(&server->refpath_lock); - server->origin_fullpath = mnt_ctx->origin_fullpath; - server->leaf_fullpath = mnt_ctx->leaf_fullpath; - server->current_fullpath = mnt_ctx->leaf_fullpath; - mutex_unlock(&server->refpath_lock); - mnt_ctx->origin_fullpath = mnt_ctx->leaf_fullpath = NULL; -} - int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx) { - int rc; - struct mount_ctx mnt_ctx = { .cifs_sb = cifs_sb, .fs_ctx = ctx, }; - struct dfs_cache_tgt_list tl = DFS_CACHE_TGT_LIST_INIT(tl); + struct cifs_mount_ctx mnt_ctx = { .cifs_sb = cifs_sb, .fs_ctx = ctx, }; bool isdfs; + int rc; - rc = is_dfs_mount(&mnt_ctx, &isdfs, &tl); + uuid_gen(&mnt_ctx.mount_id); + rc = dfs_mount_share(&mnt_ctx, &isdfs); if (rc) goto error; if (!isdfs) goto out; - /* proceed as DFS mount */ - uuid_gen(&mnt_ctx.mount_id); - rc = connect_dfs_root(&mnt_ctx, &tl); - dfs_cache_free_tgts(&tl); - - if (rc) - goto error; - - rc = is_path_remote(&mnt_ctx); - if (rc) - rc = follow_dfs_link(&mnt_ctx); - if (rc) - goto error; - - setup_server_referral_paths(&mnt_ctx); /* * After reconnecting to a different server, unique ids won't match anymore, so we disable * serverino. This prevents dentry revalidation to think the dentry are stale (ESTALE). @@ -3867,26 +3512,28 @@ error: dfs_cache_put_refsrv_sessions(&mnt_ctx.mount_id); kfree(mnt_ctx.origin_fullpath); kfree(mnt_ctx.leaf_fullpath); - mount_put_conns(&mnt_ctx); + cifs_mount_put_conns(&mnt_ctx); return rc; } #else int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx) { int rc = 0; - struct mount_ctx mnt_ctx = { .cifs_sb = cifs_sb, .fs_ctx = ctx, }; + struct cifs_mount_ctx mnt_ctx = { .cifs_sb = cifs_sb, .fs_ctx = ctx, }; - rc = mount_get_conns(&mnt_ctx); + rc = cifs_mount_get_session(&mnt_ctx); if (rc) goto error; - if (mnt_ctx.tcon) { - rc = is_path_remote(&mnt_ctx); - if (rc == -EREMOTE) - rc = -EOPNOTSUPP; - if (rc) - goto error; - } + rc = cifs_mount_get_tcon(&mnt_ctx); + if (rc) + goto error; + + rc = cifs_is_path_remote(&mnt_ctx); + if (rc == -EREMOTE) + rc = -EOPNOTSUPP; + if (rc) + goto error; rc = mount_setup_tlink(cifs_sb, mnt_ctx.ses, mnt_ctx.tcon); if (rc) @@ -3896,7 +3543,7 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx) return rc; error: - mount_put_conns(&mnt_ctx); + cifs_mount_put_conns(&mnt_ctx); return rc; } #endif @@ -4449,264 +4096,7 @@ cifs_prune_tlinks(struct work_struct *work) TLINK_IDLE_EXPIRE); } -#ifdef CONFIG_CIFS_DFS_UPCALL -/* Update dfs referral path of superblock */ -static int update_server_fullpath(struct TCP_Server_Info *server, struct cifs_sb_info *cifs_sb, - const char *target) -{ - int rc = 0; - size_t len = strlen(target); - char *refpath, *npath; - - if (unlikely(len < 2 || *target != '\\')) - return -EINVAL; - - if (target[1] == '\\') { - len += 1; - refpath = kmalloc(len, GFP_KERNEL); - if (!refpath) - return -ENOMEM; - - scnprintf(refpath, len, "%s", target); - } else { - len += sizeof("\\"); - refpath = kmalloc(len, GFP_KERNEL); - if (!refpath) - return -ENOMEM; - - scnprintf(refpath, len, "\\%s", target); - } - - npath = dfs_cache_canonical_path(refpath, cifs_sb->local_nls, cifs_remap(cifs_sb)); - kfree(refpath); - - if (IS_ERR(npath)) { - rc = PTR_ERR(npath); - } else { - mutex_lock(&server->refpath_lock); - kfree(server->leaf_fullpath); - server->leaf_fullpath = npath; - mutex_unlock(&server->refpath_lock); - server->current_fullpath = server->leaf_fullpath; - } - return rc; -} - -static int target_share_matches_server(struct TCP_Server_Info *server, const char *tcp_host, - size_t tcp_host_len, char *share, bool *target_match) -{ - int rc = 0; - const char *dfs_host; - size_t dfs_host_len; - - *target_match = true; - extract_unc_hostname(share, &dfs_host, &dfs_host_len); - - /* Check if hostnames or addresses match */ - if (dfs_host_len != tcp_host_len || strncasecmp(dfs_host, tcp_host, dfs_host_len) != 0) { - cifs_dbg(FYI, "%s: %.*s doesn't match %.*s\n", __func__, (int)dfs_host_len, - dfs_host, (int)tcp_host_len, tcp_host); - rc = match_target_ip(server, dfs_host, dfs_host_len, target_match); - if (rc) - cifs_dbg(VFS, "%s: failed to match target ip: %d\n", __func__, rc); - } - return rc; -} - -static int __tree_connect_dfs_target(const unsigned int xid, struct cifs_tcon *tcon, - struct cifs_sb_info *cifs_sb, char *tree, bool islink, - struct dfs_cache_tgt_list *tl) -{ - int rc; - struct TCP_Server_Info *server = tcon->ses->server; - const struct smb_version_operations *ops = server->ops; - struct cifs_tcon *ipc = tcon->ses->tcon_ipc; - char *share = NULL, *prefix = NULL; - const char *tcp_host; - size_t tcp_host_len; - struct dfs_cache_tgt_iterator *tit; - bool target_match; - - extract_unc_hostname(server->hostname, &tcp_host, &tcp_host_len); - - tit = dfs_cache_get_tgt_iterator(tl); - if (!tit) { - rc = -ENOENT; - goto out; - } - - /* Try to tree connect to all dfs targets */ - for (; tit; tit = dfs_cache_get_next_tgt(tl, tit)) { - const char *target = dfs_cache_get_tgt_name(tit); - struct dfs_cache_tgt_list ntl = DFS_CACHE_TGT_LIST_INIT(ntl); - - kfree(share); - kfree(prefix); - share = prefix = NULL; - - /* Check if share matches with tcp ses */ - rc = dfs_cache_get_tgt_share(server->current_fullpath + 1, tit, &share, &prefix); - if (rc) { - cifs_dbg(VFS, "%s: failed to parse target share: %d\n", __func__, rc); - break; - } - - rc = target_share_matches_server(server, tcp_host, tcp_host_len, share, - &target_match); - if (rc) - break; - if (!target_match) { - rc = -EHOSTUNREACH; - continue; - } - - if (ipc->need_reconnect) { - scnprintf(tree, MAX_TREE_SIZE, "\\\\%s\\IPC$", server->hostname); - rc = ops->tree_connect(xid, ipc->ses, tree, ipc, cifs_sb->local_nls); - if (rc) - break; - } - - scnprintf(tree, MAX_TREE_SIZE, "\\%s", share); - if (!islink) { - rc = ops->tree_connect(xid, tcon->ses, tree, tcon, cifs_sb->local_nls); - break; - } - /* - * If no dfs referrals were returned from link target, then just do a TREE_CONNECT - * to it. Otherwise, cache the dfs referral and then mark current tcp ses for - * reconnect so either the demultiplex thread or the echo worker will reconnect to - * newly resolved target. - */ - if (dfs_cache_find(xid, tcon->ses, cifs_sb->local_nls, cifs_remap(cifs_sb), target, - NULL, &ntl)) { - rc = ops->tree_connect(xid, tcon->ses, tree, tcon, cifs_sb->local_nls); - if (rc) - continue; - rc = dfs_cache_noreq_update_tgthint(server->current_fullpath + 1, tit); - if (!rc) - rc = cifs_update_super_prepath(cifs_sb, prefix); - } else { - /* Target is another dfs share */ - rc = update_server_fullpath(server, cifs_sb, target); - dfs_cache_free_tgts(tl); - - if (!rc) { - rc = -EREMOTE; - list_replace_init(&ntl.tl_list, &tl->tl_list); - } else - dfs_cache_free_tgts(&ntl); - } - break; - } - -out: - kfree(share); - kfree(prefix); - - return rc; -} - -static int tree_connect_dfs_target(const unsigned int xid, struct cifs_tcon *tcon, - struct cifs_sb_info *cifs_sb, char *tree, bool islink, - struct dfs_cache_tgt_list *tl) -{ - int rc; - int num_links = 0; - struct TCP_Server_Info *server = tcon->ses->server; - - do { - rc = __tree_connect_dfs_target(xid, tcon, cifs_sb, tree, islink, tl); - if (!rc || rc != -EREMOTE) - break; - } while (rc = -ELOOP, ++num_links < MAX_NESTED_LINKS); - /* - * If we couldn't tree connect to any targets from last referral path, then retry from - * original referral path. - */ - if (rc && server->current_fullpath != server->origin_fullpath) { - server->current_fullpath = server->origin_fullpath; - cifs_signal_cifsd_for_reconnect(server, true); - } - - dfs_cache_free_tgts(tl); - return rc; -} - -int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const struct nls_table *nlsc) -{ - int rc; - struct TCP_Server_Info *server = tcon->ses->server; - const struct smb_version_operations *ops = server->ops; - struct super_block *sb = NULL; - struct cifs_sb_info *cifs_sb; - struct dfs_cache_tgt_list tl = DFS_CACHE_TGT_LIST_INIT(tl); - char *tree; - struct dfs_info3_param ref = {0}; - - /* only send once per connect */ - spin_lock(&tcon->tc_lock); - if (tcon->ses->ses_status != SES_GOOD || - (tcon->status != TID_NEW && - tcon->status != TID_NEED_TCON)) { - spin_unlock(&tcon->tc_lock); - return 0; - } - tcon->status = TID_IN_TCON; - spin_unlock(&tcon->tc_lock); - - tree = kzalloc(MAX_TREE_SIZE, GFP_KERNEL); - if (!tree) { - rc = -ENOMEM; - goto out; - } - - if (tcon->ipc) { - scnprintf(tree, MAX_TREE_SIZE, "\\\\%s\\IPC$", server->hostname); - rc = ops->tree_connect(xid, tcon->ses, tree, tcon, nlsc); - goto out; - } - - sb = cifs_get_tcp_super(server); - if (IS_ERR(sb)) { - rc = PTR_ERR(sb); - cifs_dbg(VFS, "%s: could not find superblock: %d\n", __func__, rc); - goto out; - } - - cifs_sb = CIFS_SB(sb); - - /* If it is not dfs or there was no cached dfs referral, then reconnect to same share */ - if (!server->current_fullpath || - dfs_cache_noreq_find(server->current_fullpath + 1, &ref, &tl)) { - rc = ops->tree_connect(xid, tcon->ses, tcon->tree_name, tcon, cifs_sb->local_nls); - goto out; - } - - rc = tree_connect_dfs_target(xid, tcon, cifs_sb, tree, ref.server_type == DFS_TYPE_LINK, - &tl); - free_dfs_info_param(&ref); - -out: - kfree(tree); - cifs_put_tcp_super(sb); - - if (rc) { - spin_lock(&tcon->tc_lock); - if (tcon->status == TID_IN_TCON) - tcon->status = TID_NEED_TCON; - spin_unlock(&tcon->tc_lock); - } else { - spin_lock(&tcon->tc_lock); - if (tcon->status == TID_IN_TCON) - tcon->status = TID_GOOD; - spin_unlock(&tcon->tc_lock); - tcon->need_reconnect = false; - } - - return rc; -} -#else +#ifndef CONFIG_CIFS_DFS_UPCALL int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const struct nls_table *nlsc) { int rc; diff --git a/fs/cifs/dfs.c b/fs/cifs/dfs.c new file mode 100644 index 000000000000..b541e68378f6 --- /dev/null +++ b/fs/cifs/dfs.c @@ -0,0 +1,544 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2022 Paulo Alcantara <palcantara@suse.de> + */ + +#include <linux/namei.h> +#include "cifsproto.h" +#include "cifs_debug.h" +#include "dns_resolve.h" +#include "fs_context.h" +#include "dfs.h" + +/** + * dfs_parse_target_referral - set fs context for dfs target referral + * + * @full_path: full path in UNC format. + * @ref: dfs referral pointer. + * @ctx: smb3 fs context pointer. + * + * Return zero if dfs referral was parsed correctly, otherwise non-zero. + */ +int dfs_parse_target_referral(const char *full_path, const struct dfs_info3_param *ref, + struct smb3_fs_context *ctx) +{ + int rc; + const char *prepath = NULL; + char *path; + + if (!full_path || !*full_path || !ref || !ctx) + return -EINVAL; + + if (WARN_ON_ONCE(!ref->node_name || ref->path_consumed < 0)) + return -EINVAL; + + if (strlen(full_path) - ref->path_consumed) { + prepath = full_path + ref->path_consumed; + /* skip initial delimiter */ + if (*prepath == '/' || *prepath == '\\') + prepath++; + } + + path = cifs_build_devname(ref->node_name, prepath); + if (IS_ERR(path)) + return PTR_ERR(path); + + rc = smb3_parse_devname(path, ctx); + if (rc) + goto out; + + rc = dns_resolve_server_name_to_ip(path, (struct sockaddr *)&ctx->dstaddr, NULL); + +out: + kfree(path); + return rc; +} + +/* + * cifs_build_path_to_root returns full path to root when we do not have an + * existing connection (tcon) + */ +static char *build_unc_path_to_root(const struct smb3_fs_context *ctx, + const struct cifs_sb_info *cifs_sb, bool useppath) +{ + char *full_path, *pos; + unsigned int pplen = useppath && ctx->prepath ? strlen(ctx->prepath) + 1 : 0; + unsigned int unc_len = strnlen(ctx->UNC, MAX_TREE_SIZE + 1); + + if (unc_len > MAX_TREE_SIZE) + return ERR_PTR(-EINVAL); + + full_path = kmalloc(unc_len + pplen + 1, GFP_KERNEL); + if (full_path == NULL) + return ERR_PTR(-ENOMEM); + + memcpy(full_path, ctx->UNC, unc_len); + pos = full_path + unc_len; + + if (pplen) { + *pos = CIFS_DIR_SEP(cifs_sb); + memcpy(pos + 1, ctx->prepath, pplen); + pos += pplen; + } + + *pos = '\0'; /* add trailing null */ + convert_delimiter(full_path, CIFS_DIR_SEP(cifs_sb)); + cifs_dbg(FYI, "%s: full_path=%s\n", __func__, full_path); + return full_path; +} + +static int get_session(struct cifs_mount_ctx *mnt_ctx, const char *full_path) +{ + struct smb3_fs_context *ctx = mnt_ctx->fs_ctx; + int rc; + + ctx->leaf_fullpath = (char *)full_path; + rc = cifs_mount_get_session(mnt_ctx); + ctx->leaf_fullpath = NULL; + if (!rc) { + struct cifs_ses *ses = mnt_ctx->ses; + + mutex_lock(&ses->session_mutex); + ses->dfs_root_ses = mnt_ctx->root_ses; + mutex_unlock(&ses->session_mutex); + } + return rc; +} + +static void set_root_ses(struct cifs_mount_ctx *mnt_ctx) +{ + if (mnt_ctx->ses) { + spin_lock(&cifs_tcp_ses_lock); + mnt_ctx->ses->ses_count++; + spin_unlock(&cifs_tcp_ses_lock); + dfs_cache_add_refsrv_session(&mnt_ctx->mount_id, mnt_ctx->ses); + } + mnt_ctx->root_ses = mnt_ctx->ses; +} + +static int get_dfs_conn(struct cifs_mount_ctx *mnt_ctx, const char *ref_path, const char *full_path, + const struct dfs_cache_tgt_iterator *tit) +{ + struct smb3_fs_context *ctx = mnt_ctx->fs_ctx; + struct dfs_info3_param ref = {}; + int rc; + + rc = dfs_cache_get_tgt_referral(ref_path + 1, tit, &ref); + if (rc) + return rc; + + rc = dfs_parse_target_referral(full_path + 1, &ref, ctx); + if (rc) + goto out; + + cifs_mount_put_conns(mnt_ctx); + rc = get_session(mnt_ctx, ref_path); + if (rc) + goto out; + + if (ref.flags & DFSREF_REFERRAL_SERVER) + set_root_ses(mnt_ctx); + + rc = -EREMOTE; + if (ref.flags & DFSREF_STORAGE_SERVER) { + rc = cifs_mount_get_tcon(mnt_ctx); + if (rc) + goto out; + + /* some servers may not advertise referral capability under ref.flags */ + if (!(ref.flags & DFSREF_REFERRAL_SERVER) && + is_tcon_dfs(mnt_ctx->tcon)) + set_root_ses(mnt_ctx); + + rc = cifs_is_path_remote(mnt_ctx); + } + +out: + free_dfs_info_param(&ref); + return rc; +} + +static int __dfs_mount_share(struct cifs_mount_ctx *mnt_ctx) +{ + struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb; + struct smb3_fs_context *ctx = mnt_ctx->fs_ctx; + char *ref_path = NULL, *full_path = NULL; + struct dfs_cache_tgt_iterator *tit; + struct TCP_Server_Info *server; + char *origin_fullpath = NULL; + int num_links = 0; + int rc; + + ref_path = dfs_get_path(cifs_sb, ctx->UNC); + if (IS_ERR(ref_path)) + return PTR_ERR(ref_path); + + full_path = build_unc_path_to_root(ctx, cifs_sb, true); + if (IS_ERR(full_path)) { + rc = PTR_ERR(full_path); + full_path = NULL; + goto out; + } + + origin_fullpath = kstrdup(full_path, GFP_KERNEL); + if (!origin_fullpath) { + rc = -ENOMEM; + goto out; + } + + do { + struct dfs_cache_tgt_list tl = DFS_CACHE_TGT_LIST_INIT(tl); + + rc = dfs_get_referral(mnt_ctx, ref_path + 1, NULL, &tl); + if (rc) + break; + + tit = dfs_cache_get_tgt_iterator(&tl); + if (!tit) { + cifs_dbg(VFS, "%s: dfs referral (%s) with no targets\n", __func__, + ref_path + 1); + rc = -ENOENT; + dfs_cache_free_tgts(&tl); + break; + } + + do { + rc = get_dfs_conn(mnt_ctx, ref_path, full_path, tit); + if (!rc) + break; + if (rc == -EREMOTE) { + if (++num_links > MAX_NESTED_LINKS) { + rc = -ELOOP; + break; + } + kfree(ref_path); + kfree(full_path); + ref_path = full_path = NULL; + + full_path = build_unc_path_to_root(ctx, cifs_sb, true); + if (IS_ERR(full_path)) { + rc = PTR_ERR(full_path); + full_path = NULL; + } else { + ref_path = dfs_get_path(cifs_sb, full_path); + if (IS_ERR(ref_path)) { + rc = PTR_ERR(ref_path); + ref_path = NULL; + } + } + break; + } + } while ((tit = dfs_cache_get_next_tgt(&tl, tit))); + dfs_cache_free_tgts(&tl); + } while (rc == -EREMOTE); + + if (!rc) { + server = mnt_ctx->server; + + mutex_lock(&server->refpath_lock); + server->origin_fullpath = origin_fullpath; + server->current_fullpath = server->leaf_fullpath; + mutex_unlock(&server->refpath_lock); + origin_fullpath = NULL; + } + +out: + kfree(origin_fullpath); + kfree(ref_path); + kfree(full_path); + return rc; +} + +int dfs_mount_share(struct cifs_mount_ctx *mnt_ctx, bool *isdfs) +{ + struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb; + struct smb3_fs_context *ctx = mnt_ctx->fs_ctx; + int rc; + + *isdfs = false; + + rc = get_session(mnt_ctx, NULL); + if (rc) + return rc; + mnt_ctx->root_ses = mnt_ctx->ses; + /* + * If called with 'nodfs' mount option, then skip DFS resolving. Otherwise unconditionally + * try to get an DFS referral (even cached) to determine whether it is an DFS mount. + * + * Skip prefix path to provide support for DFS referrals from w2k8 servers which don't seem + * to respond with PATH_NOT_COVERED to requests that include the prefix. + */ + if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS) || + dfs_get_referral(mnt_ctx, ctx->UNC + 1, NULL, NULL)) { + rc = cifs_mount_get_tcon(mnt_ctx); + if (rc) + return rc; + + rc = cifs_is_path_remote(mnt_ctx); + if (!rc || rc != -EREMOTE) + return rc; + } + + *isdfs = true; + set_root_ses(mnt_ctx); + + return __dfs_mount_share(mnt_ctx); +} + +/* Update dfs referral path of superblock */ +static int update_server_fullpath(struct TCP_Server_Info *server, struct cifs_sb_info *cifs_sb, + const char *target) +{ + int rc = 0; + size_t len = strlen(target); + char *refpath, *npath; + + if (unlikely(len < 2 || *target != '\\')) + return -EINVAL; + + if (target[1] == '\\') { + len += 1; + refpath = kmalloc(len, GFP_KERNEL); + if (!refpath) + return -ENOMEM; + + scnprintf(refpath, len, "%s", target); + } else { + len += sizeof("\\"); + refpath = kmalloc(len, GFP_KERNEL); + if (!refpath) + return -ENOMEM; + + scnprintf(refpath, len, "\\%s", target); + } + + npath = dfs_cache_canonical_path(refpath, cifs_sb->local_nls, cifs_remap(cifs_sb)); + kfree(refpath); + + if (IS_ERR(npath)) { + rc = PTR_ERR(npath); + } else { + mutex_lock(&server->refpath_lock); + kfree(server->leaf_fullpath); + server->leaf_fullpath = npath; + mutex_unlock(&server->refpath_lock); + server->current_fullpath = server->leaf_fullpath; + } + return rc; +} + +static int target_share_matches_server(struct TCP_Server_Info *server, const char *tcp_host, + size_t tcp_host_len, char *share, bool *target_match) +{ + int rc = 0; + const char *dfs_host; + size_t dfs_host_len; + + *target_match = true; + extract_unc_hostname(share, &dfs_host, &dfs_host_len); + + /* Check if hostnames or addresses match */ + if (dfs_host_len != tcp_host_len || strncasecmp(dfs_host, tcp_host, dfs_host_len) != 0) { + cifs_dbg(FYI, "%s: %.*s doesn't match %.*s\n", __func__, (int)dfs_host_len, + dfs_host, (int)tcp_host_len, tcp_host); + rc = match_target_ip(server, dfs_host, dfs_host_len, target_match); + if (rc) + cifs_dbg(VFS, "%s: failed to match target ip: %d\n", __func__, rc); + } + return rc; +} + +static int __tree_connect_dfs_target(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_sb_info *cifs_sb, char *tree, bool islink, + struct dfs_cache_tgt_list *tl) +{ + int rc; + struct TCP_Server_Info *server = tcon->ses->server; + const struct smb_version_operations *ops = server->ops; + struct cifs_ses *root_ses = CIFS_DFS_ROOT_SES(tcon->ses); + struct cifs_tcon *ipc = root_ses->tcon_ipc; + char *share = NULL, *prefix = NULL; + const char *tcp_host; + size_t tcp_host_len; + struct dfs_cache_tgt_iterator *tit; + bool target_match; + + extract_unc_hostname(server->hostname, &tcp_host, &tcp_host_len); + + tit = dfs_cache_get_tgt_iterator(tl); + if (!tit) { + rc = -ENOENT; + goto out; + } + + /* Try to tree connect to all dfs targets */ + for (; tit; tit = dfs_cache_get_next_tgt(tl, tit)) { + const char *target = dfs_cache_get_tgt_name(tit); + struct dfs_cache_tgt_list ntl = DFS_CACHE_TGT_LIST_INIT(ntl); + + kfree(share); + kfree(prefix); + share = prefix = NULL; + + /* Check if share matches with tcp ses */ + rc = dfs_cache_get_tgt_share(server->current_fullpath + 1, tit, &share, &prefix); + if (rc) { + cifs_dbg(VFS, "%s: failed to parse target share: %d\n", __func__, rc); + break; + } + + rc = target_share_matches_server(server, tcp_host, tcp_host_len, share, + &target_match); + if (rc) + break; + if (!target_match) { + rc = -EHOSTUNREACH; + continue; + } + + dfs_cache_noreq_update_tgthint(server->current_fullpath + 1, tit); + + if (ipc->need_reconnect) { + scnprintf(tree, MAX_TREE_SIZE, "\\\\%s\\IPC$", server->hostname); + rc = ops->tree_connect(xid, ipc->ses, tree, ipc, cifs_sb->local_nls); + if (rc) + break; + } + + scnprintf(tree, MAX_TREE_SIZE, "\\%s", share); + if (!islink) { + rc = ops->tree_connect(xid, tcon->ses, tree, tcon, cifs_sb->local_nls); + break; + } + /* + * If no dfs referrals were returned from link target, then just do a TREE_CONNECT + * to it. Otherwise, cache the dfs referral and then mark current tcp ses for + * reconnect so either the demultiplex thread or the echo worker will reconnect to + * newly resolved target. + */ + if (dfs_cache_find(xid, root_ses, cifs_sb->local_nls, cifs_remap(cifs_sb), target, + NULL, &ntl)) { + rc = ops->tree_connect(xid, tcon->ses, tree, tcon, cifs_sb->local_nls); + if (rc) + continue; + + rc = cifs_update_super_prepath(cifs_sb, prefix); + } else { + /* Target is another dfs share */ + rc = update_server_fullpath(server, cifs_sb, target); + dfs_cache_free_tgts(tl); + + if (!rc) { + rc = -EREMOTE; + list_replace_init(&ntl.tl_list, &tl->tl_list); + } else + dfs_cache_free_tgts(&ntl); + } + break; + } + +out: + kfree(share); + kfree(prefix); + + return rc; +} + +static int tree_connect_dfs_target(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_sb_info *cifs_sb, char *tree, bool islink, + struct dfs_cache_tgt_list *tl) +{ + int rc; + int num_links = 0; + struct TCP_Server_Info *server = tcon->ses->server; + char *old_fullpath = server->leaf_fullpath; + + do { + rc = __tree_connect_dfs_target(xid, tcon, cifs_sb, tree, islink, tl); + if (!rc || rc != -EREMOTE) + break; + } while (rc = -ELOOP, ++num_links < MAX_NESTED_LINKS); + /* + * If we couldn't tree connect to any targets from last referral path, then + * retry it from newly resolved dfs referral. + */ + if (rc && server->leaf_fullpath != old_fullpath) + cifs_signal_cifsd_for_reconnect(server, true); + + dfs_cache_free_tgts(tl); + return rc; +} + +int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const struct nls_table *nlsc) +{ + int rc; + struct TCP_Server_Info *server = tcon->ses->server; + const struct smb_version_operations *ops = server->ops; + struct super_block *sb = NULL; + struct cifs_sb_info *cifs_sb; + struct dfs_cache_tgt_list tl = DFS_CACHE_TGT_LIST_INIT(tl); + char *tree; + struct dfs_info3_param ref = {0}; + + /* only send once per connect */ + spin_lock(&tcon->tc_lock); + if (tcon->ses->ses_status != SES_GOOD || + (tcon->status != TID_NEW && + tcon->status != TID_NEED_TCON)) { + spin_unlock(&tcon->tc_lock); + return 0; + } + tcon->status = TID_IN_TCON; + spin_unlock(&tcon->tc_lock); + + tree = kzalloc(MAX_TREE_SIZE, GFP_KERNEL); + if (!tree) { + rc = -ENOMEM; + goto out; + } + + if (tcon->ipc) { + scnprintf(tree, MAX_TREE_SIZE, "\\\\%s\\IPC$", server->hostname); + rc = ops->tree_connect(xid, tcon->ses, tree, tcon, nlsc); + goto out; + } + + sb = cifs_get_tcp_super(server); + if (IS_ERR(sb)) { + rc = PTR_ERR(sb); + cifs_dbg(VFS, "%s: could not find superblock: %d\n", __func__, rc); + goto out; + } + + cifs_sb = CIFS_SB(sb); + + /* If it is not dfs or there was no cached dfs referral, then reconnect to same share */ + if (!server->current_fullpath || + dfs_cache_noreq_find(server->current_fullpath + 1, &ref, &tl)) { + rc = ops->tree_connect(xid, tcon->ses, tcon->tree_name, tcon, cifs_sb->local_nls); + goto out; + } + + rc = tree_connect_dfs_target(xid, tcon, cifs_sb, tree, ref.server_type == DFS_TYPE_LINK, + &tl); + free_dfs_info_param(&ref); + +out: + kfree(tree); + cifs_put_tcp_super(sb); + + if (rc) { + spin_lock(&tcon->tc_lock); + if (tcon->status == TID_IN_TCON) + tcon->status = TID_NEED_TCON; + spin_unlock(&tcon->tc_lock); + } else { + spin_lock(&tcon->tc_lock); + if (tcon->status == TID_IN_TCON) + tcon->status = TID_GOOD; + spin_unlock(&tcon->tc_lock); + tcon->need_reconnect = false; + } + + return rc; +} diff --git a/fs/cifs/dfs.h b/fs/cifs/dfs.h new file mode 100644 index 000000000000..344bea6d8bab --- /dev/null +++ b/fs/cifs/dfs.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2022 Paulo Alcantara <palcantara@suse.de> + */ + +#ifndef _CIFS_DFS_H +#define _CIFS_DFS_H + +#include "cifsglob.h" +#include "fs_context.h" +#include "cifs_unicode.h" + +int dfs_parse_target_referral(const char *full_path, const struct dfs_info3_param *ref, + struct smb3_fs_context *ctx); +int dfs_mount_share(struct cifs_mount_ctx *mnt_ctx, bool *isdfs); + +static inline char *dfs_get_path(struct cifs_sb_info *cifs_sb, const char *path) +{ + return dfs_cache_canonical_path(path, cifs_sb->local_nls, cifs_remap(cifs_sb)); +} + +static inline int dfs_get_referral(struct cifs_mount_ctx *mnt_ctx, const char *path, + struct dfs_info3_param *ref, struct dfs_cache_tgt_list *tl) +{ + struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb; + + return dfs_cache_find(mnt_ctx->xid, mnt_ctx->root_ses, cifs_sb->local_nls, + cifs_remap(cifs_sb), path, ref, tl); +} + +static inline char *dfs_get_automount_devname(struct dentry *dentry, void *page) +{ + struct cifs_sb_info *cifs_sb = CIFS_SB(dentry->d_sb); + struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); + struct TCP_Server_Info *server = tcon->ses->server; + + if (unlikely(!server->origin_fullpath)) + return ERR_PTR(-EREMOTE); + + return __build_path_from_dentry_optional_prefix(dentry, page, + server->origin_fullpath, + strlen(server->origin_fullpath), + true); +} + +#endif /* _CIFS_DFS_H */ diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c index e70915ad7541..43ad1176dcb9 100644 --- a/fs/cifs/dfs_cache.c +++ b/fs/cifs/dfs_cache.c @@ -83,27 +83,6 @@ static void refresh_cache_worker(struct work_struct *work); static DECLARE_DELAYED_WORK(refresh_task, refresh_cache_worker); -static void get_ipc_unc(const char *ref_path, char *ipc, size_t ipclen) -{ - const char *host; - size_t len; - - extract_unc_hostname(ref_path, &host, &len); - scnprintf(ipc, ipclen, "\\\\%.*s\\IPC$", (int)len, host); -} - -static struct cifs_ses *find_ipc_from_server_path(struct cifs_ses **ses, const char *path) -{ - char unc[SERVER_NAME_LENGTH + sizeof("//x/IPC$")] = {0}; - - get_ipc_unc(path, unc, sizeof(unc)); - for (; *ses; ses++) { - if (!strcasecmp(unc, (*ses)->tcon_ipc->tree_name)) - return *ses; - } - return ERR_PTR(-ENOENT); -} - static void __mount_group_release(struct mount_group *mg) { int i; @@ -760,8 +739,6 @@ static int get_dfs_referral(const unsigned int xid, struct cifs_ses *ses, const int rc; int i; - cifs_dbg(FYI, "%s: get an DFS referral for %s\n", __func__, path); - *refs = NULL; *numrefs = 0; @@ -770,6 +747,7 @@ static int get_dfs_referral(const unsigned int xid, struct cifs_ses *ses, const if (unlikely(!cache_cp)) return -EINVAL; + cifs_dbg(FYI, "%s: ipc=%s referral=%s\n", __func__, ses->tcon_ipc->tree_name, path); rc = ses->server->ops->get_dfs_refer(xid, ses, path, refs, numrefs, cache_cp, NO_MAP_UNI_RSVD); if (!rc) { @@ -1104,26 +1082,23 @@ out_free_path: * * Return zero if the target hint was updated successfully, otherwise non-zero. */ -int dfs_cache_noreq_update_tgthint(const char *path, const struct dfs_cache_tgt_iterator *it) +void dfs_cache_noreq_update_tgthint(const char *path, const struct dfs_cache_tgt_iterator *it) { - int rc; - struct cache_entry *ce; struct cache_dfs_tgt *t; + struct cache_entry *ce; - if (!it) - return -EINVAL; + if (!path || !it) + return; cifs_dbg(FYI, "%s: path: %s\n", __func__, path); - down_write(&htable_rw_lock); + if (!down_write_trylock(&htable_rw_lock)) + return; ce = lookup_cache_entry(path); - if (IS_ERR(ce)) { - rc = PTR_ERR(ce); + if (IS_ERR(ce)) goto out_unlock; - } - rc = 0; t = ce->tgthint; if (unlikely(!strcasecmp(it->it_name, t->name))) @@ -1140,7 +1115,6 @@ int dfs_cache_noreq_update_tgthint(const char *path, const struct dfs_cache_tgt_ out_unlock: up_write(&htable_rw_lock); - return rc; } /** @@ -1314,8 +1288,7 @@ static bool target_share_equal(struct TCP_Server_Info *server, const char *s1, c char unc[sizeof("\\\\") + SERVER_NAME_LENGTH] = {0}; const char *host; size_t hostlen; - char *ip = NULL; - struct sockaddr sa; + struct sockaddr_storage ss; bool match; int rc; @@ -1330,23 +1303,17 @@ static bool target_share_equal(struct TCP_Server_Info *server, const char *s1, c extract_unc_hostname(s1, &host, &hostlen); scnprintf(unc, sizeof(unc), "\\\\%.*s", (int)hostlen, host); - rc = dns_resolve_server_name_to_ip(unc, &ip, NULL); + rc = dns_resolve_server_name_to_ip(unc, (struct sockaddr *)&ss, NULL); if (rc < 0) { cifs_dbg(FYI, "%s: could not resolve %.*s. assuming server address matches.\n", __func__, (int)hostlen, host); return true; } - if (!cifs_convert_address(&sa, ip, strlen(ip))) { - cifs_dbg(VFS, "%s: failed to convert address \'%s\'. skip address matching.\n", - __func__, ip); - } else { - cifs_server_lock(server); - match = cifs_match_ipaddr((struct sockaddr *)&server->dstaddr, &sa); - cifs_server_unlock(server); - } + cifs_server_lock(server); + match = cifs_match_ipaddr((struct sockaddr *)&server->dstaddr, (struct sockaddr *)&ss); + cifs_server_unlock(server); - kfree(ip); return match; } @@ -1373,23 +1340,19 @@ static void mark_for_reconnect_if_needed(struct cifs_tcon *tcon, struct dfs_cach } /* Refresh dfs referral of tcon and mark it for reconnect if needed */ -static int __refresh_tcon(const char *path, struct cifs_ses **sessions, struct cifs_tcon *tcon, - bool force_refresh) +static int __refresh_tcon(const char *path, struct cifs_tcon *tcon, bool force_refresh) { - struct cifs_ses *ses; - struct cache_entry *ce; + struct dfs_cache_tgt_list tl = DFS_CACHE_TGT_LIST_INIT(tl); + struct cifs_ses *ses = CIFS_DFS_ROOT_SES(tcon->ses); + struct cifs_tcon *ipc = ses->tcon_ipc; struct dfs_info3_param *refs = NULL; - int numrefs = 0; bool needs_refresh = false; - struct dfs_cache_tgt_list tl = DFS_CACHE_TGT_LIST_INIT(tl); - int rc = 0; + struct cache_entry *ce; unsigned int xid; + int numrefs = 0; + int rc = 0; - ses = find_ipc_from_server_path(sessions, path); - if (IS_ERR(ses)) { - cifs_dbg(FYI, "%s: could not find ipc session\n", __func__); - return PTR_ERR(ses); - } + xid = get_xid(); down_read(&htable_rw_lock); ce = lookup_cache_entry(path); @@ -1406,12 +1369,17 @@ static int __refresh_tcon(const char *path, struct cifs_ses **sessions, struct c goto out; } - xid = get_xid(); - rc = get_dfs_referral(xid, ses, path, &refs, &numrefs); - free_xid(xid); + spin_lock(&ipc->tc_lock); + if (ses->ses_status != SES_GOOD || ipc->status != TID_GOOD) { + spin_unlock(&ipc->tc_lock); + cifs_dbg(FYI, "%s: skip cache refresh due to disconnected ipc\n", __func__); + goto out; + } + spin_unlock(&ipc->tc_lock); - /* Create or update a cache entry with the new referral */ + rc = get_dfs_referral(xid, ses, path, &refs, &numrefs); if (!rc) { + /* Create or update a cache entry with the new referral */ dump_refs(refs, numrefs); down_write(&htable_rw_lock); @@ -1426,24 +1394,20 @@ static int __refresh_tcon(const char *path, struct cifs_ses **sessions, struct c } out: + free_xid(xid); dfs_cache_free_tgts(&tl); free_dfs_info_array(refs, numrefs); return rc; } -static int refresh_tcon(struct cifs_ses **sessions, struct cifs_tcon *tcon, bool force_refresh) +static int refresh_tcon(struct cifs_tcon *tcon, bool force_refresh) { struct TCP_Server_Info *server = tcon->ses->server; mutex_lock(&server->refpath_lock); - if (server->origin_fullpath) { - if (server->leaf_fullpath && strcasecmp(server->leaf_fullpath, - server->origin_fullpath)) - __refresh_tcon(server->leaf_fullpath + 1, sessions, tcon, force_refresh); - __refresh_tcon(server->origin_fullpath + 1, sessions, tcon, force_refresh); - } + if (server->leaf_fullpath) + __refresh_tcon(server->leaf_fullpath + 1, tcon, force_refresh); mutex_unlock(&server->refpath_lock); - return 0; } @@ -1461,9 +1425,6 @@ int dfs_cache_remount_fs(struct cifs_sb_info *cifs_sb) { struct cifs_tcon *tcon; struct TCP_Server_Info *server; - struct mount_group *mg; - struct cifs_ses *sessions[CACHE_MAX_ENTRIES + 1] = {NULL}; - int rc; if (!cifs_sb || !cifs_sb->master_tlink) return -EINVAL; @@ -1480,21 +1441,6 @@ int dfs_cache_remount_fs(struct cifs_sb_info *cifs_sb) cifs_dbg(FYI, "%s: no dfs mount group id\n", __func__); return -EINVAL; } - - mutex_lock(&mount_group_list_lock); - mg = find_mount_group_locked(&cifs_sb->dfs_mount_id); - if (IS_ERR(mg)) { - mutex_unlock(&mount_group_list_lock); - cifs_dbg(FYI, "%s: no ipc session for refreshing referral\n", __func__); - return PTR_ERR(mg); - } - kref_get(&mg->refcount); - mutex_unlock(&mount_group_list_lock); - - spin_lock(&mg->lock); - memcpy(&sessions, mg->sessions, mg->num_sessions * sizeof(mg->sessions[0])); - spin_unlock(&mg->lock); - /* * After reconnecting to a different server, unique ids won't match anymore, so we disable * serverino. This prevents dentry revalidation to think the dentry are stale (ESTALE). @@ -1505,42 +1451,38 @@ int dfs_cache_remount_fs(struct cifs_sb_info *cifs_sb) * that have different prefix paths. */ cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH; - rc = refresh_tcon(sessions, tcon, true); - kref_put(&mg->refcount, mount_group_release); - return rc; + return refresh_tcon(tcon, true); } /* - * Refresh all active dfs mounts regardless of whether they are in cache or not. - * (cache can be cleared) + * Worker that will refresh DFS cache from all active mounts based on lowest TTL value + * from a DFS referral. */ -static void refresh_mounts(struct cifs_ses **sessions) +static void refresh_cache_worker(struct work_struct *work) { struct TCP_Server_Info *server; - struct cifs_ses *ses; struct cifs_tcon *tcon, *ntcon; struct list_head tcons; + struct cifs_ses *ses; INIT_LIST_HEAD(&tcons); spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) { - spin_lock(&server->srv_lock); - if (!server->is_dfs_conn) { - spin_unlock(&server->srv_lock); + if (!server->leaf_fullpath) continue; - } - spin_unlock(&server->srv_lock); list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { + if (ses->tcon_ipc) { + ses->ses_count++; + list_add_tail(&ses->tcon_ipc->ulist, &tcons); + } list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { - spin_lock(&tcon->tc_lock); - if (!tcon->ipc && !tcon->need_reconnect) { + if (!tcon->ipc) { tcon->tc_count++; list_add_tail(&tcon->ulist, &tcons); } - spin_unlock(&tcon->tc_lock); } } } @@ -1552,132 +1494,14 @@ static void refresh_mounts(struct cifs_ses **sessions) list_del_init(&tcon->ulist); mutex_lock(&server->refpath_lock); - if (server->origin_fullpath) { - if (server->leaf_fullpath && strcasecmp(server->leaf_fullpath, - server->origin_fullpath)) - __refresh_tcon(server->leaf_fullpath + 1, sessions, tcon, false); - __refresh_tcon(server->origin_fullpath + 1, sessions, tcon, false); - } + if (server->leaf_fullpath) + __refresh_tcon(server->leaf_fullpath + 1, tcon, false); mutex_unlock(&server->refpath_lock); - cifs_put_tcon(tcon); - } -} - -static void refresh_cache(struct cifs_ses **sessions) -{ - int i; - struct cifs_ses *ses; - unsigned int xid; - char *ref_paths[CACHE_MAX_ENTRIES]; - int count = 0; - struct cache_entry *ce; - - /* - * Refresh all cached entries. Get all new referrals outside critical section to avoid - * starvation while performing SMB2 IOCTL on broken or slow connections. - - * The cache entries may cover more paths than the active mounts - * (e.g. domain-based DFS referrals or multi tier DFS setups). - */ - down_read(&htable_rw_lock); - for (i = 0; i < CACHE_HTABLE_SIZE; i++) { - struct hlist_head *l = &cache_htable[i]; - - hlist_for_each_entry(ce, l, hlist) { - if (count == ARRAY_SIZE(ref_paths)) - goto out_unlock; - if (hlist_unhashed(&ce->hlist) || !cache_entry_expired(ce) || - IS_ERR(find_ipc_from_server_path(sessions, ce->path))) - continue; - ref_paths[count++] = kstrdup(ce->path, GFP_ATOMIC); - } - } - -out_unlock: - up_read(&htable_rw_lock); - - for (i = 0; i < count; i++) { - char *path = ref_paths[i]; - struct dfs_info3_param *refs = NULL; - int numrefs = 0; - int rc = 0; - - if (!path) - continue; - - ses = find_ipc_from_server_path(sessions, path); - if (IS_ERR(ses)) - goto next_referral; - - xid = get_xid(); - rc = get_dfs_referral(xid, ses, path, &refs, &numrefs); - free_xid(xid); - - if (!rc) { - down_write(&htable_rw_lock); - ce = lookup_cache_entry(path); - /* - * We need to re-check it because other tasks might have it deleted or - * updated. - */ - if (!IS_ERR(ce) && cache_entry_expired(ce)) - update_cache_entry_locked(ce, refs, numrefs); - up_write(&htable_rw_lock); - } - -next_referral: - kfree(path); - free_dfs_info_array(refs, numrefs); - } -} - -/* - * Worker that will refresh DFS cache and active mounts based on lowest TTL value from a DFS - * referral. - */ -static void refresh_cache_worker(struct work_struct *work) -{ - struct list_head mglist; - struct mount_group *mg, *tmp_mg; - struct cifs_ses *sessions[CACHE_MAX_ENTRIES + 1] = {NULL}; - int max_sessions = ARRAY_SIZE(sessions) - 1; - int i = 0, count; - - INIT_LIST_HEAD(&mglist); - - /* Get refereces of mount groups */ - mutex_lock(&mount_group_list_lock); - list_for_each_entry(mg, &mount_group_list, list) { - kref_get(&mg->refcount); - list_add(&mg->refresh_list, &mglist); - } - mutex_unlock(&mount_group_list_lock); - - /* Fill in local array with an NULL-terminated list of all referral server sessions */ - list_for_each_entry(mg, &mglist, refresh_list) { - if (i >= max_sessions) - break; - - spin_lock(&mg->lock); - if (i + mg->num_sessions > max_sessions) - count = max_sessions - i; + if (tcon->ipc) + cifs_put_smb_ses(tcon->ses); else - count = mg->num_sessions; - memcpy(&sessions[i], mg->sessions, count * sizeof(mg->sessions[0])); - spin_unlock(&mg->lock); - i += count; - } - - if (sessions[0]) { - /* Refresh all active mounts and cached entries */ - refresh_mounts(sessions); - refresh_cache(sessions); - } - - list_for_each_entry_safe(mg, tmp_mg, &mglist, refresh_list) { - list_del_init(&mg->refresh_list); - kref_put(&mg->refcount, mount_group_release); + cifs_put_tcon(tcon); } spin_lock(&cache_ttl_lock); diff --git a/fs/cifs/dfs_cache.h b/fs/cifs/dfs_cache.h index 52070d1df189..f7cff0be9327 100644 --- a/fs/cifs/dfs_cache.h +++ b/fs/cifs/dfs_cache.h @@ -38,7 +38,7 @@ int dfs_cache_noreq_find(const char *path, struct dfs_info3_param *ref, int dfs_cache_update_tgthint(const unsigned int xid, struct cifs_ses *ses, const struct nls_table *cp, int remap, const char *path, const struct dfs_cache_tgt_iterator *it); -int dfs_cache_noreq_update_tgthint(const char *path, const struct dfs_cache_tgt_iterator *it); +void dfs_cache_noreq_update_tgthint(const char *path, const struct dfs_cache_tgt_iterator *it); int dfs_cache_get_tgt_referral(const char *path, const struct dfs_cache_tgt_iterator *it, struct dfs_info3_param *ref); int dfs_cache_get_tgt_share(char *path, const struct dfs_cache_tgt_iterator *it, char **share, diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 8b1c37158556..ad4208bf1e32 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -78,14 +78,13 @@ build_path_from_dentry(struct dentry *direntry, void *page) prefix); } -char * -build_path_from_dentry_optional_prefix(struct dentry *direntry, void *page, - bool prefix) +char *__build_path_from_dentry_optional_prefix(struct dentry *direntry, void *page, + const char *tree, int tree_len, + bool prefix) { int dfsplen; int pplen = 0; struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb); - struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); char dirsep = CIFS_DIR_SEP(cifs_sb); char *s; @@ -93,7 +92,7 @@ build_path_from_dentry_optional_prefix(struct dentry *direntry, void *page, return ERR_PTR(-ENOMEM); if (prefix) - dfsplen = strnlen(tcon->tree_name, MAX_TREE_SIZE + 1); + dfsplen = strnlen(tree, tree_len + 1); else dfsplen = 0; @@ -123,7 +122,7 @@ build_path_from_dentry_optional_prefix(struct dentry *direntry, void *page, } if (dfsplen) { s -= dfsplen; - memcpy(s, tcon->tree_name, dfsplen); + memcpy(s, tree, dfsplen); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) { int i; for (i = 0; i < dfsplen; i++) { @@ -135,6 +134,16 @@ build_path_from_dentry_optional_prefix(struct dentry *direntry, void *page, return s; } +char *build_path_from_dentry_optional_prefix(struct dentry *direntry, void *page, + bool prefix) +{ + struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb); + struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); + + return __build_path_from_dentry_optional_prefix(direntry, page, tcon->tree_name, + MAX_TREE_SIZE, prefix); +} + /* * Don't allow path components longer than the server max. * Don't allow the separator character in a path component. diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c index 0458d28d71aa..8bf8978bc5d6 100644 --- a/fs/cifs/dns_resolve.c +++ b/fs/cifs/dns_resolve.c @@ -12,6 +12,7 @@ * */ +#include <linux/inet.h> #include <linux/slab.h> #include <linux/dns_resolver.h> #include "dns_resolve.h" @@ -25,17 +26,13 @@ * @ip_addr: Where to return the IP address. * @expiry: Where to return the expiry time for the dns record. * - * The IP address will be returned in string form, and the caller is - * responsible for freeing it. - * - * Returns length of result on success, -ve on error. + * Returns zero success, -ve on error. */ int -dns_resolve_server_name_to_ip(const char *unc, char **ip_addr, time64_t *expiry) +dns_resolve_server_name_to_ip(const char *unc, struct sockaddr *ip_addr, time64_t *expiry) { - struct sockaddr_storage ss; const char *hostname, *sep; - char *name; + char *ip; int len, rc; if (!ip_addr || !unc) @@ -60,30 +57,32 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr, time64_t *expiry) __func__, unc); /* Try to interpret hostname as an IPv4 or IPv6 address */ - rc = cifs_convert_address((struct sockaddr *)&ss, hostname, len); - if (rc > 0) - goto name_is_IP_address; + rc = cifs_convert_address(ip_addr, hostname, len); + if (rc > 0) { + cifs_dbg(FYI, "%s: unc is IP, skipping dns upcall: %*.*s\n", __func__, len, len, + hostname); + return 0; + } /* Perform the upcall */ rc = dns_query(current->nsproxy->net_ns, NULL, hostname, len, - NULL, ip_addr, expiry, false); - if (rc < 0) + NULL, &ip, expiry, false); + if (rc < 0) { cifs_dbg(FYI, "%s: unable to resolve: %*.*s\n", __func__, len, len, hostname); - else + } else { cifs_dbg(FYI, "%s: resolved: %*.*s to %s expiry %llu\n", - __func__, len, len, hostname, *ip_addr, + __func__, len, len, hostname, ip, expiry ? (*expiry) : 0); - return rc; -name_is_IP_address: - name = kmalloc(len + 1, GFP_KERNEL); - if (!name) - return -ENOMEM; - memcpy(name, hostname, len); - name[len] = 0; - cifs_dbg(FYI, "%s: unc is IP, skipping dns upcall: %s\n", - __func__, name); - *ip_addr = name; - return 0; + rc = cifs_convert_address(ip_addr, ip, strlen(ip)); + kfree(ip); + + if (!rc) { + cifs_dbg(FYI, "%s: unable to determine ip address\n", __func__); + rc = -EHOSTUNREACH; + } else + rc = 0; + } + return rc; } diff --git a/fs/cifs/dns_resolve.h b/fs/cifs/dns_resolve.h index afc0df381246..6eb0c15a2440 100644 --- a/fs/cifs/dns_resolve.h +++ b/fs/cifs/dns_resolve.h @@ -11,8 +11,10 @@ #ifndef _DNS_RESOLVE_H #define _DNS_RESOLVE_H +#include <linux/net.h> + #ifdef __KERNEL__ -extern int dns_resolve_server_name_to_ip(const char *unc, char **ip_addr, time64_t *expiry); +int dns_resolve_server_name_to_ip(const char *unc, struct sockaddr *ip_addr, time64_t *expiry); #endif /* KERNEL */ #endif /* _DNS_RESOLVE_H */ diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 87b56b1ae117..22dfc1f8b4f1 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2646,6 +2646,21 @@ wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages, return rc; } +static int +cifs_writepage_locked(struct page *page, struct writeback_control *wbc); + +static int cifs_write_one_page(struct page *page, struct writeback_control *wbc, + void *data) +{ + struct address_space *mapping = data; + int ret; + + ret = cifs_writepage_locked(page, wbc); + unlock_page(page); + mapping_set_error(mapping, ret); + return ret; +} + static int cifs_writepages(struct address_space *mapping, struct writeback_control *wbc) { @@ -2662,10 +2677,11 @@ static int cifs_writepages(struct address_space *mapping, /* * If wsize is smaller than the page cache size, default to writing - * one page at a time via cifs_writepage + * one page at a time. */ if (cifs_sb->ctx->wsize < PAGE_SIZE) - return generic_writepages(mapping, wbc); + return write_cache_pages(mapping, wbc, cifs_write_one_page, + mapping); xid = get_xid(); if (wbc->range_cyclic) { @@ -2852,13 +2868,6 @@ retry_write: return rc; } -static int cifs_writepage(struct page *page, struct writeback_control *wbc) -{ - int rc = cifs_writepage_locked(page, wbc); - unlock_page(page); - return rc; -} - static int cifs_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata) @@ -5231,7 +5240,6 @@ static bool cifs_dirty_folio(struct address_space *mapping, struct folio *folio) const struct address_space_operations cifs_addr_ops = { .read_folio = cifs_read_folio, .readahead = cifs_readahead, - .writepage = cifs_writepage, .writepages = cifs_writepages, .write_begin = cifs_write_begin, .write_end = cifs_write_end, @@ -5240,10 +5248,10 @@ const struct address_space_operations cifs_addr_ops = { .direct_IO = cifs_direct_io, .invalidate_folio = cifs_invalidate_folio, .launder_folio = cifs_launder_folio, + .migrate_folio = filemap_migrate_folio, /* - * TODO: investigate and if useful we could add an cifs_migratePage - * helper (under an CONFIG_MIGRATION) in the future, and also - * investigate and add an is_dirty_writeback helper if needed + * TODO: investigate and if useful we could add an is_dirty_writeback + * helper if needed */ .swap_activate = cifs_swap_activate, .swap_deactivate = cifs_swap_deactivate, @@ -5256,7 +5264,6 @@ const struct address_space_operations cifs_addr_ops = { */ const struct address_space_operations cifs_addr_ops_smallbuf = { .read_folio = cifs_read_folio, - .writepage = cifs_writepage, .writepages = cifs_writepages, .write_begin = cifs_write_begin, .write_end = cifs_write_end, @@ -5264,4 +5271,5 @@ const struct address_space_operations cifs_addr_ops_smallbuf = { .release_folio = cifs_release_folio, .invalidate_folio = cifs_invalidate_folio, .launder_folio = cifs_launder_folio, + .migrate_folio = filemap_migrate_folio, }; diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c index 45119597c765..6d13f8207e96 100644 --- a/fs/cifs/fs_context.c +++ b/fs/cifs/fs_context.c @@ -308,7 +308,6 @@ smb3_fs_context_dup(struct smb3_fs_context *new_ctx, struct smb3_fs_context *ctx { memcpy(new_ctx, ctx, sizeof(*ctx)); new_ctx->prepath = NULL; - new_ctx->mount_options = NULL; new_ctx->nodename = NULL; new_ctx->username = NULL; new_ctx->password = NULL; @@ -317,11 +316,11 @@ smb3_fs_context_dup(struct smb3_fs_context *new_ctx, struct smb3_fs_context *ctx new_ctx->UNC = NULL; new_ctx->source = NULL; new_ctx->iocharset = NULL; + new_ctx->leaf_fullpath = NULL; /* * Make sure to stay in sync with smb3_cleanup_fs_context_contents() */ DUP_CTX_STR(prepath); - DUP_CTX_STR(mount_options); DUP_CTX_STR(username); DUP_CTX_STR(password); DUP_CTX_STR(server_hostname); @@ -330,6 +329,7 @@ smb3_fs_context_dup(struct smb3_fs_context *new_ctx, struct smb3_fs_context *ctx DUP_CTX_STR(domainname); DUP_CTX_STR(nodename); DUP_CTX_STR(iocharset); + DUP_CTX_STR(leaf_fullpath); return 0; } @@ -569,17 +569,12 @@ static const struct fs_context_operations smb3_fs_context_ops = { static int smb3_fs_context_parse_monolithic(struct fs_context *fc, void *data) { - struct smb3_fs_context *ctx = smb3_fc2context(fc); char *options = data, *key; int ret = 0; if (!options) return 0; - ctx->mount_options = kstrdup(data, GFP_KERNEL); - if (ctx->mount_options == NULL) - return -ENOMEM; - ret = security_sb_eat_lsm_opts(options, &fc->security); if (ret) return ret; @@ -884,16 +879,21 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, ctx->nodfs = 1; break; case Opt_hard: - if (result.negated) + if (result.negated) { + if (ctx->retry == 1) + cifs_dbg(VFS, "conflicting hard vs. soft mount options\n"); ctx->retry = 0; - else + } else ctx->retry = 1; break; case Opt_soft: if (result.negated) ctx->retry = 1; - else + else { + if (ctx->retry == 1) + cifs_dbg(VFS, "conflicting hard vs soft mount options\n"); ctx->retry = 0; + } break; case Opt_mapposix: if (result.negated) @@ -1576,8 +1576,6 @@ smb3_cleanup_fs_context_contents(struct smb3_fs_context *ctx) /* * Make sure this stays in sync with smb3_fs_context_dup() */ - kfree(ctx->mount_options); - ctx->mount_options = NULL; kfree(ctx->username); ctx->username = NULL; kfree_sensitive(ctx->password); @@ -1596,6 +1594,8 @@ smb3_cleanup_fs_context_contents(struct smb3_fs_context *ctx) ctx->iocharset = NULL; kfree(ctx->prepath); ctx->prepath = NULL; + kfree(ctx->leaf_fullpath); + ctx->leaf_fullpath = NULL; } void diff --git a/fs/cifs/fs_context.h b/fs/cifs/fs_context.h index bbaee4c2281f..44cb5639ed3b 100644 --- a/fs/cifs/fs_context.h +++ b/fs/cifs/fs_context.h @@ -264,8 +264,7 @@ struct smb3_fs_context { __u16 compression; /* compression algorithm 0xFFFF default 0=disabled */ bool rootfs:1; /* if it's a SMB root file system */ bool witness:1; /* use witness protocol */ - - char *mount_options; + char *leaf_fullpath; }; extern const struct fs_parameter_spec smb3_fs_parameters[]; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 4e2ca3c6e5c0..f145a59af89b 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -632,6 +632,8 @@ static int cifs_sfu_mode(struct cifs_fattr *fattr, const unsigned char *path, /* Fill a cifs_fattr struct with info from POSIX info struct */ static void smb311_posix_info_to_fattr(struct cifs_fattr *fattr, struct cifs_open_info_data *data, + struct cifs_sid *owner, + struct cifs_sid *group, struct super_block *sb, bool adjust_tz, bool symlink) { struct smb311_posix_qinfo *info = &data->posix_fi; @@ -680,8 +682,8 @@ static void smb311_posix_info_to_fattr(struct cifs_fattr *fattr, struct cifs_ope } /* else if reparse point ... TODO: add support for FIFO and blk dev; special file types */ - fattr->cf_uid = cifs_sb->ctx->linux_uid; /* TODO: map uid and gid from SID */ - fattr->cf_gid = cifs_sb->ctx->linux_gid; + sid_to_id(cifs_sb, owner, fattr, SIDOWNER); + sid_to_id(cifs_sb, group, fattr, SIDGROUP); cifs_dbg(FYI, "POSIX query info: mode 0x%x uniqueid 0x%llx nlink %d\n", fattr->cf_mode, fattr->cf_uniqueid, fattr->cf_nlink); @@ -991,12 +993,6 @@ int cifs_get_inode_info(struct inode **inode, const char *full_path, } rc = server->ops->query_path_info(xid, tcon, cifs_sb, full_path, &tmp_data, &adjust_tz, &is_reparse_point); -#ifdef CONFIG_CIFS_DFS_UPCALL - if (rc == -ENOENT && is_tcon_dfs(tcon)) - rc = cifs_dfs_query_info_nonascii_quirk(xid, tcon, - cifs_sb, - full_path); -#endif data = &tmp_data; } @@ -1175,6 +1171,7 @@ smb311_posix_get_inode_info(struct inode **inode, struct cifs_fattr fattr = {0}; bool symlink = false; struct cifs_open_info_data data = {}; + struct cifs_sid owner, group; int rc = 0; int tmprc = 0; @@ -1192,7 +1189,8 @@ smb311_posix_get_inode_info(struct inode **inode, goto out; } - rc = smb311_posix_query_path_info(xid, tcon, cifs_sb, full_path, &data, &adjust_tz, + rc = smb311_posix_query_path_info(xid, tcon, cifs_sb, full_path, &data, + &owner, &group, &adjust_tz, &symlink); /* @@ -1201,7 +1199,8 @@ smb311_posix_get_inode_info(struct inode **inode, switch (rc) { case 0: - smb311_posix_info_to_fattr(&fattr, &data, sb, adjust_tz, symlink); + smb311_posix_info_to_fattr(&fattr, &data, &owner, &group, + sb, adjust_tz, symlink); break; case -EREMOTE: /* DFS link, no metadata available on this server */ diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 3e68d8208cf5..4d3c586785a5 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -1136,8 +1136,8 @@ cifs_free_hash(struct shash_desc **sdesc) * @len: Where to store the length for this page: * @offset: Where to store the offset for this page */ -void rqst_page_get_length(struct smb_rqst *rqst, unsigned int page, - unsigned int *len, unsigned int *offset) +void rqst_page_get_length(const struct smb_rqst *rqst, unsigned int page, + unsigned int *len, unsigned int *offset) { *len = rqst->rq_pagesz; *offset = (page == 0) ? rqst->rq_offset : 0; @@ -1258,44 +1258,28 @@ int match_target_ip(struct TCP_Server_Info *server, bool *result) { int rc; - char *target, *tip = NULL; - struct sockaddr tipaddr; + char *target; + struct sockaddr_storage ss; *result = false; target = kzalloc(share_len + 3, GFP_KERNEL); - if (!target) { - rc = -ENOMEM; - goto out; - } + if (!target) + return -ENOMEM; scnprintf(target, share_len + 3, "\\\\%.*s", (int)share_len, share); cifs_dbg(FYI, "%s: target name: %s\n", __func__, target + 2); - rc = dns_resolve_server_name_to_ip(target, &tip, NULL); - if (rc < 0) - goto out; - - cifs_dbg(FYI, "%s: target ip: %s\n", __func__, tip); + rc = dns_resolve_server_name_to_ip(target, (struct sockaddr *)&ss, NULL); + kfree(target); - if (!cifs_convert_address(&tipaddr, tip, strlen(tip))) { - cifs_dbg(VFS, "%s: failed to convert target ip address\n", - __func__); - rc = -EINVAL; - goto out; - } + if (rc < 0) + return rc; - *result = cifs_match_ipaddr((struct sockaddr *)&server->dstaddr, - &tipaddr); + *result = cifs_match_ipaddr((struct sockaddr *)&server->dstaddr, (struct sockaddr *)&ss); cifs_dbg(FYI, "%s: ip addresses match: %u\n", __func__, *result); - rc = 0; - -out: - kfree(target); - kfree(tip); - - return rc; + return 0; } int cifs_update_super_prepath(struct cifs_sb_info *cifs_sb, char *prefix) @@ -1314,49 +1298,4 @@ int cifs_update_super_prepath(struct cifs_sb_info *cifs_sb, char *prefix) cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH; return 0; } - -/** cifs_dfs_query_info_nonascii_quirk - * Handle weird Windows SMB server behaviour. It responds with - * STATUS_OBJECT_NAME_INVALID code to SMB2 QUERY_INFO request - * for "\<server>\<dfsname>\<linkpath>" DFS reference, - * where <dfsname> contains non-ASCII unicode symbols. - * - * Check such DFS reference. - */ -int cifs_dfs_query_info_nonascii_quirk(const unsigned int xid, - struct cifs_tcon *tcon, - struct cifs_sb_info *cifs_sb, - const char *linkpath) -{ - char *treename, *dfspath, sep; - int treenamelen, linkpathlen, rc; - - treename = tcon->tree_name; - /* MS-DFSC: All paths in REQ_GET_DFS_REFERRAL and RESP_GET_DFS_REFERRAL - * messages MUST be encoded with exactly one leading backslash, not two - * leading backslashes. - */ - sep = CIFS_DIR_SEP(cifs_sb); - if (treename[0] == sep && treename[1] == sep) - treename++; - linkpathlen = strlen(linkpath); - treenamelen = strnlen(treename, MAX_TREE_SIZE + 1); - dfspath = kzalloc(treenamelen + linkpathlen + 1, GFP_KERNEL); - if (!dfspath) - return -ENOMEM; - if (treenamelen) - memcpy(dfspath, treename, treenamelen); - memcpy(dfspath + treenamelen, linkpath, linkpathlen); - rc = dfs_cache_find(xid, tcon->ses, cifs_sb->local_nls, - cifs_remap(cifs_sb), dfspath, NULL, NULL); - if (rc == 0) { - cifs_dbg(FYI, "DFS ref '%s' is found, emulate -EREMOTE\n", - dfspath); - rc = -EREMOTE; - } else { - cifs_dbg(FYI, "%s: dfs_cache_find returned %d\n", __func__, rc); - } - kfree(dfspath); - return rc; -} #endif diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c index ffbd9a99fc12..ba6cc50af390 100644 --- a/fs/cifs/smb2file.c +++ b/fs/cifs/smb2file.c @@ -122,8 +122,8 @@ int smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, __u32 struct smb2_hdr *hdr = err_iov.iov_base; if (unlikely(!err_iov.iov_base || err_buftype == CIFS_NO_BUFFER)) - rc = -ENOMEM; - else if (hdr->Status == STATUS_STOPPED_ON_SYMLINK) { + goto out; + if (hdr->Status == STATUS_STOPPED_ON_SYMLINK) { rc = smb2_parse_symlink_response(oparms->cifs_sb, &err_iov, &data->symlink_target); if (!rc) { diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index 68e08c85fbb8..8521adf9ce79 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -59,6 +59,7 @@ static int smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const char *full_path, __u32 desired_access, __u32 create_disposition, __u32 create_options, umode_t mode, void *ptr, int command, struct cifsFileInfo *cfile, + __u8 **extbuf, size_t *extbuflen, struct kvec *err_iov, int *err_buftype) { struct cop_vars *vars = NULL; @@ -430,6 +431,21 @@ static int smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, &rsp_iov[1], sizeof(idata->posix_fi) /* add SIDs */, (char *)&idata->posix_fi); } + if (rc == 0) { + unsigned int length = le32_to_cpu(qi_rsp->OutputBufferLength); + + if (length > sizeof(idata->posix_fi)) { + char *base = (char *)rsp_iov[1].iov_base + + le16_to_cpu(qi_rsp->OutputBufferOffset) + + sizeof(idata->posix_fi); + *extbuflen = length - sizeof(idata->posix_fi); + *extbuf = kmemdup(base, *extbuflen, GFP_KERNEL); + if (!*extbuf) + rc = -ENOMEM; + } else { + rc = -EINVAL; + } + } if (rqst[1].rq_iov) SMB2_query_info_free(&rqst[1]); if (rqst[2].rq_iov) @@ -539,23 +555,43 @@ int smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, cifs_get_readable_path(tcon, full_path, &cfile); rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, FILE_READ_ATTRIBUTES, FILE_OPEN, create_options, ACL_NO_MODE, data, SMB2_OP_QUERY_INFO, cfile, - err_iov, err_buftype); - if (rc == -EOPNOTSUPP) { - if (err_iov[0].iov_base && err_buftype[0] != CIFS_NO_BUFFER && - ((struct smb2_hdr *)err_iov[0].iov_base)->Command == SMB2_CREATE && - ((struct smb2_hdr *)err_iov[0].iov_base)->Status == STATUS_STOPPED_ON_SYMLINK) { - rc = smb2_parse_symlink_response(cifs_sb, err_iov, &data->symlink_target); + NULL, NULL, err_iov, err_buftype); + if (rc) { + struct smb2_hdr *hdr = err_iov[0].iov_base; + + if (unlikely(!hdr || err_buftype[0] == CIFS_NO_BUFFER)) + goto out; + if (rc == -EOPNOTSUPP && hdr->Command == SMB2_CREATE && + hdr->Status == STATUS_STOPPED_ON_SYMLINK) { + rc = smb2_parse_symlink_response(cifs_sb, err_iov, + &data->symlink_target); if (rc) goto out; - } - *reparse = true; - create_options |= OPEN_REPARSE_POINT; - /* Failed on a symbolic link - query a reparse point info */ - cifs_get_readable_path(tcon, full_path, &cfile); - rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, FILE_READ_ATTRIBUTES, - FILE_OPEN, create_options, ACL_NO_MODE, data, - SMB2_OP_QUERY_INFO, cfile, NULL, NULL); + *reparse = true; + create_options |= OPEN_REPARSE_POINT; + + /* Failed on a symbolic link - query a reparse point info */ + cifs_get_readable_path(tcon, full_path, &cfile); + rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, + FILE_READ_ATTRIBUTES, FILE_OPEN, + create_options, ACL_NO_MODE, data, + SMB2_OP_QUERY_INFO, cfile, NULL, NULL, + NULL, NULL); + goto out; + } else if (rc != -EREMOTE && IS_ENABLED(CONFIG_CIFS_DFS_UPCALL) && + hdr->Status == STATUS_OBJECT_NAME_INVALID) { + /* + * Handle weird Windows SMB server behaviour. It responds with + * STATUS_OBJECT_NAME_INVALID code to SMB2 QUERY_INFO request + * for "\<server>\<dfsname>\<linkpath>" DFS reference, + * where <dfsname> contains non-ASCII unicode symbols. + */ + rc = -EREMOTE; + } + if (rc == -EREMOTE && IS_ENABLED(CONFIG_CIFS_DFS_UPCALL) && cifs_sb && + (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS)) + rc = -EOPNOTSUPP; } out: @@ -568,13 +604,20 @@ out: int smb311_posix_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const char *full_path, - struct cifs_open_info_data *data, bool *adjust_tz, bool *reparse) + struct cifs_open_info_data *data, + struct cifs_sid *owner, + struct cifs_sid *group, + bool *adjust_tz, bool *reparse) { int rc; __u32 create_options = 0; struct cifsFileInfo *cfile; struct kvec err_iov[3] = {}; int err_buftype[3] = {}; + __u8 *sidsbuf = NULL; + __u8 *sidsbuf_end = NULL; + size_t sidsbuflen = 0; + size_t owner_len, group_len; *adjust_tz = false; *reparse = false; @@ -589,7 +632,7 @@ int smb311_posix_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, cifs_get_readable_path(tcon, full_path, &cfile); rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, FILE_READ_ATTRIBUTES, FILE_OPEN, create_options, ACL_NO_MODE, data, SMB2_OP_POSIX_QUERY_INFO, cfile, - err_iov, err_buftype); + &sidsbuf, &sidsbuflen, err_iov, err_buftype); if (rc == -EOPNOTSUPP) { /* BB TODO: When support for special files added to Samba re-verify this path */ if (err_iov[0].iov_base && err_buftype[0] != CIFS_NO_BUFFER && @@ -606,10 +649,31 @@ int smb311_posix_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, cifs_get_readable_path(tcon, full_path, &cfile); rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, FILE_READ_ATTRIBUTES, FILE_OPEN, create_options, ACL_NO_MODE, data, - SMB2_OP_POSIX_QUERY_INFO, cfile, NULL, NULL); + SMB2_OP_POSIX_QUERY_INFO, cfile, + &sidsbuf, &sidsbuflen, NULL, NULL); + } + + if (rc == 0) { + sidsbuf_end = sidsbuf + sidsbuflen; + + owner_len = posix_info_sid_size(sidsbuf, sidsbuf_end); + if (owner_len == -1) { + rc = -EINVAL; + goto out; + } + memcpy(owner, sidsbuf, owner_len); + + group_len = posix_info_sid_size( + sidsbuf + owner_len, sidsbuf_end); + if (group_len == -1) { + rc = -EINVAL; + goto out; + } + memcpy(group, sidsbuf + owner_len, group_len); } out: + kfree(sidsbuf); free_rsp_buf(err_buftype[0], err_iov[0].iov_base); free_rsp_buf(err_buftype[1], err_iov[1].iov_base); free_rsp_buf(err_buftype[2], err_iov[2].iov_base); @@ -624,7 +688,7 @@ smb2_mkdir(const unsigned int xid, struct inode *parent_inode, umode_t mode, return smb2_compound_op(xid, tcon, cifs_sb, name, FILE_WRITE_ATTRIBUTES, FILE_CREATE, CREATE_NOT_FILE, mode, NULL, SMB2_OP_MKDIR, - NULL, NULL, NULL); + NULL, NULL, NULL, NULL, NULL); } void @@ -646,7 +710,7 @@ smb2_mkdir_setinfo(struct inode *inode, const char *name, tmprc = smb2_compound_op(xid, tcon, cifs_sb, name, FILE_WRITE_ATTRIBUTES, FILE_CREATE, CREATE_NOT_FILE, ACL_NO_MODE, - &data, SMB2_OP_SET_INFO, cfile, NULL, NULL); + &data, SMB2_OP_SET_INFO, cfile, NULL, NULL, NULL, NULL); if (tmprc == 0) cifs_i->cifsAttrs = dosattrs; } @@ -658,7 +722,7 @@ smb2_rmdir(const unsigned int xid, struct cifs_tcon *tcon, const char *name, drop_cached_dir_by_name(xid, tcon, name, cifs_sb); return smb2_compound_op(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN, CREATE_NOT_FILE, ACL_NO_MODE, - NULL, SMB2_OP_RMDIR, NULL, NULL, NULL); + NULL, SMB2_OP_RMDIR, NULL, NULL, NULL, NULL, NULL); } int @@ -667,7 +731,7 @@ smb2_unlink(const unsigned int xid, struct cifs_tcon *tcon, const char *name, { return smb2_compound_op(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN, CREATE_DELETE_ON_CLOSE | OPEN_REPARSE_POINT, - ACL_NO_MODE, NULL, SMB2_OP_DELETE, NULL, NULL, NULL); + ACL_NO_MODE, NULL, SMB2_OP_DELETE, NULL, NULL, NULL, NULL, NULL); } static int @@ -686,7 +750,7 @@ smb2_set_path_attr(const unsigned int xid, struct cifs_tcon *tcon, } rc = smb2_compound_op(xid, tcon, cifs_sb, from_name, access, FILE_OPEN, 0, ACL_NO_MODE, smb2_to_name, - command, cfile, NULL, NULL); + command, cfile, NULL, NULL, NULL, NULL); smb2_rename_path: kfree(smb2_to_name); return rc; @@ -727,7 +791,7 @@ smb2_set_path_size(const unsigned int xid, struct cifs_tcon *tcon, cifs_get_writable_path(tcon, full_path, FIND_WR_ANY, &cfile); return smb2_compound_op(xid, tcon, cifs_sb, full_path, FILE_WRITE_DATA, FILE_OPEN, 0, ACL_NO_MODE, - &eof, SMB2_OP_SET_EOF, cfile, NULL, NULL); + &eof, SMB2_OP_SET_EOF, cfile, NULL, NULL, NULL, NULL); } int @@ -754,7 +818,7 @@ smb2_set_file_info(struct inode *inode, const char *full_path, rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, FILE_WRITE_ATTRIBUTES, FILE_OPEN, 0, ACL_NO_MODE, buf, SMB2_OP_SET_INFO, cfile, - NULL, NULL); + NULL, NULL, NULL, NULL); cifs_put_tlink(tlink); return rc; } diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 32b3877b538a..dc160de7a6de 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -796,7 +796,9 @@ smb2_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon, int rc; __le16 *utf16_path; __u8 oplock = SMB2_OPLOCK_LEVEL_NONE; + int err_buftype = CIFS_NO_BUFFER; struct cifs_open_parms oparms; + struct kvec err_iov = {}; struct cifs_fid fid; struct cached_fid *cfid; @@ -820,14 +822,32 @@ smb2_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon, oparms.fid = &fid; oparms.reconnect = false; - rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL, NULL, - NULL); + rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL, + &err_iov, &err_buftype); if (rc) { - kfree(utf16_path); - return rc; + struct smb2_hdr *hdr = err_iov.iov_base; + + if (unlikely(!hdr || err_buftype == CIFS_NO_BUFFER)) + goto out; + /* + * Handle weird Windows SMB server behaviour. It responds with + * STATUS_OBJECT_NAME_INVALID code to SMB2 QUERY_INFO request + * for "\<server>\<dfsname>\<linkpath>" DFS reference, + * where <dfsname> contains non-ASCII unicode symbols. + */ + if (rc != -EREMOTE && IS_ENABLED(CONFIG_CIFS_DFS_UPCALL) && + hdr->Status == STATUS_OBJECT_NAME_INVALID) + rc = -EREMOTE; + if (rc == -EREMOTE && IS_ENABLED(CONFIG_CIFS_DFS_UPCALL) && cifs_sb && + (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS)) + rc = -EOPNOTSUPP; + goto out; } rc = SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); + +out: + free_rsp_buf(err_buftype, err_iov.iov_base); kfree(utf16_path); return rc; } @@ -4204,69 +4224,82 @@ fill_transform_hdr(struct smb2_transform_hdr *tr_hdr, unsigned int orig_len, memcpy(&tr_hdr->SessionId, &shdr->SessionId, 8); } -/* We can not use the normal sg_set_buf() as we will sometimes pass a - * stack object as buf. - */ -static inline void smb2_sg_set_buf(struct scatterlist *sg, const void *buf, - unsigned int buflen) +static void *smb2_aead_req_alloc(struct crypto_aead *tfm, const struct smb_rqst *rqst, + int num_rqst, const u8 *sig, u8 **iv, + struct aead_request **req, struct scatterlist **sgl, + unsigned int *num_sgs) { - void *addr; - /* - * VMAP_STACK (at least) puts stack into the vmalloc address space - */ - if (is_vmalloc_addr(buf)) - addr = vmalloc_to_page(buf); - else - addr = virt_to_page(buf); - sg_set_page(sg, addr, buflen, offset_in_page(buf)); + unsigned int req_size = sizeof(**req) + crypto_aead_reqsize(tfm); + unsigned int iv_size = crypto_aead_ivsize(tfm); + unsigned int len; + u8 *p; + + *num_sgs = cifs_get_num_sgs(rqst, num_rqst, sig); + + len = iv_size; + len += crypto_aead_alignmask(tfm) & ~(crypto_tfm_ctx_alignment() - 1); + len = ALIGN(len, crypto_tfm_ctx_alignment()); + len += req_size; + len = ALIGN(len, __alignof__(struct scatterlist)); + len += *num_sgs * sizeof(**sgl); + + p = kmalloc(len, GFP_ATOMIC); + if (!p) + return NULL; + + *iv = (u8 *)PTR_ALIGN(p, crypto_aead_alignmask(tfm) + 1); + *req = (struct aead_request *)PTR_ALIGN(*iv + iv_size, + crypto_tfm_ctx_alignment()); + *sgl = (struct scatterlist *)PTR_ALIGN((u8 *)*req + req_size, + __alignof__(struct scatterlist)); + return p; } -/* Assumes the first rqst has a transform header as the first iov. - * I.e. - * rqst[0].rq_iov[0] is transform header - * rqst[0].rq_iov[1+] data to be encrypted/decrypted - * rqst[1+].rq_iov[0+] data to be encrypted/decrypted - */ -static struct scatterlist * -init_sg(int num_rqst, struct smb_rqst *rqst, u8 *sign) +static void *smb2_get_aead_req(struct crypto_aead *tfm, const struct smb_rqst *rqst, + int num_rqst, const u8 *sig, u8 **iv, + struct aead_request **req, struct scatterlist **sgl) { - unsigned int sg_len; + unsigned int off, len, skip; struct scatterlist *sg; - unsigned int i; - unsigned int j; - unsigned int idx = 0; - int skip; - - sg_len = 1; - for (i = 0; i < num_rqst; i++) - sg_len += rqst[i].rq_nvec + rqst[i].rq_npages; + unsigned int num_sgs; + unsigned long addr; + int i, j; + void *p; - sg = kmalloc_array(sg_len, sizeof(struct scatterlist), GFP_KERNEL); - if (!sg) + p = smb2_aead_req_alloc(tfm, rqst, num_rqst, sig, iv, req, sgl, &num_sgs); + if (!p) return NULL; - sg_init_table(sg, sg_len); + sg_init_table(*sgl, num_sgs); + sg = *sgl; + + /* Assumes the first rqst has a transform header as the first iov. + * I.e. + * rqst[0].rq_iov[0] is transform header + * rqst[0].rq_iov[1+] data to be encrypted/decrypted + * rqst[1+].rq_iov[0+] data to be encrypted/decrypted + */ for (i = 0; i < num_rqst; i++) { + /* + * The first rqst has a transform header where the + * first 20 bytes are not part of the encrypted blob. + */ for (j = 0; j < rqst[i].rq_nvec; j++) { - /* - * The first rqst has a transform header where the - * first 20 bytes are not part of the encrypted blob - */ - skip = (i == 0) && (j == 0) ? 20 : 0; - smb2_sg_set_buf(&sg[idx++], - rqst[i].rq_iov[j].iov_base + skip, - rqst[i].rq_iov[j].iov_len - skip); - } + struct kvec *iov = &rqst[i].rq_iov[j]; + skip = (i == 0) && (j == 0) ? 20 : 0; + addr = (unsigned long)iov->iov_base + skip; + len = iov->iov_len - skip; + sg = cifs_sg_set_buf(sg, (void *)addr, len); + } for (j = 0; j < rqst[i].rq_npages; j++) { - unsigned int len, offset; - - rqst_page_get_length(&rqst[i], j, &len, &offset); - sg_set_page(&sg[idx++], rqst[i].rq_pages[j], len, offset); + rqst_page_get_length(&rqst[i], j, &len, &off); + sg_set_page(sg++, rqst[i].rq_pages[j], len, off); } } - smb2_sg_set_buf(&sg[idx], sign, SMB2_SIGNATURE_SIZE); - return sg; + cifs_sg_set_buf(sg, sig, SMB2_SIGNATURE_SIZE); + + return p; } static int @@ -4314,11 +4347,11 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst, u8 sign[SMB2_SIGNATURE_SIZE] = {}; u8 key[SMB3_ENC_DEC_KEY_SIZE]; struct aead_request *req; - char *iv; - unsigned int iv_len; + u8 *iv; DECLARE_CRYPTO_WAIT(wait); struct crypto_aead *tfm; unsigned int crypt_len = le32_to_cpu(tr_hdr->OriginalMessageSize); + void *creq; rc = smb2_get_enc_key(server, le64_to_cpu(tr_hdr->SessionId), enc, key); if (rc) { @@ -4352,32 +4385,15 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst, return rc; } - req = aead_request_alloc(tfm, GFP_KERNEL); - if (!req) { - cifs_server_dbg(VFS, "%s: Failed to alloc aead request\n", __func__); + creq = smb2_get_aead_req(tfm, rqst, num_rqst, sign, &iv, &req, &sg); + if (unlikely(!creq)) return -ENOMEM; - } if (!enc) { memcpy(sign, &tr_hdr->Signature, SMB2_SIGNATURE_SIZE); crypt_len += SMB2_SIGNATURE_SIZE; } - sg = init_sg(num_rqst, rqst, sign); - if (!sg) { - cifs_server_dbg(VFS, "%s: Failed to init sg\n", __func__); - rc = -ENOMEM; - goto free_req; - } - - iv_len = crypto_aead_ivsize(tfm); - iv = kzalloc(iv_len, GFP_KERNEL); - if (!iv) { - cifs_server_dbg(VFS, "%s: Failed to alloc iv\n", __func__); - rc = -ENOMEM; - goto free_sg; - } - if ((server->cipher_type == SMB2_ENCRYPTION_AES128_GCM) || (server->cipher_type == SMB2_ENCRYPTION_AES256_GCM)) memcpy(iv, (char *)tr_hdr->Nonce, SMB3_AES_GCM_NONCE); @@ -4386,6 +4402,7 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst, memcpy(iv + 1, (char *)tr_hdr->Nonce, SMB3_AES_CCM_NONCE); } + aead_request_set_tfm(req, tfm); aead_request_set_crypt(req, sg, sg, crypt_len, iv); aead_request_set_ad(req, assoc_data_len); @@ -4398,11 +4415,7 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst, if (!rc && enc) memcpy(&tr_hdr->Signature, sign, SMB2_SIGNATURE_SIZE); - kfree_sensitive(iv); -free_sg: - kfree_sensitive(sg); -free_req: - kfree_sensitive(req); + kfree_sensitive(creq); return rc; } @@ -4445,21 +4458,27 @@ smb3_init_transform_rq(struct TCP_Server_Info *server, int num_rqst, int rc = -ENOMEM; for (i = 1; i < num_rqst; i++) { - npages = old_rq[i - 1].rq_npages; + struct smb_rqst *old = &old_rq[i - 1]; + struct smb_rqst *new = &new_rq[i]; + + orig_len += smb_rqst_len(server, old); + new->rq_iov = old->rq_iov; + new->rq_nvec = old->rq_nvec; + + npages = old->rq_npages; + if (!npages) + continue; + pages = kmalloc_array(npages, sizeof(struct page *), GFP_KERNEL); if (!pages) goto err_free; - new_rq[i].rq_pages = pages; - new_rq[i].rq_npages = npages; - new_rq[i].rq_offset = old_rq[i - 1].rq_offset; - new_rq[i].rq_pagesz = old_rq[i - 1].rq_pagesz; - new_rq[i].rq_tailsz = old_rq[i - 1].rq_tailsz; - new_rq[i].rq_iov = old_rq[i - 1].rq_iov; - new_rq[i].rq_nvec = old_rq[i - 1].rq_nvec; - - orig_len += smb_rqst_len(server, &old_rq[i - 1]); + new->rq_pages = pages; + new->rq_npages = npages; + new->rq_offset = old->rq_offset; + new->rq_pagesz = old->rq_pagesz; + new->rq_tailsz = old->rq_tailsz; for (j = 0; j < npages; j++) { pages[j] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM); @@ -4472,14 +4491,14 @@ smb3_init_transform_rq(struct TCP_Server_Info *server, int num_rqst, char *dst, *src; unsigned int offset, len; - rqst_page_get_length(&new_rq[i], j, &len, &offset); + rqst_page_get_length(new, j, &len, &offset); - dst = (char *) kmap(new_rq[i].rq_pages[j]) + offset; - src = (char *) kmap(old_rq[i - 1].rq_pages[j]) + offset; + dst = kmap_local_page(new->rq_pages[j]) + offset; + src = kmap_local_page(old->rq_pages[j]) + offset; memcpy(dst, src, len); - kunmap(new_rq[i].rq_pages[j]); - kunmap(old_rq[i - 1].rq_pages[j]); + kunmap(new->rq_pages[j]); + kunmap(old->rq_pages[j]); } } diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index be21b5d26f67..d5d7ffb7711c 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -277,7 +277,10 @@ extern int smb2_query_info_compound(const unsigned int xid, /* query path info from the server using SMB311 POSIX extensions*/ int smb311_posix_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const char *full_path, - struct cifs_open_info_data *data, bool *adjust_tz, bool *reparse); + struct cifs_open_info_data *data, + struct cifs_sid *owner, + struct cifs_sid *group, + bool *adjust_tz, bool *reparse); int posix_info_parse(const void *beg, const void *end, struct smb2_posix_info_parsed *out); int posix_info_sid_size(const void *beg, const void *end); diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 8b80ca0cd65f..4450721ec83c 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -645,6 +645,7 @@ static void add_sock(struct socket *sock, struct connection *con) if (dlm_config.ci_protocol == DLM_PROTO_SCTP) sk->sk_state_change = lowcomms_state_change; sk->sk_allocation = GFP_NOFS; + sk->sk_use_task_frag = false; sk->sk_error_report = lowcomms_error_report; release_sock(sk); } @@ -1769,6 +1770,7 @@ static int dlm_listen_for_all(void) listen_con.sock = sock; sock->sk->sk_allocation = GFP_NOFS; + sock->sk->sk_use_task_frag = false; sock->sk->sk_data_ready = lowcomms_listen_data_ready; release_sock(sock->sk); diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c index 0fc08fdcba73..1dfa67f307f1 100644 --- a/fs/exfat/dir.c +++ b/fs/exfat/dir.c @@ -33,10 +33,9 @@ static void exfat_get_uniname_from_ext_entry(struct super_block *sb, struct exfat_chain *p_dir, int entry, unsigned short *uniname) { int i; - struct exfat_entry_set_cache *es; + struct exfat_entry_set_cache es; - es = exfat_get_dentry_set(sb, p_dir, entry, ES_ALL_ENTRIES); - if (!es) + if (exfat_get_dentry_set(&es, sb, p_dir, entry, ES_ALL_ENTRIES)) return; /* @@ -45,8 +44,8 @@ static void exfat_get_uniname_from_ext_entry(struct super_block *sb, * Third entry : first file-name entry * So, the index of first file-name dentry should start from 2. */ - for (i = 2; i < es->num_entries; i++) { - struct exfat_dentry *ep = exfat_get_dentry_cached(es, i); + for (i = ES_IDX_FIRST_FILENAME; i < es.num_entries; i++) { + struct exfat_dentry *ep = exfat_get_dentry_cached(&es, i); /* end of name entry */ if (exfat_get_entry_type(ep) != TYPE_EXTEND) @@ -56,13 +55,13 @@ static void exfat_get_uniname_from_ext_entry(struct super_block *sb, uniname += EXFAT_FILE_NAME_LEN; } - exfat_free_dentry_set(es, false); + exfat_put_dentry_set(&es, false); } /* read a directory entry from the opened directory */ static int exfat_readdir(struct inode *inode, loff_t *cpos, struct exfat_dir_entry *dir_entry) { - int i, dentries_per_clu, dentries_per_clu_bits = 0, num_ext; + int i, dentries_per_clu, num_ext; unsigned int type, clu_offset, max_dentries; struct exfat_chain dir, clu; struct exfat_uni_name uni_name; @@ -84,11 +83,10 @@ static int exfat_readdir(struct inode *inode, loff_t *cpos, struct exfat_dir_ent EXFAT_B_TO_CLU(i_size_read(inode), sbi), ei->flags); dentries_per_clu = sbi->dentries_per_clu; - dentries_per_clu_bits = ilog2(dentries_per_clu); max_dentries = (unsigned int)min_t(u64, MAX_EXFAT_DENTRIES, - (u64)sbi->num_clusters << dentries_per_clu_bits); + (u64)EXFAT_CLU_TO_DEN(sbi->num_clusters, sbi)); - clu_offset = dentry >> dentries_per_clu_bits; + clu_offset = EXFAT_DEN_TO_CLU(dentry, sbi); exfat_chain_dup(&clu, &dir); if (clu.flags == ALLOC_NO_FAT_CHAIN) { @@ -163,7 +161,7 @@ static int exfat_readdir(struct inode *inode, loff_t *cpos, struct exfat_dir_ent dir_entry->entry = dentry; brelse(bh); - ei->hint_bmap.off = dentry >> dentries_per_clu_bits; + ei->hint_bmap.off = EXFAT_DEN_TO_CLU(dentry, sbi); ei->hint_bmap.clu = clu.dir; *cpos = EXFAT_DEN_TO_B(dentry + 1 + num_ext); @@ -337,7 +335,7 @@ int exfat_calc_num_entries(struct exfat_uni_name *p_uniname) return -EINVAL; /* 1 file entry + 1 stream entry + name entries */ - return ((len - 1) / EXFAT_FILE_NAME_LEN + 3); + return ES_ENTRY_NUM(len); } unsigned int exfat_get_entry_type(struct exfat_dentry *ep) @@ -592,18 +590,18 @@ void exfat_update_dir_chksum_with_entry_set(struct exfat_entry_set_cache *es) unsigned short chksum = 0; struct exfat_dentry *ep; - for (i = 0; i < es->num_entries; i++) { + for (i = ES_IDX_FILE; i < es->num_entries; i++) { ep = exfat_get_dentry_cached(es, i); chksum = exfat_calc_chksum16(ep, DENTRY_SIZE, chksum, chksum_type); chksum_type = CS_DEFAULT; } - ep = exfat_get_dentry_cached(es, 0); + ep = exfat_get_dentry_cached(es, ES_IDX_FILE); ep->dentry.file.checksum = cpu_to_le16(chksum); es->modified = true; } -int exfat_free_dentry_set(struct exfat_entry_set_cache *es, int sync) +int exfat_put_dentry_set(struct exfat_entry_set_cache *es, int sync) { int i, err = 0; @@ -615,7 +613,10 @@ int exfat_free_dentry_set(struct exfat_entry_set_cache *es, int sync) bforget(es->bh[i]); else brelse(es->bh[i]); - kfree(es); + + if (IS_DYNAMIC_ES(es)) + kfree(es->bh); + return err; } @@ -812,14 +813,14 @@ struct exfat_dentry *exfat_get_dentry_cached( * pointer of entry set on success, * NULL on failure. */ -struct exfat_entry_set_cache *exfat_get_dentry_set(struct super_block *sb, - struct exfat_chain *p_dir, int entry, unsigned int type) +int exfat_get_dentry_set(struct exfat_entry_set_cache *es, + struct super_block *sb, struct exfat_chain *p_dir, int entry, + unsigned int type) { int ret, i, num_bh; - unsigned int off, byte_offset, clu = 0; + unsigned int off; sector_t sec; struct exfat_sb_info *sbi = EXFAT_SB(sb); - struct exfat_entry_set_cache *es; struct exfat_dentry *ep; int num_entries; enum exfat_validate_dentry_mode mode = ES_MODE_STARTED; @@ -827,52 +828,51 @@ struct exfat_entry_set_cache *exfat_get_dentry_set(struct super_block *sb, if (p_dir->dir == DIR_DELETED) { exfat_err(sb, "access to deleted dentry"); - return NULL; + return -EIO; } - byte_offset = EXFAT_DEN_TO_B(entry); - ret = exfat_walk_fat_chain(sb, p_dir, byte_offset, &clu); + ret = exfat_find_location(sb, p_dir, entry, &sec, &off); if (ret) - return NULL; + return ret; - es = kzalloc(sizeof(*es), GFP_KERNEL); - if (!es) - return NULL; + memset(es, 0, sizeof(*es)); es->sb = sb; es->modified = false; - - /* byte offset in cluster */ - byte_offset = EXFAT_CLU_OFFSET(byte_offset, sbi); - - /* byte offset in sector */ - off = EXFAT_BLK_OFFSET(byte_offset, sb); es->start_off = off; - - /* sector offset in cluster */ - sec = EXFAT_B_TO_BLK(byte_offset, sb); - sec += exfat_cluster_to_sector(sbi, clu); + es->bh = es->__bh; bh = sb_bread(sb, sec); if (!bh) - goto free_es; + return -EIO; es->bh[es->num_bh++] = bh; - ep = exfat_get_dentry_cached(es, 0); + ep = exfat_get_dentry_cached(es, ES_IDX_FILE); if (!exfat_validate_entry(exfat_get_entry_type(ep), &mode)) - goto free_es; + goto put_es; num_entries = type == ES_ALL_ENTRIES ? ep->dentry.file.num_ext + 1 : type; es->num_entries = num_entries; num_bh = EXFAT_B_TO_BLK_ROUND_UP(off + num_entries * DENTRY_SIZE, sb); + if (num_bh > ARRAY_SIZE(es->__bh)) { + es->bh = kmalloc_array(num_bh, sizeof(*es->bh), GFP_KERNEL); + if (!es->bh) { + brelse(bh); + return -ENOMEM; + } + es->bh[0] = bh; + } + for (i = 1; i < num_bh; i++) { /* get the next sector */ if (exfat_is_last_sector_in_cluster(sbi, sec)) { + unsigned int clu = exfat_sector_to_cluster(sbi, sec); + if (p_dir->flags == ALLOC_NO_FAT_CHAIN) clu++; else if (exfat_get_next_cluster(sb, &clu)) - goto free_es; + goto put_es; sec = exfat_cluster_to_sector(sbi, clu); } else { sec++; @@ -880,21 +880,51 @@ struct exfat_entry_set_cache *exfat_get_dentry_set(struct super_block *sb, bh = sb_bread(sb, sec); if (!bh) - goto free_es; + goto put_es; es->bh[es->num_bh++] = bh; } /* validate cached dentries */ - for (i = 1; i < num_entries; i++) { + for (i = ES_IDX_STREAM; i < num_entries; i++) { ep = exfat_get_dentry_cached(es, i); if (!exfat_validate_entry(exfat_get_entry_type(ep), &mode)) - goto free_es; + goto put_es; } - return es; + return 0; + +put_es: + exfat_put_dentry_set(es, false); + return -EIO; +} -free_es: - exfat_free_dentry_set(es, false); - return NULL; +static inline void exfat_reset_empty_hint(struct exfat_hint_femp *hint_femp) +{ + hint_femp->eidx = EXFAT_HINT_NONE; + hint_femp->count = 0; +} + +static inline void exfat_set_empty_hint(struct exfat_inode_info *ei, + struct exfat_hint_femp *candi_empty, struct exfat_chain *clu, + int dentry, int num_entries, int entry_type) +{ + if (ei->hint_femp.eidx == EXFAT_HINT_NONE || + ei->hint_femp.eidx > dentry) { + int total_entries = EXFAT_B_TO_DEN(i_size_read(&ei->vfs_inode)); + + if (candi_empty->count == 0) { + candi_empty->cur = *clu; + candi_empty->eidx = dentry; + } + + if (entry_type == TYPE_UNUSED) + candi_empty->count += total_entries - dentry; + else + candi_empty->count++; + + if (candi_empty->count == num_entries || + candi_empty->count + candi_empty->eidx == total_entries) + ei->hint_femp = *candi_empty; + } } enum { @@ -917,17 +947,21 @@ enum { */ int exfat_find_dir_entry(struct super_block *sb, struct exfat_inode_info *ei, struct exfat_chain *p_dir, struct exfat_uni_name *p_uniname, - int num_entries, unsigned int type, struct exfat_hint *hint_opt) + struct exfat_hint *hint_opt) { int i, rewind = 0, dentry = 0, end_eidx = 0, num_ext = 0, len; int order, step, name_len = 0; - int dentries_per_clu, num_empty = 0; + int dentries_per_clu; unsigned int entry_type; unsigned short *uniname = NULL; struct exfat_chain clu; struct exfat_hint *hint_stat = &ei->hint_stat; struct exfat_hint_femp candi_empty; struct exfat_sb_info *sbi = EXFAT_SB(sb); + int num_entries = exfat_calc_num_entries(p_uniname); + + if (num_entries < 0) + return num_entries; dentries_per_clu = sbi->dentries_per_clu; @@ -939,10 +973,13 @@ int exfat_find_dir_entry(struct super_block *sb, struct exfat_inode_info *ei, end_eidx = dentry; } - candi_empty.eidx = EXFAT_HINT_NONE; + exfat_reset_empty_hint(&ei->hint_femp); + rewind: order = 0; step = DIRENT_STEP_FILE; + exfat_reset_empty_hint(&candi_empty); + while (clu.dir != EXFAT_EOF_CLUSTER) { i = dentry & (dentries_per_clu - 1); for (; i < dentries_per_clu; i++, dentry++) { @@ -962,26 +999,9 @@ rewind: entry_type == TYPE_DELETED) { step = DIRENT_STEP_FILE; - num_empty++; - if (candi_empty.eidx == EXFAT_HINT_NONE && - num_empty == 1) { - exfat_chain_set(&candi_empty.cur, - clu.dir, clu.size, clu.flags); - } - - if (candi_empty.eidx == EXFAT_HINT_NONE && - num_empty >= num_entries) { - candi_empty.eidx = - dentry - (num_empty - 1); - WARN_ON(candi_empty.eidx < 0); - candi_empty.count = num_empty; - - if (ei->hint_femp.eidx == - EXFAT_HINT_NONE || - candi_empty.eidx <= - ei->hint_femp.eidx) - ei->hint_femp = candi_empty; - } + exfat_set_empty_hint(ei, &candi_empty, &clu, + dentry, num_entries, + entry_type); brelse(bh); if (entry_type == TYPE_UNUSED) @@ -989,17 +1009,14 @@ rewind: continue; } - num_empty = 0; - candi_empty.eidx = EXFAT_HINT_NONE; + exfat_reset_empty_hint(&candi_empty); if (entry_type == TYPE_FILE || entry_type == TYPE_DIR) { step = DIRENT_STEP_FILE; hint_opt->clu = clu.dir; hint_opt->eidx = i; - if (type == TYPE_ALL || type == entry_type) { - num_ext = ep->dentry.file.num_ext; - step = DIRENT_STEP_STRM; - } + num_ext = ep->dentry.file.num_ext; + step = DIRENT_STEP_STRM; brelse(bh); continue; } @@ -1090,12 +1107,19 @@ not_found: rewind = 1; dentry = 0; clu.dir = p_dir->dir; - /* reset empty hint */ - num_empty = 0; - candi_empty.eidx = EXFAT_HINT_NONE; goto rewind; } + /* + * set the EXFAT_EOF_CLUSTER flag to avoid search + * from the beginning again when allocated a new cluster + */ + if (ei->hint_femp.eidx == EXFAT_HINT_NONE) { + ei->hint_femp.cur.dir = EXFAT_EOF_CLUSTER; + ei->hint_femp.eidx = p_dir->size * dentries_per_clu; + ei->hint_femp.count = 0; + } + /* initialized hint_stat */ hint_stat->clu = p_dir->dir; hint_stat->eidx = 0; diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h index a8f8eee4937c..bc6d21d7c5ad 100644 --- a/fs/exfat/exfat_fs.h +++ b/fs/exfat/exfat_fs.h @@ -9,6 +9,7 @@ #include <linux/fs.h> #include <linux/ratelimit.h> #include <linux/nls.h> +#include <linux/blkdev.h> #define EXFAT_ROOT_INO 1 @@ -41,6 +42,14 @@ enum { #define ES_2_ENTRIES 2 #define ES_ALL_ENTRIES 0 +#define ES_IDX_FILE 0 +#define ES_IDX_STREAM 1 +#define ES_IDX_FIRST_FILENAME 2 +#define EXFAT_FILENAME_ENTRY_NUM(name_len) \ + DIV_ROUND_UP(name_len, EXFAT_FILE_NAME_LEN) +#define ES_IDX_LAST_FILENAME(name_len) \ + (ES_IDX_FIRST_FILENAME + EXFAT_FILENAME_ENTRY_NUM(name_len) - 1) + #define DIR_DELETED 0xFFFF0321 /* type values */ @@ -62,15 +71,11 @@ enum { #define TYPE_PADDING 0x0402 #define TYPE_ACLTAB 0x0403 #define TYPE_BENIGN_SEC 0x0800 -#define TYPE_ALL 0x0FFF #define MAX_CHARSET_SIZE 6 /* max size of multi-byte character */ #define MAX_NAME_LENGTH 255 /* max len of file name excluding NULL */ #define MAX_VFSNAME_BUF_SIZE ((MAX_NAME_LENGTH + 1) * MAX_CHARSET_SIZE) -/* Enough size to hold 256 dentry (even 512 Byte sector) */ -#define DIR_CACHE_SIZE (256*sizeof(struct exfat_dentry)/512+1) - #define EXFAT_HINT_NONE -1 #define EXFAT_MIN_SUBDIR 2 @@ -95,12 +100,18 @@ enum { /* * helpers for block size to dentry size conversion. */ -#define EXFAT_B_TO_DEN_IDX(b, sbi) \ - ((b) << ((sbi)->cluster_size_bits - DENTRY_SIZE_BITS)) #define EXFAT_B_TO_DEN(b) ((b) >> DENTRY_SIZE_BITS) #define EXFAT_DEN_TO_B(b) ((b) << DENTRY_SIZE_BITS) /* + * helpers for cluster size to dentry size conversion. + */ +#define EXFAT_CLU_TO_DEN(clu, sbi) \ + ((clu) << ((sbi)->cluster_size_bits - DENTRY_SIZE_BITS)) +#define EXFAT_DEN_TO_CLU(dentry, sbi) \ + ((dentry) >> ((sbi)->cluster_size_bits - DENTRY_SIZE_BITS)) + +/* * helpers for fat entry. */ #define FAT_ENT_SIZE (4) @@ -125,6 +136,17 @@ enum { #define BITS_PER_BYTE_MASK 0x7 #define IGNORED_BITS_REMAINED(clu, clu_base) ((1 << ((clu) - (clu_base))) - 1) +#define ES_ENTRY_NUM(name_len) (ES_IDX_LAST_FILENAME(name_len) + 1) +/* 19 entries = 1 file entry + 1 stream entry + 17 filename entries */ +#define ES_MAX_ENTRY_NUM ES_ENTRY_NUM(MAX_NAME_LENGTH) + +/* + * 19 entries x 32 bytes/entry = 608 bytes. + * The 608 bytes are in 3 sectors at most (even 512 Byte sector). + */ +#define DIR_CACHE_SIZE \ + (DIV_ROUND_UP(EXFAT_DEN_TO_B(ES_MAX_ENTRY_NUM), SECTOR_SIZE) + 1) + struct exfat_dentry_namebuf { char *lfn; int lfnbuf_len; /* usually MAX_UNINAME_BUF_SIZE */ @@ -166,13 +188,16 @@ struct exfat_hint { struct exfat_entry_set_cache { struct super_block *sb; - bool modified; unsigned int start_off; int num_bh; - struct buffer_head *bh[DIR_CACHE_SIZE]; + struct buffer_head *__bh[DIR_CACHE_SIZE]; + struct buffer_head **bh; unsigned int num_entries; + bool modified; }; +#define IS_DYNAMIC_ES(es) ((es)->__bh != (es)->bh) + struct exfat_dir_entry { struct exfat_chain dir; int entry; @@ -375,7 +400,7 @@ static inline sector_t exfat_cluster_to_sector(struct exfat_sb_info *sbi, sbi->data_start_sector; } -static inline int exfat_sector_to_cluster(struct exfat_sb_info *sbi, +static inline unsigned int exfat_sector_to_cluster(struct exfat_sb_info *sbi, sector_t sec) { return ((sec - sbi->data_start_sector) >> sbi->sect_per_clus_bits) + @@ -423,8 +448,8 @@ int exfat_trim_fs(struct inode *inode, struct fstrim_range *range); /* file.c */ extern const struct file_operations exfat_file_operations; -int __exfat_truncate(struct inode *inode, loff_t new_size); -void exfat_truncate(struct inode *inode, loff_t size); +int __exfat_truncate(struct inode *inode); +void exfat_truncate(struct inode *inode); int exfat_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, struct iattr *attr); int exfat_getattr(struct user_namespace *mnt_userns, const struct path *path, @@ -464,15 +489,16 @@ void exfat_update_dir_chksum_with_entry_set(struct exfat_entry_set_cache *es); int exfat_calc_num_entries(struct exfat_uni_name *p_uniname); int exfat_find_dir_entry(struct super_block *sb, struct exfat_inode_info *ei, struct exfat_chain *p_dir, struct exfat_uni_name *p_uniname, - int num_entries, unsigned int type, struct exfat_hint *hint_opt); + struct exfat_hint *hint_opt); int exfat_alloc_new_dir(struct inode *inode, struct exfat_chain *clu); struct exfat_dentry *exfat_get_dentry(struct super_block *sb, struct exfat_chain *p_dir, int entry, struct buffer_head **bh); struct exfat_dentry *exfat_get_dentry_cached(struct exfat_entry_set_cache *es, int num); -struct exfat_entry_set_cache *exfat_get_dentry_set(struct super_block *sb, - struct exfat_chain *p_dir, int entry, unsigned int type); -int exfat_free_dentry_set(struct exfat_entry_set_cache *es, int sync); +int exfat_get_dentry_set(struct exfat_entry_set_cache *es, + struct super_block *sb, struct exfat_chain *p_dir, int entry, + unsigned int type); +int exfat_put_dentry_set(struct exfat_entry_set_cache *es, int sync); int exfat_count_dir_entries(struct super_block *sb, struct exfat_chain *p_dir); /* inode.c */ diff --git a/fs/exfat/file.c b/fs/exfat/file.c index 4e0793f35e8f..f5b29072775d 100644 --- a/fs/exfat/file.c +++ b/fs/exfat/file.c @@ -93,7 +93,7 @@ static int exfat_sanitize_mode(const struct exfat_sb_info *sbi, } /* resize the file length */ -int __exfat_truncate(struct inode *inode, loff_t new_size) +int __exfat_truncate(struct inode *inode) { unsigned int num_clusters_new, num_clusters_phys; unsigned int last_clu = EXFAT_FREE_CLUSTER; @@ -113,7 +113,7 @@ int __exfat_truncate(struct inode *inode, loff_t new_size) exfat_chain_set(&clu, ei->start_clu, num_clusters_phys, ei->flags); - if (new_size > 0) { + if (i_size_read(inode) > 0) { /* * Truncate FAT chain num_clusters after the first cluster * num_clusters = min(new, phys); @@ -143,8 +143,6 @@ int __exfat_truncate(struct inode *inode, loff_t new_size) ei->start_clu = EXFAT_EOF_CLUSTER; } - i_size_write(inode, new_size); - if (ei->type == TYPE_FILE) ei->attr |= ATTR_ARCHIVE; @@ -189,7 +187,7 @@ int __exfat_truncate(struct inode *inode, loff_t new_size) return 0; } -void exfat_truncate(struct inode *inode, loff_t size) +void exfat_truncate(struct inode *inode) { struct super_block *sb = inode->i_sb; struct exfat_sb_info *sbi = EXFAT_SB(sb); @@ -207,7 +205,7 @@ void exfat_truncate(struct inode *inode, loff_t size) goto write_size; } - err = __exfat_truncate(inode, i_size_read(inode)); + err = __exfat_truncate(inode); if (err) goto write_size; @@ -310,7 +308,7 @@ int exfat_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, * __exfat_write_inode() is called from exfat_truncate(), inode * is already written by it, so mark_inode_dirty() is unneeded. */ - exfat_truncate(inode, attr->ia_size); + exfat_truncate(inode); up_write(&EXFAT_I(inode)->truncate_lock); } else mark_inode_dirty(inode); diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c index eac95bcd9a8a..5b644cb057fa 100644 --- a/fs/exfat/inode.c +++ b/fs/exfat/inode.c @@ -21,7 +21,7 @@ int __exfat_write_inode(struct inode *inode, int sync) { unsigned long long on_disk_size; struct exfat_dentry *ep, *ep2; - struct exfat_entry_set_cache *es = NULL; + struct exfat_entry_set_cache es; struct super_block *sb = inode->i_sb; struct exfat_sb_info *sbi = EXFAT_SB(sb); struct exfat_inode_info *ei = EXFAT_I(inode); @@ -42,11 +42,10 @@ int __exfat_write_inode(struct inode *inode, int sync) exfat_set_volume_dirty(sb); /* get the directory entry of given file or directory */ - es = exfat_get_dentry_set(sb, &(ei->dir), ei->entry, ES_ALL_ENTRIES); - if (!es) + if (exfat_get_dentry_set(&es, sb, &(ei->dir), ei->entry, ES_ALL_ENTRIES)) return -EIO; - ep = exfat_get_dentry_cached(es, 0); - ep2 = exfat_get_dentry_cached(es, 1); + ep = exfat_get_dentry_cached(&es, ES_IDX_FILE); + ep2 = exfat_get_dentry_cached(&es, ES_IDX_STREAM); ep->dentry.file.attr = cpu_to_le16(exfat_make_attr(inode)); @@ -83,8 +82,8 @@ int __exfat_write_inode(struct inode *inode, int sync) ep2->dentry.stream.start_clu = EXFAT_FREE_CLUSTER; } - exfat_update_dir_chksum_with_entry_set(es); - return exfat_free_dentry_set(es, sync); + exfat_update_dir_chksum_with_entry_set(&es); + return exfat_put_dentry_set(&es, sync); } int exfat_write_inode(struct inode *inode, struct writeback_control *wbc) @@ -358,7 +357,7 @@ static void exfat_write_failed(struct address_space *mapping, loff_t to) if (to > i_size_read(inode)) { truncate_pagecache(inode, i_size_read(inode)); inode->i_mtime = inode->i_ctime = current_time(inode); - exfat_truncate(inode, EXFAT_I(inode)->i_size_aligned); + exfat_truncate(inode); } } @@ -622,7 +621,7 @@ void exfat_evict_inode(struct inode *inode) if (!inode->i_nlink) { i_size_write(inode, 0); mutex_lock(&EXFAT_SB(inode->i_sb)->s_lock); - __exfat_truncate(inode, 0); + __exfat_truncate(inode); mutex_unlock(&EXFAT_SB(inode->i_sb)->s_lock); } diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c index b617bebc3d0f..5f995eba5dbb 100644 --- a/fs/exfat/namei.c +++ b/fs/exfat/namei.c @@ -224,11 +224,18 @@ static int exfat_search_empty_slot(struct super_block *sb, if (hint_femp->eidx != EXFAT_HINT_NONE) { dentry = hint_femp->eidx; - if (num_entries <= hint_femp->count) { - hint_femp->eidx = EXFAT_HINT_NONE; - return dentry; - } + /* + * If hint_femp->count is enough, it is needed to check if + * there are actual empty entries. + * Otherwise, and if "dentry + hint_famp->count" is also equal + * to "p_dir->size * dentries_per_clu", it means ENOSPC. + */ + if (dentry + hint_femp->count == p_dir->size * dentries_per_clu && + num_entries > hint_femp->count) + return -ENOSPC; + + hint_femp->eidx = EXFAT_HINT_NONE; exfat_chain_dup(&clu, &hint_femp->cur); } else { exfat_chain_dup(&clu, p_dir); @@ -293,6 +300,12 @@ static int exfat_search_empty_slot(struct super_block *sb, } } + hint_femp->eidx = p_dir->size * dentries_per_clu - num_empty; + hint_femp->count = num_empty; + if (num_empty == 0) + exfat_chain_set(&hint_femp->cur, EXFAT_EOF_CLUSTER, 0, + clu.flags); + return -ENOSPC; } @@ -369,15 +382,11 @@ static int exfat_find_empty_entry(struct inode *inode, if (exfat_ent_set(sb, last_clu, clu.dir)) return -EIO; - if (hint_femp.eidx == EXFAT_HINT_NONE) { - /* the special case that new dentry - * should be allocated from the start of new cluster - */ - hint_femp.eidx = EXFAT_B_TO_DEN_IDX(p_dir->size, sbi); - hint_femp.count = sbi->dentries_per_clu; - + if (hint_femp.cur.dir == EXFAT_EOF_CLUSTER) exfat_chain_set(&hint_femp.cur, clu.dir, 0, clu.flags); - } + + hint_femp.count += sbi->dentries_per_clu; + hint_femp.cur.size++; p_dir->size++; size = EXFAT_CLU_TO_B(p_dir->size, sbi); @@ -588,14 +597,14 @@ unlock: static int exfat_find(struct inode *dir, struct qstr *qname, struct exfat_dir_entry *info) { - int ret, dentry, num_entries, count; + int ret, dentry, count; struct exfat_chain cdir; struct exfat_uni_name uni_name; struct super_block *sb = dir->i_sb; struct exfat_sb_info *sbi = EXFAT_SB(sb); struct exfat_inode_info *ei = EXFAT_I(dir); struct exfat_dentry *ep, *ep2; - struct exfat_entry_set_cache *es; + struct exfat_entry_set_cache es; /* for optimized dir & entry to prevent long traverse of cluster chain */ struct exfat_hint hint_opt; @@ -607,10 +616,6 @@ static int exfat_find(struct inode *dir, struct qstr *qname, if (ret) return ret; - num_entries = exfat_calc_num_entries(&uni_name); - if (num_entries < 0) - return num_entries; - /* check the validation of hint_stat and initialize it if required */ if (ei->version != (inode_peek_iversion_raw(dir) & 0xffffffff)) { ei->hint_stat.clu = cdir.dir; @@ -620,9 +625,7 @@ static int exfat_find(struct inode *dir, struct qstr *qname, } /* search the file name for directories */ - dentry = exfat_find_dir_entry(sb, ei, &cdir, &uni_name, - num_entries, TYPE_ALL, &hint_opt); - + dentry = exfat_find_dir_entry(sb, ei, &cdir, &uni_name, &hint_opt); if (dentry < 0) return dentry; /* -error value */ @@ -635,11 +638,10 @@ static int exfat_find(struct inode *dir, struct qstr *qname, if (cdir.flags & ALLOC_NO_FAT_CHAIN) cdir.size -= dentry / sbi->dentries_per_clu; dentry = hint_opt.eidx; - es = exfat_get_dentry_set(sb, &cdir, dentry, ES_2_ENTRIES); - if (!es) + if (exfat_get_dentry_set(&es, sb, &cdir, dentry, ES_2_ENTRIES)) return -EIO; - ep = exfat_get_dentry_cached(es, 0); - ep2 = exfat_get_dentry_cached(es, 1); + ep = exfat_get_dentry_cached(&es, ES_IDX_FILE); + ep2 = exfat_get_dentry_cached(&es, ES_IDX_STREAM); info->type = exfat_get_entry_type(ep); info->attr = le16_to_cpu(ep->dentry.file.attr); @@ -668,7 +670,7 @@ static int exfat_find(struct inode *dir, struct qstr *qname, ep->dentry.file.access_time, ep->dentry.file.access_date, 0); - exfat_free_dentry_set(es, false); + exfat_put_dentry_set(&es, false); if (ei->start_clu == EXFAT_FREE_CLUSTER) { exfat_fs_error(sb, @@ -1167,7 +1169,7 @@ static int __exfat_rename(struct inode *old_parent_inode, struct exfat_inode_info *new_ei = NULL; unsigned int new_entry_type = TYPE_UNUSED; int new_entry = 0; - struct buffer_head *old_bh, *new_bh = NULL; + struct buffer_head *new_bh = NULL; /* check the validity of pointer parameters */ if (new_path == NULL || strlen(new_path) == 0) @@ -1183,13 +1185,6 @@ static int __exfat_rename(struct inode *old_parent_inode, EXFAT_I(old_parent_inode)->flags); dentry = ei->entry; - ep = exfat_get_dentry(sb, &olddir, dentry, &old_bh); - if (!ep) { - ret = -EIO; - goto out; - } - brelse(old_bh); - /* check whether new dir is existing directory and empty */ if (new_inode) { ret = -EIO; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 16a343e8047d..260c1b3e3ef2 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1225,7 +1225,7 @@ static void ext4_put_super(struct super_block *sb) } ext4_es_unregister_shrinker(sbi); - del_timer_sync(&sbi->s_err_report); + timer_shutdown_sync(&sbi->s_err_report); ext4_release_system_zone(sb); ext4_mb_release(sb); ext4_ext_release(sb); diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 9958d4020771..6fba5a52127b 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -121,6 +121,7 @@ static bool inode_io_list_move_locked(struct inode *inode, { assert_spin_locked(&wb->list_lock); assert_spin_locked(&inode->i_lock); + WARN_ON_ONCE(inode->i_state & I_FREEING); list_move(&inode->i_io_list, head); @@ -280,6 +281,7 @@ static void inode_cgwb_move_to_attached(struct inode *inode, { assert_spin_locked(&wb->list_lock); assert_spin_locked(&inode->i_lock); + WARN_ON_ONCE(inode->i_state & I_FREEING); inode->i_state &= ~I_SYNC_QUEUED; if (wb != &wb->bdi->wb) @@ -1129,6 +1131,7 @@ static void inode_cgwb_move_to_attached(struct inode *inode, { assert_spin_locked(&wb->list_lock); assert_spin_locked(&inode->i_lock); + WARN_ON_ONCE(inode->i_state & I_FREEING); inode->i_state &= ~I_SYNC_QUEUED; list_del_init(&inode->i_io_list); @@ -1294,6 +1297,17 @@ static void redirty_tail_locked(struct inode *inode, struct bdi_writeback *wb) { assert_spin_locked(&inode->i_lock); + inode->i_state &= ~I_SYNC_QUEUED; + /* + * When the inode is being freed just don't bother with dirty list + * tracking. Flush worker will ignore this inode anyway and it will + * trigger assertions in inode_io_list_move_locked(). + */ + if (inode->i_state & I_FREEING) { + list_del_init(&inode->i_io_list); + wb_io_lists_depopulated(wb); + return; + } if (!list_empty(&wb->b_dirty)) { struct inode *tail; @@ -1302,7 +1316,6 @@ static void redirty_tail_locked(struct inode *inode, struct bdi_writeback *wb) inode->dirtied_when = jiffies; } inode_io_list_move_locked(inode, wb, &wb->b_dirty); - inode->i_state &= ~I_SYNC_QUEUED; } static void redirty_tail(struct inode *inode, struct bdi_writeback *wb) @@ -1345,8 +1358,6 @@ static bool inode_dirtied_after(struct inode *inode, unsigned long t) return ret; } -#define EXPIRE_DIRTY_ATIME 0x0001 - /* * Move expired (dirtied before dirtied_before) dirty inodes from * @delaying_queue to @dispatch_queue. diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 05bee80ac7de..e782b4f1d104 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -427,8 +427,6 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page) return error; kaddr = kmap_atomic(page); - if (dsize > gfs2_max_stuffed_size(ip)) - dsize = gfs2_max_stuffed_size(ip); memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize); memset(kaddr + dsize, 0, PAGE_SIZE - dsize); kunmap_atomic(kaddr); diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 3bdb2c668a71..e7537fd305dd 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -61,9 +61,6 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh, void *kaddr = kmap(page); u64 dsize = i_size_read(inode); - if (dsize > gfs2_max_stuffed_size(ip)) - dsize = gfs2_max_stuffed_size(ip); - memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize); memset(kaddr + dsize, 0, PAGE_SIZE - dsize); kunmap(page); diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 60c6fb91fb58..eea5be4fbf0e 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -1445,14 +1445,13 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl) static void __flock_holder_uninit(struct file *file, struct gfs2_holder *fl_gh) { - struct gfs2_glock *gl = fl_gh->gh_gl; + struct gfs2_glock *gl = gfs2_glock_hold(fl_gh->gh_gl); /* * Make sure gfs2_glock_put() won't sleep under the file->f_lock * spinlock. */ - gfs2_glock_hold(gl); spin_lock(&file->f_lock); gfs2_holder_uninit(fl_gh); spin_unlock(&file->f_lock); diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index df335c258eb0..524f3c96b9a4 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -186,10 +186,11 @@ void gfs2_glock_free(struct gfs2_glock *gl) * */ -void gfs2_glock_hold(struct gfs2_glock *gl) +struct gfs2_glock *gfs2_glock_hold(struct gfs2_glock *gl) { GLOCK_BUG_ON(gl, __lockref_is_dead(&gl->gl_lockref)); lockref_get(&gl->gl_lockref); + return gl; } /** @@ -205,12 +206,6 @@ static int demote_ok(const struct gfs2_glock *gl) if (gl->gl_state == LM_ST_UNLOCKED) return 0; - /* - * Note that demote_ok is used for the lru process of disposing of - * glocks. For this purpose, we don't care if the glock's holders - * have the HIF_MAY_DEMOTE flag set or not. If someone is using - * them, don't demote. - */ if (!list_empty(&gl->gl_holders)) return 0; if (glops->go_demote_ok) @@ -393,7 +388,7 @@ static void do_error(struct gfs2_glock *gl, const int ret) struct gfs2_holder *gh, *tmp; list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) { - if (!test_bit(HIF_WAIT, &gh->gh_iflags)) + if (test_bit(HIF_HOLDER, &gh->gh_iflags)) continue; if (ret & LM_OUT_ERROR) gh->gh_error = -EIO; @@ -408,45 +403,6 @@ static void do_error(struct gfs2_glock *gl, const int ret) } /** - * demote_incompat_holders - demote incompatible demoteable holders - * @gl: the glock we want to promote - * @current_gh: the newly promoted holder - * - * We're passing the newly promoted holder in @current_gh, but actually, any of - * the strong holders would do. - */ -static void demote_incompat_holders(struct gfs2_glock *gl, - struct gfs2_holder *current_gh) -{ - struct gfs2_holder *gh, *tmp; - - /* - * Demote incompatible holders before we make ourselves eligible. - * (This holder may or may not allow auto-demoting, but we don't want - * to demote the new holder before it's even granted.) - */ - list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) { - /* - * Since holders are at the front of the list, we stop when we - * find the first non-holder. - */ - if (!test_bit(HIF_HOLDER, &gh->gh_iflags)) - return; - if (gh == current_gh) - continue; - if (test_bit(HIF_MAY_DEMOTE, &gh->gh_iflags) && - !may_grant(gl, current_gh, gh)) { - /* - * We should not recurse into do_promote because - * __gfs2_glock_dq only calls handle_callback, - * gfs2_glock_add_to_lru and __gfs2_glock_queue_work. - */ - __gfs2_glock_dq(gh); - } - } -} - -/** * find_first_holder - find the first "holder" gh * @gl: the glock */ @@ -464,26 +420,6 @@ static inline struct gfs2_holder *find_first_holder(const struct gfs2_glock *gl) return NULL; } -/** - * find_first_strong_holder - find the first non-demoteable holder - * @gl: the glock - * - * Find the first holder that doesn't have the HIF_MAY_DEMOTE flag set. - */ -static inline struct gfs2_holder * -find_first_strong_holder(struct gfs2_glock *gl) -{ - struct gfs2_holder *gh; - - list_for_each_entry(gh, &gl->gl_holders, gh_list) { - if (!test_bit(HIF_HOLDER, &gh->gh_iflags)) - return NULL; - if (!test_bit(HIF_MAY_DEMOTE, &gh->gh_iflags)) - return gh; - } - return NULL; -} - /* * gfs2_instantiate - Call the glops instantiate function * @gh: The glock holder @@ -540,9 +476,8 @@ done: static int do_promote(struct gfs2_glock *gl) { struct gfs2_holder *gh, *current_gh; - bool incompat_holders_demoted = false; - current_gh = find_first_strong_holder(gl); + current_gh = find_first_holder(gl); list_for_each_entry(gh, &gl->gl_holders, gh_list) { if (test_bit(HIF_HOLDER, &gh->gh_iflags)) continue; @@ -561,11 +496,8 @@ static int do_promote(struct gfs2_glock *gl) set_bit(HIF_HOLDER, &gh->gh_iflags); trace_gfs2_promote(gh); gfs2_holder_wake(gh); - if (!incompat_holders_demoted) { + if (!current_gh) current_gh = gh; - demote_incompat_holders(gl, current_gh); - incompat_holders_demoted = true; - } } return 0; } @@ -927,6 +859,48 @@ out_unlock: return; } +/** + * glock_set_object - set the gl_object field of a glock + * @gl: the glock + * @object: the object + */ +void glock_set_object(struct gfs2_glock *gl, void *object) +{ + void *prev_object; + + spin_lock(&gl->gl_lockref.lock); + prev_object = gl->gl_object; + gl->gl_object = object; + spin_unlock(&gl->gl_lockref.lock); + if (gfs2_assert_warn(gl->gl_name.ln_sbd, prev_object == NULL)) { + pr_warn("glock=%u/%llx\n", + gl->gl_name.ln_type, + (unsigned long long)gl->gl_name.ln_number); + gfs2_dump_glock(NULL, gl, true); + } +} + +/** + * glock_clear_object - clear the gl_object field of a glock + * @gl: the glock + */ +void glock_clear_object(struct gfs2_glock *gl, void *object) +{ + void *prev_object; + + spin_lock(&gl->gl_lockref.lock); + prev_object = gl->gl_object; + gl->gl_object = NULL; + spin_unlock(&gl->gl_lockref.lock); + if (gfs2_assert_warn(gl->gl_name.ln_sbd, + prev_object == object || prev_object == NULL)) { + pr_warn("glock=%u/%llx\n", + gl->gl_name.ln_type, + (unsigned long long)gl->gl_name.ln_number); + gfs2_dump_glock(NULL, gl, true); + } +} + void gfs2_inode_remember_delete(struct gfs2_glock *gl, u64 generation) { struct gfs2_inode_lvb *ri = (void *)gl->gl_lksb.sb_lvbptr; @@ -980,8 +954,6 @@ static bool gfs2_try_evict(struct gfs2_glock *gl) ip = NULL; spin_unlock(&gl->gl_lockref.lock); if (ip) { - struct gfs2_glock *inode_gl = NULL; - gl->gl_no_formal_ino = ip->i_no_formal_ino; set_bit(GIF_DEFERRED_DELETE, &ip->i_flags); d_prune_aliases(&ip->i_inode); @@ -991,14 +963,14 @@ static bool gfs2_try_evict(struct gfs2_glock *gl) spin_lock(&gl->gl_lockref.lock); ip = gl->gl_object; if (ip) { - inode_gl = ip->i_gl; - lockref_get(&inode_gl->gl_lockref); clear_bit(GIF_DEFERRED_DELETE, &ip->i_flags); + if (!igrab(&ip->i_inode)) + ip = NULL; } spin_unlock(&gl->gl_lockref.lock); - if (inode_gl) { - gfs2_glock_poke(inode_gl); - gfs2_glock_put(inode_gl); + if (ip) { + gfs2_glock_poke(ip->i_gl); + iput(&ip->i_inode); } evicted = !ip; } @@ -1039,6 +1011,7 @@ static void delete_work_func(struct work_struct *work) if (gfs2_queue_delete_work(gl, 5 * HZ)) return; } + goto out; } inode = gfs2_lookup_by_inum(sdp, no_addr, gl->gl_no_formal_ino, @@ -1051,6 +1024,7 @@ static void delete_work_func(struct work_struct *work) d_prune_aliases(inode); iput(inode); } +out: gfs2_glock_put(gl); } @@ -1256,13 +1230,12 @@ void __gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, u16 flags, struct gfs2_holder *gh, unsigned long ip) { INIT_LIST_HEAD(&gh->gh_list); - gh->gh_gl = gl; + gh->gh_gl = gfs2_glock_hold(gl); gh->gh_ip = ip; gh->gh_owner_pid = get_pid(task_pid(current)); gh->gh_state = state; gh->gh_flags = flags; gh->gh_iflags = 0; - gfs2_glock_hold(gl); } /** @@ -1496,7 +1469,7 @@ __acquires(&gl->gl_lockref.lock) if (test_bit(GLF_LOCK, &gl->gl_flags)) { struct gfs2_holder *current_gh; - current_gh = find_first_strong_holder(gl); + current_gh = find_first_holder(gl); try_futile = !may_grant(gl, current_gh, gh); } if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) @@ -1508,8 +1481,6 @@ __acquires(&gl->gl_lockref.lock) continue; if (gh->gh_gl->gl_ops->go_type == LM_TYPE_FLOCK) continue; - if (test_bit(HIF_MAY_DEMOTE, &gh2->gh_iflags)) - continue; if (!pid_is_meaningful(gh2)) continue; goto trap_recursive; @@ -1619,69 +1590,28 @@ static inline bool needs_demote(struct gfs2_glock *gl) static void __gfs2_glock_dq(struct gfs2_holder *gh) { struct gfs2_glock *gl = gh->gh_gl; - struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; unsigned delay = 0; int fast_path = 0; /* - * This while loop is similar to function demote_incompat_holders: - * If the glock is due to be demoted (which may be from another node - * or even if this holder is GL_NOCACHE), the weak holders are - * demoted as well, allowing the glock to be demoted. + * This holder should not be cached, so mark it for demote. + * Note: this should be done before the check for needs_demote + * below. */ - while (gh) { - /* - * If we're in the process of file system withdraw, we cannot - * just dequeue any glocks until our journal is recovered, lest - * we introduce file system corruption. We need two exceptions - * to this rule: We need to allow unlocking of nondisk glocks - * and the glock for our own journal that needs recovery. - */ - if (test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags) && - glock_blocked_by_withdraw(gl) && - gh->gh_gl != sdp->sd_jinode_gl) { - sdp->sd_glock_dqs_held++; - spin_unlock(&gl->gl_lockref.lock); - might_sleep(); - wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY, - TASK_UNINTERRUPTIBLE); - spin_lock(&gl->gl_lockref.lock); - } + if (gh->gh_flags & GL_NOCACHE) + handle_callback(gl, LM_ST_UNLOCKED, 0, false); - /* - * This holder should not be cached, so mark it for demote. - * Note: this should be done before the check for needs_demote - * below. - */ - if (gh->gh_flags & GL_NOCACHE) - handle_callback(gl, LM_ST_UNLOCKED, 0, false); - - list_del_init(&gh->gh_list); - clear_bit(HIF_HOLDER, &gh->gh_iflags); - trace_gfs2_glock_queue(gh, 0); + list_del_init(&gh->gh_list); + clear_bit(HIF_HOLDER, &gh->gh_iflags); + trace_gfs2_glock_queue(gh, 0); - /* - * If there hasn't been a demote request we are done. - * (Let the remaining holders, if any, keep holding it.) - */ - if (!needs_demote(gl)) { - if (list_empty(&gl->gl_holders)) - fast_path = 1; - break; - } - /* - * If we have another strong holder (we cannot auto-demote) - * we are done. It keeps holding it until it is done. - */ - if (find_first_strong_holder(gl)) - break; - - /* - * If we have a weak holder at the head of the list, it - * (and all others like it) must be auto-demoted. If there - * are no more weak holders, we exit the while loop. - */ - gh = find_first_holder(gl); + /* + * If there hasn't been a demote request we are done. + * (Let the remaining holders, if any, keep holding it.) + */ + if (!needs_demote(gl)) { + if (list_empty(&gl->gl_holders)) + fast_path = 1; } if (!test_bit(GLF_LFLUSH, &gl->gl_flags) && demote_ok(gl)) @@ -1705,8 +1635,17 @@ static void __gfs2_glock_dq(struct gfs2_holder *gh) void gfs2_glock_dq(struct gfs2_holder *gh) { struct gfs2_glock *gl = gh->gh_gl; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; spin_lock(&gl->gl_lockref.lock); + if (!gfs2_holder_queued(gh)) { + /* + * May have already been dequeued because the locking request + * was GL_ASYNC and it has failed in the meantime. + */ + goto out; + } + if (list_is_first(&gh->gh_list, &gl->gl_holders) && !test_bit(HIF_HOLDER, &gh->gh_iflags)) { spin_unlock(&gl->gl_lockref.lock); @@ -1715,7 +1654,26 @@ void gfs2_glock_dq(struct gfs2_holder *gh) spin_lock(&gl->gl_lockref.lock); } + /* + * If we're in the process of file system withdraw, we cannot just + * dequeue any glocks until our journal is recovered, lest we introduce + * file system corruption. We need two exceptions to this rule: We need + * to allow unlocking of nondisk glocks and the glock for our own + * journal that needs recovery. + */ + if (test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags) && + glock_blocked_by_withdraw(gl) && + gh->gh_gl != sdp->sd_jinode_gl) { + sdp->sd_glock_dqs_held++; + spin_unlock(&gl->gl_lockref.lock); + might_sleep(); + wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY, + TASK_UNINTERRUPTIBLE); + spin_lock(&gl->gl_lockref.lock); + } + __gfs2_glock_dq(gh); +out: spin_unlock(&gl->gl_lockref.lock); } @@ -1888,33 +1846,6 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state) if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags)) delay = gl->gl_hold_time; } - /* - * Note 1: We cannot call demote_incompat_holders from handle_callback - * or gfs2_set_demote due to recursion problems like: gfs2_glock_dq -> - * handle_callback -> demote_incompat_holders -> gfs2_glock_dq - * Plus, we only want to demote the holders if the request comes from - * a remote cluster node because local holder conflicts are resolved - * elsewhere. - * - * Note 2: if a remote node wants this glock in EX mode, lock_dlm will - * request that we set our state to UNLOCKED. Here we mock up a holder - * to make it look like someone wants the lock EX locally. Any SH - * and DF requests should be able to share the lock without demoting. - * - * Note 3: We only want to demote the demoteable holders when there - * are no more strong holders. The demoteable holders might as well - * keep the glock until the last strong holder is done with it. - */ - if (!find_first_strong_holder(gl)) { - struct gfs2_holder mock_gh = { - .gh_gl = gl, - .gh_state = (state == LM_ST_UNLOCKED) ? - LM_ST_EXCLUSIVE : state, - .gh_iflags = BIT(HIF_HOLDER) - }; - - demote_incompat_holders(gl, &mock_gh); - } handle_callback(gl, state, delay, true); __gfs2_glock_queue_work(gl, delay); spin_unlock(&gl->gl_lockref.lock); @@ -2306,8 +2237,6 @@ static const char *hflags2str(char *buf, u16 flags, unsigned long iflags) *p++ = 'H'; if (test_bit(HIF_WAIT, &iflags)) *p++ = 'W'; - if (test_bit(HIF_MAY_DEMOTE, &iflags)) - *p++ = 'D'; if (flags & GL_SKIP) *p++ = 's'; *p = 0; diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 0d068f4fd7d6..f37ac087e2c1 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -156,8 +156,6 @@ static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock * list_for_each_entry(gh, &gl->gl_holders, gh_list) { if (!test_bit(HIF_HOLDER, &gh->gh_iflags)) break; - if (test_bit(HIF_MAY_DEMOTE, &gh->gh_iflags)) - continue; if (gh->gh_owner_pid == pid) goto out; } @@ -196,7 +194,7 @@ static inline struct address_space *gfs2_glock2aspace(struct gfs2_glock *gl) extern int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, const struct gfs2_glock_operations *glops, int create, struct gfs2_glock **glp); -extern void gfs2_glock_hold(struct gfs2_glock *gl); +extern struct gfs2_glock *gfs2_glock_hold(struct gfs2_glock *gl); extern void gfs2_glock_put(struct gfs2_glock *gl); extern void gfs2_glock_queue_put(struct gfs2_glock *gl); @@ -288,6 +286,9 @@ extern void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp); extern void gfs2_register_debugfs(void); extern void gfs2_unregister_debugfs(void); +extern void glock_set_object(struct gfs2_glock *gl, void *object); +extern void glock_clear_object(struct gfs2_glock *gl, void *object); + extern const struct lm_lockops gfs2_dlm_ops; static inline void gfs2_holder_mark_uninitialized(struct gfs2_holder *gh) @@ -305,64 +306,6 @@ static inline bool gfs2_holder_queued(struct gfs2_holder *gh) return !list_empty(&gh->gh_list); } -/** - * glock_set_object - set the gl_object field of a glock - * @gl: the glock - * @object: the object - */ -static inline void glock_set_object(struct gfs2_glock *gl, void *object) -{ - spin_lock(&gl->gl_lockref.lock); - if (gfs2_assert_warn(gl->gl_name.ln_sbd, gl->gl_object == NULL)) - gfs2_dump_glock(NULL, gl, true); - gl->gl_object = object; - spin_unlock(&gl->gl_lockref.lock); -} - -/** - * glock_clear_object - clear the gl_object field of a glock - * @gl: the glock - * @object: the object - * - * I'd love to similarly add this: - * else if (gfs2_assert_warn(gl->gl_sbd, gl->gl_object == object)) - * gfs2_dump_glock(NULL, gl, true); - * Unfortunately, that's not possible because as soon as gfs2_delete_inode - * frees the block in the rgrp, another process can reassign it for an I_NEW - * inode in gfs2_create_inode because that calls new_inode, not gfs2_iget. - * That means gfs2_delete_inode may subsequently try to call this function - * for a glock that's already pointing to a brand new inode. If we clear the - * new inode's gl_object, we'll introduce metadata corruption. Function - * gfs2_delete_inode calls clear_inode which calls gfs2_clear_inode which also - * tries to clear gl_object, so it's more than just gfs2_delete_inode. - * - */ -static inline void glock_clear_object(struct gfs2_glock *gl, void *object) -{ - spin_lock(&gl->gl_lockref.lock); - if (gl->gl_object == object) - gl->gl_object = NULL; - spin_unlock(&gl->gl_lockref.lock); -} - -static inline void gfs2_holder_allow_demote(struct gfs2_holder *gh) -{ - struct gfs2_glock *gl = gh->gh_gl; - - spin_lock(&gl->gl_lockref.lock); - set_bit(HIF_MAY_DEMOTE, &gh->gh_iflags); - spin_unlock(&gl->gl_lockref.lock); -} - -static inline void gfs2_holder_disallow_demote(struct gfs2_holder *gh) -{ - struct gfs2_glock *gl = gh->gh_gl; - - spin_lock(&gl->gl_lockref.lock); - clear_bit(HIF_MAY_DEMOTE, &gh->gh_iflags); - spin_unlock(&gl->gl_lockref.lock); -} - extern void gfs2_inode_remember_delete(struct gfs2_glock *gl, u64 generation); extern bool gfs2_inode_already_deleted(struct gfs2_glock *gl, u64 generation); diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 49210a2e7ce7..d78b61ecc1cd 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -397,38 +397,39 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) struct timespec64 atime; u16 height, depth; umode_t mode = be32_to_cpu(str->di_mode); - bool is_new = ip->i_inode.i_state & I_NEW; + struct inode *inode = &ip->i_inode; + bool is_new = inode->i_state & I_NEW; if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr))) goto corrupt; - if (unlikely(!is_new && inode_wrong_type(&ip->i_inode, mode))) + if (unlikely(!is_new && inode_wrong_type(inode, mode))) goto corrupt; ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino); - ip->i_inode.i_mode = mode; + inode->i_mode = mode; if (is_new) { - ip->i_inode.i_rdev = 0; + inode->i_rdev = 0; switch (mode & S_IFMT) { case S_IFBLK: case S_IFCHR: - ip->i_inode.i_rdev = MKDEV(be32_to_cpu(str->di_major), - be32_to_cpu(str->di_minor)); + inode->i_rdev = MKDEV(be32_to_cpu(str->di_major), + be32_to_cpu(str->di_minor)); break; } } - i_uid_write(&ip->i_inode, be32_to_cpu(str->di_uid)); - i_gid_write(&ip->i_inode, be32_to_cpu(str->di_gid)); - set_nlink(&ip->i_inode, be32_to_cpu(str->di_nlink)); - i_size_write(&ip->i_inode, be64_to_cpu(str->di_size)); - gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks)); + i_uid_write(inode, be32_to_cpu(str->di_uid)); + i_gid_write(inode, be32_to_cpu(str->di_gid)); + set_nlink(inode, be32_to_cpu(str->di_nlink)); + i_size_write(inode, be64_to_cpu(str->di_size)); + gfs2_set_inode_blocks(inode, be64_to_cpu(str->di_blocks)); atime.tv_sec = be64_to_cpu(str->di_atime); atime.tv_nsec = be32_to_cpu(str->di_atime_nsec); - if (timespec64_compare(&ip->i_inode.i_atime, &atime) < 0) - ip->i_inode.i_atime = atime; - ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime); - ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec); - ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime); - ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec); + if (timespec64_compare(&inode->i_atime, &atime) < 0) + inode->i_atime = atime; + inode->i_mtime.tv_sec = be64_to_cpu(str->di_mtime); + inode->i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec); + inode->i_ctime.tv_sec = be64_to_cpu(str->di_ctime); + inode->i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec); ip->i_goal = be64_to_cpu(str->di_goal_meta); ip->i_generation = be64_to_cpu(str->di_generation); @@ -436,7 +437,7 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) ip->i_diskflags = be32_to_cpu(str->di_flags); ip->i_eattr = be64_to_cpu(str->di_eattr); /* i_diskflags and i_eattr must be set before gfs2_set_inode_flags() */ - gfs2_set_inode_flags(&ip->i_inode); + gfs2_set_inode_flags(inode); height = be16_to_cpu(str->di_height); if (unlikely(height > GFS2_MAX_META_HEIGHT)) goto corrupt; @@ -448,8 +449,11 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) ip->i_depth = (u8)depth; ip->i_entries = be32_to_cpu(str->di_entries); - if (S_ISREG(ip->i_inode.i_mode)) - gfs2_set_aops(&ip->i_inode); + if (gfs2_is_stuffed(ip) && inode->i_size > gfs2_max_stuffed_size(ip)) + goto corrupt; + + if (S_ISREG(inode->i_mode)) + gfs2_set_aops(inode); return 0; corrupt: diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index d09d9892cd05..c26765080f28 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -252,7 +252,6 @@ struct gfs2_lkstats { enum { /* States */ - HIF_MAY_DEMOTE = 1, HIF_HOLDER = 6, /* Set for gh that "holds" the glock */ HIF_WAIT = 10, }; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 1371e067d2a7..614db3055c02 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -142,6 +142,11 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, if (unlikely(error)) goto fail; + /* + * The only caller that sets @blktype to GFS2_BLKST_UNLINKED is + * delete_work_func(). Make sure not to cancel the delete work + * from within itself here. + */ if (blktype == GFS2_BLKST_UNLINKED) extra_flags |= LM_FLAG_TRY; else @@ -403,12 +408,17 @@ static int alloc_dinode(struct gfs2_inode *ip, u32 flags, unsigned *dblocks) goto out_ipreserv; error = gfs2_alloc_blocks(ip, &ip->i_no_addr, dblocks, 1, &ip->i_generation); + if (error) + goto out_trans_end; + ip->i_no_formal_ino = ip->i_generation; ip->i_inode.i_ino = ip->i_no_addr; ip->i_goal = ip->i_no_addr; + if (*dblocks > 1) + ip->i_eattr = ip->i_no_addr + 1; +out_trans_end: gfs2_trans_end(sdp); - out_ipreserv: gfs2_inplace_release(ip); out_quota: @@ -586,6 +596,12 @@ static int gfs2_initxattrs(struct inode *inode, const struct xattr *xattr_array, * @size: The initial size of the inode (ignored for directories) * @excl: Force fail if inode exists * + * FIXME: Change to allocate the disk blocks and write them out in the same + * transaction. That way, we can no longer end up in a situation in which an + * inode is allocated, the node crashes, and the block looks like a valid + * inode. (With atomic creates in place, we will also no longer need to zero + * the link count and dirty the inode here on failure.) + * * Returns: 0 on success, or error code */ @@ -596,12 +612,12 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, { const struct qstr *name = &dentry->d_name; struct posix_acl *default_acl, *acl; - struct gfs2_holder ghs[2]; + struct gfs2_holder d_gh, gh; struct inode *inode = NULL; struct gfs2_inode *dip = GFS2_I(dir), *ip; struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); struct gfs2_glock *io_gl; - int error, free_vfs_inode = 1; + int error; u32 aflags = 0; unsigned blocks = 1; struct gfs2_diradd da = { .bh = NULL, .save_loc = 1, }; @@ -617,10 +633,10 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (error) goto fail; - error = gfs2_glock_nq_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); + error = gfs2_glock_nq_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, &d_gh); if (error) goto fail; - gfs2_holder_mark_uninitialized(ghs + 1); + gfs2_holder_mark_uninitialized(&gh); error = create_ok(dip, name, mode); if (error) @@ -642,7 +658,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, else error = finish_no_open(file, NULL); } - gfs2_glock_dq_uninit(ghs); + gfs2_glock_dq_uninit(&d_gh); goto fail; } else if (error != -ENOENT) { goto fail_gunlock; @@ -656,12 +672,12 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, error = -ENOMEM; if (!inode) goto fail_gunlock; + ip = GFS2_I(inode); error = posix_acl_create(dir, &mode, &default_acl, &acl); if (error) goto fail_gunlock; - ip = GFS2_I(inode); error = gfs2_qa_get(ip); if (error) goto fail_free_acls; @@ -723,15 +739,19 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, goto fail_free_inode; gfs2_cancel_delete_work(io_gl); +retry: error = insert_inode_locked4(inode, ip->i_no_addr, iget_test, &ip->i_no_addr); - BUG_ON(error); + if (error == -EBUSY) + goto retry; + if (error) + goto fail_gunlock2; error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT | GL_NOPID, &ip->i_iopen_gh); if (error) goto fail_gunlock2; - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1); + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, &gh); if (error) goto fail_gunlock3; @@ -739,10 +759,8 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (error) goto fail_gunlock3; - if (blocks > 1) { - ip->i_eattr = ip->i_no_addr + 1; + if (blocks > 1) gfs2_init_xattr(ip); - } init_dinode(dip, ip, symname); gfs2_trans_end(sdp); @@ -750,9 +768,6 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, glock_set_object(io_gl, ip); gfs2_set_iop(inode); - free_vfs_inode = 0; /* After this point, the inode is no longer - considered free. Any failures need to undo - the gfs2 structures. */ if (default_acl) { error = __gfs2_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); if (error) @@ -785,9 +800,9 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, file->f_mode |= FMODE_CREATED; error = finish_open(file, dentry, gfs2_open_common); } - gfs2_glock_dq_uninit(ghs); + gfs2_glock_dq_uninit(&d_gh); gfs2_qa_put(ip); - gfs2_glock_dq_uninit(ghs + 1); + gfs2_glock_dq_uninit(&gh); gfs2_glock_put(io_gl); gfs2_qa_put(dip); unlock_new_inode(inode); @@ -801,10 +816,6 @@ fail_gunlock3: fail_gunlock2: gfs2_glock_put(io_gl); fail_free_inode: - if (ip->i_gl) { - if (free_vfs_inode) /* else evict will do the put for us */ - gfs2_glock_put(ip->i_gl); - } gfs2_rs_deltree(&ip->i_res); gfs2_qa_put(ip); fail_free_acls: @@ -812,20 +823,19 @@ fail_free_acls: posix_acl_release(acl); fail_gunlock: gfs2_dir_no_add(&da); - gfs2_glock_dq_uninit(ghs); + gfs2_glock_dq_uninit(&d_gh); if (!IS_ERR_OR_NULL(inode)) { + set_bit(GIF_ALLOC_FAILED, &ip->i_flags); clear_nlink(inode); - if (!free_vfs_inode) + if (ip->i_no_addr) mark_inode_dirty(inode); - set_bit(free_vfs_inode ? GIF_FREE_VFS_INODE : GIF_ALLOC_FAILED, - &GFS2_I(inode)->i_flags); if (inode->i_state & I_NEW) iget_failed(inode); else iput(inode); } - if (gfs2_holder_initialized(ghs + 1)) - gfs2_glock_dq_uninit(ghs + 1); + if (gfs2_holder_initialized(&gh)) + gfs2_glock_dq_uninit(&gh); fail: gfs2_qa_put(dip); return error; diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 6ed728aae9a5..3c41b864ee5b 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -442,6 +442,12 @@ void gfs2_journal_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen) struct buffer_head *bh; int ty; + if (!ip->i_gl) { + /* This can only happen during incomplete inode creation. */ + BUG_ON(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags)); + return; + } + gfs2_ail1_wipe(sdp, bstart, blen); while (blen) { ty = REMOVE_META; diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index b018957a1bb2..999cc146d708 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -379,6 +379,7 @@ out: void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) { + const struct inode *inode = &ip->i_inode; struct gfs2_dinode *str = buf; str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC); @@ -386,15 +387,15 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI); str->di_num.no_addr = cpu_to_be64(ip->i_no_addr); str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino); - str->di_mode = cpu_to_be32(ip->i_inode.i_mode); - str->di_uid = cpu_to_be32(i_uid_read(&ip->i_inode)); - str->di_gid = cpu_to_be32(i_gid_read(&ip->i_inode)); - str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink); - str->di_size = cpu_to_be64(i_size_read(&ip->i_inode)); - str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode)); - str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec); - str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec); - str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec); + str->di_mode = cpu_to_be32(inode->i_mode); + str->di_uid = cpu_to_be32(i_uid_read(inode)); + str->di_gid = cpu_to_be32(i_gid_read(inode)); + str->di_nlink = cpu_to_be32(inode->i_nlink); + str->di_size = cpu_to_be64(i_size_read(inode)); + str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(inode)); + str->di_atime = cpu_to_be64(inode->i_atime.tv_sec); + str->di_mtime = cpu_to_be64(inode->i_mtime.tv_sec); + str->di_ctime = cpu_to_be64(inode->i_ctime.tv_sec); str->di_goal_meta = cpu_to_be64(ip->i_goal); str->di_goal_data = cpu_to_be64(ip->i_goal); @@ -402,16 +403,16 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) str->di_flags = cpu_to_be32(ip->i_diskflags); str->di_height = cpu_to_be16(ip->i_height); - str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) && + str->di_payload_format = cpu_to_be32(S_ISDIR(inode->i_mode) && !(ip->i_diskflags & GFS2_DIF_EXHASH) ? GFS2_FORMAT_DE : 0); str->di_depth = cpu_to_be16(ip->i_depth); str->di_entries = cpu_to_be32(ip->i_entries); str->di_eattr = cpu_to_be64(ip->i_eattr); - str->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec); - str->di_mtime_nsec = cpu_to_be32(ip->i_inode.i_mtime.tv_nsec); - str->di_ctime_nsec = cpu_to_be32(ip->i_inode.i_ctime.tv_nsec); + str->di_atime_nsec = cpu_to_be32(inode->i_atime.tv_nsec); + str->di_mtime_nsec = cpu_to_be32(inode->i_mtime.tv_nsec); + str->di_ctime_nsec = cpu_to_be32(inode->i_ctime.tv_nsec); } /** @@ -475,6 +476,12 @@ static void gfs2_dirty_inode(struct inode *inode, int flags) int need_endtrans = 0; int ret; + if (unlikely(!ip->i_gl)) { + /* This can only happen during incomplete inode creation. */ + BUG_ON(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags)); + return; + } + if (unlikely(gfs2_withdrawn(sdp))) return; if (!gfs2_glock_is_locked_by_me(ip->i_gl)) { @@ -927,8 +934,7 @@ static int gfs2_drop_inode(struct inode *inode) { struct gfs2_inode *ip = GFS2_I(inode); - if (!test_bit(GIF_FREE_VFS_INODE, &ip->i_flags) && - inode->i_nlink && + if (inode->i_nlink && gfs2_holder_initialized(&ip->i_iopen_gh)) { struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl; if (test_bit(GLF_DEMOTE, &gl->gl_flags)) @@ -1076,7 +1082,13 @@ static void gfs2_final_release_pages(struct gfs2_inode *ip) struct inode *inode = &ip->i_inode; struct gfs2_glock *gl = ip->i_gl; - truncate_inode_pages(gfs2_glock2aspace(ip->i_gl), 0); + if (unlikely(!gl)) { + /* This can only happen during incomplete inode creation. */ + BUG_ON(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags)); + return; + } + + truncate_inode_pages(gfs2_glock2aspace(gl), 0); truncate_inode_pages(&inode->i_data, 0); if (atomic_read(&gl->gl_revokes) == 0) { @@ -1218,10 +1230,8 @@ static enum dinode_demise evict_should_delete(struct inode *inode, struct gfs2_sbd *sdp = sb->s_fs_info; int ret; - if (test_bit(GIF_ALLOC_FAILED, &ip->i_flags)) { - BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl)); + if (unlikely(test_bit(GIF_ALLOC_FAILED, &ip->i_flags))) goto should_delete; - } if (test_bit(GIF_DEFERRED_DELETE, &ip->i_flags)) return SHOULD_DEFER_EVICTION; @@ -1294,13 +1304,22 @@ static int evict_unlinked_inode(struct inode *inode) goto out; } - /* We're about to clear the bitmap for the dinode, but as soon as we - do, gfs2_create_inode can create another inode at the same block - location and try to set gl_object again. We clear gl_object here so - that subsequent inode creates don't see an old gl_object. */ - glock_clear_object(ip->i_gl, ip); + if (ip->i_gl) + gfs2_inode_remember_delete(ip->i_gl, ip->i_no_formal_ino); + + /* + * As soon as we clear the bitmap for the dinode, gfs2_create_inode() + * can get called to recreate it, or even gfs2_inode_lookup() if the + * inode was recreated on another node in the meantime. + * + * However, inserting the new inode into the inode hash table will not + * succeed until the old inode is removed, and that only happens after + * ->evict_inode() returns. The new inode is attached to its inode and + * iopen glocks after inserting it into the inode hash table, so at + * that point we can be sure that both glocks are unused. + */ + ret = gfs2_dinode_dealloc(ip); - gfs2_inode_remember_delete(ip->i_gl, ip->i_no_formal_ino); out: return ret; } @@ -1367,12 +1386,7 @@ static void gfs2_evict_inode(struct inode *inode) struct gfs2_holder gh; int ret; - if (test_bit(GIF_FREE_VFS_INODE, &ip->i_flags)) { - clear_inode(inode); - return; - } - - if (inode->i_nlink || sb_rdonly(sb)) + if (inode->i_nlink || sb_rdonly(sb) || !ip->i_no_addr) goto out; gfs2_holder_mark_uninitialized(&gh); @@ -1405,12 +1419,9 @@ out: struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl; glock_clear_object(gl, ip); - if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) { - ip->i_iopen_gh.gh_flags |= GL_NOCACHE; - gfs2_glock_dq(&ip->i_iopen_gh); - } gfs2_glock_hold(gl); - gfs2_holder_uninit(&ip->i_iopen_gh); + ip->i_iopen_gh.gh_flags |= GL_NOCACHE; + gfs2_glock_dq_uninit(&ip->i_iopen_gh); gfs2_glock_put_eventually(gl); } if (ip->i_gl) { @@ -1429,6 +1440,7 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb) ip = alloc_inode_sb(sb, gfs2_inode_cachep, GFP_KERNEL); if (!ip) return NULL; + ip->i_no_addr = 0; ip->i_flags = 0; ip->i_gl = NULL; gfs2_holder_mark_uninitialized(&ip->i_iopen_gh); diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index f6a66050380e..518c0677e12a 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c @@ -1412,11 +1412,13 @@ static int ea_dealloc_block(struct gfs2_inode *ip) ip->i_eattr = 0; gfs2_add_inode_blocks(&ip->i_inode, -1); - error = gfs2_meta_inode_buffer(ip, &dibh); - if (!error) { - gfs2_trans_add_meta(ip->i_gl, dibh); - gfs2_dinode_out(ip, dibh->b_data); - brelse(dibh); + if (likely(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags))) { + error = gfs2_meta_inode_buffer(ip, &dibh); + if (!error) { + gfs2_trans_add_meta(ip->i_gl, dibh); + gfs2_dinode_out(ip, dibh->b_data); + brelse(dibh); + } } gfs2_trans_end(sdp); @@ -1445,14 +1447,16 @@ int gfs2_ea_dealloc(struct gfs2_inode *ip) if (error) return error; - error = ea_foreach(ip, ea_dealloc_unstuffed, NULL); - if (error) - goto out_quota; - - if (ip->i_diskflags & GFS2_DIF_EA_INDIRECT) { - error = ea_dealloc_indirect(ip); + if (likely(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags))) { + error = ea_foreach(ip, ea_dealloc_unstuffed, NULL); if (error) goto out_quota; + + if (ip->i_diskflags & GFS2_DIF_EA_INDIRECT) { + error = ea_dealloc_indirect(ip); + if (error) + goto out_quota; + } } error = ea_dealloc_block(ip); diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index f33b3baad07c..935ef8cb02b2 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -125,9 +125,9 @@ static struct kernfs_node *kernfs_common_ancestor(struct kernfs_node *a, * kn_to: /n1/n2/n3 [depth=3] * result: /../.. * - * [3] when @kn_to is NULL result will be "(null)" + * [3] when @kn_to is %NULL result will be "(null)" * - * Returns the length of the full path. If the full length is equal to or + * Return: the length of the full path. If the full length is equal to or * greater than @buflen, @buf contains the truncated path with the trailing * '\0'. On error, -errno is returned. */ @@ -185,10 +185,12 @@ static int kernfs_path_from_node_locked(struct kernfs_node *kn_to, * @buflen: size of @buf * * Copies the name of @kn into @buf of @buflen bytes. The behavior is - * similar to strlcpy(). It returns the length of @kn's name and if @buf - * isn't long enough, it's filled upto @buflen-1 and nul terminated. + * similar to strlcpy(). * - * Fills buffer with "(null)" if @kn is NULL. + * Fills buffer with "(null)" if @kn is %NULL. + * + * Return: the length of @kn's name and if @buf isn't long enough, + * it's filled up to @buflen-1 and nul terminated. * * This function can be called from any context. */ @@ -215,7 +217,7 @@ int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen) * path (which includes '..'s) as needed to reach from @from to @to is * returned. * - * Returns the length of the full path. If the full length is equal to or + * Return: the length of the full path. If the full length is equal to or * greater than @buflen, @buf contains the truncated path with the trailing * '\0'. On error, -errno is returned. */ @@ -287,6 +289,8 @@ out: * * Determines @kn's parent, pins and returns it. This function can be * called from any context. + * + * Return: parent node of @kn */ struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn) { @@ -302,11 +306,11 @@ struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn) } /** - * kernfs_name_hash + * kernfs_name_hash - calculate hash of @ns + @name * @name: Null terminated string to hash * @ns: Namespace tag to hash * - * Returns 31 bit hash of ns + name (so it fits in an off_t ) + * Return: 31-bit hash of ns + name (so it fits in an off_t) */ static unsigned int kernfs_name_hash(const char *name, const void *ns) { @@ -354,8 +358,8 @@ static int kernfs_sd_compare(const struct kernfs_node *left, * Locking: * kernfs_rwsem held exclusive * - * RETURNS: - * 0 on susccess -EEXIST on failure. + * Return: + * %0 on success, -EEXIST on failure. */ static int kernfs_link_sibling(struct kernfs_node *kn) { @@ -394,8 +398,10 @@ static int kernfs_link_sibling(struct kernfs_node *kn) * @kn: kernfs_node of interest * * Try to unlink @kn from its sibling rbtree which starts from - * kn->parent->dir.children. Returns %true if @kn was actually - * removed, %false if @kn wasn't on the rbtree. + * kn->parent->dir.children. + * + * Return: %true if @kn was actually removed, + * %false if @kn wasn't on the rbtree. * * Locking: * kernfs_rwsem held exclusive @@ -419,10 +425,10 @@ static bool kernfs_unlink_sibling(struct kernfs_node *kn) * @kn: kernfs_node to get an active reference to * * Get an active reference of @kn. This function is noop if @kn - * is NULL. + * is %NULL. * - * RETURNS: - * Pointer to @kn on success, NULL on failure. + * Return: + * Pointer to @kn on success, %NULL on failure. */ struct kernfs_node *kernfs_get_active(struct kernfs_node *kn) { @@ -442,7 +448,7 @@ struct kernfs_node *kernfs_get_active(struct kernfs_node *kn) * @kn: kernfs_node to put an active reference to * * Put an active reference to @kn. This function is noop if @kn - * is NULL. + * is %NULL. */ void kernfs_put_active(struct kernfs_node *kn) { @@ -464,7 +470,7 @@ void kernfs_put_active(struct kernfs_node *kn) * kernfs_drain - drain kernfs_node * @kn: kernfs_node to drain * - * Drain existing usages and nuke all existing mmaps of @kn. Mutiple + * Drain existing usages and nuke all existing mmaps of @kn. Multiple * removers may invoke this function concurrently on @kn and all will * return after draining is complete. */ @@ -577,7 +583,7 @@ EXPORT_SYMBOL_GPL(kernfs_put); * kernfs_node_from_dentry - determine kernfs_node associated with a dentry * @dentry: the dentry in question * - * Return the kernfs_node associated with @dentry. If @dentry is not a + * Return: the kernfs_node associated with @dentry. If @dentry is not a * kernfs one, %NULL is returned. * * While the returned kernfs_node will stay accessible as long as @dentry @@ -684,8 +690,8 @@ struct kernfs_node *kernfs_new_node(struct kernfs_node *parent, * @id's lower 32bits encode ino and upper gen. If the gen portion is * zero, all generations are matched. * - * RETURNS: - * NULL on failure. Return a kernfs node with reference counter incremented + * Return: %NULL on failure, + * otherwise a kernfs node with reference counter incremented. */ struct kernfs_node *kernfs_find_and_get_node_by_id(struct kernfs_root *root, u64 id) @@ -733,8 +739,8 @@ err_unlock: * function increments nlink of the parent's inode if @kn is a * directory and link into the children list of the parent. * - * RETURNS: - * 0 on success, -EEXIST if entry with the given name already + * Return: + * %0 on success, -EEXIST if entry with the given name already * exists. */ int kernfs_add_one(struct kernfs_node *kn) @@ -797,8 +803,9 @@ out_unlock: * @name: name to look for * @ns: the namespace tag to use * - * Look for kernfs_node with name @name under @parent. Returns pointer to - * the found kernfs_node on success, %NULL on failure. + * Look for kernfs_node with name @name under @parent. + * + * Return: pointer to the found kernfs_node on success, %NULL on failure. */ static struct kernfs_node *kernfs_find_ns(struct kernfs_node *parent, const unsigned char *name, @@ -871,8 +878,9 @@ static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent, * @ns: the namespace tag to use * * Look for kernfs_node with name @name under @parent and get a reference - * if found. This function may sleep and returns pointer to the found - * kernfs_node on success, %NULL on failure. + * if found. This function may sleep. + * + * Return: pointer to the found kernfs_node on success, %NULL on failure. */ struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent, const char *name, const void *ns) @@ -896,8 +904,9 @@ EXPORT_SYMBOL_GPL(kernfs_find_and_get_ns); * @ns: the namespace tag to use * * Look for kernfs_node with path @path under @parent and get a reference - * if found. This function may sleep and returns pointer to the found - * kernfs_node on success, %NULL on failure. + * if found. This function may sleep. + * + * Return: pointer to the found kernfs_node on success, %NULL on failure. */ struct kernfs_node *kernfs_walk_and_get_ns(struct kernfs_node *parent, const char *path, const void *ns) @@ -919,7 +928,7 @@ struct kernfs_node *kernfs_walk_and_get_ns(struct kernfs_node *parent, * @flags: KERNFS_ROOT_* flags * @priv: opaque data associated with the new directory * - * Returns the root of the new hierarchy on success, ERR_PTR() value on + * Return: the root of the new hierarchy on success, ERR_PTR() value on * failure. */ struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops, @@ -991,6 +1000,8 @@ void kernfs_destroy_root(struct kernfs_root *root) /** * kernfs_root_to_node - return the kernfs_node associated with a kernfs_root * @root: root to use to lookup + * + * Return: @root's kernfs_node */ struct kernfs_node *kernfs_root_to_node(struct kernfs_root *root) { @@ -1007,7 +1018,7 @@ struct kernfs_node *kernfs_root_to_node(struct kernfs_root *root) * @priv: opaque data associated with the new directory * @ns: optional namespace tag of the directory * - * Returns the created node on success, ERR_PTR() value on failure. + * Return: the created node on success, ERR_PTR() value on failure. */ struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, const char *name, umode_t mode, @@ -1041,7 +1052,7 @@ struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, * @parent: parent in which to create a new directory * @name: name of the new directory * - * Returns the created node on success, ERR_PTR() value on failure. + * Return: the created node on success, ERR_PTR() value on failure. */ struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent, const char *name) @@ -1083,20 +1094,30 @@ static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags) /* If the kernfs parent node has changed discard and * proceed to ->lookup. + * + * There's nothing special needed here when getting the + * dentry parent, even if a concurrent rename is in + * progress. That's because the dentry is negative so + * it can only be the target of the rename and it will + * be doing a d_move() not a replace. Consequently the + * dentry d_parent won't change over the d_move(). + * + * Also kernfs negative dentries transitioning from + * negative to positive during revalidate won't happen + * because they are invalidated on containing directory + * changes and the lookup re-done so that a new positive + * dentry can be properly created. */ - spin_lock(&dentry->d_lock); + root = kernfs_root_from_sb(dentry->d_sb); + down_read(&root->kernfs_rwsem); parent = kernfs_dentry_node(dentry->d_parent); if (parent) { - spin_unlock(&dentry->d_lock); - root = kernfs_root(parent); - down_read(&root->kernfs_rwsem); if (kernfs_dir_changed(parent, dentry)) { up_read(&root->kernfs_rwsem); return 0; } - up_read(&root->kernfs_rwsem); - } else - spin_unlock(&dentry->d_lock); + } + up_read(&root->kernfs_rwsem); /* The kernfs parent node hasn't changed, leave the * dentry negative and return success. @@ -1290,6 +1311,8 @@ static struct kernfs_node *kernfs_leftmost_descendant(struct kernfs_node *pos) * Find the next descendant to visit for post-order traversal of @root's * descendants. @root is included in the iteration and the last node to be * visited. + * + * Return: the next descendant to visit or %NULL when done. */ static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos, struct kernfs_node *root) @@ -1553,6 +1576,8 @@ void kernfs_unbreak_active_protection(struct kernfs_node *kn) * the whole kernfs_ops which won the arbitration. This can be used to * guarantee, for example, all concurrent writes to a "delete" file to * finish only after the whole operation is complete. + * + * Return: %true if @kn is removed by this call, otherwise %false. */ bool kernfs_remove_self(struct kernfs_node *kn) { @@ -1613,7 +1638,8 @@ bool kernfs_remove_self(struct kernfs_node *kn) * @ns: namespace tag of the kernfs_node to remove * * Look for the kernfs_node with @name and @ns under @parent and remove it. - * Returns 0 on success, -ENOENT if such entry doesn't exist. + * + * Return: %0 on success, -ENOENT if such entry doesn't exist. */ int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name, const void *ns) @@ -1651,6 +1677,8 @@ int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name, * @new_parent: new parent to put @sd under * @new_name: new name * @new_ns: new namespace tag + * + * Return: %0 on success, -errno on failure. */ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, const char *new_name, const void *new_ns) diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index 9ab6c92e02da..e4a50e4ff0d2 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -33,7 +33,7 @@ struct kernfs_open_node { * pending queue is implemented as a singly linked list of kernfs_nodes. * The list is terminated with the self pointer so that whether a * kernfs_node is on the list or not can be determined by testing the next - * pointer for NULL. + * pointer for %NULL. */ #define KERNFS_NOTIFY_EOL ((void *)&kernfs_notify_list) @@ -59,8 +59,10 @@ static inline struct mutex *kernfs_open_file_mutex_lock(struct kernfs_node *kn) } /** - * of_on - Return the kernfs_open_node of the specified kernfs_open_file - * @of: taret kernfs_open_file + * of_on - Get the kernfs_open_node of the specified kernfs_open_file + * @of: target kernfs_open_file + * + * Return: the kernfs_open_node of the kernfs_open_file */ static struct kernfs_open_node *of_on(struct kernfs_open_file *of) { @@ -82,6 +84,8 @@ static struct kernfs_open_node *of_on(struct kernfs_open_file *of) * outside RCU read-side critical section. * * The caller needs to make sure that kernfs_open_file_mutex is held. + * + * Return: @kn->attr.open when kernfs_open_file_mutex is held. */ static struct kernfs_open_node * kernfs_deref_open_node_locked(struct kernfs_node *kn) @@ -548,11 +552,11 @@ out_unlock: * If @kn->attr.open exists, increment its reference count; otherwise, * create one. @of is chained to the files list. * - * LOCKING: + * Locking: * Kernel thread context (may sleep). * - * RETURNS: - * 0 on success, -errno on failure. + * Return: + * %0 on success, -errno on failure. */ static int kernfs_get_open_node(struct kernfs_node *kn, struct kernfs_open_file *of) @@ -1024,7 +1028,7 @@ const struct file_operations kernfs_file_fops = { * @ns: optional namespace tag of the file * @key: lockdep key for the file's active_ref, %NULL to disable lockdep * - * Returns the created node on success, ERR_PTR() value on error. + * Return: the created node on success, ERR_PTR() value on error. */ struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, const char *name, diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c index 3d783d80f5da..eac0f210299a 100644 --- a/fs/kernfs/inode.c +++ b/fs/kernfs/inode.c @@ -94,7 +94,7 @@ int __kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr) * @kn: target node * @iattr: iattr to set * - * Returns 0 on success, -errno on failure. + * Return: %0 on success, -errno on failure. */ int kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr) { @@ -190,10 +190,8 @@ int kernfs_iop_getattr(struct user_namespace *mnt_userns, struct kernfs_root *root = kernfs_root(kn); down_read(&root->kernfs_rwsem); - spin_lock(&inode->i_lock); kernfs_refresh_inode(kn, inode); generic_fillattr(&init_user_ns, inode, stat); - spin_unlock(&inode->i_lock); up_read(&root->kernfs_rwsem); return 0; @@ -241,11 +239,11 @@ static void kernfs_init_inode(struct kernfs_node *kn, struct inode *inode) * allocated and basics are initialized. New inode is returned * locked. * - * LOCKING: + * Locking: * Kernel thread context (may sleep). * - * RETURNS: - * Pointer to allocated inode on success, NULL on failure. + * Return: + * Pointer to allocated inode on success, %NULL on failure. */ struct inode *kernfs_get_inode(struct super_block *sb, struct kernfs_node *kn) { @@ -288,10 +286,8 @@ int kernfs_iop_permission(struct user_namespace *mnt_userns, root = kernfs_root(kn); down_read(&root->kernfs_rwsem); - spin_lock(&inode->i_lock); kernfs_refresh_inode(kn, inode); ret = generic_permission(&init_user_ns, inode, mask); - spin_unlock(&inode->i_lock); up_read(&root->kernfs_rwsem); return ret; diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h index fc5821effd97..9046d9f39e63 100644 --- a/fs/kernfs/kernfs-internal.h +++ b/fs/kernfs/kernfs-internal.h @@ -58,7 +58,7 @@ struct kernfs_root { * kernfs_root - find out the kernfs_root a kernfs_node belongs to * @kn: kernfs_node of interest * - * Return the kernfs_root @kn belongs to. + * Return: the kernfs_root @kn belongs to. */ static inline struct kernfs_root *kernfs_root(struct kernfs_node *kn) { diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index d0859f72d2d6..e08e8d999807 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c @@ -153,7 +153,7 @@ static const struct export_operations kernfs_export_ops = { * kernfs_root_from_sb - determine kernfs_root associated with a super_block * @sb: the super_block in question * - * Return the kernfs_root associated with @sb. If @sb is not a kernfs one, + * Return: the kernfs_root associated with @sb. If @sb is not a kernfs one, * %NULL is returned. */ struct kernfs_root *kernfs_root_from_sb(struct super_block *sb) @@ -167,7 +167,7 @@ struct kernfs_root *kernfs_root_from_sb(struct super_block *sb) * find the next ancestor in the path down to @child, where @parent was the * ancestor whose descendant we want to find. * - * Say the path is /a/b/c/d. @child is d, @parent is NULL. We return the root + * Say the path is /a/b/c/d. @child is d, @parent is %NULL. We return the root * node. If @parent is b, then we return the node for c. * Passing in d as @parent is not ok. */ @@ -192,6 +192,8 @@ static struct kernfs_node *find_next_ancestor(struct kernfs_node *child, * kernfs_node_dentry - get a dentry for the given kernfs_node * @kn: kernfs_node for which a dentry is needed * @sb: the kernfs super_block + * + * Return: the dentry pointer */ struct dentry *kernfs_node_dentry(struct kernfs_node *kn, struct super_block *sb) @@ -296,7 +298,7 @@ static int kernfs_set_super(struct super_block *sb, struct fs_context *fc) * kernfs_super_ns - determine the namespace tag of a kernfs super_block * @sb: super_block of interest * - * Return the namespace tag associated with kernfs super_block @sb. + * Return: the namespace tag associated with kernfs super_block @sb. */ const void *kernfs_super_ns(struct super_block *sb) { @@ -313,6 +315,8 @@ const void *kernfs_super_ns(struct super_block *sb) * implementation, which should set the specified ->@fs_type and ->@flags, and * specify the hierarchy and namespace tag to mount via ->@root and ->@ns, * respectively. + * + * Return: %0 on success, -errno on failure. */ int kernfs_get_tree(struct fs_context *fc) { diff --git a/fs/kernfs/symlink.c b/fs/kernfs/symlink.c index 0ab13824822f..45371a70caa7 100644 --- a/fs/kernfs/symlink.c +++ b/fs/kernfs/symlink.c @@ -19,7 +19,7 @@ * @name: name of the symlink * @target: target node for the symlink to point to * - * Returns the created node on success, ERR_PTR() value on error. + * Return: the created node on success, ERR_PTR() value on error. * Ownership of the link matches ownership of the target. */ struct kernfs_node *kernfs_create_link(struct kernfs_node *parent, diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c index 26f8ece2a997..0cbcd2dfa732 100644 --- a/fs/nfs/sysfs.c +++ b/fs/nfs/sysfs.c @@ -26,7 +26,7 @@ static void nfs_netns_object_release(struct kobject *kobj) } static const struct kobj_ns_type_operations *nfs_netns_object_child_ns_type( - struct kobject *kobj) + const struct kobject *kobj) { return &net_ns_type_operations; } @@ -130,7 +130,7 @@ static void nfs_netns_client_release(struct kobject *kobj) kfree(c); } -static const void *nfs_netns_client_namespace(struct kobject *kobj) +static const void *nfs_netns_client_namespace(const struct kobject *kobj) { return container_of(kobj, struct nfs_netns_client, kobject)->net; } diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 1998b4d5f692..45b2c9e3f636 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -324,8 +324,7 @@ nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may) if (key->gc) __set_bit(NFSD_FILE_GC, &nf->nf_flags); nf->nf_inode = key->inode; - /* nf_ref is pre-incremented for hash table */ - refcount_set(&nf->nf_ref, 2); + refcount_set(&nf->nf_ref, 1); nf->nf_may = key->need; nf->nf_mark = NULL; } @@ -377,24 +376,35 @@ nfsd_file_unhash(struct nfsd_file *nf) return false; } -static bool +static void nfsd_file_free(struct nfsd_file *nf) { s64 age = ktime_to_ms(ktime_sub(ktime_get(), nf->nf_birthtime)); - bool flush = false; trace_nfsd_file_free(nf); this_cpu_inc(nfsd_file_releases); this_cpu_add(nfsd_file_total_age, age); + nfsd_file_unhash(nf); + + /* + * We call fsync here in order to catch writeback errors. It's not + * strictly required by the protocol, but an nfsd_file could get + * evicted from the cache before a COMMIT comes in. If another + * task were to open that file in the interim and scrape the error, + * then the client may never see it. By calling fsync here, we ensure + * that writeback happens before the entry is freed, and that any + * errors reported result in the write verifier changing. + */ + nfsd_file_fsync(nf); + if (nf->nf_mark) nfsd_file_mark_put(nf->nf_mark); if (nf->nf_file) { get_file(nf->nf_file); filp_close(nf->nf_file, NULL); fput(nf->nf_file); - flush = true; } /* @@ -402,10 +412,9 @@ nfsd_file_free(struct nfsd_file *nf) * WARN and leak it to preserve system stability. */ if (WARN_ON_ONCE(!list_empty(&nf->nf_lru))) - return flush; + return; call_rcu(&nf->nf_rcu, nfsd_file_slab_free); - return flush; } static bool @@ -421,17 +430,23 @@ nfsd_file_check_writeback(struct nfsd_file *nf) mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK); } -static void nfsd_file_lru_add(struct nfsd_file *nf) +static bool nfsd_file_lru_add(struct nfsd_file *nf) { set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); - if (list_lru_add(&nfsd_file_lru, &nf->nf_lru)) + if (list_lru_add(&nfsd_file_lru, &nf->nf_lru)) { trace_nfsd_file_lru_add(nf); + return true; + } + return false; } -static void nfsd_file_lru_remove(struct nfsd_file *nf) +static bool nfsd_file_lru_remove(struct nfsd_file *nf) { - if (list_lru_del(&nfsd_file_lru, &nf->nf_lru)) + if (list_lru_del(&nfsd_file_lru, &nf->nf_lru)) { trace_nfsd_file_lru_del(nf); + return true; + } + return false; } struct nfsd_file * @@ -442,86 +457,60 @@ nfsd_file_get(struct nfsd_file *nf) return NULL; } -static void -nfsd_file_unhash_and_queue(struct nfsd_file *nf, struct list_head *dispose) -{ - trace_nfsd_file_unhash_and_queue(nf); - if (nfsd_file_unhash(nf)) { - /* caller must call nfsd_file_dispose_list() later */ - nfsd_file_lru_remove(nf); - list_add(&nf->nf_lru, dispose); - } -} - -static void -nfsd_file_put_noref(struct nfsd_file *nf) -{ - trace_nfsd_file_put(nf); - - if (refcount_dec_and_test(&nf->nf_ref)) { - WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags)); - nfsd_file_lru_remove(nf); - nfsd_file_free(nf); - } -} - -static void -nfsd_file_unhash_and_put(struct nfsd_file *nf) -{ - if (nfsd_file_unhash(nf)) - nfsd_file_put_noref(nf); -} - +/** + * nfsd_file_put - put the reference to a nfsd_file + * @nf: nfsd_file of which to put the reference + * + * Put a reference to a nfsd_file. In the non-GC case, we just put the + * reference immediately. In the GC case, if the reference would be + * the last one, the put it on the LRU instead to be cleaned up later. + */ void nfsd_file_put(struct nfsd_file *nf) { might_sleep(); + trace_nfsd_file_put(nf); - if (test_bit(NFSD_FILE_GC, &nf->nf_flags)) - nfsd_file_lru_add(nf); - else if (refcount_read(&nf->nf_ref) == 2) - nfsd_file_unhash_and_put(nf); - - if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { - nfsd_file_fsync(nf); - nfsd_file_put_noref(nf); - } else if (nf->nf_file && test_bit(NFSD_FILE_GC, &nf->nf_flags)) { - nfsd_file_put_noref(nf); - nfsd_file_schedule_laundrette(); - } else - nfsd_file_put_noref(nf); -} - -static void -nfsd_file_dispose_list(struct list_head *dispose) -{ - struct nfsd_file *nf; + if (test_bit(NFSD_FILE_GC, &nf->nf_flags) && + test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { + /* + * If this is the last reference (nf_ref == 1), then try to + * transfer it to the LRU. + */ + if (refcount_dec_not_one(&nf->nf_ref)) + return; + + /* Try to add it to the LRU. If that fails, decrement. */ + if (nfsd_file_lru_add(nf)) { + /* If it's still hashed, we're done */ + if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { + nfsd_file_schedule_laundrette(); + return; + } - while(!list_empty(dispose)) { - nf = list_first_entry(dispose, struct nfsd_file, nf_lru); - list_del_init(&nf->nf_lru); - nfsd_file_fsync(nf); - nfsd_file_put_noref(nf); + /* + * We're racing with unhashing, so try to remove it from + * the LRU. If removal fails, then someone else already + * has our reference. + */ + if (!nfsd_file_lru_remove(nf)) + return; + } } + if (refcount_dec_and_test(&nf->nf_ref)) + nfsd_file_free(nf); } static void -nfsd_file_dispose_list_sync(struct list_head *dispose) +nfsd_file_dispose_list(struct list_head *dispose) { - bool flush = false; struct nfsd_file *nf; - while(!list_empty(dispose)) { + while (!list_empty(dispose)) { nf = list_first_entry(dispose, struct nfsd_file, nf_lru); list_del_init(&nf->nf_lru); - nfsd_file_fsync(nf); - if (!refcount_dec_and_test(&nf->nf_ref)) - continue; - if (nfsd_file_free(nf)) - flush = true; + nfsd_file_free(nf); } - if (flush) - flush_delayed_fput(); } static void @@ -591,21 +580,8 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, struct list_head *head = arg; struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru); - /* - * Do a lockless refcount check. The hashtable holds one reference, so - * we look to see if anything else has a reference, or if any have - * been put since the shrinker last ran. Those don't get unhashed and - * released. - * - * Note that in the put path, we set the flag and then decrement the - * counter. Here we check the counter and then test and clear the flag. - * That order is deliberate to ensure that we can do this locklessly. - */ - if (refcount_read(&nf->nf_ref) > 1) { - list_lru_isolate(lru, &nf->nf_lru); - trace_nfsd_file_gc_in_use(nf); - return LRU_REMOVED; - } + /* We should only be dealing with GC entries here */ + WARN_ON_ONCE(!test_bit(NFSD_FILE_GC, &nf->nf_flags)); /* * Don't throw out files that are still undergoing I/O or @@ -616,40 +592,30 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, return LRU_SKIP; } + /* If it was recently added to the list, skip it */ if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags)) { trace_nfsd_file_gc_referenced(nf); return LRU_ROTATE; } - if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { - trace_nfsd_file_gc_hashed(nf); - return LRU_SKIP; + /* + * Put the reference held on behalf of the LRU. If it wasn't the last + * one, then just remove it from the LRU and ignore it. + */ + if (!refcount_dec_and_test(&nf->nf_ref)) { + trace_nfsd_file_gc_in_use(nf); + list_lru_isolate(lru, &nf->nf_lru); + return LRU_REMOVED; } + /* Refcount went to zero. Unhash it and queue it to the dispose list */ + nfsd_file_unhash(nf); list_lru_isolate_move(lru, &nf->nf_lru, head); this_cpu_inc(nfsd_file_evictions); trace_nfsd_file_gc_disposed(nf); return LRU_REMOVED; } -/* - * Unhash items on @dispose immediately, then queue them on the - * disposal workqueue to finish releasing them in the background. - * - * cel: Note that between the time list_lru_shrink_walk runs and - * now, these items are in the hash table but marked unhashed. - * Why release these outside of lru_cb ? There's no lock ordering - * problem since lru_cb currently takes no lock. - */ -static void nfsd_file_gc_dispose_list(struct list_head *dispose) -{ - struct nfsd_file *nf; - - list_for_each_entry(nf, dispose, nf_lru) - nfsd_file_hash_remove(nf); - nfsd_file_dispose_list_delayed(dispose); -} - static void nfsd_file_gc(void) { @@ -659,7 +625,7 @@ nfsd_file_gc(void) ret = list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb, &dispose, list_lru_count(&nfsd_file_lru)); trace_nfsd_file_gc_removed(ret, list_lru_count(&nfsd_file_lru)); - nfsd_file_gc_dispose_list(&dispose); + nfsd_file_dispose_list_delayed(&dispose); } static void @@ -685,7 +651,7 @@ nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc) ret = list_lru_shrink_walk(&nfsd_file_lru, sc, nfsd_file_lru_cb, &dispose); trace_nfsd_file_shrinker_removed(ret, list_lru_count(&nfsd_file_lru)); - nfsd_file_gc_dispose_list(&dispose); + nfsd_file_dispose_list_delayed(&dispose); return ret; } @@ -695,72 +661,111 @@ static struct shrinker nfsd_file_shrinker = { .seeks = 1, }; -/* - * Find all cache items across all net namespaces that match @inode and - * move them to @dispose. The lookup is atomic wrt nfsd_file_acquire(). +/** + * nfsd_file_queue_for_close: try to close out any open nfsd_files for an inode + * @inode: inode on which to close out nfsd_files + * @dispose: list on which to gather nfsd_files to close out + * + * An nfsd_file represents a struct file being held open on behalf of nfsd. An + * open file however can block other activity (such as leases), or cause + * undesirable behavior (e.g. spurious silly-renames when reexporting NFS). + * + * This function is intended to find open nfsd_files when this sort of + * conflicting access occurs and then attempt to close those files out. + * + * Populates the dispose list with entries that have already had their + * refcounts go to zero. The actual free of an nfsd_file can be expensive, + * so we leave it up to the caller whether it wants to wait or not. */ -static unsigned int -__nfsd_file_close_inode(struct inode *inode, struct list_head *dispose) +static void +nfsd_file_queue_for_close(struct inode *inode, struct list_head *dispose) { struct nfsd_file_lookup_key key = { .type = NFSD_FILE_KEY_INODE, .inode = inode, }; - unsigned int count = 0; struct nfsd_file *nf; rcu_read_lock(); do { + int decrement = 1; + nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key, nfsd_file_rhash_params); if (!nf) break; - nfsd_file_unhash_and_queue(nf, dispose); - count++; + + /* If we raced with someone else unhashing, ignore it */ + if (!nfsd_file_unhash(nf)) + continue; + + /* If we can't get a reference, ignore it */ + if (!nfsd_file_get(nf)) + continue; + + /* Extra decrement if we remove from the LRU */ + if (nfsd_file_lru_remove(nf)) + ++decrement; + + /* If refcount goes to 0, then put on the dispose list */ + if (refcount_sub_and_test(decrement, &nf->nf_ref)) { + list_add(&nf->nf_lru, dispose); + trace_nfsd_file_closing(nf); + } } while (1); rcu_read_unlock(); - return count; } /** - * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file + * nfsd_file_close_inode - attempt a delayed close of a nfsd_file * @inode: inode of the file to attempt to remove * - * Unhash and put, then flush and fput all cache items associated with @inode. + * Close out any open nfsd_files that can be reaped for @inode. The + * actual freeing is deferred to the dispose_list_delayed infrastructure. + * + * This is used by the fsnotify callbacks and setlease notifier. */ -void -nfsd_file_close_inode_sync(struct inode *inode) +static void +nfsd_file_close_inode(struct inode *inode) { LIST_HEAD(dispose); - unsigned int count; - count = __nfsd_file_close_inode(inode, &dispose); - trace_nfsd_file_close_inode_sync(inode, count); - nfsd_file_dispose_list_sync(&dispose); + nfsd_file_queue_for_close(inode, &dispose); + nfsd_file_dispose_list_delayed(&dispose); } /** - * nfsd_file_close_inode - attempt a delayed close of a nfsd_file + * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file * @inode: inode of the file to attempt to remove * - * Unhash and put all cache item associated with @inode. + * Close out any open nfsd_files that can be reaped for @inode. The + * nfsd_files are closed out synchronously. + * + * This is called from nfsd_rename and nfsd_unlink to avoid silly-renames + * when reexporting NFS. */ -static void -nfsd_file_close_inode(struct inode *inode) +void +nfsd_file_close_inode_sync(struct inode *inode) { + struct nfsd_file *nf; LIST_HEAD(dispose); - unsigned int count; - count = __nfsd_file_close_inode(inode, &dispose); - trace_nfsd_file_close_inode(inode, count); - nfsd_file_dispose_list_delayed(&dispose); + trace_nfsd_file_close(inode); + + nfsd_file_queue_for_close(inode, &dispose); + while (!list_empty(&dispose)) { + nf = list_first_entry(&dispose, struct nfsd_file, nf_lru); + list_del_init(&nf->nf_lru); + nfsd_file_free(nf); + } + flush_delayed_fput(); } /** * nfsd_file_delayed_close - close unused nfsd_files * @work: dummy * - * Walk the LRU list and close any entries that have not been used since + * Walk the LRU list and destroy any entries that have not been used since * the last scan. */ static void @@ -782,7 +787,7 @@ nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg, /* Only close files for F_SETLEASE leases */ if (fl->fl_flags & FL_LEASE) - nfsd_file_close_inode_sync(file_inode(fl->fl_file)); + nfsd_file_close_inode(file_inode(fl->fl_file)); return 0; } @@ -903,6 +908,13 @@ out_err: goto out; } +/** + * __nfsd_file_cache_purge: clean out the cache for shutdown + * @net: net-namespace to shut down the cache (may be NULL) + * + * Walk the nfsd_file cache and close out any that match @net. If @net is NULL, + * then close out everything. Called when an nfsd instance is being shut down. + */ static void __nfsd_file_cache_purge(struct net *net) { @@ -916,8 +928,11 @@ __nfsd_file_cache_purge(struct net *net) nf = rhashtable_walk_next(&iter); while (!IS_ERR_OR_NULL(nf)) { - if (!net || nf->nf_net == net) - nfsd_file_unhash_and_queue(nf, &dispose); + if (!net || nf->nf_net == net) { + nfsd_file_unhash(nf); + nfsd_file_lru_remove(nf); + list_add(&nf->nf_lru, &dispose); + } nf = rhashtable_walk_next(&iter); } @@ -1084,8 +1099,12 @@ retry: if (nf) nf = nfsd_file_get(nf); rcu_read_unlock(); - if (nf) + + if (nf) { + if (nfsd_file_lru_remove(nf)) + WARN_ON_ONCE(refcount_dec_and_test(&nf->nf_ref)); goto wait_for_construction; + } nf = nfsd_file_alloc(&key, may_flags); if (!nf) { @@ -1118,11 +1137,11 @@ wait_for_construction: goto out; } open_retry = false; - nfsd_file_put_noref(nf); + if (refcount_dec_and_test(&nf->nf_ref)) + nfsd_file_free(nf); goto retry; } - nfsd_file_lru_remove(nf); this_cpu_inc(nfsd_file_cache_hits); status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags)); @@ -1132,7 +1151,8 @@ out: this_cpu_inc(nfsd_file_acquisitions); *pnf = nf; } else { - nfsd_file_put(nf); + if (refcount_dec_and_test(&nf->nf_ref)) + nfsd_file_free(nf); nf = NULL; } @@ -1158,8 +1178,10 @@ open_file: * If construction failed, or we raced with a call to unlink() * then unhash. */ - if (status != nfs_ok || key.inode->i_nlink == 0) - nfsd_file_unhash_and_put(nf); + if (status == nfs_ok && key.inode->i_nlink == 0) + status = nfserr_jukebox; + if (status != nfs_ok) + nfsd_file_unhash(nf); clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags); smp_mb__after_atomic(); wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING); diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index d6e1d3894c82..2a815f5a52c4 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -988,7 +988,6 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c } else { if (!conn->cb_xprt) return -EINVAL; - clp->cl_cb_conn.cb_xprt = conn->cb_xprt; clp->cl_cb_session = ses; args.bc_xprt = conn->cb_xprt; args.prognumber = clp->cl_cb_session->se_cb_prog; @@ -1008,6 +1007,9 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c rpc_shutdown_client(client); return -ENOMEM; } + + if (clp->cl_minorversion != 0) + clp->cl_cb_conn.cb_xprt = conn->cb_xprt; clp->cl_cb_client = client; clp->cl_cb_cred = cred; rcu_read_lock(); diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 73ed32ad23a2..bd880d55f565 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1461,13 +1461,6 @@ out_err: return status; } -static void -nfsd4_interssc_disconnect(struct vfsmount *ss_mnt) -{ - nfs_do_sb_deactive(ss_mnt->mnt_sb); - mntput(ss_mnt); -} - /* * Verify COPY destination stateid. * @@ -1570,11 +1563,6 @@ nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct file *filp, { } -static void -nfsd4_interssc_disconnect(struct vfsmount *ss_mnt) -{ -} - static struct file *nfs42_ssc_open(struct vfsmount *ss_mnt, struct nfs_fh *src_fh, nfs4_stateid *stateid) @@ -1770,7 +1758,7 @@ static int nfsd4_do_async_copy(void *data) default: nfserr = nfserr_offload_denied; } - nfsd4_interssc_disconnect(copy->ss_mnt); + /* ss_mnt will be unmounted by the laundromat */ goto do_callback; } nfserr = nfsd4_do_copy(copy, filp, copy->nf_dst->nf_file, @@ -1851,8 +1839,10 @@ out_err: if (async_copy) cleanup_async_copy(async_copy); status = nfserrno(-ENOMEM); - if (nfsd4_ssc_is_inter(copy)) - nfsd4_interssc_disconnect(copy->ss_mnt); + /* + * source's vfsmount of inter-copy will be unmounted + * by the laundromat + */ goto out; } diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 46b8f68a2497..c852ae8eaf37 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -876,8 +876,8 @@ DEFINE_CLID_EVENT(confirmed_r); __print_flags(val, "|", \ { 1 << NFSD_FILE_HASHED, "HASHED" }, \ { 1 << NFSD_FILE_PENDING, "PENDING" }, \ - { 1 << NFSD_FILE_REFERENCED, "REFERENCED"}, \ - { 1 << NFSD_FILE_GC, "GC"}) + { 1 << NFSD_FILE_REFERENCED, "REFERENCED" }, \ + { 1 << NFSD_FILE_GC, "GC" }) DECLARE_EVENT_CLASS(nfsd_file_class, TP_PROTO(struct nfsd_file *nf), @@ -912,6 +912,7 @@ DEFINE_EVENT(nfsd_file_class, name, \ DEFINE_NFSD_FILE_EVENT(nfsd_file_free); DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash); DEFINE_NFSD_FILE_EVENT(nfsd_file_put); +DEFINE_NFSD_FILE_EVENT(nfsd_file_closing); DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_queue); TRACE_EVENT(nfsd_file_alloc, @@ -1103,35 +1104,6 @@ TRACE_EVENT(nfsd_file_open, __entry->nf_file) ) -DECLARE_EVENT_CLASS(nfsd_file_search_class, - TP_PROTO( - const struct inode *inode, - unsigned int count - ), - TP_ARGS(inode, count), - TP_STRUCT__entry( - __field(const struct inode *, inode) - __field(unsigned int, count) - ), - TP_fast_assign( - __entry->inode = inode; - __entry->count = count; - ), - TP_printk("inode=%p count=%u", - __entry->inode, __entry->count) -); - -#define DEFINE_NFSD_FILE_SEARCH_EVENT(name) \ -DEFINE_EVENT(nfsd_file_search_class, name, \ - TP_PROTO( \ - const struct inode *inode, \ - unsigned int count \ - ), \ - TP_ARGS(inode, count)) - -DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode_sync); -DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode); - TRACE_EVENT(nfsd_file_is_cached, TP_PROTO( const struct inode *inode, @@ -1209,7 +1181,6 @@ DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_del_disposed); DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_in_use); DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_writeback); DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_referenced); -DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_hashed); DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_disposed); DECLARE_EVENT_CLASS(nfsd_file_lruwalk_class, @@ -1241,6 +1212,22 @@ DEFINE_EVENT(nfsd_file_lruwalk_class, name, \ DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_gc_removed); DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_shrinker_removed); +TRACE_EVENT(nfsd_file_close, + TP_PROTO( + const struct inode *inode + ), + TP_ARGS(inode), + TP_STRUCT__entry( + __field(const void *, inode) + ), + TP_fast_assign( + __entry->inode = inode; + ), + TP_printk("inode=%p", + __entry->inode + ) +); + TRACE_EVENT(nfsd_file_fsync, TP_PROTO( const struct nfsd_file *nf, diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 3335ef352915..76c3bd88b858 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -2752,7 +2752,7 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) down_write(&nilfs->ns_segctor_sem); - del_timer_sync(&sci->sc_timer); + timer_shutdown_sync(&sci->sc_timer); kfree(sci); } diff --git a/fs/ntfs3/attrib.c b/fs/ntfs3/attrib.c index 71f870d497ae..5e6bafb10f42 100644 --- a/fs/ntfs3/attrib.c +++ b/fs/ntfs3/attrib.c @@ -55,33 +55,6 @@ static inline u64 get_pre_allocated(u64 size) } /* - * attr_must_be_resident - * - * Return: True if attribute must be resident. - */ -static inline bool attr_must_be_resident(struct ntfs_sb_info *sbi, - enum ATTR_TYPE type) -{ - const struct ATTR_DEF_ENTRY *de; - - switch (type) { - case ATTR_STD: - case ATTR_NAME: - case ATTR_ID: - case ATTR_LABEL: - case ATTR_VOL_INFO: - case ATTR_ROOT: - case ATTR_EA_INFO: - return true; - default: - de = ntfs_query_def(sbi, type); - if (de && (de->flags & NTFS_ATTR_MUST_BE_RESIDENT)) - return true; - return false; - } -} - -/* * attr_load_runs - Load all runs stored in @attr. */ static int attr_load_runs(struct ATTRIB *attr, struct ntfs_inode *ni, @@ -101,6 +74,10 @@ static int attr_load_runs(struct ATTRIB *attr, struct ntfs_inode *ni, asize = le32_to_cpu(attr->size); run_off = le16_to_cpu(attr->nres.run_off); + + if (run_off > asize) + return -EINVAL; + err = run_unpack_ex(run, ni->mi.sbi, ni->mi.rno, svcn, evcn, vcn ? *vcn : svcn, Add2Ptr(attr, run_off), asize - run_off); @@ -172,7 +149,7 @@ out: int attr_allocate_clusters(struct ntfs_sb_info *sbi, struct runs_tree *run, CLST vcn, CLST lcn, CLST len, CLST *pre_alloc, enum ALLOCATE_OPT opt, CLST *alen, const size_t fr, - CLST *new_lcn) + CLST *new_lcn, CLST *new_len) { int err; CLST flen, vcn0 = vcn, pre = pre_alloc ? *pre_alloc : 0; @@ -192,20 +169,36 @@ int attr_allocate_clusters(struct ntfs_sb_info *sbi, struct runs_tree *run, if (err) goto out; - if (new_lcn && vcn == vcn0) - *new_lcn = lcn; + if (vcn == vcn0) { + /* Return the first fragment. */ + if (new_lcn) + *new_lcn = lcn; + if (new_len) + *new_len = flen; + } /* Add new fragment into run storage. */ - if (!run_add_entry(run, vcn, lcn, flen, opt == ALLOCATE_MFT)) { + if (!run_add_entry(run, vcn, lcn, flen, opt & ALLOCATE_MFT)) { /* Undo last 'ntfs_look_for_free_space' */ mark_as_free_ex(sbi, lcn, len, false); err = -ENOMEM; goto out; } + if (opt & ALLOCATE_ZERO) { + u8 shift = sbi->cluster_bits - SECTOR_SHIFT; + + err = blkdev_issue_zeroout(sbi->sb->s_bdev, + (sector_t)lcn << shift, + (sector_t)flen << shift, + GFP_NOFS, 0); + if (err) + goto out; + } + vcn += flen; - if (flen >= len || opt == ALLOCATE_MFT || + if (flen >= len || (opt & ALLOCATE_MFT) || (fr && run->count - cnt >= fr)) { *alen = vcn - vcn0; return 0; @@ -280,7 +273,8 @@ int attr_make_nonresident(struct ntfs_inode *ni, struct ATTRIB *attr, const char *data = resident_data(attr); err = attr_allocate_clusters(sbi, run, 0, 0, len, NULL, - ALLOCATE_DEF, &alen, 0, NULL); + ALLOCATE_DEF, &alen, 0, NULL, + NULL); if (err) goto out1; @@ -420,6 +414,7 @@ int attr_set_size(struct ntfs_inode *ni, enum ATTR_TYPE type, CLST alen, vcn, lcn, new_alen, old_alen, svcn, evcn; CLST next_svcn, pre_alloc = -1, done = 0; bool is_ext, is_bad = false; + bool dirty = false; u32 align; struct MFT_REC *rec; @@ -440,8 +435,10 @@ again: return err; /* Return if file is still resident. */ - if (!attr_b->non_res) + if (!attr_b->non_res) { + dirty = true; goto ok1; + } /* Layout of records may be changed, so do a full search. */ goto again; @@ -464,7 +461,7 @@ again_1: if (keep_prealloc && new_size < old_size) { attr_b->nres.data_size = cpu_to_le64(new_size); - mi_b->dirty = true; + mi_b->dirty = dirty = true; goto ok; } @@ -510,7 +507,7 @@ next_le: if (new_alloc <= old_alloc) { attr_b->nres.data_size = cpu_to_le64(new_size); - mi_b->dirty = true; + mi_b->dirty = dirty = true; goto ok; } @@ -575,13 +572,13 @@ add_alloc_in_same_attr_seg: /* ~3 bytes per fragment. */ err = attr_allocate_clusters( sbi, run, vcn, lcn, to_allocate, &pre_alloc, - is_mft ? ALLOCATE_MFT : 0, &alen, + is_mft ? ALLOCATE_MFT : ALLOCATE_DEF, &alen, is_mft ? 0 : (sbi->record_size - le32_to_cpu(rec->used) + 8) / 3 + 1, - NULL); + NULL, NULL); if (err) goto out; } @@ -601,7 +598,7 @@ pack_runs: next_svcn = le64_to_cpu(attr->nres.evcn) + 1; new_alloc_tmp = (u64)next_svcn << cluster_bits; attr_b->nres.alloc_size = cpu_to_le64(new_alloc_tmp); - mi_b->dirty = true; + mi_b->dirty = dirty = true; if (next_svcn >= vcn && !to_allocate) { /* Normal way. Update attribute and exit. */ @@ -687,7 +684,7 @@ pack_runs: old_valid = old_size = old_alloc = (u64)vcn << cluster_bits; attr_b->nres.valid_size = attr_b->nres.data_size = attr_b->nres.alloc_size = cpu_to_le64(old_size); - mi_b->dirty = true; + mi_b->dirty = dirty = true; goto again_1; } @@ -749,7 +746,7 @@ pack_runs: attr_b->nres.valid_size = attr_b->nres.alloc_size; } - mi_b->dirty = true; + mi_b->dirty = dirty = true; err = run_deallocate_ex(sbi, run, vcn, evcn - vcn + 1, &dlen, true); @@ -810,16 +807,9 @@ ok1: if (ret) *ret = attr_b; - /* Update inode_set_bytes. */ if (((type == ATTR_DATA && !name_len) || (type == ATTR_ALLOC && name == I30_NAME))) { - bool dirty = false; - - if (ni->vfs_inode.i_size != new_size) { - ni->vfs_inode.i_size = new_size; - dirty = true; - } - + /* Update inode_set_bytes. */ if (attr_b->non_res) { new_alloc = le64_to_cpu(attr_b->nres.alloc_size); if (inode_get_bytes(&ni->vfs_inode) != new_alloc) { @@ -828,6 +818,7 @@ ok1: } } + /* Don't forget to update duplicate information in parent. */ if (dirty) { ni->ni_flags |= NI_FLAG_UPDATE_PARENT; mark_inode_dirty(&ni->vfs_inode); @@ -878,8 +869,19 @@ bad_inode: return err; } +/* + * attr_data_get_block - Returns 'lcn' and 'len' for given 'vcn'. + * + * @new == NULL means just to get current mapping for 'vcn' + * @new != NULL means allocate real cluster if 'vcn' maps to hole + * @zero - zeroout new allocated clusters + * + * NOTE: + * - @new != NULL is called only for sparsed or compressed attributes. + * - new allocated clusters are zeroed via blkdev_issue_zeroout. + */ int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn, - CLST *len, bool *new) + CLST *len, bool *new, bool zero) { int err = 0; struct runs_tree *run = &ni->file.run; @@ -888,29 +890,29 @@ int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn, struct ATTRIB *attr = NULL, *attr_b; struct ATTR_LIST_ENTRY *le, *le_b; struct mft_inode *mi, *mi_b; - CLST hint, svcn, to_alloc, evcn1, next_svcn, asize, end; - u64 total_size; - u32 clst_per_frame; - bool ok; + CLST hint, svcn, to_alloc, evcn1, next_svcn, asize, end, vcn0, alen; + CLST alloc, evcn; + unsigned fr; + u64 total_size, total_size0; + int step = 0; if (new) *new = false; + /* Try to find in cache. */ down_read(&ni->file.run_lock); - ok = run_lookup_entry(run, vcn, lcn, len, NULL); + if (!run_lookup_entry(run, vcn, lcn, len, NULL)) + *len = 0; up_read(&ni->file.run_lock); - if (ok && (*lcn != SPARSE_LCN || !new)) { - /* Normal way. */ - return 0; + if (*len) { + if (*lcn != SPARSE_LCN || !new) + return 0; /* Fast normal way without allocation. */ + else if (clen > *len) + clen = *len; } - if (!clen) - clen = 1; - - if (ok && clen > *len) - clen = *len; - + /* No cluster in cache or we need to allocate cluster in hole. */ sbi = ni->mi.sbi; cluster_bits = sbi->cluster_bits; @@ -932,16 +934,15 @@ int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn, asize = le64_to_cpu(attr_b->nres.alloc_size) >> cluster_bits; if (vcn >= asize) { - err = -EINVAL; + if (new) { + err = -EINVAL; + } else { + *len = 1; + *lcn = SPARSE_LCN; + } goto out; } - clst_per_frame = 1u << attr_b->nres.c_unit; - to_alloc = (clen + clst_per_frame - 1) & ~(clst_per_frame - 1); - - if (vcn + to_alloc > asize) - to_alloc = asize - vcn; - svcn = le64_to_cpu(attr_b->nres.svcn); evcn1 = le64_to_cpu(attr_b->nres.evcn) + 1; @@ -960,36 +961,68 @@ int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn, evcn1 = le64_to_cpu(attr->nres.evcn) + 1; } + /* Load in cache actual information. */ err = attr_load_runs(attr, ni, run, NULL); if (err) goto out; - if (!ok) { - ok = run_lookup_entry(run, vcn, lcn, len, NULL); - if (ok && (*lcn != SPARSE_LCN || !new)) { - /* Normal way. */ - err = 0; - goto ok; - } + if (!*len) { + if (run_lookup_entry(run, vcn, lcn, len, NULL)) { + if (*lcn != SPARSE_LCN || !new) + goto ok; /* Slow normal way without allocation. */ - if (!ok && !new) { - *len = 0; - err = 0; + if (clen > *len) + clen = *len; + } else if (!new) { + /* Here we may return -ENOENT. + * In any case caller gets zero length. */ goto ok; } - - if (ok && clen > *len) { - clen = *len; - to_alloc = (clen + clst_per_frame - 1) & - ~(clst_per_frame - 1); - } } if (!is_attr_ext(attr_b)) { + /* The code below only for sparsed or compressed attributes. */ err = -EINVAL; goto out; } + vcn0 = vcn; + to_alloc = clen; + fr = (sbi->record_size - le32_to_cpu(mi->mrec->used) + 8) / 3 + 1; + /* Allocate frame aligned clusters. + * ntfs.sys usually uses 16 clusters per frame for sparsed or compressed. + * ntfs3 uses 1 cluster per frame for new created sparsed files. */ + if (attr_b->nres.c_unit) { + CLST clst_per_frame = 1u << attr_b->nres.c_unit; + CLST cmask = ~(clst_per_frame - 1); + + /* Get frame aligned vcn and to_alloc. */ + vcn = vcn0 & cmask; + to_alloc = ((vcn0 + clen + clst_per_frame - 1) & cmask) - vcn; + if (fr < clst_per_frame) + fr = clst_per_frame; + zero = true; + + /* Check if 'vcn' and 'vcn0' in different attribute segments. */ + if (vcn < svcn || evcn1 <= vcn) { + /* Load attribute for truncated vcn. */ + attr = ni_find_attr(ni, attr_b, &le, ATTR_DATA, NULL, 0, + &vcn, &mi); + if (!attr) { + err = -EINVAL; + goto out; + } + svcn = le64_to_cpu(attr->nres.svcn); + evcn1 = le64_to_cpu(attr->nres.evcn) + 1; + err = attr_load_runs(attr, ni, run, NULL); + if (err) + goto out; + } + } + + if (vcn + to_alloc > asize) + to_alloc = asize - vcn; + /* Get the last LCN to allocate from. */ hint = 0; @@ -1003,18 +1036,35 @@ int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn, hint = -1; } - err = attr_allocate_clusters( - sbi, run, vcn, hint + 1, to_alloc, NULL, 0, len, - (sbi->record_size - le32_to_cpu(mi->mrec->used) + 8) / 3 + 1, - lcn); + /* Allocate and zeroout new clusters. */ + err = attr_allocate_clusters(sbi, run, vcn, hint + 1, to_alloc, NULL, + zero ? ALLOCATE_ZERO : ALLOCATE_DEF, &alen, + fr, lcn, len); if (err) goto out; *new = true; + step = 1; - end = vcn + *len; + end = vcn + alen; + /* Save 'total_size0' to restore if error. */ + total_size0 = le64_to_cpu(attr_b->nres.total_size); + total_size = total_size0 + ((u64)alen << cluster_bits); - total_size = le64_to_cpu(attr_b->nres.total_size) + - ((u64)*len << cluster_bits); + if (vcn != vcn0) { + if (!run_lookup_entry(run, vcn0, lcn, len, NULL)) { + err = -EINVAL; + goto out; + } + if (*lcn == SPARSE_LCN) { + /* Internal error. Should not happened. */ + WARN_ON(1); + err = -EINVAL; + goto out; + } + /* Check case when vcn0 + len overlaps new allocated clusters. */ + if (vcn0 + *len > end) + *len = end - vcn0; + } repack: err = mi_pack_runs(mi, attr, run, max(end, evcn1) - svcn); @@ -1040,7 +1090,7 @@ repack: if (!ni->attr_list.size) { err = ni_create_attr_list(ni); if (err) - goto out; + goto undo1; /* Layout of records is changed. */ le_b = NULL; attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL, @@ -1057,67 +1107,83 @@ repack: } } + /* + * The code below may require additional cluster (to extend attribute list) + * and / or one MFT record + * It is too complex to undo operations if -ENOSPC occurs deep inside + * in 'ni_insert_nonresident'. + * Return in advance -ENOSPC here if there are no free cluster and no free MFT. + */ + if (!ntfs_check_for_free_space(sbi, 1, 1)) { + /* Undo step 1. */ + err = -ENOSPC; + goto undo1; + } + + step = 2; svcn = evcn1; /* Estimate next attribute. */ attr = ni_find_attr(ni, attr, &le, ATTR_DATA, NULL, 0, &svcn, &mi); - if (attr) { - CLST alloc = bytes_to_cluster( - sbi, le64_to_cpu(attr_b->nres.alloc_size)); - CLST evcn = le64_to_cpu(attr->nres.evcn); - - if (end < next_svcn) - end = next_svcn; - while (end > evcn) { - /* Remove segment [svcn : evcn). */ - mi_remove_attr(NULL, mi, attr); - - if (!al_remove_le(ni, le)) { - err = -EINVAL; - goto out; - } + if (!attr) { + /* Insert new attribute segment. */ + goto ins_ext; + } - if (evcn + 1 >= alloc) { - /* Last attribute segment. */ - evcn1 = evcn + 1; - goto ins_ext; - } + /* Try to update existed attribute segment. */ + alloc = bytes_to_cluster(sbi, le64_to_cpu(attr_b->nres.alloc_size)); + evcn = le64_to_cpu(attr->nres.evcn); - if (ni_load_mi(ni, le, &mi)) { - attr = NULL; - goto out; - } + if (end < next_svcn) + end = next_svcn; + while (end > evcn) { + /* Remove segment [svcn : evcn). */ + mi_remove_attr(NULL, mi, attr); - attr = mi_find_attr(mi, NULL, ATTR_DATA, NULL, 0, - &le->id); - if (!attr) { - err = -EINVAL; - goto out; - } - svcn = le64_to_cpu(attr->nres.svcn); - evcn = le64_to_cpu(attr->nres.evcn); + if (!al_remove_le(ni, le)) { + err = -EINVAL; + goto out; } - if (end < svcn) - end = svcn; + if (evcn + 1 >= alloc) { + /* Last attribute segment. */ + evcn1 = evcn + 1; + goto ins_ext; + } - err = attr_load_runs(attr, ni, run, &end); - if (err) + if (ni_load_mi(ni, le, &mi)) { + attr = NULL; goto out; + } - evcn1 = evcn + 1; - attr->nres.svcn = cpu_to_le64(next_svcn); - err = mi_pack_runs(mi, attr, run, evcn1 - next_svcn); - if (err) + attr = mi_find_attr(mi, NULL, ATTR_DATA, NULL, 0, &le->id); + if (!attr) { + err = -EINVAL; goto out; + } + svcn = le64_to_cpu(attr->nres.svcn); + evcn = le64_to_cpu(attr->nres.evcn); + } - le->vcn = cpu_to_le64(next_svcn); - ni->attr_list.dirty = true; - mi->dirty = true; + if (end < svcn) + end = svcn; + + err = attr_load_runs(attr, ni, run, &end); + if (err) + goto out; + + evcn1 = evcn + 1; + attr->nres.svcn = cpu_to_le64(next_svcn); + err = mi_pack_runs(mi, attr, run, evcn1 - next_svcn); + if (err) + goto out; + + le->vcn = cpu_to_le64(next_svcn); + ni->attr_list.dirty = true; + mi->dirty = true; + next_svcn = le64_to_cpu(attr->nres.evcn) + 1; - next_svcn = le64_to_cpu(attr->nres.evcn) + 1; - } ins_ext: if (evcn1 > next_svcn) { err = ni_insert_nonresident(ni, ATTR_DATA, NULL, 0, run, @@ -1129,10 +1195,26 @@ ins_ext: ok: run_truncate_around(run, vcn); out: + if (err && step > 1) { + /* Too complex to restore. */ + _ntfs_bad_inode(&ni->vfs_inode); + } up_write(&ni->file.run_lock); ni_unlock(ni); return err; + +undo1: + /* Undo step1. */ + attr_b->nres.total_size = cpu_to_le64(total_size0); + inode_set_bytes(&ni->vfs_inode, total_size0); + + if (run_deallocate_ex(sbi, run, vcn, alen, NULL, false) || + !run_add_entry(run, vcn, SPARSE_LCN, alen, false) || + mi_pack_runs(mi, attr, run, max(end, evcn1) - svcn)) { + _ntfs_bad_inode(&ni->vfs_inode); + } + goto out; } int attr_data_read_resident(struct ntfs_inode *ni, struct page *page) @@ -1217,6 +1299,11 @@ int attr_load_runs_vcn(struct ntfs_inode *ni, enum ATTR_TYPE type, CLST svcn, evcn; u16 ro; + if (!ni) { + /* Is record corrupted? */ + return -ENOENT; + } + attr = ni_find_attr(ni, NULL, NULL, type, name, name_len, &vcn, NULL); if (!attr) { /* Is record corrupted? */ @@ -1232,6 +1319,10 @@ int attr_load_runs_vcn(struct ntfs_inode *ni, enum ATTR_TYPE type, } ro = le16_to_cpu(attr->nres.run_off); + + if (ro > le32_to_cpu(attr->size)) + return -EINVAL; + err = run_unpack_ex(run, ni->mi.sbi, ni->mi.rno, svcn, evcn, svcn, Add2Ptr(attr, ro), le32_to_cpu(attr->size) - ro); if (err < 0) @@ -1530,7 +1621,7 @@ int attr_allocate_frame(struct ntfs_inode *ni, CLST frame, size_t compr_size, struct ATTRIB *attr = NULL, *attr_b; struct ATTR_LIST_ENTRY *le, *le_b; struct mft_inode *mi, *mi_b; - CLST svcn, evcn1, next_svcn, lcn, len; + CLST svcn, evcn1, next_svcn, len; CLST vcn, end, clst_data; u64 total_size, valid_size, data_size; @@ -1606,8 +1697,9 @@ int attr_allocate_frame(struct ntfs_inode *ni, CLST frame, size_t compr_size, } err = attr_allocate_clusters(sbi, run, vcn + clst_data, - hint + 1, len - clst_data, NULL, 0, - &alen, 0, &lcn); + hint + 1, len - clst_data, NULL, + ALLOCATE_DEF, &alen, 0, NULL, + NULL); if (err) goto out; @@ -1901,6 +1993,11 @@ int attr_collapse_range(struct ntfs_inode *ni, u64 vbo, u64 bytes) u16 le_sz; u16 roff = le16_to_cpu(attr->nres.run_off); + if (roff > le32_to_cpu(attr->size)) { + err = -EINVAL; + goto out; + } + run_unpack_ex(RUN_DEALLOCATE, sbi, ni->mi.rno, svcn, evcn1 - 1, svcn, Add2Ptr(attr, roff), le32_to_cpu(attr->size) - roff); @@ -2020,7 +2117,7 @@ int attr_punch_hole(struct ntfs_inode *ni, u64 vbo, u64 bytes, u32 *frame_size) return -ENOENT; if (!attr_b->non_res) { - u32 data_size = le32_to_cpu(attr->res.data_size); + u32 data_size = le32_to_cpu(attr_b->res.data_size); u32 from, to; if (vbo > data_size) @@ -2290,7 +2387,8 @@ int attr_insert_range(struct ntfs_inode *ni, u64 vbo, u64 bytes) if (!attr_b->non_res) { /* Still resident. */ - char *data = Add2Ptr(attr_b, attr_b->res.data_off); + char *data = Add2Ptr(attr_b, + le16_to_cpu(attr_b->res.data_off)); memmove(data + bytes, data, bytes); memset(data, 0, bytes); @@ -2382,8 +2480,8 @@ int attr_insert_range(struct ntfs_inode *ni, u64 vbo, u64 bytes) if (vbo <= ni->i_valid) ni->i_valid += bytes; - attr_b->nres.data_size = le64_to_cpu(data_size + bytes); - attr_b->nres.alloc_size = le64_to_cpu(alloc_size + bytes); + attr_b->nres.data_size = cpu_to_le64(data_size + bytes); + attr_b->nres.alloc_size = cpu_to_le64(alloc_size + bytes); /* ni->valid may be not equal valid_size (temporary). */ if (ni->i_valid > data_size + bytes) diff --git a/fs/ntfs3/attrlist.c b/fs/ntfs3/attrlist.c index bad6d8a849a2..c0c6bcbc8c05 100644 --- a/fs/ntfs3/attrlist.c +++ b/fs/ntfs3/attrlist.c @@ -68,6 +68,11 @@ int ntfs_load_attr_list(struct ntfs_inode *ni, struct ATTRIB *attr) run_init(&ni->attr_list.run); + if (run_off > le32_to_cpu(attr->size)) { + err = -EINVAL; + goto out; + } + err = run_unpack_ex(&ni->attr_list.run, ni->mi.sbi, ni->mi.rno, 0, le64_to_cpu(attr->nres.evcn), 0, Add2Ptr(attr, run_off), diff --git a/fs/ntfs3/bitfunc.c b/fs/ntfs3/bitfunc.c index 50d838093790..25a4d4896aa9 100644 --- a/fs/ntfs3/bitfunc.c +++ b/fs/ntfs3/bitfunc.c @@ -30,7 +30,7 @@ static const u8 zero_mask[] = { 0xFF, 0xFE, 0xFC, 0xF8, 0xF0, * * Return: True if all bits [bit, bit+nbits) are zeros "0". */ -bool are_bits_clear(const ulong *lmap, size_t bit, size_t nbits) +bool are_bits_clear(const void *lmap, size_t bit, size_t nbits) { size_t pos = bit & 7; const u8 *map = (u8 *)lmap + (bit >> 3); @@ -78,7 +78,7 @@ bool are_bits_clear(const ulong *lmap, size_t bit, size_t nbits) * * Return: True if all bits [bit, bit+nbits) are ones "1". */ -bool are_bits_set(const ulong *lmap, size_t bit, size_t nbits) +bool are_bits_set(const void *lmap, size_t bit, size_t nbits) { u8 mask; size_t pos = bit & 7; diff --git a/fs/ntfs3/bitmap.c b/fs/ntfs3/bitmap.c index e92bbd754365..723fb64e6531 100644 --- a/fs/ntfs3/bitmap.c +++ b/fs/ntfs3/bitmap.c @@ -59,14 +59,14 @@ void ntfs3_exit_bitmap(void) * * Return: -1 if not found. */ -static size_t wnd_scan(const ulong *buf, size_t wbit, u32 wpos, u32 wend, +static size_t wnd_scan(const void *buf, size_t wbit, u32 wpos, u32 wend, size_t to_alloc, size_t *prev_tail, size_t *b_pos, size_t *b_len) { while (wpos < wend) { size_t free_len; u32 free_bits, end; - u32 used = find_next_zero_bit(buf, wend, wpos); + u32 used = find_next_zero_bit_le(buf, wend, wpos); if (used >= wend) { if (*b_len < *prev_tail) { @@ -92,7 +92,7 @@ static size_t wnd_scan(const ulong *buf, size_t wbit, u32 wpos, u32 wend, * Now we have a fragment [wpos, wend) staring with 0. */ end = wpos + to_alloc - *prev_tail; - free_bits = find_next_bit(buf, min(end, wend), wpos); + free_bits = find_next_bit_le(buf, min(end, wend), wpos); free_len = *prev_tail + free_bits - wpos; @@ -504,7 +504,6 @@ static int wnd_rescan(struct wnd_bitmap *wnd) u8 cluster_bits = sbi->cluster_bits; u32 wbits = 8 * sb->s_blocksize; u32 used, frb; - const ulong *buf; size_t wpos, wbit, iw, vbo; struct buffer_head *bh = NULL; CLST lcn, clen; @@ -558,9 +557,7 @@ static int wnd_rescan(struct wnd_bitmap *wnd) goto out; } - buf = (ulong *)bh->b_data; - - used = bitmap_weight(buf, wbits); + used = ntfs_bitmap_weight_le(bh->b_data, wbits); if (used < wbits) { frb = wbits - used; wnd->free_bits[iw] = frb; @@ -574,7 +571,7 @@ static int wnd_rescan(struct wnd_bitmap *wnd) wbits = wnd->nbits - wbit; do { - used = find_next_zero_bit(buf, wbits, wpos); + used = find_next_zero_bit_le(bh->b_data, wbits, wpos); if (used > wpos && prev_tail) { wnd_add_free_ext(wnd, wbit + wpos - prev_tail, @@ -590,7 +587,7 @@ static int wnd_rescan(struct wnd_bitmap *wnd) break; } - frb = find_next_bit(buf, wbits, wpos); + frb = find_next_bit_le(bh->b_data, wbits, wpos); if (frb >= wbits) { /* Keep last free block. */ prev_tail += frb - wpos; @@ -661,7 +658,7 @@ int wnd_init(struct wnd_bitmap *wnd, struct super_block *sb, size_t nbits) if (!wnd->bits_last) wnd->bits_last = wbits; - wnd->free_bits = kcalloc(wnd->nwnd, sizeof(u16), GFP_NOFS); + wnd->free_bits = kcalloc(wnd->nwnd, sizeof(u16), GFP_NOFS | __GFP_NOWARN); if (!wnd->free_bits) return -ENOMEM; @@ -718,7 +715,6 @@ int wnd_set_free(struct wnd_bitmap *wnd, size_t bit, size_t bits) while (iw < wnd->nwnd && bits) { u32 tail, op; - ulong *buf; if (iw + 1 == wnd->nwnd) wbits = wnd->bits_last; @@ -732,11 +728,9 @@ int wnd_set_free(struct wnd_bitmap *wnd, size_t bit, size_t bits) break; } - buf = (ulong *)bh->b_data; - lock_buffer(bh); - __bitmap_clear(buf, wbit, op); + ntfs_bitmap_clear_le(bh->b_data, wbit, op); wnd->free_bits[iw] += op; @@ -771,7 +765,6 @@ int wnd_set_used(struct wnd_bitmap *wnd, size_t bit, size_t bits) while (iw < wnd->nwnd && bits) { u32 tail, op; - ulong *buf; if (unlikely(iw + 1 == wnd->nwnd)) wbits = wnd->bits_last; @@ -784,11 +777,10 @@ int wnd_set_used(struct wnd_bitmap *wnd, size_t bit, size_t bits) err = PTR_ERR(bh); break; } - buf = (ulong *)bh->b_data; lock_buffer(bh); - __bitmap_set(buf, wbit, op); + ntfs_bitmap_set_le(bh->b_data, wbit, op); wnd->free_bits[iw] -= op; set_buffer_uptodate(bh); @@ -809,6 +801,44 @@ int wnd_set_used(struct wnd_bitmap *wnd, size_t bit, size_t bits) } /* + * wnd_set_used_safe - Mark the bits range from bit to bit + bits as used. + * + * Unlikely wnd_set_used/wnd_set_free this function is not full trusted. + * It scans every bit in bitmap and marks free bit as used. + * @done - how many bits were marked as used. + * + * NOTE: normally *done should be 0. + */ +int wnd_set_used_safe(struct wnd_bitmap *wnd, size_t bit, size_t bits, + size_t *done) +{ + size_t i, from = 0, len = 0; + int err = 0; + + *done = 0; + for (i = 0; i < bits; i++) { + if (wnd_is_free(wnd, bit + i, 1)) { + if (!len) + from = bit + i; + len += 1; + } else if (len) { + err = wnd_set_used(wnd, from, len); + *done += len; + len = 0; + if (err) + break; + } + } + + if (len) { + /* last fragment. */ + err = wnd_set_used(wnd, from, len); + *done += len; + } + return err; +} + +/* * wnd_is_free_hlp * * Return: True if all clusters [bit, bit+bits) are free (bitmap only). @@ -836,7 +866,7 @@ static bool wnd_is_free_hlp(struct wnd_bitmap *wnd, size_t bit, size_t bits) if (IS_ERR(bh)) return false; - ret = are_bits_clear((ulong *)bh->b_data, wbit, op); + ret = are_bits_clear(bh->b_data, wbit, op); put_bh(bh); if (!ret) @@ -928,7 +958,7 @@ use_wnd: if (IS_ERR(bh)) goto out; - ret = are_bits_set((ulong *)bh->b_data, wbit, op); + ret = are_bits_set(bh->b_data, wbit, op); put_bh(bh); if (!ret) goto out; @@ -959,7 +989,6 @@ size_t wnd_find(struct wnd_bitmap *wnd, size_t to_alloc, size_t hint, size_t fnd, max_alloc, b_len, b_pos; size_t iw, prev_tail, nwnd, wbit, ebit, zbit, zend; size_t to_alloc0 = to_alloc; - const ulong *buf; const struct e_node *e; const struct rb_node *pr, *cr; u8 log2_bits; @@ -1185,14 +1214,13 @@ Again: continue; } - buf = (ulong *)bh->b_data; - /* Scan range [wbit, zbit). */ if (wpos < wzbit) { /* Scan range [wpos, zbit). */ - fnd = wnd_scan(buf, wbit, wpos, wzbit, - to_alloc, &prev_tail, - &b_pos, &b_len); + fnd = wnd_scan(bh->b_data, wbit, wpos, + wzbit, to_alloc, + &prev_tail, &b_pos, + &b_len); if (fnd != MINUS_ONE_T) { put_bh(bh); goto found; @@ -1203,7 +1231,7 @@ Again: /* Scan range [zend, ebit). */ if (wzend < wbits) { - fnd = wnd_scan(buf, wbit, + fnd = wnd_scan(bh->b_data, wbit, max(wzend, wpos), wbits, to_alloc, &prev_tail, &b_pos, &b_len); @@ -1242,11 +1270,9 @@ Again: continue; } - buf = (ulong *)bh->b_data; - /* Scan range [wpos, eBits). */ - fnd = wnd_scan(buf, wbit, wpos, wbits, to_alloc, &prev_tail, - &b_pos, &b_len); + fnd = wnd_scan(bh->b_data, wbit, wpos, wbits, to_alloc, + &prev_tail, &b_pos, &b_len); put_bh(bh); if (fnd != MINUS_ONE_T) goto found; @@ -1324,7 +1350,7 @@ int wnd_extend(struct wnd_bitmap *wnd, size_t new_bits) new_last = wbits; if (new_wnd != wnd->nwnd) { - new_free = kmalloc(new_wnd * sizeof(u16), GFP_NOFS); + new_free = kmalloc_array(new_wnd, sizeof(u16), GFP_NOFS); if (!new_free) return -ENOMEM; @@ -1344,7 +1370,6 @@ int wnd_extend(struct wnd_bitmap *wnd, size_t new_bits) size_t frb; u64 vbo, lbo, bytes; struct buffer_head *bh; - ulong *buf; if (iw + 1 == new_wnd) wbits = new_last; @@ -1361,10 +1386,9 @@ int wnd_extend(struct wnd_bitmap *wnd, size_t new_bits) return -EIO; lock_buffer(bh); - buf = (ulong *)bh->b_data; - __bitmap_clear(buf, b0, blocksize * 8 - b0); - frb = wbits - bitmap_weight(buf, wbits); + ntfs_bitmap_clear_le(bh->b_data, b0, blocksize * 8 - b0); + frb = wbits - ntfs_bitmap_weight_le(bh->b_data, wbits); wnd->total_zeroes += frb - wnd->free_bits[iw]; wnd->free_bits[iw] = frb; @@ -1411,7 +1435,6 @@ int ntfs_trim_fs(struct ntfs_sb_info *sbi, struct fstrim_range *range) CLST lcn_from = bytes_to_cluster(sbi, range->start); size_t iw = lcn_from >> (sb->s_blocksize_bits + 3); u32 wbit = lcn_from & (wbits - 1); - const ulong *buf; CLST lcn_to; if (!minlen) @@ -1424,7 +1447,7 @@ int ntfs_trim_fs(struct ntfs_sb_info *sbi, struct fstrim_range *range) down_read_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS); - for (; iw < wnd->nbits; iw++, wbit = 0) { + for (; iw < wnd->nwnd; iw++, wbit = 0) { CLST lcn_wnd = iw * wbits; struct buffer_head *bh; @@ -1446,10 +1469,8 @@ int ntfs_trim_fs(struct ntfs_sb_info *sbi, struct fstrim_range *range) break; } - buf = (ulong *)bh->b_data; - for (; wbit < wbits; wbit++) { - if (!test_bit(wbit, buf)) { + if (!test_bit_le(wbit, bh->b_data)) { if (!len) lcn = lcn_wnd + wbit; len += 1; @@ -1481,3 +1502,70 @@ out: return err; } + +#if BITS_PER_LONG == 64 +typedef __le64 bitmap_ulong; +#define cpu_to_ul(x) cpu_to_le64(x) +#define ul_to_cpu(x) le64_to_cpu(x) +#else +typedef __le32 bitmap_ulong; +#define cpu_to_ul(x) cpu_to_le32(x) +#define ul_to_cpu(x) le32_to_cpu(x) +#endif + +void ntfs_bitmap_set_le(void *map, unsigned int start, int len) +{ + bitmap_ulong *p = (bitmap_ulong *)map + BIT_WORD(start); + const unsigned int size = start + len; + int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG); + bitmap_ulong mask_to_set = cpu_to_ul(BITMAP_FIRST_WORD_MASK(start)); + + while (len - bits_to_set >= 0) { + *p |= mask_to_set; + len -= bits_to_set; + bits_to_set = BITS_PER_LONG; + mask_to_set = cpu_to_ul(~0UL); + p++; + } + if (len) { + mask_to_set &= cpu_to_ul(BITMAP_LAST_WORD_MASK(size)); + *p |= mask_to_set; + } +} + +void ntfs_bitmap_clear_le(void *map, unsigned int start, int len) +{ + bitmap_ulong *p = (bitmap_ulong *)map + BIT_WORD(start); + const unsigned int size = start + len; + int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG); + bitmap_ulong mask_to_clear = cpu_to_ul(BITMAP_FIRST_WORD_MASK(start)); + + while (len - bits_to_clear >= 0) { + *p &= ~mask_to_clear; + len -= bits_to_clear; + bits_to_clear = BITS_PER_LONG; + mask_to_clear = cpu_to_ul(~0UL); + p++; + } + if (len) { + mask_to_clear &= cpu_to_ul(BITMAP_LAST_WORD_MASK(size)); + *p &= ~mask_to_clear; + } +} + +unsigned int ntfs_bitmap_weight_le(const void *bitmap, int bits) +{ + const ulong *bmp = bitmap; + unsigned int k, lim = bits / BITS_PER_LONG; + unsigned int w = 0; + + for (k = 0; k < lim; k++) + w += hweight_long(bmp[k]); + + if (bits % BITS_PER_LONG) { + w += hweight_long(ul_to_cpu(((bitmap_ulong *)bitmap)[k]) & + BITMAP_LAST_WORD_MASK(bits)); + } + + return w; +} diff --git a/fs/ntfs3/dir.c b/fs/ntfs3/dir.c index fb438d604040..063a6654199b 100644 --- a/fs/ntfs3/dir.c +++ b/fs/ntfs3/dir.c @@ -26,8 +26,8 @@ int ntfs_utf16_to_nls(struct ntfs_sb_info *sbi, const __le16 *name, u32 len, if (!nls) { /* UTF-16 -> UTF-8 */ - ret = utf16s_to_utf8s(name, len, UTF16_LITTLE_ENDIAN, buf, - buf_len); + ret = utf16s_to_utf8s((wchar_t *)name, len, UTF16_LITTLE_ENDIAN, + buf, buf_len); buf[ret] = '\0'; return ret; } diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index c5e4a886593d..e5399ebc3a2b 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -122,31 +122,15 @@ static int ntfs_extend_initialized_size(struct file *file, bits = sbi->cluster_bits; vcn = pos >> bits; - err = attr_data_get_block(ni, vcn, 0, &lcn, &clen, - NULL); + err = attr_data_get_block(ni, vcn, 1, &lcn, &clen, NULL, + false); if (err) goto out; if (lcn == SPARSE_LCN) { - loff_t vbo = (loff_t)vcn << bits; - loff_t to = vbo + ((loff_t)clen << bits); - - if (to <= new_valid) { - ni->i_valid = to; - pos = to; - goto next; - } - - if (vbo < pos) { - pos = vbo; - } else { - to = (new_valid >> bits) << bits; - if (pos < to) { - ni->i_valid = to; - pos = to; - goto next; - } - } + pos = ((loff_t)clen + vcn) << bits; + ni->i_valid = pos; + goto next; } } @@ -196,18 +180,18 @@ static int ntfs_zero_range(struct inode *inode, u64 vbo, u64 vbo_to) struct address_space *mapping = inode->i_mapping; u32 blocksize = 1 << inode->i_blkbits; pgoff_t idx = vbo >> PAGE_SHIFT; - u32 z_start = vbo & (PAGE_SIZE - 1); + u32 from = vbo & (PAGE_SIZE - 1); pgoff_t idx_end = (vbo_to + PAGE_SIZE - 1) >> PAGE_SHIFT; loff_t page_off; struct buffer_head *head, *bh; - u32 bh_next, bh_off, z_end; + u32 bh_next, bh_off, to; sector_t iblock; struct page *page; - for (; idx < idx_end; idx += 1, z_start = 0) { + for (; idx < idx_end; idx += 1, from = 0) { page_off = (loff_t)idx << PAGE_SHIFT; - z_end = (page_off + PAGE_SIZE) > vbo_to ? (vbo_to - page_off) - : PAGE_SIZE; + to = (page_off + PAGE_SIZE) > vbo_to ? (vbo_to - page_off) + : PAGE_SIZE; iblock = page_off >> inode->i_blkbits; page = find_or_create_page(mapping, idx, @@ -224,7 +208,7 @@ static int ntfs_zero_range(struct inode *inode, u64 vbo, u64 vbo_to) do { bh_next = bh_off + blocksize; - if (bh_next <= z_start || bh_off >= z_end) + if (bh_next <= from || bh_off >= to) continue; if (!buffer_mapped(bh)) { @@ -258,7 +242,7 @@ static int ntfs_zero_range(struct inode *inode, u64 vbo, u64 vbo_to) } while (bh_off = bh_next, iblock += 1, head != (bh = bh->b_this_page)); - zero_user_segment(page, z_start, z_end); + zero_user_segment(page, from, to); unlock_page(page); put_page(page); @@ -270,81 +254,6 @@ out: } /* - * ntfs_sparse_cluster - Helper function to zero a new allocated clusters. - * - * NOTE: 512 <= cluster size <= 2M - */ -void ntfs_sparse_cluster(struct inode *inode, struct page *page0, CLST vcn, - CLST len) -{ - struct address_space *mapping = inode->i_mapping; - struct ntfs_sb_info *sbi = inode->i_sb->s_fs_info; - u64 vbo = (u64)vcn << sbi->cluster_bits; - u64 bytes = (u64)len << sbi->cluster_bits; - u32 blocksize = 1 << inode->i_blkbits; - pgoff_t idx0 = page0 ? page0->index : -1; - loff_t vbo_clst = vbo & sbi->cluster_mask_inv; - loff_t end = ntfs_up_cluster(sbi, vbo + bytes); - pgoff_t idx = vbo_clst >> PAGE_SHIFT; - u32 from = vbo_clst & (PAGE_SIZE - 1); - pgoff_t idx_end = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; - loff_t page_off; - u32 to; - bool partial; - struct page *page; - - for (; idx < idx_end; idx += 1, from = 0) { - page = idx == idx0 ? page0 : grab_cache_page(mapping, idx); - - if (!page) - continue; - - page_off = (loff_t)idx << PAGE_SHIFT; - to = (page_off + PAGE_SIZE) > end ? (end - page_off) - : PAGE_SIZE; - partial = false; - - if ((from || PAGE_SIZE != to) && - likely(!page_has_buffers(page))) { - create_empty_buffers(page, blocksize, 0); - } - - if (page_has_buffers(page)) { - struct buffer_head *head, *bh; - u32 bh_off = 0; - - bh = head = page_buffers(page); - do { - u32 bh_next = bh_off + blocksize; - - if (from <= bh_off && bh_next <= to) { - set_buffer_uptodate(bh); - mark_buffer_dirty(bh); - } else if (!buffer_uptodate(bh)) { - partial = true; - } - bh_off = bh_next; - } while (head != (bh = bh->b_this_page)); - } - - zero_user_segment(page, from, to); - - if (!partial) { - if (!PageUptodate(page)) - SetPageUptodate(page); - set_page_dirty(page); - } - - if (idx != idx0) { - unlock_page(page); - put_page(page); - } - cond_resched(); - } - mark_inode_dirty(inode); -} - -/* * ntfs_file_mmap - file_operations::mmap */ static int ntfs_file_mmap(struct file *file, struct vm_area_struct *vma) @@ -385,13 +294,9 @@ static int ntfs_file_mmap(struct file *file, struct vm_area_struct *vma) for (; vcn < end; vcn += len) { err = attr_data_get_block(ni, vcn, 1, &lcn, - &len, &new); + &len, &new, true); if (err) goto out; - - if (!new) - continue; - ntfs_sparse_cluster(inode, NULL, vcn, 1); } } @@ -432,7 +337,6 @@ static int ntfs_extend(struct inode *inode, loff_t pos, size_t count, err = ntfs_set_size(inode, end); if (err) goto out; - inode->i_size = end; } if (extend_init && !is_compressed(ni)) { @@ -535,7 +439,8 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len) struct ntfs_sb_info *sbi = sb->s_fs_info; struct ntfs_inode *ni = ntfs_i(inode); loff_t end = vbo + len; - loff_t vbo_down = round_down(vbo, PAGE_SIZE); + loff_t vbo_down = round_down(vbo, max_t(unsigned long, + sbi->cluster_size, PAGE_SIZE)); bool is_supported_holes = is_sparsed(ni) || is_compressed(ni); loff_t i_size, new_size; bool map_locked; @@ -588,11 +493,8 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len) u32 frame_size; loff_t mask, vbo_a, end_a, tmp; - err = filemap_write_and_wait_range(mapping, vbo, end - 1); - if (err) - goto out; - - err = filemap_write_and_wait_range(mapping, end, LLONG_MAX); + err = filemap_write_and_wait_range(mapping, vbo_down, + LLONG_MAX); if (err) goto out; @@ -685,47 +587,45 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len) if (err) goto out; - /* - * Allocate clusters, do not change 'valid' size. - */ - err = ntfs_set_size(inode, new_size); - if (err) - goto out; + if (new_size > i_size) { + /* + * Allocate clusters, do not change 'valid' size. + */ + err = ntfs_set_size(inode, new_size); + if (err) + goto out; + } if (is_supported_holes) { - CLST vcn_v = ni->i_valid >> sbi->cluster_bits; CLST vcn = vbo >> sbi->cluster_bits; CLST cend = bytes_to_cluster(sbi, end); + CLST cend_v = bytes_to_cluster(sbi, ni->i_valid); CLST lcn, clen; bool new; + if (cend_v > cend) + cend_v = cend; + /* - * Allocate but do not zero new clusters. (see below comments) - * This breaks security: One can read unused on-disk areas. + * Allocate and zero new clusters. * Zeroing these clusters may be too long. - * Maybe we should check here for root rights? + */ + for (; vcn < cend_v; vcn += clen) { + err = attr_data_get_block(ni, vcn, cend_v - vcn, + &lcn, &clen, &new, + true); + if (err) + goto out; + } + /* + * Allocate but not zero new clusters. */ for (; vcn < cend; vcn += clen) { err = attr_data_get_block(ni, vcn, cend - vcn, - &lcn, &clen, &new); + &lcn, &clen, &new, + false); if (err) goto out; - if (!new || vcn >= vcn_v) - continue; - - /* - * Unwritten area. - * NTFS is not able to store several unwritten areas. - * Activate 'ntfs_sparse_cluster' to zero new allocated clusters. - * - * Dangerous in case: - * 1G of sparsed clusters + 1 cluster of data => - * valid_size == 1G + 1 cluster - * fallocate(1G) will zero 1G and this can be very long - * xfstest 016/086 will fail without 'ntfs_sparse_cluster'. - */ - ntfs_sparse_cluster(inode, NULL, vcn, - min(vcn_v - vcn, clen)); } } @@ -736,6 +636,8 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len) &ni->file.run, i_size, &ni->i_valid, true, NULL); ni_unlock(ni); + } else if (new_size > i_size) { + inode->i_size = new_size; } } @@ -779,7 +681,7 @@ int ntfs3_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, goto out; if (ia_valid & ATTR_SIZE) { - loff_t oldsize = inode->i_size; + loff_t newsize, oldsize; if (WARN_ON(ni->ni_flags & NI_FLAG_COMPRESSED_MASK)) { /* Should never be here, see ntfs_file_open(). */ @@ -787,16 +689,19 @@ int ntfs3_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, goto out; } inode_dio_wait(inode); + oldsize = inode->i_size; + newsize = attr->ia_size; - if (attr->ia_size <= oldsize) - err = ntfs_truncate(inode, attr->ia_size); - else if (attr->ia_size > oldsize) - err = ntfs_extend(inode, attr->ia_size, 0, NULL); + if (newsize <= oldsize) + err = ntfs_truncate(inode, newsize); + else + err = ntfs_extend(inode, newsize, 0, NULL); if (err) goto out; ni->ni_flags |= NI_FLAG_UPDATE_PARENT; + inode->i_size = newsize; } setattr_copy(mnt_userns, inode, attr); @@ -946,8 +851,8 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from) frame_vbo = valid & ~(frame_size - 1); off = valid & (frame_size - 1); - err = attr_data_get_block(ni, frame << NTFS_LZNT_CUNIT, 0, &lcn, - &clen, NULL); + err = attr_data_get_block(ni, frame << NTFS_LZNT_CUNIT, 1, &lcn, + &clen, NULL, false); if (err) goto out; diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c index 381a38a06ec2..f1df52dfab74 100644 --- a/fs/ntfs3/frecord.c +++ b/fs/ntfs3/frecord.c @@ -557,7 +557,7 @@ static int ni_repack(struct ntfs_inode *ni) } if (!mi_p) { - /* Do not try if not enogh free space. */ + /* Do not try if not enough free space. */ if (le32_to_cpu(mi->mrec->used) + 8 >= rs) continue; @@ -568,6 +568,12 @@ static int ni_repack(struct ntfs_inode *ni) } roff = le16_to_cpu(attr->nres.run_off); + + if (roff > le32_to_cpu(attr->size)) { + err = -EINVAL; + break; + } + err = run_unpack(&run, sbi, ni->mi.rno, svcn, evcn, svcn, Add2Ptr(attr, roff), le32_to_cpu(attr->size) - roff); @@ -1589,6 +1595,9 @@ int ni_delete_all(struct ntfs_inode *ni) asize = le32_to_cpu(attr->size); roff = le16_to_cpu(attr->nres.run_off); + if (roff > asize) + return -EINVAL; + /* run==1 means unpack and deallocate. */ run_unpack_ex(RUN_DEALLOCATE, sbi, ni->mi.rno, svcn, evcn, svcn, Add2Ptr(attr, roff), asize - roff); @@ -1636,6 +1645,7 @@ struct ATTR_FILE_NAME *ni_fname_name(struct ntfs_inode *ni, { struct ATTRIB *attr = NULL; struct ATTR_FILE_NAME *fname; + struct le_str *fns; if (le) *le = NULL; @@ -1659,8 +1669,8 @@ next: if (uni->len != fname->name_len) goto next; - if (ntfs_cmp_names_cpu(uni, (struct le_str *)&fname->name_len, NULL, - false)) + fns = (struct le_str *)&fname->name_len; + if (ntfs_cmp_names_cpu(uni, fns, NULL, false)) goto next; return fname; @@ -2214,7 +2224,7 @@ int ni_decompress_file(struct ntfs_inode *ni) for (vcn = vbo >> sbi->cluster_bits; vcn < end; vcn += clen) { err = attr_data_get_block(ni, vcn, cend - vcn, &lcn, - &clen, &new); + &clen, &new, false); if (err) goto out; } @@ -2291,6 +2301,11 @@ remove_wof: asize = le32_to_cpu(attr->size); roff = le16_to_cpu(attr->nres.run_off); + if (roff > asize) { + err = -EINVAL; + goto out; + } + /*run==1 Means unpack and deallocate. */ run_unpack_ex(RUN_DEALLOCATE, sbi, ni->mi.rno, svcn, evcn, svcn, Add2Ptr(attr, roff), asize - roff); @@ -2997,6 +3012,7 @@ int ni_add_name(struct ntfs_inode *dir_ni, struct ntfs_inode *ni, struct NTFS_DE *de) { int err; + struct ntfs_sb_info *sbi = ni->mi.sbi; struct ATTRIB *attr; struct ATTR_LIST_ENTRY *le; struct mft_inode *mi; @@ -3004,6 +3020,19 @@ int ni_add_name(struct ntfs_inode *dir_ni, struct ntfs_inode *ni, struct ATTR_FILE_NAME *de_name = (struct ATTR_FILE_NAME *)(de + 1); u16 de_key_size = le16_to_cpu(de->key_size); + if (sbi->options->windows_names && + !valid_windows_name(sbi, (struct le_str *)&de_name->name_len)) + return -EINVAL; + + /* If option "hide_dot_files" then set hidden attribute for dot files. */ + if (ni->mi.sbi->options->hide_dot_files) { + if (de_name->name_len > 0 && + le16_to_cpu(de_name->name[0]) == '.') + ni->std_fa |= FILE_ATTRIBUTE_HIDDEN; + else + ni->std_fa &= ~FILE_ATTRIBUTE_HIDDEN; + } + mi_get_ref(&ni->mi, &de->ref); mi_get_ref(&dir_ni->mi, &de_name->home); @@ -3022,7 +3051,7 @@ int ni_add_name(struct ntfs_inode *dir_ni, struct ntfs_inode *ni, memcpy(Add2Ptr(attr, SIZEOF_RESIDENT), de_name, de_key_size); /* Insert new name into directory. */ - err = indx_insert_entry(&dir_ni->dir, dir_ni, de, ni->mi.sbi, NULL, 0); + err = indx_insert_entry(&dir_ni->dir, dir_ni, de, sbi, NULL, 0); if (err) ni_remove_attr_le(ni, attr, mi, le); @@ -3265,6 +3294,7 @@ int ni_write_inode(struct inode *inode, int sync, const char *hint) modified = true; } + /* std attribute is always in primary MFT record. */ if (modified) ni->mi.dirty = true; diff --git a/fs/ntfs3/fslog.c b/fs/ntfs3/fslog.c index 0d611a6c5511..c6eb371a3695 100644 --- a/fs/ntfs3/fslog.c +++ b/fs/ntfs3/fslog.c @@ -1132,7 +1132,7 @@ static int read_log_page(struct ntfs_log *log, u32 vbo, return -EINVAL; if (!*buffer) { - to_free = kmalloc(bytes, GFP_NOFS); + to_free = kmalloc(log->page_size, GFP_NOFS); if (!to_free) return -ENOMEM; *buffer = to_free; @@ -1180,10 +1180,7 @@ static int log_read_rst(struct ntfs_log *log, u32 l_size, bool first, struct restart_info *info) { u32 skip, vbo; - struct RESTART_HDR *r_page = kmalloc(DefaultLogPageSize, GFP_NOFS); - - if (!r_page) - return -ENOMEM; + struct RESTART_HDR *r_page = NULL; /* Determine which restart area we are looking for. */ if (first) { @@ -1197,7 +1194,6 @@ static int log_read_rst(struct ntfs_log *log, u32 l_size, bool first, /* Loop continuously until we succeed. */ for (; vbo < l_size; vbo = 2 * vbo + skip, skip = 0) { bool usa_error; - u32 sys_page_size; bool brst, bchk; struct RESTART_AREA *ra; @@ -1251,24 +1247,6 @@ static int log_read_rst(struct ntfs_log *log, u32 l_size, bool first, goto check_result; } - /* Read the entire restart area. */ - sys_page_size = le32_to_cpu(r_page->sys_page_size); - if (DefaultLogPageSize != sys_page_size) { - kfree(r_page); - r_page = kzalloc(sys_page_size, GFP_NOFS); - if (!r_page) - return -ENOMEM; - - if (read_log_page(log, vbo, - (struct RECORD_PAGE_HDR **)&r_page, - &usa_error)) { - /* Ignore any errors. */ - kfree(r_page); - r_page = NULL; - continue; - } - } - if (is_client_area_valid(r_page, usa_error)) { info->valid_page = true; ra = Add2Ptr(r_page, le16_to_cpu(r_page->ra_off)); @@ -2727,6 +2705,9 @@ static inline bool check_attr(const struct MFT_REC *rec, return false; } + if (run_off > asize) + return false; + if (run_unpack(NULL, sbi, 0, svcn, evcn, svcn, Add2Ptr(attr, run_off), asize - run_off) < 0) { return false; @@ -3048,7 +3029,7 @@ static int do_action(struct ntfs_log *log, struct OPEN_ATTR_ENRTY *oe, struct NEW_ATTRIBUTE_SIZES *new_sz; struct ATTR_FILE_NAME *fname; struct OpenAttr *oa, *oa2; - u32 nsize, t32, asize, used, esize, bmp_off, bmp_bits; + u32 nsize, t32, asize, used, esize, off, bits; u16 id, id2; u32 record_size = sbi->record_size; u64 t64; @@ -3635,30 +3616,28 @@ move_data: break; case SetBitsInNonresidentBitMap: - bmp_off = - le32_to_cpu(((struct BITMAP_RANGE *)data)->bitmap_off); - bmp_bits = le32_to_cpu(((struct BITMAP_RANGE *)data)->bits); + off = le32_to_cpu(((struct BITMAP_RANGE *)data)->bitmap_off); + bits = le32_to_cpu(((struct BITMAP_RANGE *)data)->bits); - if (cbo + (bmp_off + 7) / 8 > lco || - cbo + ((bmp_off + bmp_bits + 7) / 8) > lco) { + if (cbo + (off + 7) / 8 > lco || + cbo + ((off + bits + 7) / 8) > lco) { goto dirty_vol; } - __bitmap_set(Add2Ptr(buffer_le, roff), bmp_off, bmp_bits); + ntfs_bitmap_set_le(Add2Ptr(buffer_le, roff), off, bits); a_dirty = true; break; case ClearBitsInNonresidentBitMap: - bmp_off = - le32_to_cpu(((struct BITMAP_RANGE *)data)->bitmap_off); - bmp_bits = le32_to_cpu(((struct BITMAP_RANGE *)data)->bits); + off = le32_to_cpu(((struct BITMAP_RANGE *)data)->bitmap_off); + bits = le32_to_cpu(((struct BITMAP_RANGE *)data)->bits); - if (cbo + (bmp_off + 7) / 8 > lco || - cbo + ((bmp_off + bmp_bits + 7) / 8) > lco) { + if (cbo + (off + 7) / 8 > lco || + cbo + ((off + bits + 7) / 8) > lco) { goto dirty_vol; } - __bitmap_clear(Add2Ptr(buffer_le, roff), bmp_off, bmp_bits); + ntfs_bitmap_clear_le(Add2Ptr(buffer_le, roff), off, bits); a_dirty = true; break; @@ -4771,6 +4750,12 @@ fake_attr: u16 roff = le16_to_cpu(attr->nres.run_off); CLST svcn = le64_to_cpu(attr->nres.svcn); + if (roff > t32) { + kfree(oa->attr); + oa->attr = NULL; + goto fake_attr; + } + err = run_unpack(&oa->run0, sbi, inode->i_ino, svcn, le64_to_cpu(attr->nres.evcn), svcn, Add2Ptr(attr, roff), t32 - roff); @@ -4839,8 +4824,7 @@ next_dirty_page_vcn: goto out; } attr = oa->attr; - t64 = le64_to_cpu(attr->nres.alloc_size); - if (size > t64) { + if (size > le64_to_cpu(attr->nres.alloc_size)) { attr->nres.valid_size = attr->nres.data_size = attr->nres.alloc_size = cpu_to_le64(size); } diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c index 4ed15f64b17f..567563771bf8 100644 --- a/fs/ntfs3/fsntfs.c +++ b/fs/ntfs3/fsntfs.c @@ -98,6 +98,30 @@ const __le16 WOF_NAME[17] = { }; #endif +static const __le16 CON_NAME[3] = { + cpu_to_le16('C'), cpu_to_le16('O'), cpu_to_le16('N'), +}; + +static const __le16 NUL_NAME[3] = { + cpu_to_le16('N'), cpu_to_le16('U'), cpu_to_le16('L'), +}; + +static const __le16 AUX_NAME[3] = { + cpu_to_le16('A'), cpu_to_le16('U'), cpu_to_le16('X'), +}; + +static const __le16 PRN_NAME[3] = { + cpu_to_le16('P'), cpu_to_le16('R'), cpu_to_le16('N'), +}; + +static const __le16 COM_NAME[3] = { + cpu_to_le16('C'), cpu_to_le16('O'), cpu_to_le16('M'), +}; + +static const __le16 LPT_NAME[3] = { + cpu_to_le16('L'), cpu_to_le16('P'), cpu_to_le16('T'), +}; + // clang-format on /* @@ -322,35 +346,6 @@ out: } /* - * ntfs_query_def - * - * Return: Current ATTR_DEF_ENTRY for given attribute type. - */ -const struct ATTR_DEF_ENTRY *ntfs_query_def(struct ntfs_sb_info *sbi, - enum ATTR_TYPE type) -{ - int type_in = le32_to_cpu(type); - size_t min_idx = 0; - size_t max_idx = sbi->def_entries - 1; - - while (min_idx <= max_idx) { - size_t i = min_idx + ((max_idx - min_idx) >> 1); - const struct ATTR_DEF_ENTRY *entry = sbi->def_table + i; - int diff = le32_to_cpu(entry->type) - type_in; - - if (!diff) - return entry; - if (diff < 0) - min_idx = i + 1; - else if (i) - max_idx = i - 1; - else - return NULL; - } - return NULL; -} - -/* * ntfs_look_for_free_space - Look for a free space in bitmap. */ int ntfs_look_for_free_space(struct ntfs_sb_info *sbi, CLST lcn, CLST len, @@ -449,6 +444,39 @@ up_write: } /* + * ntfs_check_for_free_space + * + * Check if it is possible to allocate 'clen' clusters and 'mlen' Mft records + */ +bool ntfs_check_for_free_space(struct ntfs_sb_info *sbi, CLST clen, CLST mlen) +{ + size_t free, zlen, avail; + struct wnd_bitmap *wnd; + + wnd = &sbi->used.bitmap; + down_read_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS); + free = wnd_zeroes(wnd); + zlen = min_t(size_t, NTFS_MIN_MFT_ZONE, wnd_zone_len(wnd)); + up_read(&wnd->rw_lock); + + if (free < zlen + clen) + return false; + + avail = free - (zlen + clen); + + wnd = &sbi->mft.bitmap; + down_read_nested(&wnd->rw_lock, BITMAP_MUTEX_MFT); + free = wnd_zeroes(wnd); + zlen = wnd_zone_len(wnd); + up_read(&wnd->rw_lock); + + if (free >= zlen + mlen) + return true; + + return avail >= bytes_to_cluster(sbi, mlen << sbi->record_bits); +} + +/* * ntfs_extend_mft - Allocate additional MFT records. * * sbi->mft.bitmap is locked for write. @@ -475,7 +503,7 @@ static int ntfs_extend_mft(struct ntfs_sb_info *sbi) struct ATTRIB *attr; struct wnd_bitmap *wnd = &sbi->mft.bitmap; - new_mft_total = (wnd->nbits + MFT_INCREASE_CHUNK + 127) & (CLST)~127; + new_mft_total = ALIGN(wnd->nbits + NTFS_MFT_INCREASE_STEP, 128); new_mft_bytes = (u64)new_mft_total << sbi->record_bits; /* Step 1: Resize $MFT::DATA. */ @@ -618,13 +646,13 @@ next: NULL, 0, NULL, NULL)) goto next; - __clear_bit(ir - MFT_REC_RESERVED, + __clear_bit_le(ir - MFT_REC_RESERVED, &sbi->mft.reserved_bitmap); } } /* Scan 5 bits for zero. Bit 0 == MFT_REC_RESERVED */ - zbit = find_next_zero_bit(&sbi->mft.reserved_bitmap, + zbit = find_next_zero_bit_le(&sbi->mft.reserved_bitmap, MFT_REC_FREE, MFT_REC_RESERVED); if (zbit >= MFT_REC_FREE) { sbi->mft.next_reserved = MFT_REC_FREE; @@ -692,7 +720,7 @@ found: if (*rno >= MFT_REC_FREE) wnd_set_used(wnd, *rno, 1); else if (*rno >= MFT_REC_RESERVED && sbi->mft.reserved_bitmap_inited) - __set_bit(*rno - MFT_REC_RESERVED, &sbi->mft.reserved_bitmap); + __set_bit_le(*rno - MFT_REC_RESERVED, &sbi->mft.reserved_bitmap); out: if (!mft) @@ -720,7 +748,7 @@ void ntfs_mark_rec_free(struct ntfs_sb_info *sbi, CLST rno, bool is_mft) else wnd_set_free(wnd, rno, 1); } else if (rno >= MFT_REC_RESERVED && sbi->mft.reserved_bitmap_inited) { - __clear_bit(rno - MFT_REC_RESERVED, &sbi->mft.reserved_bitmap); + __clear_bit_le(rno - MFT_REC_RESERVED, &sbi->mft.reserved_bitmap); } if (rno < wnd_zone_bit(wnd)) @@ -830,7 +858,6 @@ void ntfs_update_mftmirr(struct ntfs_sb_info *sbi, int wait) if (!(sbi->flags & NTFS_FLAGS_MFTMIRR)) return; - err = 0; bytes = sbi->mft.recs_mirr << sbi->record_bits; block1 = sbi->mft.lbo >> sb->s_blocksize_bits; block2 = sbi->mft.lbo2 >> sb->s_blocksize_bits; @@ -860,8 +887,7 @@ void ntfs_update_mftmirr(struct ntfs_sb_info *sbi, int wait) put_bh(bh1); bh1 = NULL; - if (wait) - err = sync_dirty_buffer(bh2); + err = wait ? sync_dirty_buffer(bh2) : 0; put_bh(bh2); if (err) @@ -1849,9 +1875,10 @@ int ntfs_security_init(struct ntfs_sb_info *sbi) goto out; } - root_sdh = resident_data(attr); + root_sdh = resident_data_ex(attr, sizeof(struct INDEX_ROOT)); if (root_sdh->type != ATTR_ZERO || - root_sdh->rule != NTFS_COLLATION_TYPE_SECURITY_HASH) { + root_sdh->rule != NTFS_COLLATION_TYPE_SECURITY_HASH || + offsetof(struct INDEX_ROOT, ihdr) + root_sdh->ihdr.used > attr->res.data_size) { err = -EINVAL; goto out; } @@ -1867,9 +1894,10 @@ int ntfs_security_init(struct ntfs_sb_info *sbi) goto out; } - root_sii = resident_data(attr); + root_sii = resident_data_ex(attr, sizeof(struct INDEX_ROOT)); if (root_sii->type != ATTR_ZERO || - root_sii->rule != NTFS_COLLATION_TYPE_UINT) { + root_sii->rule != NTFS_COLLATION_TYPE_UINT || + offsetof(struct INDEX_ROOT, ihdr) + root_sii->ihdr.used > attr->res.data_size) { err = -EINVAL; goto out; } @@ -2502,3 +2530,83 @@ int run_deallocate(struct ntfs_sb_info *sbi, struct runs_tree *run, bool trim) return 0; } + +static inline bool name_has_forbidden_chars(const struct le_str *fname) +{ + int i, ch; + + /* check for forbidden chars */ + for (i = 0; i < fname->len; ++i) { + ch = le16_to_cpu(fname->name[i]); + + /* control chars */ + if (ch < 0x20) + return true; + + switch (ch) { + /* disallowed by Windows */ + case '\\': + case '/': + case ':': + case '*': + case '?': + case '<': + case '>': + case '|': + case '\"': + return true; + + default: + /* allowed char */ + break; + } + } + + /* file names cannot end with space or . */ + if (fname->len > 0) { + ch = le16_to_cpu(fname->name[fname->len - 1]); + if (ch == ' ' || ch == '.') + return true; + } + + return false; +} + +static inline bool is_reserved_name(struct ntfs_sb_info *sbi, + const struct le_str *fname) +{ + int port_digit; + const __le16 *name = fname->name; + int len = fname->len; + u16 *upcase = sbi->upcase; + + /* check for 3 chars reserved names (device names) */ + /* name by itself or with any extension is forbidden */ + if (len == 3 || (len > 3 && le16_to_cpu(name[3]) == '.')) + if (!ntfs_cmp_names(name, 3, CON_NAME, 3, upcase, false) || + !ntfs_cmp_names(name, 3, NUL_NAME, 3, upcase, false) || + !ntfs_cmp_names(name, 3, AUX_NAME, 3, upcase, false) || + !ntfs_cmp_names(name, 3, PRN_NAME, 3, upcase, false)) + return true; + + /* check for 4 chars reserved names (port name followed by 1..9) */ + /* name by itself or with any extension is forbidden */ + if (len == 4 || (len > 4 && le16_to_cpu(name[4]) == '.')) { + port_digit = le16_to_cpu(name[3]); + if (port_digit >= '1' && port_digit <= '9') + if (!ntfs_cmp_names(name, 3, COM_NAME, 3, upcase, false) || + !ntfs_cmp_names(name, 3, LPT_NAME, 3, upcase, false)) + return true; + } + + return false; +} + +/* + * valid_windows_name - Check if a file name is valid in Windows. + */ +bool valid_windows_name(struct ntfs_sb_info *sbi, const struct le_str *fname) +{ + return !name_has_forbidden_chars(fname) && + !is_reserved_name(sbi, fname); +} diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c index 440328147e7e..51ab75954640 100644 --- a/fs/ntfs3/index.c +++ b/fs/ntfs3/index.c @@ -47,7 +47,7 @@ static int cmp_fnames(const void *key1, size_t l1, const void *key2, size_t l2, if (l2 < fsize2) return -1; - both_case = f2->type != FILE_NAME_DOS /*&& !sbi->options.nocase*/; + both_case = f2->type != FILE_NAME_DOS && !sbi->options->nocase; if (!l1) { const struct le_str *s2 = (struct le_str *)&f2->name_len; @@ -323,7 +323,7 @@ static int indx_mark_used(struct ntfs_index *indx, struct ntfs_inode *ni, if (err) return err; - __set_bit(bit - bbuf.bit, bbuf.buf); + __set_bit_le(bit - bbuf.bit, bbuf.buf); bmp_buf_put(&bbuf, true); @@ -343,7 +343,7 @@ static int indx_mark_free(struct ntfs_index *indx, struct ntfs_inode *ni, if (err) return err; - __clear_bit(bit - bbuf.bit, bbuf.buf); + __clear_bit_le(bit - bbuf.bit, bbuf.buf); bmp_buf_put(&bbuf, true); @@ -457,7 +457,7 @@ next_run: static bool scan_for_free(const ulong *buf, u32 bit, u32 bits, size_t *ret) { - size_t pos = find_next_zero_bit(buf, bits, bit); + size_t pos = find_next_zero_bit_le(buf, bits, bit); if (pos >= bits) return false; @@ -489,7 +489,7 @@ static int indx_find_free(struct ntfs_index *indx, struct ntfs_inode *ni, if (!b->non_res) { u32 nbits = 8 * le32_to_cpu(b->res.data_size); - size_t pos = find_next_zero_bit(resident_data(b), nbits, 0); + size_t pos = find_next_zero_bit_le(resident_data(b), nbits, 0); if (pos < nbits) *bit = pos; @@ -505,7 +505,7 @@ static int indx_find_free(struct ntfs_index *indx, struct ntfs_inode *ni, static bool scan_for_used(const ulong *buf, u32 bit, u32 bits, size_t *ret) { - size_t pos = find_next_bit(buf, bits, bit); + size_t pos = find_next_bit_le(buf, bits, bit); if (pos >= bits) return false; @@ -536,7 +536,7 @@ int indx_used_bit(struct ntfs_index *indx, struct ntfs_inode *ni, size_t *bit) if (!b->non_res) { u32 nbits = le32_to_cpu(b->res.data_size) * 8; - size_t pos = find_next_bit(resident_data(b), nbits, from); + size_t pos = find_next_bit_le(resident_data(b), nbits, from); if (pos < nbits) *bit = pos; @@ -605,11 +605,58 @@ static const struct NTFS_DE *hdr_insert_head(struct INDEX_HDR *hdr, return e; } +/* + * index_hdr_check + * + * return true if INDEX_HDR is valid + */ +static bool index_hdr_check(const struct INDEX_HDR *hdr, u32 bytes) +{ + u32 end = le32_to_cpu(hdr->used); + u32 tot = le32_to_cpu(hdr->total); + u32 off = le32_to_cpu(hdr->de_off); + + if (!IS_ALIGNED(off, 8) || tot > bytes || end > tot || + off + sizeof(struct NTFS_DE) > end) { + /* incorrect index buffer. */ + return false; + } + + return true; +} + +/* + * index_buf_check + * + * return true if INDEX_BUFFER seems is valid + */ +static bool index_buf_check(const struct INDEX_BUFFER *ib, u32 bytes, + const CLST *vbn) +{ + const struct NTFS_RECORD_HEADER *rhdr = &ib->rhdr; + u16 fo = le16_to_cpu(rhdr->fix_off); + u16 fn = le16_to_cpu(rhdr->fix_num); + + if (bytes <= offsetof(struct INDEX_BUFFER, ihdr) || + rhdr->sign != NTFS_INDX_SIGNATURE || + fo < sizeof(struct INDEX_BUFFER) + /* Check index buffer vbn. */ + || (vbn && *vbn != le64_to_cpu(ib->vbn)) || (fo % sizeof(short)) || + fo + fn * sizeof(short) >= bytes || + fn != ((bytes >> SECTOR_SHIFT) + 1)) { + /* incorrect index buffer. */ + return false; + } + + return index_hdr_check(&ib->ihdr, + bytes - offsetof(struct INDEX_BUFFER, ihdr)); +} + void fnd_clear(struct ntfs_fnd *fnd) { int i; - for (i = 0; i < fnd->level; i++) { + for (i = fnd->level - 1; i >= 0; i--) { struct indx_node *n = fnd->nodes[i]; if (!n) @@ -625,9 +672,8 @@ void fnd_clear(struct ntfs_fnd *fnd) static int fnd_push(struct ntfs_fnd *fnd, struct indx_node *n, struct NTFS_DE *e) { - int i; + int i = fnd->level; - i = fnd->level; if (i < 0 || i >= ARRAY_SIZE(fnd->nodes)) return -EINVAL; fnd->nodes[i] = n; @@ -820,9 +866,16 @@ int indx_init(struct ntfs_index *indx, struct ntfs_sb_info *sbi, u32 t32; const struct INDEX_ROOT *root = resident_data(attr); + t32 = le32_to_cpu(attr->res.data_size); + if (t32 <= offsetof(struct INDEX_ROOT, ihdr) || + !index_hdr_check(&root->ihdr, + t32 - offsetof(struct INDEX_ROOT, ihdr))) { + goto out; + } + /* Check root fields. */ if (!root->index_block_clst) - return -EINVAL; + goto out; indx->type = type; indx->idx2vbn_bits = __ffs(root->index_block_clst); @@ -834,19 +887,19 @@ int indx_init(struct ntfs_index *indx, struct ntfs_sb_info *sbi, if (t32 < sbi->cluster_size) { /* Index record is smaller than a cluster, use 512 blocks. */ if (t32 != root->index_block_clst * SECTOR_SIZE) - return -EINVAL; + goto out; /* Check alignment to a cluster. */ if ((sbi->cluster_size >> SECTOR_SHIFT) & (root->index_block_clst - 1)) { - return -EINVAL; + goto out; } indx->vbn2vbo_bits = SECTOR_SHIFT; } else { /* Index record must be a multiple of cluster size. */ if (t32 != root->index_block_clst << sbi->cluster_bits) - return -EINVAL; + goto out; indx->vbn2vbo_bits = sbi->cluster_bits; } @@ -854,7 +907,14 @@ int indx_init(struct ntfs_index *indx, struct ntfs_sb_info *sbi, init_rwsem(&indx->run_lock); indx->cmp = get_cmp_func(root); - return indx->cmp ? 0 : -EINVAL; + if (!indx->cmp) + goto out; + + return 0; + +out: + ntfs_set_state(sbi, NTFS_DIRTY_DIRTY); + return -EINVAL; } static struct indx_node *indx_new(struct ntfs_index *indx, @@ -1012,11 +1072,24 @@ int indx_read(struct ntfs_index *indx, struct ntfs_inode *ni, CLST vbn, goto out; ok: + if (!index_buf_check(ib, bytes, &vbn)) { + ntfs_inode_err(&ni->vfs_inode, "directory corrupted"); + ntfs_set_state(ni->mi.sbi, NTFS_DIRTY_ERROR); + err = -EINVAL; + goto out; + } + if (err == -E_NTFS_FIXUP) { ntfs_write_bh(ni->mi.sbi, &ib->rhdr, &in->nb, 0); err = 0; } + /* check for index header length */ + if (offsetof(struct INDEX_BUFFER, ihdr) + ib->ihdr.used > bytes) { + err = -EINVAL; + goto out; + } + in->index = ib; *node = in; @@ -1341,8 +1414,8 @@ static int indx_create_allocate(struct ntfs_index *indx, struct ntfs_inode *ni, run_init(&run); - err = attr_allocate_clusters(sbi, &run, 0, 0, len, NULL, 0, &alen, 0, - NULL); + err = attr_allocate_clusters(sbi, &run, 0, 0, len, NULL, ALLOCATE_DEF, + &alen, 0, NULL, NULL); if (err) goto out; @@ -1440,6 +1513,9 @@ static int indx_add_allocate(struct ntfs_index *indx, struct ntfs_inode *ni, goto out1; } + if (in->name == I30_NAME) + ni->vfs_inode.i_size = data_size; + *vbn = bit << indx->idx2vbn_bits; return 0; @@ -1593,9 +1669,9 @@ static int indx_insert_into_root(struct ntfs_index *indx, struct ntfs_inode *ni, if (err) { /* Restore root. */ - if (mi_resize_attr(mi, attr, -ds_root)) + if (mi_resize_attr(mi, attr, -ds_root)) { memcpy(attr, a_root, asize); - else { + } else { /* Bug? */ ntfs_set_state(sbi, NTFS_DIRTY_ERROR); } @@ -1947,7 +2023,7 @@ static int indx_shrink(struct ntfs_index *indx, struct ntfs_inode *ni, if (bit >= nbits) return 0; - pos = find_next_bit(bm, nbits, bit); + pos = find_next_bit_le(bm, nbits, bit); if (pos < nbits) return 0; } else { @@ -1973,6 +2049,9 @@ static int indx_shrink(struct ntfs_index *indx, struct ntfs_inode *ni, if (err) return err; + if (in->name == I30_NAME) + ni->vfs_inode.i_size = new_data; + bpb = bitmap_size(bit); if (bpb * 8 == nbits) return 0; @@ -2115,9 +2194,10 @@ static int indx_get_entry_to_replace(struct ntfs_index *indx, fnd->de[level] = e; indx_write(indx, ni, n, 0); - /* Check to see if this action created an empty leaf. */ - if (ib_is_leaf(ib) && ib_is_empty(ib)) + if (ib_is_leaf(ib) && ib_is_empty(ib)) { + /* An empty leaf. */ return 0; + } out: fnd_clear(fnd); @@ -2455,6 +2535,9 @@ int indx_delete_entry(struct ntfs_index *indx, struct ntfs_inode *ni, err = attr_set_size(ni, ATTR_ALLOC, in->name, in->name_len, &indx->alloc_run, 0, NULL, false, NULL); + if (in->name == I30_NAME) + ni->vfs_inode.i_size = 0; + err = ni_remove_attr(ni, ATTR_ALLOC, in->name, in->name_len, false, NULL); run_close(&indx->alloc_run); diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c index d5a3afbbbfd8..20b953871574 100644 --- a/fs/ntfs3/inode.c +++ b/fs/ntfs3/inode.c @@ -81,7 +81,7 @@ static struct inode *ntfs_read_mft(struct inode *inode, le16_to_cpu(ref->seq), le16_to_cpu(rec->seq)); goto out; } else if (!is_rec_inuse(rec)) { - err = -EINVAL; + err = -ESTALE; ntfs_err(sb, "Inode r=%x is not in use!", (u32)ino); goto out; } @@ -92,8 +92,10 @@ static struct inode *ntfs_read_mft(struct inode *inode, goto out; } - if (!is_rec_base(rec)) - goto Ok; + if (!is_rec_base(rec)) { + err = -EINVAL; + goto out; + } /* Record should contain $I30 root. */ is_dir = rec->flags & RECORD_FLAG_DIR; @@ -129,6 +131,16 @@ next_attr: rsize = attr->non_res ? 0 : le32_to_cpu(attr->res.data_size); asize = le32_to_cpu(attr->size); + if (le16_to_cpu(attr->name_off) + attr->name_len > asize) + goto out; + + if (attr->non_res) { + t64 = le64_to_cpu(attr->nres.alloc_size); + if (le64_to_cpu(attr->nres.data_size) > t64 || + le64_to_cpu(attr->nres.valid_size) > t64) + goto out; + } + switch (attr->type) { case ATTR_STD: if (attr->non_res || @@ -364,7 +376,13 @@ next_attr: attr_unpack_run: roff = le16_to_cpu(attr->nres.run_off); + if (roff > asize) { + err = -EINVAL; + goto out; + } + t64 = le64_to_cpu(attr->nres.svcn); + err = run_unpack_ex(run, sbi, ino, t64, le64_to_cpu(attr->nres.evcn), t64, Add2Ptr(attr, roff), asize - roff); if (err < 0) @@ -450,7 +468,6 @@ end_enum: inode->i_flags |= S_NOSEC; } -Ok: if (ino == MFT_REC_MFT && !sb->s_root) sbi->mft.ni = NULL; @@ -504,6 +521,9 @@ struct inode *ntfs_iget5(struct super_block *sb, const struct MFT_REF *ref, _ntfs_bad_inode(inode); } + if (IS_ERR(inode) && name) + ntfs_set_state(sb->s_fs_info, NTFS_DIRTY_ERROR); + return inode; } @@ -535,17 +555,6 @@ static noinline int ntfs_get_block_vbo(struct inode *inode, u64 vbo, clear_buffer_new(bh); clear_buffer_uptodate(bh); - /* Direct write uses 'create=0'. */ - if (!create && vbo >= ni->i_valid) { - /* Out of valid. */ - return 0; - } - - if (vbo >= inode->i_size) { - /* Out of size. */ - return 0; - } - if (is_resident(ni)) { ni_lock(ni); err = attr_data_read_resident(ni, page); @@ -561,7 +570,8 @@ static noinline int ntfs_get_block_vbo(struct inode *inode, u64 vbo, off = vbo & sbi->cluster_mask; new = false; - err = attr_data_get_block(ni, vcn, 1, &lcn, &len, create ? &new : NULL); + err = attr_data_get_block(ni, vcn, 1, &lcn, &len, create ? &new : NULL, + create && sbi->cluster_size > PAGE_SIZE); if (err) goto out; @@ -579,11 +589,8 @@ static noinline int ntfs_get_block_vbo(struct inode *inode, u64 vbo, WARN_ON(1); } - if (new) { + if (new) set_buffer_new(bh); - if ((len << cluster_bits) > block_size) - ntfs_sparse_cluster(inode, page, vcn, len); - } lbo = ((u64)lcn << cluster_bits) + off; @@ -611,7 +618,6 @@ static noinline int ntfs_get_block_vbo(struct inode *inode, u64 vbo, } } else if (vbo >= valid) { /* Read out of valid data. */ - /* Should never be here 'cause already checked. */ clear_buffer_mapped(bh); } else if (vbo + bytes <= valid) { /* Normal read. */ @@ -953,6 +959,11 @@ int ntfs_write_end(struct file *file, struct address_space *mapping, dirty = true; } + if (pos + err > inode->i_size) { + inode->i_size = pos + err; + dirty = true; + } + if (dirty) mark_inode_dirty(inode); } @@ -1162,6 +1173,18 @@ out: return ERR_PTR(err); } +/* + * ntfs_create_inode + * + * Helper function for: + * - ntfs_create + * - ntfs_mknod + * - ntfs_symlink + * - ntfs_mkdir + * - ntfs_atomic_open + * + * NOTE: if fnd != NULL (ntfs_atomic_open) then @dir is locked + */ struct inode *ntfs_create_inode(struct user_namespace *mnt_userns, struct inode *dir, struct dentry *dentry, const struct cpu_str *uni, umode_t mode, @@ -1191,7 +1214,8 @@ struct inode *ntfs_create_inode(struct user_namespace *mnt_userns, struct REPARSE_DATA_BUFFER *rp = NULL; bool rp_inserted = false; - ni_lock_dir(dir_ni); + if (!fnd) + ni_lock_dir(dir_ni); dir_root = indx_get_root(&dir_ni->dir, dir_ni, NULL, NULL); if (!dir_root) { @@ -1254,6 +1278,10 @@ struct inode *ntfs_create_inode(struct user_namespace *mnt_userns, fa = FILE_ATTRIBUTE_ARCHIVE; } + /* If option "hide_dot_files" then set hidden attribute for dot files. */ + if (sbi->options->hide_dot_files && name->name[0] == '.') + fa |= FILE_ATTRIBUTE_HIDDEN; + if (!(mode & 0222)) fa |= FILE_ATTRIBUTE_READONLY; @@ -1339,6 +1367,13 @@ struct inode *ntfs_create_inode(struct user_namespace *mnt_userns, mi_get_ref(&ni->mi, &new_de->ref); fname = (struct ATTR_FILE_NAME *)(new_de + 1); + + if (sbi->options->windows_names && + !valid_windows_name(sbi, (struct le_str *)&fname->name_len)) { + err = -EINVAL; + goto out4; + } + mi_get_ref(&dir_ni->mi, &fname->home); fname->dup.cr_time = fname->dup.m_time = fname->dup.c_time = fname->dup.a_time = std5->cr_time; @@ -1502,8 +1537,8 @@ struct inode *ntfs_create_inode(struct user_namespace *mnt_userns, cpu_to_le64(ntfs_up_cluster(sbi, nsize)); err = attr_allocate_clusters(sbi, &ni->file.run, 0, 0, - clst, NULL, 0, &alen, 0, - NULL); + clst, NULL, ALLOCATE_DEF, + &alen, 0, NULL, NULL); if (err) goto out5; @@ -1550,7 +1585,8 @@ struct inode *ntfs_create_inode(struct user_namespace *mnt_userns, goto out6; /* Unlock parent directory before ntfs_init_acl. */ - ni_unlock(dir_ni); + if (!fnd) + ni_unlock(dir_ni); inode->i_generation = le16_to_cpu(rec->seq); @@ -1610,7 +1646,8 @@ struct inode *ntfs_create_inode(struct user_namespace *mnt_userns, out7: /* Undo 'indx_insert_entry'. */ - ni_lock_dir(dir_ni); + if (!fnd) + ni_lock_dir(dir_ni); indx_delete_entry(&dir_ni->dir, dir_ni, new_de + 1, le16_to_cpu(new_de->key_size), sbi); /* ni_unlock(dir_ni); will be called later. */ @@ -1619,10 +1656,8 @@ out6: ntfs_remove_reparse(sbi, IO_REPARSE_TAG_SYMLINK, &new_de->ref); out5: - if (S_ISDIR(mode) || run_is_empty(&ni->file.run)) - goto out4; - - run_deallocate(sbi, &ni->file.run, false); + if (!S_ISDIR(mode)) + run_deallocate(sbi, &ni->file.run, false); out4: clear_rec_inuse(rec); @@ -1638,7 +1673,8 @@ out2: out1: if (err) { - ni_unlock(dir_ni); + if (!fnd) + ni_unlock(dir_ni); return ERR_PTR(err); } @@ -1746,7 +1782,103 @@ void ntfs_evict_inode(struct inode *inode) ni_clear(ntfs_i(inode)); } -static noinline int ntfs_readlink_hlp(struct inode *inode, char *buffer, +/* + * ntfs_translate_junction + * + * Translate a Windows junction target to the Linux equivalent. + * On junctions, targets are always absolute (they include the drive + * letter). We have no way of knowing if the target is for the current + * mounted device or not so we just assume it is. + */ +static int ntfs_translate_junction(const struct super_block *sb, + const struct dentry *link_de, char *target, + int target_len, int target_max) +{ + int tl_len, err = target_len; + char *link_path_buffer = NULL, *link_path; + char *translated = NULL; + char *target_start; + int copy_len; + + link_path_buffer = kmalloc(PATH_MAX, GFP_NOFS); + if (!link_path_buffer) { + err = -ENOMEM; + goto out; + } + /* Get link path, relative to mount point */ + link_path = dentry_path_raw(link_de, link_path_buffer, PATH_MAX); + if (IS_ERR(link_path)) { + ntfs_err(sb, "Error getting link path"); + err = -EINVAL; + goto out; + } + + translated = kmalloc(PATH_MAX, GFP_NOFS); + if (!translated) { + err = -ENOMEM; + goto out; + } + + /* Make translated path a relative path to mount point */ + strcpy(translated, "./"); + ++link_path; /* Skip leading / */ + for (tl_len = sizeof("./") - 1; *link_path; ++link_path) { + if (*link_path == '/') { + if (PATH_MAX - tl_len < sizeof("../")) { + ntfs_err(sb, + "Link path %s has too many components", + link_path); + err = -EINVAL; + goto out; + } + strcpy(translated + tl_len, "../"); + tl_len += sizeof("../") - 1; + } + } + + /* Skip drive letter */ + target_start = target; + while (*target_start && *target_start != ':') + ++target_start; + + if (!*target_start) { + ntfs_err(sb, "Link target (%s) missing drive separator", + target); + err = -EINVAL; + goto out; + } + + /* Skip drive separator and leading /, if exists */ + target_start += 1 + (target_start[1] == '/'); + copy_len = target_len - (target_start - target); + + if (PATH_MAX - tl_len <= copy_len) { + ntfs_err(sb, "Link target %s too large for buffer (%d <= %d)", + target_start, PATH_MAX - tl_len, copy_len); + err = -EINVAL; + goto out; + } + + /* translated path has a trailing / and target_start does not */ + strcpy(translated + tl_len, target_start); + tl_len += copy_len; + if (target_max <= tl_len) { + ntfs_err(sb, "Target path %s too large for buffer (%d <= %d)", + translated, target_max, tl_len); + err = -EINVAL; + goto out; + } + strcpy(target, translated); + err = tl_len; + +out: + kfree(link_path_buffer); + kfree(translated); + return err; +} + +static noinline int ntfs_readlink_hlp(const struct dentry *link_de, + struct inode *inode, char *buffer, int buflen) { int i, err = -EINVAL; @@ -1889,6 +2021,11 @@ static noinline int ntfs_readlink_hlp(struct inode *inode, char *buffer, /* Always set last zero. */ buffer[err] = 0; + + /* If this is a junction, translate the link target. */ + if (rp->ReparseTag == IO_REPARSE_TAG_MOUNT_POINT) + err = ntfs_translate_junction(sb, link_de, buffer, err, buflen); + out: kfree(to_free); return err; @@ -1907,7 +2044,7 @@ static const char *ntfs_get_link(struct dentry *de, struct inode *inode, if (!ret) return ERR_PTR(-ENOMEM); - err = ntfs_readlink_hlp(inode, ret, PAGE_SIZE); + err = ntfs_readlink_hlp(de, inode, ret, PAGE_SIZE); if (err < 0) { kfree(ret); return ERR_PTR(err); diff --git a/fs/ntfs3/namei.c b/fs/ntfs3/namei.c index 053cc0e0f8b5..c8db35e2ae17 100644 --- a/fs/ntfs3/namei.c +++ b/fs/ntfs3/namei.c @@ -7,6 +7,8 @@ #include <linux/fs.h> #include <linux/nls.h> +#include <linux/ctype.h> +#include <linux/posix_acl.h> #include "debug.h" #include "ntfs.h" @@ -303,6 +305,8 @@ static int ntfs_rename(struct user_namespace *mnt_userns, struct inode *dir, ni_lock_dir(dir_ni); ni_lock(ni); + if (dir_ni != new_dir_ni) + ni_lock_dir2(new_dir_ni); is_bad = false; err = ni_rename(dir_ni, new_dir_ni, ni, de, new_de, &is_bad); @@ -326,6 +330,8 @@ static int ntfs_rename(struct user_namespace *mnt_userns, struct inode *dir, ntfs_sync_inode(inode); } + if (dir_ni != new_dir_ni) + ni_unlock(new_dir_ni); ni_unlock(ni); ni_unlock(dir_ni); out: @@ -333,6 +339,104 @@ out: return err; } +/* + * ntfs_atomic_open + * + * inode_operations::atomic_open + */ +static int ntfs_atomic_open(struct inode *dir, struct dentry *dentry, + struct file *file, u32 flags, umode_t mode) +{ + int err; + struct inode *inode; + struct ntfs_fnd *fnd = NULL; + struct ntfs_inode *ni = ntfs_i(dir); + struct dentry *d = NULL; + struct cpu_str *uni = __getname(); + bool locked = false; + + if (!uni) + return -ENOMEM; + + err = ntfs_nls_to_utf16(ni->mi.sbi, dentry->d_name.name, + dentry->d_name.len, uni, NTFS_NAME_LEN, + UTF16_HOST_ENDIAN); + if (err < 0) + goto out; + +#ifdef CONFIG_NTFS3_FS_POSIX_ACL + if (IS_POSIXACL(dir)) { + /* + * Load in cache current acl to avoid ni_lock(dir): + * ntfs_create_inode -> ntfs_init_acl -> posix_acl_create -> + * ntfs_get_acl -> ntfs_get_acl_ex -> ni_lock + */ + struct posix_acl *p = get_inode_acl(dir, ACL_TYPE_DEFAULT); + + if (IS_ERR(p)) { + err = PTR_ERR(p); + goto out; + } + posix_acl_release(p); + } +#endif + + if (d_in_lookup(dentry)) { + ni_lock_dir(ni); + locked = true; + fnd = fnd_get(); + if (!fnd) { + err = -ENOMEM; + goto out1; + } + + d = d_splice_alias(dir_search_u(dir, uni, fnd), dentry); + if (IS_ERR(d)) { + err = PTR_ERR(d); + d = NULL; + goto out2; + } + + if (d) + dentry = d; + } + + if (!(flags & O_CREAT) || d_really_is_positive(dentry)) { + err = finish_no_open(file, d); + goto out2; + } + + file->f_mode |= FMODE_CREATED; + + /* + * fnd contains tree's path to insert to. + * If fnd is not NULL then dir is locked. + */ + + /* + * Unfortunately I don't know how to get here correct 'struct nameidata *nd' + * or 'struct user_namespace *mnt_userns'. + * See atomic_open in fs/namei.c. + * This is why xfstest/633 failed. + * Looks like ntfs_atomic_open must accept 'struct user_namespace *mnt_userns' as argument. + */ + + inode = ntfs_create_inode(&init_user_ns, dir, dentry, uni, mode, 0, + NULL, 0, fnd); + err = IS_ERR(inode) ? PTR_ERR(inode) + : finish_open(file, dentry, ntfs_file_open); + dput(d); + +out2: + fnd_put(fnd); +out1: + if (locked) + ni_unlock(ni); +out: + __putname(uni); + return err; +} + struct dentry *ntfs3_get_parent(struct dentry *child) { struct inode *inode = d_inode(child); @@ -355,6 +459,133 @@ struct dentry *ntfs3_get_parent(struct dentry *child) return ERR_PTR(-ENOENT); } +/* + * dentry_operations::d_hash + */ +static int ntfs_d_hash(const struct dentry *dentry, struct qstr *name) +{ + struct ntfs_sb_info *sbi; + const char *n = name->name; + unsigned int len = name->len; + unsigned long hash; + struct cpu_str *uni; + unsigned int c; + int err; + + /* First try fast implementation. */ + hash = init_name_hash(dentry); + + for (;;) { + if (!len--) { + name->hash = end_name_hash(hash); + return 0; + } + + c = *n++; + if (c >= 0x80) + break; + + hash = partial_name_hash(toupper(c), hash); + } + + /* + * Try slow way with current upcase table + */ + uni = __getname(); + if (!uni) + return -ENOMEM; + + sbi = dentry->d_sb->s_fs_info; + + err = ntfs_nls_to_utf16(sbi, name->name, name->len, uni, NTFS_NAME_LEN, + UTF16_HOST_ENDIAN); + if (err < 0) + goto out; + + if (!err) { + err = -EINVAL; + goto out; + } + + hash = ntfs_names_hash(uni->name, uni->len, sbi->upcase, + init_name_hash(dentry)); + name->hash = end_name_hash(hash); + err = 0; + +out: + __putname(uni); + return err; +} + +/* + * dentry_operations::d_compare + */ +static int ntfs_d_compare(const struct dentry *dentry, unsigned int len1, + const char *str, const struct qstr *name) +{ + struct ntfs_sb_info *sbi; + int ret; + const char *n1 = str; + const char *n2 = name->name; + unsigned int len2 = name->len; + unsigned int lm = min(len1, len2); + unsigned char c1, c2; + struct cpu_str *uni1; + struct le_str *uni2; + + /* First try fast implementation. */ + for (;;) { + if (!lm--) + return len1 != len2; + + if ((c1 = *n1++) == (c2 = *n2++)) + continue; + + if (c1 >= 0x80 || c2 >= 0x80) + break; + + if (toupper(c1) != toupper(c2)) + return 1; + } + + /* + * Try slow way with current upcase table + */ + sbi = dentry->d_sb->s_fs_info; + uni1 = __getname(); + if (!uni1) + return -ENOMEM; + + ret = ntfs_nls_to_utf16(sbi, str, len1, uni1, NTFS_NAME_LEN, + UTF16_HOST_ENDIAN); + if (ret < 0) + goto out; + + if (!ret) { + ret = -EINVAL; + goto out; + } + + uni2 = Add2Ptr(uni1, 2048); + + ret = ntfs_nls_to_utf16(sbi, name->name, name->len, + (struct cpu_str *)uni2, NTFS_NAME_LEN, + UTF16_LITTLE_ENDIAN); + if (ret < 0) + goto out; + + if (!ret) { + ret = -EINVAL; + goto out; + } + + ret = !ntfs_cmp_names_cpu(uni1, uni2, sbi->upcase, false) ? 0 : 1; + +out: + __putname(uni1); + return ret; +} + // clang-format off const struct inode_operations ntfs_dir_inode_operations = { .lookup = ntfs_lookup, @@ -372,6 +603,7 @@ const struct inode_operations ntfs_dir_inode_operations = { .setattr = ntfs3_setattr, .getattr = ntfs_getattr, .listxattr = ntfs_listxattr, + .atomic_open = ntfs_atomic_open, .fiemap = ntfs_fiemap, }; @@ -382,4 +614,10 @@ const struct inode_operations ntfs_special_inode_operations = { .get_inode_acl = ntfs_get_acl, .set_acl = ntfs_set_acl, }; + +const struct dentry_operations ntfs_dentry_ops = { + .d_hash = ntfs_d_hash, + .d_compare = ntfs_d_compare, +}; + // clang-format on diff --git a/fs/ntfs3/ntfs.h b/fs/ntfs3/ntfs.h index 9cc396b117bf..86ea1826d099 100644 --- a/fs/ntfs3/ntfs.h +++ b/fs/ntfs3/ntfs.h @@ -84,7 +84,6 @@ typedef u32 CLST; #define COMPRESSION_UNIT 4 #define COMPRESS_MAX_CLUSTER 0x1000 -#define MFT_INCREASE_CHUNK 1024 enum RECORD_NUM { MFT_REC_MFT = 0, @@ -715,12 +714,13 @@ static inline struct NTFS_DE *hdr_first_de(const struct INDEX_HDR *hdr) { u32 de_off = le32_to_cpu(hdr->de_off); u32 used = le32_to_cpu(hdr->used); - struct NTFS_DE *e = Add2Ptr(hdr, de_off); + struct NTFS_DE *e; u16 esize; - if (de_off >= used || de_off >= le32_to_cpu(hdr->total)) + if (de_off >= used || de_off + sizeof(struct NTFS_DE) > used ) return NULL; + e = Add2Ptr(hdr, de_off); esize = le16_to_cpu(e->size); if (esize < sizeof(struct NTFS_DE) || de_off + esize > used) return NULL; diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index a4d292809a33..0e051c5595a2 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -97,9 +97,12 @@ struct ntfs_mount_options { unsigned sparse : 1; /* Create sparse files. */ unsigned showmeta : 1; /* Show meta files. */ unsigned nohidden : 1; /* Do not show hidden files. */ + unsigned hide_dot_files : 1; /* Set hidden flag on dot files. */ + unsigned windows_names : 1; /* Disallow names forbidden by Windows. */ unsigned force : 1; /* RW mount dirty volume. */ unsigned noacsrules : 1; /* Exclude acs rules. */ unsigned prealloc : 1; /* Preallocate space when file is growing. */ + unsigned nocase : 1; /* case insensitive. */ }; /* Special value to unpack and deallocate. */ @@ -124,6 +127,7 @@ struct ntfs_buffers { enum ALLOCATE_OPT { ALLOCATE_DEF = 0, // Allocate all clusters. ALLOCATE_MFT = 1, // Allocate for MFT. + ALLOCATE_ZERO = 2, // Zeroout new allocated clusters }; enum bitmap_mutex_classes { @@ -195,6 +199,8 @@ struct ntfs_index { /* Minimum MFT zone. */ #define NTFS_MIN_MFT_ZONE 100 +/* Step to increase the MFT. */ +#define NTFS_MFT_INCREASE_STEP 1024 /* Ntfs file system in-core superblock data. */ struct ntfs_sb_info { @@ -330,6 +336,7 @@ enum ntfs_inode_mutex_lock_class { NTFS_INODE_MUTEX_REPARSE, NTFS_INODE_MUTEX_NORMAL, NTFS_INODE_MUTEX_PARENT, + NTFS_INODE_MUTEX_PARENT2, }; /* @@ -412,7 +419,7 @@ enum REPARSE_SIGN { int attr_allocate_clusters(struct ntfs_sb_info *sbi, struct runs_tree *run, CLST vcn, CLST lcn, CLST len, CLST *pre_alloc, enum ALLOCATE_OPT opt, CLST *alen, const size_t fr, - CLST *new_lcn); + CLST *new_lcn, CLST *new_len); int attr_make_nonresident(struct ntfs_inode *ni, struct ATTRIB *attr, struct ATTR_LIST_ENTRY *le, struct mft_inode *mi, u64 new_size, struct runs_tree *run, @@ -422,7 +429,7 @@ int attr_set_size(struct ntfs_inode *ni, enum ATTR_TYPE type, u64 new_size, const u64 *new_valid, bool keep_prealloc, struct ATTRIB **ret); int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn, - CLST *len, bool *new); + CLST *len, bool *new, bool zero); int attr_data_read_resident(struct ntfs_inode *ni, struct page *page); int attr_data_write_resident(struct ntfs_inode *ni, struct page *page); int attr_load_runs_vcn(struct ntfs_inode *ni, enum ATTR_TYPE type, @@ -469,9 +476,9 @@ static inline size_t al_aligned(size_t size) } /* Globals from bitfunc.c */ -bool are_bits_clear(const ulong *map, size_t bit, size_t nbits); -bool are_bits_set(const ulong *map, size_t bit, size_t nbits); -size_t get_set_bits_ex(const ulong *map, size_t bit, size_t nbits); +bool are_bits_clear(const void *map, size_t bit, size_t nbits); +bool are_bits_set(const void *map, size_t bit, size_t nbits); +size_t get_set_bits_ex(const void *map, size_t bit, size_t nbits); /* Globals from dir.c */ int ntfs_utf16_to_nls(struct ntfs_sb_info *sbi, const __le16 *name, u32 len, @@ -487,8 +494,6 @@ extern const struct file_operations ntfs_dir_operations; /* Globals from file.c */ int ntfs_getattr(struct user_namespace *mnt_userns, const struct path *path, struct kstat *stat, u32 request_mask, u32 flags); -void ntfs_sparse_cluster(struct inode *inode, struct page *page0, CLST vcn, - CLST len); int ntfs3_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, struct iattr *attr); int ntfs_file_open(struct inode *inode, struct file *file); @@ -582,11 +587,10 @@ int ntfs_fix_post_read(struct NTFS_RECORD_HEADER *rhdr, size_t bytes, bool simple); int ntfs_extend_init(struct ntfs_sb_info *sbi); int ntfs_loadlog_and_replay(struct ntfs_inode *ni, struct ntfs_sb_info *sbi); -const struct ATTR_DEF_ENTRY *ntfs_query_def(struct ntfs_sb_info *sbi, - enum ATTR_TYPE Type); int ntfs_look_for_free_space(struct ntfs_sb_info *sbi, CLST lcn, CLST len, CLST *new_lcn, CLST *new_len, enum ALLOCATE_OPT opt); +bool ntfs_check_for_free_space(struct ntfs_sb_info *sbi, CLST clen, CLST mlen); int ntfs_look_free_mft(struct ntfs_sb_info *sbi, CLST *rno, bool mft, struct ntfs_inode *ni, struct mft_inode **mi); void ntfs_mark_rec_free(struct ntfs_sb_info *sbi, CLST rno, bool is_mft); @@ -643,6 +647,7 @@ int ntfs_remove_reparse(struct ntfs_sb_info *sbi, __le32 rtag, const struct MFT_REF *ref); void mark_as_free_ex(struct ntfs_sb_info *sbi, CLST lcn, CLST len, bool trim); int run_deallocate(struct ntfs_sb_info *sbi, struct runs_tree *run, bool trim); +bool valid_windows_name(struct ntfs_sb_info *sbi, const struct le_str *name); /* Globals from index.c */ int indx_used_bit(struct ntfs_index *indx, struct ntfs_inode *ni, size_t *bit); @@ -720,6 +725,7 @@ struct dentry *ntfs3_get_parent(struct dentry *child); extern const struct inode_operations ntfs_dir_inode_operations; extern const struct inode_operations ntfs_special_inode_operations; +extern const struct dentry_operations ntfs_dentry_ops; /* Globals from record.c */ int mi_get(struct ntfs_sb_info *sbi, CLST rno, struct mft_inode **mi); @@ -793,12 +799,12 @@ int run_pack(const struct runs_tree *run, CLST svcn, CLST len, u8 *run_buf, u32 run_buf_size, CLST *packed_vcns); int run_unpack(struct runs_tree *run, struct ntfs_sb_info *sbi, CLST ino, CLST svcn, CLST evcn, CLST vcn, const u8 *run_buf, - u32 run_buf_size); + int run_buf_size); #ifdef NTFS3_CHECK_FREE_CLST int run_unpack_ex(struct runs_tree *run, struct ntfs_sb_info *sbi, CLST ino, CLST svcn, CLST evcn, CLST vcn, const u8 *run_buf, - u32 run_buf_size); + int run_buf_size); #else #define run_unpack_ex run_unpack #endif @@ -822,6 +828,8 @@ static inline size_t wnd_zeroes(const struct wnd_bitmap *wnd) int wnd_init(struct wnd_bitmap *wnd, struct super_block *sb, size_t nbits); int wnd_set_free(struct wnd_bitmap *wnd, size_t bit, size_t bits); int wnd_set_used(struct wnd_bitmap *wnd, size_t bit, size_t bits); +int wnd_set_used_safe(struct wnd_bitmap *wnd, size_t bit, size_t bits, + size_t *done); bool wnd_is_free(struct wnd_bitmap *wnd, size_t bit, size_t bits); bool wnd_is_used(struct wnd_bitmap *wnd, size_t bit, size_t bits); @@ -834,11 +842,17 @@ int wnd_extend(struct wnd_bitmap *wnd, size_t new_bits); void wnd_zone_set(struct wnd_bitmap *wnd, size_t Lcn, size_t Len); int ntfs_trim_fs(struct ntfs_sb_info *sbi, struct fstrim_range *range); +void ntfs_bitmap_set_le(void *map, unsigned int start, int len); +void ntfs_bitmap_clear_le(void *map, unsigned int start, int len); +unsigned int ntfs_bitmap_weight_le(const void *bitmap, int bits); + /* Globals from upcase.c */ int ntfs_cmp_names(const __le16 *s1, size_t l1, const __le16 *s2, size_t l2, const u16 *upcase, bool bothcase); int ntfs_cmp_names_cpu(const struct cpu_str *uni1, const struct le_str *uni2, const u16 *upcase, bool bothcase); +unsigned long ntfs_names_hash(const u16 *name, size_t len, const u16 *upcase, + unsigned long hash); /* globals from xattr.c */ #ifdef CONFIG_NTFS3_FS_POSIX_ACL @@ -1113,6 +1127,11 @@ static inline void ni_lock_dir(struct ntfs_inode *ni) mutex_lock_nested(&ni->ni_lock, NTFS_INODE_MUTEX_PARENT); } +static inline void ni_lock_dir2(struct ntfs_inode *ni) +{ + mutex_lock_nested(&ni->ni_lock, NTFS_INODE_MUTEX_PARENT2); +} + static inline void ni_unlock(struct ntfs_inode *ni) { mutex_unlock(&ni->ni_lock); diff --git a/fs/ntfs3/record.c b/fs/ntfs3/record.c index 7d2fac5ee215..defce6a5c8e1 100644 --- a/fs/ntfs3/record.c +++ b/fs/ntfs3/record.c @@ -220,6 +220,11 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr) return NULL; } + if (off + asize < off) { + /* overflow check */ + return NULL; + } + attr = Add2Ptr(attr, asize); off += asize; } @@ -260,6 +265,10 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr) if (t16 + t32 > asize) return NULL; + t32 = sizeof(short) * attr->name_len; + if (t32 && le16_to_cpu(attr->name_off) + t32 > t16) + return NULL; + return attr; } @@ -537,6 +546,10 @@ bool mi_resize_attr(struct mft_inode *mi, struct ATTRIB *attr, int bytes) return true; } +/* + * Pack runs in MFT record. + * If failed record is not changed. + */ int mi_pack_runs(struct mft_inode *mi, struct ATTRIB *attr, struct runs_tree *run, CLST len) { diff --git a/fs/ntfs3/run.c b/fs/ntfs3/run.c index aaaa0d3d35a2..a5af71cd8d14 100644 --- a/fs/ntfs3/run.c +++ b/fs/ntfs3/run.c @@ -919,12 +919,15 @@ out: */ int run_unpack(struct runs_tree *run, struct ntfs_sb_info *sbi, CLST ino, CLST svcn, CLST evcn, CLST vcn, const u8 *run_buf, - u32 run_buf_size) + int run_buf_size) { u64 prev_lcn, vcn64, lcn, next_vcn; const u8 *run_last, *run_0; bool is_mft = ino == MFT_REC_MFT; + if (run_buf_size < 0) + return -EINVAL; + /* Check for empty. */ if (evcn + 1 == svcn) return 0; @@ -1046,7 +1049,7 @@ int run_unpack(struct runs_tree *run, struct ntfs_sb_info *sbi, CLST ino, */ int run_unpack_ex(struct runs_tree *run, struct ntfs_sb_info *sbi, CLST ino, CLST svcn, CLST evcn, CLST vcn, const u8 *run_buf, - u32 run_buf_size) + int run_buf_size) { int ret, err; CLST next_vcn, lcn, len; @@ -1093,25 +1096,8 @@ int run_unpack_ex(struct runs_tree *run, struct ntfs_sb_info *sbi, CLST ino, if (down_write_trylock(&wnd->rw_lock)) { /* Mark all zero bits as used in range [lcn, lcn+len). */ - CLST i, lcn_f = 0, len_f = 0; - - err = 0; - for (i = 0; i < len; i++) { - if (wnd_is_free(wnd, lcn + i, 1)) { - if (!len_f) - lcn_f = lcn + i; - len_f += 1; - } else if (len_f) { - err = wnd_set_used(wnd, lcn_f, len_f); - len_f = 0; - if (err) - break; - } - } - - if (len_f) - err = wnd_set_used(wnd, lcn_f, len_f); - + size_t done; + err = wnd_set_used_safe(wnd, lcn, len, &done); up_write(&wnd->rw_lock); if (err) return err; diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index 47012c9bf505..ef4ea3f21905 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -21,6 +21,30 @@ * https://docs.microsoft.com/en-us/windows/wsl/file-permissions * It stores uid/gid/mode/dev in xattr * + * ntfs allows up to 2^64 clusters per volume. + * It means you should use 64 bits lcn to operate with ntfs. + * Implementation of ntfs.sys uses only 32 bits lcn. + * Default ntfs3 uses 32 bits lcn too. + * ntfs3 built with CONFIG_NTFS3_64BIT_CLUSTER (ntfs3_64) uses 64 bits per lcn. + * + * + * ntfs limits, cluster size is 4K (2^12) + * ----------------------------------------------------------------------------- + * | Volume size | Clusters | ntfs.sys | ntfs3 | ntfs3_64 | mkntfs | chkdsk | + * ----------------------------------------------------------------------------- + * | < 16T, 2^44 | < 2^32 | yes | yes | yes | yes | yes | + * | > 16T, 2^44 | > 2^32 | no | no | yes | yes | yes | + * ----------------------------------------------------------|------------------ + * + * To mount large volumes as ntfs one should use large cluster size (up to 2M) + * The maximum volume size in this case is 2^32 * 2^21 = 2^53 = 8P + * + * ntfs limits, cluster size is 2M (2^31) + * ----------------------------------------------------------------------------- + * | < 8P, 2^54 | < 2^32 | yes | yes | yes | yes | yes | + * | > 8P, 2^54 | > 2^32 | no | no | yes | yes | yes | + * ----------------------------------------------------------|------------------ + * */ #include <linux/blkdev.h> @@ -223,11 +247,14 @@ enum Opt { Opt_force, Opt_sparse, Opt_nohidden, + Opt_hide_dot_files, + Opt_windows_names, Opt_showmeta, Opt_acl, Opt_iocharset, Opt_prealloc, Opt_noacsrules, + Opt_nocase, Opt_err, }; @@ -242,10 +269,13 @@ static const struct fs_parameter_spec ntfs_fs_parameters[] = { fsparam_flag_no("force", Opt_force), fsparam_flag_no("sparse", Opt_sparse), fsparam_flag_no("hidden", Opt_nohidden), + fsparam_flag_no("hide_dot_files", Opt_hide_dot_files), + fsparam_flag_no("windows_names", Opt_windows_names), fsparam_flag_no("acl", Opt_acl), fsparam_flag_no("showmeta", Opt_showmeta), fsparam_flag_no("prealloc", Opt_prealloc), fsparam_flag_no("acsrules", Opt_noacsrules), + fsparam_flag_no("nocase", Opt_nocase), fsparam_string("iocharset", Opt_iocharset), {} }; @@ -330,6 +360,12 @@ static int ntfs_fs_parse_param(struct fs_context *fc, case Opt_nohidden: opts->nohidden = result.negated ? 1 : 0; break; + case Opt_hide_dot_files: + opts->hide_dot_files = result.negated ? 0 : 1; + break; + case Opt_windows_names: + opts->windows_names = result.negated ? 0 : 1; + break; case Opt_acl: if (!result.negated) #ifdef CONFIG_NTFS3_FS_POSIX_ACL @@ -354,6 +390,9 @@ static int ntfs_fs_parse_param(struct fs_context *fc, case Opt_noacsrules: opts->noacsrules = result.negated ? 1 : 0; break; + case Opt_nocase: + opts->nocase = result.negated ? 1 : 0; + break; default: /* Should not be here unless we forget add case. */ return -EINVAL; @@ -406,27 +445,18 @@ static struct inode *ntfs_alloc_inode(struct super_block *sb) return NULL; memset(ni, 0, offsetof(struct ntfs_inode, vfs_inode)); - mutex_init(&ni->ni_lock); - return &ni->vfs_inode; } -static void ntfs_i_callback(struct rcu_head *head) +static void ntfs_free_inode(struct inode *inode) { - struct inode *inode = container_of(head, struct inode, i_rcu); struct ntfs_inode *ni = ntfs_i(inode); mutex_destroy(&ni->ni_lock); - kmem_cache_free(ntfs_inode_cachep, ni); } -static void ntfs_destroy_inode(struct inode *inode) -{ - call_rcu(&inode->i_rcu, ntfs_i_callback); -} - static void init_once(void *foo) { struct ntfs_inode *ni = foo; @@ -519,9 +549,9 @@ static int ntfs_show_options(struct seq_file *m, struct dentry *root) seq_printf(m, ",gid=%u", from_kgid_munged(user_ns, opts->fs_gid)); if (opts->fmask) - seq_printf(m, ",fmask=%04o", ~opts->fs_fmask_inv); + seq_printf(m, ",fmask=%04o", opts->fs_fmask_inv ^ 0xffff); if (opts->dmask) - seq_printf(m, ",dmask=%04o", ~opts->fs_dmask_inv); + seq_printf(m, ",dmask=%04o", opts->fs_dmask_inv ^ 0xffff); if (opts->nls) seq_printf(m, ",iocharset=%s", opts->nls->charset); else @@ -536,6 +566,10 @@ static int ntfs_show_options(struct seq_file *m, struct dentry *root) seq_puts(m, ",showmeta"); if (opts->nohidden) seq_puts(m, ",nohidden"); + if (opts->windows_names) + seq_puts(m, ",windows_names"); + if (opts->hide_dot_files) + seq_puts(m, ",hide_dot_files"); if (opts->force) seq_puts(m, ",force"); if (opts->noacsrules) @@ -592,7 +626,7 @@ static int ntfs_sync_fs(struct super_block *sb, int wait) static const struct super_operations ntfs_sops = { .alloc_inode = ntfs_alloc_inode, - .destroy_inode = ntfs_destroy_inode, + .free_inode = ntfs_free_inode, .evict_inode = ntfs_evict_inode, .put_super = ntfs_put_super, .statfs = ntfs_statfs, @@ -672,7 +706,7 @@ static u32 true_sectors_per_clst(const struct NTFS_BOOT *boot) if (boot->sectors_per_clusters <= 0x80) return boot->sectors_per_clusters; if (boot->sectors_per_clusters >= 0xf4) /* limit shift to 2MB max */ - return 1U << (0 - boot->sectors_per_clusters); + return 1U << -(s8)boot->sectors_per_clusters; return -EINVAL; } @@ -789,7 +823,7 @@ static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size, : (u32)boot->record_size << sbi->cluster_bits; - if (record_size > MAXIMUM_BYTES_PER_MFT) + if (record_size > MAXIMUM_BYTES_PER_MFT || record_size < SECTOR_SIZE) goto out; sbi->record_bits = blksize_bits(record_size); @@ -896,7 +930,7 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc) struct block_device *bdev = sb->s_bdev; struct inode *inode; struct ntfs_inode *ni; - size_t i, tt; + size_t i, tt, bad_len, bad_frags; CLST vcn, lcn, len; struct ATTRIB *attr; const struct VOLUME_INFO *info; @@ -916,6 +950,7 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc) sb->s_export_op = &ntfs_export_ops; sb->s_time_gran = NTFS_TIME_GRAN; // 100 nsec sb->s_xattr = ntfs_xattr_handlers; + sb->s_d_op = sbi->options->nocase ? &ntfs_dentry_ops : NULL; sbi->options->nls = ntfs_load_nls(sbi->options->nls_name); if (IS_ERR(sbi->options->nls)) { @@ -1065,30 +1100,6 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc) sbi->mft.ni = ni; - /* Load $BadClus. */ - ref.low = cpu_to_le32(MFT_REC_BADCLUST); - ref.seq = cpu_to_le16(MFT_REC_BADCLUST); - inode = ntfs_iget5(sb, &ref, &NAME_BADCLUS); - if (IS_ERR(inode)) { - ntfs_err(sb, "Failed to load $BadClus."); - err = PTR_ERR(inode); - goto out; - } - - ni = ntfs_i(inode); - - for (i = 0; run_get_entry(&ni->file.run, i, &vcn, &lcn, &len); i++) { - if (lcn == SPARSE_LCN) - continue; - - if (!sbi->bad_clusters) - ntfs_notice(sb, "Volume contains bad blocks"); - - sbi->bad_clusters += len; - } - - iput(inode); - /* Load $Bitmap. */ ref.low = cpu_to_le32(MFT_REC_BITMAP); ref.seq = cpu_to_le16(MFT_REC_BITMAP); @@ -1126,6 +1137,44 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc) if (err) goto out; + /* Load $BadClus. */ + ref.low = cpu_to_le32(MFT_REC_BADCLUST); + ref.seq = cpu_to_le16(MFT_REC_BADCLUST); + inode = ntfs_iget5(sb, &ref, &NAME_BADCLUS); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + ntfs_err(sb, "Failed to load $BadClus (%d).", err); + goto out; + } + + ni = ntfs_i(inode); + bad_len = bad_frags = 0; + for (i = 0; run_get_entry(&ni->file.run, i, &vcn, &lcn, &len); i++) { + if (lcn == SPARSE_LCN) + continue; + + bad_len += len; + bad_frags += 1; + if (sb_rdonly(sb)) + continue; + + if (wnd_set_used_safe(&sbi->used.bitmap, lcn, len, &tt) || tt) { + /* Bad blocks marked as free in bitmap. */ + ntfs_set_state(sbi, NTFS_DIRTY_ERROR); + } + } + if (bad_len) { + /* + * Notice about bad blocks. + * In normal cases these blocks are marked as used in bitmap. + * And we never allocate space in it. + */ + ntfs_notice(sb, + "Volume contains %zu bad blocks in %zu fragments.", + bad_len, bad_frags); + } + iput(inode); + /* Load $AttrDef. */ ref.low = cpu_to_le32(MFT_REC_ATTR); ref.seq = cpu_to_le16(MFT_REC_ATTR); @@ -1141,7 +1190,7 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc) goto put_inode_out; } bytes = inode->i_size; - sbi->def_table = t = kmalloc(bytes, GFP_NOFS); + sbi->def_table = t = kmalloc(bytes, GFP_NOFS | __GFP_NOWARN); if (!t) { err = -ENOMEM; goto put_inode_out; @@ -1260,9 +1309,9 @@ load_root: ref.low = cpu_to_le32(MFT_REC_ROOT); ref.seq = cpu_to_le16(MFT_REC_ROOT); inode = ntfs_iget5(sb, &ref, &NAME_ROOT); - if (IS_ERR(inode)) { + if (IS_ERR(inode) || !inode->i_op) { ntfs_err(sb, "Failed to load root."); - err = PTR_ERR(inode); + err = IS_ERR(inode) ? PTR_ERR(inode) : -EINVAL; goto out; } @@ -1281,6 +1330,7 @@ out: * Free resources here. * ntfs_fs_free will be called with fc->s_fs_info = NULL */ + put_mount_options(sbi->options); put_ntfs(sbi); sb->s_fs_info = NULL; @@ -1488,11 +1538,8 @@ out1: static void __exit exit_ntfs_fs(void) { - if (ntfs_inode_cachep) { - rcu_barrier(); - kmem_cache_destroy(ntfs_inode_cachep); - } - + rcu_barrier(); + kmem_cache_destroy(ntfs_inode_cachep); unregister_filesystem(&ntfs_fs_type); ntfs3_exit_bitmap(); } diff --git a/fs/ntfs3/upcase.c b/fs/ntfs3/upcase.c index b5e8256fd710..7681eefacb4b 100644 --- a/fs/ntfs3/upcase.c +++ b/fs/ntfs3/upcase.c @@ -102,3 +102,15 @@ case_insentive: diff2 = l1 - l2; return diff2 ? diff2 : diff1; } + +/* Helper function for ntfs_d_hash. */ +unsigned long ntfs_names_hash(const u16 *name, size_t len, const u16 *upcase, + unsigned long hash) +{ + while (len--) { + unsigned int c = upcase_unicode_char(upcase, *name++); + hash = partial_name_hash(c, hash); + } + + return hash; +} diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c index aafe98ee0b21..616df209feea 100644 --- a/fs/ntfs3/xattr.c +++ b/fs/ntfs3/xattr.c @@ -15,9 +15,10 @@ #include "ntfs_fs.h" // clang-format off -#define SYSTEM_DOS_ATTRIB "system.dos_attrib" -#define SYSTEM_NTFS_ATTRIB "system.ntfs_attrib" -#define SYSTEM_NTFS_SECURITY "system.ntfs_security" +#define SYSTEM_DOS_ATTRIB "system.dos_attrib" +#define SYSTEM_NTFS_ATTRIB "system.ntfs_attrib" +#define SYSTEM_NTFS_ATTRIB_BE "system.ntfs_attrib_be" +#define SYSTEM_NTFS_SECURITY "system.ntfs_security" // clang-format on static inline size_t unpacked_ea_size(const struct EA_FULL *ea) @@ -42,28 +43,26 @@ static inline size_t packed_ea_size(const struct EA_FULL *ea) * Assume there is at least one xattr in the list. */ static inline bool find_ea(const struct EA_FULL *ea_all, u32 bytes, - const char *name, u8 name_len, u32 *off) + const char *name, u8 name_len, u32 *off, u32 *ea_sz) { - *off = 0; + u32 ea_size; - if (!ea_all || !bytes) + *off = 0; + if (!ea_all) return false; - for (;;) { + for (; *off < bytes; *off += ea_size) { const struct EA_FULL *ea = Add2Ptr(ea_all, *off); - u32 next_off = *off + unpacked_ea_size(ea); - - if (next_off > bytes) - return false; - + ea_size = unpacked_ea_size(ea); if (ea->name_len == name_len && - !memcmp(ea->name, name, name_len)) + !memcmp(ea->name, name, name_len)) { + if (ea_sz) + *ea_sz = ea_size; return true; - - *off = next_off; - if (next_off >= bytes) - return false; + } } + + return false; } /* @@ -74,12 +73,12 @@ static inline bool find_ea(const struct EA_FULL *ea_all, u32 bytes, static int ntfs_read_ea(struct ntfs_inode *ni, struct EA_FULL **ea, size_t add_bytes, const struct EA_INFO **info) { - int err; + int err = -EINVAL; struct ntfs_sb_info *sbi = ni->mi.sbi; struct ATTR_LIST_ENTRY *le = NULL; struct ATTRIB *attr_info, *attr_ea; void *ea_p; - u32 size; + u32 size, off, ea_size; static_assert(le32_to_cpu(ATTR_EA_INFO) < le32_to_cpu(ATTR_EA)); @@ -96,24 +95,31 @@ static int ntfs_read_ea(struct ntfs_inode *ni, struct EA_FULL **ea, *info = resident_data_ex(attr_info, sizeof(struct EA_INFO)); if (!*info) - return -EINVAL; + goto out; /* Check Ea limit. */ size = le32_to_cpu((*info)->size); - if (size > sbi->ea_max_size) - return -EFBIG; + if (size > sbi->ea_max_size) { + err = -EFBIG; + goto out; + } - if (attr_size(attr_ea) > sbi->ea_max_size) - return -EFBIG; + if (attr_size(attr_ea) > sbi->ea_max_size) { + err = -EFBIG; + goto out; + } + + if (!size) { + /* EA info persists, but xattr is empty. Looks like EA problem. */ + goto out; + } /* Allocate memory for packed Ea. */ - ea_p = kmalloc(size + add_bytes, GFP_NOFS); + ea_p = kmalloc(size_add(size, add_bytes), GFP_NOFS); if (!ea_p) return -ENOMEM; - if (!size) { - /* EA info persists, but xattr is empty. Looks like EA problem. */ - } else if (attr_ea->non_res) { + if (attr_ea->non_res) { struct runs_tree run; run_init(&run); @@ -124,24 +130,52 @@ static int ntfs_read_ea(struct ntfs_inode *ni, struct EA_FULL **ea, run_close(&run); if (err) - goto out; + goto out1; } else { void *p = resident_data_ex(attr_ea, size); - if (!p) { - err = -EINVAL; - goto out; - } + if (!p) + goto out1; memcpy(ea_p, p, size); } memset(Add2Ptr(ea_p, size), 0, add_bytes); + + /* Check all attributes for consistency. */ + for (off = 0; off < size; off += ea_size) { + const struct EA_FULL *ef = Add2Ptr(ea_p, off); + u32 bytes = size - off; + + /* Check if we can use field ea->size. */ + if (bytes < sizeof(ef->size)) + goto out1; + + if (ef->size) { + ea_size = le32_to_cpu(ef->size); + if (ea_size > bytes) + goto out1; + continue; + } + + /* Check if we can use fields ef->name_len and ef->elength. */ + if (bytes < offsetof(struct EA_FULL, name)) + goto out1; + + ea_size = ALIGN(struct_size(ef, name, + 1 + ef->name_len + + le16_to_cpu(ef->elength)), + 4); + if (ea_size > bytes) + goto out1; + } + *ea = ea_p; return 0; -out: +out1: kfree(ea_p); - *ea = NULL; +out: + ntfs_set_state(sbi, NTFS_DIRTY_DIRTY); return err; } @@ -163,6 +197,7 @@ static ssize_t ntfs_list_ea(struct ntfs_inode *ni, char *buffer, const struct EA_FULL *ea; u32 off, size; int err; + int ea_size; size_t ret; err = ntfs_read_ea(ni, &ea_all, 0, &info); @@ -175,8 +210,9 @@ static ssize_t ntfs_list_ea(struct ntfs_inode *ni, char *buffer, size = le32_to_cpu(info->size); /* Enumerate all xattrs. */ - for (ret = 0, off = 0; off < size; off += unpacked_ea_size(ea)) { + for (ret = 0, off = 0; off < size; off += ea_size) { ea = Add2Ptr(ea_all, off); + ea_size = unpacked_ea_size(ea); if (buffer) { if (ret + ea->name_len + 1 > bytes_per_buffer) { @@ -227,7 +263,8 @@ static int ntfs_get_ea(struct inode *inode, const char *name, size_t name_len, goto out; /* Enumerate all xattrs. */ - if (!find_ea(ea_all, le32_to_cpu(info->size), name, name_len, &off)) { + if (!find_ea(ea_all, le32_to_cpu(info->size), name, name_len, &off, + NULL)) { err = -ENODATA; goto out; } @@ -269,7 +306,7 @@ static noinline int ntfs_set_ea(struct inode *inode, const char *name, struct EA_FULL *new_ea; struct EA_FULL *ea_all = NULL; size_t add, new_pack; - u32 off, size; + u32 off, size, ea_sz; __le16 size_pack; struct ATTRIB *attr; struct ATTR_LIST_ENTRY *le; @@ -304,9 +341,8 @@ static noinline int ntfs_set_ea(struct inode *inode, const char *name, size_pack = ea_info.size_pack; } - if (info && find_ea(ea_all, size, name, name_len, &off)) { + if (info && find_ea(ea_all, size, name, name_len, &off, &ea_sz)) { struct EA_FULL *ea; - size_t ea_sz; if (flags & XATTR_CREATE) { err = -EEXIST; @@ -329,8 +365,6 @@ static noinline int ntfs_set_ea(struct inode *inode, const char *name, if (ea->flags & FILE_NEED_EA) le16_add_cpu(&ea_info.count, -1); - ea_sz = unpacked_ea_size(ea); - le16_add_cpu(&ea_info.size_pack, 0 - packed_ea_size(ea)); memmove(ea, Add2Ptr(ea, ea_sz), size - off - ea_sz); @@ -604,10 +638,9 @@ static noinline int ntfs_set_acl_ex(struct user_namespace *mnt_userns, err = 0; /* Removing non existed xattr. */ if (!err) { set_cached_acl(inode, type, acl); - if (inode->i_mode != mode) { - inode->i_mode = mode; - mark_inode_dirty(inode); - } + inode->i_mode = mode; + inode->i_ctime = current_time(inode); + mark_inode_dirty(inode); } out: @@ -721,11 +754,9 @@ static int ntfs_getxattr(const struct xattr_handler *handler, struct dentry *de, { int err; struct ntfs_inode *ni = ntfs_i(inode); - size_t name_len = strlen(name); /* Dispatch request. */ - if (name_len == sizeof(SYSTEM_DOS_ATTRIB) - 1 && - !memcmp(name, SYSTEM_DOS_ATTRIB, sizeof(SYSTEM_DOS_ATTRIB))) { + if (!strcmp(name, SYSTEM_DOS_ATTRIB)) { /* system.dos_attrib */ if (!buffer) { err = sizeof(u8); @@ -738,8 +769,8 @@ static int ntfs_getxattr(const struct xattr_handler *handler, struct dentry *de, goto out; } - if (name_len == sizeof(SYSTEM_NTFS_ATTRIB) - 1 && - !memcmp(name, SYSTEM_NTFS_ATTRIB, sizeof(SYSTEM_NTFS_ATTRIB))) { + if (!strcmp(name, SYSTEM_NTFS_ATTRIB) || + !strcmp(name, SYSTEM_NTFS_ATTRIB_BE)) { /* system.ntfs_attrib */ if (!buffer) { err = sizeof(u32); @@ -748,12 +779,13 @@ static int ntfs_getxattr(const struct xattr_handler *handler, struct dentry *de, } else { err = sizeof(u32); *(u32 *)buffer = le32_to_cpu(ni->std_fa); + if (!strcmp(name, SYSTEM_NTFS_ATTRIB_BE)) + *(u32 *)buffer = cpu_to_be32(*(u32 *)buffer); } goto out; } - if (name_len == sizeof(SYSTEM_NTFS_SECURITY) - 1 && - !memcmp(name, SYSTEM_NTFS_SECURITY, sizeof(SYSTEM_NTFS_SECURITY))) { + if (!strcmp(name, SYSTEM_NTFS_SECURITY)) { /* system.ntfs_security*/ struct SECURITY_DESCRIPTOR_RELATIVE *sd = NULL; size_t sd_size = 0; @@ -793,7 +825,7 @@ static int ntfs_getxattr(const struct xattr_handler *handler, struct dentry *de, } /* Deal with NTFS extended attribute. */ - err = ntfs_get_ea(inode, name, name_len, buffer, size, NULL); + err = ntfs_get_ea(inode, name, strlen(name), buffer, size, NULL); out: return err; @@ -810,23 +842,24 @@ static noinline int ntfs_setxattr(const struct xattr_handler *handler, { int err = -EINVAL; struct ntfs_inode *ni = ntfs_i(inode); - size_t name_len = strlen(name); enum FILE_ATTRIBUTE new_fa; /* Dispatch request. */ - if (name_len == sizeof(SYSTEM_DOS_ATTRIB) - 1 && - !memcmp(name, SYSTEM_DOS_ATTRIB, sizeof(SYSTEM_DOS_ATTRIB))) { + if (!strcmp(name, SYSTEM_DOS_ATTRIB)) { if (sizeof(u8) != size) goto out; new_fa = cpu_to_le32(*(u8 *)value); goto set_new_fa; } - if (name_len == sizeof(SYSTEM_NTFS_ATTRIB) - 1 && - !memcmp(name, SYSTEM_NTFS_ATTRIB, sizeof(SYSTEM_NTFS_ATTRIB))) { + if (!strcmp(name, SYSTEM_NTFS_ATTRIB) || + !strcmp(name, SYSTEM_NTFS_ATTRIB_BE)) { if (size != sizeof(u32)) goto out; - new_fa = cpu_to_le32(*(u32 *)value); + if (!strcmp(name, SYSTEM_NTFS_ATTRIB_BE)) + new_fa = cpu_to_le32(be32_to_cpu(*(u32 *)value)); + else + new_fa = cpu_to_le32(*(u32 *)value); if (S_ISREG(inode->i_mode)) { /* Process compressed/sparsed in special way. */ @@ -861,8 +894,7 @@ set_new_fa: goto out; } - if (name_len == sizeof(SYSTEM_NTFS_SECURITY) - 1 && - !memcmp(name, SYSTEM_NTFS_SECURITY, sizeof(SYSTEM_NTFS_SECURITY))) { + if (!strcmp(name, SYSTEM_NTFS_SECURITY)) { /* system.ntfs_security*/ __le32 security_id; bool inserted; @@ -905,7 +937,7 @@ set_new_fa: } /* Deal with NTFS extended attribute. */ - err = ntfs_set_ea(inode, name, name_len, value, size, flags, 0); + err = ntfs_set_ea(inode, name, strlen(name), value, size, flags, 0); out: inode->i_ctime = current_time(inode); diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 37d222bdfc8c..a07b24d170f2 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -1602,6 +1602,7 @@ static void o2net_start_connect(struct work_struct *work) sc->sc_sock = sock; /* freed by sc_kref_release */ sock->sk->sk_allocation = GFP_ATOMIC; + sock->sk->sk_use_task_frag = false; myaddr.sin_family = AF_INET; myaddr.sin_addr.s_addr = mynode->nd_ipv4_address; diff --git a/fs/pnode.c b/fs/pnode.c index 1106137c747a..468e4e65a615 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -244,7 +244,7 @@ static int propagate_one(struct mount *m) } do { struct mount *parent = last_source->mnt_parent; - if (last_source == first_source) + if (peers(last_source, first_source)) break; done = parent->mnt_master == p; if (done && peers(n, parent)) diff --git a/fs/proc/page.c b/fs/proc/page.c index f2273b164535..6249c347809a 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -219,8 +219,9 @@ u64 stable_page_flags(struct page *page) u |= kpf_copy_bit(k, KPF_PRIVATE_2, PG_private_2); u |= kpf_copy_bit(k, KPF_OWNER_PRIVATE, PG_owner_priv_1); u |= kpf_copy_bit(k, KPF_ARCH, PG_arch_1); -#ifdef CONFIG_64BIT +#ifdef CONFIG_ARCH_USES_PG_ARCH_X u |= kpf_copy_bit(k, KPF_ARCH_2, PG_arch_2); + u |= kpf_copy_bit(k, KPF_ARCH_3, PG_arch_3); #endif return u; diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig index 8adabde685f1..c49d554cc9ae 100644 --- a/fs/pstore/Kconfig +++ b/fs/pstore/Kconfig @@ -126,6 +126,7 @@ config PSTORE_CONSOLE config PSTORE_PMSG bool "Log user space messages" depends on PSTORE + select RT_MUTEXES help When the option is enabled, pstore will export a character interface /dev/pmsg0 to log user space messages. On reboot diff --git a/fs/pstore/pmsg.c b/fs/pstore/pmsg.c index d8542ec2f38c..ab82e5f05346 100644 --- a/fs/pstore/pmsg.c +++ b/fs/pstore/pmsg.c @@ -7,9 +7,10 @@ #include <linux/device.h> #include <linux/fs.h> #include <linux/uaccess.h> +#include <linux/rtmutex.h> #include "internal.h" -static DEFINE_MUTEX(pmsg_lock); +static DEFINE_RT_MUTEX(pmsg_lock); static ssize_t write_pmsg(struct file *file, const char __user *buf, size_t count, loff_t *ppos) @@ -28,9 +29,9 @@ static ssize_t write_pmsg(struct file *file, const char __user *buf, if (!access_ok(buf, count)) return -EFAULT; - mutex_lock(&pmsg_lock); + rt_mutex_lock(&pmsg_lock); ret = psinfo->write_user(&record, buf); - mutex_unlock(&pmsg_lock); + rt_mutex_unlock(&pmsg_lock); return ret ? ret : count; } @@ -46,7 +47,7 @@ static int pmsg_major; #undef pr_fmt #define pr_fmt(fmt) PMSG_NAME ": " fmt -static char *pmsg_devnode(struct device *dev, umode_t *mode) +static char *pmsg_devnode(const struct device *dev, umode_t *mode) { if (mode) *mode = 0220; diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 9a5052431fd3..ade66dbe5f39 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -680,7 +680,7 @@ static int ramoops_parse_dt(struct platform_device *pdev, field = value; \ } - parse_u32("mem-type", pdata->record_size, pdata->mem_type); + parse_u32("mem-type", pdata->mem_type, pdata->mem_type); parse_u32("record-size", pdata->record_size, 0); parse_u32("console-size", pdata->console_size, 0); parse_u32("ftrace-size", pdata->ftrace_size, 0); |