aboutsummaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/acl.c2
-rw-r--r--fs/9p/v9fs.c21
-rw-r--r--fs/9p/v9fs.h1
-rw-r--r--fs/9p/vfs_dir.c19
-rw-r--r--fs/9p/vfs_file.c24
-rw-r--r--fs/btrfs/compression.c6
-rw-r--r--fs/btrfs/extent_io.c12
-rw-r--r--fs/buffer.c14
-rw-r--r--fs/cifs/cifs_debug.c17
-rw-r--r--fs/cifs/cifs_debug.h28
-rw-r--r--fs/cifs/cifs_dfs_ref.c7
-rw-r--r--fs/cifs/cifs_fs_sb.h1
-rw-r--r--fs/cifs/cifs_ioctl.h11
-rw-r--r--fs/cifs/cifsfs.c30
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsglob.h14
-rw-r--r--fs/cifs/cifsproto.h2
-rw-r--r--fs/cifs/cifssmb.c23
-rw-r--r--fs/cifs/connect.c13
-rw-r--r--fs/cifs/file.c56
-rw-r--r--fs/cifs/inode.c73
-rw-r--r--fs/cifs/ioctl.c48
-rw-r--r--fs/cifs/misc.c2
-rw-r--r--fs/cifs/smb2glob.h2
-rw-r--r--fs/cifs/smb2inode.c332
-rw-r--r--fs/cifs/smb2maperror.c2
-rw-r--r--fs/cifs/smb2ops.c212
-rw-r--r--fs/cifs/smb2pdu.c260
-rw-r--r--fs/cifs/smb2pdu.h13
-rw-r--r--fs/cifs/smb2proto.h28
-rw-r--r--fs/cifs/smbdirect.c38
-rw-r--r--fs/cifs/trace.h109
-rw-r--r--fs/cifs/transport.c78
-rw-r--r--fs/compat_ioctl.c131
-rw-r--r--fs/cramfs/inode.c5
-rw-r--r--fs/dax.c917
-rw-r--r--fs/dcache.c38
-rw-r--r--fs/ext2/acl.c4
-rw-r--r--fs/ext2/ext2.h4
-rw-r--r--fs/ext2/super.c5
-rw-r--r--fs/ext4/inode.c2
-rw-r--r--fs/f2fs/data.c6
-rw-r--r--fs/f2fs/dir.c2
-rw-r--r--fs/f2fs/f2fs.h2
-rw-r--r--fs/f2fs/inline.c2
-rw-r--r--fs/f2fs/node.c6
-rw-r--r--fs/fs-writeback.c25
-rw-r--r--fs/gfs2/aops.c2
-rw-r--r--fs/inode.c2
-rw-r--r--fs/iomap.c2
-rw-r--r--fs/isofs/dir.c2
-rw-r--r--fs/kernfs/mount.c3
-rw-r--r--fs/kernfs/symlink.c5
-rw-r--r--fs/nfs/blocklayout/blocklayout.c2
-rw-r--r--fs/nfs/delegation.c17
-rw-r--r--fs/nfs/dir.c295
-rw-r--r--fs/nfs/filelayout/filelayout.c1
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c1
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayoutdev.c2
-rw-r--r--fs/nfs/inode.c70
-rw-r--r--fs/nfs/nfs3proc.c5
-rw-r--r--fs/nfs/nfs3xdr.c10
-rw-r--r--fs/nfs/nfs4_fs.h3
-rw-r--r--fs/nfs/nfs4client.c16
-rw-r--r--fs/nfs/nfs4proc.c53
-rw-r--r--fs/nfs/nfs4state.c254
-rw-r--r--fs/nfs/nfs4xdr.c2
-rw-r--r--fs/nfs/pagelist.c49
-rw-r--r--fs/nfs/pnfs.c16
-rw-r--r--fs/nfs/pnfs.h1
-rw-r--r--fs/nfs/read.c10
-rw-r--r--fs/nilfs2/btnode.c26
-rw-r--r--fs/nilfs2/page.c29
-rw-r--r--fs/notify/fanotify/fanotify.c17
-rw-r--r--fs/notify/fanotify/fanotify.h4
-rw-r--r--fs/notify/fanotify/fanotify_user.c103
-rw-r--r--fs/notify/fdinfo.c29
-rw-r--r--fs/notify/fsnotify.c42
-rw-r--r--fs/notify/fsnotify.h11
-rw-r--r--fs/notify/inotify/inotify_user.c2
-rw-r--r--fs/notify/mark.c43
-rw-r--r--fs/ocfs2/alloc.c4
-rw-r--r--fs/ocfs2/aops.c3
-rw-r--r--fs/ocfs2/dlm/dlmdebug.c2
-rw-r--r--fs/ocfs2/dlm/dlmthread.c2
-rw-r--r--fs/ocfs2/refcounttree.c2
-rw-r--r--fs/proc/inode.c3
-rw-r--r--fs/proc/loadavg.c3
-rw-r--r--fs/proc/meminfo.c16
-rw-r--r--fs/proc/task_mmu.c6
-rw-r--r--fs/super.c2
-rw-r--r--fs/udf/balloc.c30
-rw-r--r--fs/udf/super.c139
-rw-r--r--fs/udf/udf_sb.h10
-rw-r--r--fs/userfaultfd.c8
95 files changed, 2235 insertions, 1773 deletions
diff --git a/fs/9p/acl.c b/fs/9p/acl.c
index 082d227fa56b..6261719f6f2a 100644
--- a/fs/9p/acl.c
+++ b/fs/9p/acl.c
@@ -276,7 +276,7 @@ static int v9fs_xattr_set_acl(const struct xattr_handler *handler,
switch (handler->flags) {
case ACL_TYPE_ACCESS:
if (acl) {
- struct iattr iattr;
+ struct iattr iattr = { 0 };
struct posix_acl *old_acl = acl;
retval = posix_acl_update_mode(inode, &iattr.ia_mode, &acl);
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 89bac3d2f05b..619128b55837 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -61,6 +61,8 @@ enum {
Opt_cache_loose, Opt_fscache, Opt_mmap,
/* Access options */
Opt_access, Opt_posixacl,
+ /* Lock timeout option */
+ Opt_locktimeout,
/* Error token */
Opt_err
};
@@ -80,6 +82,7 @@ static const match_table_t tokens = {
{Opt_cachetag, "cachetag=%s"},
{Opt_access, "access=%s"},
{Opt_posixacl, "posixacl"},
+ {Opt_locktimeout, "locktimeout=%u"},
{Opt_err, NULL}
};
@@ -187,6 +190,7 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
#ifdef CONFIG_9P_FSCACHE
v9ses->cachetag = NULL;
#endif
+ v9ses->session_lock_timeout = P9_LOCK_TIMEOUT;
if (!opts)
return 0;
@@ -359,6 +363,23 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
#endif
break;
+ case Opt_locktimeout:
+ r = match_int(&args[0], &option);
+ if (r < 0) {
+ p9_debug(P9_DEBUG_ERROR,
+ "integer field, but no integer?\n");
+ ret = r;
+ continue;
+ }
+ if (option < 1) {
+ p9_debug(P9_DEBUG_ERROR,
+ "locktimeout must be a greater than zero integer.\n");
+ ret = -EINVAL;
+ continue;
+ }
+ v9ses->session_lock_timeout = (long)option * HZ;
+ break;
+
default:
continue;
}
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 982e017acadb..129e5243a6bf 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -116,6 +116,7 @@ struct v9fs_session_info {
struct p9_client *clnt; /* 9p client */
struct list_head slist; /* list of sessions registered with v9fs */
struct rw_semaphore rename_sem;
+ long session_lock_timeout; /* retry interval for blocking locks */
};
/* cache_validity flags */
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index b0405d6aac85..cb6c4031af55 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -76,15 +76,6 @@ static inline int dt_type(struct p9_wstat *mistat)
return rettype;
}
-static void p9stat_init(struct p9_wstat *stbuf)
-{
- stbuf->name = NULL;
- stbuf->uid = NULL;
- stbuf->gid = NULL;
- stbuf->muid = NULL;
- stbuf->extension = NULL;
-}
-
/**
* v9fs_alloc_rdir_buf - Allocate buffer used for read and readdir
* @filp: opened file structure
@@ -114,7 +105,6 @@ static int v9fs_dir_readdir(struct file *file, struct dir_context *ctx)
int err = 0;
struct p9_fid *fid;
int buflen;
- int reclen = 0;
struct p9_rdir *rdir;
struct kvec kvec;
@@ -145,15 +135,12 @@ static int v9fs_dir_readdir(struct file *file, struct dir_context *ctx)
rdir->tail = n;
}
while (rdir->head < rdir->tail) {
- p9stat_init(&st);
err = p9stat_read(fid->clnt, rdir->buf + rdir->head,
rdir->tail - rdir->head, &st);
- if (err) {
+ if (err <= 0) {
p9_debug(P9_DEBUG_VFS, "returned %d\n", err);
- p9stat_free(&st);
return -EIO;
}
- reclen = st.size+2;
over = !dir_emit(ctx, st.name, strlen(st.name),
v9fs_qid2ino(&st.qid), dt_type(&st));
@@ -161,8 +148,8 @@ static int v9fs_dir_readdir(struct file *file, struct dir_context *ctx)
if (over)
return 0;
- rdir->head += reclen;
- ctx->pos += reclen;
+ rdir->head += err;
+ ctx->pos += err;
}
}
}
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 5f2e48d41d72..a25efa782fcc 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -154,6 +154,7 @@ static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl)
uint8_t status = P9_LOCK_ERROR;
int res = 0;
unsigned char fl_type;
+ struct v9fs_session_info *v9ses;
fid = filp->private_data;
BUG_ON(fid == NULL);
@@ -189,6 +190,8 @@ static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl)
if (IS_SETLKW(cmd))
flock.flags = P9_LOCK_FLAGS_BLOCK;
+ v9ses = v9fs_inode2v9ses(file_inode(filp));
+
/*
* if its a blocked request and we get P9_LOCK_BLOCKED as the status
* for lock request, keep on trying
@@ -202,8 +205,17 @@ static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl)
break;
if (status == P9_LOCK_BLOCKED && !IS_SETLKW(cmd))
break;
- if (schedule_timeout_interruptible(P9_LOCK_TIMEOUT) != 0)
+ if (schedule_timeout_interruptible(v9ses->session_lock_timeout)
+ != 0)
break;
+ /*
+ * p9_client_lock_dotl overwrites flock.client_id with the
+ * server message, free and reuse the client name
+ */
+ if (flock.client_id != fid->clnt->name) {
+ kfree(flock.client_id);
+ flock.client_id = fid->clnt->name;
+ }
}
/* map 9p status to VFS status */
@@ -216,7 +228,7 @@ static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl)
break;
default:
WARN_ONCE(1, "unknown lock status code: %d\n", status);
- /* fallthough */
+ /* fall through */
case P9_LOCK_ERROR:
case P9_LOCK_GRACE:
res = -ENOLCK;
@@ -235,6 +247,8 @@ out_unlock:
locks_lock_file_wait(filp, fl);
fl->fl_type = fl_type;
}
+ if (flock.client_id != fid->clnt->name)
+ kfree(flock.client_id);
out:
return res;
}
@@ -269,7 +283,7 @@ static int v9fs_file_getlock(struct file *filp, struct file_lock *fl)
res = p9_client_getlock_dotl(fid, &glock);
if (res < 0)
- return res;
+ goto out;
/* map 9p lock type to os lock type */
switch (glock.type) {
case P9_LOCK_TYPE_RDLCK:
@@ -290,7 +304,9 @@ static int v9fs_file_getlock(struct file *filp, struct file_lock *fl)
fl->fl_end = glock.start + glock.length - 1;
fl->fl_pid = -glock.proc_id;
}
- kfree(glock.client_id);
+out:
+ if (glock.client_id != fid->clnt->name)
+ kfree(glock.client_id);
return res;
}
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 8703ce68fe9d..2955a4ea2fa8 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -437,10 +437,8 @@ static noinline int add_ra_bio_pages(struct inode *inode,
if (pg_index > end_index)
break;
- rcu_read_lock();
- page = radix_tree_lookup(&mapping->i_pages, pg_index);
- rcu_read_unlock();
- if (page && !radix_tree_exceptional_entry(page)) {
+ page = xa_load(&mapping->i_pages, pg_index);
+ if (page && !xa_is_value(page)) {
misses++;
if (misses > 4)
break;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 6877a74c7469..d228f706ff3e 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3784,7 +3784,7 @@ int btree_write_cache_pages(struct address_space *mapping,
pgoff_t index;
pgoff_t end; /* Inclusive */
int scanned = 0;
- int tag;
+ xa_mark_t tag;
pagevec_init(&pvec);
if (wbc->range_cyclic) {
@@ -3909,7 +3909,7 @@ static int extent_write_cache_pages(struct address_space *mapping,
pgoff_t done_index;
int range_whole = 0;
int scanned = 0;
- int tag;
+ xa_mark_t tag;
/*
* We have to hold onto the inode so that ordered extents can do their
@@ -5159,11 +5159,9 @@ void clear_extent_buffer_dirty(struct extent_buffer *eb)
clear_page_dirty_for_io(page);
xa_lock_irq(&page->mapping->i_pages);
- if (!PageDirty(page)) {
- radix_tree_tag_clear(&page->mapping->i_pages,
- page_index(page),
- PAGECACHE_TAG_DIRTY);
- }
+ if (!PageDirty(page))
+ __xa_clear_mark(&page->mapping->i_pages,
+ page_index(page), PAGECACHE_TAG_DIRTY);
xa_unlock_irq(&page->mapping->i_pages);
ClearPageError(page);
unlock_page(page);
diff --git a/fs/buffer.c b/fs/buffer.c
index 109f55196866..d60d61e8ed7d 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -562,7 +562,7 @@ void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
EXPORT_SYMBOL(mark_buffer_dirty_inode);
/*
- * Mark the page dirty, and set it dirty in the radix tree, and mark the inode
+ * Mark the page dirty, and set it dirty in the page cache, and mark the inode
* dirty.
*
* If warn is true, then emit a warning if the page is not uptodate and has
@@ -579,8 +579,8 @@ void __set_page_dirty(struct page *page, struct address_space *mapping,
if (page->mapping) { /* Race with truncate? */
WARN_ON_ONCE(warn && !PageUptodate(page));
account_page_dirtied(page, mapping);
- radix_tree_tag_set(&mapping->i_pages,
- page_index(page), PAGECACHE_TAG_DIRTY);
+ __xa_set_mark(&mapping->i_pages, page_index(page),
+ PAGECACHE_TAG_DIRTY);
}
xa_unlock_irqrestore(&mapping->i_pages, flags);
}
@@ -1050,7 +1050,7 @@ __getblk_slow(struct block_device *bdev, sector_t block,
* The relationship between dirty buffers and dirty pages:
*
* Whenever a page has any dirty buffers, the page's dirty bit is set, and
- * the page is tagged dirty in its radix tree.
+ * the page is tagged dirty in the page cache.
*
* At all times, the dirtiness of the buffers represents the dirtiness of
* subsections of the page. If the page has buffers, the page dirty bit is
@@ -1073,9 +1073,9 @@ __getblk_slow(struct block_device *bdev, sector_t block,
* mark_buffer_dirty - mark a buffer_head as needing writeout
* @bh: the buffer_head to mark dirty
*
- * mark_buffer_dirty() will set the dirty bit against the buffer, then set its
- * backing page dirty, then tag the page as dirty in its address_space's radix
- * tree and then attach the address_space's inode to its superblock's dirty
+ * mark_buffer_dirty() will set the dirty bit against the buffer, then set
+ * its backing page dirty, then tag the page as dirty in the page cache
+ * and then attach the address_space's inode to its superblock's dirty
* inode list.
*
* mark_buffer_dirty() is atomic. It takes bh->b_page->mapping->private_lock,
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index f1fbea947fef..3e812428ac8d 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -132,7 +132,7 @@ cifs_dump_iface(struct seq_file *m, struct cifs_server_iface *iface)
struct sockaddr_in *ipv4 = (struct sockaddr_in *)&iface->sockaddr;
struct sockaddr_in6 *ipv6 = (struct sockaddr_in6 *)&iface->sockaddr;
- seq_printf(m, "\t\tSpeed: %zu bps\n", iface->speed);
+ seq_printf(m, "\tSpeed: %zu bps\n", iface->speed);
seq_puts(m, "\t\tCapabilities: ");
if (iface->rdma_capable)
seq_puts(m, "rdma ");
@@ -285,7 +285,7 @@ skip_rdma:
if ((ses->serverDomain == NULL) ||
(ses->serverOS == NULL) ||
(ses->serverNOS == NULL)) {
- seq_printf(m, "\n%d) Name: %s Uses: %d Capability: 0x%x\tSession Status: %d\t",
+ seq_printf(m, "\n%d) Name: %s Uses: %d Capability: 0x%x\tSession Status: %d ",
i, ses->serverName, ses->ses_count,
ses->capabilities, ses->status);
if (ses->session_flags & SMB2_SESSION_FLAG_IS_GUEST)
@@ -296,16 +296,18 @@ skip_rdma:
seq_printf(m,
"\n%d) Name: %s Domain: %s Uses: %d OS:"
" %s\n\tNOS: %s\tCapability: 0x%x\n\tSMB"
- " session status: %d\t",
+ " session status: %d ",
i, ses->serverName, ses->serverDomain,
ses->ses_count, ses->serverOS, ses->serverNOS,
ses->capabilities, ses->status);
}
if (server->rdma)
seq_printf(m, "RDMA\n\t");
- seq_printf(m, "TCP status: %d\n\tLocal Users To "
+ seq_printf(m, "TCP status: %d Instance: %d\n\tLocal Users To "
"Server: %d SecMode: 0x%x Req On Wire: %d",
- server->tcpStatus, server->srv_count,
+ server->tcpStatus,
+ server->reconnect_instance,
+ server->srv_count,
server->sec_mode, in_flight(server));
#ifdef CONFIG_CIFS_STATS2
@@ -352,7 +354,7 @@ skip_rdma:
seq_printf(m, "\n\tServer interfaces: %zu\n",
ses->iface_count);
for (j = 0; j < ses->iface_count; j++) {
- seq_printf(m, "\t%d)\n", j);
+ seq_printf(m, "\t%d)", j);
cifs_dump_iface(m, &ses->iface_list[j]);
}
spin_unlock(&ses->iface_lock);
@@ -383,6 +385,9 @@ static ssize_t cifs_stats_proc_write(struct file *file,
atomic_set(&totBufAllocCount, 0);
atomic_set(&totSmBufAllocCount, 0);
#endif /* CONFIG_CIFS_STATS2 */
+ atomic_set(&tcpSesReconnectCount, 0);
+ atomic_set(&tconInfoReconnectCount, 0);
+
spin_lock(&GlobalMid_Lock);
GlobalMaxActiveXid = 0;
GlobalCurrentXid = 0;
diff --git a/fs/cifs/cifs_debug.h b/fs/cifs/cifs_debug.h
index f4f3f0853c6e..631dc1bb21c1 100644
--- a/fs/cifs/cifs_debug.h
+++ b/fs/cifs/cifs_debug.h
@@ -47,6 +47,29 @@ extern int cifsFYI;
*/
#ifdef CONFIG_CIFS_DEBUG
+
+/*
+ * When adding tracepoints and debug messages we have various choices.
+ * Some considerations:
+ *
+ * Use cifs_dbg(VFS, ...) for things we always want logged, and the user to see
+ * cifs_info(...) slightly less important, admin can filter via loglevel > 6
+ * cifs_dbg(FYI, ...) minor debugging messages, off by default
+ * trace_smb3_* ftrace functions are preferred for complex debug messages
+ * intended for developers or experienced admins, off by default
+ */
+
+/* Information level messages, minor events */
+#define cifs_info_func(ratefunc, fmt, ...) \
+do { \
+ pr_info_ ## ratefunc("CIFS: " fmt, ##__VA_ARGS__); \
+} while (0)
+
+#define cifs_info(fmt, ...) \
+do { \
+ cifs_info_func(ratelimited, fmt, ##__VA_ARGS__); \
+} while (0)
+
/* information message: e.g., configuration, major event */
#define cifs_dbg_func(ratefunc, type, fmt, ...) \
do { \
@@ -81,6 +104,11 @@ do { \
if (0) \
pr_debug(fmt, ##__VA_ARGS__); \
} while (0)
+
+#define cifs_info(fmt, ...) \
+do { \
+ pr_info("CIFS: "fmt, ##__VA_ARGS__); \
+} while (0)
#endif
#endif /* _H_CIFS_DEBUG */
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 6b61df117fd4..b97c74efd04a 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -304,12 +304,17 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt)
*/
mnt = ERR_PTR(-ENOMEM);
+ cifs_sb = CIFS_SB(mntpt->d_sb);
+ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS) {
+ mnt = ERR_PTR(-EREMOTE);
+ goto cdda_exit;
+ }
+
/* always use tree name prefix */
full_path = build_path_from_dentry_optional_prefix(mntpt, true);
if (full_path == NULL)
goto cdda_exit;
- cifs_sb = CIFS_SB(mntpt->d_sb);
tlink = cifs_sb_tlink(cifs_sb);
if (IS_ERR(tlink)) {
mnt = ERR_CAST(tlink);
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index 9731d0d891e7..63d7530f2e1d 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -51,6 +51,7 @@
*/
#define CIFS_MOUNT_UID_FROM_ACL 0x2000000 /* try to get UID via special SID */
#define CIFS_MOUNT_NO_HANDLE_CACHE 0x4000000 /* disable caching dir handles */
+#define CIFS_MOUNT_NO_DFS 0x8000000 /* disable DFS resolving */
struct cifs_sb_info {
struct rb_root tlink_tree;
diff --git a/fs/cifs/cifs_ioctl.h b/fs/cifs/cifs_ioctl.h
index 57ff0756e30c..d8bce2f862de 100644
--- a/fs/cifs/cifs_ioctl.h
+++ b/fs/cifs/cifs_ioctl.h
@@ -43,8 +43,19 @@ struct smb_snapshot_array {
/* snapshots[]; */
} __packed;
+struct smb_query_info {
+ __u32 info_type;
+ __u32 file_info_class;
+ __u32 additional_information;
+ __u32 flags;
+ __u32 input_buffer_length;
+ __u32 output_buffer_length;
+ /* char buffer[]; */
+} __packed;
+
#define CIFS_IOCTL_MAGIC 0xCF
#define CIFS_IOC_COPYCHUNK_FILE _IOW(CIFS_IOCTL_MAGIC, 3, int)
#define CIFS_IOC_SET_INTEGRITY _IO(CIFS_IOCTL_MAGIC, 4)
#define CIFS_IOC_GET_MNT_INFO _IOR(CIFS_IOCTL_MAGIC, 5, struct smb_mnt_fs_info)
#define CIFS_ENUMERATE_SNAPSHOTS _IOR(CIFS_IOCTL_MAGIC, 6, struct smb_snapshot_array)
+#define CIFS_QUERY_INFO _IOWR(CIFS_IOCTL_MAGIC, 7, struct smb_query_info)
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 7065426b3280..7de9603c54f1 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -81,6 +81,14 @@ module_param(cifs_max_pending, uint, 0444);
MODULE_PARM_DESC(cifs_max_pending, "Simultaneous requests to server for "
"CIFS/SMB1 dialect (N/A for SMB3) "
"Default: 32767 Range: 2 to 32767.");
+#ifdef CONFIG_CIFS_STATS2
+unsigned int slow_rsp_threshold = 1;
+module_param(slow_rsp_threshold, uint, 0644);
+MODULE_PARM_DESC(slow_rsp_threshold, "Amount of time (in seconds) to wait "
+ "before logging that a response is delayed. "
+ "Default: 1 (if set to 0 disables msg).");
+#endif /* STATS2 */
+
module_param(enable_oplocks, bool, 0644);
MODULE_PARM_DESC(enable_oplocks, "Enable or disable oplocks. Default: y/Y/1");
@@ -492,6 +500,8 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
seq_puts(s, ",unix");
else
seq_puts(s, ",nounix");
+ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS)
+ seq_puts(s, ",nodfs");
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)
seq_puts(s, ",posixpaths");
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)
@@ -707,7 +717,14 @@ cifs_smb3_do_mount(struct file_system_type *fs_type,
struct cifs_mnt_data mnt_data;
struct dentry *root;
- cifs_dbg(FYI, "Devname: %s flags: %d\n", dev_name, flags);
+ /*
+ * Prints in Kernel / CIFS log the attempted mount operation
+ * If CIFS_DEBUG && cifs_FYI
+ */
+ if (cifsFYI)
+ cifs_dbg(FYI, "Devname: %s flags: %d\n", dev_name, flags);
+ else
+ cifs_info("Attempting to mount %s\n", dev_name);
volume_info = cifs_get_volume_info((char *)data, dev_name, is_smb3);
if (IS_ERR(volume_info))
@@ -1418,6 +1435,11 @@ init_cifs(void)
#ifdef CONFIG_CIFS_STATS2
atomic_set(&totBufAllocCount, 0);
atomic_set(&totSmBufAllocCount, 0);
+ if (slow_rsp_threshold < 1)
+ cifs_dbg(FYI, "slow_response_threshold msgs disabled\n");
+ else if (slow_rsp_threshold > 32767)
+ cifs_dbg(VFS,
+ "slow response threshold set higher than recommended (0 to 32767)\n");
#endif /* CONFIG_CIFS_STATS2 */
atomic_set(&midCount, 0);
@@ -1538,11 +1560,11 @@ exit_cifs(void)
cifs_proc_clean();
}
-MODULE_AUTHOR("Steve French <sfrench@us.ibm.com>");
+MODULE_AUTHOR("Steve French");
MODULE_LICENSE("GPL"); /* combination of LGPL + GPL source behaves as GPL */
MODULE_DESCRIPTION
- ("VFS to access servers complying with the SNIA CIFS Specification "
- "e.g. Samba and Windows");
+ ("VFS to access SMB3 servers e.g. Samba, Macs, Azure and Windows (and "
+ "also older servers complying with the SNIA CIFS Specification)");
MODULE_VERSION(CIFS_VERSION);
MODULE_SOFTDEP("pre: arc4");
MODULE_SOFTDEP("pre: des");
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index f047e87871a1..24e265a51874 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -148,5 +148,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
extern const struct export_operations cifs_export_ops;
#endif /* CONFIG_CIFS_NFSD_EXPORT */
-#define CIFS_VERSION "2.13"
+#define CIFS_VERSION "2.14"
#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 9dcaed031843..ed1e0fcb69e3 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -33,6 +33,7 @@
#define CIFS_MAGIC_NUMBER 0xFF534D42 /* the first four bytes of SMB PDUs */
+#define SMB_PATH_MAX 260
#define CIFS_PORT 445
#define RFC1001_PORT 139
@@ -465,6 +466,11 @@ struct smb_version_operations {
enum securityEnum (*select_sectype)(struct TCP_Server_Info *,
enum securityEnum);
int (*next_header)(char *);
+ /* ioctl passthrough for query_info */
+ int (*ioctl_query_info)(const unsigned int xid,
+ struct cifs_tcon *tcon,
+ __le16 *path, int is_dir,
+ unsigned long p);
};
struct smb_version_values {
@@ -654,6 +660,7 @@ struct TCP_Server_Info {
/* 16th byte of RFC1001 workstation name is always null */
char workstation_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL];
__u32 sequence_number; /* for signing, protected by srv_mutex */
+ __u32 reconnect_instance; /* incremented on each reconnect */
struct session_key session_key;
unsigned long lstrp; /* when we got last response from this server */
struct cifs_secmech secmech; /* crypto sec mech functs, descriptors */
@@ -798,6 +805,7 @@ compare_mid(__u16 mid, const struct smb_hdr *smb)
* a single wsize request with a single call.
*/
#define CIFS_DEFAULT_IOSIZE (1024 * 1024)
+#define SMB3_DEFAULT_IOSIZE (4 * 1024 * 1024)
/*
* Windows only supports a max of 60kb reads and 65535 byte writes. Default to
@@ -924,6 +932,8 @@ struct cifs_tcon {
struct list_head tcon_list;
int tc_count;
struct list_head rlist; /* reconnect list */
+ atomic_t num_local_opens; /* num of all opens including disconnected */
+ atomic_t num_remote_opens; /* num of all network opens on server */
struct list_head openFileList;
spinlock_t open_file_lock; /* protects list above */
struct cifs_ses *ses; /* pointer to session associated with */
@@ -1072,7 +1082,8 @@ struct cifsLockInfo {
__u64 offset;
__u64 length;
__u32 pid;
- __u32 type;
+ __u16 type;
+ __u16 flags;
};
/*
@@ -1715,6 +1726,7 @@ GLOBAL_EXTERN atomic_t bufAllocCount; /* current number allocated */
#ifdef CONFIG_CIFS_STATS2
GLOBAL_EXTERN atomic_t totBufAllocCount; /* total allocated over all time */
GLOBAL_EXTERN atomic_t totSmBufAllocCount;
+extern unsigned int slow_rsp_threshold; /* number of secs before logging */
#endif
GLOBAL_EXTERN atomic_t smBufAllocCount;
GLOBAL_EXTERN atomic_t midCount;
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 20adda4de83b..fa361bc00602 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -219,7 +219,7 @@ extern void cifs_mark_open_files_invalid(struct cifs_tcon *tcon);
extern void cifs_reopen_persistent_handles(struct cifs_tcon *tcon);
extern bool cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset,
- __u64 length, __u8 type,
+ __u64 length, __u8 type, __u16 flags,
struct cifsLockInfo **conf_lock,
int rw_check);
extern void cifs_add_pending_open(struct cifs_fid *fid,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 5657b79dbc99..f82fd342bca5 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1607,6 +1607,7 @@ cifs_readv_callback(struct mid_q_entry *mid)
struct smb_rqst rqst = { .rq_iov = rdata->iov,
.rq_nvec = 2,
.rq_pages = rdata->pages,
+ .rq_offset = rdata->page_offset,
.rq_npages = rdata->nr_pages,
.rq_pagesz = rdata->pagesz,
.rq_tailsz = rdata->tailsz };
@@ -2210,6 +2211,7 @@ cifs_async_writev(struct cifs_writedata *wdata,
rqst.rq_iov = iov;
rqst.rq_nvec = 2;
rqst.rq_pages = wdata->pages;
+ rqst.rq_offset = wdata->page_offset;
rqst.rq_npages = wdata->nr_pages;
rqst.rq_pagesz = wdata->pagesz;
rqst.rq_tailsz = wdata->tailsz;
@@ -5027,6 +5029,13 @@ oldQFSInfoRetry:
le16_to_cpu(response_data->BytesPerSector) *
le32_to_cpu(response_data->
SectorsPerAllocationUnit);
+ /*
+ * much prefer larger but if server doesn't report
+ * a valid size than 4K is a reasonable minimum
+ */
+ if (FSData->f_bsize < 512)
+ FSData->f_bsize = 4096;
+
FSData->f_blocks =
le32_to_cpu(response_data->TotalAllocationUnits);
FSData->f_bfree = FSData->f_bavail =
@@ -5107,6 +5116,13 @@ QFSInfoRetry:
le32_to_cpu(response_data->BytesPerSector) *
le32_to_cpu(response_data->
SectorsPerAllocationUnit);
+ /*
+ * much prefer larger but if server doesn't report
+ * a valid size than 4K is a reasonable minimum
+ */
+ if (FSData->f_bsize < 512)
+ FSData->f_bsize = 4096;
+
FSData->f_blocks =
le64_to_cpu(response_data->TotalAllocationUnits);
FSData->f_bfree = FSData->f_bavail =
@@ -5470,6 +5486,13 @@ QFSPosixRetry:
data_offset);
FSData->f_bsize =
le32_to_cpu(response_data->BlockSize);
+ /*
+ * much prefer larger but if server doesn't report
+ * a valid size than 4K is a reasonable minimum
+ */
+ if (FSData->f_bsize < 512)
+ FSData->f_bsize = 4096;
+
FSData->f_blocks =
le64_to_cpu(response_data->TotalBlocks);
FSData->f_bfree =
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 52d71b64c0c6..d82f0cc71755 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -250,6 +250,7 @@ static const match_table_t cifs_mount_option_tokens = {
{ Opt_ignore, "dev" },
{ Opt_ignore, "mand" },
{ Opt_ignore, "nomand" },
+ { Opt_ignore, "relatime" },
{ Opt_ignore, "_netdev" },
{ Opt_err, NULL }
@@ -347,7 +348,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
server->maxBuf = 0;
server->max_read = 0;
- cifs_dbg(FYI, "Reconnecting tcp session\n");
+ cifs_dbg(FYI, "Mark tcp session as need reconnect\n");
trace_smb3_reconnect(server->CurrentMid, server->hostname);
/* before reconnecting the tcp session, mark the smb session (uid)
@@ -2396,6 +2397,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
volume_info->target_rfc1001_name, RFC1001_NAME_LEN_WITH_NULL);
tcp_ses->session_estab = false;
tcp_ses->sequence_number = 0;
+ tcp_ses->reconnect_instance = 0;
tcp_ses->lstrp = jiffies;
spin_lock_init(&tcp_ses->req_lock);
INIT_LIST_HEAD(&tcp_ses->tcp_ses_list);
@@ -3085,10 +3087,6 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
if (rc)
goto out_fail;
- if (volume_info->nodfs) {
- tcon->Flags &= ~SMB_SHARE_IS_IN_DFS;
- cifs_dbg(FYI, "DFS disabled (%d)\n", tcon->Flags);
- }
tcon->use_persistent = false;
/* check if SMB2 or later, CIFS does not support persistent handles */
if (volume_info->persistent) {
@@ -3663,6 +3661,8 @@ int cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
cifs_sb->actimeo = pvolume_info->actimeo;
cifs_sb->local_nls = pvolume_info->local_nls;
+ if (pvolume_info->nodfs)
+ cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_DFS;
if (pvolume_info->noperm)
cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_PERM;
if (pvolume_info->setuids)
@@ -3819,6 +3819,9 @@ expand_dfs_referral(const unsigned int xid, struct cifs_ses *ses,
struct dfs_info3_param *referrals = NULL;
char *full_path = NULL, *ref_path = NULL, *mdata = NULL;
+ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS)
+ return -EREMOTE;
+
full_path = build_unc_path_to_root(volume_info, cifs_sb);
if (IS_ERR(full_path))
return PTR_ERR(full_path);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 8d41ca7bfcf1..c620d4b5d5d4 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -334,6 +334,7 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
server->ops->set_fid(cfile, fid, oplock);
list_add(&cfile->tlist, &tcon->openFileList);
+ atomic_inc(&tcon->num_local_opens);
/* if readable file instance put first in list*/
if (file->f_mode & FMODE_READ)
@@ -395,6 +396,7 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
/* remove it from the lists */
list_del(&cifs_file->flist);
list_del(&cifs_file->tlist);
+ atomic_dec(&tcon->num_local_opens);
if (list_empty(&cifsi->openFileList)) {
cifs_dbg(FYI, "closing last open instance for inode %p\n",
@@ -864,7 +866,7 @@ int cifs_closedir(struct inode *inode, struct file *file)
}
static struct cifsLockInfo *
-cifs_lock_init(__u64 offset, __u64 length, __u8 type)
+cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
{
struct cifsLockInfo *lock =
kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
@@ -874,6 +876,7 @@ cifs_lock_init(__u64 offset, __u64 length, __u8 type)
lock->length = length;
lock->type = type;
lock->pid = current->tgid;
+ lock->flags = flags;
INIT_LIST_HEAD(&lock->blist);
init_waitqueue_head(&lock->block_q);
return lock;
@@ -896,7 +899,8 @@ cifs_del_lock_waiters(struct cifsLockInfo *lock)
/* @rw_check : 0 - no op, 1 - read, 2 - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
- __u64 length, __u8 type, struct cifsFileInfo *cfile,
+ __u64 length, __u8 type, __u16 flags,
+ struct cifsFileInfo *cfile,
struct cifsLockInfo **conf_lock, int rw_check)
{
struct cifsLockInfo *li;
@@ -918,6 +922,10 @@ cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
((server->ops->compare_fids(cfile, cur_cfile) &&
current->tgid == li->pid) || type == li->type))
continue;
+ if (rw_check == CIFS_LOCK_OP &&
+ (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
+ server->ops->compare_fids(cfile, cur_cfile))
+ continue;
if (conf_lock)
*conf_lock = li;
return true;
@@ -927,8 +935,8 @@ cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
- __u8 type, struct cifsLockInfo **conf_lock,
- int rw_check)
+ __u8 type, __u16 flags,
+ struct cifsLockInfo **conf_lock, int rw_check)
{
bool rc = false;
struct cifs_fid_locks *cur;
@@ -936,7 +944,8 @@ cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
list_for_each_entry(cur, &cinode->llist, llist) {
rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
- cfile, conf_lock, rw_check);
+ flags, cfile, conf_lock,
+ rw_check);
if (rc)
break;
}
@@ -964,7 +973,8 @@ cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
down_read(&cinode->lock_sem);
exist = cifs_find_lock_conflict(cfile, offset, length, type,
- &conf_lock, CIFS_LOCK_OP);
+ flock->fl_flags, &conf_lock,
+ CIFS_LOCK_OP);
if (exist) {
flock->fl_start = conf_lock->offset;
flock->fl_end = conf_lock->offset + conf_lock->length - 1;
@@ -1011,7 +1021,8 @@ try_again:
down_write(&cinode->lock_sem);
exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
- lock->type, &conf_lock, CIFS_LOCK_OP);
+ lock->type, lock->flags, &conf_lock,
+ CIFS_LOCK_OP);
if (!exist && cinode->can_cache_brlcks) {
list_add_tail(&lock->llist, &cfile->llist->locks);
up_write(&cinode->lock_sem);
@@ -1321,7 +1332,7 @@ cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
cifs_dbg(FYI, "Lease on file - not implemented yet\n");
if (flock->fl_flags &
(~(FL_POSIX | FL_FLOCK | FL_SLEEP |
- FL_ACCESS | FL_LEASE | FL_CLOSE)))
+ FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
*type = server->vals->large_lock_type;
@@ -1584,7 +1595,8 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
if (lock) {
struct cifsLockInfo *lock;
- lock = cifs_lock_init(flock->fl_start, length, type);
+ lock = cifs_lock_init(flock->fl_start, length, type,
+ flock->fl_flags);
if (!lock)
return -ENOMEM;
@@ -1653,7 +1665,6 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
tcon->ses->server);
-
cifs_sb = CIFS_FILE_SB(file);
netfid = cfile->fid.netfid;
cinode = CIFS_I(file_inode(file));
@@ -2098,6 +2109,7 @@ static int cifs_writepages(struct address_space *mapping,
pgoff_t end, index;
struct cifs_writedata *wdata;
int rc = 0;
+ unsigned int xid;
/*
* If wsize is smaller than the page cache size, default to writing
@@ -2106,6 +2118,7 @@ static int cifs_writepages(struct address_space *mapping,
if (cifs_sb->wsize < PAGE_SIZE)
return generic_writepages(mapping, wbc);
+ xid = get_xid();
if (wbc->range_cyclic) {
index = mapping->writeback_index; /* Start from prev offset */
end = -1;
@@ -2199,6 +2212,7 @@ retry:
if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
mapping->writeback_index = index;
+ free_xid(xid);
return rc;
}
@@ -2817,8 +2831,8 @@ cifs_writev(struct kiocb *iocb, struct iov_iter *from)
goto out;
if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
- server->vals->exclusive_lock_type, NULL,
- CIFS_WRITE_OP))
+ server->vals->exclusive_lock_type, 0,
+ NULL, CIFS_WRITE_OP))
rc = __generic_file_write_iter(iocb, from);
else
rc = -EACCES;
@@ -3388,7 +3402,7 @@ cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
down_read(&cinode->lock_sem);
if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
tcon->ses->server->vals->shared_lock_type,
- NULL, CIFS_READ_OP))
+ 0, NULL, CIFS_READ_OP))
rc = generic_file_read_iter(iocb, to);
up_read(&cinode->lock_sem);
return rc;
@@ -3743,7 +3757,9 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
struct TCP_Server_Info *server;
pid_t pid;
+ unsigned int xid;
+ xid = get_xid();
/*
* Reads as many pages as possible from fscache. Returns -ENOBUFS
* immediately if the cookie is negative
@@ -3753,8 +3769,10 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
*/
rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
&num_pages);
- if (rc == 0)
+ if (rc == 0) {
+ free_xid(xid);
return rc;
+ }
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
pid = open_file->pid;
@@ -3798,6 +3816,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
*/
if (unlikely(rsize < PAGE_SIZE)) {
add_credits_and_wake_if(server, credits, 0);
+ free_xid(xid);
return 0;
}
@@ -3862,6 +3881,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
* allocator.
*/
cifs_fscache_readpages_cancel(mapping->host, page_list);
+ free_xid(xid);
return rc;
}
@@ -3889,8 +3909,12 @@ static int cifs_readpage_worker(struct file *file, struct page *page,
else
cifs_dbg(FYI, "Bytes read %d\n", rc);
- file_inode(file)->i_atime =
- current_time(file_inode(file));
+ /* we do not want atime to be less than mtime, it broke some apps */
+ file_inode(file)->i_atime = current_time(file_inode(file));
+ if (timespec64_compare(&(file_inode(file)->i_atime), &(file_inode(file)->i_mtime)))
+ file_inode(file)->i_atime = file_inode(file)->i_mtime;
+ else
+ file_inode(file)->i_atime = current_time(file_inode(file));
if (PAGE_SIZE > rc)
memset(read_data + rc, 0, PAGE_SIZE - rc);
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 6e8765f44508..1023d78673fb 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -162,7 +162,11 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr)
cifs_revalidate_cache(inode, fattr);
spin_lock(&inode->i_lock);
- inode->i_atime = fattr->cf_atime;
+ /* we do not want atime to be less than mtime, it broke some apps */
+ if (timespec64_compare(&fattr->cf_atime, &fattr->cf_mtime))
+ inode->i_atime = fattr->cf_mtime;
+ else
+ inode->i_atime = fattr->cf_atime;
inode->i_mtime = fattr->cf_mtime;
inode->i_ctime = fattr->cf_ctime;
inode->i_rdev = fattr->cf_rdev;
@@ -777,38 +781,53 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
} else if (rc == -EREMOTE) {
cifs_create_dfs_fattr(&fattr, sb);
rc = 0;
- } else if (rc == -EACCES && backup_cred(cifs_sb)) {
- srchinf = kzalloc(sizeof(struct cifs_search_info),
- GFP_KERNEL);
- if (srchinf == NULL) {
- rc = -ENOMEM;
- goto cgii_exit;
- }
+ } else if ((rc == -EACCES) && backup_cred(cifs_sb) &&
+ (strcmp(server->vals->version_string, SMB1_VERSION_STRING)
+ == 0)) {
+ /*
+ * For SMB2 and later the backup intent flag is already
+ * sent if needed on open and there is no path based
+ * FindFirst operation to use to retry with
+ */
- srchinf->endOfSearch = false;
+ srchinf = kzalloc(sizeof(struct cifs_search_info),
+ GFP_KERNEL);
+ if (srchinf == NULL) {
+ rc = -ENOMEM;
+ goto cgii_exit;
+ }
+
+ srchinf->endOfSearch = false;
+ if (tcon->unix_ext)
+ srchinf->info_level = SMB_FIND_FILE_UNIX;
+ else if ((tcon->ses->capabilities &
+ tcon->ses->server->vals->cap_nt_find) == 0)
+ srchinf->info_level = SMB_FIND_FILE_INFO_STANDARD;
+ else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)
srchinf->info_level = SMB_FIND_FILE_ID_FULL_DIR_INFO;
+ else /* no srvino useful for fallback to some netapp */
+ srchinf->info_level = SMB_FIND_FILE_DIRECTORY_INFO;
- srchflgs = CIFS_SEARCH_CLOSE_ALWAYS |
- CIFS_SEARCH_CLOSE_AT_END |
- CIFS_SEARCH_BACKUP_SEARCH;
+ srchflgs = CIFS_SEARCH_CLOSE_ALWAYS |
+ CIFS_SEARCH_CLOSE_AT_END |
+ CIFS_SEARCH_BACKUP_SEARCH;
- rc = CIFSFindFirst(xid, tcon, full_path,
- cifs_sb, NULL, srchflgs, srchinf, false);
- if (!rc) {
- data =
- (FILE_ALL_INFO *)srchinf->srch_entries_start;
+ rc = CIFSFindFirst(xid, tcon, full_path,
+ cifs_sb, NULL, srchflgs, srchinf, false);
+ if (!rc) {
+ data = (FILE_ALL_INFO *)srchinf->srch_entries_start;
- cifs_dir_info_to_fattr(&fattr,
- (FILE_DIRECTORY_INFO *)data, cifs_sb);
- fattr.cf_uniqueid = le64_to_cpu(
- ((SEARCH_ID_FULL_DIR_INFO *)data)->UniqueId);
- validinum = true;
+ cifs_dir_info_to_fattr(&fattr,
+ (FILE_DIRECTORY_INFO *)data, cifs_sb);
+ fattr.cf_uniqueid = le64_to_cpu(
+ ((SEARCH_ID_FULL_DIR_INFO *)data)->UniqueId);
+ validinum = true;
- cifs_buf_release(srchinf->ntwrk_buf_start);
- }
- kfree(srchinf);
- if (rc)
- goto cgii_exit;
+ cifs_buf_release(srchinf->ntwrk_buf_start);
+ }
+ kfree(srchinf);
+ if (rc)
+ goto cgii_exit;
} else
goto cgii_exit;
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index 54f32f9143a9..76ddd98b6298 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -32,8 +32,51 @@
#include "cifs_debug.h"
#include "cifsfs.h"
#include "cifs_ioctl.h"
+#include "smb2proto.h"
#include <linux/btrfs.h>
+static long cifs_ioctl_query_info(unsigned int xid, struct file *filep,
+ unsigned long p)
+{
+ struct inode *inode = file_inode(filep);
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
+ struct dentry *dentry = filep->f_path.dentry;
+ unsigned char *path;
+ __le16 *utf16_path = NULL, root_path;
+ int rc = 0;
+
+ path = build_path_from_dentry(dentry);
+ if (path == NULL)
+ return -ENOMEM;
+
+ cifs_dbg(FYI, "%s %s\n", __func__, path);
+
+ if (!path[0]) {
+ root_path = 0;
+ utf16_path = &root_path;
+ } else {
+ utf16_path = cifs_convert_path_to_utf16(path + 1, cifs_sb);
+ if (!utf16_path) {
+ rc = -ENOMEM;
+ goto ici_exit;
+ }
+ }
+
+ if (tcon->ses->server->ops->ioctl_query_info)
+ rc = tcon->ses->server->ops->ioctl_query_info(
+ xid, tcon, utf16_path,
+ filep->private_data ? 0 : 1, p);
+ else
+ rc = -EOPNOTSUPP;
+
+ ici_exit:
+ if (utf16_path != &root_path)
+ kfree(utf16_path);
+ kfree(path);
+ return rc;
+}
+
static long cifs_ioctl_copychunk(unsigned int xid, struct file *dst_file,
unsigned long srcfd)
{
@@ -123,7 +166,6 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
struct inode *inode = file_inode(filep);
int rc = -ENOTTY; /* strange error - but the precedent */
unsigned int xid;
- struct cifs_sb_info *cifs_sb;
struct cifsFileInfo *pSMBFile = filep->private_data;
struct cifs_tcon *tcon;
__u64 ExtAttrBits = 0;
@@ -131,7 +173,6 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
xid = get_xid();
- cifs_sb = CIFS_SB(inode->i_sb);
cifs_dbg(FYI, "cifs ioctl 0x%x\n", command);
switch (command) {
case FS_IOC_GETFLAGS:
@@ -196,6 +237,9 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
case CIFS_IOC_COPYCHUNK_FILE:
rc = cifs_ioctl_copychunk(xid, filep, arg);
break;
+ case CIFS_QUERY_INFO:
+ rc = cifs_ioctl_query_info(xid, filep, arg);
+ break;
case CIFS_IOC_SET_INTEGRITY:
if (pSMBFile == NULL)
break;
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 6926685e513c..fc43d5d25d1d 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -123,6 +123,8 @@ tconInfoAlloc(void)
ret_buf->crfid.fid = kzalloc(sizeof(struct cifs_fid),
GFP_KERNEL);
spin_lock_init(&ret_buf->stat_lock);
+ atomic_set(&ret_buf->num_local_opens, 0);
+ atomic_set(&ret_buf->num_remote_opens, 0);
}
return ret_buf;
}
diff --git a/fs/cifs/smb2glob.h b/fs/cifs/smb2glob.h
index 0ffa18094335..dd10f0ce4cd5 100644
--- a/fs/cifs/smb2glob.h
+++ b/fs/cifs/smb2glob.h
@@ -33,7 +33,7 @@
/*
* Identifiers for functions that use the open, operation, close pattern
- * in smb2inode.c:smb2_open_op_close()
+ * in smb2inode.c:smb2_compound_op()
*/
#define SMB2_OP_SET_DELETE 1
#define SMB2_OP_SET_INFO 2
diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
index 1eef1791d0c4..9e7ef7ec2d70 100644
--- a/fs/cifs/smb2inode.c
+++ b/fs/cifs/smb2inode.c
@@ -38,54 +38,83 @@
#include "smb2proto.h"
static int
-smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon,
- struct cifs_sb_info *cifs_sb, const char *full_path,
- __u32 desired_access, __u32 create_disposition,
- __u32 create_options, void *data, int command)
+smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon,
+ struct cifs_sb_info *cifs_sb, const char *full_path,
+ __u32 desired_access, __u32 create_disposition,
+ __u32 create_options, void *ptr, int command)
{
- int rc, tmprc = 0;
+ int rc;
__le16 *utf16_path = NULL;
__u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
struct cifs_open_parms oparms;
struct cifs_fid fid;
- bool use_cached_root_handle = false;
-
- if ((strcmp(full_path, "") == 0) && (create_options == 0) &&
- (desired_access == FILE_READ_ATTRIBUTES) &&
- (create_disposition == FILE_OPEN) &&
- (tcon->nohandlecache == false)) {
- rc = open_shroot(xid, tcon, &fid);
- if (rc == 0)
- use_cached_root_handle = true;
- }
+ struct cifs_ses *ses = tcon->ses;
+ struct TCP_Server_Info *server = ses->server;
+ int num_rqst = 0;
+ struct smb_rqst rqst[3];
+ int resp_buftype[3];
+ struct kvec rsp_iov[3];
+ struct kvec open_iov[SMB2_CREATE_IOV_SIZE];
+ struct kvec qi_iov[1];
+ struct kvec si_iov[SMB2_SET_INFO_IOV_SIZE];
+ struct kvec close_iov[1];
+ struct smb2_query_info_rsp *qi_rsp = NULL;
+ int flags = 0;
+ __u8 delete_pending[8] = {1, 0, 0, 0, 0, 0, 0, 0};
+ unsigned int size[2];
+ void *data[2];
+ struct smb2_file_rename_info rename_info;
+ struct smb2_file_link_info link_info;
+ int len;
- if (use_cached_root_handle == false) {
- utf16_path = cifs_convert_path_to_utf16(full_path, cifs_sb);
- if (!utf16_path)
- return -ENOMEM;
-
- oparms.tcon = tcon;
- oparms.desired_access = desired_access;
- oparms.disposition = create_disposition;
- oparms.create_options = create_options;
- oparms.fid = &fid;
- oparms.reconnect = false;
-
- rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL,
- NULL);
- if (rc) {
- kfree(utf16_path);
- return rc;
- }
- }
+ if (smb3_encryption_required(tcon))
+ flags |= CIFS_TRANSFORM_REQ;
+
+ memset(rqst, 0, sizeof(rqst));
+ resp_buftype[0] = resp_buftype[1] = resp_buftype[2] = CIFS_NO_BUFFER;
+ memset(rsp_iov, 0, sizeof(rsp_iov));
+
+ /* Open */
+ utf16_path = cifs_convert_path_to_utf16(full_path, cifs_sb);
+ if (!utf16_path)
+ return -ENOMEM;
+
+ oparms.tcon = tcon;
+ oparms.desired_access = desired_access;
+ oparms.disposition = create_disposition;
+ oparms.create_options = create_options;
+ if (backup_cred(cifs_sb))
+ oparms.create_options |= CREATE_OPEN_BACKUP_INTENT;
+ oparms.fid = &fid;
+ oparms.reconnect = false;
+ memset(&open_iov, 0, sizeof(open_iov));
+ rqst[num_rqst].rq_iov = open_iov;
+ rqst[num_rqst].rq_nvec = SMB2_CREATE_IOV_SIZE;
+ rc = SMB2_open_init(tcon, &rqst[num_rqst], &oplock, &oparms,
+ utf16_path);
+ kfree(utf16_path);
+ if (rc)
+ goto finished;
+
+ smb2_set_next_command(server, &rqst[num_rqst++]);
+
+ /* Operation */
switch (command) {
- case SMB2_OP_DELETE:
- break;
case SMB2_OP_QUERY_INFO:
- tmprc = SMB2_query_info(xid, tcon, fid.persistent_fid,
- fid.volatile_fid,
- (struct smb2_file_all_info *)data);
+ memset(&qi_iov, 0, sizeof(qi_iov));
+ rqst[num_rqst].rq_iov = qi_iov;
+ rqst[num_rqst].rq_nvec = 1;
+
+ rc = SMB2_query_info_init(tcon, &rqst[num_rqst], COMPOUND_FID,
+ COMPOUND_FID, FILE_ALL_INFORMATION,
+ SMB2_O_INFO_FILE, 0,
+ sizeof(struct smb2_file_all_info) +
+ PATH_MAX * 2, 0, NULL);
+ smb2_set_next_command(server, &rqst[num_rqst]);
+ smb2_set_related(&rqst[num_rqst++]);
+ break;
+ case SMB2_OP_DELETE:
break;
case SMB2_OP_MKDIR:
/*
@@ -94,39 +123,156 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon,
*/
break;
case SMB2_OP_RMDIR:
- tmprc = SMB2_rmdir(xid, tcon, fid.persistent_fid,
- fid.volatile_fid);
- break;
- case SMB2_OP_RENAME:
- tmprc = SMB2_rename(xid, tcon, fid.persistent_fid,
- fid.volatile_fid, (__le16 *)data);
- break;
- case SMB2_OP_HARDLINK:
- tmprc = SMB2_set_hardlink(xid, tcon, fid.persistent_fid,
- fid.volatile_fid, (__le16 *)data);
+ memset(&si_iov, 0, sizeof(si_iov));
+ rqst[num_rqst].rq_iov = si_iov;
+ rqst[num_rqst].rq_nvec = 1;
+
+ size[0] = 8;
+ data[0] = &delete_pending[0];
+
+ rc = SMB2_set_info_init(tcon, &rqst[num_rqst], COMPOUND_FID,
+ COMPOUND_FID, current->tgid,
+ FILE_DISPOSITION_INFORMATION,
+ SMB2_O_INFO_FILE, 0, data, size);
+ smb2_set_next_command(server, &rqst[num_rqst]);
+ smb2_set_related(&rqst[num_rqst++]);
break;
case SMB2_OP_SET_EOF:
- tmprc = SMB2_set_eof(xid, tcon, fid.persistent_fid,
- fid.volatile_fid, current->tgid,
- (__le64 *)data, false);
+ memset(&si_iov, 0, sizeof(si_iov));
+ rqst[num_rqst].rq_iov = si_iov;
+ rqst[num_rqst].rq_nvec = 1;
+
+ size[0] = 8; /* sizeof __le64 */
+ data[0] = ptr;
+
+ rc = SMB2_set_info_init(tcon, &rqst[num_rqst], COMPOUND_FID,
+ COMPOUND_FID, current->tgid,
+ FILE_END_OF_FILE_INFORMATION,
+ SMB2_O_INFO_FILE, 0, data, size);
+ smb2_set_next_command(server, &rqst[num_rqst]);
+ smb2_set_related(&rqst[num_rqst++]);
break;
case SMB2_OP_SET_INFO:
- tmprc = SMB2_set_info(xid, tcon, fid.persistent_fid,
- fid.volatile_fid,
- (FILE_BASIC_INFO *)data);
+ memset(&si_iov, 0, sizeof(si_iov));
+ rqst[num_rqst].rq_iov = si_iov;
+ rqst[num_rqst].rq_nvec = 1;
+
+
+ size[0] = sizeof(FILE_BASIC_INFO);
+ data[0] = ptr;
+
+ rc = SMB2_set_info_init(tcon, &rqst[num_rqst], COMPOUND_FID,
+ COMPOUND_FID, current->tgid,
+ FILE_BASIC_INFORMATION,
+ SMB2_O_INFO_FILE, 0, data, size);
+ smb2_set_next_command(server, &rqst[num_rqst]);
+ smb2_set_related(&rqst[num_rqst++]);
+ break;
+ case SMB2_OP_RENAME:
+ memset(&si_iov, 0, sizeof(si_iov));
+ rqst[num_rqst].rq_iov = si_iov;
+ rqst[num_rqst].rq_nvec = 2;
+
+ len = (2 * UniStrnlen((wchar_t *)ptr, PATH_MAX));
+
+ rename_info.ReplaceIfExists = 1;
+ rename_info.RootDirectory = 0;
+ rename_info.FileNameLength = cpu_to_le32(len);
+
+ size[0] = sizeof(struct smb2_file_rename_info);
+ data[0] = &rename_info;
+
+ size[1] = len + 2 /* null */;
+ data[1] = (__le16 *)ptr;
+
+ rc = SMB2_set_info_init(tcon, &rqst[num_rqst], COMPOUND_FID,
+ COMPOUND_FID, current->tgid,
+ FILE_RENAME_INFORMATION,
+ SMB2_O_INFO_FILE, 0, data, size);
+ smb2_set_next_command(server, &rqst[num_rqst]);
+ smb2_set_related(&rqst[num_rqst++]);
+ break;
+ case SMB2_OP_HARDLINK:
+ memset(&si_iov, 0, sizeof(si_iov));
+ rqst[num_rqst].rq_iov = si_iov;
+ rqst[num_rqst].rq_nvec = 2;
+
+ len = (2 * UniStrnlen((wchar_t *)ptr, PATH_MAX));
+
+ link_info.ReplaceIfExists = 0;
+ link_info.RootDirectory = 0;
+ link_info.FileNameLength = cpu_to_le32(len);
+
+ size[0] = sizeof(struct smb2_file_link_info);
+ data[0] = &link_info;
+
+ size[1] = len + 2 /* null */;
+ data[1] = (__le16 *)ptr;
+
+ rc = SMB2_set_info_init(tcon, &rqst[num_rqst], COMPOUND_FID,
+ COMPOUND_FID, current->tgid,
+ FILE_LINK_INFORMATION,
+ SMB2_O_INFO_FILE, 0, data, size);
+ smb2_set_next_command(server, &rqst[num_rqst]);
+ smb2_set_related(&rqst[num_rqst++]);
break;
default:
cifs_dbg(VFS, "Invalid command\n");
- break;
+ rc = -EINVAL;
}
+ if (rc)
+ goto finished;
- if (use_cached_root_handle)
- close_shroot(&tcon->crfid);
- else
- rc = SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
- if (tmprc)
- rc = tmprc;
- kfree(utf16_path);
+ /* Close */
+ memset(&close_iov, 0, sizeof(close_iov));
+ rqst[num_rqst].rq_iov = close_iov;
+ rqst[num_rqst].rq_nvec = 1;
+ rc = SMB2_close_init(tcon, &rqst[num_rqst], COMPOUND_FID,
+ COMPOUND_FID);
+ smb2_set_related(&rqst[num_rqst++]);
+ if (rc)
+ goto finished;
+
+ rc = compound_send_recv(xid, ses, flags, num_rqst, rqst,
+ resp_buftype, rsp_iov);
+
+ finished:
+ SMB2_open_free(&rqst[0]);
+ switch (command) {
+ case SMB2_OP_QUERY_INFO:
+ if (rc == 0) {
+ qi_rsp = (struct smb2_query_info_rsp *)
+ rsp_iov[1].iov_base;
+ rc = smb2_validate_and_copy_iov(
+ le16_to_cpu(qi_rsp->OutputBufferOffset),
+ le32_to_cpu(qi_rsp->OutputBufferLength),
+ &rsp_iov[1], sizeof(struct smb2_file_all_info),
+ ptr);
+ }
+ if (rqst[1].rq_iov)
+ SMB2_query_info_free(&rqst[1]);
+ if (rqst[2].rq_iov)
+ SMB2_close_free(&rqst[2]);
+ break;
+ case SMB2_OP_DELETE:
+ case SMB2_OP_MKDIR:
+ if (rqst[1].rq_iov)
+ SMB2_close_free(&rqst[1]);
+ break;
+ case SMB2_OP_HARDLINK:
+ case SMB2_OP_RENAME:
+ case SMB2_OP_RMDIR:
+ case SMB2_OP_SET_EOF:
+ case SMB2_OP_SET_INFO:
+ if (rqst[1].rq_iov)
+ SMB2_set_info_free(&rqst[1]);
+ if (rqst[2].rq_iov)
+ SMB2_close_free(&rqst[2]);
+ break;
+ }
+ free_rsp_buf(resp_buftype[0], rsp_iov[0].iov_base);
+ free_rsp_buf(resp_buftype[1], rsp_iov[1].iov_base);
+ free_rsp_buf(resp_buftype[2], rsp_iov[2].iov_base);
return rc;
}
@@ -147,6 +293,7 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
{
int rc;
struct smb2_file_all_info *smb2_data;
+ __u32 create_options = 0;
*adjust_tz = false;
*symlink = false;
@@ -155,17 +302,21 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
GFP_KERNEL);
if (smb2_data == NULL)
return -ENOMEM;
+ if (backup_cred(cifs_sb))
+ create_options |= CREATE_OPEN_BACKUP_INTENT;
- rc = smb2_open_op_close(xid, tcon, cifs_sb, full_path,
- FILE_READ_ATTRIBUTES, FILE_OPEN, 0,
- smb2_data, SMB2_OP_QUERY_INFO);
+ rc = smb2_compound_op(xid, tcon, cifs_sb, full_path,
+ FILE_READ_ATTRIBUTES, FILE_OPEN, create_options,
+ smb2_data, SMB2_OP_QUERY_INFO);
if (rc == -EOPNOTSUPP) {
*symlink = true;
+ create_options |= OPEN_REPARSE_POINT;
+
/* Failed on a symbolic link - query a reparse point info */
- rc = smb2_open_op_close(xid, tcon, cifs_sb, full_path,
- FILE_READ_ATTRIBUTES, FILE_OPEN,
- OPEN_REPARSE_POINT, smb2_data,
- SMB2_OP_QUERY_INFO);
+ rc = smb2_compound_op(xid, tcon, cifs_sb, full_path,
+ FILE_READ_ATTRIBUTES, FILE_OPEN,
+ create_options, smb2_data,
+ SMB2_OP_QUERY_INFO);
}
if (rc)
goto out;
@@ -180,9 +331,9 @@ int
smb2_mkdir(const unsigned int xid, struct cifs_tcon *tcon, const char *name,
struct cifs_sb_info *cifs_sb)
{
- return smb2_open_op_close(xid, tcon, cifs_sb, name,
- FILE_WRITE_ATTRIBUTES, FILE_CREATE,
- CREATE_NOT_FILE, NULL, SMB2_OP_MKDIR);
+ return smb2_compound_op(xid, tcon, cifs_sb, name,
+ FILE_WRITE_ATTRIBUTES, FILE_CREATE,
+ CREATE_NOT_FILE, NULL, SMB2_OP_MKDIR);
}
void
@@ -199,9 +350,9 @@ smb2_mkdir_setinfo(struct inode *inode, const char *name,
cifs_i = CIFS_I(inode);
dosattrs = cifs_i->cifsAttrs | ATTR_READONLY;
data.Attributes = cpu_to_le32(dosattrs);
- tmprc = smb2_open_op_close(xid, tcon, cifs_sb, name,
- FILE_WRITE_ATTRIBUTES, FILE_CREATE,
- CREATE_NOT_FILE, &data, SMB2_OP_SET_INFO);
+ tmprc = smb2_compound_op(xid, tcon, cifs_sb, name,
+ FILE_WRITE_ATTRIBUTES, FILE_CREATE,
+ CREATE_NOT_FILE, &data, SMB2_OP_SET_INFO);
if (tmprc == 0)
cifs_i->cifsAttrs = dosattrs;
}
@@ -210,18 +361,18 @@ int
smb2_rmdir(const unsigned int xid, struct cifs_tcon *tcon, const char *name,
struct cifs_sb_info *cifs_sb)
{
- return smb2_open_op_close(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN,
- CREATE_NOT_FILE,
- NULL, SMB2_OP_RMDIR);
+ return smb2_compound_op(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN,
+ CREATE_NOT_FILE,
+ NULL, SMB2_OP_RMDIR);
}
int
smb2_unlink(const unsigned int xid, struct cifs_tcon *tcon, const char *name,
struct cifs_sb_info *cifs_sb)
{
- return smb2_open_op_close(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN,
- CREATE_DELETE_ON_CLOSE | OPEN_REPARSE_POINT,
- NULL, SMB2_OP_DELETE);
+ return smb2_compound_op(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN,
+ CREATE_DELETE_ON_CLOSE | OPEN_REPARSE_POINT,
+ NULL, SMB2_OP_DELETE);
}
static int
@@ -238,8 +389,8 @@ smb2_set_path_attr(const unsigned int xid, struct cifs_tcon *tcon,
goto smb2_rename_path;
}
- rc = smb2_open_op_close(xid, tcon, cifs_sb, from_name, access,
- FILE_OPEN, 0, smb2_to_name, command);
+ rc = smb2_compound_op(xid, tcon, cifs_sb, from_name, access,
+ FILE_OPEN, 0, smb2_to_name, command);
smb2_rename_path:
kfree(smb2_to_name);
return rc;
@@ -269,9 +420,10 @@ smb2_set_path_size(const unsigned int xid, struct cifs_tcon *tcon,
struct cifs_sb_info *cifs_sb, bool set_alloc)
{
__le64 eof = cpu_to_le64(size);
- return smb2_open_op_close(xid, tcon, cifs_sb, full_path,
- FILE_WRITE_DATA, FILE_OPEN, 0, &eof,
- SMB2_OP_SET_EOF);
+
+ return smb2_compound_op(xid, tcon, cifs_sb, full_path,
+ FILE_WRITE_DATA, FILE_OPEN, 0, &eof,
+ SMB2_OP_SET_EOF);
}
int
@@ -291,9 +443,9 @@ smb2_set_file_info(struct inode *inode, const char *full_path,
if (IS_ERR(tlink))
return PTR_ERR(tlink);
- rc = smb2_open_op_close(xid, tlink_tcon(tlink), cifs_sb, full_path,
- FILE_WRITE_ATTRIBUTES, FILE_OPEN, 0, buf,
- SMB2_OP_SET_INFO);
+ rc = smb2_compound_op(xid, tlink_tcon(tlink), cifs_sb, full_path,
+ FILE_WRITE_ATTRIBUTES, FILE_OPEN, 0, buf,
+ SMB2_OP_SET_INFO);
cifs_put_tlink(tlink);
return rc;
}
diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c
index 20a2d304c603..d47b7f5dfa6c 100644
--- a/fs/cifs/smb2maperror.c
+++ b/fs/cifs/smb2maperror.c
@@ -288,7 +288,7 @@ static const struct status_to_posix_error smb2_error_map_table[] = {
{STATUS_FLT_BUFFER_TOO_SMALL, -ENOBUFS, "STATUS_FLT_BUFFER_TOO_SMALL"},
{STATUS_FVE_PARTIAL_METADATA, -EIO, "STATUS_FVE_PARTIAL_METADATA"},
{STATUS_UNSUCCESSFUL, -EIO, "STATUS_UNSUCCESSFUL"},
- {STATUS_NOT_IMPLEMENTED, -ENOSYS, "STATUS_NOT_IMPLEMENTED"},
+ {STATUS_NOT_IMPLEMENTED, -EOPNOTSUPP, "STATUS_NOT_IMPLEMENTED"},
{STATUS_INVALID_INFO_CLASS, -EIO, "STATUS_INVALID_INFO_CLASS"},
{STATUS_INFO_LENGTH_MISMATCH, -EIO, "STATUS_INFO_LENGTH_MISMATCH"},
{STATUS_ACCESS_VIOLATION, -EACCES, "STATUS_ACCESS_VIOLATION"},
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 89985a0a6819..f85fc5aa2710 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -74,6 +74,12 @@ smb2_add_credits(struct TCP_Server_Info *server, const unsigned int add,
int *val, rc = 0;
spin_lock(&server->req_lock);
val = server->ops->get_credits_field(server, optype);
+
+ /* eg found case where write overlapping reconnect messed up credits */
+ if (((optype & CIFS_OP_MASK) == CIFS_NEG_OP) && (*val != 0))
+ trace_smb3_reconnect_with_invalid_credits(server->CurrentMid,
+ server->hostname, *val);
+
*val += add;
if (*val > 65000) {
*val = 65000; /* Don't get near 64K credits, avoid srv bugs */
@@ -104,7 +110,12 @@ smb2_set_credits(struct TCP_Server_Info *server, const int val)
{
spin_lock(&server->req_lock);
server->credits = val;
+ if (val == 1)
+ server->reconnect_instance++;
spin_unlock(&server->req_lock);
+ /* don't log while holding the lock */
+ if (val == 1)
+ cifs_dbg(FYI, "set credits to 1 due to smb2 reconnect\n");
}
static int *
@@ -270,6 +281,31 @@ smb2_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
}
static unsigned int
+smb3_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
+{
+ struct TCP_Server_Info *server = tcon->ses->server;
+ unsigned int wsize;
+
+ /* start with specified wsize, or default */
+ wsize = volume_info->wsize ? volume_info->wsize : SMB3_DEFAULT_IOSIZE;
+ wsize = min_t(unsigned int, wsize, server->max_write);
+#ifdef CONFIG_CIFS_SMB_DIRECT
+ if (server->rdma) {
+ if (server->sign)
+ wsize = min_t(unsigned int,
+ wsize, server->smbd_conn->max_fragmented_send_size);
+ else
+ wsize = min_t(unsigned int,
+ wsize, server->smbd_conn->max_readwrite_size);
+ }
+#endif
+ if (!(server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU))
+ wsize = min_t(unsigned int, wsize, SMB2_MAX_BUFFER_SIZE);
+
+ return wsize;
+}
+
+static unsigned int
smb2_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
{
struct TCP_Server_Info *server = tcon->ses->server;
@@ -295,6 +331,31 @@ smb2_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
return rsize;
}
+static unsigned int
+smb3_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
+{
+ struct TCP_Server_Info *server = tcon->ses->server;
+ unsigned int rsize;
+
+ /* start with specified rsize, or default */
+ rsize = volume_info->rsize ? volume_info->rsize : SMB3_DEFAULT_IOSIZE;
+ rsize = min_t(unsigned int, rsize, server->max_read);
+#ifdef CONFIG_CIFS_SMB_DIRECT
+ if (server->rdma) {
+ if (server->sign)
+ rsize = min_t(unsigned int,
+ rsize, server->smbd_conn->max_fragmented_recv_size);
+ else
+ rsize = min_t(unsigned int,
+ rsize, server->smbd_conn->max_readwrite_size);
+ }
+#endif
+
+ if (!(server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU))
+ rsize = min_t(unsigned int, rsize, SMB2_MAX_BUFFER_SIZE);
+
+ return rsize;
+}
static int
parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf,
@@ -962,6 +1023,9 @@ smb2_print_stats(struct seq_file *m, struct cifs_tcon *tcon)
seq_printf(m, "\nBytes read: %llu Bytes written: %llu",
(long long)(tcon->bytes_read),
(long long)(tcon->bytes_written));
+ seq_printf(m, "\nOpen files: %d total (local), %d open on server",
+ atomic_read(&tcon->num_local_opens),
+ atomic_read(&tcon->num_remote_opens));
seq_printf(m, "\nTreeConnects: %d total %d failed",
atomic_read(&sent[SMB2_TREE_CONNECT_HE]),
atomic_read(&failed[SMB2_TREE_CONNECT_HE]));
@@ -1057,6 +1121,131 @@ req_res_key_exit:
return rc;
}
+static int
+smb2_ioctl_query_info(const unsigned int xid,
+ struct cifs_tcon *tcon,
+ __le16 *path, int is_dir,
+ unsigned long p)
+{
+ struct cifs_ses *ses = tcon->ses;
+ char __user *arg = (char __user *)p;
+ struct smb_query_info qi;
+ struct smb_query_info __user *pqi;
+ int rc = 0;
+ int flags = 0;
+ struct smb2_query_info_rsp *rsp = NULL;
+ void *buffer = NULL;
+ struct smb_rqst rqst[3];
+ int resp_buftype[3];
+ struct kvec rsp_iov[3];
+ struct kvec open_iov[SMB2_CREATE_IOV_SIZE];
+ struct cifs_open_parms oparms;
+ u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
+ struct cifs_fid fid;
+ struct kvec qi_iov[1];
+ struct kvec close_iov[1];
+
+ memset(rqst, 0, sizeof(rqst));
+ resp_buftype[0] = resp_buftype[1] = resp_buftype[2] = CIFS_NO_BUFFER;
+ memset(rsp_iov, 0, sizeof(rsp_iov));
+
+ if (copy_from_user(&qi, arg, sizeof(struct smb_query_info)))
+ return -EFAULT;
+
+ if (qi.output_buffer_length > 1024)
+ return -EINVAL;
+
+ if (!ses || !(ses->server))
+ return -EIO;
+
+ if (smb3_encryption_required(tcon))
+ flags |= CIFS_TRANSFORM_REQ;
+
+ buffer = kmalloc(qi.output_buffer_length, GFP_KERNEL);
+ if (buffer == NULL)
+ return -ENOMEM;
+
+ if (copy_from_user(buffer, arg + sizeof(struct smb_query_info),
+ qi.output_buffer_length)) {
+ rc = -EFAULT;
+ goto iqinf_exit;
+ }
+
+ /* Open */
+ memset(&open_iov, 0, sizeof(open_iov));
+ rqst[0].rq_iov = open_iov;
+ rqst[0].rq_nvec = SMB2_CREATE_IOV_SIZE;
+
+ memset(&oparms, 0, sizeof(oparms));
+ oparms.tcon = tcon;
+ oparms.desired_access = FILE_READ_ATTRIBUTES | READ_CONTROL;
+ oparms.disposition = FILE_OPEN;
+ if (is_dir)
+ oparms.create_options = CREATE_NOT_FILE;
+ else
+ oparms.create_options = CREATE_NOT_DIR;
+ oparms.fid = &fid;
+ oparms.reconnect = false;
+
+ rc = SMB2_open_init(tcon, &rqst[0], &oplock, &oparms, path);
+ if (rc)
+ goto iqinf_exit;
+ smb2_set_next_command(ses->server, &rqst[0]);
+
+ /* Query */
+ memset(&qi_iov, 0, sizeof(qi_iov));
+ rqst[1].rq_iov = qi_iov;
+ rqst[1].rq_nvec = 1;
+
+ rc = SMB2_query_info_init(tcon, &rqst[1], COMPOUND_FID, COMPOUND_FID,
+ qi.file_info_class, qi.info_type,
+ qi.additional_information,
+ qi.input_buffer_length,
+ qi.output_buffer_length, buffer);
+ if (rc)
+ goto iqinf_exit;
+ smb2_set_next_command(ses->server, &rqst[1]);
+ smb2_set_related(&rqst[1]);
+
+ /* Close */
+ memset(&close_iov, 0, sizeof(close_iov));
+ rqst[2].rq_iov = close_iov;
+ rqst[2].rq_nvec = 1;
+
+ rc = SMB2_close_init(tcon, &rqst[2], COMPOUND_FID, COMPOUND_FID);
+ if (rc)
+ goto iqinf_exit;
+ smb2_set_related(&rqst[2]);
+
+ rc = compound_send_recv(xid, ses, flags, 3, rqst,
+ resp_buftype, rsp_iov);
+ if (rc)
+ goto iqinf_exit;
+ pqi = (struct smb_query_info __user *)arg;
+ rsp = (struct smb2_query_info_rsp *)rsp_iov[1].iov_base;
+ if (le32_to_cpu(rsp->OutputBufferLength) < qi.input_buffer_length)
+ qi.input_buffer_length = le32_to_cpu(rsp->OutputBufferLength);
+ if (copy_to_user(&pqi->input_buffer_length, &qi.input_buffer_length,
+ sizeof(qi.input_buffer_length))) {
+ rc = -EFAULT;
+ goto iqinf_exit;
+ }
+ if (copy_to_user(pqi + 1, rsp->Buffer, qi.input_buffer_length)) {
+ rc = -EFAULT;
+ goto iqinf_exit;
+ }
+
+ iqinf_exit:
+ kfree(buffer);
+ SMB2_open_free(&rqst[0]);
+ SMB2_query_info_free(&rqst[1]);
+ SMB2_close_free(&rqst[2]);
+ free_rsp_buf(resp_buftype[0], rsp_iov[0].iov_base);
+ free_rsp_buf(resp_buftype[1], rsp_iov[1].iov_base);
+ free_rsp_buf(resp_buftype[2], rsp_iov[2].iov_base);
+ return rc;
+}
+
static ssize_t
smb2_copychunk_range(const unsigned int xid,
struct cifsFileInfo *srcfile,
@@ -1301,7 +1490,7 @@ smb2_set_file_size(const unsigned int xid, struct cifs_tcon *tcon,
}
return SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid,
- cfile->fid.volatile_fid, cfile->pid, &eof, false);
+ cfile->fid.volatile_fid, cfile->pid, &eof);
}
static int
@@ -1556,7 +1745,7 @@ smb2_oplock_response(struct cifs_tcon *tcon, struct cifs_fid *fid,
CIFS_CACHE_READ(cinode) ? 1 : 0);
}
-static void
+void
smb2_set_related(struct smb_rqst *rqst)
{
struct smb2_sync_hdr *shdr;
@@ -1567,7 +1756,7 @@ smb2_set_related(struct smb_rqst *rqst)
char smb2_padding[7] = {0, 0, 0, 0, 0, 0, 0};
-static void
+void
smb2_set_next_command(struct TCP_Server_Info *server, struct smb_rqst *rqst)
{
struct smb2_sync_hdr *shdr;
@@ -1610,7 +1799,7 @@ smb2_queryfs(const unsigned int xid, struct cifs_tcon *tcon,
flags |= CIFS_TRANSFORM_REQ;
memset(rqst, 0, sizeof(rqst));
- memset(resp_buftype, 0, sizeof(resp_buftype));
+ resp_buftype[0] = resp_buftype[1] = resp_buftype[2] = CIFS_NO_BUFFER;
memset(rsp_iov, 0, sizeof(rsp_iov));
memset(&open_iov, 0, sizeof(open_iov));
@@ -1636,7 +1825,8 @@ smb2_queryfs(const unsigned int xid, struct cifs_tcon *tcon,
rc = SMB2_query_info_init(tcon, &rqst[1], COMPOUND_FID, COMPOUND_FID,
FS_FULL_SIZE_INFORMATION,
SMB2_O_INFO_FILESYSTEM, 0,
- sizeof(struct smb2_fs_full_size_info));
+ sizeof(struct smb2_fs_full_size_info), 0,
+ NULL);
if (rc)
goto qfs_exit;
smb2_set_next_command(server, &rqst[1]);
@@ -3303,6 +3493,7 @@ struct smb_version_operations smb20_operations = {
.set_acl = set_smb2_acl,
#endif /* CIFS_ACL */
.next_header = smb2_next_header,
+ .ioctl_query_info = smb2_ioctl_query_info,
};
struct smb_version_operations smb21_operations = {
@@ -3398,6 +3589,7 @@ struct smb_version_operations smb21_operations = {
.set_acl = set_smb2_acl,
#endif /* CIFS_ACL */
.next_header = smb2_next_header,
+ .ioctl_query_info = smb2_ioctl_query_info,
};
struct smb_version_operations smb30_operations = {
@@ -3425,8 +3617,8 @@ struct smb_version_operations smb30_operations = {
.downgrade_oplock = smb2_downgrade_oplock,
.need_neg = smb2_need_neg,
.negotiate = smb2_negotiate,
- .negotiate_wsize = smb2_negotiate_wsize,
- .negotiate_rsize = smb2_negotiate_rsize,
+ .negotiate_wsize = smb3_negotiate_wsize,
+ .negotiate_rsize = smb3_negotiate_rsize,
.sess_setup = SMB2_sess_setup,
.logoff = SMB2_logoff,
.tree_connect = SMB2_tcon,
@@ -3502,6 +3694,7 @@ struct smb_version_operations smb30_operations = {
.set_acl = set_smb2_acl,
#endif /* CIFS_ACL */
.next_header = smb2_next_header,
+ .ioctl_query_info = smb2_ioctl_query_info,
};
struct smb_version_operations smb311_operations = {
@@ -3529,8 +3722,8 @@ struct smb_version_operations smb311_operations = {
.downgrade_oplock = smb2_downgrade_oplock,
.need_neg = smb2_need_neg,
.negotiate = smb2_negotiate,
- .negotiate_wsize = smb2_negotiate_wsize,
- .negotiate_rsize = smb2_negotiate_rsize,
+ .negotiate_wsize = smb3_negotiate_wsize,
+ .negotiate_rsize = smb3_negotiate_rsize,
.sess_setup = SMB2_sess_setup,
.logoff = SMB2_logoff,
.tree_connect = SMB2_tcon,
@@ -3607,6 +3800,7 @@ struct smb_version_operations smb311_operations = {
.set_acl = set_smb2_acl,
#endif /* CIFS_ACL */
.next_header = smb2_next_header,
+ .ioctl_query_info = smb2_ioctl_query_info,
};
struct smb_version_values smb20_values = {
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index f54d07bda067..7d7b016fe8bb 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1478,7 +1478,7 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
/* SMB2 TREE_CONNECT request must be called with TreeId == 0 */
tcon->tid = 0;
-
+ atomic_set(&tcon->num_remote_opens, 0);
rc = smb2_plain_req_init(SMB2_TREE_CONNECT, tcon, (void **) &req,
&total_len);
if (rc) {
@@ -2243,10 +2243,12 @@ SMB2_open_free(struct smb_rqst *rqst)
{
int i;
- cifs_small_buf_release(rqst->rq_iov[0].iov_base);
- for (i = 1; i < rqst->rq_nvec; i++)
- if (rqst->rq_iov[i].iov_base != smb2_padding)
- kfree(rqst->rq_iov[i].iov_base);
+ if (rqst && rqst->rq_iov) {
+ cifs_small_buf_release(rqst->rq_iov[0].iov_base);
+ for (i = 1; i < rqst->rq_nvec; i++)
+ if (rqst->rq_iov[i].iov_base != smb2_padding)
+ kfree(rqst->rq_iov[i].iov_base);
+ }
}
int
@@ -2261,7 +2263,7 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
struct cifs_ses *ses = tcon->ses;
struct kvec iov[SMB2_CREATE_IOV_SIZE];
struct kvec rsp_iov = {NULL, 0};
- int resp_buftype;
+ int resp_buftype = CIFS_NO_BUFFER;
int rc = 0;
int flags = 0;
@@ -2303,6 +2305,7 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
ses->Suid, oparms->create_options,
oparms->desired_access);
+ atomic_inc(&tcon->num_remote_opens);
oparms->fid->persistent_fid = rsp->PersistentFileId;
oparms->fid->volatile_fid = rsp->VolatileFileId;
@@ -2474,13 +2477,13 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
goto ioctl_exit;
}
- *out_data = kmalloc(*plen, GFP_KERNEL);
+ *out_data = kmemdup((char *)rsp + le32_to_cpu(rsp->OutputOffset),
+ *plen, GFP_KERNEL);
if (*out_data == NULL) {
rc = -ENOMEM;
goto ioctl_exit;
}
- memcpy(*out_data, (char *)rsp + le32_to_cpu(rsp->OutputOffset), *plen);
ioctl_exit:
free_rsp_buf(resp_buftype, rsp);
return rc;
@@ -2535,7 +2538,8 @@ SMB2_close_init(struct cifs_tcon *tcon, struct smb_rqst *rqst,
void
SMB2_close_free(struct smb_rqst *rqst)
{
- cifs_small_buf_release(rqst->rq_iov[0].iov_base); /* request */
+ if (rqst && rqst->rq_iov)
+ cifs_small_buf_release(rqst->rq_iov[0].iov_base); /* request */
}
int
@@ -2547,7 +2551,7 @@ SMB2_close_flags(const unsigned int xid, struct cifs_tcon *tcon,
struct cifs_ses *ses = tcon->ses;
struct kvec iov[1];
struct kvec rsp_iov;
- int resp_buftype;
+ int resp_buftype = CIFS_NO_BUFFER;
int rc = 0;
cifs_dbg(FYI, "Close\n");
@@ -2577,6 +2581,8 @@ SMB2_close_flags(const unsigned int xid, struct cifs_tcon *tcon,
goto close_exit;
}
+ atomic_dec(&tcon->num_remote_opens);
+
/* BB FIXME - decode close response, update inode for caching */
close_exit:
@@ -2627,10 +2633,10 @@ smb2_validate_iov(unsigned int offset, unsigned int buffer_length,
* If SMB buffer fields are valid, copy into temporary buffer to hold result.
* Caller must free buffer.
*/
-static int
-validate_and_copy_iov(unsigned int offset, unsigned int buffer_length,
- struct kvec *iov, unsigned int minbufsize,
- char *data)
+int
+smb2_validate_and_copy_iov(unsigned int offset, unsigned int buffer_length,
+ struct kvec *iov, unsigned int minbufsize,
+ char *data)
{
char *begin_of_buf = offset + (char *)iov->iov_base;
int rc;
@@ -2651,7 +2657,7 @@ int
SMB2_query_info_init(struct cifs_tcon *tcon, struct smb_rqst *rqst,
u64 persistent_fid, u64 volatile_fid,
u8 info_class, u8 info_type, u32 additional_info,
- size_t output_len)
+ size_t output_len, size_t input_len, void *input)
{
struct smb2_query_info_req *req;
struct kvec *iov = rqst->rq_iov;
@@ -2669,23 +2675,25 @@ SMB2_query_info_init(struct cifs_tcon *tcon, struct smb_rqst *rqst,
req->VolatileFileId = volatile_fid;
req->AdditionalInformation = cpu_to_le32(additional_info);
- /*
- * We do not use the input buffer (do not send extra byte)
- */
- req->InputBufferOffset = 0;
-
req->OutputBufferLength = cpu_to_le32(output_len);
+ if (input_len) {
+ req->InputBufferLength = cpu_to_le32(input_len);
+ /* total_len for smb query request never close to le16 max */
+ req->InputBufferOffset = cpu_to_le16(total_len - 1);
+ memcpy(req->Buffer, input, input_len);
+ }
iov[0].iov_base = (char *)req;
/* 1 for Buffer */
- iov[0].iov_len = total_len - 1;
+ iov[0].iov_len = total_len - 1 + input_len;
return 0;
}
void
SMB2_query_info_free(struct smb_rqst *rqst)
{
- cifs_small_buf_release(rqst->rq_iov[0].iov_base); /* request */
+ if (rqst && rqst->rq_iov)
+ cifs_small_buf_release(rqst->rq_iov[0].iov_base); /* request */
}
static int
@@ -2699,7 +2707,7 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon,
struct kvec iov[1];
struct kvec rsp_iov;
int rc = 0;
- int resp_buftype;
+ int resp_buftype = CIFS_NO_BUFFER;
struct cifs_ses *ses = tcon->ses;
int flags = 0;
@@ -2718,7 +2726,7 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon,
rc = SMB2_query_info_init(tcon, &rqst, persistent_fid, volatile_fid,
info_class, info_type, additional_info,
- output_len);
+ output_len, 0, NULL);
if (rc)
goto qinf_exit;
@@ -2746,9 +2754,9 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon,
}
}
- rc = validate_and_copy_iov(le16_to_cpu(rsp->OutputBufferOffset),
- le32_to_cpu(rsp->OutputBufferLength),
- &rsp_iov, min_len, *data);
+ rc = smb2_validate_and_copy_iov(le16_to_cpu(rsp->OutputBufferOffset),
+ le32_to_cpu(rsp->OutputBufferLength),
+ &rsp_iov, min_len, *data);
qinf_exit:
SMB2_query_info_free(&rqst);
@@ -3754,45 +3762,22 @@ qdir_exit:
return rc;
}
-static int
-send_set_info(const unsigned int xid, struct cifs_tcon *tcon,
+int
+SMB2_set_info_init(struct cifs_tcon *tcon, struct smb_rqst *rqst,
u64 persistent_fid, u64 volatile_fid, u32 pid, u8 info_class,
- u8 info_type, u32 additional_info, unsigned int num,
+ u8 info_type, u32 additional_info,
void **data, unsigned int *size)
{
- struct smb_rqst rqst;
struct smb2_set_info_req *req;
- struct smb2_set_info_rsp *rsp = NULL;
- struct kvec *iov;
- struct kvec rsp_iov;
- int rc = 0;
- int resp_buftype;
- unsigned int i;
- struct cifs_ses *ses = tcon->ses;
- int flags = 0;
- unsigned int total_len;
-
- if (!ses || !(ses->server))
- return -EIO;
-
- if (!num)
- return -EINVAL;
-
- iov = kmalloc_array(num, sizeof(struct kvec), GFP_KERNEL);
- if (!iov)
- return -ENOMEM;
+ struct kvec *iov = rqst->rq_iov;
+ unsigned int i, total_len;
+ int rc;
rc = smb2_plain_req_init(SMB2_SET_INFO, tcon, (void **) &req, &total_len);
- if (rc) {
- kfree(iov);
+ if (rc)
return rc;
- }
-
- if (smb3_encryption_required(tcon))
- flags |= CIFS_TRANSFORM_REQ;
req->sync_hdr.ProcessId = cpu_to_le32(pid);
-
req->InfoType = info_type;
req->FileInfoClass = info_class;
req->PersistentFileId = persistent_fid;
@@ -3810,19 +3795,66 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon,
/* 1 for Buffer */
iov[0].iov_len = total_len - 1;
- for (i = 1; i < num; i++) {
+ for (i = 1; i < rqst->rq_nvec; i++) {
le32_add_cpu(&req->BufferLength, size[i]);
iov[i].iov_base = (char *)data[i];
iov[i].iov_len = size[i];
}
+ return 0;
+}
+
+void
+SMB2_set_info_free(struct smb_rqst *rqst)
+{
+ if (rqst && rqst->rq_iov)
+ cifs_buf_release(rqst->rq_iov[0].iov_base); /* request */
+}
+
+static int
+send_set_info(const unsigned int xid, struct cifs_tcon *tcon,
+ u64 persistent_fid, u64 volatile_fid, u32 pid, u8 info_class,
+ u8 info_type, u32 additional_info, unsigned int num,
+ void **data, unsigned int *size)
+{
+ struct smb_rqst rqst;
+ struct smb2_set_info_rsp *rsp = NULL;
+ struct kvec *iov;
+ struct kvec rsp_iov;
+ int rc = 0;
+ int resp_buftype;
+ struct cifs_ses *ses = tcon->ses;
+ int flags = 0;
+
+ if (!ses || !(ses->server))
+ return -EIO;
+
+ if (!num)
+ return -EINVAL;
+
+ if (smb3_encryption_required(tcon))
+ flags |= CIFS_TRANSFORM_REQ;
+
+ iov = kmalloc_array(num, sizeof(struct kvec), GFP_KERNEL);
+ if (!iov)
+ return -ENOMEM;
+
memset(&rqst, 0, sizeof(struct smb_rqst));
rqst.rq_iov = iov;
rqst.rq_nvec = num;
+ rc = SMB2_set_info_init(tcon, &rqst, persistent_fid, volatile_fid, pid,
+ info_class, info_type, additional_info,
+ data, size);
+ if (rc) {
+ kfree(iov);
+ return rc;
+ }
+
+
rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags,
&rsp_iov);
- cifs_buf_release(req);
+ SMB2_set_info_free(&rqst);
rsp = (struct smb2_set_info_rsp *)rsp_iov.iov_base;
if (rc != 0) {
@@ -3837,88 +3869,8 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon,
}
int
-SMB2_rename(const unsigned int xid, struct cifs_tcon *tcon,
- u64 persistent_fid, u64 volatile_fid, __le16 *target_file)
-{
- struct smb2_file_rename_info info;
- void **data;
- unsigned int size[2];
- int rc;
- int len = (2 * UniStrnlen((wchar_t *)target_file, PATH_MAX));
-
- data = kmalloc_array(2, sizeof(void *), GFP_KERNEL);
- if (!data)
- return -ENOMEM;
-
- info.ReplaceIfExists = 1; /* 1 = replace existing target with new */
- /* 0 = fail if target already exists */
- info.RootDirectory = 0; /* MBZ for network ops (why does spec say?) */
- info.FileNameLength = cpu_to_le32(len);
-
- data[0] = &info;
- size[0] = sizeof(struct smb2_file_rename_info);
-
- data[1] = target_file;
- size[1] = len + 2 /* null */;
-
- rc = send_set_info(xid, tcon, persistent_fid, volatile_fid,
- current->tgid, FILE_RENAME_INFORMATION, SMB2_O_INFO_FILE,
- 0, 2, data, size);
- kfree(data);
- return rc;
-}
-
-int
-SMB2_rmdir(const unsigned int xid, struct cifs_tcon *tcon,
- u64 persistent_fid, u64 volatile_fid)
-{
- __u8 delete_pending = 1;
- void *data;
- unsigned int size;
-
- data = &delete_pending;
- size = 1; /* sizeof __u8 */
-
- return send_set_info(xid, tcon, persistent_fid, volatile_fid,
- current->tgid, FILE_DISPOSITION_INFORMATION, SMB2_O_INFO_FILE,
- 0, 1, &data, &size);
-}
-
-int
-SMB2_set_hardlink(const unsigned int xid, struct cifs_tcon *tcon,
- u64 persistent_fid, u64 volatile_fid, __le16 *target_file)
-{
- struct smb2_file_link_info info;
- void **data;
- unsigned int size[2];
- int rc;
- int len = (2 * UniStrnlen((wchar_t *)target_file, PATH_MAX));
-
- data = kmalloc_array(2, sizeof(void *), GFP_KERNEL);
- if (!data)
- return -ENOMEM;
-
- info.ReplaceIfExists = 0; /* 1 = replace existing link with new */
- /* 0 = fail if link already exists */
- info.RootDirectory = 0; /* MBZ for network ops (why does spec say?) */
- info.FileNameLength = cpu_to_le32(len);
-
- data[0] = &info;
- size[0] = sizeof(struct smb2_file_link_info);
-
- data[1] = target_file;
- size[1] = len + 2 /* null */;
-
- rc = send_set_info(xid, tcon, persistent_fid, volatile_fid,
- current->tgid, FILE_LINK_INFORMATION, SMB2_O_INFO_FILE,
- 0, 2, data, size);
- kfree(data);
- return rc;
-}
-
-int
SMB2_set_eof(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
- u64 volatile_fid, u32 pid, __le64 *eof, bool is_falloc)
+ u64 volatile_fid, u32 pid, __le64 *eof)
{
struct smb2_file_eof_info info;
void *data;
@@ -3929,28 +3881,12 @@ SMB2_set_eof(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
data = &info;
size = sizeof(struct smb2_file_eof_info);
- if (is_falloc)
- return send_set_info(xid, tcon, persistent_fid, volatile_fid,
- pid, FILE_ALLOCATION_INFORMATION, SMB2_O_INFO_FILE,
- 0, 1, &data, &size);
- else
- return send_set_info(xid, tcon, persistent_fid, volatile_fid,
+ return send_set_info(xid, tcon, persistent_fid, volatile_fid,
pid, FILE_END_OF_FILE_INFORMATION, SMB2_O_INFO_FILE,
0, 1, &data, &size);
}
int
-SMB2_set_info(const unsigned int xid, struct cifs_tcon *tcon,
- u64 persistent_fid, u64 volatile_fid, FILE_BASIC_INFO *buf)
-{
- unsigned int size;
- size = sizeof(FILE_BASIC_INFO);
- return send_set_info(xid, tcon, persistent_fid, volatile_fid,
- current->tgid, FILE_BASIC_INFORMATION, SMB2_O_INFO_FILE,
- 0, 1, (void **)&buf, &size);
-}
-
-int
SMB2_set_acl(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_fid, u64 volatile_fid,
struct cifs_ntsd *pnntsd, int pacllen, int aclflag)
@@ -4350,6 +4286,8 @@ SMB2_lease_break(const unsigned int xid, struct cifs_tcon *tcon,
struct kvec iov[1];
struct kvec rsp_iov;
int resp_buf_type;
+ __u64 *please_key_high;
+ __u64 *please_key_low;
cifs_dbg(FYI, "SMB2_lease_break\n");
rc = smb2_plain_req_init(SMB2_OPLOCK_BREAK, tcon, (void **) &req,
@@ -4379,10 +4317,16 @@ SMB2_lease_break(const unsigned int xid, struct cifs_tcon *tcon,
rc = cifs_send_recv(xid, ses, &rqst, &resp_buf_type, flags, &rsp_iov);
cifs_small_buf_release(req);
+ please_key_low = (__u64 *)req->LeaseKey;
+ please_key_high = (__u64 *)(req->LeaseKey+8);
if (rc) {
cifs_stats_fail_inc(tcon, SMB2_OPLOCK_BREAK_HE);
+ trace_smb3_lease_err(le32_to_cpu(lease_state), tcon->tid,
+ ses->Suid, *please_key_low, *please_key_high, rc);
cifs_dbg(FYI, "Send error in Lease Break = %d\n", rc);
- }
+ } else
+ trace_smb3_lease_done(le32_to_cpu(lease_state), tcon->tid,
+ ses->Suid, *please_key_low, *please_key_high);
return rc;
}
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index 8fb7887f2b3d..f753f424d7f1 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -613,6 +613,8 @@ struct smb2_tree_disconnect_rsp {
#define SVHDX_OPEN_DEVICE_CONTEX 0x9CCBCF9E04C1E643980E158DA1F6EC83
#define SMB2_CREATE_TAG_POSIX 0x93AD25509CB411E7B42383DE968BCD7C
+/* Flag (SMB3 open response) values */
+#define SMB2_CREATE_FLAG_REPARSEPOINT 0x01
/*
* Maximum number of iovs we need for an open/create request.
@@ -650,7 +652,7 @@ struct smb2_create_rsp {
struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 89 */
__u8 OplockLevel;
- __u8 Reserved;
+ __u8 Flag; /* 0x01 if reparse point */
__le32 CreateAction;
__le64 CreationTime;
__le64 LastAccessTime;
@@ -1174,6 +1176,15 @@ struct smb2_query_info_rsp {
__u8 Buffer[1];
} __packed;
+/*
+ * Maximum number of iovs we need for a set-info request.
+ * The largest one is rename/hardlink
+ * [0] : struct smb2_set_info_req + smb2_file_[rename|link]_info
+ * [1] : path
+ * [2] : compound padding
+ */
+#define SMB2_SET_INFO_IOV_SIZE 3
+
struct smb2_set_info_req {
struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 33 */
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index b4076577eeb7..9f4e9ed9ce53 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -116,6 +116,9 @@ extern void smb2_reconnect_server(struct work_struct *work);
extern int smb3_crypto_aead_allocate(struct TCP_Server_Info *server);
extern unsigned long smb_rqst_len(struct TCP_Server_Info *server,
struct smb_rqst *rqst);
+extern void smb2_set_next_command(struct TCP_Server_Info *server,
+ struct smb_rqst *rqst);
+extern void smb2_set_related(struct smb_rqst *rqst);
/*
* SMB2 Worker functions - most of protocol specific implementation details
@@ -160,7 +163,8 @@ extern int SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon,
extern int SMB2_query_info_init(struct cifs_tcon *tcon, struct smb_rqst *rqst,
u64 persistent_fid, u64 volatile_fid,
u8 info_class, u8 info_type,
- u32 additional_info, size_t output_len);
+ u32 additional_info, size_t output_len,
+ size_t input_len, void *input);
extern void SMB2_query_info_free(struct smb_rqst *rqst);
extern int SMB2_query_acl(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_file_id, u64 volatile_file_id,
@@ -179,20 +183,14 @@ extern int SMB2_echo(struct TCP_Server_Info *server);
extern int SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_fid, u64 volatile_fid, int index,
struct cifs_search_info *srch_inf);
-extern int SMB2_rename(const unsigned int xid, struct cifs_tcon *tcon,
- u64 persistent_fid, u64 volatile_fid,
- __le16 *target_file);
-extern int SMB2_rmdir(const unsigned int xid, struct cifs_tcon *tcon,
- u64 persistent_fid, u64 volatile_fid);
-extern int SMB2_set_hardlink(const unsigned int xid, struct cifs_tcon *tcon,
- u64 persistent_fid, u64 volatile_fid,
- __le16 *target_file);
extern int SMB2_set_eof(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_fid, u64 volatile_fid, u32 pid,
- __le64 *eof, bool is_fallocate);
-extern int SMB2_set_info(const unsigned int xid, struct cifs_tcon *tcon,
- u64 persistent_fid, u64 volatile_fid,
- FILE_BASIC_INFO *buf);
+ __le64 *eof);
+extern int SMB2_set_info_init(struct cifs_tcon *tcon, struct smb_rqst *rqst,
+ u64 persistent_fid, u64 volatile_fid, u32 pid,
+ u8 info_class, u8 info_type, u32 additional_info,
+ void **data, unsigned int *size);
+extern void SMB2_set_info_free(struct smb_rqst *rqst);
extern int SMB2_set_acl(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_fid, u64 volatile_fid,
struct cifs_ntsd *pnntsd, int pacllen, int aclflag);
@@ -232,6 +230,10 @@ extern enum securityEnum smb2_select_sectype(struct TCP_Server_Info *,
extern int smb3_encryption_required(const struct cifs_tcon *tcon);
extern int smb2_validate_iov(unsigned int offset, unsigned int buffer_length,
struct kvec *iov, unsigned int min_buf_size);
+extern int smb2_validate_and_copy_iov(unsigned int offset,
+ unsigned int buffer_length,
+ struct kvec *iov,
+ unsigned int minbufsize, char *data);
extern void smb2_copy_fs_info_to_kstatfs(
struct smb2_fs_full_size_info *pfs_inf,
struct kstatfs *kst);
diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c
index 5fdb9a509a97..5e282368cc4a 100644
--- a/fs/cifs/smbdirect.c
+++ b/fs/cifs/smbdirect.c
@@ -2295,8 +2295,12 @@ static void smbd_mr_recovery_work(struct work_struct *work)
int rc;
list_for_each_entry(smbdirect_mr, &info->mr_list, list) {
- if (smbdirect_mr->state == MR_INVALIDATED ||
- smbdirect_mr->state == MR_ERROR) {
+ if (smbdirect_mr->state == MR_INVALIDATED)
+ ib_dma_unmap_sg(
+ info->id->device, smbdirect_mr->sgl,
+ smbdirect_mr->sgl_count,
+ smbdirect_mr->dir);
+ else if (smbdirect_mr->state == MR_ERROR) {
/* recover this MR entry */
rc = ib_dereg_mr(smbdirect_mr->mr);
@@ -2320,25 +2324,21 @@ static void smbd_mr_recovery_work(struct work_struct *work)
smbd_disconnect_rdma_connection(info);
continue;
}
+ } else
+ /* This MR is being used, don't recover it */
+ continue;
- if (smbdirect_mr->state == MR_INVALIDATED)
- ib_dma_unmap_sg(
- info->id->device, smbdirect_mr->sgl,
- smbdirect_mr->sgl_count,
- smbdirect_mr->dir);
-
- smbdirect_mr->state = MR_READY;
+ smbdirect_mr->state = MR_READY;
- /* smbdirect_mr->state is updated by this function
- * and is read and updated by I/O issuing CPUs trying
- * to get a MR, the call to atomic_inc_return
- * implicates a memory barrier and guarantees this
- * value is updated before waking up any calls to
- * get_mr() from the I/O issuing CPUs
- */
- if (atomic_inc_return(&info->mr_ready_count) == 1)
- wake_up_interruptible(&info->wait_mr);
- }
+ /* smbdirect_mr->state is updated by this function
+ * and is read and updated by I/O issuing CPUs trying
+ * to get a MR, the call to atomic_inc_return
+ * implicates a memory barrier and guarantees this
+ * value is updated before waking up any calls to
+ * get_mr() from the I/O issuing CPUs
+ */
+ if (atomic_inc_return(&info->mr_ready_count) == 1)
+ wake_up_interruptible(&info->wait_mr);
}
}
diff --git a/fs/cifs/trace.h b/fs/cifs/trace.h
index d4aed5217a56..cce8414fe7ec 100644
--- a/fs/cifs/trace.h
+++ b/fs/cifs/trace.h
@@ -460,6 +460,85 @@ DEFINE_EVENT(smb3_open_done_class, smb3_##name, \
DEFINE_SMB3_OPEN_DONE_EVENT(open_done);
DEFINE_SMB3_OPEN_DONE_EVENT(posix_mkdir_done);
+
+DECLARE_EVENT_CLASS(smb3_lease_done_class,
+ TP_PROTO(__u32 lease_state,
+ __u32 tid,
+ __u64 sesid,
+ __u64 lease_key_low,
+ __u64 lease_key_high),
+ TP_ARGS(lease_state, tid, sesid, lease_key_low, lease_key_high),
+ TP_STRUCT__entry(
+ __field(__u32, lease_state)
+ __field(__u32, tid)
+ __field(__u64, sesid)
+ __field(__u64, lease_key_low)
+ __field(__u64, lease_key_high)
+ ),
+ TP_fast_assign(
+ __entry->lease_state = lease_state;
+ __entry->tid = tid;
+ __entry->sesid = sesid;
+ __entry->lease_key_low = lease_key_low;
+ __entry->lease_key_high = lease_key_high;
+ ),
+ TP_printk("sid=0x%llx tid=0x%x lease_key=0x%llx%llx lease_state=0x%x",
+ __entry->sesid, __entry->tid, __entry->lease_key_high,
+ __entry->lease_key_low, __entry->lease_state)
+)
+
+#define DEFINE_SMB3_LEASE_DONE_EVENT(name) \
+DEFINE_EVENT(smb3_lease_done_class, smb3_##name, \
+ TP_PROTO(__u32 lease_state, \
+ __u32 tid, \
+ __u64 sesid, \
+ __u64 lease_key_low, \
+ __u64 lease_key_high), \
+ TP_ARGS(lease_state, tid, sesid, lease_key_low, lease_key_high))
+
+DEFINE_SMB3_LEASE_DONE_EVENT(lease_done);
+
+DECLARE_EVENT_CLASS(smb3_lease_err_class,
+ TP_PROTO(__u32 lease_state,
+ __u32 tid,
+ __u64 sesid,
+ __u64 lease_key_low,
+ __u64 lease_key_high,
+ int rc),
+ TP_ARGS(lease_state, tid, sesid, lease_key_low, lease_key_high, rc),
+ TP_STRUCT__entry(
+ __field(__u32, lease_state)
+ __field(__u32, tid)
+ __field(__u64, sesid)
+ __field(__u64, lease_key_low)
+ __field(__u64, lease_key_high)
+ __field(int, rc)
+ ),
+ TP_fast_assign(
+ __entry->lease_state = lease_state;
+ __entry->tid = tid;
+ __entry->sesid = sesid;
+ __entry->lease_key_low = lease_key_low;
+ __entry->lease_key_high = lease_key_high;
+ __entry->rc = rc;
+ ),
+ TP_printk("sid=0x%llx tid=0x%x lease_key=0x%llx%llx lease_state=0x%x rc=%d",
+ __entry->sesid, __entry->tid, __entry->lease_key_high,
+ __entry->lease_key_low, __entry->lease_state, __entry->rc)
+)
+
+#define DEFINE_SMB3_LEASE_ERR_EVENT(name) \
+DEFINE_EVENT(smb3_lease_err_class, smb3_##name, \
+ TP_PROTO(__u32 lease_state, \
+ __u32 tid, \
+ __u64 sesid, \
+ __u64 lease_key_low, \
+ __u64 lease_key_high, \
+ int rc), \
+ TP_ARGS(lease_state, tid, sesid, lease_key_low, lease_key_high, rc))
+
+DEFINE_SMB3_LEASE_ERR_EVENT(lease_err);
+
DECLARE_EVENT_CLASS(smb3_reconnect_class,
TP_PROTO(__u64 currmid,
char *hostname),
@@ -486,6 +565,36 @@ DEFINE_EVENT(smb3_reconnect_class, smb3_##name, \
DEFINE_SMB3_RECONNECT_EVENT(reconnect);
DEFINE_SMB3_RECONNECT_EVENT(partial_send_reconnect);
+DECLARE_EVENT_CLASS(smb3_credit_class,
+ TP_PROTO(__u64 currmid,
+ char *hostname,
+ int credits),
+ TP_ARGS(currmid, hostname, credits),
+ TP_STRUCT__entry(
+ __field(__u64, currmid)
+ __field(char *, hostname)
+ __field(int, credits)
+ ),
+ TP_fast_assign(
+ __entry->currmid = currmid;
+ __entry->hostname = hostname;
+ __entry->credits = credits;
+ ),
+ TP_printk("server=%s current_mid=0x%llx credits=%d",
+ __entry->hostname,
+ __entry->currmid,
+ __entry->credits)
+)
+
+#define DEFINE_SMB3_CREDIT_EVENT(name) \
+DEFINE_EVENT(smb3_credit_class, smb3_##name, \
+ TP_PROTO(__u64 currmid, \
+ char *hostname, \
+ int credits), \
+ TP_ARGS(currmid, hostname, credits))
+
+DEFINE_SMB3_CREDIT_EVENT(reconnect_with_invalid_credits);
+
#endif /* _CIFS_TRACE_H */
#undef TRACE_INCLUDE_PATH
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index b48f43963da6..f8112433f0c8 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -113,9 +113,18 @@ DeleteMidQEntry(struct mid_q_entry *midEntry)
cifs_small_buf_release(midEntry->resp_buf);
#ifdef CONFIG_CIFS_STATS2
now = jiffies;
- /* commands taking longer than one second are indications that
- something is wrong, unless it is quite a slow link or server */
- if (time_after(now, midEntry->when_alloc + HZ) &&
+ /*
+ * commands taking longer than one second (default) can be indications
+ * that something is wrong, unless it is quite a slow link or a very
+ * busy server. Note that this calc is unlikely or impossible to wrap
+ * as long as slow_rsp_threshold is not set way above recommended max
+ * value (32767 ie 9 hours) and is generally harmless even if wrong
+ * since only affects debug counters - so leaving the calc as simple
+ * comparison rather than doing multiple conversions and overflow
+ * checks
+ */
+ if ((slow_rsp_threshold != 0) &&
+ time_after(now, midEntry->when_alloc + (slow_rsp_threshold * HZ)) &&
(midEntry->command != command)) {
/* smb2slowcmd[NUMBER_OF_SMB2_COMMANDS] counts by command */
if ((le16_to_cpu(midEntry->command) < NUMBER_OF_SMB2_COMMANDS) &&
@@ -128,7 +137,7 @@ DeleteMidQEntry(struct mid_q_entry *midEntry)
if (cifsFYI & CIFS_TIMER) {
pr_debug(" CIFS slow rsp: cmd %d mid %llu",
midEntry->command, midEntry->mid);
- pr_info(" A: 0x%lx S: 0x%lx R: 0x%lx\n",
+ cifs_info(" A: 0x%lx S: 0x%lx R: 0x%lx\n",
now - midEntry->when_alloc,
now - midEntry->when_sent,
now - midEntry->when_received);
@@ -786,7 +795,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
int i, j, rc = 0;
int timeout, optype;
struct mid_q_entry *midQ[MAX_COMPOUND];
- unsigned int credits = 1;
+ unsigned int credits = 0;
char *buf;
timeout = flags & CIFS_TIMEOUT_MASK;
@@ -851,21 +860,24 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
mutex_unlock(&ses->server->srv_mutex);
- for (i = 0; i < num_rqst; i++) {
- if (rc < 0)
- goto out;
+ if (rc < 0)
+ goto out;
- if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP))
- smb311_update_preauth_hash(ses, rqst[i].rq_iov,
- rqst[i].rq_nvec);
+ /*
+ * Compounding is never used during session establish.
+ */
+ if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP))
+ smb311_update_preauth_hash(ses, rqst[0].rq_iov,
+ rqst[0].rq_nvec);
- if (timeout == CIFS_ASYNC_OP)
- goto out;
+ if (timeout == CIFS_ASYNC_OP)
+ goto out;
+ for (i = 0; i < num_rqst; i++) {
rc = wait_for_response(ses->server, midQ[i]);
if (rc != 0) {
- cifs_dbg(FYI, "Cancelling wait for mid %llu\n",
- midQ[i]->mid);
+ cifs_dbg(VFS, "Cancelling wait for mid %llu cmd: %d\n",
+ midQ[i]->mid, le16_to_cpu(midQ[i]->command));
send_cancel(ses->server, &rqst[i], midQ[i]);
spin_lock(&GlobalMid_Lock);
if (midQ[i]->mid_state == MID_REQUEST_SUBMITTED) {
@@ -877,10 +889,21 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
}
spin_unlock(&GlobalMid_Lock);
}
+ }
+
+ for (i = 0; i < num_rqst; i++)
+ if (midQ[i]->resp_buf)
+ credits += ses->server->ops->get_credits(midQ[i]);
+ if (!credits)
+ credits = 1;
+
+ for (i = 0; i < num_rqst; i++) {
+ if (rc < 0)
+ goto out;
rc = cifs_sync_mid_result(midQ[i], ses->server);
if (rc != 0) {
- add_credits(ses->server, 1, optype);
+ add_credits(ses->server, credits, optype);
return rc;
}
@@ -901,23 +924,26 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
else
resp_buf_type[i] = CIFS_SMALL_BUFFER;
- if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP)) {
- struct kvec iov = {
- .iov_base = resp_iov[i].iov_base,
- .iov_len = resp_iov[i].iov_len
- };
- smb311_update_preauth_hash(ses, &iov, 1);
- }
-
- credits = ses->server->ops->get_credits(midQ[i]);
-
rc = ses->server->ops->check_receive(midQ[i], ses->server,
flags & CIFS_LOG_ERROR);
/* mark it so buf will not be freed by cifs_delete_mid */
if ((flags & CIFS_NO_RESP) == 0)
midQ[i]->resp_buf = NULL;
+
+ }
+
+ /*
+ * Compounding is never used during session establish.
+ */
+ if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP)) {
+ struct kvec iov = {
+ .iov_base = resp_iov[0].iov_base,
+ .iov_len = resp_iov[0].iov_len
+ };
+ smb311_update_preauth_hash(ses, &iov, 1);
}
+
out:
/*
* This will dequeue all mids. After this it is important that the
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index ce2cc2169040..6e30949d9f77 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -64,11 +64,6 @@
#include <linux/hiddev.h>
-#define __DVB_CORE__
-#include <linux/dvb/audio.h>
-#include <linux/dvb/dmx.h>
-#include <linux/dvb/frontend.h>
-#include <linux/dvb/video.h>
#include <linux/sort.h>
@@ -95,71 +90,6 @@ static int do_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
return vfs_ioctl(file, cmd, arg);
}
-struct compat_video_event {
- int32_t type;
- compat_time_t timestamp;
- union {
- video_size_t size;
- unsigned int frame_rate;
- } u;
-};
-
-static int do_video_get_event(struct file *file,
- unsigned int cmd, struct compat_video_event __user *up)
-{
- struct video_event __user *kevent =
- compat_alloc_user_space(sizeof(*kevent));
- int err;
-
- if (kevent == NULL)
- return -EFAULT;
-
- err = do_ioctl(file, cmd, (unsigned long)kevent);
- if (!err) {
- err = convert_in_user(&kevent->type, &up->type);
- err |= convert_in_user(&kevent->timestamp, &up->timestamp);
- err |= convert_in_user(&kevent->u.size.w, &up->u.size.w);
- err |= convert_in_user(&kevent->u.size.h, &up->u.size.h);
- err |= convert_in_user(&kevent->u.size.aspect_ratio,
- &up->u.size.aspect_ratio);
- if (err)
- err = -EFAULT;
- }
-
- return err;
-}
-
-struct compat_video_still_picture {
- compat_uptr_t iFrame;
- int32_t size;
-};
-
-static int do_video_stillpicture(struct file *file,
- unsigned int cmd, struct compat_video_still_picture __user *up)
-{
- struct video_still_picture __user *up_native;
- compat_uptr_t fp;
- int32_t size;
- int err;
-
- err = get_user(fp, &up->iFrame);
- err |= get_user(size, &up->size);
- if (err)
- return -EFAULT;
-
- up_native =
- compat_alloc_user_space(sizeof(struct video_still_picture));
-
- err = put_user(compat_ptr(fp), &up_native->iFrame);
- err |= put_user(size, &up_native->size);
- if (err)
- return -EFAULT;
-
- err = do_ioctl(file, cmd, (unsigned long) up_native);
-
- return err;
-}
-
#ifdef CONFIG_BLOCK
typedef struct sg_io_hdr32 {
compat_int_t interface_id; /* [i] 'S' for SCSI generic (required) */
@@ -958,61 +888,6 @@ COMPATIBLE_IOCTL(HIDIOCGFLAG)
COMPATIBLE_IOCTL(HIDIOCSFLAG)
COMPATIBLE_IOCTL(HIDIOCGCOLLECTIONINDEX)
COMPATIBLE_IOCTL(HIDIOCGCOLLECTIONINFO)
-/* dvb */
-COMPATIBLE_IOCTL(AUDIO_STOP)
-COMPATIBLE_IOCTL(AUDIO_PLAY)
-COMPATIBLE_IOCTL(AUDIO_PAUSE)
-COMPATIBLE_IOCTL(AUDIO_CONTINUE)
-COMPATIBLE_IOCTL(AUDIO_SELECT_SOURCE)
-COMPATIBLE_IOCTL(AUDIO_SET_MUTE)
-COMPATIBLE_IOCTL(AUDIO_SET_AV_SYNC)
-COMPATIBLE_IOCTL(AUDIO_SET_BYPASS_MODE)
-COMPATIBLE_IOCTL(AUDIO_CHANNEL_SELECT)
-COMPATIBLE_IOCTL(AUDIO_GET_STATUS)
-COMPATIBLE_IOCTL(AUDIO_GET_CAPABILITIES)
-COMPATIBLE_IOCTL(AUDIO_CLEAR_BUFFER)
-COMPATIBLE_IOCTL(AUDIO_SET_ID)
-COMPATIBLE_IOCTL(AUDIO_SET_MIXER)
-COMPATIBLE_IOCTL(AUDIO_SET_STREAMTYPE)
-COMPATIBLE_IOCTL(DMX_START)
-COMPATIBLE_IOCTL(DMX_STOP)
-COMPATIBLE_IOCTL(DMX_SET_FILTER)
-COMPATIBLE_IOCTL(DMX_SET_PES_FILTER)
-COMPATIBLE_IOCTL(DMX_SET_BUFFER_SIZE)
-COMPATIBLE_IOCTL(DMX_GET_PES_PIDS)
-COMPATIBLE_IOCTL(DMX_GET_STC)
-COMPATIBLE_IOCTL(DMX_REQBUFS)
-COMPATIBLE_IOCTL(DMX_QUERYBUF)
-COMPATIBLE_IOCTL(DMX_EXPBUF)
-COMPATIBLE_IOCTL(DMX_QBUF)
-COMPATIBLE_IOCTL(DMX_DQBUF)
-COMPATIBLE_IOCTL(VIDEO_STOP)
-COMPATIBLE_IOCTL(VIDEO_PLAY)
-COMPATIBLE_IOCTL(VIDEO_FREEZE)
-COMPATIBLE_IOCTL(VIDEO_CONTINUE)
-COMPATIBLE_IOCTL(VIDEO_SELECT_SOURCE)
-COMPATIBLE_IOCTL(VIDEO_SET_BLANK)
-COMPATIBLE_IOCTL(VIDEO_GET_STATUS)
-COMPATIBLE_IOCTL(VIDEO_SET_DISPLAY_FORMAT)
-COMPATIBLE_IOCTL(VIDEO_FAST_FORWARD)
-COMPATIBLE_IOCTL(VIDEO_SLOWMOTION)
-COMPATIBLE_IOCTL(VIDEO_GET_CAPABILITIES)
-COMPATIBLE_IOCTL(VIDEO_CLEAR_BUFFER)
-COMPATIBLE_IOCTL(VIDEO_SET_STREAMTYPE)
-COMPATIBLE_IOCTL(VIDEO_SET_FORMAT)
-COMPATIBLE_IOCTL(VIDEO_GET_SIZE)
-/* cec */
-COMPATIBLE_IOCTL(CEC_ADAP_G_CAPS)
-COMPATIBLE_IOCTL(CEC_ADAP_G_LOG_ADDRS)
-COMPATIBLE_IOCTL(CEC_ADAP_S_LOG_ADDRS)
-COMPATIBLE_IOCTL(CEC_ADAP_G_PHYS_ADDR)
-COMPATIBLE_IOCTL(CEC_ADAP_S_PHYS_ADDR)
-COMPATIBLE_IOCTL(CEC_G_MODE)
-COMPATIBLE_IOCTL(CEC_S_MODE)
-COMPATIBLE_IOCTL(CEC_TRANSMIT)
-COMPATIBLE_IOCTL(CEC_RECEIVE)
-COMPATIBLE_IOCTL(CEC_DQEVENT)
-
/* joystick */
COMPATIBLE_IOCTL(JSIOCGVERSION)
COMPATIBLE_IOCTL(JSIOCGAXES)
@@ -1080,12 +955,6 @@ static long do_ioctl_trans(unsigned int cmd,
case RTC_EPOCH_READ32:
case RTC_EPOCH_SET32:
return rtc_ioctl(file, cmd, argp);
-
- /* dvb */
- case VIDEO_GET_EVENT:
- return do_video_get_event(file, cmd, argp);
- case VIDEO_STILLPICTURE:
- return do_video_stillpicture(file, cmd, argp);
}
/*
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index f408994fc632..0c35e62f108d 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -418,9 +418,12 @@ static int cramfs_physmem_mmap(struct file *file, struct vm_area_struct *vma)
int i;
vma->vm_flags |= VM_MIXEDMAP;
for (i = 0; i < pages && !ret; i++) {
+ vm_fault_t vmf;
unsigned long off = i * PAGE_SIZE;
pfn_t pfn = phys_to_pfn_t(address + off, PFN_DEV);
- ret = vm_insert_mixed(vma, vma->vm_start + off, pfn);
+ vmf = vmf_insert_mixed(vma, vma->vm_start + off, pfn);
+ if (vmf & VM_FAULT_ERROR)
+ ret = vm_fault_to_errno(vmf, 0);
}
}
diff --git a/fs/dax.c b/fs/dax.c
index 0fb270f0a0ef..616e36ea6aaa 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -38,6 +38,17 @@
#define CREATE_TRACE_POINTS
#include <trace/events/fs_dax.h>
+static inline unsigned int pe_order(enum page_entry_size pe_size)
+{
+ if (pe_size == PE_SIZE_PTE)
+ return PAGE_SHIFT - PAGE_SHIFT;
+ if (pe_size == PE_SIZE_PMD)
+ return PMD_SHIFT - PAGE_SHIFT;
+ if (pe_size == PE_SIZE_PUD)
+ return PUD_SHIFT - PAGE_SHIFT;
+ return ~0;
+}
+
/* We choose 4096 entries - same as per-zone page wait tables */
#define DAX_WAIT_TABLE_BITS 12
#define DAX_WAIT_TABLE_ENTRIES (1 << DAX_WAIT_TABLE_BITS)
@@ -46,6 +57,9 @@
#define PG_PMD_COLOUR ((PMD_SIZE >> PAGE_SHIFT) - 1)
#define PG_PMD_NR (PMD_SIZE >> PAGE_SHIFT)
+/* The order of a PMD entry */
+#define PMD_ORDER (PMD_SHIFT - PAGE_SHIFT)
+
static wait_queue_head_t wait_table[DAX_WAIT_TABLE_ENTRIES];
static int __init init_dax_wait_table(void)
@@ -59,63 +73,74 @@ static int __init init_dax_wait_table(void)
fs_initcall(init_dax_wait_table);
/*
- * We use lowest available bit in exceptional entry for locking, one bit for
- * the entry size (PMD) and two more to tell us if the entry is a zero page or
- * an empty entry that is just used for locking. In total four special bits.
+ * DAX pagecache entries use XArray value entries so they can't be mistaken
+ * for pages. We use one bit for locking, one bit for the entry size (PMD)
+ * and two more to tell us if the entry is a zero page or an empty entry that
+ * is just used for locking. In total four special bits.
*
* If the PMD bit isn't set the entry has size PAGE_SIZE, and if the ZERO_PAGE
* and EMPTY bits aren't set the entry is a normal DAX entry with a filesystem
* block allocation.
*/
-#define RADIX_DAX_SHIFT (RADIX_TREE_EXCEPTIONAL_SHIFT + 4)
-#define RADIX_DAX_ENTRY_LOCK (1 << RADIX_TREE_EXCEPTIONAL_SHIFT)
-#define RADIX_DAX_PMD (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 1))
-#define RADIX_DAX_ZERO_PAGE (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 2))
-#define RADIX_DAX_EMPTY (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 3))
+#define DAX_SHIFT (4)
+#define DAX_LOCKED (1UL << 0)
+#define DAX_PMD (1UL << 1)
+#define DAX_ZERO_PAGE (1UL << 2)
+#define DAX_EMPTY (1UL << 3)
-static unsigned long dax_radix_pfn(void *entry)
+static unsigned long dax_to_pfn(void *entry)
{
- return (unsigned long)entry >> RADIX_DAX_SHIFT;
+ return xa_to_value(entry) >> DAX_SHIFT;
}
-static void *dax_radix_locked_entry(unsigned long pfn, unsigned long flags)
+static void *dax_make_entry(pfn_t pfn, unsigned long flags)
{
- return (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY | flags |
- (pfn << RADIX_DAX_SHIFT) | RADIX_DAX_ENTRY_LOCK);
+ return xa_mk_value(flags | (pfn_t_to_pfn(pfn) << DAX_SHIFT));
}
-static unsigned int dax_radix_order(void *entry)
+static void *dax_make_page_entry(struct page *page)
{
- if ((unsigned long)entry & RADIX_DAX_PMD)
- return PMD_SHIFT - PAGE_SHIFT;
+ pfn_t pfn = page_to_pfn_t(page);
+ return dax_make_entry(pfn, PageHead(page) ? DAX_PMD : 0);
+}
+
+static bool dax_is_locked(void *entry)
+{
+ return xa_to_value(entry) & DAX_LOCKED;
+}
+
+static unsigned int dax_entry_order(void *entry)
+{
+ if (xa_to_value(entry) & DAX_PMD)
+ return PMD_ORDER;
return 0;
}
static int dax_is_pmd_entry(void *entry)
{
- return (unsigned long)entry & RADIX_DAX_PMD;
+ return xa_to_value(entry) & DAX_PMD;
}
static int dax_is_pte_entry(void *entry)
{
- return !((unsigned long)entry & RADIX_DAX_PMD);
+ return !(xa_to_value(entry) & DAX_PMD);
}
static int dax_is_zero_entry(void *entry)
{
- return (unsigned long)entry & RADIX_DAX_ZERO_PAGE;
+ return xa_to_value(entry) & DAX_ZERO_PAGE;
}
static int dax_is_empty_entry(void *entry)
{
- return (unsigned long)entry & RADIX_DAX_EMPTY;
+ return xa_to_value(entry) & DAX_EMPTY;
}
/*
- * DAX radix tree locking
+ * DAX page cache entry locking
*/
struct exceptional_entry_key {
- struct address_space *mapping;
+ struct xarray *xa;
pgoff_t entry_start;
};
@@ -124,10 +149,11 @@ struct wait_exceptional_entry_queue {
struct exceptional_entry_key key;
};
-static wait_queue_head_t *dax_entry_waitqueue(struct address_space *mapping,
- pgoff_t index, void *entry, struct exceptional_entry_key *key)
+static wait_queue_head_t *dax_entry_waitqueue(struct xa_state *xas,
+ void *entry, struct exceptional_entry_key *key)
{
unsigned long hash;
+ unsigned long index = xas->xa_index;
/*
* If 'entry' is a PMD, align the 'index' that we use for the wait
@@ -136,22 +162,21 @@ static wait_queue_head_t *dax_entry_waitqueue(struct address_space *mapping,
*/
if (dax_is_pmd_entry(entry))
index &= ~PG_PMD_COLOUR;
-
- key->mapping = mapping;
+ key->xa = xas->xa;
key->entry_start = index;
- hash = hash_long((unsigned long)mapping ^ index, DAX_WAIT_TABLE_BITS);
+ hash = hash_long((unsigned long)xas->xa ^ index, DAX_WAIT_TABLE_BITS);
return wait_table + hash;
}
-static int wake_exceptional_entry_func(wait_queue_entry_t *wait, unsigned int mode,
- int sync, void *keyp)
+static int wake_exceptional_entry_func(wait_queue_entry_t *wait,
+ unsigned int mode, int sync, void *keyp)
{
struct exceptional_entry_key *key = keyp;
struct wait_exceptional_entry_queue *ewait =
container_of(wait, struct wait_exceptional_entry_queue, wait);
- if (key->mapping != ewait->key.mapping ||
+ if (key->xa != ewait->key.xa ||
key->entry_start != ewait->key.entry_start)
return 0;
return autoremove_wake_function(wait, mode, sync, NULL);
@@ -162,13 +187,12 @@ static int wake_exceptional_entry_func(wait_queue_entry_t *wait, unsigned int mo
* The important information it's conveying is whether the entry at
* this index used to be a PMD entry.
*/
-static void dax_wake_mapping_entry_waiter(struct address_space *mapping,
- pgoff_t index, void *entry, bool wake_all)
+static void dax_wake_entry(struct xa_state *xas, void *entry, bool wake_all)
{
struct exceptional_entry_key key;
wait_queue_head_t *wq;
- wq = dax_entry_waitqueue(mapping, index, entry, &key);
+ wq = dax_entry_waitqueue(xas, entry, &key);
/*
* Checking for locked entry and prepare_to_wait_exclusive() happens
@@ -181,55 +205,16 @@ static void dax_wake_mapping_entry_waiter(struct address_space *mapping,
}
/*
- * Check whether the given slot is locked. Must be called with the i_pages
- * lock held.
- */
-static inline int slot_locked(struct address_space *mapping, void **slot)
-{
- unsigned long entry = (unsigned long)
- radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock);
- return entry & RADIX_DAX_ENTRY_LOCK;
-}
-
-/*
- * Mark the given slot as locked. Must be called with the i_pages lock held.
- */
-static inline void *lock_slot(struct address_space *mapping, void **slot)
-{
- unsigned long entry = (unsigned long)
- radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock);
-
- entry |= RADIX_DAX_ENTRY_LOCK;
- radix_tree_replace_slot(&mapping->i_pages, slot, (void *)entry);
- return (void *)entry;
-}
-
-/*
- * Mark the given slot as unlocked. Must be called with the i_pages lock held.
- */
-static inline void *unlock_slot(struct address_space *mapping, void **slot)
-{
- unsigned long entry = (unsigned long)
- radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock);
-
- entry &= ~(unsigned long)RADIX_DAX_ENTRY_LOCK;
- radix_tree_replace_slot(&mapping->i_pages, slot, (void *)entry);
- return (void *)entry;
-}
-
-/*
- * Lookup entry in radix tree, wait for it to become unlocked if it is
- * exceptional entry and return it. The caller must call
- * put_unlocked_mapping_entry() when he decided not to lock the entry or
- * put_locked_mapping_entry() when he locked the entry and now wants to
- * unlock it.
+ * Look up entry in page cache, wait for it to become unlocked if it
+ * is a DAX entry and return it. The caller must subsequently call
+ * put_unlocked_entry() if it did not lock the entry or dax_unlock_entry()
+ * if it did.
*
* Must be called with the i_pages lock held.
*/
-static void *__get_unlocked_mapping_entry(struct address_space *mapping,
- pgoff_t index, void ***slotp, bool (*wait_fn)(void))
+static void *get_unlocked_entry(struct xa_state *xas)
{
- void *entry, **slot;
+ void *entry;
struct wait_exceptional_entry_queue ewait;
wait_queue_head_t *wq;
@@ -237,80 +222,54 @@ static void *__get_unlocked_mapping_entry(struct address_space *mapping,
ewait.wait.func = wake_exceptional_entry_func;
for (;;) {
- bool revalidate;
-
- entry = __radix_tree_lookup(&mapping->i_pages, index, NULL,
- &slot);
- if (!entry ||
- WARN_ON_ONCE(!radix_tree_exceptional_entry(entry)) ||
- !slot_locked(mapping, slot)) {
- if (slotp)
- *slotp = slot;
+ entry = xas_load(xas);
+ if (!entry || xa_is_internal(entry) ||
+ WARN_ON_ONCE(!xa_is_value(entry)) ||
+ !dax_is_locked(entry))
return entry;
- }
- wq = dax_entry_waitqueue(mapping, index, entry, &ewait.key);
+ wq = dax_entry_waitqueue(xas, entry, &ewait.key);
prepare_to_wait_exclusive(wq, &ewait.wait,
TASK_UNINTERRUPTIBLE);
- xa_unlock_irq(&mapping->i_pages);
- revalidate = wait_fn();
+ xas_unlock_irq(xas);
+ xas_reset(xas);
+ schedule();
finish_wait(wq, &ewait.wait);
- xa_lock_irq(&mapping->i_pages);
- if (revalidate)
- return ERR_PTR(-EAGAIN);
+ xas_lock_irq(xas);
}
}
-static bool entry_wait(void)
-{
- schedule();
- /*
- * Never return an ERR_PTR() from
- * __get_unlocked_mapping_entry(), just keep looping.
- */
- return false;
-}
-
-static void *get_unlocked_mapping_entry(struct address_space *mapping,
- pgoff_t index, void ***slotp)
+static void put_unlocked_entry(struct xa_state *xas, void *entry)
{
- return __get_unlocked_mapping_entry(mapping, index, slotp, entry_wait);
-}
-
-static void unlock_mapping_entry(struct address_space *mapping, pgoff_t index)
-{
- void *entry, **slot;
-
- xa_lock_irq(&mapping->i_pages);
- entry = __radix_tree_lookup(&mapping->i_pages, index, NULL, &slot);
- if (WARN_ON_ONCE(!entry || !radix_tree_exceptional_entry(entry) ||
- !slot_locked(mapping, slot))) {
- xa_unlock_irq(&mapping->i_pages);
- return;
- }
- unlock_slot(mapping, slot);
- xa_unlock_irq(&mapping->i_pages);
- dax_wake_mapping_entry_waiter(mapping, index, entry, false);
+ /* If we were the only waiter woken, wake the next one */
+ if (entry)
+ dax_wake_entry(xas, entry, false);
}
-static void put_locked_mapping_entry(struct address_space *mapping,
- pgoff_t index)
+/*
+ * We used the xa_state to get the entry, but then we locked the entry and
+ * dropped the xa_lock, so we know the xa_state is stale and must be reset
+ * before use.
+ */
+static void dax_unlock_entry(struct xa_state *xas, void *entry)
{
- unlock_mapping_entry(mapping, index);
+ void *old;
+
+ xas_reset(xas);
+ xas_lock_irq(xas);
+ old = xas_store(xas, entry);
+ xas_unlock_irq(xas);
+ BUG_ON(!dax_is_locked(old));
+ dax_wake_entry(xas, entry, false);
}
/*
- * Called when we are done with radix tree entry we looked up via
- * get_unlocked_mapping_entry() and which we didn't lock in the end.
+ * Return: The entry stored at this location before it was locked.
*/
-static void put_unlocked_mapping_entry(struct address_space *mapping,
- pgoff_t index, void *entry)
+static void *dax_lock_entry(struct xa_state *xas, void *entry)
{
- if (!entry)
- return;
-
- /* We have to wake up next waiter for the radix tree entry lock */
- dax_wake_mapping_entry_waiter(mapping, index, entry, false);
+ unsigned long v = xa_to_value(entry);
+ return xas_store(xas, xa_mk_value(v | DAX_LOCKED));
}
static unsigned long dax_entry_size(void *entry)
@@ -325,9 +284,9 @@ static unsigned long dax_entry_size(void *entry)
return PAGE_SIZE;
}
-static unsigned long dax_radix_end_pfn(void *entry)
+static unsigned long dax_end_pfn(void *entry)
{
- return dax_radix_pfn(entry) + dax_entry_size(entry) / PAGE_SIZE;
+ return dax_to_pfn(entry) + dax_entry_size(entry) / PAGE_SIZE;
}
/*
@@ -335,8 +294,8 @@ static unsigned long dax_radix_end_pfn(void *entry)
* 'empty' and 'zero' entries.
*/
#define for_each_mapped_pfn(entry, pfn) \
- for (pfn = dax_radix_pfn(entry); \
- pfn < dax_radix_end_pfn(entry); pfn++)
+ for (pfn = dax_to_pfn(entry); \
+ pfn < dax_end_pfn(entry); pfn++)
/*
* TODO: for reflink+dax we need a way to associate a single page with
@@ -393,33 +352,16 @@ static struct page *dax_busy_page(void *entry)
return NULL;
}
-static bool entry_wait_revalidate(void)
-{
- rcu_read_unlock();
- schedule();
- rcu_read_lock();
-
- /*
- * Tell __get_unlocked_mapping_entry() to take a break, we need
- * to revalidate page->mapping after dropping locks
- */
- return true;
-}
-
bool dax_lock_mapping_entry(struct page *page)
{
- pgoff_t index;
- struct inode *inode;
- bool did_lock = false;
- void *entry = NULL, **slot;
- struct address_space *mapping;
+ XA_STATE(xas, NULL, 0);
+ void *entry;
- rcu_read_lock();
for (;;) {
- mapping = READ_ONCE(page->mapping);
+ struct address_space *mapping = READ_ONCE(page->mapping);
if (!dax_mapping(mapping))
- break;
+ return false;
/*
* In the device-dax case there's no need to lock, a
@@ -428,98 +370,94 @@ bool dax_lock_mapping_entry(struct page *page)
* otherwise we would not have a valid pfn_to_page()
* translation.
*/
- inode = mapping->host;
- if (S_ISCHR(inode->i_mode)) {
- did_lock = true;
- break;
- }
+ if (S_ISCHR(mapping->host->i_mode))
+ return true;
- xa_lock_irq(&mapping->i_pages);
+ xas.xa = &mapping->i_pages;
+ xas_lock_irq(&xas);
if (mapping != page->mapping) {
- xa_unlock_irq(&mapping->i_pages);
+ xas_unlock_irq(&xas);
continue;
}
- index = page->index;
-
- entry = __get_unlocked_mapping_entry(mapping, index, &slot,
- entry_wait_revalidate);
- if (!entry) {
- xa_unlock_irq(&mapping->i_pages);
- break;
- } else if (IS_ERR(entry)) {
- xa_unlock_irq(&mapping->i_pages);
- WARN_ON_ONCE(PTR_ERR(entry) != -EAGAIN);
- continue;
+ xas_set(&xas, page->index);
+ entry = xas_load(&xas);
+ if (dax_is_locked(entry)) {
+ entry = get_unlocked_entry(&xas);
+ /* Did the page move while we slept? */
+ if (dax_to_pfn(entry) != page_to_pfn(page)) {
+ xas_unlock_irq(&xas);
+ continue;
+ }
}
- lock_slot(mapping, slot);
- did_lock = true;
- xa_unlock_irq(&mapping->i_pages);
- break;
+ dax_lock_entry(&xas, entry);
+ xas_unlock_irq(&xas);
+ return true;
}
- rcu_read_unlock();
-
- return did_lock;
}
void dax_unlock_mapping_entry(struct page *page)
{
struct address_space *mapping = page->mapping;
- struct inode *inode = mapping->host;
+ XA_STATE(xas, &mapping->i_pages, page->index);
- if (S_ISCHR(inode->i_mode))
+ if (S_ISCHR(mapping->host->i_mode))
return;
- unlock_mapping_entry(mapping, page->index);
+ dax_unlock_entry(&xas, dax_make_page_entry(page));
}
/*
- * Find radix tree entry at given index. If it points to an exceptional entry,
- * return it with the radix tree entry locked. If the radix tree doesn't
- * contain given index, create an empty exceptional entry for the index and
- * return with it locked.
+ * Find page cache entry at given index. If it is a DAX entry, return it
+ * with the entry locked. If the page cache doesn't contain an entry at
+ * that index, add a locked empty entry.
*
- * When requesting an entry with size RADIX_DAX_PMD, grab_mapping_entry() will
- * either return that locked entry or will return an error. This error will
- * happen if there are any 4k entries within the 2MiB range that we are
- * requesting.
+ * When requesting an entry with size DAX_PMD, grab_mapping_entry() will
+ * either return that locked entry or will return VM_FAULT_FALLBACK.
+ * This will happen if there are any PTE entries within the PMD range
+ * that we are requesting.
*
- * We always favor 4k entries over 2MiB entries. There isn't a flow where we
- * evict 4k entries in order to 'upgrade' them to a 2MiB entry. A 2MiB
- * insertion will fail if it finds any 4k entries already in the tree, and a
- * 4k insertion will cause an existing 2MiB entry to be unmapped and
- * downgraded to 4k entries. This happens for both 2MiB huge zero pages as
- * well as 2MiB empty entries.
+ * We always favor PTE entries over PMD entries. There isn't a flow where we
+ * evict PTE entries in order to 'upgrade' them to a PMD entry. A PMD
+ * insertion will fail if it finds any PTE entries already in the tree, and a
+ * PTE insertion will cause an existing PMD entry to be unmapped and
+ * downgraded to PTE entries. This happens for both PMD zero pages as
+ * well as PMD empty entries.
*
- * The exception to this downgrade path is for 2MiB DAX PMD entries that have
- * real storage backing them. We will leave these real 2MiB DAX entries in
- * the tree, and PTE writes will simply dirty the entire 2MiB DAX entry.
+ * The exception to this downgrade path is for PMD entries that have
+ * real storage backing them. We will leave these real PMD entries in
+ * the tree, and PTE writes will simply dirty the entire PMD entry.
*
* Note: Unlike filemap_fault() we don't honor FAULT_FLAG_RETRY flags. For
* persistent memory the benefit is doubtful. We can add that later if we can
* show it helps.
+ *
+ * On error, this function does not return an ERR_PTR. Instead it returns
+ * a VM_FAULT code, encoded as an xarray internal entry. The ERR_PTR values
+ * overlap with xarray value entries.
*/
-static void *grab_mapping_entry(struct address_space *mapping, pgoff_t index,
- unsigned long size_flag)
+static void *grab_mapping_entry(struct xa_state *xas,
+ struct address_space *mapping, unsigned long size_flag)
{
- bool pmd_downgrade = false; /* splitting 2MiB entry into 4k entries? */
- void *entry, **slot;
-
-restart:
- xa_lock_irq(&mapping->i_pages);
- entry = get_unlocked_mapping_entry(mapping, index, &slot);
+ unsigned long index = xas->xa_index;
+ bool pmd_downgrade = false; /* splitting PMD entry into PTE entries? */
+ void *entry;
- if (WARN_ON_ONCE(entry && !radix_tree_exceptional_entry(entry))) {
- entry = ERR_PTR(-EIO);
- goto out_unlock;
- }
+retry:
+ xas_lock_irq(xas);
+ entry = get_unlocked_entry(xas);
+ if (xa_is_internal(entry))
+ goto fallback;
if (entry) {
- if (size_flag & RADIX_DAX_PMD) {
+ if (WARN_ON_ONCE(!xa_is_value(entry))) {
+ xas_set_err(xas, EIO);
+ goto out_unlock;
+ }
+
+ if (size_flag & DAX_PMD) {
if (dax_is_pte_entry(entry)) {
- put_unlocked_mapping_entry(mapping, index,
- entry);
- entry = ERR_PTR(-EEXIST);
- goto out_unlock;
+ put_unlocked_entry(xas, entry);
+ goto fallback;
}
} else { /* trying to grab a PTE entry */
if (dax_is_pmd_entry(entry) &&
@@ -530,87 +468,57 @@ restart:
}
}
- /* No entry for given index? Make sure radix tree is big enough. */
- if (!entry || pmd_downgrade) {
- int err;
-
- if (pmd_downgrade) {
- /*
- * Make sure 'entry' remains valid while we drop
- * the i_pages lock.
- */
- entry = lock_slot(mapping, slot);
- }
+ if (pmd_downgrade) {
+ /*
+ * Make sure 'entry' remains valid while we drop
+ * the i_pages lock.
+ */
+ dax_lock_entry(xas, entry);
- xa_unlock_irq(&mapping->i_pages);
/*
* Besides huge zero pages the only other thing that gets
* downgraded are empty entries which don't need to be
* unmapped.
*/
- if (pmd_downgrade && dax_is_zero_entry(entry))
- unmap_mapping_pages(mapping, index & ~PG_PMD_COLOUR,
- PG_PMD_NR, false);
-
- err = radix_tree_preload(
- mapping_gfp_mask(mapping) & ~__GFP_HIGHMEM);
- if (err) {
- if (pmd_downgrade)
- put_locked_mapping_entry(mapping, index);
- return ERR_PTR(err);
- }
- xa_lock_irq(&mapping->i_pages);
-
- if (!entry) {
- /*
- * We needed to drop the i_pages lock while calling
- * radix_tree_preload() and we didn't have an entry to
- * lock. See if another thread inserted an entry at
- * our index during this time.
- */
- entry = __radix_tree_lookup(&mapping->i_pages, index,
- NULL, &slot);
- if (entry) {
- radix_tree_preload_end();
- xa_unlock_irq(&mapping->i_pages);
- goto restart;
- }
+ if (dax_is_zero_entry(entry)) {
+ xas_unlock_irq(xas);
+ unmap_mapping_pages(mapping,
+ xas->xa_index & ~PG_PMD_COLOUR,
+ PG_PMD_NR, false);
+ xas_reset(xas);
+ xas_lock_irq(xas);
}
- if (pmd_downgrade) {
- dax_disassociate_entry(entry, mapping, false);
- radix_tree_delete(&mapping->i_pages, index);
- mapping->nrexceptional--;
- dax_wake_mapping_entry_waiter(mapping, index, entry,
- true);
- }
+ dax_disassociate_entry(entry, mapping, false);
+ xas_store(xas, NULL); /* undo the PMD join */
+ dax_wake_entry(xas, entry, true);
+ mapping->nrexceptional--;
+ entry = NULL;
+ xas_set(xas, index);
+ }
- entry = dax_radix_locked_entry(0, size_flag | RADIX_DAX_EMPTY);
-
- err = __radix_tree_insert(&mapping->i_pages, index,
- dax_radix_order(entry), entry);
- radix_tree_preload_end();
- if (err) {
- xa_unlock_irq(&mapping->i_pages);
- /*
- * Our insertion of a DAX entry failed, most likely
- * because we were inserting a PMD entry and it
- * collided with a PTE sized entry at a different
- * index in the PMD range. We haven't inserted
- * anything into the radix tree and have no waiters to
- * wake.
- */
- return ERR_PTR(err);
- }
- /* Good, we have inserted empty locked entry into the tree. */
+ if (entry) {
+ dax_lock_entry(xas, entry);
+ } else {
+ entry = dax_make_entry(pfn_to_pfn_t(0), size_flag | DAX_EMPTY);
+ dax_lock_entry(xas, entry);
+ if (xas_error(xas))
+ goto out_unlock;
mapping->nrexceptional++;
- xa_unlock_irq(&mapping->i_pages);
- return entry;
}
- entry = lock_slot(mapping, slot);
- out_unlock:
- xa_unlock_irq(&mapping->i_pages);
+
+out_unlock:
+ xas_unlock_irq(xas);
+ if (xas_nomem(xas, mapping_gfp_mask(mapping) & ~__GFP_HIGHMEM))
+ goto retry;
+ if (xas->xa_node == XA_ERROR(-ENOMEM))
+ return xa_mk_internal(VM_FAULT_OOM);
+ if (xas_error(xas))
+ return xa_mk_internal(VM_FAULT_SIGBUS);
return entry;
+fallback:
+ xas_unlock_irq(xas);
+ return xa_mk_internal(VM_FAULT_FALLBACK);
}
/**
@@ -630,11 +538,10 @@ restart:
*/
struct page *dax_layout_busy_page(struct address_space *mapping)
{
- pgoff_t indices[PAGEVEC_SIZE];
+ XA_STATE(xas, &mapping->i_pages, 0);
+ void *entry;
+ unsigned int scanned = 0;
struct page *page = NULL;
- struct pagevec pvec;
- pgoff_t index, end;
- unsigned i;
/*
* In the 'limited' case get_user_pages() for dax is disabled.
@@ -645,13 +552,9 @@ struct page *dax_layout_busy_page(struct address_space *mapping)
if (!dax_mapping(mapping) || !mapping_mapped(mapping))
return NULL;
- pagevec_init(&pvec);
- index = 0;
- end = -1;
-
/*
* If we race get_user_pages_fast() here either we'll see the
- * elevated page count in the pagevec_lookup and wait, or
+ * elevated page count in the iteration and wait, or
* get_user_pages_fast() will see that the page it took a reference
* against is no longer mapped in the page tables and bail to the
* get_user_pages() slow path. The slow path is protected by
@@ -663,94 +566,68 @@ struct page *dax_layout_busy_page(struct address_space *mapping)
*/
unmap_mapping_range(mapping, 0, 0, 1);
- while (index < end && pagevec_lookup_entries(&pvec, mapping, index,
- min(end - index, (pgoff_t)PAGEVEC_SIZE),
- indices)) {
- pgoff_t nr_pages = 1;
-
- for (i = 0; i < pagevec_count(&pvec); i++) {
- struct page *pvec_ent = pvec.pages[i];
- void *entry;
-
- index = indices[i];
- if (index >= end)
- break;
-
- if (WARN_ON_ONCE(
- !radix_tree_exceptional_entry(pvec_ent)))
- continue;
-
- xa_lock_irq(&mapping->i_pages);
- entry = get_unlocked_mapping_entry(mapping, index, NULL);
- if (entry) {
- page = dax_busy_page(entry);
- /*
- * Account for multi-order entries at
- * the end of the pagevec.
- */
- if (i + 1 >= pagevec_count(&pvec))
- nr_pages = 1UL << dax_radix_order(entry);
- }
- put_unlocked_mapping_entry(mapping, index, entry);
- xa_unlock_irq(&mapping->i_pages);
- if (page)
- break;
- }
-
- /*
- * We don't expect normal struct page entries to exist in our
- * tree, but we keep these pagevec calls so that this code is
- * consistent with the common pattern for handling pagevecs
- * throughout the kernel.
- */
- pagevec_remove_exceptionals(&pvec);
- pagevec_release(&pvec);
- index += nr_pages;
-
+ xas_lock_irq(&xas);
+ xas_for_each(&xas, entry, ULONG_MAX) {
+ if (WARN_ON_ONCE(!xa_is_value(entry)))
+ continue;
+ if (unlikely(dax_is_locked(entry)))
+ entry = get_unlocked_entry(&xas);
+ if (entry)
+ page = dax_busy_page(entry);
+ put_unlocked_entry(&xas, entry);
if (page)
break;
+ if (++scanned % XA_CHECK_SCHED)
+ continue;
+
+ xas_pause(&xas);
+ xas_unlock_irq(&xas);
+ cond_resched();
+ xas_lock_irq(&xas);
}
+ xas_unlock_irq(&xas);
return page;
}
EXPORT_SYMBOL_GPL(dax_layout_busy_page);
-static int __dax_invalidate_mapping_entry(struct address_space *mapping,
+static int __dax_invalidate_entry(struct address_space *mapping,
pgoff_t index, bool trunc)
{
+ XA_STATE(xas, &mapping->i_pages, index);
int ret = 0;
void *entry;
- struct radix_tree_root *pages = &mapping->i_pages;
- xa_lock_irq(pages);
- entry = get_unlocked_mapping_entry(mapping, index, NULL);
- if (!entry || WARN_ON_ONCE(!radix_tree_exceptional_entry(entry)))
+ xas_lock_irq(&xas);
+ entry = get_unlocked_entry(&xas);
+ if (!entry || WARN_ON_ONCE(!xa_is_value(entry)))
goto out;
if (!trunc &&
- (radix_tree_tag_get(pages, index, PAGECACHE_TAG_DIRTY) ||
- radix_tree_tag_get(pages, index, PAGECACHE_TAG_TOWRITE)))
+ (xas_get_mark(&xas, PAGECACHE_TAG_DIRTY) ||
+ xas_get_mark(&xas, PAGECACHE_TAG_TOWRITE)))
goto out;
dax_disassociate_entry(entry, mapping, trunc);
- radix_tree_delete(pages, index);
+ xas_store(&xas, NULL);
mapping->nrexceptional--;
ret = 1;
out:
- put_unlocked_mapping_entry(mapping, index, entry);
- xa_unlock_irq(pages);
+ put_unlocked_entry(&xas, entry);
+ xas_unlock_irq(&xas);
return ret;
}
+
/*
- * Delete exceptional DAX entry at @index from @mapping. Wait for radix tree
- * entry to get unlocked before deleting it.
+ * Delete DAX entry at @index from @mapping. Wait for it
+ * to be unlocked before deleting it.
*/
int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
{
- int ret = __dax_invalidate_mapping_entry(mapping, index, true);
+ int ret = __dax_invalidate_entry(mapping, index, true);
/*
* This gets called from truncate / punch_hole path. As such, the caller
* must hold locks protecting against concurrent modifications of the
- * radix tree (usually fs-private i_mmap_sem for writing). Since the
- * caller has seen exceptional entry for this index, we better find it
+ * page cache (usually fs-private i_mmap_sem for writing). Since the
+ * caller has seen a DAX entry for this index, we better find it
* at that index as well...
*/
WARN_ON_ONCE(!ret);
@@ -758,12 +635,12 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
}
/*
- * Invalidate exceptional DAX entry if it is clean.
+ * Invalidate DAX entry if it is clean.
*/
int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
pgoff_t index)
{
- return __dax_invalidate_mapping_entry(mapping, index, false);
+ return __dax_invalidate_entry(mapping, index, false);
}
static int copy_user_dax(struct block_device *bdev, struct dax_device *dax_dev,
@@ -799,30 +676,27 @@ static int copy_user_dax(struct block_device *bdev, struct dax_device *dax_dev,
* already in the tree, we will skip the insertion and just dirty the PMD as
* appropriate.
*/
-static void *dax_insert_mapping_entry(struct address_space *mapping,
- struct vm_fault *vmf,
- void *entry, pfn_t pfn_t,
- unsigned long flags, bool dirty)
+static void *dax_insert_entry(struct xa_state *xas,
+ struct address_space *mapping, struct vm_fault *vmf,
+ void *entry, pfn_t pfn, unsigned long flags, bool dirty)
{
- struct radix_tree_root *pages = &mapping->i_pages;
- unsigned long pfn = pfn_t_to_pfn(pfn_t);
- pgoff_t index = vmf->pgoff;
- void *new_entry;
+ void *new_entry = dax_make_entry(pfn, flags);
if (dirty)
__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
- if (dax_is_zero_entry(entry) && !(flags & RADIX_DAX_ZERO_PAGE)) {
+ if (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE)) {
+ unsigned long index = xas->xa_index;
/* we are replacing a zero page with block mapping */
if (dax_is_pmd_entry(entry))
unmap_mapping_pages(mapping, index & ~PG_PMD_COLOUR,
- PG_PMD_NR, false);
+ PG_PMD_NR, false);
else /* pte entry */
- unmap_mapping_pages(mapping, vmf->pgoff, 1, false);
+ unmap_mapping_pages(mapping, index, 1, false);
}
- xa_lock_irq(pages);
- new_entry = dax_radix_locked_entry(pfn, flags);
+ xas_reset(xas);
+ xas_lock_irq(xas);
if (dax_entry_size(entry) != dax_entry_size(new_entry)) {
dax_disassociate_entry(entry, mapping, false);
dax_associate_entry(new_entry, mapping, vmf->vma, vmf->address);
@@ -830,33 +704,30 @@ static void *dax_insert_mapping_entry(struct address_space *mapping,
if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
/*
- * Only swap our new entry into the radix tree if the current
+ * Only swap our new entry into the page cache if the current
* entry is a zero page or an empty entry. If a normal PTE or
- * PMD entry is already in the tree, we leave it alone. This
+ * PMD entry is already in the cache, we leave it alone. This
* means that if we are trying to insert a PTE and the
* existing entry is a PMD, we will just leave the PMD in the
* tree and dirty it if necessary.
*/
- struct radix_tree_node *node;
- void **slot;
- void *ret;
-
- ret = __radix_tree_lookup(pages, index, &node, &slot);
- WARN_ON_ONCE(ret != entry);
- __radix_tree_replace(pages, node, slot,
- new_entry, NULL);
+ void *old = dax_lock_entry(xas, new_entry);
+ WARN_ON_ONCE(old != xa_mk_value(xa_to_value(entry) |
+ DAX_LOCKED));
entry = new_entry;
+ } else {
+ xas_load(xas); /* Walk the xa_state */
}
if (dirty)
- radix_tree_tag_set(pages, index, PAGECACHE_TAG_DIRTY);
+ xas_set_mark(xas, PAGECACHE_TAG_DIRTY);
- xa_unlock_irq(pages);
+ xas_unlock_irq(xas);
return entry;
}
-static inline unsigned long
-pgoff_address(pgoff_t pgoff, struct vm_area_struct *vma)
+static inline
+unsigned long pgoff_address(pgoff_t pgoff, struct vm_area_struct *vma)
{
unsigned long address;
@@ -866,8 +737,8 @@ pgoff_address(pgoff_t pgoff, struct vm_area_struct *vma)
}
/* Walk all mappings of a given index of a file and writeprotect them */
-static void dax_mapping_entry_mkclean(struct address_space *mapping,
- pgoff_t index, unsigned long pfn)
+static void dax_entry_mkclean(struct address_space *mapping, pgoff_t index,
+ unsigned long pfn)
{
struct vm_area_struct *vma;
pte_t pte, *ptep = NULL;
@@ -937,11 +808,9 @@ unlock_pte:
i_mmap_unlock_read(mapping);
}
-static int dax_writeback_one(struct dax_device *dax_dev,
- struct address_space *mapping, pgoff_t index, void *entry)
+static int dax_writeback_one(struct xa_state *xas, struct dax_device *dax_dev,
+ struct address_space *mapping, void *entry)
{
- struct radix_tree_root *pages = &mapping->i_pages;
- void *entry2, **slot;
unsigned long pfn;
long ret = 0;
size_t size;
@@ -950,32 +819,38 @@ static int dax_writeback_one(struct dax_device *dax_dev,
* A page got tagged dirty in DAX mapping? Something is seriously
* wrong.
*/
- if (WARN_ON(!radix_tree_exceptional_entry(entry)))
+ if (WARN_ON(!xa_is_value(entry)))
return -EIO;
- xa_lock_irq(pages);
- entry2 = get_unlocked_mapping_entry(mapping, index, &slot);
- /* Entry got punched out / reallocated? */
- if (!entry2 || WARN_ON_ONCE(!radix_tree_exceptional_entry(entry2)))
- goto put_unlocked;
- /*
- * Entry got reallocated elsewhere? No need to writeback. We have to
- * compare pfns as we must not bail out due to difference in lockbit
- * or entry type.
- */
- if (dax_radix_pfn(entry2) != dax_radix_pfn(entry))
- goto put_unlocked;
- if (WARN_ON_ONCE(dax_is_empty_entry(entry) ||
- dax_is_zero_entry(entry))) {
- ret = -EIO;
- goto put_unlocked;
+ if (unlikely(dax_is_locked(entry))) {
+ void *old_entry = entry;
+
+ entry = get_unlocked_entry(xas);
+
+ /* Entry got punched out / reallocated? */
+ if (!entry || WARN_ON_ONCE(!xa_is_value(entry)))
+ goto put_unlocked;
+ /*
+ * Entry got reallocated elsewhere? No need to writeback.
+ * We have to compare pfns as we must not bail out due to
+ * difference in lockbit or entry type.
+ */
+ if (dax_to_pfn(old_entry) != dax_to_pfn(entry))
+ goto put_unlocked;
+ if (WARN_ON_ONCE(dax_is_empty_entry(entry) ||
+ dax_is_zero_entry(entry))) {
+ ret = -EIO;
+ goto put_unlocked;
+ }
+
+ /* Another fsync thread may have already done this entry */
+ if (!xas_get_mark(xas, PAGECACHE_TAG_TOWRITE))
+ goto put_unlocked;
}
- /* Another fsync thread may have already written back this entry */
- if (!radix_tree_tag_get(pages, index, PAGECACHE_TAG_TOWRITE))
- goto put_unlocked;
/* Lock the entry to serialize with page faults */
- entry = lock_slot(mapping, slot);
+ dax_lock_entry(xas, entry);
+
/*
* We can clear the tag now but we have to be careful so that concurrent
* dax_writeback_one() calls for the same index cannot finish before we
@@ -983,8 +858,8 @@ static int dax_writeback_one(struct dax_device *dax_dev,
* at the entry only under the i_pages lock and once they do that
* they will see the entry locked and wait for it to unlock.
*/
- radix_tree_tag_clear(pages, index, PAGECACHE_TAG_TOWRITE);
- xa_unlock_irq(pages);
+ xas_clear_mark(xas, PAGECACHE_TAG_TOWRITE);
+ xas_unlock_irq(xas);
/*
* Even if dax_writeback_mapping_range() was given a wbc->range_start
@@ -993,10 +868,10 @@ static int dax_writeback_one(struct dax_device *dax_dev,
* This allows us to flush for PMD_SIZE and not have to worry about
* partial PMD writebacks.
*/
- pfn = dax_radix_pfn(entry);
- size = PAGE_SIZE << dax_radix_order(entry);
+ pfn = dax_to_pfn(entry);
+ size = PAGE_SIZE << dax_entry_order(entry);
- dax_mapping_entry_mkclean(mapping, index, pfn);
+ dax_entry_mkclean(mapping, xas->xa_index, pfn);
dax_flush(dax_dev, page_address(pfn_to_page(pfn)), size);
/*
* After we have flushed the cache, we can clear the dirty tag. There
@@ -1004,16 +879,18 @@ static int dax_writeback_one(struct dax_device *dax_dev,
* the pfn mappings are writeprotected and fault waits for mapping
* entry lock.
*/
- xa_lock_irq(pages);
- radix_tree_tag_clear(pages, index, PAGECACHE_TAG_DIRTY);
- xa_unlock_irq(pages);
- trace_dax_writeback_one(mapping->host, index, size >> PAGE_SHIFT);
- put_locked_mapping_entry(mapping, index);
+ xas_reset(xas);
+ xas_lock_irq(xas);
+ xas_store(xas, entry);
+ xas_clear_mark(xas, PAGECACHE_TAG_DIRTY);
+ dax_wake_entry(xas, entry, false);
+
+ trace_dax_writeback_one(mapping->host, xas->xa_index,
+ size >> PAGE_SHIFT);
return ret;
put_unlocked:
- put_unlocked_mapping_entry(mapping, index, entry2);
- xa_unlock_irq(pages);
+ put_unlocked_entry(xas, entry);
return ret;
}
@@ -1025,13 +902,13 @@ static int dax_writeback_one(struct dax_device *dax_dev,
int dax_writeback_mapping_range(struct address_space *mapping,
struct block_device *bdev, struct writeback_control *wbc)
{
+ XA_STATE(xas, &mapping->i_pages, wbc->range_start >> PAGE_SHIFT);
struct inode *inode = mapping->host;
- pgoff_t start_index, end_index;
- pgoff_t indices[PAGEVEC_SIZE];
+ pgoff_t end_index = wbc->range_end >> PAGE_SHIFT;
struct dax_device *dax_dev;
- struct pagevec pvec;
- bool done = false;
- int i, ret = 0;
+ void *entry;
+ int ret = 0;
+ unsigned int scanned = 0;
if (WARN_ON_ONCE(inode->i_blkbits != PAGE_SHIFT))
return -EIO;
@@ -1043,41 +920,29 @@ int dax_writeback_mapping_range(struct address_space *mapping,
if (!dax_dev)
return -EIO;
- start_index = wbc->range_start >> PAGE_SHIFT;
- end_index = wbc->range_end >> PAGE_SHIFT;
-
- trace_dax_writeback_range(inode, start_index, end_index);
+ trace_dax_writeback_range(inode, xas.xa_index, end_index);
- tag_pages_for_writeback(mapping, start_index, end_index);
+ tag_pages_for_writeback(mapping, xas.xa_index, end_index);
- pagevec_init(&pvec);
- while (!done) {
- pvec.nr = find_get_entries_tag(mapping, start_index,
- PAGECACHE_TAG_TOWRITE, PAGEVEC_SIZE,
- pvec.pages, indices);
-
- if (pvec.nr == 0)
+ xas_lock_irq(&xas);
+ xas_for_each_marked(&xas, entry, end_index, PAGECACHE_TAG_TOWRITE) {
+ ret = dax_writeback_one(&xas, dax_dev, mapping, entry);
+ if (ret < 0) {
+ mapping_set_error(mapping, ret);
break;
-
- for (i = 0; i < pvec.nr; i++) {
- if (indices[i] > end_index) {
- done = true;
- break;
- }
-
- ret = dax_writeback_one(dax_dev, mapping, indices[i],
- pvec.pages[i]);
- if (ret < 0) {
- mapping_set_error(mapping, ret);
- goto out;
- }
}
- start_index = indices[pvec.nr - 1] + 1;
+ if (++scanned % XA_CHECK_SCHED)
+ continue;
+
+ xas_pause(&xas);
+ xas_unlock_irq(&xas);
+ cond_resched();
+ xas_lock_irq(&xas);
}
-out:
+ xas_unlock_irq(&xas);
put_dax(dax_dev);
- trace_dax_writeback_range_done(inode, start_index, end_index);
- return (ret < 0 ? ret : 0);
+ trace_dax_writeback_range_done(inode, xas.xa_index, end_index);
+ return ret;
}
EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);
@@ -1125,16 +990,18 @@ out:
* If this page is ever written to we will re-fault and change the mapping to
* point to real DAX storage instead.
*/
-static vm_fault_t dax_load_hole(struct address_space *mapping, void *entry,
- struct vm_fault *vmf)
+static vm_fault_t dax_load_hole(struct xa_state *xas,
+ struct address_space *mapping, void **entry,
+ struct vm_fault *vmf)
{
struct inode *inode = mapping->host;
unsigned long vaddr = vmf->address;
pfn_t pfn = pfn_to_pfn_t(my_zero_pfn(vaddr));
vm_fault_t ret;
- dax_insert_mapping_entry(mapping, vmf, entry, pfn, RADIX_DAX_ZERO_PAGE,
- false);
+ *entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn,
+ DAX_ZERO_PAGE, false);
+
ret = vmf_insert_mixed(vmf->vma, vaddr, pfn);
trace_dax_load_hole(inode, vmf, ret);
return ret;
@@ -1342,6 +1209,7 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
{
struct vm_area_struct *vma = vmf->vma;
struct address_space *mapping = vma->vm_file->f_mapping;
+ XA_STATE(xas, &mapping->i_pages, vmf->pgoff);
struct inode *inode = mapping->host;
unsigned long vaddr = vmf->address;
loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
@@ -1368,9 +1236,9 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
if (write && !vmf->cow_page)
flags |= IOMAP_WRITE;
- entry = grab_mapping_entry(mapping, vmf->pgoff, 0);
- if (IS_ERR(entry)) {
- ret = dax_fault_return(PTR_ERR(entry));
+ entry = grab_mapping_entry(&xas, mapping, 0);
+ if (xa_is_internal(entry)) {
+ ret = xa_to_internal(entry);
goto out;
}
@@ -1443,7 +1311,7 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
if (error < 0)
goto error_finish_iomap;
- entry = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
+ entry = dax_insert_entry(&xas, mapping, vmf, entry, pfn,
0, write && !sync);
/*
@@ -1471,7 +1339,7 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
case IOMAP_UNWRITTEN:
case IOMAP_HOLE:
if (!write) {
- ret = dax_load_hole(mapping, entry, vmf);
+ ret = dax_load_hole(&xas, mapping, &entry, vmf);
goto finish_iomap;
}
/*FALLTHRU*/
@@ -1498,21 +1366,20 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
ops->iomap_end(inode, pos, PAGE_SIZE, copied, flags, &iomap);
}
unlock_entry:
- put_locked_mapping_entry(mapping, vmf->pgoff);
+ dax_unlock_entry(&xas, entry);
out:
trace_dax_pte_fault_done(inode, vmf, ret);
return ret | major;
}
#ifdef CONFIG_FS_DAX_PMD
-static vm_fault_t dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap,
- void *entry)
+static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
+ struct iomap *iomap, void **entry)
{
struct address_space *mapping = vmf->vma->vm_file->f_mapping;
unsigned long pmd_addr = vmf->address & PMD_MASK;
struct inode *inode = mapping->host;
struct page *zero_page;
- void *ret = NULL;
spinlock_t *ptl;
pmd_t pmd_entry;
pfn_t pfn;
@@ -1523,8 +1390,8 @@ static vm_fault_t dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap,
goto fallback;
pfn = page_to_pfn_t(zero_page);
- ret = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
- RADIX_DAX_PMD | RADIX_DAX_ZERO_PAGE, false);
+ *entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn,
+ DAX_PMD | DAX_ZERO_PAGE, false);
ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd);
if (!pmd_none(*(vmf->pmd))) {
@@ -1536,11 +1403,11 @@ static vm_fault_t dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap,
pmd_entry = pmd_mkhuge(pmd_entry);
set_pmd_at(vmf->vma->vm_mm, pmd_addr, vmf->pmd, pmd_entry);
spin_unlock(ptl);
- trace_dax_pmd_load_hole(inode, vmf, zero_page, ret);
+ trace_dax_pmd_load_hole(inode, vmf, zero_page, *entry);
return VM_FAULT_NOPAGE;
fallback:
- trace_dax_pmd_load_hole_fallback(inode, vmf, zero_page, ret);
+ trace_dax_pmd_load_hole_fallback(inode, vmf, zero_page, *entry);
return VM_FAULT_FALLBACK;
}
@@ -1549,6 +1416,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
{
struct vm_area_struct *vma = vmf->vma;
struct address_space *mapping = vma->vm_file->f_mapping;
+ XA_STATE_ORDER(xas, &mapping->i_pages, vmf->pgoff, PMD_ORDER);
unsigned long pmd_addr = vmf->address & PMD_MASK;
bool write = vmf->flags & FAULT_FLAG_WRITE;
bool sync;
@@ -1556,7 +1424,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
struct inode *inode = mapping->host;
vm_fault_t result = VM_FAULT_FALLBACK;
struct iomap iomap = { 0 };
- pgoff_t max_pgoff, pgoff;
+ pgoff_t max_pgoff;
void *entry;
loff_t pos;
int error;
@@ -1567,7 +1435,6 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
* supposed to hold locks serializing us with truncate / punch hole so
* this is a reliable test.
*/
- pgoff = linear_page_index(vma, pmd_addr);
max_pgoff = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
trace_dax_pmd_fault(inode, vmf, max_pgoff, 0);
@@ -1576,7 +1443,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
* Make sure that the faulting address's PMD offset (color) matches
* the PMD offset from the start of the file. This is necessary so
* that a PMD range in the page table overlaps exactly with a PMD
- * range in the radix tree.
+ * range in the page cache.
*/
if ((vmf->pgoff & PG_PMD_COLOUR) !=
((vmf->address >> PAGE_SHIFT) & PG_PMD_COLOUR))
@@ -1592,24 +1459,26 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
if ((pmd_addr + PMD_SIZE) > vma->vm_end)
goto fallback;
- if (pgoff >= max_pgoff) {
+ if (xas.xa_index >= max_pgoff) {
result = VM_FAULT_SIGBUS;
goto out;
}
/* If the PMD would extend beyond the file size */
- if ((pgoff | PG_PMD_COLOUR) >= max_pgoff)
+ if ((xas.xa_index | PG_PMD_COLOUR) >= max_pgoff)
goto fallback;
/*
- * grab_mapping_entry() will make sure we get a 2MiB empty entry, a
- * 2MiB zero page entry or a DAX PMD. If it can't (because a 4k page
- * is already in the tree, for instance), it will return -EEXIST and
- * we just fall back to 4k entries.
+ * grab_mapping_entry() will make sure we get an empty PMD entry,
+ * a zero PMD entry or a DAX PMD. If it can't (because a PTE
+ * entry is already in the array, for instance), it will return
+ * VM_FAULT_FALLBACK.
*/
- entry = grab_mapping_entry(mapping, pgoff, RADIX_DAX_PMD);
- if (IS_ERR(entry))
+ entry = grab_mapping_entry(&xas, mapping, DAX_PMD);
+ if (xa_is_internal(entry)) {
+ result = xa_to_internal(entry);
goto fallback;
+ }
/*
* It is possible, particularly with mixed reads & writes to private
@@ -1628,7 +1497,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
* setting up a mapping, so really we're using iomap_begin() as a way
* to look up our filesystem block.
*/
- pos = (loff_t)pgoff << PAGE_SHIFT;
+ pos = (loff_t)xas.xa_index << PAGE_SHIFT;
error = ops->iomap_begin(inode, pos, PMD_SIZE, iomap_flags, &iomap);
if (error)
goto unlock_entry;
@@ -1644,8 +1513,8 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
if (error < 0)
goto finish_iomap;
- entry = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
- RADIX_DAX_PMD, write && !sync);
+ entry = dax_insert_entry(&xas, mapping, vmf, entry, pfn,
+ DAX_PMD, write && !sync);
/*
* If we are doing synchronous page fault and inode needs fsync,
@@ -1669,7 +1538,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
case IOMAP_HOLE:
if (WARN_ON_ONCE(write))
break;
- result = dax_pmd_load_hole(vmf, &iomap, entry);
+ result = dax_pmd_load_hole(&xas, vmf, &iomap, &entry);
break;
default:
WARN_ON_ONCE(1);
@@ -1692,7 +1561,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
&iomap);
}
unlock_entry:
- put_locked_mapping_entry(mapping, pgoff);
+ dax_unlock_entry(&xas, entry);
fallback:
if (result == VM_FAULT_FALLBACK) {
split_huge_pmd(vma, vmf->pmd, vmf->address);
@@ -1737,54 +1606,49 @@ vm_fault_t dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size,
}
EXPORT_SYMBOL_GPL(dax_iomap_fault);
-/**
+/*
* dax_insert_pfn_mkwrite - insert PTE or PMD entry into page tables
* @vmf: The description of the fault
- * @pe_size: Size of entry to be inserted
* @pfn: PFN to insert
+ * @order: Order of entry to insert.
*
- * This function inserts writeable PTE or PMD entry into page tables for mmaped
- * DAX file. It takes care of marking corresponding radix tree entry as dirty
- * as well.
+ * This function inserts a writeable PTE or PMD entry into the page tables
+ * for an mmaped DAX file. It also marks the page cache entry as dirty.
*/
-static vm_fault_t dax_insert_pfn_mkwrite(struct vm_fault *vmf,
- enum page_entry_size pe_size,
- pfn_t pfn)
+static vm_fault_t
+dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order)
{
struct address_space *mapping = vmf->vma->vm_file->f_mapping;
- void *entry, **slot;
- pgoff_t index = vmf->pgoff;
+ XA_STATE_ORDER(xas, &mapping->i_pages, vmf->pgoff, order);
+ void *entry;
vm_fault_t ret;
- xa_lock_irq(&mapping->i_pages);
- entry = get_unlocked_mapping_entry(mapping, index, &slot);
+ xas_lock_irq(&xas);
+ entry = get_unlocked_entry(&xas);
/* Did we race with someone splitting entry or so? */
if (!entry ||
- (pe_size == PE_SIZE_PTE && !dax_is_pte_entry(entry)) ||
- (pe_size == PE_SIZE_PMD && !dax_is_pmd_entry(entry))) {
- put_unlocked_mapping_entry(mapping, index, entry);
- xa_unlock_irq(&mapping->i_pages);
+ (order == 0 && !dax_is_pte_entry(entry)) ||
+ (order == PMD_ORDER && (xa_is_internal(entry) ||
+ !dax_is_pmd_entry(entry)))) {
+ put_unlocked_entry(&xas, entry);
+ xas_unlock_irq(&xas);
trace_dax_insert_pfn_mkwrite_no_entry(mapping->host, vmf,
VM_FAULT_NOPAGE);
return VM_FAULT_NOPAGE;
}
- radix_tree_tag_set(&mapping->i_pages, index, PAGECACHE_TAG_DIRTY);
- entry = lock_slot(mapping, slot);
- xa_unlock_irq(&mapping->i_pages);
- switch (pe_size) {
- case PE_SIZE_PTE:
+ xas_set_mark(&xas, PAGECACHE_TAG_DIRTY);
+ dax_lock_entry(&xas, entry);
+ xas_unlock_irq(&xas);
+ if (order == 0)
ret = vmf_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn);
- break;
#ifdef CONFIG_FS_DAX_PMD
- case PE_SIZE_PMD:
+ else if (order == PMD_ORDER)
ret = vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd,
pfn, true);
- break;
#endif
- default:
+ else
ret = VM_FAULT_FALLBACK;
- }
- put_locked_mapping_entry(mapping, index);
+ dax_unlock_entry(&xas, entry);
trace_dax_insert_pfn_mkwrite(mapping->host, vmf, ret);
return ret;
}
@@ -1804,17 +1668,12 @@ vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf,
{
int err;
loff_t start = ((loff_t)vmf->pgoff) << PAGE_SHIFT;
- size_t len = 0;
+ unsigned int order = pe_order(pe_size);
+ size_t len = PAGE_SIZE << order;
- if (pe_size == PE_SIZE_PTE)
- len = PAGE_SIZE;
- else if (pe_size == PE_SIZE_PMD)
- len = PMD_SIZE;
- else
- WARN_ON_ONCE(1);
err = vfs_fsync_range(vmf->vma->vm_file, start, start + len - 1, 1);
if (err)
return VM_FAULT_SIGBUS;
- return dax_insert_pfn_mkwrite(vmf, pe_size, pfn);
+ return dax_insert_pfn_mkwrite(vmf, pfn, order);
}
EXPORT_SYMBOL_GPL(dax_finish_sync_fault);
diff --git a/fs/dcache.c b/fs/dcache.c
index 2e7e8d85e9b4..c2e443fb76ae 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -257,24 +257,10 @@ static void __d_free(struct rcu_head *head)
kmem_cache_free(dentry_cache, dentry);
}
-static void __d_free_external_name(struct rcu_head *head)
-{
- struct external_name *name = container_of(head, struct external_name,
- u.head);
-
- mod_node_page_state(page_pgdat(virt_to_page(name)),
- NR_INDIRECTLY_RECLAIMABLE_BYTES,
- -ksize(name));
-
- kfree(name);
-}
-
static void __d_free_external(struct rcu_head *head)
{
struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
-
- __d_free_external_name(&external_name(dentry)->u.head);
-
+ kfree(external_name(dentry));
kmem_cache_free(dentry_cache, dentry);
}
@@ -306,7 +292,7 @@ void release_dentry_name_snapshot(struct name_snapshot *name)
struct external_name *p;
p = container_of(name->name, struct external_name, name[0]);
if (unlikely(atomic_dec_and_test(&p->u.count)))
- call_rcu(&p->u.head, __d_free_external_name);
+ kfree_rcu(p, u.head);
}
}
EXPORT_SYMBOL(release_dentry_name_snapshot);
@@ -1606,7 +1592,6 @@ EXPORT_SYMBOL(d_invalidate);
struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
{
- struct external_name *ext = NULL;
struct dentry *dentry;
char *dname;
int err;
@@ -1627,14 +1612,15 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
dname = dentry->d_iname;
} else if (name->len > DNAME_INLINE_LEN-1) {
size_t size = offsetof(struct external_name, name[1]);
-
- ext = kmalloc(size + name->len, GFP_KERNEL_ACCOUNT);
- if (!ext) {
+ struct external_name *p = kmalloc(size + name->len,
+ GFP_KERNEL_ACCOUNT |
+ __GFP_RECLAIMABLE);
+ if (!p) {
kmem_cache_free(dentry_cache, dentry);
return NULL;
}
- atomic_set(&ext->u.count, 1);
- dname = ext->name;
+ atomic_set(&p->u.count, 1);
+ dname = p->name;
} else {
dname = dentry->d_iname;
}
@@ -1673,12 +1659,6 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
}
}
- if (unlikely(ext)) {
- pg_data_t *pgdat = page_pgdat(virt_to_page(ext));
- mod_node_page_state(pgdat, NR_INDIRECTLY_RECLAIMABLE_BYTES,
- ksize(ext));
- }
-
this_cpu_inc(nr_dentry);
return dentry;
@@ -2707,7 +2687,7 @@ static void copy_name(struct dentry *dentry, struct dentry *target)
dentry->d_name.hash_len = target->d_name.hash_len;
}
if (old_name && likely(atomic_dec_and_test(&old_name->u.count)))
- call_rcu(&old_name->u.head, __d_free_external_name);
+ kfree_rcu(old_name, u.head);
}
/*
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index 224c04abb2e5..cf4c77f8dd08 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -256,11 +256,15 @@ ext2_init_acl(struct inode *inode, struct inode *dir)
if (default_acl) {
error = __ext2_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
posix_acl_release(default_acl);
+ } else {
+ inode->i_default_acl = NULL;
}
if (acl) {
if (!error)
error = __ext2_set_acl(inode, acl, ACL_TYPE_ACCESS);
posix_acl_release(acl);
+ } else {
+ inode->i_acl = NULL;
}
return error;
}
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 00e759f05161..e770cd100a6a 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -390,11 +390,7 @@ struct ext2_inode {
#define EXT2_MOUNT_USRQUOTA 0x020000 /* user quota */
#define EXT2_MOUNT_GRPQUOTA 0x040000 /* group quota */
#define EXT2_MOUNT_RESERVATION 0x080000 /* Preallocation */
-#ifdef CONFIG_FS_DAX
#define EXT2_MOUNT_DAX 0x100000 /* Direct Access */
-#else
-#define EXT2_MOUNT_DAX 0
-#endif
#define clear_opt(o, opt) o &= ~EXT2_MOUNT_##opt
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 73bd58fa13de..cb91baa4275d 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -309,20 +309,17 @@ static int ext2_show_options(struct seq_file *seq, struct dentry *root)
if (test_opt(sb, NOBH))
seq_puts(seq, ",nobh");
-#if defined(CONFIG_QUOTA)
if (sbi->s_mount_opt & EXT2_MOUNT_USRQUOTA)
seq_puts(seq, ",usrquota");
if (sbi->s_mount_opt & EXT2_MOUNT_GRPQUOTA)
seq_puts(seq, ",grpquota");
-#endif
-#ifdef CONFIG_FS_DAX
if (sbi->s_mount_opt & EXT2_MOUNT_XIP)
seq_puts(seq, ",xip");
+
if (sbi->s_mount_opt & EXT2_MOUNT_DAX)
seq_puts(seq, ",dax");
-#endif
if (!test_opt(sb, RESERVATION))
seq_puts(seq, ",noreservation");
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index c3d9a42c561e..05f01fbd9c7f 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2643,7 +2643,7 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
long left = mpd->wbc->nr_to_write;
pgoff_t index = mpd->first_page;
pgoff_t end = mpd->last_page;
- int tag;
+ xa_mark_t tag;
int i, err = 0;
int blkbits = mpd->inode->i_blkbits;
ext4_lblk_t lblk;
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 106f116466bf..b293cb3e27a2 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -2071,7 +2071,7 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
pgoff_t done_index;
int cycled;
int range_whole = 0;
- int tag;
+ xa_mark_t tag;
int nwritten = 0;
pagevec_init(&pvec);
@@ -2787,13 +2787,13 @@ const struct address_space_operations f2fs_dblock_aops = {
#endif
};
-void f2fs_clear_radix_tree_dirty_tag(struct page *page)
+void f2fs_clear_page_cache_dirty_tag(struct page *page)
{
struct address_space *mapping = page_mapping(page);
unsigned long flags;
xa_lock_irqsave(&mapping->i_pages, flags);
- radix_tree_tag_clear(&mapping->i_pages, page_index(page),
+ __xa_clear_mark(&mapping->i_pages, page_index(page),
PAGECACHE_TAG_DIRTY);
xa_unlock_irqrestore(&mapping->i_pages, flags);
}
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 2ef84b4590ea..bacc667950b6 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -726,7 +726,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
if (bit_pos == NR_DENTRY_IN_BLOCK &&
!f2fs_truncate_hole(dir, page->index, page->index + 1)) {
- f2fs_clear_radix_tree_dirty_tag(page);
+ f2fs_clear_page_cache_dirty_tag(page);
clear_page_dirty_for_io(page);
ClearPagePrivate(page);
ClearPageUptodate(page);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 56204a8f8a12..1e031971a466 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -3108,7 +3108,7 @@ int f2fs_migrate_page(struct address_space *mapping, struct page *newpage,
struct page *page, enum migrate_mode mode);
#endif
bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len);
-void f2fs_clear_radix_tree_dirty_tag(struct page *page);
+void f2fs_clear_page_cache_dirty_tag(struct page *page);
/*
* gc.c
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index cb31a719b048..7b0cff7e6051 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -243,7 +243,7 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page)
kunmap_atomic(src_addr);
set_page_dirty(dn.inode_page);
- f2fs_clear_radix_tree_dirty_tag(page);
+ f2fs_clear_page_cache_dirty_tag(page);
set_inode_flag(inode, FI_APPEND_WRITE);
set_inode_flag(inode, FI_DATA_EXIST);
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 2b34206486d8..d338740d0fda 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -101,7 +101,7 @@ bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type)
static void clear_node_page_dirty(struct page *page)
{
if (PageDirty(page)) {
- f2fs_clear_radix_tree_dirty_tag(page);
+ f2fs_clear_page_cache_dirty_tag(page);
clear_page_dirty_for_io(page);
dec_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES);
}
@@ -1306,9 +1306,7 @@ void f2fs_ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
if (f2fs_check_nid_range(sbi, nid))
return;
- rcu_read_lock();
- apage = radix_tree_lookup(&NODE_MAPPING(sbi)->i_pages, nid);
- rcu_read_unlock();
+ apage = xa_load(&NODE_MAPPING(sbi)->i_pages, nid);
if (apage)
return;
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 471d863958bc..b40168fcc94a 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -339,9 +339,9 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
struct address_space *mapping = inode->i_mapping;
struct bdi_writeback *old_wb = inode->i_wb;
struct bdi_writeback *new_wb = isw->new_wb;
- struct radix_tree_iter iter;
+ XA_STATE(xas, &mapping->i_pages, 0);
+ struct page *page;
bool switched = false;
- void **slot;
/*
* By the time control reaches here, RCU grace period has passed
@@ -375,25 +375,18 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
* to possibly dirty pages while PAGECACHE_TAG_WRITEBACK points to
* pages actually under writeback.
*/
- radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter, 0,
- PAGECACHE_TAG_DIRTY) {
- struct page *page = radix_tree_deref_slot_protected(slot,
- &mapping->i_pages.xa_lock);
- if (likely(page) && PageDirty(page)) {
+ xas_for_each_marked(&xas, page, ULONG_MAX, PAGECACHE_TAG_DIRTY) {
+ if (PageDirty(page)) {
dec_wb_stat(old_wb, WB_RECLAIMABLE);
inc_wb_stat(new_wb, WB_RECLAIMABLE);
}
}
- radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter, 0,
- PAGECACHE_TAG_WRITEBACK) {
- struct page *page = radix_tree_deref_slot_protected(slot,
- &mapping->i_pages.xa_lock);
- if (likely(page)) {
- WARN_ON_ONCE(!PageWriteback(page));
- dec_wb_stat(old_wb, WB_WRITEBACK);
- inc_wb_stat(new_wb, WB_WRITEBACK);
- }
+ xas_set(&xas, 0);
+ xas_for_each_marked(&xas, page, ULONG_MAX, PAGECACHE_TAG_WRITEBACK) {
+ WARN_ON_ONCE(!PageWriteback(page));
+ dec_wb_stat(old_wb, WB_WRITEBACK);
+ inc_wb_stat(new_wb, WB_WRITEBACK);
}
wb_get(new_wb);
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 31e8270d0b26..8afbb35559b9 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -366,7 +366,7 @@ static int gfs2_write_cache_jdata(struct address_space *mapping,
pgoff_t done_index;
int cycled;
int range_whole = 0;
- int tag;
+ xa_mark_t tag;
pagevec_init(&pvec);
if (wbc->range_cyclic) {
diff --git a/fs/inode.c b/fs/inode.c
index 42f6d25f32a5..9b808986d440 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -349,7 +349,7 @@ EXPORT_SYMBOL(inc_nlink);
static void __address_space_init_once(struct address_space *mapping)
{
- INIT_RADIX_TREE(&mapping->i_pages, GFP_ATOMIC | __GFP_ACCOUNT);
+ xa_init_flags(&mapping->i_pages, XA_FLAGS_LOCK_IRQ);
init_rwsem(&mapping->i_mmap_rwsem);
INIT_LIST_HEAD(&mapping->private_list);
spin_lock_init(&mapping->private_lock);
diff --git a/fs/iomap.c b/fs/iomap.c
index ec15cf2ec696..90c2febc93ac 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -1057,7 +1057,7 @@ iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
return length;
}
-int iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops)
+vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops)
{
struct page *page = vmf->page;
struct inode *inode = file_inode(vmf->vma->vm_file);
diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c
index 947ce22f5b3c..f0fe641893a5 100644
--- a/fs/isofs/dir.c
+++ b/fs/isofs/dir.c
@@ -46,7 +46,7 @@ int isofs_name_translate(struct iso_directory_record *de, char *new, struct inod
return i;
}
-/* Acorn extensions written by Matthew Wilcox <willy@bofh.ai> 1998 */
+/* Acorn extensions written by Matthew Wilcox <willy@infradead.org> 1998 */
int get_acorn_filename(struct iso_directory_record *de,
char *retname, struct inode *inode)
{
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index ff2716f9322e..fdf527b6d79c 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -236,6 +236,9 @@ static int kernfs_fill_super(struct super_block *sb, unsigned long magic)
sb->s_export_op = &kernfs_export_ops;
sb->s_time_gran = 1;
+ /* sysfs dentries and inodes don't require IO to create */
+ sb->s_shrink.seeks = 0;
+
/* get root inode, initialize and unlock it */
mutex_lock(&kernfs_mutex);
inode = kernfs_get_inode(sb, info->root->kn);
diff --git a/fs/kernfs/symlink.c b/fs/kernfs/symlink.c
index 305b220af45d..162f43b80c84 100644
--- a/fs/kernfs/symlink.c
+++ b/fs/kernfs/symlink.c
@@ -72,6 +72,9 @@ static int kernfs_get_target_path(struct kernfs_node *parent,
if (base == kn)
break;
+ if ((s - path) + 3 >= PATH_MAX)
+ return -ENAMETOOLONG;
+
strcpy(s, "../");
s += 3;
base = base->parent;
@@ -88,7 +91,7 @@ static int kernfs_get_target_path(struct kernfs_node *parent,
if (len < 2)
return -EINVAL;
len--;
- if ((s - path) + len > PATH_MAX)
+ if ((s - path) + len >= PATH_MAX)
return -ENAMETOOLONG;
/* reverse fillup of target string from target to base */
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 06cb0c1d9aee..d3781cd983f6 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -896,7 +896,7 @@ static u64 pnfs_num_cont_bytes(struct inode *inode, pgoff_t idx)
end = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
if (end != inode->i_mapping->nrpages) {
rcu_read_lock();
- end = page_cache_next_hole(mapping, idx + 1, ULONG_MAX);
+ end = page_cache_next_miss(mapping, idx + 1, ULONG_MAX);
rcu_read_unlock();
}
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index f033f3a69a3b..07b839560576 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -93,7 +93,7 @@ int nfs4_check_delegation(struct inode *inode, fmode_t flags)
return nfs4_do_check_delegation(inode, flags, false);
}
-static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid)
+static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_stateid *stateid)
{
struct inode *inode = state->inode;
struct file_lock *fl;
@@ -108,7 +108,7 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_
spin_lock(&flctx->flc_lock);
restart:
list_for_each_entry(fl, list, fl_list) {
- if (nfs_file_open_context(fl->fl_file) != ctx)
+ if (nfs_file_open_context(fl->fl_file)->state != state)
continue;
spin_unlock(&flctx->flc_lock);
status = nfs4_lock_delegation_recall(fl, state, stateid);
@@ -136,8 +136,8 @@ static int nfs_delegation_claim_opens(struct inode *inode,
int err;
again:
- spin_lock(&inode->i_lock);
- list_for_each_entry(ctx, &nfsi->open_files, list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(ctx, &nfsi->open_files, list) {
state = ctx->state;
if (state == NULL)
continue;
@@ -147,15 +147,16 @@ again:
continue;
if (!nfs4_stateid_match(&state->stateid, stateid))
continue;
- get_nfs_open_context(ctx);
- spin_unlock(&inode->i_lock);
+ if (!get_nfs_open_context(ctx))
+ continue;
+ rcu_read_unlock();
sp = state->owner;
/* Block nfs4_proc_unlck */
mutex_lock(&sp->so_delegreturn_mutex);
seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
err = nfs4_open_delegation_recall(ctx, state, stateid, type);
if (!err)
- err = nfs_delegation_claim_locks(ctx, state, stateid);
+ err = nfs_delegation_claim_locks(state, stateid);
if (!err && read_seqcount_retry(&sp->so_reclaim_seqcount, seq))
err = -EAGAIN;
mutex_unlock(&sp->so_delegreturn_mutex);
@@ -164,7 +165,7 @@ again:
return err;
goto again;
}
- spin_unlock(&inode->i_lock);
+ rcu_read_unlock();
return 0;
}
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 8bfaa658b2c1..71b2e390becf 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1072,6 +1072,100 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
return !nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU);
}
+static int
+nfs_lookup_revalidate_done(struct inode *dir, struct dentry *dentry,
+ struct inode *inode, int error)
+{
+ switch (error) {
+ case 1:
+ dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is valid\n",
+ __func__, dentry);
+ return 1;
+ case 0:
+ nfs_mark_for_revalidate(dir);
+ if (inode && S_ISDIR(inode->i_mode)) {
+ /* Purge readdir caches. */
+ nfs_zap_caches(inode);
+ /*
+ * We can't d_drop the root of a disconnected tree:
+ * its d_hash is on the s_anon list and d_drop() would hide
+ * it from shrink_dcache_for_unmount(), leading to busy
+ * inodes on unmount and further oopses.
+ */
+ if (IS_ROOT(dentry))
+ return 1;
+ }
+ dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is invalid\n",
+ __func__, dentry);
+ return 0;
+ }
+ dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) lookup returned error %d\n",
+ __func__, dentry, error);
+ return error;
+}
+
+static int
+nfs_lookup_revalidate_negative(struct inode *dir, struct dentry *dentry,
+ unsigned int flags)
+{
+ int ret = 1;
+ if (nfs_neg_need_reval(dir, dentry, flags)) {
+ if (flags & LOOKUP_RCU)
+ return -ECHILD;
+ ret = 0;
+ }
+ return nfs_lookup_revalidate_done(dir, dentry, NULL, ret);
+}
+
+static int
+nfs_lookup_revalidate_delegated(struct inode *dir, struct dentry *dentry,
+ struct inode *inode)
+{
+ nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+ return nfs_lookup_revalidate_done(dir, dentry, inode, 1);
+}
+
+static int
+nfs_lookup_revalidate_dentry(struct inode *dir, struct dentry *dentry,
+ struct inode *inode)
+{
+ struct nfs_fh *fhandle;
+ struct nfs_fattr *fattr;
+ struct nfs4_label *label;
+ int ret;
+
+ ret = -ENOMEM;
+ fhandle = nfs_alloc_fhandle();
+ fattr = nfs_alloc_fattr();
+ label = nfs4_label_alloc(NFS_SERVER(inode), GFP_KERNEL);
+ if (fhandle == NULL || fattr == NULL || IS_ERR(label))
+ goto out;
+
+ ret = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label);
+ if (ret < 0) {
+ if (ret == -ESTALE || ret == -ENOENT)
+ ret = 0;
+ goto out;
+ }
+ ret = 0;
+ if (nfs_compare_fh(NFS_FH(inode), fhandle))
+ goto out;
+ if (nfs_refresh_inode(inode, fattr) < 0)
+ goto out;
+
+ nfs_setsecurity(inode, fattr, label);
+ nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+
+ /* set a readdirplus hint that we had a cache miss */
+ nfs_force_use_readdirplus(dir);
+ ret = 1;
+out:
+ nfs_free_fattr(fattr);
+ nfs_free_fhandle(fhandle);
+ nfs4_label_free(label);
+ return nfs_lookup_revalidate_done(dir, dentry, inode, ret);
+}
+
/*
* This is called every time the dcache has a lookup hit,
* and we should check whether we can really trust that
@@ -1083,58 +1177,36 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
* If the parent directory is seen to have changed, we throw out the
* cached dentry and do a new lookup.
*/
-static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
+static int
+nfs_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
+ unsigned int flags)
{
- struct inode *dir;
struct inode *inode;
- struct dentry *parent;
- struct nfs_fh *fhandle = NULL;
- struct nfs_fattr *fattr = NULL;
- struct nfs4_label *label = NULL;
int error;
- if (flags & LOOKUP_RCU) {
- parent = READ_ONCE(dentry->d_parent);
- dir = d_inode_rcu(parent);
- if (!dir)
- return -ECHILD;
- } else {
- parent = dget_parent(dentry);
- dir = d_inode(parent);
- }
nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE);
inode = d_inode(dentry);
- if (!inode) {
- if (nfs_neg_need_reval(dir, dentry, flags)) {
- if (flags & LOOKUP_RCU)
- return -ECHILD;
- goto out_bad;
- }
- goto out_valid;
- }
+ if (!inode)
+ return nfs_lookup_revalidate_negative(dir, dentry, flags);
if (is_bad_inode(inode)) {
- if (flags & LOOKUP_RCU)
- return -ECHILD;
dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n",
__func__, dentry);
goto out_bad;
}
if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ))
- goto out_set_verifier;
+ return nfs_lookup_revalidate_delegated(dir, dentry, inode);
/* Force a full look up iff the parent directory has changed */
if (!(flags & (LOOKUP_EXCL | LOOKUP_REVAL)) &&
nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU)) {
error = nfs_lookup_verify_inode(inode, flags);
if (error) {
- if (flags & LOOKUP_RCU)
- return -ECHILD;
if (error == -ESTALE)
- goto out_zap_parent;
- goto out_error;
+ nfs_zap_caches(dir);
+ goto out_bad;
}
nfs_advise_use_readdirplus(dir);
goto out_valid;
@@ -1146,81 +1218,45 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
if (NFS_STALE(inode))
goto out_bad;
- error = -ENOMEM;
- fhandle = nfs_alloc_fhandle();
- fattr = nfs_alloc_fattr();
- if (fhandle == NULL || fattr == NULL)
- goto out_error;
-
- label = nfs4_label_alloc(NFS_SERVER(inode), GFP_NOWAIT);
- if (IS_ERR(label))
- goto out_error;
-
trace_nfs_lookup_revalidate_enter(dir, dentry, flags);
- error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label);
+ error = nfs_lookup_revalidate_dentry(dir, dentry, inode);
trace_nfs_lookup_revalidate_exit(dir, dentry, flags, error);
- if (error == -ESTALE || error == -ENOENT)
- goto out_bad;
- if (error)
- goto out_error;
- if (nfs_compare_fh(NFS_FH(inode), fhandle))
- goto out_bad;
- if ((error = nfs_refresh_inode(inode, fattr)) != 0)
- goto out_bad;
-
- nfs_setsecurity(inode, fattr, label);
-
- nfs_free_fattr(fattr);
- nfs_free_fhandle(fhandle);
- nfs4_label_free(label);
+ return error;
+out_valid:
+ return nfs_lookup_revalidate_done(dir, dentry, inode, 1);
+out_bad:
+ if (flags & LOOKUP_RCU)
+ return -ECHILD;
+ return nfs_lookup_revalidate_done(dir, dentry, inode, 0);
+}
- /* set a readdirplus hint that we had a cache miss */
- nfs_force_use_readdirplus(dir);
+static int
+__nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags,
+ int (*reval)(struct inode *, struct dentry *, unsigned int))
+{
+ struct dentry *parent;
+ struct inode *dir;
+ int ret;
-out_set_verifier:
- nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
- out_valid:
if (flags & LOOKUP_RCU) {
+ parent = READ_ONCE(dentry->d_parent);
+ dir = d_inode_rcu(parent);
+ if (!dir)
+ return -ECHILD;
+ ret = reval(dir, dentry, flags);
if (parent != READ_ONCE(dentry->d_parent))
return -ECHILD;
- } else
+ } else {
+ parent = dget_parent(dentry);
+ ret = reval(d_inode(parent), dentry, flags);
dput(parent);
- dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is valid\n",
- __func__, dentry);
- return 1;
-out_zap_parent:
- nfs_zap_caches(dir);
- out_bad:
- WARN_ON(flags & LOOKUP_RCU);
- nfs_free_fattr(fattr);
- nfs_free_fhandle(fhandle);
- nfs4_label_free(label);
- nfs_mark_for_revalidate(dir);
- if (inode && S_ISDIR(inode->i_mode)) {
- /* Purge readdir caches. */
- nfs_zap_caches(inode);
- /*
- * We can't d_drop the root of a disconnected tree:
- * its d_hash is on the s_anon list and d_drop() would hide
- * it from shrink_dcache_for_unmount(), leading to busy
- * inodes on unmount and further oopses.
- */
- if (IS_ROOT(dentry))
- goto out_valid;
}
- dput(parent);
- dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is invalid\n",
- __func__, dentry);
- return 0;
-out_error:
- WARN_ON(flags & LOOKUP_RCU);
- nfs_free_fattr(fattr);
- nfs_free_fhandle(fhandle);
- nfs4_label_free(label);
- dput(parent);
- dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) lookup returned error %d\n",
- __func__, dentry, error);
- return error;
+ return ret;
+}
+
+static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
+{
+ return __nfs_lookup_revalidate(dentry, flags, nfs_do_lookup_revalidate);
}
/*
@@ -1579,62 +1615,55 @@ no_open:
}
EXPORT_SYMBOL_GPL(nfs_atomic_open);
-static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
+static int
+nfs4_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
+ unsigned int flags)
{
struct inode *inode;
- int ret = 0;
if (!(flags & LOOKUP_OPEN) || (flags & LOOKUP_DIRECTORY))
- goto no_open;
+ goto full_reval;
if (d_mountpoint(dentry))
- goto no_open;
- if (NFS_SB(dentry->d_sb)->caps & NFS_CAP_ATOMIC_OPEN_V1)
- goto no_open;
+ goto full_reval;
inode = d_inode(dentry);
/* We can't create new files in nfs_open_revalidate(), so we
* optimize away revalidation of negative dentries.
*/
- if (inode == NULL) {
- struct dentry *parent;
- struct inode *dir;
-
- if (flags & LOOKUP_RCU) {
- parent = READ_ONCE(dentry->d_parent);
- dir = d_inode_rcu(parent);
- if (!dir)
- return -ECHILD;
- } else {
- parent = dget_parent(dentry);
- dir = d_inode(parent);
- }
- if (!nfs_neg_need_reval(dir, dentry, flags))
- ret = 1;
- else if (flags & LOOKUP_RCU)
- ret = -ECHILD;
- if (!(flags & LOOKUP_RCU))
- dput(parent);
- else if (parent != READ_ONCE(dentry->d_parent))
- return -ECHILD;
- goto out;
- }
+ if (inode == NULL)
+ goto full_reval;
+
+ if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ))
+ return nfs_lookup_revalidate_delegated(dir, dentry, inode);
/* NFS only supports OPEN on regular files */
if (!S_ISREG(inode->i_mode))
- goto no_open;
+ goto full_reval;
+
/* We cannot do exclusive creation on a positive dentry */
- if (flags & LOOKUP_EXCL)
- goto no_open;
+ if (flags & (LOOKUP_EXCL | LOOKUP_REVAL))
+ goto reval_dentry;
+
+ /* Check if the directory changed */
+ if (!nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU))
+ goto reval_dentry;
/* Let f_op->open() actually open (and revalidate) the file */
- ret = 1;
+ return 1;
+reval_dentry:
+ if (flags & LOOKUP_RCU)
+ return -ECHILD;
+ return nfs_lookup_revalidate_dentry(dir, dentry, inode);;
-out:
- return ret;
+full_reval:
+ return nfs_do_lookup_revalidate(dir, dentry, flags);
+}
-no_open:
- return nfs_lookup_revalidate(dentry, flags);
+static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
+{
+ return __nfs_lookup_revalidate(dentry, flags,
+ nfs4_do_lookup_revalidate);
}
#endif /* CONFIG_NFSV4 */
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index d175724ff566..61f46facb39c 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -1164,6 +1164,7 @@ static struct pnfs_layoutdriver_type filelayout_type = {
.id = LAYOUT_NFSV4_1_FILES,
.name = "LAYOUT_NFSV4_1_FILES",
.owner = THIS_MODULE,
+ .max_layoutget_response = 4096, /* 1 page or so... */
.alloc_layout_hdr = filelayout_alloc_layout_hdr,
.free_layout_hdr = filelayout_free_layout_hdr,
.alloc_lseg = filelayout_alloc_lseg,
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index cae43333ef16..86bcba40ca61 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -2356,6 +2356,7 @@ static struct pnfs_layoutdriver_type flexfilelayout_type = {
.name = "LAYOUT_FLEX_FILES",
.owner = THIS_MODULE,
.flags = PNFS_LAYOUTGET_ON_OPEN,
+ .max_layoutget_response = 4096, /* 1 page or so... */
.set_layoutdriver = ff_layout_set_layoutdriver,
.alloc_layout_hdr = ff_layout_alloc_layout_hdr,
.free_layout_hdr = ff_layout_free_layout_hdr,
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index 59aa04976331..74d8d5352438 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -453,7 +453,7 @@ ff_layout_get_ds_cred(struct pnfs_layout_segment *lseg, u32 ds_idx,
struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx);
struct rpc_cred *cred;
- if (mirror) {
+ if (mirror && !mirror->mirror_ds->ds_versions[0].tightly_coupled) {
cred = ff_layout_get_mirror_cred(mirror, lseg->pls_range.iomode);
if (!cred)
cred = get_rpccred(mdscred);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index b65aee481d13..5b1eee4952b7 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -857,15 +857,14 @@ static void nfs_init_lock_context(struct nfs_lock_context *l_ctx)
static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context *ctx)
{
- struct nfs_lock_context *head = &ctx->lock_context;
- struct nfs_lock_context *pos = head;
+ struct nfs_lock_context *pos;
- do {
+ list_for_each_entry_rcu(pos, &ctx->lock_context.list, list) {
if (pos->lockowner != current->files)
continue;
- refcount_inc(&pos->count);
- return pos;
- } while ((pos = list_entry(pos->list.next, typeof(*pos), list)) != head);
+ if (refcount_inc_not_zero(&pos->count))
+ return pos;
+ }
return NULL;
}
@@ -874,10 +873,10 @@ struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx)
struct nfs_lock_context *res, *new = NULL;
struct inode *inode = d_inode(ctx->dentry);
- spin_lock(&inode->i_lock);
+ rcu_read_lock();
res = __nfs_find_lock_context(ctx);
+ rcu_read_unlock();
if (res == NULL) {
- spin_unlock(&inode->i_lock);
new = kmalloc(sizeof(*new), GFP_KERNEL);
if (new == NULL)
return ERR_PTR(-ENOMEM);
@@ -885,14 +884,14 @@ struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx)
spin_lock(&inode->i_lock);
res = __nfs_find_lock_context(ctx);
if (res == NULL) {
- list_add_tail(&new->list, &ctx->lock_context.list);
+ list_add_tail_rcu(&new->list, &ctx->lock_context.list);
new->open_context = ctx;
res = new;
new = NULL;
}
+ spin_unlock(&inode->i_lock);
+ kfree(new);
}
- spin_unlock(&inode->i_lock);
- kfree(new);
return res;
}
EXPORT_SYMBOL_GPL(nfs_get_lock_context);
@@ -904,9 +903,9 @@ void nfs_put_lock_context(struct nfs_lock_context *l_ctx)
if (!refcount_dec_and_lock(&l_ctx->count, &inode->i_lock))
return;
- list_del(&l_ctx->list);
+ list_del_rcu(&l_ctx->list);
spin_unlock(&inode->i_lock);
- kfree(l_ctx);
+ kfree_rcu(l_ctx, rcu_head);
}
EXPORT_SYMBOL_GPL(nfs_put_lock_context);
@@ -978,9 +977,9 @@ EXPORT_SYMBOL_GPL(alloc_nfs_open_context);
struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx)
{
- if (ctx != NULL)
- refcount_inc(&ctx->lock_context.count);
- return ctx;
+ if (ctx != NULL && refcount_inc_not_zero(&ctx->lock_context.count))
+ return ctx;
+ return NULL;
}
EXPORT_SYMBOL_GPL(get_nfs_open_context);
@@ -989,13 +988,13 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
struct inode *inode = d_inode(ctx->dentry);
struct super_block *sb = ctx->dentry->d_sb;
+ if (!refcount_dec_and_test(&ctx->lock_context.count))
+ return;
if (!list_empty(&ctx->list)) {
- if (!refcount_dec_and_lock(&ctx->lock_context.count, &inode->i_lock))
- return;
- list_del(&ctx->list);
+ spin_lock(&inode->i_lock);
+ list_del_rcu(&ctx->list);
spin_unlock(&inode->i_lock);
- } else if (!refcount_dec_and_test(&ctx->lock_context.count))
- return;
+ }
if (inode != NULL)
NFS_PROTO(inode)->close_context(ctx, is_sync);
if (ctx->cred != NULL)
@@ -1003,7 +1002,7 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
dput(ctx->dentry);
nfs_sb_deactive(sb);
kfree(ctx->mdsthreshold);
- kfree(ctx);
+ kfree_rcu(ctx, rcu_head);
}
void put_nfs_open_context(struct nfs_open_context *ctx)
@@ -1027,10 +1026,7 @@ void nfs_inode_attach_open_context(struct nfs_open_context *ctx)
struct nfs_inode *nfsi = NFS_I(inode);
spin_lock(&inode->i_lock);
- if (ctx->mode & FMODE_WRITE)
- list_add(&ctx->list, &nfsi->open_files);
- else
- list_add_tail(&ctx->list, &nfsi->open_files);
+ list_add_tail_rcu(&ctx->list, &nfsi->open_files);
spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL_GPL(nfs_inode_attach_open_context);
@@ -1051,16 +1047,17 @@ struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_c
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_open_context *pos, *ctx = NULL;
- spin_lock(&inode->i_lock);
- list_for_each_entry(pos, &nfsi->open_files, list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(pos, &nfsi->open_files, list) {
if (cred != NULL && pos->cred != cred)
continue;
if ((pos->mode & (FMODE_READ|FMODE_WRITE)) != mode)
continue;
ctx = get_nfs_open_context(pos);
- break;
+ if (ctx)
+ break;
}
- spin_unlock(&inode->i_lock);
+ rcu_read_unlock();
return ctx;
}
@@ -1078,9 +1075,6 @@ void nfs_file_clear_open_context(struct file *filp)
if (ctx->error < 0)
invalidate_inode_pages2(inode->i_mapping);
filp->private_data = NULL;
- spin_lock(&inode->i_lock);
- list_move_tail(&ctx->list, &NFS_I(inode)->open_files);
- spin_unlock(&inode->i_lock);
put_nfs_open_context_sync(ctx);
}
}
@@ -1329,19 +1323,11 @@ static bool nfs_file_has_writers(struct nfs_inode *nfsi)
{
struct inode *inode = &nfsi->vfs_inode;
- assert_spin_locked(&inode->i_lock);
-
if (!S_ISREG(inode->i_mode))
return false;
if (list_empty(&nfsi->open_files))
return false;
- /* Note: This relies on nfsi->open_files being ordered with writers
- * being placed at the head of the list.
- * See nfs_inode_attach_open_context()
- */
- return (list_first_entry(&nfsi->open_files,
- struct nfs_open_context,
- list)->mode & FMODE_WRITE) == FMODE_WRITE;
+ return inode_is_open_for_write(inode);
}
static bool nfs_file_has_buffered_writers(struct nfs_inode *nfsi)
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index ec8a9efa268f..71bc16225b98 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -786,6 +786,7 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
{
struct inode *inode = hdr->inode;
+ struct nfs_server *server = NFS_SERVER(inode);
if (hdr->pgio_done_cb != NULL)
return hdr->pgio_done_cb(task, hdr);
@@ -793,6 +794,9 @@ static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
if (nfs3_async_handle_jukebox(task, inode))
return -EAGAIN;
+ if (task->tk_status >= 0 && !server->read_hdrsize)
+ cmpxchg(&server->read_hdrsize, 0, hdr->res.replen);
+
nfs_invalidate_atime(inode);
nfs_refresh_inode(inode, &hdr->fattr);
return 0;
@@ -802,6 +806,7 @@ static void nfs3_proc_read_setup(struct nfs_pgio_header *hdr,
struct rpc_message *msg)
{
msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ];
+ hdr->args.replen = NFS_SERVER(hdr->inode)->read_hdrsize;
}
static int nfs3_proc_pgio_rpc_prepare(struct rpc_task *task,
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 64e4fa33d89f..78df4eb60f85 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -983,10 +983,11 @@ static void nfs3_xdr_enc_read3args(struct rpc_rqst *req,
const void *data)
{
const struct nfs_pgio_args *args = data;
+ unsigned int replen = args->replen ? args->replen : NFS3_readres_sz;
encode_read3args(xdr, args);
prepare_reply_buffer(req, args->pages, args->pgbase,
- args->count, NFS3_readres_sz);
+ args->count, replen);
req->rq_rcv_buf.flags |= XDRBUF_READ;
}
@@ -1364,10 +1365,12 @@ static void nfs3_xdr_enc_getacl3args(struct rpc_rqst *req,
encode_nfs_fh3(xdr, args->fh);
encode_uint32(xdr, args->mask);
- if (args->mask & (NFS_ACL | NFS_DFACL))
+ if (args->mask & (NFS_ACL | NFS_DFACL)) {
prepare_reply_buffer(req, args->pages, 0,
NFSACL_MAXPAGES << PAGE_SHIFT,
ACL3_getaclres_sz);
+ req->rq_rcv_buf.flags |= XDRBUF_SPARSE_PAGES;
+ }
}
static void nfs3_xdr_enc_setacl3args(struct rpc_rqst *req,
@@ -1673,9 +1676,11 @@ static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
void *data)
{
struct nfs_pgio_res *result = data;
+ unsigned int pos;
enum nfs_stat status;
int error;
+ pos = xdr_stream_pos(xdr);
error = decode_nfsstat3(xdr, &status);
if (unlikely(error))
goto out;
@@ -1685,6 +1690,7 @@ static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
result->op_status = status;
if (status != NFS3_OK)
goto out_status;
+ result->replen = 3 + ((xdr_stream_pos(xdr) - pos) >> 2);
error = decode_read3resok(xdr, result);
out:
return error;
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 3a6904173214..8d59c9655ec4 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -188,9 +188,10 @@ struct nfs4_state {
unsigned int n_wronly; /* Number of write-only references */
unsigned int n_rdwr; /* Number of read/write references */
fmode_t state; /* State on the server (R,W, or RW) */
- atomic_t count;
+ refcount_t count;
wait_queue_head_t waitq;
+ struct rcu_head rcu_head;
};
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 146e30862234..8f53455c4765 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -950,10 +950,10 @@ EXPORT_SYMBOL_GPL(nfs4_set_ds_client);
/*
* Session has been established, and the client marked ready.
- * Set the mount rsize and wsize with negotiated fore channel
- * attributes which will be bound checked in nfs_server_set_fsinfo.
+ * Limit the mount rsize, wsize and dtsize using negotiated fore
+ * channel attributes.
*/
-static void nfs4_session_set_rwsize(struct nfs_server *server)
+static void nfs4_session_limit_rwsize(struct nfs_server *server)
{
#ifdef CONFIG_NFS_V4_1
struct nfs4_session *sess;
@@ -966,9 +966,11 @@ static void nfs4_session_set_rwsize(struct nfs_server *server)
server_resp_sz = sess->fc_attrs.max_resp_sz - nfs41_maxread_overhead;
server_rqst_sz = sess->fc_attrs.max_rqst_sz - nfs41_maxwrite_overhead;
- if (!server->rsize || server->rsize > server_resp_sz)
+ if (server->dtsize > server_resp_sz)
+ server->dtsize = server_resp_sz;
+ if (server->rsize > server_resp_sz)
server->rsize = server_resp_sz;
- if (!server->wsize || server->wsize > server_rqst_sz)
+ if (server->wsize > server_rqst_sz)
server->wsize = server_rqst_sz;
#endif /* CONFIG_NFS_V4_1 */
}
@@ -1015,12 +1017,12 @@ static int nfs4_server_common_setup(struct nfs_server *server,
(unsigned long long) server->fsid.minor);
nfs_display_fhandle(mntfh, "Pseudo-fs root FH");
- nfs4_session_set_rwsize(server);
-
error = nfs_probe_fsinfo(server, mntfh, fattr);
if (error < 0)
goto out;
+ nfs4_session_limit_rwsize(server);
+
if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
server->namelen = NFS4_MAXNAMLEN;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 8220a168282e..db84b4adbc49 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1349,12 +1349,20 @@ static bool nfs4_mode_match_open_stateid(struct nfs4_state *state,
return false;
}
-static int can_open_cached(struct nfs4_state *state, fmode_t mode, int open_mode)
+static int can_open_cached(struct nfs4_state *state, fmode_t mode,
+ int open_mode, enum open_claim_type4 claim)
{
int ret = 0;
if (open_mode & (O_EXCL|O_TRUNC))
goto out;
+ switch (claim) {
+ case NFS4_OPEN_CLAIM_NULL:
+ case NFS4_OPEN_CLAIM_FH:
+ goto out;
+ default:
+ break;
+ }
switch (mode & (FMODE_READ|FMODE_WRITE)) {
case FMODE_READ:
ret |= test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0
@@ -1747,7 +1755,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
for (;;) {
spin_lock(&state->owner->so_lock);
- if (can_open_cached(state, fmode, open_mode)) {
+ if (can_open_cached(state, fmode, open_mode, claim)) {
update_open_stateflags(state, fmode);
spin_unlock(&state->owner->so_lock);
goto out_return_state;
@@ -1777,7 +1785,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
out:
return ERR_PTR(ret);
out_return_state:
- atomic_inc(&state->count);
+ refcount_inc(&state->count);
return state;
}
@@ -1849,7 +1857,7 @@ _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data)
update:
update_open_stateid(state, &data->o_res.stateid, NULL,
data->o_arg.fmode);
- atomic_inc(&state->count);
+ refcount_inc(&state->count);
return state;
}
@@ -1887,7 +1895,7 @@ nfs4_opendata_find_nfs4_state(struct nfs4_opendata *data)
return ERR_CAST(inode);
if (data->state != NULL && data->state->inode == inode) {
state = data->state;
- atomic_inc(&state->count);
+ refcount_inc(&state->count);
} else
state = nfs4_get_open_state(inode, data->owner);
iput(inode);
@@ -1933,23 +1941,41 @@ nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data)
return ret;
}
-static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state *state)
+static struct nfs_open_context *
+nfs4_state_find_open_context_mode(struct nfs4_state *state, fmode_t mode)
{
struct nfs_inode *nfsi = NFS_I(state->inode);
struct nfs_open_context *ctx;
- spin_lock(&state->inode->i_lock);
- list_for_each_entry(ctx, &nfsi->open_files, list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(ctx, &nfsi->open_files, list) {
if (ctx->state != state)
continue;
- get_nfs_open_context(ctx);
- spin_unlock(&state->inode->i_lock);
+ if ((ctx->mode & mode) != mode)
+ continue;
+ if (!get_nfs_open_context(ctx))
+ continue;
+ rcu_read_unlock();
return ctx;
}
- spin_unlock(&state->inode->i_lock);
+ rcu_read_unlock();
return ERR_PTR(-ENOENT);
}
+static struct nfs_open_context *
+nfs4_state_find_open_context(struct nfs4_state *state)
+{
+ struct nfs_open_context *ctx;
+
+ ctx = nfs4_state_find_open_context_mode(state, FMODE_READ|FMODE_WRITE);
+ if (!IS_ERR(ctx))
+ return ctx;
+ ctx = nfs4_state_find_open_context_mode(state, FMODE_WRITE);
+ if (!IS_ERR(ctx))
+ return ctx;
+ return nfs4_state_find_open_context_mode(state, FMODE_READ);
+}
+
static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context *ctx,
struct nfs4_state *state, enum open_claim_type4 claim)
{
@@ -1960,7 +1986,7 @@ static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context
if (opendata == NULL)
return ERR_PTR(-ENOMEM);
opendata->state = state;
- atomic_inc(&state->count);
+ refcount_inc(&state->count);
return opendata;
}
@@ -2276,7 +2302,8 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
if (data->state != NULL) {
struct nfs_delegation *delegation;
- if (can_open_cached(data->state, data->o_arg.fmode, data->o_arg.open_flags))
+ if (can_open_cached(data->state, data->o_arg.fmode,
+ data->o_arg.open_flags, claim))
goto out_no_action;
rcu_read_lock();
delegation = rcu_dereference(NFS_I(data->state->inode)->delegation);
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 40a08cd483f0..62ae0fd345ad 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -655,7 +655,7 @@ nfs4_alloc_open_state(void)
state = kzalloc(sizeof(*state), GFP_NOFS);
if (!state)
return NULL;
- atomic_set(&state->count, 1);
+ refcount_set(&state->count, 1);
INIT_LIST_HEAD(&state->lock_states);
spin_lock_init(&state->state_lock);
seqlock_init(&state->seqlock);
@@ -684,12 +684,12 @@ __nfs4_find_state_byowner(struct inode *inode, struct nfs4_state_owner *owner)
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs4_state *state;
- list_for_each_entry(state, &nfsi->open_states, inode_states) {
+ list_for_each_entry_rcu(state, &nfsi->open_states, inode_states) {
if (state->owner != owner)
continue;
if (!nfs4_valid_open_stateid(state))
continue;
- if (atomic_inc_not_zero(&state->count))
+ if (refcount_inc_not_zero(&state->count))
return state;
}
return NULL;
@@ -698,7 +698,7 @@ __nfs4_find_state_byowner(struct inode *inode, struct nfs4_state_owner *owner)
static void
nfs4_free_open_state(struct nfs4_state *state)
{
- kfree(state);
+ kfree_rcu(state, rcu_head);
}
struct nfs4_state *
@@ -707,9 +707,9 @@ nfs4_get_open_state(struct inode *inode, struct nfs4_state_owner *owner)
struct nfs4_state *state, *new;
struct nfs_inode *nfsi = NFS_I(inode);
- spin_lock(&inode->i_lock);
+ rcu_read_lock();
state = __nfs4_find_state_byowner(inode, owner);
- spin_unlock(&inode->i_lock);
+ rcu_read_unlock();
if (state)
goto out;
new = nfs4_alloc_open_state();
@@ -720,7 +720,7 @@ nfs4_get_open_state(struct inode *inode, struct nfs4_state_owner *owner)
state = new;
state->owner = owner;
atomic_inc(&owner->so_count);
- list_add(&state->inode_states, &nfsi->open_states);
+ list_add_rcu(&state->inode_states, &nfsi->open_states);
ihold(inode);
state->inode = inode;
spin_unlock(&inode->i_lock);
@@ -743,10 +743,10 @@ void nfs4_put_open_state(struct nfs4_state *state)
struct inode *inode = state->inode;
struct nfs4_state_owner *owner = state->owner;
- if (!atomic_dec_and_lock(&state->count, &owner->so_lock))
+ if (!refcount_dec_and_lock(&state->count, &owner->so_lock))
return;
spin_lock(&inode->i_lock);
- list_del(&state->inode_states);
+ list_del_rcu(&state->inode_states);
list_del(&state->open_states);
spin_unlock(&inode->i_lock);
spin_unlock(&owner->so_lock);
@@ -1437,8 +1437,8 @@ void nfs_inode_find_state_and_recover(struct inode *inode,
struct nfs4_state *state;
bool found = false;
- spin_lock(&inode->i_lock);
- list_for_each_entry(ctx, &nfsi->open_files, list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(ctx, &nfsi->open_files, list) {
state = ctx->state;
if (state == NULL)
continue;
@@ -1456,7 +1456,7 @@ void nfs_inode_find_state_and_recover(struct inode *inode,
nfs4_state_mark_reclaim_nograce(clp, state))
found = true;
}
- spin_unlock(&inode->i_lock);
+ rcu_read_unlock();
nfs_inode_find_delegation_state_and_recover(inode, stateid);
if (found)
@@ -1469,13 +1469,13 @@ static void nfs4_state_mark_open_context_bad(struct nfs4_state *state)
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_open_context *ctx;
- spin_lock(&inode->i_lock);
- list_for_each_entry(ctx, &nfsi->open_files, list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(ctx, &nfsi->open_files, list) {
if (ctx->state != state)
continue;
set_bit(NFS_CONTEXT_BAD, &ctx->flags);
}
- spin_unlock(&inode->i_lock);
+ rcu_read_unlock();
}
static void nfs4_state_mark_recovery_failed(struct nfs4_state *state, int error)
@@ -1549,10 +1549,62 @@ out:
return status;
}
+#ifdef CONFIG_NFS_V4_2
+static void nfs42_complete_copies(struct nfs4_state_owner *sp, struct nfs4_state *state)
+{
+ struct nfs4_copy_state *copy;
+
+ if (!test_bit(NFS_CLNT_DST_SSC_COPY_STATE, &state->flags))
+ return;
+
+ spin_lock(&sp->so_server->nfs_client->cl_lock);
+ list_for_each_entry(copy, &sp->so_server->ss_copies, copies) {
+ if (nfs4_stateid_match_other(&state->stateid, &copy->parent_state->stateid))
+ continue;
+ copy->flags = 1;
+ complete(&copy->completion);
+ break;
+ }
+ spin_unlock(&sp->so_server->nfs_client->cl_lock);
+}
+#else /* !CONFIG_NFS_V4_2 */
+static inline void nfs42_complete_copies(struct nfs4_state_owner *sp,
+ struct nfs4_state *state)
+{
+}
+#endif /* CONFIG_NFS_V4_2 */
+
+static int __nfs4_reclaim_open_state(struct nfs4_state_owner *sp, struct nfs4_state *state,
+ const struct nfs4_state_recovery_ops *ops)
+{
+ struct nfs4_lock_state *lock;
+ int status;
+
+ status = ops->recover_open(sp, state);
+ if (status < 0)
+ return status;
+
+ status = nfs4_reclaim_locks(state, ops);
+ if (status < 0)
+ return status;
+
+ if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) {
+ spin_lock(&state->state_lock);
+ list_for_each_entry(lock, &state->lock_states, ls_locks) {
+ if (!test_bit(NFS_LOCK_INITIALIZED, &lock->ls_flags))
+ pr_warn_ratelimited("NFS: %s: Lock reclaim failed!\n", __func__);
+ }
+ spin_unlock(&state->state_lock);
+ }
+
+ nfs42_complete_copies(sp, state);
+ clear_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags);
+ return status;
+}
+
static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs4_state_recovery_ops *ops)
{
struct nfs4_state *state;
- struct nfs4_lock_state *lock;
int status = 0;
/* Note: we rely on the sp->so_states list being ordered
@@ -1573,79 +1625,45 @@ restart:
continue;
if (state->state == 0)
continue;
- atomic_inc(&state->count);
+ refcount_inc(&state->count);
spin_unlock(&sp->so_lock);
- status = ops->recover_open(sp, state);
- if (status >= 0) {
- status = nfs4_reclaim_locks(state, ops);
- if (status >= 0) {
- if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) {
- spin_lock(&state->state_lock);
- list_for_each_entry(lock, &state->lock_states, ls_locks) {
- if (!test_bit(NFS_LOCK_INITIALIZED, &lock->ls_flags))
- pr_warn_ratelimited("NFS: "
- "%s: Lock reclaim "
- "failed!\n", __func__);
- }
- spin_unlock(&state->state_lock);
- }
- clear_bit(NFS_STATE_RECLAIM_NOGRACE,
- &state->flags);
-#ifdef CONFIG_NFS_V4_2
- if (test_bit(NFS_CLNT_DST_SSC_COPY_STATE, &state->flags)) {
- struct nfs4_copy_state *copy;
-
- spin_lock(&sp->so_server->nfs_client->cl_lock);
- list_for_each_entry(copy, &sp->so_server->ss_copies, copies) {
- if (memcmp(&state->stateid.other, &copy->parent_state->stateid.other, NFS4_STATEID_SIZE))
- continue;
- copy->flags = 1;
- complete(&copy->completion);
- printk("AGLO: server rebooted waking up the copy\n");
- break;
- }
- spin_unlock(&sp->so_server->nfs_client->cl_lock);
- }
-#endif /* CONFIG_NFS_V4_2 */
- nfs4_put_open_state(state);
- spin_lock(&sp->so_lock);
- goto restart;
- }
- }
+ status = __nfs4_reclaim_open_state(sp, state, ops);
+
switch (status) {
- default:
- printk(KERN_ERR "NFS: %s: unhandled error %d\n",
- __func__, status);
- /* Fall through */
- case -ENOENT:
- case -ENOMEM:
- case -EACCES:
- case -EROFS:
- case -EIO:
- case -ESTALE:
- /* Open state on this file cannot be recovered */
- nfs4_state_mark_recovery_failed(state, status);
- break;
- case -EAGAIN:
- ssleep(1);
- /* Fall through */
- case -NFS4ERR_ADMIN_REVOKED:
- case -NFS4ERR_STALE_STATEID:
- case -NFS4ERR_OLD_STATEID:
- case -NFS4ERR_BAD_STATEID:
- case -NFS4ERR_RECLAIM_BAD:
- case -NFS4ERR_RECLAIM_CONFLICT:
- nfs4_state_mark_reclaim_nograce(sp->so_server->nfs_client, state);
+ default:
+ if (status >= 0)
break;
- case -NFS4ERR_EXPIRED:
- case -NFS4ERR_NO_GRACE:
- nfs4_state_mark_reclaim_nograce(sp->so_server->nfs_client, state);
- case -NFS4ERR_STALE_CLIENTID:
- case -NFS4ERR_BADSESSION:
- case -NFS4ERR_BADSLOT:
- case -NFS4ERR_BAD_HIGH_SLOT:
- case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
- goto out_err;
+ printk(KERN_ERR "NFS: %s: unhandled error %d\n", __func__, status);
+ /* Fall through */
+ case -ENOENT:
+ case -ENOMEM:
+ case -EACCES:
+ case -EROFS:
+ case -EIO:
+ case -ESTALE:
+ /* Open state on this file cannot be recovered */
+ nfs4_state_mark_recovery_failed(state, status);
+ break;
+ case -EAGAIN:
+ ssleep(1);
+ /* Fall through */
+ case -NFS4ERR_ADMIN_REVOKED:
+ case -NFS4ERR_STALE_STATEID:
+ case -NFS4ERR_OLD_STATEID:
+ case -NFS4ERR_BAD_STATEID:
+ case -NFS4ERR_RECLAIM_BAD:
+ case -NFS4ERR_RECLAIM_CONFLICT:
+ nfs4_state_mark_reclaim_nograce(sp->so_server->nfs_client, state);
+ break;
+ case -NFS4ERR_EXPIRED:
+ case -NFS4ERR_NO_GRACE:
+ nfs4_state_mark_reclaim_nograce(sp->so_server->nfs_client, state);
+ case -NFS4ERR_STALE_CLIENTID:
+ case -NFS4ERR_BADSESSION:
+ case -NFS4ERR_BADSLOT:
+ case -NFS4ERR_BAD_HIGH_SLOT:
+ case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
+ goto out_err;
}
nfs4_put_open_state(state);
spin_lock(&sp->so_lock);
@@ -1795,38 +1813,38 @@ static void nfs4_state_start_reclaim_nograce(struct nfs_client *clp)
static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
{
switch (error) {
- case 0:
- break;
- case -NFS4ERR_CB_PATH_DOWN:
- nfs40_handle_cb_pathdown(clp);
- break;
- case -NFS4ERR_NO_GRACE:
- nfs4_state_end_reclaim_reboot(clp);
- break;
- case -NFS4ERR_STALE_CLIENTID:
- set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
- nfs4_state_start_reclaim_reboot(clp);
- break;
- case -NFS4ERR_EXPIRED:
- set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
- nfs4_state_start_reclaim_nograce(clp);
- break;
- case -NFS4ERR_BADSESSION:
- case -NFS4ERR_BADSLOT:
- case -NFS4ERR_BAD_HIGH_SLOT:
- case -NFS4ERR_DEADSESSION:
- case -NFS4ERR_SEQ_FALSE_RETRY:
- case -NFS4ERR_SEQ_MISORDERED:
- set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
- /* Zero session reset errors */
- break;
- case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
- set_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
- break;
- default:
- dprintk("%s: failed to handle error %d for server %s\n",
- __func__, error, clp->cl_hostname);
- return error;
+ case 0:
+ break;
+ case -NFS4ERR_CB_PATH_DOWN:
+ nfs40_handle_cb_pathdown(clp);
+ break;
+ case -NFS4ERR_NO_GRACE:
+ nfs4_state_end_reclaim_reboot(clp);
+ break;
+ case -NFS4ERR_STALE_CLIENTID:
+ set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
+ nfs4_state_start_reclaim_reboot(clp);
+ break;
+ case -NFS4ERR_EXPIRED:
+ set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
+ nfs4_state_start_reclaim_nograce(clp);
+ break;
+ case -NFS4ERR_BADSESSION:
+ case -NFS4ERR_BADSLOT:
+ case -NFS4ERR_BAD_HIGH_SLOT:
+ case -NFS4ERR_DEADSESSION:
+ case -NFS4ERR_SEQ_FALSE_RETRY:
+ case -NFS4ERR_SEQ_MISORDERED:
+ set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
+ /* Zero session reset errors */
+ break;
+ case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
+ set_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
+ break;
+ default:
+ dprintk("%s: failed to handle error %d for server %s\n",
+ __func__, error, clp->cl_hostname);
+ return error;
}
dprintk("%s: handled error %d for server %s\n", __func__, error,
clp->cl_hostname);
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index b7bde12d8cd5..2fc8f6fa25e4 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -3516,7 +3516,7 @@ static int decode_attr_exclcreat_supported(struct xdr_stream *xdr,
static int decode_attr_filehandle(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_fh *fh)
{
__be32 *p;
- int len;
+ u32 len;
if (fh != NULL)
memset(fh, 0, sizeof(*fh));
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index bb5476a6d264..5c4568a0804b 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -63,14 +63,14 @@ EXPORT_SYMBOL_GPL(nfs_pgheader_init);
void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos)
{
- spin_lock(&hdr->lock);
- if (!test_and_set_bit(NFS_IOHDR_ERROR, &hdr->flags)
- || pos < hdr->io_start + hdr->good_bytes) {
+ unsigned int new = pos - hdr->io_start;
+
+ if (hdr->good_bytes > new) {
+ hdr->good_bytes = new;
clear_bit(NFS_IOHDR_EOF, &hdr->flags);
- hdr->good_bytes = pos - hdr->io_start;
- hdr->error = error;
+ if (!test_and_set_bit(NFS_IOHDR_ERROR, &hdr->flags))
+ hdr->error = error;
}
- spin_unlock(&hdr->lock);
}
static inline struct nfs_page *
@@ -494,7 +494,6 @@ struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *ops)
if (hdr) {
INIT_LIST_HEAD(&hdr->pages);
- spin_lock_init(&hdr->lock);
hdr->rw_ops = ops;
}
return hdr;
@@ -1111,6 +1110,20 @@ static int nfs_pageio_add_request_mirror(struct nfs_pageio_descriptor *desc,
return ret;
}
+static void nfs_pageio_error_cleanup(struct nfs_pageio_descriptor *desc)
+{
+ u32 midx;
+ struct nfs_pgio_mirror *mirror;
+
+ if (!desc->pg_error)
+ return;
+
+ for (midx = 0; midx < desc->pg_mirror_count; midx++) {
+ mirror = &desc->pg_mirrors[midx];
+ desc->pg_completion_ops->error_cleanup(&mirror->pg_list);
+ }
+}
+
int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
struct nfs_page *req)
{
@@ -1161,25 +1174,7 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
return 1;
out_failed:
- /*
- * We might have failed before sending any reqs over wire.
- * Clean up rest of the reqs in mirror pg_list.
- */
- if (desc->pg_error) {
- struct nfs_pgio_mirror *mirror;
- void (*func)(struct list_head *);
-
- /* remember fatal errors */
- if (nfs_error_is_fatal(desc->pg_error))
- nfs_context_set_write_error(req->wb_context,
- desc->pg_error);
-
- func = desc->pg_completion_ops->error_cleanup;
- for (midx = 0; midx < desc->pg_mirror_count; midx++) {
- mirror = &desc->pg_mirrors[midx];
- func(&mirror->pg_list);
- }
- }
+ nfs_pageio_error_cleanup(desc);
return 0;
}
@@ -1251,6 +1246,8 @@ void nfs_pageio_complete(struct nfs_pageio_descriptor *desc)
for (midx = 0; midx < desc->pg_mirror_count; midx++)
nfs_pageio_complete_mirror(desc, midx);
+ if (desc->pg_error < 0)
+ nfs_pageio_error_cleanup(desc);
if (desc->pg_ops->pg_cleanup)
desc->pg_ops->pg_cleanup(desc);
nfs_pageio_cleanup_mirroring(desc);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 7d9a51e6b847..06cb90e9bc6e 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -965,7 +965,7 @@ static struct page **nfs4_alloc_pages(size_t size, gfp_t gfp_flags)
struct page **pages;
int i;
- pages = kcalloc(size, sizeof(struct page *), gfp_flags);
+ pages = kmalloc_array(size, sizeof(struct page *), gfp_flags);
if (!pages) {
dprintk("%s: can't alloc array of %zu pages\n", __func__, size);
return NULL;
@@ -975,7 +975,7 @@ static struct page **nfs4_alloc_pages(size_t size, gfp_t gfp_flags)
pages[i] = alloc_page(gfp_flags);
if (!pages[i]) {
dprintk("%s: failed to allocate page\n", __func__);
- nfs4_free_pages(pages, size);
+ nfs4_free_pages(pages, i);
return NULL;
}
}
@@ -991,6 +991,7 @@ pnfs_alloc_init_layoutget_args(struct inode *ino,
gfp_t gfp_flags)
{
struct nfs_server *server = pnfs_find_server(ino, ctx);
+ size_t max_reply_sz = server->pnfs_curr_ld->max_layoutget_response;
size_t max_pages = max_response_pages(server);
struct nfs4_layoutget *lgp;
@@ -1000,6 +1001,12 @@ pnfs_alloc_init_layoutget_args(struct inode *ino,
if (lgp == NULL)
return NULL;
+ if (max_reply_sz) {
+ size_t npages = (max_reply_sz + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ if (npages < max_pages)
+ max_pages = npages;
+ }
+
lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags);
if (!lgp->args.layout.pages) {
kfree(lgp);
@@ -1332,6 +1339,7 @@ bool pnfs_roc(struct inode *ino,
if (!nfs_have_layout(ino))
return false;
retry:
+ rcu_read_lock();
spin_lock(&ino->i_lock);
lo = nfsi->layout;
if (!lo || !pnfs_layout_is_valid(lo) ||
@@ -1342,6 +1350,7 @@ retry:
pnfs_get_layout_hdr(lo);
if (test_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) {
spin_unlock(&ino->i_lock);
+ rcu_read_unlock();
wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN,
TASK_UNINTERRUPTIBLE);
pnfs_put_layout_hdr(lo);
@@ -1355,7 +1364,7 @@ retry:
skip_read = true;
}
- list_for_each_entry(ctx, &nfsi->open_files, list) {
+ list_for_each_entry_rcu(ctx, &nfsi->open_files, list) {
state = ctx->state;
if (state == NULL)
continue;
@@ -1403,6 +1412,7 @@ retry:
out_noroc:
spin_unlock(&ino->i_lock);
+ rcu_read_unlock();
pnfs_layoutcommit_inode(ino, true);
if (roc) {
struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld;
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index ece367ebde69..e2e9fcd5341d 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -125,6 +125,7 @@ struct pnfs_layoutdriver_type {
struct module *owner;
unsigned flags;
unsigned max_deviceinfo_size;
+ unsigned max_layoutget_response;
int (*set_layoutdriver) (struct nfs_server *, const struct nfs_fh *);
int (*clear_layoutdriver) (struct nfs_server *);
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 48d7277c60a9..f9f19784db82 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -276,16 +276,14 @@ static void nfs_readpage_result(struct rpc_task *task,
struct nfs_pgio_header *hdr)
{
if (hdr->res.eof) {
- loff_t bound;
+ loff_t pos = hdr->args.offset + hdr->res.count;
+ unsigned int new = pos - hdr->io_start;
- bound = hdr->args.offset + hdr->res.count;
- spin_lock(&hdr->lock);
- if (bound < hdr->io_start + hdr->good_bytes) {
+ if (hdr->good_bytes > new) {
+ hdr->good_bytes = new;
set_bit(NFS_IOHDR_EOF, &hdr->flags);
clear_bit(NFS_IOHDR_ERROR, &hdr->flags);
- hdr->good_bytes = bound - hdr->io_start;
}
- spin_unlock(&hdr->lock);
} else if (hdr->res.count < hdr->args.count)
nfs_readpage_retry(task, hdr);
}
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index ebb24a314f43..de99db518571 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -168,24 +168,18 @@ int nilfs_btnode_prepare_change_key(struct address_space *btnc,
ctxt->newbh = NULL;
if (inode->i_blkbits == PAGE_SHIFT) {
- lock_page(obh->b_page);
- /*
- * We cannot call radix_tree_preload for the kernels older
- * than 2.6.23, because it is not exported for modules.
- */
+ struct page *opage = obh->b_page;
+ lock_page(opage);
retry:
- err = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
- if (err)
- goto failed_unlock;
/* BUG_ON(oldkey != obh->b_page->index); */
- if (unlikely(oldkey != obh->b_page->index))
- NILFS_PAGE_BUG(obh->b_page,
+ if (unlikely(oldkey != opage->index))
+ NILFS_PAGE_BUG(opage,
"invalid oldkey %lld (newkey=%lld)",
(unsigned long long)oldkey,
(unsigned long long)newkey);
xa_lock_irq(&btnc->i_pages);
- err = radix_tree_insert(&btnc->i_pages, newkey, obh->b_page);
+ err = __xa_insert(&btnc->i_pages, newkey, opage, GFP_NOFS);
xa_unlock_irq(&btnc->i_pages);
/*
* Note: page->index will not change to newkey until
@@ -193,7 +187,6 @@ retry:
* To protect the page in intermediate state, the page lock
* is held.
*/
- radix_tree_preload_end();
if (!err)
return 0;
else if (err != -EEXIST)
@@ -203,7 +196,7 @@ retry:
if (!err)
goto retry;
/* fallback to copy mode */
- unlock_page(obh->b_page);
+ unlock_page(opage);
}
nbh = nilfs_btnode_create_block(btnc, newkey);
@@ -243,9 +236,8 @@ void nilfs_btnode_commit_change_key(struct address_space *btnc,
mark_buffer_dirty(obh);
xa_lock_irq(&btnc->i_pages);
- radix_tree_delete(&btnc->i_pages, oldkey);
- radix_tree_tag_set(&btnc->i_pages, newkey,
- PAGECACHE_TAG_DIRTY);
+ __xa_erase(&btnc->i_pages, oldkey);
+ __xa_set_mark(&btnc->i_pages, newkey, PAGECACHE_TAG_DIRTY);
xa_unlock_irq(&btnc->i_pages);
opage->index = obh->b_blocknr = newkey;
@@ -275,7 +267,7 @@ void nilfs_btnode_abort_change_key(struct address_space *btnc,
if (nbh == NULL) { /* blocksize == pagesize */
xa_lock_irq(&btnc->i_pages);
- radix_tree_delete(&btnc->i_pages, newkey);
+ __xa_erase(&btnc->i_pages, newkey);
xa_unlock_irq(&btnc->i_pages);
unlock_page(ctxt->bh->b_page);
} else
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 329a056b73b1..d7fc8d369d89 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -289,7 +289,7 @@ repeat:
* @dmap: destination page cache
* @smap: source page cache
*
- * No pages must no be added to the cache during this process.
+ * No pages must be added to the cache during this process.
* This must be ensured by the caller.
*/
void nilfs_copy_back_pages(struct address_space *dmap,
@@ -298,7 +298,6 @@ void nilfs_copy_back_pages(struct address_space *dmap,
struct pagevec pvec;
unsigned int i, n;
pgoff_t index = 0;
- int err;
pagevec_init(&pvec);
repeat:
@@ -313,35 +312,34 @@ repeat:
lock_page(page);
dpage = find_lock_page(dmap, offset);
if (dpage) {
- /* override existing page on the destination cache */
+ /* overwrite existing page in the destination cache */
WARN_ON(PageDirty(dpage));
nilfs_copy_page(dpage, page, 0);
unlock_page(dpage);
put_page(dpage);
+ /* Do we not need to remove page from smap here? */
} else {
- struct page *page2;
+ struct page *p;
/* move the page to the destination cache */
xa_lock_irq(&smap->i_pages);
- page2 = radix_tree_delete(&smap->i_pages, offset);
- WARN_ON(page2 != page);
-
+ p = __xa_erase(&smap->i_pages, offset);
+ WARN_ON(page != p);
smap->nrpages--;
xa_unlock_irq(&smap->i_pages);
xa_lock_irq(&dmap->i_pages);
- err = radix_tree_insert(&dmap->i_pages, offset, page);
- if (unlikely(err < 0)) {
- WARN_ON(err == -EEXIST);
+ p = __xa_store(&dmap->i_pages, offset, page, GFP_NOFS);
+ if (unlikely(p)) {
+ /* Probably -ENOMEM */
page->mapping = NULL;
- put_page(page); /* for cache */
+ put_page(page);
} else {
page->mapping = dmap;
dmap->nrpages++;
if (PageDirty(page))
- radix_tree_tag_set(&dmap->i_pages,
- offset,
- PAGECACHE_TAG_DIRTY);
+ __xa_set_mark(&dmap->i_pages, offset,
+ PAGECACHE_TAG_DIRTY);
}
xa_unlock_irq(&dmap->i_pages);
}
@@ -467,8 +465,7 @@ int __nilfs_clear_page_dirty(struct page *page)
if (mapping) {
xa_lock_irq(&mapping->i_pages);
if (test_bit(PG_dirty, &page->flags)) {
- radix_tree_tag_clear(&mapping->i_pages,
- page_index(page),
+ __xa_clear_mark(&mapping->i_pages, page_index(page),
PAGECACHE_TAG_DIRTY);
xa_unlock_irq(&mapping->i_pages);
return clear_page_dirty_for_io(page);
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 94b52157bf8d..5769cf3ff035 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -25,7 +25,7 @@ static bool should_merge(struct fsnotify_event *old_fsn,
old = FANOTIFY_E(old_fsn);
new = FANOTIFY_E(new_fsn);
- if (old_fsn->inode == new_fsn->inode && old->tgid == new->tgid &&
+ if (old_fsn->inode == new_fsn->inode && old->pid == new->pid &&
old->path.mnt == new->path.mnt &&
old->path.dentry == new->path.dentry)
return true;
@@ -131,8 +131,8 @@ static bool fanotify_should_send_event(struct fsnotify_iter_info *iter_info,
!(marks_mask & FS_ISDIR & ~marks_ignored_mask))
return false;
- if (event_mask & FAN_ALL_OUTGOING_EVENTS & marks_mask &
- ~marks_ignored_mask)
+ if (event_mask & FANOTIFY_OUTGOING_EVENTS &
+ marks_mask & ~marks_ignored_mask)
return true;
return false;
@@ -171,7 +171,10 @@ struct fanotify_event_info *fanotify_alloc_event(struct fsnotify_group *group,
goto out;
init: __maybe_unused
fsnotify_init_event(&event->fse, inode, mask);
- event->tgid = get_pid(task_tgid(current));
+ if (FAN_GROUP_FLAG(group, FAN_REPORT_TID))
+ event->pid = get_pid(task_pid(current));
+ else
+ event->pid = get_pid(task_tgid(current));
if (path) {
event->path = *path;
path_get(&event->path);
@@ -205,6 +208,8 @@ static int fanotify_handle_event(struct fsnotify_group *group,
BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM);
BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR);
+ BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 10);
+
if (!fanotify_should_send_event(iter_info, mask, data, data_type))
return 0;
@@ -236,7 +241,7 @@ static int fanotify_handle_event(struct fsnotify_group *group,
ret = fsnotify_add_event(group, fsn_event, fanotify_merge);
if (ret) {
/* Permission events shouldn't be merged */
- BUG_ON(ret == 1 && mask & FAN_ALL_PERM_EVENTS);
+ BUG_ON(ret == 1 && mask & FANOTIFY_PERM_EVENTS);
/* Our event wasn't used in the end. Free it. */
fsnotify_destroy_event(group, fsn_event);
@@ -268,7 +273,7 @@ static void fanotify_free_event(struct fsnotify_event *fsn_event)
event = FANOTIFY_E(fsn_event);
path_put(&event->path);
- put_pid(event->tgid);
+ put_pid(event->pid);
if (fanotify_is_perm_event(fsn_event->mask)) {
kmem_cache_free(fanotify_perm_event_cachep,
FANOTIFY_PE(fsn_event));
diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
index 8609ba06f474..ea05b8a401e7 100644
--- a/fs/notify/fanotify/fanotify.h
+++ b/fs/notify/fanotify/fanotify.h
@@ -19,7 +19,7 @@ struct fanotify_event_info {
* during this object's lifetime
*/
struct path path;
- struct pid *tgid;
+ struct pid *pid;
};
/*
@@ -44,7 +44,7 @@ FANOTIFY_PE(struct fsnotify_event *fse)
static inline bool fanotify_is_perm_event(u32 mask)
{
return IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS) &&
- mask & FAN_ALL_PERM_EVENTS;
+ mask & FANOTIFY_PERM_EVENTS;
}
static inline struct fanotify_event_info *FANOTIFY_E(struct fsnotify_event *fse)
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 69054886915b..e03be5071362 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -131,8 +131,8 @@ static int fill_event_metadata(struct fsnotify_group *group,
metadata->metadata_len = FAN_EVENT_METADATA_LEN;
metadata->vers = FANOTIFY_METADATA_VERSION;
metadata->reserved = 0;
- metadata->mask = fsn_event->mask & FAN_ALL_OUTGOING_EVENTS;
- metadata->pid = pid_vnr(event->tgid);
+ metadata->mask = fsn_event->mask & FANOTIFY_OUTGOING_EVENTS;
+ metadata->pid = pid_vnr(event->pid);
if (unlikely(fsn_event->mask & FAN_Q_OVERFLOW))
metadata->fd = FAN_NOFD;
else {
@@ -191,7 +191,7 @@ static int process_access_response(struct fsnotify_group *group,
if (fd < 0)
return -EINVAL;
- if ((response & FAN_AUDIT) && !group->fanotify_data.audit)
+ if ((response & FAN_AUDIT) && !FAN_GROUP_FLAG(group, FAN_ENABLE_AUDIT))
return -EINVAL;
event = dequeue_event(group, fd);
@@ -395,7 +395,7 @@ static int fanotify_release(struct inode *ignored, struct file *file)
*/
while (!fsnotify_notify_queue_is_empty(group)) {
fsn_event = fsnotify_remove_first_event(group);
- if (!(fsn_event->mask & FAN_ALL_PERM_EVENTS)) {
+ if (!(fsn_event->mask & FANOTIFY_PERM_EVENTS)) {
spin_unlock(&group->notification_lock);
fsnotify_destroy_event(group, fsn_event);
spin_lock(&group->notification_lock);
@@ -506,18 +506,10 @@ static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark,
spin_lock(&fsn_mark->lock);
if (!(flags & FAN_MARK_IGNORED_MASK)) {
- __u32 tmask = fsn_mark->mask & ~mask;
-
- if (flags & FAN_MARK_ONDIR)
- tmask &= ~FAN_ONDIR;
-
oldmask = fsn_mark->mask;
- fsn_mark->mask = tmask;
+ fsn_mark->mask &= ~mask;
} else {
- __u32 tmask = fsn_mark->ignored_mask & ~mask;
- if (flags & FAN_MARK_ONDIR)
- tmask &= ~FAN_ONDIR;
- fsn_mark->ignored_mask = tmask;
+ fsn_mark->ignored_mask &= ~mask;
}
*destroy = !(fsn_mark->mask | fsn_mark->ignored_mask);
spin_unlock(&fsn_mark->lock);
@@ -563,6 +555,13 @@ static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group,
mask, flags);
}
+static int fanotify_remove_sb_mark(struct fsnotify_group *group,
+ struct super_block *sb, __u32 mask,
+ unsigned int flags)
+{
+ return fanotify_remove_mark(group, &sb->s_fsnotify_marks, mask, flags);
+}
+
static int fanotify_remove_inode_mark(struct fsnotify_group *group,
struct inode *inode, __u32 mask,
unsigned int flags)
@@ -579,19 +578,10 @@ static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
spin_lock(&fsn_mark->lock);
if (!(flags & FAN_MARK_IGNORED_MASK)) {
- __u32 tmask = fsn_mark->mask | mask;
-
- if (flags & FAN_MARK_ONDIR)
- tmask |= FAN_ONDIR;
-
oldmask = fsn_mark->mask;
- fsn_mark->mask = tmask;
+ fsn_mark->mask |= mask;
} else {
- __u32 tmask = fsn_mark->ignored_mask | mask;
- if (flags & FAN_MARK_ONDIR)
- tmask |= FAN_ONDIR;
-
- fsn_mark->ignored_mask = tmask;
+ fsn_mark->ignored_mask |= mask;
if (flags & FAN_MARK_IGNORED_SURV_MODIFY)
fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY;
}
@@ -658,6 +648,14 @@ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
FSNOTIFY_OBJ_TYPE_VFSMOUNT, mask, flags);
}
+static int fanotify_add_sb_mark(struct fsnotify_group *group,
+ struct super_block *sb, __u32 mask,
+ unsigned int flags)
+{
+ return fanotify_add_mark(group, &sb->s_fsnotify_marks,
+ FSNOTIFY_OBJ_TYPE_SB, mask, flags);
+}
+
static int fanotify_add_inode_mark(struct fsnotify_group *group,
struct inode *inode, __u32 mask,
unsigned int flags)
@@ -686,16 +684,16 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
struct user_struct *user;
struct fanotify_event_info *oevent;
- pr_debug("%s: flags=%d event_f_flags=%d\n",
- __func__, flags, event_f_flags);
+ pr_debug("%s: flags=%x event_f_flags=%x\n",
+ __func__, flags, event_f_flags);
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
#ifdef CONFIG_AUDITSYSCALL
- if (flags & ~(FAN_ALL_INIT_FLAGS | FAN_ENABLE_AUDIT))
+ if (flags & ~(FANOTIFY_INIT_FLAGS | FAN_ENABLE_AUDIT))
#else
- if (flags & ~FAN_ALL_INIT_FLAGS)
+ if (flags & ~FANOTIFY_INIT_FLAGS)
#endif
return -EINVAL;
@@ -731,6 +729,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
}
group->fanotify_data.user = user;
+ group->fanotify_data.flags = flags;
atomic_inc(&user->fanotify_listeners);
group->memcg = get_mem_cgroup_from_mm(current->mm);
@@ -746,7 +745,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
group->fanotify_data.f_flags = event_f_flags;
init_waitqueue_head(&group->fanotify_data.access_waitq);
INIT_LIST_HEAD(&group->fanotify_data.access_list);
- switch (flags & FAN_ALL_CLASS_BITS) {
+ switch (flags & FANOTIFY_CLASS_BITS) {
case FAN_CLASS_NOTIF:
group->priority = FS_PRIO_0;
break;
@@ -783,7 +782,6 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
fd = -EPERM;
if (!capable(CAP_AUDIT_WRITE))
goto out_destroy_group;
- group->fanotify_data.audit = true;
}
fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags);
@@ -805,7 +803,8 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
struct fsnotify_group *group;
struct fd f;
struct path path;
- u32 valid_mask = FAN_ALL_EVENTS | FAN_EVENT_ON_CHILD;
+ u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS;
+ unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
int ret;
pr_debug("%s: fanotify_fd=%d flags=%x dfd=%d pathname=%p mask=%llx\n",
@@ -815,8 +814,18 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
if (mask & ((__u64)0xffffffff << 32))
return -EINVAL;
- if (flags & ~FAN_ALL_MARK_FLAGS)
+ if (flags & ~FANOTIFY_MARK_FLAGS)
+ return -EINVAL;
+
+ switch (mark_type) {
+ case FAN_MARK_INODE:
+ case FAN_MARK_MOUNT:
+ case FAN_MARK_FILESYSTEM:
+ break;
+ default:
return -EINVAL;
+ }
+
switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) {
case FAN_MARK_ADD: /* fallthrough */
case FAN_MARK_REMOVE:
@@ -824,20 +833,15 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
return -EINVAL;
break;
case FAN_MARK_FLUSH:
- if (flags & ~(FAN_MARK_MOUNT | FAN_MARK_FLUSH))
+ if (flags & ~(FANOTIFY_MARK_TYPE_BITS | FAN_MARK_FLUSH))
return -EINVAL;
break;
default:
return -EINVAL;
}
- if (mask & FAN_ONDIR) {
- flags |= FAN_MARK_ONDIR;
- mask &= ~FAN_ONDIR;
- }
-
if (IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS))
- valid_mask |= FAN_ALL_PERM_EVENTS;
+ valid_mask |= FANOTIFY_PERM_EVENTS;
if (mask & ~valid_mask)
return -EINVAL;
@@ -857,14 +861,16 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
* allowed to set permissions events.
*/
ret = -EINVAL;
- if (mask & FAN_ALL_PERM_EVENTS &&
+ if (mask & FANOTIFY_PERM_EVENTS &&
group->priority == FS_PRIO_0)
goto fput_and_out;
if (flags & FAN_MARK_FLUSH) {
ret = 0;
- if (flags & FAN_MARK_MOUNT)
+ if (mark_type == FAN_MARK_MOUNT)
fsnotify_clear_vfsmount_marks_by_group(group);
+ else if (mark_type == FAN_MARK_FILESYSTEM)
+ fsnotify_clear_sb_marks_by_group(group);
else
fsnotify_clear_inode_marks_by_group(group);
goto fput_and_out;
@@ -875,7 +881,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
goto fput_and_out;
/* inode held in place by reference to path; group by fget on fd */
- if (!(flags & FAN_MARK_MOUNT))
+ if (mark_type == FAN_MARK_INODE)
inode = path.dentry->d_inode;
else
mnt = path.mnt;
@@ -883,14 +889,18 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
/* create/update an inode mark */
switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE)) {
case FAN_MARK_ADD:
- if (flags & FAN_MARK_MOUNT)
+ if (mark_type == FAN_MARK_MOUNT)
ret = fanotify_add_vfsmount_mark(group, mnt, mask, flags);
+ else if (mark_type == FAN_MARK_FILESYSTEM)
+ ret = fanotify_add_sb_mark(group, mnt->mnt_sb, mask, flags);
else
ret = fanotify_add_inode_mark(group, inode, mask, flags);
break;
case FAN_MARK_REMOVE:
- if (flags & FAN_MARK_MOUNT)
+ if (mark_type == FAN_MARK_MOUNT)
ret = fanotify_remove_vfsmount_mark(group, mnt, mask, flags);
+ else if (mark_type == FAN_MARK_FILESYSTEM)
+ ret = fanotify_remove_sb_mark(group, mnt->mnt_sb, mask, flags);
else
ret = fanotify_remove_inode_mark(group, inode, mask, flags);
break;
@@ -934,6 +944,9 @@ COMPAT_SYSCALL_DEFINE6(fanotify_mark,
*/
static int __init fanotify_user_setup(void)
{
+ BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 7);
+ BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 9);
+
fanotify_mark_cache = KMEM_CACHE(fsnotify_mark,
SLAB_PANIC|SLAB_ACCOUNT);
fanotify_event_cachep = KMEM_CACHE(fanotify_event_info, SLAB_PANIC);
diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c
index 86fcf5814279..348a184bcdda 100644
--- a/fs/notify/fdinfo.c
+++ b/fs/notify/fdinfo.c
@@ -131,37 +131,20 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark)
seq_printf(m, "fanotify mnt_id:%x mflags:%x mask:%x ignored_mask:%x\n",
mnt->mnt_id, mflags, mark->mask, mark->ignored_mask);
+ } else if (mark->connector->type == FSNOTIFY_OBJ_TYPE_SB) {
+ struct super_block *sb = fsnotify_conn_sb(mark->connector);
+
+ seq_printf(m, "fanotify sdev:%x mflags:%x mask:%x ignored_mask:%x\n",
+ sb->s_dev, mflags, mark->mask, mark->ignored_mask);
}
}
void fanotify_show_fdinfo(struct seq_file *m, struct file *f)
{
struct fsnotify_group *group = f->private_data;
- unsigned int flags = 0;
-
- switch (group->priority) {
- case FS_PRIO_0:
- flags |= FAN_CLASS_NOTIF;
- break;
- case FS_PRIO_1:
- flags |= FAN_CLASS_CONTENT;
- break;
- case FS_PRIO_2:
- flags |= FAN_CLASS_PRE_CONTENT;
- break;
- }
-
- if (group->max_events == UINT_MAX)
- flags |= FAN_UNLIMITED_QUEUE;
-
- if (group->fanotify_data.max_marks == UINT_MAX)
- flags |= FAN_UNLIMITED_MARKS;
-
- if (group->fanotify_data.audit)
- flags |= FAN_ENABLE_AUDIT;
seq_printf(m, "fanotify flags:%x event-flags:%x\n",
- flags, group->fanotify_data.f_flags);
+ group->fanotify_data.flags, group->fanotify_data.f_flags);
show_fdinfo(m, f, fanotify_fdinfo);
}
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index ababdbfab537..2172ba516c61 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -48,7 +48,7 @@ void __fsnotify_vfsmount_delete(struct vfsmount *mnt)
* Called during unmount with no locks held, so needs to be safe against
* concurrent modifiers. We temporarily drop sb->s_inode_list_lock and CAN block.
*/
-void fsnotify_unmount_inodes(struct super_block *sb)
+static void fsnotify_unmount_inodes(struct super_block *sb)
{
struct inode *inode, *iput_inode = NULL;
@@ -96,6 +96,15 @@ void fsnotify_unmount_inodes(struct super_block *sb)
if (iput_inode)
iput(iput_inode);
+ /* Wait for outstanding inode references from connectors */
+ wait_var_event(&sb->s_fsnotify_inode_refs,
+ !atomic_long_read(&sb->s_fsnotify_inode_refs));
+}
+
+void fsnotify_sb_delete(struct super_block *sb)
+{
+ fsnotify_unmount_inodes(sb);
+ fsnotify_clear_marks_by_sb(sb);
}
/*
@@ -190,7 +199,7 @@ static int send_to_group(struct inode *to_tell,
struct fsnotify_iter_info *iter_info)
{
struct fsnotify_group *group = NULL;
- __u32 test_mask = (mask & ~FS_EVENT_ON_CHILD);
+ __u32 test_mask = (mask & ALL_FSNOTIFY_EVENTS);
__u32 marks_mask = 0;
__u32 marks_ignored_mask = 0;
struct fsnotify_mark *mark;
@@ -319,15 +328,17 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
const unsigned char *file_name, u32 cookie)
{
struct fsnotify_iter_info iter_info = {};
- struct mount *mnt;
+ struct super_block *sb = NULL;
+ struct mount *mnt = NULL;
+ __u32 mnt_or_sb_mask = 0;
int ret = 0;
- /* global tests shouldn't care about events on child only the specific event */
- __u32 test_mask = (mask & ~FS_EVENT_ON_CHILD);
+ __u32 test_mask = (mask & ALL_FSNOTIFY_EVENTS);
- if (data_is == FSNOTIFY_EVENT_PATH)
+ if (data_is == FSNOTIFY_EVENT_PATH) {
mnt = real_mount(((const struct path *)data)->mnt);
- else
- mnt = NULL;
+ sb = mnt->mnt.mnt_sb;
+ mnt_or_sb_mask = mnt->mnt_fsnotify_mask | sb->s_fsnotify_mask;
+ }
/*
* Optimization: srcu_read_lock() has a memory barrier which can
@@ -337,16 +348,15 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
* need SRCU to keep them "alive".
*/
if (!to_tell->i_fsnotify_marks &&
- (!mnt || !mnt->mnt_fsnotify_marks))
+ (!mnt || (!mnt->mnt_fsnotify_marks && !sb->s_fsnotify_marks)))
return 0;
/*
* if this is a modify event we may need to clear the ignored masks
- * otherwise return if neither the inode nor the vfsmount care about
+ * otherwise return if neither the inode nor the vfsmount/sb care about
* this type of event.
*/
if (!(mask & FS_MODIFY) &&
- !(test_mask & to_tell->i_fsnotify_mask) &&
- !(mnt && test_mask & mnt->mnt_fsnotify_mask))
+ !(test_mask & (to_tell->i_fsnotify_mask | mnt_or_sb_mask)))
return 0;
iter_info.srcu_idx = srcu_read_lock(&fsnotify_mark_srcu);
@@ -356,11 +366,13 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
if (mnt) {
iter_info.marks[FSNOTIFY_OBJ_TYPE_VFSMOUNT] =
fsnotify_first_mark(&mnt->mnt_fsnotify_marks);
+ iter_info.marks[FSNOTIFY_OBJ_TYPE_SB] =
+ fsnotify_first_mark(&sb->s_fsnotify_marks);
}
/*
- * We need to merge inode & vfsmount mark lists so that inode mark
- * ignore masks are properly reflected for mount mark notifications.
+ * We need to merge inode/vfsmount/sb mark lists so that e.g. inode mark
+ * ignore masks are properly reflected for mount/sb mark notifications.
* That's why this traversal is so complicated...
*/
while (fsnotify_iter_select_report_types(&iter_info)) {
@@ -386,7 +398,7 @@ static __init int fsnotify_init(void)
{
int ret;
- BUG_ON(hweight32(ALL_FSNOTIFY_EVENTS) != 23);
+ BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 23);
ret = init_srcu_struct(&fsnotify_mark_srcu);
if (ret)
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
index 7902653dd577..5a00121fb219 100644
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -21,6 +21,12 @@ static inline struct mount *fsnotify_conn_mount(
return container_of(conn->obj, struct mount, mnt_fsnotify_marks);
}
+static inline struct super_block *fsnotify_conn_sb(
+ struct fsnotify_mark_connector *conn)
+{
+ return container_of(conn->obj, struct super_block, s_fsnotify_marks);
+}
+
/* destroy all events sitting in this groups notification queue */
extern void fsnotify_flush_notify(struct fsnotify_group *group);
@@ -43,6 +49,11 @@ static inline void fsnotify_clear_marks_by_mount(struct vfsmount *mnt)
{
fsnotify_destroy_marks(&real_mount(mnt)->mnt_fsnotify_marks);
}
+/* run the list of all marks associated with sb and destroy them */
+static inline void fsnotify_clear_marks_by_sb(struct super_block *sb)
+{
+ fsnotify_destroy_marks(&sb->s_fsnotify_marks);
+}
/* Wait until all marks queued for destruction are destroyed */
extern void fsnotify_wait_marks_destroyed(void);
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index ac6978d3208c..105576daca4a 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -815,7 +815,7 @@ static int __init inotify_user_setup(void)
BUILD_BUG_ON(IN_ISDIR != FS_ISDIR);
BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT);
- BUG_ON(hweight32(ALL_INOTIFY_BITS) != 22);
+ BUILD_BUG_ON(HWEIGHT32(ALL_INOTIFY_BITS) != 22);
inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark,
SLAB_PANIC|SLAB_ACCOUNT);
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 59cdb27826de..d2dd16cb5989 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -115,6 +115,8 @@ static __u32 *fsnotify_conn_mask_p(struct fsnotify_mark_connector *conn)
return &fsnotify_conn_inode(conn)->i_fsnotify_mask;
else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT)
return &fsnotify_conn_mount(conn)->mnt_fsnotify_mask;
+ else if (conn->type == FSNOTIFY_OBJ_TYPE_SB)
+ return &fsnotify_conn_sb(conn)->s_fsnotify_mask;
return NULL;
}
@@ -179,19 +181,24 @@ static void fsnotify_connector_destroy_workfn(struct work_struct *work)
}
}
-static struct inode *fsnotify_detach_connector_from_object(
- struct fsnotify_mark_connector *conn)
+static void *fsnotify_detach_connector_from_object(
+ struct fsnotify_mark_connector *conn,
+ unsigned int *type)
{
struct inode *inode = NULL;
+ *type = conn->type;
if (conn->type == FSNOTIFY_OBJ_TYPE_DETACHED)
return NULL;
if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) {
inode = fsnotify_conn_inode(conn);
inode->i_fsnotify_mask = 0;
+ atomic_long_inc(&inode->i_sb->s_fsnotify_inode_refs);
} else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) {
fsnotify_conn_mount(conn)->mnt_fsnotify_mask = 0;
+ } else if (conn->type == FSNOTIFY_OBJ_TYPE_SB) {
+ fsnotify_conn_sb(conn)->s_fsnotify_mask = 0;
}
rcu_assign_pointer(*(conn->obj), NULL);
@@ -211,10 +218,29 @@ static void fsnotify_final_mark_destroy(struct fsnotify_mark *mark)
fsnotify_put_group(group);
}
+/* Drop object reference originally held by a connector */
+static void fsnotify_drop_object(unsigned int type, void *objp)
+{
+ struct inode *inode;
+ struct super_block *sb;
+
+ if (!objp)
+ return;
+ /* Currently only inode references are passed to be dropped */
+ if (WARN_ON_ONCE(type != FSNOTIFY_OBJ_TYPE_INODE))
+ return;
+ inode = objp;
+ sb = inode->i_sb;
+ iput(inode);
+ if (atomic_long_dec_and_test(&sb->s_fsnotify_inode_refs))
+ wake_up_var(&sb->s_fsnotify_inode_refs);
+}
+
void fsnotify_put_mark(struct fsnotify_mark *mark)
{
struct fsnotify_mark_connector *conn;
- struct inode *inode = NULL;
+ void *objp = NULL;
+ unsigned int type = FSNOTIFY_OBJ_TYPE_DETACHED;
bool free_conn = false;
/* Catch marks that were actually never attached to object */
@@ -234,7 +260,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
conn = mark->connector;
hlist_del_init_rcu(&mark->obj_list);
if (hlist_empty(&conn->list)) {
- inode = fsnotify_detach_connector_from_object(conn);
+ objp = fsnotify_detach_connector_from_object(conn, &type);
free_conn = true;
} else {
__fsnotify_recalc_mask(conn);
@@ -242,7 +268,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
mark->connector = NULL;
spin_unlock(&conn->lock);
- iput(inode);
+ fsnotify_drop_object(type, objp);
if (free_conn) {
spin_lock(&destroy_lock);
@@ -709,7 +735,8 @@ void fsnotify_destroy_marks(fsnotify_connp_t *connp)
{
struct fsnotify_mark_connector *conn;
struct fsnotify_mark *mark, *old_mark = NULL;
- struct inode *inode;
+ void *objp;
+ unsigned int type;
conn = fsnotify_grab_connector(connp);
if (!conn)
@@ -735,11 +762,11 @@ void fsnotify_destroy_marks(fsnotify_connp_t *connp)
* mark references get dropped. It would lead to strange results such
* as delaying inode deletion or blocking unmount.
*/
- inode = fsnotify_detach_connector_from_object(conn);
+ objp = fsnotify_detach_connector_from_object(conn, &type);
spin_unlock(&conn->lock);
if (old_mark)
fsnotify_put_mark(old_mark);
- iput(inode);
+ fsnotify_drop_object(type, objp);
}
/*
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index a342f008e42f..d1cbb27808e2 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -5106,8 +5106,6 @@ int ocfs2_split_extent(handle_t *handle,
* rightmost extent list.
*/
if (path->p_tree_depth) {
- struct ocfs2_extent_block *eb;
-
ret = ocfs2_read_extent_block(et->et_ci,
ocfs2_et_get_last_eb_blk(et),
&last_eb_bh);
@@ -5115,8 +5113,6 @@ int ocfs2_split_extent(handle_t *handle,
mlog_errno(ret);
goto out;
}
-
- eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
}
if (rec->e_cpos == split_rec->e_cpos &&
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 302cd7caa4a7..da578ad4c08f 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1392,8 +1392,7 @@ retry:
unlock:
spin_unlock(&oi->ip_lock);
out:
- if (new)
- kfree(new);
+ kfree(new);
return ret;
}
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c
index 9b984cae4c4e..1d6dc8422899 100644
--- a/fs/ocfs2/dlm/dlmdebug.c
+++ b/fs/ocfs2/dlm/dlmdebug.c
@@ -329,7 +329,7 @@ void dlm_print_one_mle(struct dlm_master_list_entry *mle)
{
char *buf;
- buf = (char *) get_zeroed_page(GFP_NOFS);
+ buf = (char *) get_zeroed_page(GFP_ATOMIC);
if (buf) {
dump_mle(mle, buf, PAGE_SIZE - 1);
free_page((unsigned long)buf);
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index 838a06d4066a..074d5de17bb2 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -531,7 +531,7 @@ void __dlm_dirty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
assert_spin_locked(&res->spinlock);
/* don't shuffle secondary queues */
- if ((res->owner == dlm->node_num)) {
+ if (res->owner == dlm->node_num) {
if (res->state & (DLM_LOCK_RES_MIGRATING |
DLM_LOCK_RES_BLOCK_DIRTY))
return;
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 7a5ee145c733..1114ef02e780 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4135,7 +4135,6 @@ static int ocfs2_create_reflink_node(struct inode *s_inode,
struct buffer_head *ref_root_bh = NULL;
struct ocfs2_cached_dealloc_ctxt dealloc;
struct ocfs2_super *osb = OCFS2_SB(s_inode->i_sb);
- struct ocfs2_refcount_block *rb;
struct ocfs2_dinode *di = (struct ocfs2_dinode *)s_bh->b_data;
struct ocfs2_refcount_tree *ref_tree;
@@ -4162,7 +4161,6 @@ static int ocfs2_create_reflink_node(struct inode *s_inode,
mlog_errno(ret);
goto out;
}
- rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
ret = ocfs2_duplicate_extent_list(s_inode, t_inode, t_bh,
&ref_tree->rf_ci, ref_root_bh,
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index fc5306a31a1d..5792f9e39466 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -516,6 +516,9 @@ int proc_fill_super(struct super_block *s, void *data, int silent)
*/
s->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH;
+ /* procfs dentries and inodes don't require IO to create */
+ s->s_shrink.seeks = 0;
+
pde_get(&proc_root);
root_inode = proc_get_inode(s, &proc_root);
if (!root_inode) {
diff --git a/fs/proc/loadavg.c b/fs/proc/loadavg.c
index d06694757201..8468baee951d 100644
--- a/fs/proc/loadavg.c
+++ b/fs/proc/loadavg.c
@@ -10,9 +10,6 @@
#include <linux/seqlock.h>
#include <linux/time.h>
-#define LOAD_INT(x) ((x) >> FSHIFT)
-#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
-
static int loadavg_proc_show(struct seq_file *m, void *v)
{
unsigned long avnrun[3];
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index edda898714eb..568d90e17c17 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -38,6 +38,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
long cached;
long available;
unsigned long pages[NR_LRU_LISTS];
+ unsigned long sreclaimable, sunreclaim;
int lru;
si_meminfo(&i);
@@ -53,6 +54,8 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
pages[lru] = global_node_page_state(NR_LRU_BASE + lru);
available = si_mem_available();
+ sreclaimable = global_node_page_state(NR_SLAB_RECLAIMABLE);
+ sunreclaim = global_node_page_state(NR_SLAB_UNRECLAIMABLE);
show_val_kb(m, "MemTotal: ", i.totalram);
show_val_kb(m, "MemFree: ", i.freeram);
@@ -94,14 +97,11 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
show_val_kb(m, "Mapped: ",
global_node_page_state(NR_FILE_MAPPED));
show_val_kb(m, "Shmem: ", i.sharedram);
- show_val_kb(m, "Slab: ",
- global_node_page_state(NR_SLAB_RECLAIMABLE) +
- global_node_page_state(NR_SLAB_UNRECLAIMABLE));
-
- show_val_kb(m, "SReclaimable: ",
- global_node_page_state(NR_SLAB_RECLAIMABLE));
- show_val_kb(m, "SUnreclaim: ",
- global_node_page_state(NR_SLAB_UNRECLAIMABLE));
+ show_val_kb(m, "KReclaimable: ", sreclaimable +
+ global_node_page_state(NR_KERNEL_MISC_RECLAIMABLE));
+ show_val_kb(m, "Slab: ", sreclaimable + sunreclaim);
+ show_val_kb(m, "SReclaimable: ", sreclaimable);
+ show_val_kb(m, "SUnreclaim: ", sunreclaim);
seq_printf(m, "KernelStack: %8lu kB\n",
global_zone_page_state(NR_KERNEL_STACK_KB));
show_val_kb(m, "PageTables: ",
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 5ea1d64cb0b4..47c3764c469b 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -521,7 +521,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
if (!page)
return;
- if (radix_tree_exceptional_entry(page))
+ if (xa_is_value(page))
mss->swap += PAGE_SIZE;
else
put_page(page);
@@ -713,6 +713,8 @@ static void smap_gather_stats(struct vm_area_struct *vma,
smaps_walk.private = mss;
#ifdef CONFIG_SHMEM
+ /* In case of smaps_rollup, reset the value from previous vma */
+ mss->check_shmem_swap = false;
if (vma->vm_file && shmem_mapping(vma->vm_file->f_mapping)) {
/*
* For shared or readonly shmem mappings we know that all
@@ -728,7 +730,7 @@ static void smap_gather_stats(struct vm_area_struct *vma,
if (!shmem_swapped || (vma->vm_flags & VM_SHARED) ||
!(vma->vm_flags & VM_WRITE)) {
- mss->swap = shmem_swapped;
+ mss->swap += shmem_swapped;
} else {
mss->check_shmem_swap = true;
smaps_walk.pte_hole = smaps_pte_hole;
diff --git a/fs/super.c b/fs/super.c
index f3a8c008e164..ca53a08497ed 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -442,7 +442,7 @@ void generic_shutdown_super(struct super_block *sb)
sync_filesystem(sb);
sb->s_flags &= ~SB_ACTIVE;
- fsnotify_unmount_inodes(sb);
+ fsnotify_sb_delete(sb);
cgroup_writeback_umount();
evict_inodes(sb);
diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
index fcda0fc97b90..ec85aeaed54a 100644
--- a/fs/udf/balloc.c
+++ b/fs/udf/balloc.c
@@ -175,8 +175,8 @@ static int udf_bitmap_prealloc_blocks(struct super_block *sb,
{
struct udf_sb_info *sbi = UDF_SB(sb);
int alloc_count = 0;
- int bit, block, block_group, group_start;
- int nr_groups, bitmap_nr;
+ int bit, block, block_group;
+ int bitmap_nr;
struct buffer_head *bh;
__u32 part_len;
@@ -189,10 +189,8 @@ static int udf_bitmap_prealloc_blocks(struct super_block *sb,
block_count = part_len - first_block;
do {
- nr_groups = udf_compute_nr_groups(sb, partition);
block = first_block + (sizeof(struct spaceBitmapDesc) << 3);
block_group = block >> (sb->s_blocksize_bits + 3);
- group_start = block_group ? 0 : sizeof(struct spaceBitmapDesc);
bitmap_nr = load_block_bitmap(sb, bitmap, block_group);
if (bitmap_nr < 0)
@@ -652,12 +650,6 @@ void udf_free_blocks(struct super_block *sb, struct inode *inode,
} else if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_TABLE) {
udf_table_free_blocks(sb, map->s_uspace.s_table,
bloc, offset, count);
- } else if (map->s_partition_flags & UDF_PART_FLAG_FREED_BITMAP) {
- udf_bitmap_free_blocks(sb, map->s_fspace.s_bitmap,
- bloc, offset, count);
- } else if (map->s_partition_flags & UDF_PART_FLAG_FREED_TABLE) {
- udf_table_free_blocks(sb, map->s_fspace.s_table,
- bloc, offset, count);
}
if (inode) {
@@ -684,16 +676,6 @@ inline int udf_prealloc_blocks(struct super_block *sb,
map->s_uspace.s_table,
partition, first_block,
block_count);
- else if (map->s_partition_flags & UDF_PART_FLAG_FREED_BITMAP)
- allocated = udf_bitmap_prealloc_blocks(sb,
- map->s_fspace.s_bitmap,
- partition, first_block,
- block_count);
- else if (map->s_partition_flags & UDF_PART_FLAG_FREED_TABLE)
- allocated = udf_table_prealloc_blocks(sb,
- map->s_fspace.s_table,
- partition, first_block,
- block_count);
else
return 0;
@@ -717,14 +699,6 @@ inline udf_pblk_t udf_new_block(struct super_block *sb,
block = udf_table_new_block(sb,
map->s_uspace.s_table,
partition, goal, err);
- else if (map->s_partition_flags & UDF_PART_FLAG_FREED_BITMAP)
- block = udf_bitmap_new_block(sb,
- map->s_fspace.s_bitmap,
- partition, goal, err);
- else if (map->s_partition_flags & UDF_PART_FLAG_FREED_TABLE)
- block = udf_table_new_block(sb,
- map->s_fspace.s_table,
- partition, goal, err);
else {
*err = -EIO;
return 0;
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 6f515651a2c2..8f2f56d9a1bb 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -290,12 +290,8 @@ static void udf_free_partition(struct udf_part_map *map)
if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_TABLE)
iput(map->s_uspace.s_table);
- if (map->s_partition_flags & UDF_PART_FLAG_FREED_TABLE)
- iput(map->s_fspace.s_table);
if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP)
udf_sb_free_bitmap(map->s_uspace.s_bitmap);
- if (map->s_partition_flags & UDF_PART_FLAG_FREED_BITMAP)
- udf_sb_free_bitmap(map->s_fspace.s_bitmap);
if (map->s_partition_type == UDF_SPARABLE_MAP15)
for (i = 0; i < 4; i++)
brelse(map->s_type_specific.s_sparing.s_spar_map[i]);
@@ -613,14 +609,11 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options)
struct udf_options uopt;
struct udf_sb_info *sbi = UDF_SB(sb);
int error = 0;
- struct logicalVolIntegrityDescImpUse *lvidiu = udf_sb_lvidiu(sb);
+
+ if (!(*flags & SB_RDONLY) && UDF_QUERY_FLAG(sb, UDF_FLAG_RW_INCOMPAT))
+ return -EACCES;
sync_filesystem(sb);
- if (lvidiu) {
- int write_rev = le16_to_cpu(lvidiu->minUDFWriteRev);
- if (write_rev > UDF_MAX_WRITE_VERSION && !(*flags & SB_RDONLY))
- return -EACCES;
- }
uopt.flags = sbi->s_flags;
uopt.uid = sbi->s_uid;
@@ -988,12 +981,62 @@ static struct udf_bitmap *udf_sb_alloc_bitmap(struct super_block *sb, u32 index)
return bitmap;
}
+static int check_partition_desc(struct super_block *sb,
+ struct partitionDesc *p,
+ struct udf_part_map *map)
+{
+ bool umap, utable, fmap, ftable;
+ struct partitionHeaderDesc *phd;
+
+ switch (le32_to_cpu(p->accessType)) {
+ case PD_ACCESS_TYPE_READ_ONLY:
+ case PD_ACCESS_TYPE_WRITE_ONCE:
+ case PD_ACCESS_TYPE_REWRITABLE:
+ case PD_ACCESS_TYPE_NONE:
+ goto force_ro;
+ }
+
+ /* No Partition Header Descriptor? */
+ if (strcmp(p->partitionContents.ident, PD_PARTITION_CONTENTS_NSR02) &&
+ strcmp(p->partitionContents.ident, PD_PARTITION_CONTENTS_NSR03))
+ goto force_ro;
+
+ phd = (struct partitionHeaderDesc *)p->partitionContentsUse;
+ utable = phd->unallocSpaceTable.extLength;
+ umap = phd->unallocSpaceBitmap.extLength;
+ ftable = phd->freedSpaceTable.extLength;
+ fmap = phd->freedSpaceBitmap.extLength;
+
+ /* No allocation info? */
+ if (!utable && !umap && !ftable && !fmap)
+ goto force_ro;
+
+ /* We don't support blocks that require erasing before overwrite */
+ if (ftable || fmap)
+ goto force_ro;
+ /* UDF 2.60: 2.3.3 - no mixing of tables & bitmaps, no VAT. */
+ if (utable && umap)
+ goto force_ro;
+
+ if (map->s_partition_type == UDF_VIRTUAL_MAP15 ||
+ map->s_partition_type == UDF_VIRTUAL_MAP20)
+ goto force_ro;
+
+ return 0;
+force_ro:
+ if (!sb_rdonly(sb))
+ return -EACCES;
+ UDF_SET_FLAG(sb, UDF_FLAG_RW_INCOMPAT);
+ return 0;
+}
+
static int udf_fill_partdesc_info(struct super_block *sb,
struct partitionDesc *p, int p_index)
{
struct udf_part_map *map;
struct udf_sb_info *sbi = UDF_SB(sb);
struct partitionHeaderDesc *phd;
+ int err;
map = &sbi->s_partmaps[p_index];
@@ -1013,8 +1056,16 @@ static int udf_fill_partdesc_info(struct super_block *sb,
p_index, map->s_partition_type,
map->s_partition_root, map->s_partition_len);
- if (strcmp(p->partitionContents.ident, PD_PARTITION_CONTENTS_NSR02) &&
- strcmp(p->partitionContents.ident, PD_PARTITION_CONTENTS_NSR03))
+ err = check_partition_desc(sb, p, map);
+ if (err)
+ return err;
+
+ /*
+ * Skip loading allocation info it we cannot ever write to the fs.
+ * This is a correctness thing as we may have decided to force ro mount
+ * to avoid allocation info we don't support.
+ */
+ if (UDF_QUERY_FLAG(sb, UDF_FLAG_RW_INCOMPAT))
return 0;
phd = (struct partitionHeaderDesc *)p->partitionContentsUse;
@@ -1050,40 +1101,6 @@ static int udf_fill_partdesc_info(struct super_block *sb,
p_index, bitmap->s_extPosition);
}
- if (phd->partitionIntegrityTable.extLength)
- udf_debug("partitionIntegrityTable (part %d)\n", p_index);
-
- if (phd->freedSpaceTable.extLength) {
- struct kernel_lb_addr loc = {
- .logicalBlockNum = le32_to_cpu(
- phd->freedSpaceTable.extPosition),
- .partitionReferenceNum = p_index,
- };
- struct inode *inode;
-
- inode = udf_iget_special(sb, &loc);
- if (IS_ERR(inode)) {
- udf_debug("cannot load freedSpaceTable (part %d)\n",
- p_index);
- return PTR_ERR(inode);
- }
- map->s_fspace.s_table = inode;
- map->s_partition_flags |= UDF_PART_FLAG_FREED_TABLE;
- udf_debug("freedSpaceTable (part %d) @ %lu\n",
- p_index, map->s_fspace.s_table->i_ino);
- }
-
- if (phd->freedSpaceBitmap.extLength) {
- struct udf_bitmap *bitmap = udf_sb_alloc_bitmap(sb, p_index);
- if (!bitmap)
- return -ENOMEM;
- map->s_fspace.s_bitmap = bitmap;
- bitmap->s_extPosition = le32_to_cpu(
- phd->freedSpaceBitmap.extPosition);
- map->s_partition_flags |= UDF_PART_FLAG_FREED_BITMAP;
- udf_debug("freedSpaceBitmap (part %d) @ %u\n",
- p_index, bitmap->s_extPosition);
- }
return 0;
}
@@ -1257,6 +1274,7 @@ static int udf_load_partdesc(struct super_block *sb, sector_t block)
ret = -EACCES;
goto out_bh;
}
+ UDF_SET_FLAG(sb, UDF_FLAG_RW_INCOMPAT);
ret = udf_load_vat(sb, i, type1_idx);
if (ret < 0)
goto out_bh;
@@ -2155,10 +2173,12 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
UDF_MAX_READ_VERSION);
ret = -EINVAL;
goto error_out;
- } else if (minUDFWriteRev > UDF_MAX_WRITE_VERSION &&
- !sb_rdonly(sb)) {
- ret = -EACCES;
- goto error_out;
+ } else if (minUDFWriteRev > UDF_MAX_WRITE_VERSION) {
+ if (!sb_rdonly(sb)) {
+ ret = -EACCES;
+ goto error_out;
+ }
+ UDF_SET_FLAG(sb, UDF_FLAG_RW_INCOMPAT);
}
sbi->s_udfrev = minUDFWriteRev;
@@ -2176,10 +2196,12 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
}
if (sbi->s_partmaps[sbi->s_partition].s_partition_flags &
- UDF_PART_FLAG_READ_ONLY &&
- !sb_rdonly(sb)) {
- ret = -EACCES;
- goto error_out;
+ UDF_PART_FLAG_READ_ONLY) {
+ if (!sb_rdonly(sb)) {
+ ret = -EACCES;
+ goto error_out;
+ }
+ UDF_SET_FLAG(sb, UDF_FLAG_RW_INCOMPAT);
}
if (udf_find_fileset(sb, &fileset, &rootdir)) {
@@ -2433,10 +2455,6 @@ static unsigned int udf_count_free(struct super_block *sb)
accum += udf_count_free_bitmap(sb,
map->s_uspace.s_bitmap);
}
- if (map->s_partition_flags & UDF_PART_FLAG_FREED_BITMAP) {
- accum += udf_count_free_bitmap(sb,
- map->s_fspace.s_bitmap);
- }
if (accum)
return accum;
@@ -2444,11 +2462,6 @@ static unsigned int udf_count_free(struct super_block *sb)
accum += udf_count_free_table(sb,
map->s_uspace.s_table);
}
- if (map->s_partition_flags & UDF_PART_FLAG_FREED_TABLE) {
- accum += udf_count_free_table(sb,
- map->s_fspace.s_table);
- }
-
return accum;
}
diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h
index 9424d7cab790..3d83be54c474 100644
--- a/fs/udf/udf_sb.h
+++ b/fs/udf/udf_sb.h
@@ -30,11 +30,11 @@
#define UDF_FLAG_LASTBLOCK_SET 16
#define UDF_FLAG_BLOCKSIZE_SET 17
#define UDF_FLAG_INCONSISTENT 18
+#define UDF_FLAG_RW_INCOMPAT 19 /* Set when we find RW incompatible
+ * feature */
#define UDF_PART_FLAG_UNALLOC_BITMAP 0x0001
#define UDF_PART_FLAG_UNALLOC_TABLE 0x0002
-#define UDF_PART_FLAG_FREED_BITMAP 0x0004
-#define UDF_PART_FLAG_FREED_TABLE 0x0008
#define UDF_PART_FLAG_READ_ONLY 0x0010
#define UDF_PART_FLAG_WRITE_ONCE 0x0020
#define UDF_PART_FLAG_REWRITABLE 0x0040
@@ -50,8 +50,6 @@
#define UDF_INVALID_MODE ((umode_t)-1)
-#pragma pack(1) /* XXX(hch): Why? This file just defines in-core structures */
-
#define MF_DUPLICATE_MD 0x01
#define MF_MIRROR_FE_LOADED 0x02
@@ -93,10 +91,6 @@ struct udf_part_map {
struct udf_bitmap *s_bitmap;
struct inode *s_table;
} s_uspace;
- union {
- struct udf_bitmap *s_bitmap;
- struct inode *s_table;
- } s_fspace;
__u32 s_partition_root;
__u32 s_partition_len;
__u16 s_partition_type;
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index bfa0ec69f924..356d2b8568c1 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -1026,7 +1026,7 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
struct userfaultfd_ctx *fork_nctx = NULL;
/* always take the fd_wqh lock before the fault_pending_wqh lock */
- spin_lock(&ctx->fd_wqh.lock);
+ spin_lock_irq(&ctx->fd_wqh.lock);
__add_wait_queue(&ctx->fd_wqh, &wait);
for (;;) {
set_current_state(TASK_INTERRUPTIBLE);
@@ -1112,13 +1112,13 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
ret = -EAGAIN;
break;
}
- spin_unlock(&ctx->fd_wqh.lock);
+ spin_unlock_irq(&ctx->fd_wqh.lock);
schedule();
- spin_lock(&ctx->fd_wqh.lock);
+ spin_lock_irq(&ctx->fd_wqh.lock);
}
__remove_wait_queue(&ctx->fd_wqh, &wait);
__set_current_state(TASK_RUNNING);
- spin_unlock(&ctx->fd_wqh.lock);
+ spin_unlock_irq(&ctx->fd_wqh.lock);
if (!ret && msg->event == UFFD_EVENT_FORK) {
ret = resolve_userfault_fork(ctx, fork_nctx, msg);