diff options
Diffstat (limited to 'fs')
 fs/btrfs/inode.c       |   4
 fs/cifs/cifs_dfs_ref.c |   2
 fs/cifs/cifsfs.c       |   2
 fs/cifs/cifsglob.h     |   7
 fs/cifs/cifsproto.h    |   5
 fs/cifs/cifssmb.c      |   3
 fs/cifs/dir.c          |   1
 fs/cifs/file.c         |  19
 fs/cifs/inode.c        |  16
 fs/cifs/smb1ops.c      |   2
 fs/cifs/smb2inode.c    |   4
 fs/cifs/smb2ops.c      |   3
 fs/cifs/smb2pdu.c      |   1
 fs/crypto/keysetup.c   |   9
 fs/debugfs/file.c      |  17
 fs/ext4/super.c        |   6
 fs/fat/inode.c         |  19
 fs/fcntl.c             |   6
 fs/gfs2/inode.c        |   2
 fs/io-wq.c             |  77
 fs/io-wq.h             |  16
 fs/io_uring.c          | 157
 fs/jbd2/transaction.c  |   8
 fs/locks.c             |  14
 fs/open.c              |   3
 fs/zonefs/Kconfig      |   1
 fs/zonefs/super.c      |   8
 27 files changed, 196 insertions(+), 216 deletions(-)
| diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1ccb3f8d528d..27076ebadb36 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7783,6 +7783,7 @@ static inline blk_status_t btrfs_lookup_and_bind_dio_csum(struct inode *inode,  {  	struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);  	struct btrfs_io_bio *orig_io_bio = btrfs_io_bio(dip->orig_bio); +	u16 csum_size;  	blk_status_t ret;  	/* @@ -7802,7 +7803,8 @@ static inline blk_status_t btrfs_lookup_and_bind_dio_csum(struct inode *inode,  	file_offset -= dip->logical_offset;  	file_offset >>= inode->i_sb->s_blocksize_bits; -	io_bio->csum = (u8 *)(((u32 *)orig_io_bio->csum) + file_offset); +	csum_size = btrfs_super_csum_size(btrfs_sb(inode->i_sb)->super_copy); +	io_bio->csum = orig_io_bio->csum + csum_size * file_offset;  	return 0;  } diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index 606f26d862dc..cc3ada12848d 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -324,6 +324,8 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt)  	if (full_path == NULL)  		goto cdda_exit; +	convert_delimiter(full_path, '\\'); +  	cifs_dbg(FYI, "%s: full_path: %s\n", __func__, full_path);  	if (!cifs_sb_master_tlink(cifs_sb)) { diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 46ebaf3f0824..fa77fe5258b0 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -530,6 +530,8 @@ cifs_show_options(struct seq_file *s, struct dentry *root)  	if (tcon->seal)  		seq_puts(s, ",seal"); +	else if (tcon->ses->server->ignore_signature) +		seq_puts(s, ",signloosely");  	if (tcon->nocase)  		seq_puts(s, ",nocase");  	if (tcon->local_lease) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index de82cfa44b1a..0d956360e984 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -1281,6 +1281,7 @@ struct cifs_fid {  	__u64 volatile_fid;	/* volatile file id for smb2 */  	__u8 lease_key[SMB2_LEASE_KEY_SIZE];	/* lease key for smb2 */  	__u8 create_guid[16]; +	__u32 access;  	struct 
cifs_pending_open *pending_open;  	unsigned int epoch;  #ifdef CONFIG_CIFS_DEBUG2 @@ -1741,6 +1742,12 @@ static inline bool is_retryable_error(int error)  	return false;  } + +/* cifs_get_writable_file() flags */ +#define FIND_WR_ANY         0 +#define FIND_WR_FSUID_ONLY  1 +#define FIND_WR_WITH_DELETE 2 +  #define   MID_FREE 0  #define   MID_REQUEST_ALLOCATED 1  #define   MID_REQUEST_SUBMITTED 2 diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 89eaaf46d1ca..e5cb681ec138 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -134,11 +134,12 @@ extern bool backup_cred(struct cifs_sb_info *);  extern bool is_size_safe_to_change(struct cifsInodeInfo *, __u64 eof);  extern void cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,  			    unsigned int bytes_written); -extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, bool); +extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, int);  extern int cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, -				  bool fsuid_only, +				  int flags,  				  struct cifsFileInfo **ret_file);  extern int cifs_get_writable_path(struct cifs_tcon *tcon, const char *name, +				  int flags,  				  struct cifsFileInfo **ret_file);  extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *, bool);  extern int cifs_get_readable_path(struct cifs_tcon *tcon, const char *name, diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 3c89569e7210..6f6fb3606a5d 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1492,6 +1492,7 @@ openRetry:  	*oplock = rsp->OplockLevel;  	/* cifs fid stays in le */  	oparms->fid->netfid = rsp->Fid; +	oparms->fid->access = desired_access;  	/* Let caller know file was created so we can set the mode. */  	/* Do we care about the CreateAction in any other cases? 
*/ @@ -2115,7 +2116,7 @@ cifs_writev_requeue(struct cifs_writedata *wdata)  		wdata2->tailsz = tailsz;  		wdata2->bytes = cur_len; -		rc = cifs_get_writable_file(CIFS_I(inode), false, +		rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY,  					    &wdata2->cfile);  		if (!wdata2->cfile) {  			cifs_dbg(VFS, "No writable handle to retry writepages rc=%d\n", diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 0ef099442f20..36e7b2fd2190 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -555,7 +555,6 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,  		if (server->ops->close)  			server->ops->close(xid, tcon, &fid);  		cifs_del_pending_open(&open); -		fput(file);  		rc = -ENOMEM;  	} diff --git a/fs/cifs/file.c b/fs/cifs/file.c index bc9516ab4b34..3b942ecdd4be 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1958,7 +1958,7 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,  /* Return -EBADF if no handle is found and general rc otherwise */  int -cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only, +cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,  		       struct cifsFileInfo **ret_file)  {  	struct cifsFileInfo *open_file, *inv_file = NULL; @@ -1966,7 +1966,8 @@ cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only,  	bool any_available = false;  	int rc = -EBADF;  	unsigned int refind = 0; - +	bool fsuid_only = flags & FIND_WR_FSUID_ONLY; +	bool with_delete = flags & FIND_WR_WITH_DELETE;  	*ret_file = NULL;  	/* @@ -1998,6 +1999,8 @@ refind_writable:  			continue;  		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))  			continue; +		if (with_delete && !(open_file->fid.access & DELETE)) +			continue;  		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {  			if (!open_file->invalidHandle) {  				/* found a good writable file */ @@ -2045,12 +2048,12 @@ refind_writable:  }  struct cifsFileInfo * -find_writable_file(struct cifsInodeInfo *cifs_inode, bool 
fsuid_only) +find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)  {  	struct cifsFileInfo *cfile;  	int rc; -	rc = cifs_get_writable_file(cifs_inode, fsuid_only, &cfile); +	rc = cifs_get_writable_file(cifs_inode, flags, &cfile);  	if (rc)  		cifs_dbg(FYI, "couldn't find writable handle rc=%d", rc); @@ -2059,6 +2062,7 @@ find_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only)  int  cifs_get_writable_path(struct cifs_tcon *tcon, const char *name, +		       int flags,  		       struct cifsFileInfo **ret_file)  {  	struct list_head *tmp; @@ -2085,7 +2089,7 @@ cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,  		kfree(full_path);  		cinode = CIFS_I(d_inode(cfile->dentry));  		spin_unlock(&tcon->open_file_lock); -		return cifs_get_writable_file(cinode, 0, ret_file); +		return cifs_get_writable_file(cinode, flags, ret_file);  	}  	spin_unlock(&tcon->open_file_lock); @@ -2162,7 +2166,8 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)  	if (mapping->host->i_size - offset < (loff_t)to)  		to = (unsigned)(mapping->host->i_size - offset); -	rc = cifs_get_writable_file(CIFS_I(mapping->host), false, &open_file); +	rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY, +				    &open_file);  	if (!rc) {  		bytes_written = cifs_write(open_file, open_file->pid,  					   write_data, to - from, &offset); @@ -2355,7 +2360,7 @@ retry:  		if (cfile)  			cifsFileInfo_put(cfile); -		rc = cifs_get_writable_file(CIFS_I(inode), false, &cfile); +		rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);  		/* in case of an error store it to return later */  		if (rc) diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index b5e6635c578e..1e8a4b1579db 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -653,8 +653,8 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,  		 */  		if ((fattr->cf_nlink < 1) && !tcon->unix_ext &&  		    !info->DeletePending) { -			cifs_dbg(1, "bogus file 
nlink value %u\n", -				fattr->cf_nlink); +			cifs_dbg(VFS, "bogus file nlink value %u\n", +				 fattr->cf_nlink);  			fattr->cf_flags |= CIFS_FATTR_UNKNOWN_NLINK;  		}  	} @@ -2073,6 +2073,7 @@ int cifs_revalidate_dentry_attr(struct dentry *dentry)  	struct inode *inode = d_inode(dentry);  	struct super_block *sb = dentry->d_sb;  	char *full_path = NULL; +	int count = 0;  	if (inode == NULL)  		return -ENOENT; @@ -2094,15 +2095,18 @@ int cifs_revalidate_dentry_attr(struct dentry *dentry)  		 full_path, inode, inode->i_count.counter,  		 dentry, cifs_get_time(dentry), jiffies); +again:  	if (cifs_sb_master_tcon(CIFS_SB(sb))->unix_ext)  		rc = cifs_get_inode_info_unix(&inode, full_path, sb, xid);  	else  		rc = cifs_get_inode_info(&inode, full_path, NULL, sb,  					 xid, NULL); - +	if (rc == -EAGAIN && count++ < 10) +		goto again;  out:  	kfree(full_path);  	free_xid(xid); +  	return rc;  } @@ -2278,7 +2282,7 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs,  	 * writebehind data than the SMB timeout for the SetPathInfo  	 * request would allow  	 */ -	open_file = find_writable_file(cifsInode, true); +	open_file = find_writable_file(cifsInode, FIND_WR_FSUID_ONLY);  	if (open_file) {  		tcon = tlink_tcon(open_file->tlink);  		server = tcon->ses->server; @@ -2428,7 +2432,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)  		args->ctime = NO_CHANGE_64;  	args->device = 0; -	open_file = find_writable_file(cifsInode, true); +	open_file = find_writable_file(cifsInode, FIND_WR_FSUID_ONLY);  	if (open_file) {  		u16 nfid = open_file->fid.netfid;  		u32 npid = open_file->pid; @@ -2531,7 +2535,7 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)  	rc = 0;  	if (attrs->ia_valid & ATTR_MTIME) { -		rc = cifs_get_writable_file(cifsInode, false, &wfile); +		rc = cifs_get_writable_file(cifsInode, FIND_WR_ANY, &wfile);  		if (!rc) {  			tcon = tlink_tcon(wfile->tlink);  			rc = tcon->ses->server->ops->flush(xid, tcon, &wfile->fid); 
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index eb994e313c6a..b130efaf8feb 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -766,7 +766,7 @@ smb_set_file_info(struct inode *inode, const char *full_path,  	struct cifs_tcon *tcon;  	/* if the file is already open for write, just use that fileid */ -	open_file = find_writable_file(cinode, true); +	open_file = find_writable_file(cinode, FIND_WR_FSUID_ONLY);  	if (open_file) {  		fid.netfid = open_file->fid.netfid;  		netpid = open_file->pid; diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index 1cf207564ff9..a8c301ae00ed 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -521,7 +521,7 @@ smb2_mkdir_setinfo(struct inode *inode, const char *name,  	cifs_i = CIFS_I(inode);  	dosattrs = cifs_i->cifsAttrs | ATTR_READONLY;  	data.Attributes = cpu_to_le32(dosattrs); -	cifs_get_writable_path(tcon, name, &cfile); +	cifs_get_writable_path(tcon, name, FIND_WR_ANY, &cfile);  	tmprc = smb2_compound_op(xid, tcon, cifs_sb, name,  				 FILE_WRITE_ATTRIBUTES, FILE_CREATE,  				 CREATE_NOT_FILE, ACL_NO_MODE, @@ -577,7 +577,7 @@ smb2_rename_path(const unsigned int xid, struct cifs_tcon *tcon,  {  	struct cifsFileInfo *cfile; -	cifs_get_writable_path(tcon, from_name, &cfile); +	cifs_get_writable_path(tcon, from_name, FIND_WR_WITH_DELETE, &cfile);  	return smb2_set_path_attr(xid, tcon, from_name, to_name,  				  cifs_sb, DELETE, SMB2_OP_RENAME, cfile); diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index e47190cae163..c31e84ee3c39 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -1364,6 +1364,7 @@ smb2_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock)  	cfile->fid.persistent_fid = fid->persistent_fid;  	cfile->fid.volatile_fid = fid->volatile_fid; +	cfile->fid.access = fid->access;  #ifdef CONFIG_CIFS_DEBUG2  	cfile->fid.mid = fid->mid;  #endif /* CIFS_DEBUG2 */ @@ -3327,7 +3328,7 @@ static loff_t smb3_llseek(struct file *file, struct cifs_tcon *tcon, loff_t offs  	 * 
some servers (Windows2016) will not reflect recent writes in  	 * QUERY_ALLOCATED_RANGES until SMB2_flush is called.  	 */ -	wrcfile = find_writable_file(cifsi, false); +	wrcfile = find_writable_file(cifsi, FIND_WR_ANY);  	if (wrcfile) {  		filemap_write_and_wait(inode->i_mapping);  		smb2_flush_file(xid, tcon, &wrcfile->fid); diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 1234f9ccab03..28c0be5e69b7 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -2771,6 +2771,7 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,  	atomic_inc(&tcon->num_remote_opens);  	oparms->fid->persistent_fid = rsp->PersistentFileId;  	oparms->fid->volatile_fid = rsp->VolatileFileId; +	oparms->fid->access = oparms->desired_access;  #ifdef CONFIG_CIFS_DEBUG2  	oparms->fid->mid = le64_to_cpu(rsp->sync_hdr.MessageId);  #endif /* CIFS_DEBUG2 */ diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c index 65cb09fa6ead..08c9f216a54d 100644 --- a/fs/crypto/keysetup.c +++ b/fs/crypto/keysetup.c @@ -539,6 +539,15 @@ int fscrypt_drop_inode(struct inode *inode)  	mk = ci->ci_master_key->payload.data[0];  	/* +	 * With proper, non-racy use of FS_IOC_REMOVE_ENCRYPTION_KEY, all inodes +	 * protected by the key were cleaned by sync_filesystem().  But if +	 * userspace is still using the files, inodes can be dirtied between +	 * then and now.  We mustn't lose any writes, so skip dirty inodes here. +	 */ +	if (inode->i_state & I_DIRTY_ALL) +		return 0; + +	/*  	 * Note: since we aren't holding ->mk_secret_sem, the result here can  	 * immediately become outdated.  But there's no correctness problem with  	 * unnecessarily evicting.  
Nor is there a correctness problem with not diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index 634b09d18b77..db987b5110a9 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -1090,21 +1090,12 @@ static const struct file_operations fops_regset32 = {   * This function creates a file in debugfs with the given name that reports   * the names and values of a set of 32-bit registers. If the @mode variable   * is so set it can be read from. Writing is not supported. - * - * This function will return a pointer to a dentry if it succeeds.  This - * pointer must be passed to the debugfs_remove() function when the file is - * to be removed (no automatic cleanup happens if your module is unloaded, - * you are responsible here.)  If an error occurs, ERR_PTR(-ERROR) will be - * returned. - * - * If debugfs is not enabled in the kernel, the value ERR_PTR(-ENODEV) will - * be returned.   */ -struct dentry *debugfs_create_regset32(const char *name, umode_t mode, -				       struct dentry *parent, -				       struct debugfs_regset32 *regset) +void debugfs_create_regset32(const char *name, umode_t mode, +			     struct dentry *parent, +			     struct debugfs_regset32 *regset)  { -	return debugfs_create_file(name, mode, parent, regset, &fops_regset32); +	debugfs_create_file(name, mode, parent, regset, &fops_regset32);  }  EXPORT_SYMBOL_GPL(debugfs_create_regset32); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index ff1b764b0c0e..0c7c4adb664e 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2391,7 +2391,7 @@ int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup)  {  	struct ext4_sb_info *sbi = EXT4_SB(sb);  	struct flex_groups **old_groups, **new_groups; -	int size, i; +	int size, i, j;  	if (!sbi->s_log_groups_per_flex)  		return 0; @@ -2412,8 +2412,8 @@ int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup)  					 sizeof(struct flex_groups)),  					 GFP_KERNEL);  		if (!new_groups[i]) { -			for (i--; i >= 
sbi->s_flex_groups_allocated; i--) -				kvfree(new_groups[i]); +			for (j = sbi->s_flex_groups_allocated; j < i; j++) +				kvfree(new_groups[j]);  			kvfree(new_groups);  			ext4_msg(sb, KERN_ERR,  				 "not enough memory for %d flex groups", size); diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 594b05ae16c9..71946da84388 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -750,6 +750,13 @@ static struct inode *fat_alloc_inode(struct super_block *sb)  		return NULL;  	init_rwsem(&ei->truncate_lock); +	/* Zeroing to allow iput() even if partial initialized inode. */ +	ei->mmu_private = 0; +	ei->i_start = 0; +	ei->i_logstart = 0; +	ei->i_attrs = 0; +	ei->i_pos = 0; +  	return &ei->vfs_inode;  } @@ -1374,16 +1381,6 @@ out:  	return 0;  } -static void fat_dummy_inode_init(struct inode *inode) -{ -	/* Initialize this dummy inode to work as no-op. */ -	MSDOS_I(inode)->mmu_private = 0; -	MSDOS_I(inode)->i_start = 0; -	MSDOS_I(inode)->i_logstart = 0; -	MSDOS_I(inode)->i_attrs = 0; -	MSDOS_I(inode)->i_pos = 0; -} -  static int fat_read_root(struct inode *inode)  {  	struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); @@ -1844,13 +1841,11 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,  	fat_inode = new_inode(sb);  	if (!fat_inode)  		goto out_fail; -	fat_dummy_inode_init(fat_inode);  	sbi->fat_inode = fat_inode;  	fsinfo_inode = new_inode(sb);  	if (!fsinfo_inode)  		goto out_fail; -	fat_dummy_inode_init(fsinfo_inode);  	fsinfo_inode->i_ino = MSDOS_FSINFO_INO;  	sbi->fsinfo_inode = fsinfo_inode;  	insert_inode_hash(fsinfo_inode); diff --git a/fs/fcntl.c b/fs/fcntl.c index 9bc167562ee8..2e4c0fa2074b 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -735,8 +735,9 @@ static void send_sigio_to_task(struct task_struct *p,  		return;  	switch (signum) { -		kernel_siginfo_t si; -		default: +		default: { +			kernel_siginfo_t si; +  			/* Queue a rt signal with the appropriate fd as its  			   value.  
We use SI_SIGIO as the source, not   			   SI_KERNEL, since kernel signals always get  @@ -769,6 +770,7 @@ static void send_sigio_to_task(struct task_struct *p,  			si.si_fd    = fd;  			if (!do_send_sig_info(signum, &si, p, type))  				break; +		}  		/* fall-through - fall back on the old plain SIGIO signal */  		case 0:  			do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, type); diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 2716d56ed0a0..8294851a9dd9 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -1248,7 +1248,7 @@ static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry,  		if (!(file->f_mode & FMODE_OPENED))  			return finish_no_open(file, d);  		dput(d); -		return 0; +		return excl && (flags & O_CREAT) ? -EEXIST : 0;  	}  	BUG_ON(d != NULL); diff --git a/fs/io-wq.c b/fs/io-wq.c index 0a5ab1a8f69a..5cef075c0b37 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -502,7 +502,7 @@ next:  		if (worker->mm)  			work->flags |= IO_WQ_WORK_HAS_MM; -		if (wq->get_work && !(work->flags & IO_WQ_WORK_INTERNAL)) { +		if (wq->get_work) {  			put_work = work;  			wq->get_work(work);  		} @@ -535,42 +535,23 @@ next:  	} while (1);  } -static inline void io_worker_spin_for_work(struct io_wqe *wqe) -{ -	int i = 0; - -	while (++i < 1000) { -		if (io_wqe_run_queue(wqe)) -			break; -		if (need_resched()) -			break; -		cpu_relax(); -	} -} -  static int io_wqe_worker(void *data)  {  	struct io_worker *worker = data;  	struct io_wqe *wqe = worker->wqe;  	struct io_wq *wq = wqe->wq; -	bool did_work;  	io_worker_start(wqe, worker); -	did_work = false;  	while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)) {  		set_current_state(TASK_INTERRUPTIBLE);  loop: -		if (did_work) -			io_worker_spin_for_work(wqe);  		spin_lock_irq(&wqe->lock);  		if (io_wqe_run_queue(wqe)) {  			__set_current_state(TASK_RUNNING);  			io_worker_handle_work(worker); -			did_work = true;  			goto loop;  		} -		did_work = false;  		/* drops the lock on success, retry */  		if (__io_worker_idle(wqe, worker)) {  	
		__release(&wqe->lock); @@ -766,6 +747,17 @@ static bool io_wq_can_queue(struct io_wqe *wqe, struct io_wqe_acct *acct,  	return true;  } +static void io_run_cancel(struct io_wq_work *work) +{ +	do { +		struct io_wq_work *old_work = work; + +		work->flags |= IO_WQ_WORK_CANCEL; +		work->func(&work); +		work = (work == old_work) ? NULL : work; +	} while (work); +} +  static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)  {  	struct io_wqe_acct *acct = io_work_get_acct(wqe, work); @@ -779,8 +771,7 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)  	 * It's close enough to not be an issue, fork() has the same delay.  	 */  	if (unlikely(!io_wq_can_queue(wqe, acct, work))) { -		work->flags |= IO_WQ_WORK_CANCEL; -		work->func(&work); +		io_run_cancel(work);  		return;  	} @@ -919,8 +910,7 @@ static enum io_wq_cancel io_wqe_cancel_cb_work(struct io_wqe *wqe,  	spin_unlock_irqrestore(&wqe->lock, flags);  	if (found) { -		work->flags |= IO_WQ_WORK_CANCEL; -		work->func(&work); +		io_run_cancel(work);  		return IO_WQ_CANCEL_OK;  	} @@ -995,8 +985,7 @@ static enum io_wq_cancel io_wqe_cancel_work(struct io_wqe *wqe,  	spin_unlock_irqrestore(&wqe->lock, flags);  	if (found) { -		work->flags |= IO_WQ_WORK_CANCEL; -		work->func(&work); +		io_run_cancel(work);  		return IO_WQ_CANCEL_OK;  	} @@ -1068,42 +1057,6 @@ enum io_wq_cancel io_wq_cancel_pid(struct io_wq *wq, pid_t pid)  	return ret;  } -struct io_wq_flush_data { -	struct io_wq_work work; -	struct completion done; -}; - -static void io_wq_flush_func(struct io_wq_work **workptr) -{ -	struct io_wq_work *work = *workptr; -	struct io_wq_flush_data *data; - -	data = container_of(work, struct io_wq_flush_data, work); -	complete(&data->done); -} - -/* - * Doesn't wait for previously queued work to finish. When this completes, - * it just means that previously queued work was started. 
- */ -void io_wq_flush(struct io_wq *wq) -{ -	struct io_wq_flush_data data; -	int node; - -	for_each_node(node) { -		struct io_wqe *wqe = wq->wqes[node]; - -		if (!node_online(node)) -			continue; -		init_completion(&data.done); -		INIT_IO_WORK(&data.work, io_wq_flush_func); -		data.work.flags |= IO_WQ_WORK_INTERNAL; -		io_wqe_enqueue(wqe, &data.work); -		wait_for_completion(&data.done); -	} -} -  struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)  {  	int ret = -ENOMEM, node; diff --git a/fs/io-wq.h b/fs/io-wq.h index ccc7d84af57d..e5e15f2c93ec 100644 --- a/fs/io-wq.h +++ b/fs/io-wq.h @@ -8,7 +8,6 @@ enum {  	IO_WQ_WORK_HAS_MM	= 2,  	IO_WQ_WORK_HASHED	= 4,  	IO_WQ_WORK_UNBOUND	= 32, -	IO_WQ_WORK_INTERNAL	= 64,  	IO_WQ_WORK_CB		= 128,  	IO_WQ_WORK_NO_CANCEL	= 256,  	IO_WQ_WORK_CONCURRENT	= 512, @@ -79,16 +78,10 @@ struct io_wq_work {  	pid_t task_pid;  }; -#define INIT_IO_WORK(work, _func)			\ -	do {						\ -		(work)->list.next = NULL;		\ -		(work)->func = _func;			\ -		(work)->files = NULL;			\ -		(work)->mm = NULL;			\ -		(work)->creds = NULL;			\ -		(work)->fs = NULL;			\ -		(work)->flags = 0;			\ -	} while (0)					\ +#define INIT_IO_WORK(work, _func)				\ +	do {							\ +		*(work) = (struct io_wq_work){ .func = _func };	\ +	} while (0)						\  typedef void (get_work_fn)(struct io_wq_work *);  typedef void (put_work_fn)(struct io_wq_work *); @@ -106,7 +99,6 @@ void io_wq_destroy(struct io_wq *wq);  void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work);  void io_wq_enqueue_hashed(struct io_wq *wq, struct io_wq_work *work, void *val); -void io_wq_flush(struct io_wq *wq);  void io_wq_cancel_all(struct io_wq *wq);  enum io_wq_cancel io_wq_cancel_work(struct io_wq *wq, struct io_wq_work *cwork); diff --git a/fs/io_uring.c b/fs/io_uring.c index de650df9ac53..c06082bb039a 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -183,19 +183,15 @@ struct fixed_file_table {  	struct file		**files;  }; -enum { -	FFD_F_ATOMIC, -}; -  struct 
fixed_file_data {  	struct fixed_file_table		*table;  	struct io_ring_ctx		*ctx;  	struct percpu_ref		refs;  	struct llist_head		put_llist; -	unsigned long			state;  	struct work_struct		ref_work;  	struct completion		done; +	struct rcu_head			rcu;  };  struct io_ring_ctx { @@ -1004,6 +1000,7 @@ static void io_kill_timeout(struct io_kiocb *req)  	if (ret != -1) {  		atomic_inc(&req->ctx->cq_timeouts);  		list_del_init(&req->list); +		req->flags |= REQ_F_COMP_LOCKED;  		io_cqring_fill_event(req, 0);  		io_put_req(req);  	} @@ -1483,10 +1480,10 @@ static void io_free_req(struct io_kiocb *req)  __attribute__((nonnull))  static void io_put_req_find_next(struct io_kiocb *req, struct io_kiocb **nxtptr)  { -	io_req_find_next(req, nxtptr); - -	if (refcount_dec_and_test(&req->refs)) +	if (refcount_dec_and_test(&req->refs)) { +		io_req_find_next(req, nxtptr);  		__io_free_req(req); +	}  }  static void io_put_req(struct io_kiocb *req) @@ -1821,6 +1818,10 @@ static void io_iopoll_req_issued(struct io_kiocb *req)  		list_add(&req->list, &ctx->poll_list);  	else  		list_add_tail(&req->list, &ctx->poll_list); + +	if ((ctx->flags & IORING_SETUP_SQPOLL) && +	    wq_has_sleeper(&ctx->sqo_wait)) +		wake_up(&ctx->sqo_wait);  }  static void io_file_put(struct io_submit_state *state) @@ -2071,7 +2072,7 @@ static ssize_t io_import_iovec(int rw, struct io_kiocb *req,  		ssize_t ret;  		ret = import_single_range(rw, buf, sqe_len, *iovec, iter);  		*iovec = NULL; -		return ret; +		return ret < 0 ? 
ret : sqe_len;  	}  	if (req->io) { @@ -3002,6 +3003,11 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)  	sr->msg = u64_to_user_ptr(READ_ONCE(sqe->addr));  	sr->len = READ_ONCE(sqe->len); +#ifdef CONFIG_COMPAT +	if (req->ctx->compat) +		sr->msg_flags |= MSG_CMSG_COMPAT; +#endif +  	if (!io || req->opcode == IORING_OP_SEND)  		return 0;  	/* iovec is already imported */ @@ -3154,6 +3160,11 @@ static int io_recvmsg_prep(struct io_kiocb *req,  	sr->msg = u64_to_user_ptr(READ_ONCE(sqe->addr));  	sr->len = READ_ONCE(sqe->len); +#ifdef CONFIG_COMPAT +	if (req->ctx->compat) +		sr->msg_flags |= MSG_CMSG_COMPAT; +#endif +  	if (!io || req->opcode == IORING_OP_RECV)  		return 0;  	/* iovec is already imported */ @@ -4705,11 +4716,21 @@ static void __io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe)  {  	struct io_kiocb *linked_timeout;  	struct io_kiocb *nxt = NULL; +	const struct cred *old_creds = NULL;  	int ret;  again:  	linked_timeout = io_prep_linked_timeout(req); +	if (req->work.creds && req->work.creds != current_cred()) { +		if (old_creds) +			revert_creds(old_creds); +		if (old_creds == req->work.creds) +			old_creds = NULL; /* restored original creds */ +		else +			old_creds = override_creds(req->work.creds); +	} +  	ret = io_issue_sqe(req, sqe, &nxt, true);  	/* @@ -4735,7 +4756,7 @@ punt:  err:  	/* drop submission reference */ -	io_put_req(req); +	io_put_req_find_next(req, &nxt);  	if (linked_timeout) {  		if (!ret) @@ -4759,6 +4780,8 @@ done_req:  			goto punt;  		goto again;  	} +	if (old_creds) +		revert_creds(old_creds);  }  static void io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe) @@ -4803,7 +4826,6 @@ static inline void io_queue_link_head(struct io_kiocb *req)  static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,  			  struct io_submit_state *state, struct io_kiocb **link)  { -	const struct cred *old_creds = NULL;  	struct io_ring_ctx *ctx = req->ctx;  	
unsigned int sqe_flags;  	int ret, id; @@ -4818,14 +4840,12 @@ static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,  	id = READ_ONCE(sqe->personality);  	if (id) { -		const struct cred *personality_creds; - -		personality_creds = idr_find(&ctx->personality_idr, id); -		if (unlikely(!personality_creds)) { +		req->work.creds = idr_find(&ctx->personality_idr, id); +		if (unlikely(!req->work.creds)) {  			ret = -EINVAL;  			goto err_req;  		} -		old_creds = override_creds(personality_creds); +		get_cred(req->work.creds);  	}  	/* same numerical values with corresponding REQ_F_*, safe to copy */ @@ -4837,8 +4857,6 @@ static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,  err_req:  		io_cqring_add_event(req, ret);  		io_double_put_req(req); -		if (old_creds) -			revert_creds(old_creds);  		return false;  	} @@ -4899,8 +4917,6 @@ err_req:  		}  	} -	if (old_creds) -		revert_creds(old_creds);  	return true;  } @@ -5081,9 +5097,8 @@ static int io_sq_thread(void *data)  	const struct cred *old_cred;  	mm_segment_t old_fs;  	DEFINE_WAIT(wait); -	unsigned inflight;  	unsigned long timeout; -	int ret; +	int ret = 0;  	complete(&ctx->completions[1]); @@ -5091,39 +5106,19 @@ static int io_sq_thread(void *data)  	set_fs(USER_DS);  	old_cred = override_creds(ctx->creds); -	ret = timeout = inflight = 0; +	timeout = jiffies + ctx->sq_thread_idle;  	while (!kthread_should_park()) {  		unsigned int to_submit; -		if (inflight) { +		if (!list_empty(&ctx->poll_list)) {  			unsigned nr_events = 0; -			if (ctx->flags & IORING_SETUP_IOPOLL) { -				/* -				 * inflight is the count of the maximum possible -				 * entries we submitted, but it can be smaller -				 * if we dropped some of them. If we don't have -				 * poll entries available, then we know that we -				 * have nothing left to poll for. Reset the -				 * inflight count to zero in that case. 
-				 */ -				mutex_lock(&ctx->uring_lock); -				if (!list_empty(&ctx->poll_list)) -					io_iopoll_getevents(ctx, &nr_events, 0); -				else -					inflight = 0; -				mutex_unlock(&ctx->uring_lock); -			} else { -				/* -				 * Normal IO, just pretend everything completed. -				 * We don't have to poll completions for that. -				 */ -				nr_events = inflight; -			} - -			inflight -= nr_events; -			if (!inflight) +			mutex_lock(&ctx->uring_lock); +			if (!list_empty(&ctx->poll_list)) +				io_iopoll_getevents(ctx, &nr_events, 0); +			else  				timeout = jiffies + ctx->sq_thread_idle; +			mutex_unlock(&ctx->uring_lock);  		}  		to_submit = io_sqring_entries(ctx); @@ -5152,7 +5147,7 @@ static int io_sq_thread(void *data)  			 * more IO, we should wait for the application to  			 * reap events and wake us up.  			 */ -			if (inflight || +			if (!list_empty(&ctx->poll_list) ||  			    (!time_after(jiffies, timeout) && ret != -EBUSY &&  			    !percpu_ref_is_dying(&ctx->refs))) {  				cond_resched(); @@ -5162,6 +5157,19 @@ static int io_sq_thread(void *data)  			prepare_to_wait(&ctx->sqo_wait, &wait,  						TASK_INTERRUPTIBLE); +			/* +			 * While doing polled IO, before going to sleep, we need +			 * to check if there are new reqs added to poll_list, it +			 * is because reqs may have been punted to io worker and +			 * will be added to poll_list later, hence check the +			 * poll_list again. 
+			 */ +			if ((ctx->flags & IORING_SETUP_IOPOLL) && +			    !list_empty_careful(&ctx->poll_list)) { +				finish_wait(&ctx->sqo_wait, &wait); +				continue; +			} +  			/* Tell userspace we may need a wakeup call */  			ctx->rings->sq_flags |= IORING_SQ_NEED_WAKEUP;  			/* make sure to read SQ tail after writing flags */ @@ -5189,8 +5197,7 @@ static int io_sq_thread(void *data)  		mutex_lock(&ctx->uring_lock);  		ret = io_submit_sqes(ctx, to_submit, NULL, -1, &cur_mm, true);  		mutex_unlock(&ctx->uring_lock); -		if (ret > 0) -			inflight += ret; +		timeout = jiffies + ctx->sq_thread_idle;  	}  	set_fs(old_fs); @@ -5324,6 +5331,26 @@ static void io_file_ref_kill(struct percpu_ref *ref)  	complete(&data->done);  } +static void __io_file_ref_exit_and_free(struct rcu_head *rcu) +{ +	struct fixed_file_data *data = container_of(rcu, struct fixed_file_data, +							rcu); +	percpu_ref_exit(&data->refs); +	kfree(data); +} + +static void io_file_ref_exit_and_free(struct rcu_head *rcu) +{ +	/* +	 * We need to order our exit+free call against the potentially +	 * existing call_rcu() for switching to atomic. One way to do that +	 * is to have this rcu callback queue the final put and free, as we +	 * could otherwise have a pre-existing atomic switch complete _after_ +	 * the free callback we queued. 
+	 */ +	call_rcu(rcu, __io_file_ref_exit_and_free); +} +  static int io_sqe_files_unregister(struct io_ring_ctx *ctx)  {  	struct fixed_file_data *data = ctx->file_data; @@ -5336,14 +5363,13 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)  	flush_work(&data->ref_work);  	wait_for_completion(&data->done);  	io_ring_file_ref_flush(data); -	percpu_ref_exit(&data->refs);  	__io_sqe_files_unregister(ctx);  	nr_tables = DIV_ROUND_UP(ctx->nr_user_files, IORING_MAX_FILES_TABLE);  	for (i = 0; i < nr_tables; i++)  		kfree(data->table[i].files);  	kfree(data->table); -	kfree(data); +	call_rcu(&data->rcu, io_file_ref_exit_and_free);  	ctx->file_data = NULL;  	ctx->nr_user_files = 0;  	return 0; @@ -5595,7 +5621,6 @@ static void io_ring_file_ref_switch(struct work_struct *work)  	data = container_of(work, struct fixed_file_data, ref_work);  	io_ring_file_ref_flush(data); -	percpu_ref_get(&data->refs);  	percpu_ref_switch_to_percpu(&data->refs);  } @@ -5771,8 +5796,13 @@ static void io_atomic_switch(struct percpu_ref *ref)  {  	struct fixed_file_data *data; +	/* +	 * Juggle reference to ensure we hit zero, if needed, so we can +	 * switch back to percpu mode +	 */  	data = container_of(ref, struct fixed_file_data, refs); -	clear_bit(FFD_F_ATOMIC, &data->state); +	percpu_ref_put(&data->refs); +	percpu_ref_get(&data->refs);  }  static bool io_queue_file_removal(struct fixed_file_data *data, @@ -5795,11 +5825,7 @@ static bool io_queue_file_removal(struct fixed_file_data *data,  	llist_add(&pfile->llist, &data->put_llist);  	if (pfile == &pfile_stack) { -		if (!test_and_set_bit(FFD_F_ATOMIC, &data->state)) { -			percpu_ref_put(&data->refs); -			percpu_ref_switch_to_atomic(&data->refs, -							io_atomic_switch); -		} +		percpu_ref_switch_to_atomic(&data->refs, io_atomic_switch);  		wait_for_completion(&done);  		flush_work(&data->ref_work);  		return false; @@ -5873,10 +5899,8 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,  		up->offset++;  	} -	if 
(ref_switch && !test_and_set_bit(FFD_F_ATOMIC, &data->state)) { -		percpu_ref_put(&data->refs); +	if (ref_switch)  		percpu_ref_switch_to_atomic(&data->refs, io_atomic_switch); -	}  	return done ? done : err;  } @@ -6334,6 +6358,7 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)  	io_sqe_buffer_unregister(ctx);  	io_sqe_files_unregister(ctx);  	io_eventfd_unregister(ctx); +	idr_destroy(&ctx->personality_idr);  #if defined(CONFIG_UNIX)  	if (ctx->ring_sock) { @@ -6647,6 +6672,7 @@ out_fput:  	return submitted ? submitted : ret;  } +#ifdef CONFIG_PROC_FS  static int io_uring_show_cred(int id, void *p, void *data)  {  	const struct cred *cred = p; @@ -6720,6 +6746,7 @@ static void io_uring_show_fdinfo(struct seq_file *m, struct file *f)  		percpu_ref_put(&ctx->refs);  	}  } +#endif  static const struct file_operations io_uring_fops = {  	.release	= io_uring_release, @@ -6731,7 +6758,9 @@ static const struct file_operations io_uring_fops = {  #endif  	.poll		= io_uring_poll,  	.fasync		= io_uring_fasync, +#ifdef CONFIG_PROC_FS  	.show_fdinfo	= io_uring_show_fdinfo, +#endif  };  static int io_allocate_scq_urings(struct io_ring_ctx *ctx, diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index d181948c0390..3dccc23cf010 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1150,8 +1150,8 @@ static bool jbd2_write_access_granted(handle_t *handle, struct buffer_head *bh,  	/* For undo access buffer must have data copied */  	if (undo && !jh->b_committed_data)  		goto out; -	if (jh->b_transaction != handle->h_transaction && -	    jh->b_next_transaction != handle->h_transaction) +	if (READ_ONCE(jh->b_transaction) != handle->h_transaction && +	    READ_ONCE(jh->b_next_transaction) != handle->h_transaction)  		goto out;  	/*  	 * There are two reasons for the barrier here: @@ -2569,8 +2569,8 @@ bool __jbd2_journal_refile_buffer(struct journal_head *jh)  	 * our jh reference and thus __jbd2_journal_file_buffer() must not  	 * take a new one.  	 
*/ -	jh->b_transaction = jh->b_next_transaction; -	jh->b_next_transaction = NULL; +	WRITE_ONCE(jh->b_transaction, jh->b_next_transaction); +	WRITE_ONCE(jh->b_next_transaction, NULL);  	if (buffer_freed(bh))  		jlist = BJ_Forget;  	else if (jh->b_modified) diff --git a/fs/locks.c b/fs/locks.c index 44b6da032842..426b55d333d5 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -753,20 +753,6 @@ int locks_delete_block(struct file_lock *waiter)  {  	int status = -ENOENT; -	/* -	 * If fl_blocker is NULL, it won't be set again as this thread -	 * "owns" the lock and is the only one that might try to claim -	 * the lock.  So it is safe to test fl_blocker locklessly. -	 * Also if fl_blocker is NULL, this waiter is not listed on -	 * fl_blocked_requests for some lock, so no other request can -	 * be added to the list of fl_blocked_requests for this -	 * request.  So if fl_blocker is NULL, it is safe to -	 * locklessly check if fl_blocked_requests is empty.  If both -	 * of these checks succeed, there is no need to take the lock. -	 */ -	if (waiter->fl_blocker == NULL && -	    list_empty(&waiter->fl_blocked_requests)) -		return status;  	spin_lock(&blocked_lock_lock);  	if (waiter->fl_blocker)  		status = 0; diff --git a/fs/open.c b/fs/open.c index 0788b3715731..b69d6eed67e6 100644 --- a/fs/open.c +++ b/fs/open.c @@ -860,9 +860,6 @@ cleanup_file:   * the return value of d_splice_alias(), then the caller needs to perform dput()   * on it after finish_open().   * - * On successful return @file is a fully instantiated open file.  After this, if - * an error occurs in ->atomic_open(), it needs to clean up with fput(). - *   * Returns zero on success or -errno if the open failed.   
*/  int finish_open(struct file *file, struct dentry *dentry, diff --git a/fs/zonefs/Kconfig b/fs/zonefs/Kconfig index fb87ad372e29..ef2697b78820 100644 --- a/fs/zonefs/Kconfig +++ b/fs/zonefs/Kconfig @@ -2,6 +2,7 @@ config ZONEFS_FS  	tristate "zonefs filesystem support"  	depends on BLOCK  	depends on BLK_DEV_ZONED +	select FS_IOMAP  	help  	  zonefs is a simple file system which exposes zones of a zoned block  	  device (e.g. host-managed or host-aware SMR disk drives) as files. diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index 8bc6ef82d693..69aee3dfb660 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -601,13 +601,13 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)  	ssize_t ret;  	/* -	 * For async direct IOs to sequential zone files, ignore IOCB_NOWAIT +	 * For async direct IOs to sequential zone files, refuse IOCB_NOWAIT  	 * as this can cause write reordering (e.g. the first aio gets EAGAIN  	 * on the inode lock but the second goes through but is now unaligned).  	 */ -	if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && !is_sync_kiocb(iocb) -	    && (iocb->ki_flags & IOCB_NOWAIT)) -		iocb->ki_flags &= ~IOCB_NOWAIT; +	if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && !is_sync_kiocb(iocb) && +	    (iocb->ki_flags & IOCB_NOWAIT)) +		return -EOPNOTSUPP;  	if (iocb->ki_flags & IOCB_NOWAIT) {  		if (!inode_trylock(inode)) |