diff options
Diffstat (limited to 'fs')
47 files changed, 436 insertions, 358 deletions
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 0c9ab62c3df4..9dcaed031843 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -1553,6 +1553,7 @@ static inline void free_dfs_info_array(struct dfs_info3_param *param, /* Flags */ #define MID_WAIT_CANCELLED 1 /* Cancelled while waiting for response */ +#define MID_DELETED 2 /* Mid has been dequeued/deleted */ /* Types of response buffer returned from SendReceive2 */ #define CIFS_NO_BUFFER 0 /* Response buffer not returned */ diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 7aa08dba4719..52d71b64c0c6 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -659,7 +659,15 @@ dequeue_mid(struct mid_q_entry *mid, bool malformed) mid->mid_state = MID_RESPONSE_RECEIVED; else mid->mid_state = MID_RESPONSE_MALFORMED; - list_del_init(&mid->qhead); + /* + * Trying to handle/dequeue a mid after the send_recv() + * function has finished processing it is a bug. + */ + if (mid->mid_flags & MID_DELETED) + printk_once(KERN_WARNING + "trying to dequeue a deleted mid\n"); + else + list_del_init(&mid->qhead); spin_unlock(&GlobalMid_Lock); } @@ -938,8 +946,7 @@ next_pdu: } else { mids[0] = server->ops->find_mid(server, buf); bufs[0] = buf; - if (mids[0]) - num_mids = 1; + num_mids = 1; if (!mids[0] || !mids[0]->receive) length = standard_receive3(server, mids[0]); diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index d954ce36b473..89985a0a6819 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -1477,7 +1477,7 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon, } srch_inf->entries_in_buffer = 0; - srch_inf->index_of_last_entry = 0; + srch_inf->index_of_last_entry = 2; rc = SMB2_query_directory(xid, tcon, fid->persistent_fid, fid->volatile_fid, 0, srch_inf); diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 78f96fa3d7d9..b48f43963da6 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -142,7 +142,8 @@ void cifs_delete_mid(struct mid_q_entry *mid) { spin_lock(&GlobalMid_Lock); - list_del(&mid->qhead); + list_del_init(&mid->qhead); + mid->mid_flags |= MID_DELETED; spin_unlock(&GlobalMid_Lock); DeleteMidQEntry(mid); @@ -772,6 +773,11 @@ cifs_setup_request(struct cifs_ses *ses, struct smb_rqst *rqst) return mid; } +static void +cifs_noop_callback(struct mid_q_entry *mid) +{ +} + int compound_send_recv(const unsigned int xid, struct cifs_ses *ses, const int flags, const int num_rqst, struct smb_rqst *rqst, @@ -826,8 +832,13 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, } midQ[i]->mid_state = MID_REQUEST_SUBMITTED; + /* + * We don't invoke the callback compounds unless it is the last + * request. + */ + if (i < num_rqst - 1) + midQ[i]->callback = cifs_noop_callback; } - cifs_in_send_inc(ses->server); rc = smb_send_rqst(ses->server, num_rqst, rqst, flags); cifs_in_send_dec(ses->server); @@ -908,6 +919,12 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, midQ[i]->resp_buf = NULL; } out: + /* + * This will dequeue all mids. After this it is important that the + * demultiplex_thread will not process any of these mids any futher. + * This is prevented above by using a noop callback that will not + * wake this thread except for the very last PDU. + */ for (i = 0; i < num_rqst; i++) cifs_delete_mid(midQ[i]); add_credits(ses->server, credits, optype); @@ -447,6 +447,7 @@ bool dax_lock_mapping_entry(struct page *page) xa_unlock_irq(&mapping->i_pages); break; } else if (IS_ERR(entry)) { + xa_unlock_irq(&mapping->i_pages); WARN_ON_ONCE(PTR_ERR(entry) != -EAGAIN); continue; } @@ -1120,21 +1121,12 @@ static vm_fault_t dax_load_hole(struct address_space *mapping, void *entry, { struct inode *inode = mapping->host; unsigned long vaddr = vmf->address; - vm_fault_t ret = VM_FAULT_NOPAGE; - struct page *zero_page; - pfn_t pfn; - - zero_page = ZERO_PAGE(0); - if (unlikely(!zero_page)) { - ret = VM_FAULT_OOM; - goto out; - } + pfn_t pfn = pfn_to_pfn_t(my_zero_pfn(vaddr)); + vm_fault_t ret; - pfn = page_to_pfn_t(zero_page); dax_insert_mapping_entry(mapping, vmf, entry, pfn, RADIX_DAX_ZERO_PAGE, false); ret = vmf_insert_mixed(vmf->vma, vaddr, pfn); -out: trace_dax_load_hole(inode, vmf, ret); return ret; } diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 7f7ee18fe179..e4bb9386c045 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -1448,6 +1448,7 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino) } inode->i_blocks = le32_to_cpu(raw_inode->i_blocks); ei->i_flags = le32_to_cpu(raw_inode->i_flags); + ext2_set_inode_flags(inode); ei->i_faddr = le32_to_cpu(raw_inode->i_faddr); ei->i_frag_no = raw_inode->i_frag; ei->i_frag_size = raw_inode->i_fsize; @@ -1517,7 +1518,6 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino) new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); } brelse (bh); - ext2_set_inode_flags(inode); unlock_new_inode(inode); return inode; diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index e2902d394f1b..f93f9881ec18 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -76,7 +76,7 @@ int __ext4_check_dir_entry(const char *function, unsigned int line, else if (unlikely(rlen < EXT4_DIR_REC_LEN(de->name_len))) error_msg = "rec_len is too small for name_len"; else if (unlikely(((char *) de - buf) + rlen > size)) - error_msg = "directory entry across range"; + error_msg = "directory entry overrun"; else if (unlikely(le32_to_cpu(de->inode) > le32_to_cpu(EXT4_SB(dir->i_sb)->s_es->s_inodes_count))) error_msg = "inode out of bounds"; @@ -85,18 +85,16 @@ int __ext4_check_dir_entry(const char *function, unsigned int line, if (filp) ext4_error_file(filp, function, line, bh->b_blocknr, - "bad entry in directory: %s - offset=%u(%u), " - "inode=%u, rec_len=%d, name_len=%d", - error_msg, (unsigned) (offset % size), - offset, le32_to_cpu(de->inode), - rlen, de->name_len); + "bad entry in directory: %s - offset=%u, " + "inode=%u, rec_len=%d, name_len=%d, size=%d", + error_msg, offset, le32_to_cpu(de->inode), + rlen, de->name_len, size); else ext4_error_inode(dir, function, line, bh->b_blocknr, - "bad entry in directory: %s - offset=%u(%u), " - "inode=%u, rec_len=%d, name_len=%d", - error_msg, (unsigned) (offset % size), - offset, le32_to_cpu(de->inode), - rlen, de->name_len); + "bad entry in directory: %s - offset=%u, " + "inode=%u, rec_len=%d, name_len=%d, size=%d", + error_msg, offset, le32_to_cpu(de->inode), + rlen, de->name_len, size); return 1; } diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 0f0edd1cd0cd..caff935fbeb8 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -43,6 +43,17 @@ #define __FS_HAS_ENCRYPTION IS_ENABLED(CONFIG_EXT4_FS_ENCRYPTION) #include <linux/fscrypt.h> +#include <linux/compiler.h> + +/* Until this gets included into linux/compiler-gcc.h */ +#ifndef __nonstring +#if defined(GCC_VERSION) && (GCC_VERSION >= 80000) +#define __nonstring __attribute__((nonstring)) +#else +#define __nonstring +#endif +#endif + /* * The fourth extended filesystem constants/structures */ @@ -675,6 +686,9 @@ enum { /* Max physical block we can address w/o extents */ #define EXT4_MAX_BLOCK_FILE_PHYS 0xFFFFFFFF +/* Max logical block we can support */ +#define EXT4_MAX_LOGICAL_BLOCK 0xFFFFFFFF + /* * Structure of an inode on the disk */ @@ -1226,7 +1240,7 @@ struct ext4_super_block { __le32 s_feature_ro_compat; /* readonly-compatible feature set */ /*68*/ __u8 s_uuid[16]; /* 128-bit uuid for volume */ /*78*/ char s_volume_name[16]; /* volume name */ -/*88*/ char s_last_mounted[64]; /* directory where last mounted */ +/*88*/ char s_last_mounted[64] __nonstring; /* directory where last mounted */ /*C8*/ __le32 s_algorithm_usage_bitmap; /* For compression */ /* * Performance hints. Directory preallocation should only @@ -1277,13 +1291,13 @@ struct ext4_super_block { __le32 s_first_error_time; /* first time an error happened */ __le32 s_first_error_ino; /* inode involved in first error */ __le64 s_first_error_block; /* block involved of first error */ - __u8 s_first_error_func[32]; /* function where the error happened */ + __u8 s_first_error_func[32] __nonstring; /* function where the error happened */ __le32 s_first_error_line; /* line number where error happened */ __le32 s_last_error_time; /* most recent time of an error */ __le32 s_last_error_ino; /* inode involved in last error */ __le32 s_last_error_line; /* line number where error happened */ __le64 s_last_error_block; /* block involved of last error */ - __u8 s_last_error_func[32]; /* function where the error happened */ + __u8 s_last_error_func[32] __nonstring; /* function where the error happened */ #define EXT4_S_ERR_END offsetof(struct ext4_super_block, s_mount_opts) __u8 s_mount_opts[64]; __le32 s_usr_quota_inum; /* inode for tracking user quota */ diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 3543fe80a3c4..7b4736022761 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -1753,6 +1753,7 @@ bool empty_inline_dir(struct inode *dir, int *has_inline_data) { int err, inline_size; struct ext4_iloc iloc; + size_t inline_len; void *inline_pos; unsigned int offset; struct ext4_dir_entry_2 *de; @@ -1780,8 +1781,9 @@ bool empty_inline_dir(struct inode *dir, int *has_inline_data) goto out; } + inline_len = ext4_get_inline_size(dir); offset = EXT4_INLINE_DOTDOT_SIZE; - while (offset < dir->i_size) { + while (offset < inline_len) { de = ext4_get_inline_entry(dir, &iloc, offset, &inline_pos, &inline_size); if (ext4_check_dir_entry(dir, NULL, de, diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index d0dd585add6a..d767e993591d 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3413,12 +3413,16 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length, { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); unsigned int blkbits = inode->i_blkbits; - unsigned long first_block = offset >> blkbits; - unsigned long last_block = (offset + length - 1) >> blkbits; + unsigned long first_block, last_block; struct ext4_map_blocks map; bool delalloc = false; int ret; + if ((offset >> blkbits) > EXT4_MAX_LOGICAL_BLOCK) + return -EINVAL; + first_block = offset >> blkbits; + last_block = min_t(loff_t, (offset + length - 1) >> blkbits, + EXT4_MAX_LOGICAL_BLOCK); if (flags & IOMAP_REPORT) { if (ext4_has_inline_data(inode)) { @@ -3948,6 +3952,7 @@ static const struct address_space_operations ext4_dax_aops = { .writepages = ext4_dax_writepages, .direct_IO = noop_direct_IO, .set_page_dirty = noop_set_page_dirty, + .bmap = ext4_bmap, .invalidatepage = noop_invalidatepage, }; @@ -4192,9 +4197,8 @@ int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset, return 0; } -static void ext4_wait_dax_page(struct ext4_inode_info *ei, bool *did_unlock) +static void ext4_wait_dax_page(struct ext4_inode_info *ei) { - *did_unlock = true; up_write(&ei->i_mmap_sem); schedule(); down_write(&ei->i_mmap_sem); @@ -4204,14 +4208,12 @@ int ext4_break_layouts(struct inode *inode) { struct ext4_inode_info *ei = EXT4_I(inode); struct page *page; - bool retry; int error; if (WARN_ON_ONCE(!rwsem_is_locked(&ei->i_mmap_sem))) return -EINVAL; do { - retry = false; page = dax_layout_busy_page(inode->i_mapping); if (!page) return 0; @@ -4219,8 +4221,8 @@ int ext4_break_layouts(struct inode *inode) error = ___wait_var_event(&page->_refcount, atomic_read(&page->_refcount) == 1, TASK_INTERRUPTIBLE, 0, 0, - ext4_wait_dax_page(ei, &retry)); - } while (error == 0 && retry); + ext4_wait_dax_page(ei)); + } while (error == 0); return error; } @@ -4895,6 +4897,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) * not initialized on a new filesystem. */ } ei->i_flags = le32_to_cpu(raw_inode->i_flags); + ext4_set_inode_flags(inode); inode->i_blocks = ext4_inode_blocks(raw_inode, ei); ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo); if (ext4_has_feature_64bit(sb)) @@ -5041,7 +5044,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) goto bad_inode; } brelse(iloc.bh); - ext4_set_inode_flags(inode); unlock_new_inode(inode); return inode; diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index 39b07c2d3384..2305b4374fd3 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c @@ -49,7 +49,6 @@ static int write_mmp_block(struct super_block *sb, struct buffer_head *bh) */ sb_start_write(sb); ext4_mmp_csum_set(sb, mmp); - mark_buffer_dirty(bh); lock_buffer(bh); bh->b_end_io = end_buffer_write_sync; get_bh(bh); diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 116ff68c5bd4..377d516c475f 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -3478,6 +3478,12 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, int credits; u8 old_file_type; + if (new.inode && new.inode->i_nlink == 0) { + EXT4_ERROR_INODE(new.inode, + "target of rename is already freed"); + return -EFSCORRUPTED; + } + if ((ext4_test_inode_flag(new_dir, EXT4_INODE_PROJINHERIT)) && (!projid_eq(EXT4_I(new_dir)->i_projid, EXT4_I(old_dentry->d_inode)->i_projid))) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index e5fb38451a73..ebbc663d0798 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -19,6 +19,7 @@ int ext4_resize_begin(struct super_block *sb) { + struct ext4_sb_info *sbi = EXT4_SB(sb); int ret = 0; if (!capable(CAP_SYS_RESOURCE)) @@ -29,7 +30,7 @@ int ext4_resize_begin(struct super_block *sb) * because the user tools have no way of handling this. Probably a * bad time to do it anyways. */ - if (EXT4_SB(sb)->s_sbh->b_blocknr != + if (EXT4_B2C(sbi, sbi->s_sbh->b_blocknr) != le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) { ext4_warning(sb, "won't resize using backup superblock at %llu", (unsigned long long)EXT4_SB(sb)->s_sbh->b_blocknr); @@ -1986,6 +1987,26 @@ retry: } } + /* + * Make sure the last group has enough space so that it's + * guaranteed to have enough space for all metadata blocks + * that it might need to hold. (We might not need to store + * the inode table blocks in the last block group, but there + * will be cases where this might be needed.) + */ + if ((ext4_group_first_block_no(sb, n_group) + + ext4_group_overhead_blocks(sb, n_group) + 2 + + sbi->s_itb_per_group + sbi->s_cluster_ratio) >= n_blocks_count) { + n_blocks_count = ext4_group_first_block_no(sb, n_group); + n_group--; + n_blocks_count_retry = 0; + if (resize_inode) { + iput(resize_inode); + resize_inode = NULL; + } + goto retry; + } + /* extend the last group */ if (n_group == o_group) add = n_blocks_count - o_blocks_count; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 5863fd22e90b..1145109968ef 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2145,6 +2145,8 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb, SEQ_OPTS_PRINT("max_dir_size_kb=%u", sbi->s_max_dir_size_kb); if (test_opt(sb, DATA_ERR_ABORT)) SEQ_OPTS_PUTS("data_err=abort"); + if (DUMMY_ENCRYPTION_ENABLED(sbi)) + SEQ_OPTS_PUTS("test_dummy_encryption"); ext4_show_quota_options(seq, sb); return 0; @@ -4378,11 +4380,13 @@ no_journal: block = ext4_count_free_clusters(sb); ext4_free_blocks_count_set(sbi->s_es, EXT4_C2B(sbi, block)); + ext4_superblock_csum_set(sb); err = percpu_counter_init(&sbi->s_freeclusters_counter, block, GFP_KERNEL); if (!err) { unsigned long freei = ext4_count_free_inodes(sb); sbi->s_es->s_free_inodes_count = cpu_to_le32(freei); + ext4_superblock_csum_set(sb); err = percpu_counter_init(&sbi->s_freeinodes_counter, freei, GFP_KERNEL); } diff --git a/fs/ioctl.c b/fs/ioctl.c index 3212c29235ce..2005529af560 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -230,7 +230,7 @@ static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd, ret = -EXDEV; if (src_file.file->f_path.mnt != dst_file->f_path.mnt) goto fdput; - ret = do_clone_file_range(src_file.file, off, dst_file, destoff, olen); + ret = vfs_clone_file_range(src_file.file, off, dst_file, destoff, olen); fdput: fdput(src_file); return ret; diff --git a/fs/iomap.c b/fs/iomap.c index 74762b1ec233..ec15cf2ec696 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -1051,6 +1051,7 @@ iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length, } else { WARN_ON_ONCE(!PageUptodate(page)); iomap_page_create(inode, page); + set_page_dirty(page); } return length; @@ -1090,7 +1091,6 @@ int iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops) length -= ret; } - set_page_dirty(page); wait_for_stable_page(page); return VM_FAULT_LOCKED; out_unlock: diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 55a099e47ba2..b53e76391e52 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -541,7 +541,8 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp, __be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst, u64 dst_pos, u64 count) { - return nfserrno(do_clone_file_range(src, src_pos, dst, dst_pos, count)); + return nfserrno(vfs_clone_file_range(src, src_pos, dst, dst_pos, + count)); } ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst, diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c index d9ebe11c8990..1d098c3c00e0 100644 --- a/fs/ocfs2/buffer_head_io.c +++ b/fs/ocfs2/buffer_head_io.c @@ -342,6 +342,7 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr, * for this bh as it's not marked locally * uptodate. */ status = -EIO; + clear_buffer_needs_validate(bh); put_bh(bh); bhs[i] = NULL; continue; diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 296037afecdb..1cc797a08a5b 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -141,7 +141,7 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len) } /* Try to use clone_file_range to clone up within the same fs */ - error = vfs_clone_file_range(old_file, 0, new_file, 0, len); + error = do_clone_file_range(old_file, 0, new_file, 0, len); if (!error) goto out; /* Couldn't clone, so now we try to copy the data */ diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index aeaefd2a551b..986313da0c88 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -240,8 +240,10 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) goto out_unlock; old_cred = ovl_override_creds(file_inode(file)->i_sb); + file_start_write(real.file); ret = vfs_iter_write(real.file, iter, &iocb->ki_pos, ovl_iocb_to_rwf(iocb)); + file_end_write(real.file); revert_creds(old_cred); /* Update size */ diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index b6ac545b5a32..3b7ed5d2279c 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -504,7 +504,7 @@ static const struct inode_operations ovl_special_inode_operations = { .update_time = ovl_update_time, }; -const struct address_space_operations ovl_aops = { +static const struct address_space_operations ovl_aops = { /* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO */ .direct_IO = noop_direct_IO, }; diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index f28711846dd6..9c0ca6a7becf 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -686,7 +686,7 @@ struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper, index = NULL; goto out; } - pr_warn_ratelimited("overlayfs: failed inode index lookup (ino=%lu, key=%*s, err=%i);\n" + pr_warn_ratelimited("overlayfs: failed inode index lookup (ino=%lu, key=%.*s, err=%i);\n" "overlayfs: mount with '-o index=off' to disable inodes index.\n", d_inode(origin)->i_ino, name.len, name.name, err); diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index f61839e1054c..a3c0d9584312 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -152,8 +152,8 @@ static inline int ovl_do_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags) { int err = vfs_setxattr(dentry, name, value, size, flags); - pr_debug("setxattr(%pd2, \"%s\", \"%*s\", 0x%x) = %i\n", - dentry, name, (int) size, (char *) value, flags, err); + pr_debug("setxattr(%pd2, \"%s\", \"%*pE\", %zu, 0x%x) = %i\n", + dentry, name, min((int)size, 48), value, size, flags, err); return err; } diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 8cfb62cc8672..ace4fe4c39a9 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -683,7 +683,7 @@ static void ovl_cleanup_index(struct dentry *dentry) struct dentry *upperdentry = ovl_dentry_upper(dentry); struct dentry *index = NULL; struct inode *inode; - struct qstr name; + struct qstr name = { }; int err; err = ovl_get_index_name(lowerdentry, &name); @@ -726,6 +726,7 @@ static void ovl_cleanup_index(struct dentry *dentry) goto fail; out: + kfree(name.name); dput(index); return; diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index ad72261ee3fe..d297fe4472a9 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -464,6 +464,7 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) ret = -EFAULT; goto out; } + m = NULL; /* skip the list anchor */ } else if (m->type == KCORE_VMALLOC) { vread(buf, (char *)start, tsz); /* we have to zero-fill user buffer even if no read */ diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index bbd1e357c23d..f4fd2e72add4 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -898,8 +898,22 @@ static struct platform_driver ramoops_driver = { }, }; -static void ramoops_register_dummy(void) +static inline void ramoops_unregister_dummy(void) { + platform_device_unregister(dummy); + dummy = NULL; + + kfree(dummy_data); + dummy_data = NULL; +} + +static void __init ramoops_register_dummy(void) +{ + /* + * Prepare a dummy platform data structure to carry the module + * parameters. If mem_size isn't set, then there are no module + * parameters, and we can skip this. + */ if (!mem_size) return; @@ -932,21 +946,28 @@ static void ramoops_register_dummy(void) if (IS_ERR(dummy)) { pr_info("could not create platform device: %ld\n", PTR_ERR(dummy)); + dummy = NULL; + ramoops_unregister_dummy(); } } static int __init ramoops_init(void) { + int ret; + ramoops_register_dummy(); - return platform_driver_register(&ramoops_driver); + ret = platform_driver_register(&ramoops_driver); + if (ret != 0) + ramoops_unregister_dummy(); + + return ret; } late_initcall(ramoops_init); static void __exit ramoops_exit(void) { platform_driver_unregister(&ramoops_driver); - platform_device_unregister(dummy); - kfree(dummy_data); + ramoops_unregister_dummy(); } module_exit(ramoops_exit); diff --git a/fs/read_write.c b/fs/read_write.c index 39b4a21dd933..8a2737f0d61d 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1818,8 +1818,8 @@ int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in, } EXPORT_SYMBOL(vfs_clone_file_prep_inodes); -int vfs_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, u64 len) +int do_clone_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, u64 len) { struct inode *inode_in = file_inode(file_in); struct inode *inode_out = file_inode(file_out); @@ -1866,6 +1866,19 @@ int vfs_clone_file_range(struct file *file_in, loff_t pos_in, return ret; } +EXPORT_SYMBOL(do_clone_file_range); + +int vfs_clone_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, u64 len) +{ + int ret; + + file_start_write(file_out); + ret = do_clone_file_range(file_in, pos_in, file_out, pos_out, len); + file_end_write(file_out); + + return ret; +} EXPORT_SYMBOL(vfs_clone_file_range); /* diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 23e7042666a7..bf000c8aeffb 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1912,7 +1912,9 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) mutex_unlock(&c->bu_mutex); } - ubifs_assert(c, c->lst.taken_empty_lebs > 0); + if (!c->need_recovery) + ubifs_assert(c, c->lst.taken_empty_lebs > 0); + return 0; } @@ -1954,6 +1956,9 @@ static struct ubi_volume_desc *open_ubi(const char *name, int mode) int dev, vol; char *endptr; + if (!name || !*name) + return ERR_PTR(-EINVAL); + /* First, try to open using the device node path method */ ubi = ubi_open_volume_path(name, mode); if (!IS_ERR(ubi)) diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index 61afdfee4b28..f5ad1ede7990 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c @@ -152,12 +152,6 @@ static int create_xattr(struct ubifs_info *c, struct inode *host, ui->data_len = size; mutex_lock(&host_ui->ui_mutex); - - if (!host->i_nlink) { - err = -ENOENT; - goto out_noent; - } - host->i_ctime = current_time(host); host_ui->xattr_cnt += 1; host_ui->xattr_size += CALC_DENT_SIZE(fname_len(nm)); @@ -190,7 +184,6 @@ out_cancel: host_ui->xattr_size -= CALC_XATTR_BYTES(size); host_ui->xattr_names -= fname_len(nm); host_ui->flags &= ~UBIFS_CRYPT_FL; -out_noent: mutex_unlock(&host_ui->ui_mutex); out_free: make_bad_inode(inode); @@ -242,12 +235,6 @@ static int change_xattr(struct ubifs_info *c, struct inode *host, mutex_unlock(&ui->ui_mutex); mutex_lock(&host_ui->ui_mutex); - - if (!host->i_nlink) { - err = -ENOENT; - goto out_noent; - } - host->i_ctime = current_time(host); host_ui->xattr_size -= CALC_XATTR_BYTES(old_size); host_ui->xattr_size += CALC_XATTR_BYTES(size); @@ -269,7 +256,6 @@ static int change_xattr(struct ubifs_info *c, struct inode *host, out_cancel: host_ui->xattr_size -= CALC_XATTR_BYTES(size); host_ui->xattr_size += CALC_XATTR_BYTES(old_size); -out_noent: mutex_unlock(&host_ui->ui_mutex); make_bad_inode(inode); out_free: @@ -496,12 +482,6 @@ static int remove_xattr(struct ubifs_info *c, struct inode *host, return err; mutex_lock(&host_ui->ui_mutex); - - if (!host->i_nlink) { - err = -ENOENT; - goto out_noent; - } - host->i_ctime = current_time(host); host_ui->xattr_cnt -= 1; host_ui->xattr_size -= CALC_DENT_SIZE(fname_len(nm)); @@ -521,7 +501,6 @@ out_cancel: host_ui->xattr_size += CALC_DENT_SIZE(fname_len(nm)); host_ui->xattr_size += CALC_XATTR_BYTES(ui->data_len); host_ui->xattr_names += fname_len(nm); -out_noent: mutex_unlock(&host_ui->ui_mutex); ubifs_release_budget(c, &req); make_bad_inode(inode); @@ -561,9 +540,6 @@ static int ubifs_xattr_remove(struct inode *host, const char *name) ubifs_assert(c, inode_is_locked(host)); - if (!host->i_nlink) - return -ENOENT; - if (fname_len(&nm) > UBIFS_MAX_NLEN) return -ENAMETOOLONG; diff --git a/fs/xattr.c b/fs/xattr.c index daa732550088..0d6a6a4af861 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -948,17 +948,19 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs, int err = 0; #ifdef CONFIG_FS_POSIX_ACL - if (inode->i_acl) { - err = xattr_list_one(&buffer, &remaining_size, - XATTR_NAME_POSIX_ACL_ACCESS); - if (err) - return err; - } - if (inode->i_default_acl) { - err = xattr_list_one(&buffer, &remaining_size, - XATTR_NAME_POSIX_ACL_DEFAULT); - if (err) - return err; + if (IS_POSIXACL(inode)) { + if (inode->i_acl) { + err = xattr_list_one(&buffer, &remaining_size, + XATTR_NAME_POSIX_ACL_ACCESS); + if (err) + return err; + } + if (inode->i_default_acl) { + err = xattr_list_one(&buffer, &remaining_size, + XATTR_NAME_POSIX_ACL_DEFAULT); + if (err) + return err; + } } #endif diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 1e671d4eb6fa..c6299f82a6e4 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -587,7 +587,7 @@ xfs_attr_leaf_addname( */ error = xfs_attr3_leaf_to_node(args); if (error) - goto out_defer_cancel; + return error; error = xfs_defer_finish(&args->trans); if (error) return error; @@ -675,7 +675,7 @@ xfs_attr_leaf_addname( error = xfs_attr3_leaf_to_shortform(bp, args, forkoff); /* bp is gone due to xfs_da_shrink_inode */ if (error) - goto out_defer_cancel; + return error; error = xfs_defer_finish(&args->trans); if (error) return error; @@ -693,9 +693,6 @@ xfs_attr_leaf_addname( error = xfs_attr3_leaf_clearflag(args); } return error; -out_defer_cancel: - xfs_defer_cancel(args->trans); - return error; } /* @@ -738,15 +735,12 @@ xfs_attr_leaf_removename( error = xfs_attr3_leaf_to_shortform(bp, args, forkoff); /* bp is gone due to xfs_da_shrink_inode */ if (error) - goto out_defer_cancel; + return error; error = xfs_defer_finish(&args->trans); if (error) return error; } return 0; -out_defer_cancel: - xfs_defer_cancel(args->trans); - return error; } /* @@ -864,7 +858,7 @@ restart: state = NULL; error = xfs_attr3_leaf_to_node(args); if (error) - goto out_defer_cancel; + goto out; error = xfs_defer_finish(&args->trans); if (error) goto out; @@ -888,7 +882,7 @@ restart: */ error = xfs_da3_split(state); if (error) - goto out_defer_cancel; + goto out; error = xfs_defer_finish(&args->trans); if (error) goto out; @@ -984,7 +978,7 @@ restart: if (retval && (state->path.active > 1)) { error = xfs_da3_join(state); if (error) - goto out_defer_cancel; + goto out; error = xfs_defer_finish(&args->trans); if (error) goto out; @@ -1013,9 +1007,6 @@ out: if (error) return error; return retval; -out_defer_cancel: - xfs_defer_cancel(args->trans); - goto out; } /* @@ -1107,7 +1098,7 @@ xfs_attr_node_removename( if (retval && (state->path.active > 1)) { error = xfs_da3_join(state); if (error) - goto out_defer_cancel; + goto out; error = xfs_defer_finish(&args->trans); if (error) goto out; @@ -1138,7 +1129,7 @@ xfs_attr_node_removename( error = xfs_attr3_leaf_to_shortform(bp, args, forkoff); /* bp is gone due to xfs_da_shrink_inode */ if (error) - goto out_defer_cancel; + goto out; error = xfs_defer_finish(&args->trans); if (error) goto out; @@ -1150,9 +1141,6 @@ xfs_attr_node_removename( out: xfs_da_state_free(state); return error; -out_defer_cancel: - xfs_defer_cancel(args->trans); - goto out; } /* diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c index af094063e402..d89363c6b523 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.c +++ b/fs/xfs/libxfs/xfs_attr_remote.c @@ -485,7 +485,7 @@ xfs_attr_rmtval_set( blkcnt, XFS_BMAPI_ATTRFORK, args->total, &map, &nmap); if (error) - goto out_defer_cancel; + return error; error = xfs_defer_finish(&args->trans); if (error) return error; @@ -553,9 +553,6 @@ xfs_attr_rmtval_set( } ASSERT(valuelen == 0); return 0; -out_defer_cancel: - xfs_defer_cancel(args->trans); - return error; } /* @@ -625,7 +622,7 @@ xfs_attr_rmtval_remove( error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt, XFS_BMAPI_ATTRFORK, 1, &done); if (error) - goto out_defer_cancel; + return error; error = xfs_defer_finish(&args->trans); if (error) return error; @@ -638,7 +635,4 @@ xfs_attr_rmtval_remove( return error; } return 0; -out_defer_cancel: - xfs_defer_cancel(args->trans); - return error; } diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 2760314fdf7f..a47670332326 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -673,7 +673,8 @@ xfs_bmap_extents_to_btree( ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS); /* - * Make space in the inode incore. + * Make space in the inode incore. This needs to be undone if we fail + * to expand the root. */ xfs_iroot_realloc(ip, 1, whichfork); ifp->if_flags |= XFS_IFBROOT; @@ -711,16 +712,15 @@ xfs_bmap_extents_to_btree( args.minlen = args.maxlen = args.prod = 1; args.wasdel = wasdel; *logflagsp = 0; - if ((error = xfs_alloc_vextent(&args))) { - ASSERT(ifp->if_broot == NULL); - goto err1; - } + error = xfs_alloc_vextent(&args); + if (error) + goto out_root_realloc; if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) { - ASSERT(ifp->if_broot == NULL); error = -ENOSPC; - goto err1; + goto out_root_realloc; } + /* * Allocation can't fail, the space was reserved. */ @@ -732,9 +732,10 @@ xfs_bmap_extents_to_btree( xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L); abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0); if (!abp) { - error = -ENOSPC; - goto err2; + error = -EFSCORRUPTED; + goto out_unreserve_dquot; } + /* * Fill in the child block. */ @@ -775,11 +776,12 @@ xfs_bmap_extents_to_btree( *logflagsp = XFS_ILOG_CORE | xfs_ilog_fbroot(whichfork); return 0; -err2: +out_unreserve_dquot: xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); -err1: +out_root_realloc: xfs_iroot_realloc(ip, -1, whichfork); XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS); + ASSERT(ifp->if_broot == NULL); xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); return error; diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 059bc44c27e8..afbe336600e1 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -1016,6 +1016,8 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev) #define XFS_DIFLAG_EXTSZINHERIT_BIT 12 /* inherit inode extent size */ #define XFS_DIFLAG_NODEFRAG_BIT 13 /* do not reorganize/defragment */ #define XFS_DIFLAG_FILESTREAM_BIT 14 /* use filestream allocator */ +/* Do not use bit 15, di_flags is legacy and unchanging now */ + #define XFS_DIFLAG_REALTIME (1 << XFS_DIFLAG_REALTIME_BIT) #define XFS_DIFLAG_PREALLOC (1 << XFS_DIFLAG_PREALLOC_BIT) #define XFS_DIFLAG_NEWRTBM (1 << XFS_DIFLAG_NEWRTBM_BIT) diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 30d1d60f1d46..09d9c8cfa4a0 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -415,6 +415,31 @@ xfs_dinode_verify_fork( return NULL; } +static xfs_failaddr_t +xfs_dinode_verify_forkoff( + struct xfs_dinode *dip, + struct xfs_mount *mp) +{ + if (!XFS_DFORK_Q(dip)) + return NULL; + + switch (dip->di_format) { + case XFS_DINODE_FMT_DEV: + if (dip->di_forkoff != (roundup(sizeof(xfs_dev_t), 8) >> 3)) + return __this_address; + break; + case XFS_DINODE_FMT_LOCAL: /* fall through ... */ + case XFS_DINODE_FMT_EXTENTS: /* fall through ... */ + case XFS_DINODE_FMT_BTREE: + if (dip->di_forkoff >= (XFS_LITINO(mp, dip->di_version) >> 3)) + return __this_address; + break; + default: + return __this_address; + } + return NULL; +} + xfs_failaddr_t xfs_dinode_verify( struct xfs_mount *mp, @@ -470,6 +495,11 @@ xfs_dinode_verify( if (mode && (flags & XFS_DIFLAG_REALTIME) && !mp->m_rtdev_targp) return __this_address; + /* check for illegal values of forkoff */ + fa = xfs_dinode_verify_forkoff(dip, mp); + if (fa) + return fa; + /* Do we have appropriate data fork formats for the mode? */ switch (mode & S_IFMT) { case S_IFIFO: diff --git a/fs/xfs/scrub/alloc.c b/fs/xfs/scrub/alloc.c index 036b5c7021eb..376bcb585ae6 100644 --- a/fs/xfs/scrub/alloc.c +++ b/fs/xfs/scrub/alloc.c @@ -17,7 +17,6 @@ #include "xfs_sb.h" #include "xfs_alloc.h" #include "xfs_rmap.h" -#include "xfs_alloc.h" #include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c index 5b3b177c0fc9..e386c9b0b4ab 100644 --- a/fs/xfs/scrub/inode.c +++ b/fs/xfs/scrub/inode.c @@ -126,6 +126,7 @@ xchk_inode_flags( { struct xfs_mount *mp = sc->mp; + /* di_flags are all taken, last bit cannot be used */ if (flags & ~XFS_DIFLAG_ANY) goto bad; @@ -172,8 +173,9 @@ xchk_inode_flags2( { struct xfs_mount *mp = sc->mp; + /* Unknown di_flags2 could be from a future kernel */ if (flags2 & ~XFS_DIFLAG2_ANY) - goto bad; + xchk_ino_set_warning(sc, ino); /* reflink flag requires reflink feature */ if ((flags2 & XFS_DIFLAG2_REFLINK) && diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index addbd74ecd8e..6de8d90041ff 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -702,13 +702,9 @@ xfs_bmap_punch_delalloc_range( struct xfs_iext_cursor icur; int error = 0; - xfs_ilock(ip, XFS_ILOCK_EXCL); - if (!(ifp->if_flags & XFS_IFEXTENTS)) { - error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK); - if (error) - goto out_unlock; - } + ASSERT(ifp->if_flags & XFS_IFEXTENTS); + xfs_ilock(ip, XFS_ILOCK_EXCL); if (!xfs_iext_lookup_extent_before(ip, ifp, &end_fsb, &icur, &got)) goto out_unlock; @@ -1584,7 +1580,7 @@ xfs_swap_extent_rmap( tirec.br_blockcount, &irec, &nimaps, 0); if (error) - goto out_defer; + goto out; ASSERT(nimaps == 1); ASSERT(tirec.br_startoff == irec.br_startoff); trace_xfs_swap_extent_rmap_remap_piece(ip, &irec); @@ -1599,22 +1595,22 @@ xfs_swap_extent_rmap( /* Remove the mapping from the donor file. */ error = xfs_bmap_unmap_extent(tp, tip, &uirec); if (error) - goto out_defer; + goto out; /* Remove the mapping from the source file. */ error = xfs_bmap_unmap_extent(tp, ip, &irec); if (error) - goto out_defer; + goto out; /* Map the donor file's blocks into the source file. */ error = xfs_bmap_map_extent(tp, ip, &uirec); if (error) - goto out_defer; + goto out; /* Map the source file's blocks into the donor file. */ error = xfs_bmap_map_extent(tp, tip, &irec); if (error) - goto out_defer; + goto out; error = xfs_defer_finish(tpp); tp = *tpp; @@ -1636,8 +1632,6 @@ xfs_swap_extent_rmap( tip->i_d.di_flags2 = tip_flags2; return 0; -out_defer: - xfs_defer_cancel(tp); out: trace_xfs_swap_extent_rmap_error(ip, error, _RET_IP_); tip->i_d.di_flags2 = tip_flags2; diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 1c9d1398980b..12d8455bfbb2 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -532,6 +532,49 @@ xfs_buf_item_push( } /* + * Drop the buffer log item refcount and take appropriate action. This helper + * determines whether the bli must be freed or not, since a decrement to zero + * does not necessarily mean the bli is unused. + * + * Return true if the bli is freed, false otherwise. + */ +bool +xfs_buf_item_put( + struct xfs_buf_log_item *bip) +{ + struct xfs_log_item *lip = &bip->bli_item; + bool aborted; + bool dirty; + + /* drop the bli ref and return if it wasn't the last one */ + if (!atomic_dec_and_test(&bip->bli_refcount)) + return false; + + /* + * We dropped the last ref and must free the item if clean or aborted. + * If the bli is dirty and non-aborted, the buffer was clean in the + * transaction but still awaiting writeback from previous changes. In + * that case, the bli is freed on buffer writeback completion. + */ + aborted = test_bit(XFS_LI_ABORTED, &lip->li_flags) || + XFS_FORCED_SHUTDOWN(lip->li_mountp); + dirty = bip->bli_flags & XFS_BLI_DIRTY; + if (dirty && !aborted) + return false; + + /* + * The bli is aborted or clean. An aborted item may be in the AIL + * regardless of dirty state. For example, consider an aborted + * transaction that invalidated a dirty bli and cleared the dirty + * state. + */ + if (aborted) + xfs_trans_ail_remove(lip, SHUTDOWN_LOG_IO_ERROR); + xfs_buf_item_relse(bip->bli_buf); + return true; +} + +/* * Release the buffer associated with the buf log item. If there is no dirty * logged data associated with the buffer recorded in the buf log item, then * free the buf log item and remove the reference to it in the buffer. @@ -556,76 +599,42 @@ xfs_buf_item_unlock( { struct xfs_buf_log_item *bip = BUF_ITEM(lip); struct xfs_buf *bp = bip->bli_buf; - bool aborted; - bool hold = !!(bip->bli_flags & XFS_BLI_HOLD); - bool dirty = !!(bip->bli_flags & XFS_BLI_DIRTY); + bool released; + bool hold = bip->bli_flags & XFS_BLI_HOLD; + bool stale = bip->bli_flags & XFS_BLI_STALE; #if defined(DEBUG) || defined(XFS_WARN) - bool ordered = !!(bip->bli_flags & XFS_BLI_ORDERED); + bool ordered = bip->bli_flags & XFS_BLI_ORDERED; + bool dirty = bip->bli_flags & XFS_BLI_DIRTY; #endif - aborted = test_bit(XFS_LI_ABORTED, &lip->li_flags); - - /* Clear the buffer's association with this transaction. */ - bp->b_transp = NULL; - - /* - * The per-transaction state has been copied above so clear it from the - * bli. - */ - bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_HOLD | XFS_BLI_ORDERED); - - /* - * If the buf item is marked stale, then don't do anything. We'll - * unlock the buffer and free the buf item when the buffer is unpinned - * for the last time. - */ - if (bip->bli_flags & XFS_BLI_STALE) { - trace_xfs_buf_item_unlock_stale(bip); - ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL); - if (!aborted) { - atomic_dec(&bip->bli_refcount); - return; - } - } - trace_xfs_buf_item_unlock(bip); /* - * If the buf item isn't tracking any data, free it, otherwise drop the - * reference we hold to it. If we are aborting the transaction, this may - * be the only reference to the buf item, so we free it anyway - * regardless of whether it is dirty or not. A dirty abort implies a - * shutdown, anyway. - * * The bli dirty state should match whether the blf has logged segments * except for ordered buffers, where only the bli should be dirty. */ ASSERT((!ordered && dirty == xfs_buf_item_dirty_format(bip)) || (ordered && dirty && !xfs_buf_item_dirty_format(bip))); + ASSERT(!stale || (bip->__bli_format.blf_flags & XFS_BLF_CANCEL)); /* - * Clean buffers, by definition, cannot be in the AIL. However, aborted - * buffers may be in the AIL regardless of dirty state. An aborted - * transaction that invalidates a buffer already in the AIL may have - * marked it stale and cleared the dirty state, for example. - * - * Therefore if we are aborting a buffer and we've just taken the last - * reference away, we have to check if it is in the AIL before freeing - * it. We need to free it in this case, because an aborted transaction - * has already shut the filesystem down and this is the last chance we - * will have to do so. + * Clear the buffer's association with this transaction and + * per-transaction state from the bli, which has been copied above. */ - if (atomic_dec_and_test(&bip->bli_refcount)) { - if (aborted) { - ASSERT(XFS_FORCED_SHUTDOWN(lip->li_mountp)); - xfs_trans_ail_remove(lip, SHUTDOWN_LOG_IO_ERROR); - xfs_buf_item_relse(bp); - } else if (!dirty) - xfs_buf_item_relse(bp); - } + bp->b_transp = NULL; + bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_HOLD | XFS_BLI_ORDERED); - if (!hold) - xfs_buf_relse(bp); + /* + * Unref the item and unlock the buffer unless held or stale. Stale + * buffers remain locked until final unpin unless the bli is freed by + * the unref call. The latter implies shutdown because buffer + * invalidation dirties the bli and transaction. + */ + released = xfs_buf_item_put(bip); + if (hold || (stale && !released)) + return; + ASSERT(!stale || test_bit(XFS_LI_ABORTED, &lip->li_flags)); + xfs_buf_relse(bp); } /* diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h index 3f7d7b72e7e6..90f65f891fab 100644 --- a/fs/xfs/xfs_buf_item.h +++ b/fs/xfs/xfs_buf_item.h @@ -51,6 +51,7 @@ struct xfs_buf_log_item { int xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *); void xfs_buf_item_relse(struct xfs_buf *); +bool xfs_buf_item_put(struct xfs_buf_log_item *); void xfs_buf_item_log(struct xfs_buf_log_item *, uint, uint); bool xfs_buf_item_dirty_format(struct xfs_buf_log_item *); void xfs_buf_attach_iodone(struct xfs_buf *, diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index d957a46dc1cb..05db9540e459 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1563,7 +1563,7 @@ xfs_itruncate_extents_flags( error = xfs_bunmapi(tp, ip, first_unmap_block, unmap_len, flags, XFS_ITRUNC_MAX_EXTENTS, &done); if (error) - goto out_bmap_cancel; + goto out; /* * Duplicate the transaction that has the permanent @@ -1599,14 +1599,6 @@ xfs_itruncate_extents_flags( out: *tpp = tp; return error; -out_bmap_cancel: - /* - * If the bunmapi call encounters an error, return to the caller where - * the transaction can be properly aborted. We just need to make sure - * we're not holding any resources that we were not when we came in. - */ - xfs_defer_cancel(tp); - goto out; } int diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index c3e74f9128e8..f48ffd7a8d3e 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -471,8 +471,18 @@ xfs_vn_get_link_inline( struct inode *inode, struct delayed_call *done) { + char *link; + ASSERT(XFS_I(inode)->i_df.if_flags & XFS_IFINLINE); - return XFS_I(inode)->i_df.if_u1.if_data; + + /* + * The VFS crashes on a NULL pointer, so return -EFSCORRUPTED if + * if_data is junk. + */ + link = XFS_I(inode)->i_df.if_u1.if_data; + if (!link) + return ERR_PTR(-EFSCORRUPTED); + return link; } STATIC int diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index a21dc61ec09e..1fc9e9042e0e 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -1570,16 +1570,6 @@ xlog_find_zeroed( if (last_cycle != 0) { /* log completely written to */ xlog_put_bp(bp); return 0; - } else if (first_cycle != 1) { - /* - * If the cycle of the last block is zero, the cycle of - * the first block must be 1. If it's not, maybe we're - * not looking at a log... Bail out. - */ - xfs_warn(log->l_mp, - "Log inconsistent or not a log (last==0, first!=1)"); - error = -EINVAL; - goto bp_err; } /* we have a partially zeroed log */ diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 38f405415b88..5289e22cb081 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -352,6 +352,47 @@ xfs_reflink_convert_cow( return error; } +/* + * Find the extent that maps the given range in the COW fork. Even if the extent + * is not shared we might have a preallocation for it in the COW fork. If so we + * use it that rather than trigger a new allocation. + */ +static int +xfs_find_trim_cow_extent( + struct xfs_inode *ip, + struct xfs_bmbt_irec *imap, + bool *shared, + bool *found) +{ + xfs_fileoff_t offset_fsb = imap->br_startoff; + xfs_filblks_t count_fsb = imap->br_blockcount; + struct xfs_iext_cursor icur; + struct xfs_bmbt_irec got; + bool trimmed; + + *found = false; + + /* + * If we don't find an overlapping extent, trim the range we need to + * allocate to fit the hole we found. + */ + if (!xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &got) || + got.br_startoff > offset_fsb) + return xfs_reflink_trim_around_shared(ip, imap, shared, &trimmed); + + *shared = true; + if (isnullstartblock(got.br_startblock)) { + xfs_trim_extent(imap, got.br_startoff, got.br_blockcount); + return 0; + } + + /* real extent found - no need to allocate */ + xfs_trim_extent(&got, offset_fsb, count_fsb); + *imap = got; + *found = true; + return 0; +} + /* Allocate all CoW reservations covering a range of blocks in a file. */ int xfs_reflink_allocate_cow( @@ -363,78 +404,64 @@ xfs_reflink_allocate_cow( struct xfs_mount *mp = ip->i_mount; xfs_fileoff_t offset_fsb = imap->br_startoff; xfs_filblks_t count_fsb = imap->br_blockcount; - struct xfs_bmbt_irec got; - struct xfs_trans *tp = NULL; + struct xfs_trans *tp; int nimaps, error = 0; - bool trimmed; + bool found; xfs_filblks_t resaligned; xfs_extlen_t resblks = 0; - struct xfs_iext_cursor icur; -retry: - ASSERT(xfs_is_reflink_inode(ip)); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + ASSERT(xfs_is_reflink_inode(ip)); - /* - * Even if the extent is not shared we might have a preallocation for - * it in the COW fork. If so use it. - */ - if (xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &got) && - got.br_startoff <= offset_fsb) { - *shared = true; - - /* If we have a real allocation in the COW fork we're done. */ - if (!isnullstartblock(got.br_startblock)) { - xfs_trim_extent(&got, offset_fsb, count_fsb); - *imap = got; - goto convert; - } + error = xfs_find_trim_cow_extent(ip, imap, shared, &found); + if (error || !*shared) + return error; + if (found) + goto convert; - xfs_trim_extent(imap, got.br_startoff, got.br_blockcount); - } else { - error = xfs_reflink_trim_around_shared(ip, imap, shared, &trimmed); - if (error || !*shared) - goto out; - } + resaligned = xfs_aligned_fsb_count(imap->br_startoff, + imap->br_blockcount, xfs_get_cowextsz_hint(ip)); + resblks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned); - if (!tp) { - resaligned = xfs_aligned_fsb_count(imap->br_startoff, - imap->br_blockcount, xfs_get_cowextsz_hint(ip)); - resblks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned); + xfs_iunlock(ip, *lockmode); + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp); + *lockmode = XFS_ILOCK_EXCL; + xfs_ilock(ip, *lockmode); - xfs_iunlock(ip, *lockmode); - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp); - *lockmode = XFS_ILOCK_EXCL; - xfs_ilock(ip, *lockmode); + if (error) + return error; - if (error) - return error; + error = xfs_qm_dqattach_locked(ip, false); + if (error) + goto out_trans_cancel; - error = xfs_qm_dqattach_locked(ip, false); - if (error) - goto out; - goto retry; + /* + * Check for an overlapping extent again now that we dropped the ilock. + */ + error = xfs_find_trim_cow_extent(ip, imap, shared, &found); + if (error || !*shared) + goto out_trans_cancel; + if (found) { + xfs_trans_cancel(tp); + goto convert; } error = xfs_trans_reserve_quota_nblks(tp, ip, resblks, 0, XFS_QMOPT_RES_REGBLKS); if (error) - goto out; + goto out_trans_cancel; xfs_trans_ijoin(tp, ip, 0); - nimaps = 1; - /* Allocate the entire reservation as unwritten blocks. */ + nimaps = 1; error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount, XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, resblks, imap, &nimaps); if (error) - goto out_trans_cancel; + goto out_unreserve; xfs_inode_set_cowblocks_tag(ip); - - /* Finish up. */ error = xfs_trans_commit(tp); if (error) return error; @@ -447,12 +474,12 @@ retry: return -ENOSPC; convert: return xfs_reflink_convert_cow_extent(ip, imap, offset_fsb, count_fsb); -out_trans_cancel: + +out_unreserve: xfs_trans_unreserve_quota_nblks(tp, ip, (long)resblks, 0, XFS_QMOPT_RES_REGBLKS); -out: - if (tp) - xfs_trans_cancel(tp); +out_trans_cancel: + xfs_trans_cancel(tp); return error; } @@ -666,14 +693,12 @@ xfs_reflink_end_cow( if (!del.br_blockcount) goto prev_extent; - ASSERT(!isnullstartblock(got.br_startblock)); - /* - * Don't remap unwritten extents; these are - * speculatively preallocated CoW extents that have been - * allocated but have not yet been involved in a write. + * Only remap real extent that contain data. With AIO + * speculatively preallocations can leak into the range we + * are called upon, and we need to skip them. */ - if (got.br_state == XFS_EXT_UNWRITTEN) + if (!xfs_bmap_is_real_extent(&got)) goto prev_extent; /* Unmap the old blocks in the data fork. */ diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index ad315e83bc02..3043e5ed6495 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -473,7 +473,6 @@ DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pin); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin_stale); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock); -DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock_stale); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_committed); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_push); DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf); diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index bedc5a5133a5..912b42f5fe4a 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -259,6 +259,14 @@ xfs_trans_alloc( struct xfs_trans *tp; int error; + /* + * Allocate the handle before we do our freeze accounting and setting up + * GFP_NOFS allocation context so that we avoid lockdep false positives + * by doing GFP_KERNEL allocations inside sb_start_intwrite(). + */ + tp = kmem_zone_zalloc(xfs_trans_zone, + (flags & XFS_TRANS_NOFS) ? KM_NOFS : KM_SLEEP); + if (!(flags & XFS_TRANS_NO_WRITECOUNT)) sb_start_intwrite(mp->m_super); @@ -270,8 +278,6 @@ xfs_trans_alloc( mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE); atomic_inc(&mp->m_active_trans); - tp = kmem_zone_zalloc(xfs_trans_zone, - (flags & XFS_TRANS_NOFS) ? KM_NOFS : KM_SLEEP); tp->t_magic = XFS_TRANS_HEADER_MAGIC; tp->t_flags = flags; tp->t_mountp = mp; diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 15919f67a88f..286a287ac57a 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -322,49 +322,38 @@ xfs_trans_read_buf_map( } /* - * Release the buffer bp which was previously acquired with one of the - * xfs_trans_... buffer allocation routines if the buffer has not - * been modified within this transaction. If the buffer is modified - * within this transaction, do decrement the recursion count but do - * not release the buffer even if the count goes to 0. If the buffer is not - * modified within the transaction, decrement the recursion count and - * release the buffer if the recursion count goes to 0. + * Release a buffer previously joined to the transaction. If the buffer is + * modified within this transaction, decrement the recursion count but do not + * release the buffer even if the count goes to 0. If the buffer is not modified + * within the transaction, decrement the recursion count and release the buffer + * if the recursion count goes to 0. * - * If the buffer is to be released and it was not modified before - * this transaction began, then free the buf_log_item associated with it. + * If the buffer is to be released and it was not already dirty before this + * transaction began, then also free the buf_log_item associated with it. * - * If the transaction pointer is NULL, make this just a normal - * brelse() call. + * If the transaction pointer is NULL, this is a normal xfs_buf_relse() call. */ void xfs_trans_brelse( - xfs_trans_t *tp, - xfs_buf_t *bp) + struct xfs_trans *tp, + struct xfs_buf *bp) { - struct xfs_buf_log_item *bip; - int freed; + struct xfs_buf_log_item *bip = bp->b_log_item; - /* - * Default to a normal brelse() call if the tp is NULL. - */ - if (tp == NULL) { - ASSERT(bp->b_transp == NULL); + ASSERT(bp->b_transp == tp); + + if (!tp) { xfs_buf_relse(bp); return; } - ASSERT(bp->b_transp == tp); - bip = bp->b_log_item; + trace_xfs_trans_brelse(bip); ASSERT(bip->bli_item.li_type == XFS_LI_BUF); - ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); - ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL)); ASSERT(atomic_read(&bip->bli_refcount) > 0); - trace_xfs_trans_brelse(bip); - /* - * If the release is just for a recursive lock, - * then decrement the count and return. + * If the release is for a recursive lookup, then decrement the count + * and return. */ if (bip->bli_recur > 0) { bip->bli_recur--; @@ -372,64 +361,24 @@ xfs_trans_brelse( } /* - * If the buffer is dirty within this transaction, we can't + * If the buffer is invalidated or dirty in this transaction, we can't * release it until we commit. */ if (test_bit(XFS_LI_DIRTY, &bip->bli_item.li_flags)) return; - - /* - * If the buffer has been invalidated, then we can't release - * it until the transaction commits to disk unless it is re-dirtied - * as part of this transaction. This prevents us from pulling - * the item from the AIL before we should. - */ if (bip->bli_flags & XFS_BLI_STALE) return; - ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED)); - /* - * Free up the log item descriptor tracking the released item. + * Unlink the log item from the transaction and clear the hold flag, if + * set. We wouldn't want the next user of the buffer to get confused. */ + ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED)); xfs_trans_del_item(&bip->bli_item); + bip->bli_flags &= ~XFS_BLI_HOLD; - /* - * Clear the hold flag in the buf log item if it is set. - * We wouldn't want the next user of the buffer to - * get confused. - */ - if (bip->bli_flags & XFS_BLI_HOLD) { - bip->bli_flags &= ~XFS_BLI_HOLD; - } - - /* - * Drop our reference to the buf log item. - */ - freed = atomic_dec_and_test(&bip->bli_refcount); - - /* - * If the buf item is not tracking data in the log, then we must free it - * before releasing the buffer back to the free pool. - * - * If the fs has shutdown and we dropped the last reference, it may fall - * on us to release a (possibly dirty) bli if it never made it to the - * AIL (e.g., the aborted unpin already happened and didn't release it - * due to our reference). Since we're already shutdown and need - * ail_lock, just force remove from the AIL and release the bli here. - */ - if (XFS_FORCED_SHUTDOWN(tp->t_mountp) && freed) { - xfs_trans_ail_remove(&bip->bli_item, SHUTDOWN_LOG_IO_ERROR); - xfs_buf_item_relse(bp); - } else if (!(bip->bli_flags & XFS_BLI_DIRTY)) { -/*** - ASSERT(bp->b_pincount == 0); -***/ - ASSERT(atomic_read(&bip->bli_refcount) == 0); - ASSERT(!test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags)); - ASSERT(!(bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF)); - xfs_buf_item_relse(bp); - } + /* drop the reference to the bli */ + xfs_buf_item_put(bip); bp->b_transp = NULL; xfs_buf_relse(bp); |