diff options
Diffstat (limited to 'fs/ext4')
| -rw-r--r-- | fs/ext4/acl.c | 8 | ||||
| -rw-r--r-- | fs/ext4/balloc.c | 1 | ||||
| -rw-r--r-- | fs/ext4/block_validity.c | 26 | ||||
| -rw-r--r-- | fs/ext4/ext4.h | 39 | ||||
| -rw-r--r-- | fs/ext4/ext4_jbd2.h | 2 | ||||
| -rw-r--r-- | fs/ext4/extents.c | 25 | ||||
| -rw-r--r-- | fs/ext4/fast_commit.c | 306 | ||||
| -rw-r--r-- | fs/ext4/fast_commit.h | 2 | ||||
| -rw-r--r-- | fs/ext4/file.c | 10 | ||||
| -rw-r--r-- | fs/ext4/hash.c | 2 | ||||
| -rw-r--r-- | fs/ext4/indirect.c | 2 | ||||
| -rw-r--r-- | fs/ext4/inline.c | 37 | ||||
| -rw-r--r-- | fs/ext4/inode.c | 198 | ||||
| -rw-r--r-- | fs/ext4/ioctl.c | 10 | ||||
| -rw-r--r-- | fs/ext4/mballoc.c | 385 | ||||
| -rw-r--r-- | fs/ext4/migrate.c | 2 | ||||
| -rw-r--r-- | fs/ext4/namei.c | 41 | ||||
| -rw-r--r-- | fs/ext4/orphan.c | 4 | ||||
| -rw-r--r-- | fs/ext4/page-io.c | 22 | ||||
| -rw-r--r-- | fs/ext4/readpage.c | 14 | ||||
| -rw-r--r-- | fs/ext4/resize.c | 7 | ||||
| -rw-r--r-- | fs/ext4/super.c | 168 | ||||
| -rw-r--r-- | fs/ext4/sysfs.c | 8 | ||||
| -rw-r--r-- | fs/ext4/xattr.c | 6 |
24 files changed, 788 insertions, 537 deletions
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index 5a35768d6149..57e82e25f8e2 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -139,7 +139,7 @@ fail: /* * Inode operation get_posix_acl(). * - * inode->i_mutex: don't care + * inode->i_rwsem: don't care */ struct posix_acl * ext4_get_acl(struct inode *inode, int type, bool rcu) @@ -183,7 +183,7 @@ ext4_get_acl(struct inode *inode, int type, bool rcu) /* * Set the access or default ACL of an inode. * - * inode->i_mutex: down unless called from ext4_new_inode + * inode->i_rwsem: down unless called from ext4_new_inode */ static int __ext4_set_acl(handle_t *handle, struct inode *inode, int type, @@ -271,8 +271,8 @@ out_stop: /* * Initialize the ACLs of a new inode. Called from ext4_new_inode. * - * dir->i_mutex: down - * inode->i_mutex: up (access to inode is still exclusive) + * dir->i_rwsem: down + * inode->i_rwsem: up (access to inode is still exclusive) */ int ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index a0fb0c4bdc7c..78ee3ef795ae 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -411,6 +411,7 @@ verified: * ext4_read_block_bitmap_nowait() * @sb: super block * @block_group: given block group + * @ignore_locked: ignore locked buffers * * Read the bitmap for a given block_group,and validate the * bits for block/inode/inode tables are set in the bitmaps diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index 4666b55b736e..5504f72bbbbe 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c @@ -292,15 +292,10 @@ void ext4_release_system_zone(struct super_block *sb) call_rcu(&system_blks->rcu, ext4_destroy_system_zone); } -/* - * Returns 1 if the passed-in block region (start_blk, - * start_blk+count) is valid; 0 if some part of the block region - * overlaps with some other filesystem metadata blocks. - */ -int ext4_inode_block_valid(struct inode *inode, ext4_fsblk_t start_blk, - unsigned int count) +int ext4_sb_block_valid(struct super_block *sb, struct inode *inode, + ext4_fsblk_t start_blk, unsigned int count) { - struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_system_blocks *system_blks; struct ext4_system_zone *entry; struct rb_node *n; @@ -329,7 +324,9 @@ int ext4_inode_block_valid(struct inode *inode, ext4_fsblk_t start_blk, else if (start_blk >= (entry->start_blk + entry->count)) n = n->rb_right; else { - ret = (entry->ino == inode->i_ino); + ret = 0; + if (inode) + ret = (entry->ino == inode->i_ino); break; } } @@ -338,6 +335,17 @@ out_rcu: return ret; } +/* + * Returns 1 if the passed-in block region (start_blk, + * start_blk+count) is valid; 0 if some part of the block region + * overlaps with some other filesystem metadata blocks. + */ +int ext4_inode_block_valid(struct inode *inode, ext4_fsblk_t start_blk, + unsigned int count) +{ + return ext4_sb_block_valid(inode->i_sb, inode, start_blk, count); +} + int ext4_check_blockref(const char *function, unsigned int line, struct inode *inode, __le32 *p, unsigned int max) { diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 715ee206dfe1..3f87cca49f0c 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1028,7 +1028,7 @@ struct ext4_inode_info { /* * Extended attributes can be read independently of the main file - * data. Taking i_mutex even when reading would cause contention + * data. Taking i_rwsem even when reading would cause contention * between readers of EAs and writers of regular file data, so * instead we synchronize on xattr_sem when reading or changing * EAs. @@ -1046,6 +1046,8 @@ struct ext4_inode_info { /* Fast commit related info */ + /* For tracking dentry create updates */ + struct list_head i_fc_dilist; struct list_head i_fc_list; /* * inodes that need fast commit * protected by sbi->s_fc_lock. @@ -1279,7 +1281,7 @@ struct ext4_inode_info { #define ext4_find_next_zero_bit find_next_zero_bit_le #define ext4_find_next_bit find_next_bit_le -extern void ext4_set_bits(void *bm, int cur, int len); +extern void mb_set_bits(void *bm, int cur, int len); /* * Maximal mount counts between two filesystem checks @@ -1699,6 +1701,7 @@ struct ext4_sb_info { */ struct percpu_rw_semaphore s_writepages_rwsem; struct dax_device *s_daxdev; + u64 s_dax_part_off; #ifdef CONFIG_EXT4_DEBUG unsigned long s_simulate_fail; #endif @@ -1749,6 +1752,7 @@ struct ext4_sb_info { spinlock_t s_fc_lock; struct buffer_head *s_fc_bh; struct ext4_fc_stats s_fc_stats; + tid_t s_fc_ineligible_tid; #ifdef CONFIG_EXT4_DEBUG int s_fc_debug_max_replay; #endif @@ -1794,10 +1798,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) enum { EXT4_MF_MNTDIR_SAMPLED, EXT4_MF_FS_ABORTED, /* Fatal error detected */ - EXT4_MF_FC_INELIGIBLE, /* Fast commit ineligible */ - EXT4_MF_FC_COMMITTING /* File system underoing a fast - * commit. - */ + EXT4_MF_FC_INELIGIBLE /* Fast commit ineligible */ }; static inline void ext4_set_mount_flag(struct super_block *sb, int bit) @@ -2484,7 +2485,7 @@ struct ext4_filename { #ifdef CONFIG_FS_ENCRYPTION struct fscrypt_str crypto_buf; #endif -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) struct fscrypt_str cf_name; #endif }; @@ -2720,7 +2721,7 @@ extern unsigned ext4_free_clusters_after_init(struct super_block *sb, struct ext4_group_desc *gdp); ext4_fsblk_t ext4_inode_to_goal_block(struct inode *); -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) extern int ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname, struct ext4_filename *fname); @@ -2753,7 +2754,7 @@ static inline int ext4_fname_setup_filename(struct inode *dir, ext4_fname_from_fscrypt_name(fname, &name); -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) err = ext4_fname_setup_ci_filename(dir, iname, fname); #endif return err; @@ -2772,7 +2773,7 @@ static inline int ext4_fname_prepare_lookup(struct inode *dir, ext4_fname_from_fscrypt_name(fname, &name); -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) err = ext4_fname_setup_ci_filename(dir, &dentry->d_name, fname); #endif return err; @@ -2789,7 +2790,7 @@ static inline void ext4_fname_free_filename(struct ext4_filename *fname) fname->usr_fname = NULL; fname->disk_name.name = NULL; -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) kfree(fname->cf_name.name); fname->cf_name.name = NULL; #endif @@ -2805,7 +2806,7 @@ static inline int ext4_fname_setup_filename(struct inode *dir, fname->disk_name.name = (unsigned char *) iname->name; fname->disk_name.len = iname->len; -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) err = ext4_fname_setup_ci_filename(dir, iname, fname); #endif @@ -2821,7 +2822,7 @@ static inline int ext4_fname_prepare_lookup(struct inode *dir, static inline void ext4_fname_free_filename(struct ext4_filename *fname) { -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) kfree(fname->cf_name.name); fname->cf_name.name = NULL; #endif @@ -2925,7 +2926,7 @@ void __ext4_fc_track_create(handle_t *handle, struct inode *inode, struct dentry *dentry); void ext4_fc_track_create(handle_t *handle, struct dentry *dentry); void ext4_fc_track_inode(handle_t *handle, struct inode *inode); -void ext4_fc_mark_ineligible(struct super_block *sb, int reason); +void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handle); void ext4_fc_start_update(struct inode *inode); void ext4_fc_stop_update(struct inode *inode); void ext4_fc_del(struct inode *inode); @@ -2934,6 +2935,9 @@ void ext4_fc_replay_cleanup(struct super_block *sb); int ext4_fc_commit(journal_t *journal, tid_t commit_tid); int __init ext4_fc_init_dentry_cache(void); void ext4_fc_destroy_dentry_cache(void); +int ext4_fc_record_regions(struct super_block *sb, int ino, + ext4_lblk_t lblk, ext4_fsblk_t pblk, + int len, int replay); /* mballoc.c */ extern const struct seq_operations ext4_mb_seq_groups_ops; @@ -3406,7 +3410,7 @@ do { \ #define EXT4_FREECLUSTERS_WATERMARK 0 #endif -/* Update i_disksize. Requires i_mutex to avoid races with truncate */ +/* Update i_disksize. Requires i_rwsem to avoid races with truncate */ static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize) { WARN_ON_ONCE(S_ISREG(inode->i_mode) && @@ -3417,7 +3421,7 @@ static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize) up_write(&EXT4_I(inode)->i_data_sem); } -/* Update i_size, i_disksize. Requires i_mutex to avoid races with truncate */ +/* Update i_size, i_disksize. Requires i_rwsem to avoid races with truncate */ static inline int ext4_update_inode_size(struct inode *inode, loff_t newsize) { int changed = 0; @@ -3705,6 +3709,9 @@ extern int ext4_inode_block_valid(struct inode *inode, unsigned int count); extern int ext4_check_blockref(const char *, unsigned int, struct inode *, __le32 *, unsigned int); +extern int ext4_sb_block_valid(struct super_block *sb, struct inode *inode, + ext4_fsblk_t start_blk, unsigned int count); + /* extents.c */ struct ext4_ext_path; diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index 0e4fa644df01..db2ae4a2b38d 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -491,7 +491,7 @@ static inline int ext4_free_data_revoke_credits(struct inode *inode, int blocks) /* * This function controls whether or not we should try to go down the * dioread_nolock code paths, which makes it safe to avoid taking - * i_mutex for direct I/O reads. This only works for extent-based + * i_rwsem for direct I/O reads. This only works for extent-based * files, and it doesn't work if data journaling is enabled, since the * dioread_nolock code uses b_private to pass information back to the * I/O completion handler, and this conflicts with the jbd's use of diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 1077ce7e189f..0d98cf402282 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -27,8 +27,8 @@ #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/fiemap.h> -#include <linux/backing-dev.h> #include <linux/iomap.h> +#include <linux/sched/mm.h> #include "ext4_jbd2.h" #include "ext4_extents.h" #include "xattr.h" @@ -97,7 +97,7 @@ static int ext4_ext_trunc_restart_fn(struct inode *inode, int *dropped) * Drop i_data_sem to avoid deadlock with ext4_map_blocks. At this * moment, get_block can be called only for blocks inside i_size since * page cache has been already dropped and writes are blocked by - * i_mutex. So we can safely drop the i_data_sem here. + * i_rwsem. So we can safely drop the i_data_sem here. */ BUG_ON(EXT4_JOURNAL(inode) == NULL); ext4_discard_preallocations(inode, 0); @@ -3368,7 +3368,6 @@ static int ext4_split_extent(handle_t *handle, return -EFSCORRUPTED; } unwritten = ext4_ext_is_unwritten(ex); - split_flag1 = 0; if (map->m_lblk >= ee_block) { split_flag1 = split_flag & EXT4_EXT_DATA_VALID2; @@ -4404,8 +4403,7 @@ retry: err = ext4_es_remove_extent(inode, last_block, EXT_MAX_BLOCKS - last_block); if (err == -ENOMEM) { - cond_resched(); - congestion_wait(BLK_RW_ASYNC, HZ/50); + memalloc_retry_wait(GFP_ATOMIC); goto retry; } if (err) @@ -4413,8 +4411,7 @@ retry: retry_remove_space: err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1); if (err == -ENOMEM) { - cond_resched(); - congestion_wait(BLK_RW_ASYNC, HZ/50); + memalloc_retry_wait(GFP_ATOMIC); goto retry_remove_space; } return err; @@ -4574,7 +4571,7 @@ static long ext4_zero_range(struct file *file, loff_t offset, flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT; - /* Wait all existing dio workers, newcomers will block on i_mutex */ + /* Wait all existing dio workers, newcomers will block on i_rwsem */ inode_dio_wait(inode); /* Preallocate the range including the unaligned edges */ @@ -4740,7 +4737,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) goto out; } - /* Wait all existing dio workers, newcomers will block on i_mutex */ + /* Wait all existing dio workers, newcomers will block on i_rwsem */ inode_dio_wait(inode); ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, flags); @@ -5336,7 +5333,7 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) ret = PTR_ERR(handle); goto out_mmap; } - ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE); + ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle); down_write(&EXT4_I(inode)->i_data_sem); ext4_discard_preallocations(inode, 0); @@ -5476,7 +5473,7 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) ret = PTR_ERR(handle); goto out_mmap; } - ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE); + ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle); /* Expand file to avoid data loss if there is error while shifting */ inode->i_size += len; @@ -5573,7 +5570,7 @@ out_mutex: * stuff such as page-cache locking consistency, bh mapping consistency or * extent's data copying must be performed by caller. * Locking: - * i_mutex is held for both inodes + * i_rwsem is held for both inodes * i_data_sem is locked for write for both inodes * Assumptions: * All pages from requested range are locked for both inodes @@ -6093,11 +6090,15 @@ int ext4_ext_clear_bb(struct inode *inode) ext4_mb_mark_bb(inode->i_sb, path[j].p_block, 1, 0); + ext4_fc_record_regions(inode->i_sb, inode->i_ino, + 0, path[j].p_block, 1, 1); } ext4_ext_drop_refs(path); kfree(path); } ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0); + ext4_fc_record_regions(inode->i_sb, inode->i_ino, + map.m_lblk, map.m_pblk, map.m_len, 1); } cur = cur + map.m_len; } diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 5ae8026a0c56..3d72565ec6e8 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -199,6 +199,7 @@ void ext4_fc_init_inode(struct inode *inode) ext4_fc_reset_inode(inode); ext4_clear_inode_state(inode, EXT4_STATE_FC_COMMITTING); INIT_LIST_HEAD(&ei->i_fc_list); + INIT_LIST_HEAD(&ei->i_fc_dilist); init_waitqueue_head(&ei->i_fc_wait); atomic_set(&ei->i_fc_updates, 0); } @@ -279,6 +280,8 @@ void ext4_fc_stop_update(struct inode *inode) void ext4_fc_del(struct inode *inode) { struct ext4_inode_info *ei = EXT4_I(inode); + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + struct ext4_fc_dentry_update *fc_dentry; if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)) @@ -286,7 +289,7 @@ void ext4_fc_del(struct inode *inode) restart: spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock); - if (list_empty(&ei->i_fc_list)) { + if (list_empty(&ei->i_fc_list) && list_empty(&ei->i_fc_dilist)) { spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock); return; } @@ -295,23 +298,62 @@ restart: ext4_fc_wait_committing_inode(inode); goto restart; } - list_del_init(&ei->i_fc_list); - spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock); + + if (!list_empty(&ei->i_fc_list)) + list_del_init(&ei->i_fc_list); + + /* + * Since this inode is getting removed, let's also remove all FC + * dentry create references, since it is not needed to log it anyways. + */ + if (list_empty(&ei->i_fc_dilist)) { + spin_unlock(&sbi->s_fc_lock); + return; + } + + fc_dentry = list_first_entry(&ei->i_fc_dilist, struct ext4_fc_dentry_update, fcd_dilist); + WARN_ON(fc_dentry->fcd_op != EXT4_FC_TAG_CREAT); + list_del_init(&fc_dentry->fcd_list); + list_del_init(&fc_dentry->fcd_dilist); + + WARN_ON(!list_empty(&ei->i_fc_dilist)); + spin_unlock(&sbi->s_fc_lock); + + if (fc_dentry->fcd_name.name && + fc_dentry->fcd_name.len > DNAME_INLINE_LEN) + kfree(fc_dentry->fcd_name.name); + kmem_cache_free(ext4_fc_dentry_cachep, fc_dentry); + + return; } /* - * Mark file system as fast commit ineligible. This means that next commit - * operation would result in a full jbd2 commit. + * Mark file system as fast commit ineligible, and record latest + * ineligible transaction tid. This means until the recorded + * transaction, commit operation would result in a full jbd2 commit. */ -void ext4_fc_mark_ineligible(struct super_block *sb, int reason) +void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handle) { struct ext4_sb_info *sbi = EXT4_SB(sb); + tid_t tid; if (!test_opt2(sb, JOURNAL_FAST_COMMIT) || (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)) return; ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); + if (handle && !IS_ERR(handle)) + tid = handle->h_transaction->t_tid; + else { + read_lock(&sbi->s_journal->j_state_lock); + tid = sbi->s_journal->j_running_transaction ? + sbi->s_journal->j_running_transaction->t_tid : 0; + read_unlock(&sbi->s_journal->j_state_lock); + } + spin_lock(&sbi->s_fc_lock); + if (sbi->s_fc_ineligible_tid < tid) + sbi->s_fc_ineligible_tid = tid; + spin_unlock(&sbi->s_fc_lock); WARN_ON(reason >= EXT4_FC_REASON_MAX); sbi->s_fc_stats.fc_ineligible_reason_count[reason]++; } @@ -337,13 +379,6 @@ static int ext4_fc_track_template( tid_t tid = 0; int ret; - if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || - (sbi->s_mount_state & EXT4_FC_REPLAY)) - return -EOPNOTSUPP; - - if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE)) - return -EINVAL; - tid = handle->h_transaction->t_tid; mutex_lock(&ei->i_fc_lock); if (tid == ei->i_sync_tid) { @@ -361,7 +396,8 @@ static int ext4_fc_track_template( spin_lock(&sbi->s_fc_lock); if (list_empty(&EXT4_I(inode)->i_fc_list)) list_add_tail(&EXT4_I(inode)->i_fc_list, - (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING)) ? + (sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING || + sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING) ? &sbi->s_fc_q[FC_Q_STAGING] : &sbi->s_fc_q[FC_Q_MAIN]); spin_unlock(&sbi->s_fc_lock); @@ -387,7 +423,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update) mutex_unlock(&ei->i_fc_lock); node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS); if (!node) { - ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM); + ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM, NULL); mutex_lock(&ei->i_fc_lock); return -ENOMEM; } @@ -400,7 +436,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update) if (!node->fcd_name.name) { kmem_cache_free(ext4_fc_dentry_cachep, node); ext4_fc_mark_ineligible(inode->i_sb, - EXT4_FC_REASON_NOMEM); + EXT4_FC_REASON_NOMEM, NULL); mutex_lock(&ei->i_fc_lock); return -ENOMEM; } @@ -412,13 +448,28 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update) node->fcd_name.name = node->fcd_iname; } node->fcd_name.len = dentry->d_name.len; - + INIT_LIST_HEAD(&node->fcd_dilist); spin_lock(&sbi->s_fc_lock); - if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING)) + if (sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING || + sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING) list_add_tail(&node->fcd_list, &sbi->s_fc_dentry_q[FC_Q_STAGING]); else list_add_tail(&node->fcd_list, &sbi->s_fc_dentry_q[FC_Q_MAIN]); + + /* + * This helps us keep a track of all fc_dentry updates which is part of + * this ext4 inode. So in case the inode is getting unlinked, before + * even we get a chance to fsync, we could remove all fc_dentry + * references while evicting the inode in ext4_fc_del(). + * Also with this, we don't need to loop over all the inodes in + * sbi->s_fc_q to get the corresponding inode in + * ext4_fc_commit_dentry_updates(). + */ + if (dentry_update->op == EXT4_FC_TAG_CREAT) { + WARN_ON(!list_empty(&ei->i_fc_dilist)); + list_add_tail(&node->fcd_dilist, &ei->i_fc_dilist); + } spin_unlock(&sbi->s_fc_lock); mutex_lock(&ei->i_fc_lock); @@ -436,12 +487,22 @@ void __ext4_fc_track_unlink(handle_t *handle, ret = ext4_fc_track_template(handle, inode, __track_dentry_update, (void *)&args, 0); - trace_ext4_fc_track_unlink(inode, dentry, ret); + trace_ext4_fc_track_unlink(handle, inode, dentry, ret); } void ext4_fc_track_unlink(handle_t *handle, struct dentry *dentry) { - __ext4_fc_track_unlink(handle, d_inode(dentry), dentry); + struct inode *inode = d_inode(dentry); + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + + if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || + (sbi->s_mount_state & EXT4_FC_REPLAY)) + return; + + if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE)) + return; + + __ext4_fc_track_unlink(handle, inode, dentry); } void __ext4_fc_track_link(handle_t *handle, @@ -455,12 +516,22 @@ void __ext4_fc_track_link(handle_t *handle, ret = ext4_fc_track_template(handle, inode, __track_dentry_update, (void *)&args, 0); - trace_ext4_fc_track_link(inode, dentry, ret); + trace_ext4_fc_track_link(handle, inode, dentry, ret); } void ext4_fc_track_link(handle_t *handle, struct dentry *dentry) { - __ext4_fc_track_link(handle, d_inode(dentry), dentry); + struct inode *inode = d_inode(dentry); + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + + if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || + (sbi->s_mount_state & EXT4_FC_REPLAY)) + return; + + if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE)) + return; + + __ext4_fc_track_link(handle, inode, dentry); } void __ext4_fc_track_create(handle_t *handle, struct inode *inode, @@ -474,12 +545,22 @@ void __ext4_fc_track_create(handle_t *handle, struct inode *inode, ret = ext4_fc_track_template(handle, inode, __track_dentry_update, (void *)&args, 0); - trace_ext4_fc_track_create(inode, dentry, ret); + trace_ext4_fc_track_create(handle, inode, dentry, ret); } void ext4_fc_track_create(handle_t *handle, struct dentry *dentry) { - __ext4_fc_track_create(handle, d_inode(dentry), dentry); + struct inode *inode = d_inode(dentry); + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + + if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || + (sbi->s_mount_state & EXT4_FC_REPLAY)) + return; + + if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE)) + return; + + __ext4_fc_track_create(handle, inode, dentry); } /* __track_fn for inode tracking */ @@ -495,6 +576,7 @@ static int __track_inode(struct inode *inode, void *arg, bool update) void ext4_fc_track_inode(handle_t *handle, struct inode *inode) { + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); int ret; if (S_ISDIR(inode->i_mode)) @@ -502,12 +584,19 @@ void ext4_fc_track_inode(handle_t *handle, struct inode *inode) if (ext4_should_journal_data(inode)) { ext4_fc_mark_ineligible(inode->i_sb, - EXT4_FC_REASON_INODE_JOURNAL_DATA); + EXT4_FC_REASON_INODE_JOURNAL_DATA, handle); return; } + if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || + (sbi->s_mount_state & EXT4_FC_REPLAY)) + return; + + if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE)) + return; + ret = ext4_fc_track_template(handle, inode, __track_inode, NULL, 1); - trace_ext4_fc_track_inode(inode, ret); + trace_ext4_fc_track_inode(handle, inode, ret); } struct __track_range_args { @@ -545,18 +634,26 @@ static int __track_range(struct inode *inode, void *arg, bool update) void ext4_fc_track_range(handle_t *handle, struct inode *inode, ext4_lblk_t start, ext4_lblk_t end) { + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); struct __track_range_args args; int ret; if (S_ISDIR(inode->i_mode)) return; + if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || + (sbi->s_mount_state & EXT4_FC_REPLAY)) + return; + + if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE)) + return; + args.start = start; args.end = end; ret = ext4_fc_track_template(handle, inode, __track_range, &args, 1); - trace_ext4_fc_track_range(inode, start, end, ret); + trace_ext4_fc_track_range(handle, inode, start, end, ret); } static void ext4_fc_submit_bh(struct super_block *sb, bool is_tail) @@ -879,7 +976,6 @@ static int ext4_fc_submit_inode_data_all(journal_t *journal) int ret = 0; spin_lock(&sbi->s_fc_lock); - ext4_set_mount_flag(sb, EXT4_MF_FC_COMMITTING); list_for_each_entry(ei, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) { ext4_set_inode_state(&ei->vfs_inode, EXT4_STATE_FC_COMMITTING); while (atomic_read(&ei->i_fc_updates)) { @@ -939,7 +1035,7 @@ __releases(&sbi->s_fc_lock) struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_fc_dentry_update *fc_dentry, *fc_dentry_n; struct inode *inode; - struct ext4_inode_info *ei, *ei_n; + struct ext4_inode_info *ei; int ret; if (list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN])) @@ -955,21 +1051,16 @@ __releases(&sbi->s_fc_lock) spin_lock(&sbi->s_fc_lock); continue; } - - inode = NULL; - list_for_each_entry_safe(ei, ei_n, &sbi->s_fc_q[FC_Q_MAIN], - i_fc_list) { - if (ei->vfs_inode.i_ino == fc_dentry->fcd_ino) { - inode = &ei->vfs_inode; - break; - } - } /* - * If we don't find inode in our list, then it was deleted, - * in which case, we don't need to record it's create tag. + * With fcd_dilist we need not loop in sbi->s_fc_q to get the + * corresponding inode pointer */ - if (!inode) - continue; + WARN_ON(list_empty(&fc_dentry->fcd_dilist)); + ei = list_first_entry(&fc_dentry->fcd_dilist, + struct ext4_inode_info, i_fc_dilist); + inode = &ei->vfs_inode; + WARN_ON(inode->i_ino != fc_dentry->fcd_ino); + spin_unlock(&sbi->s_fc_lock); /* @@ -1073,11 +1164,12 @@ out: } static void ext4_fc_update_stats(struct super_block *sb, int status, - u64 commit_time, int nblks) + u64 commit_time, int nblks, tid_t commit_tid) { struct ext4_fc_stats *stats = &EXT4_SB(sb)->s_fc_stats; - jbd_debug(1, "Fast commit ended with status = %d", status); + jbd_debug(1, "Fast commit ended with status = %d for tid %u", + status, commit_tid); if (status == EXT4_FC_STATUS_OK) { stats->fc_num_commits++; stats->fc_numblks += nblks; @@ -1095,7 +1187,7 @@ static void ext4_fc_update_stats(struct super_block *sb, int status, } else { stats->fc_skipped_commits++; } - trace_ext4_fc_commit_stop(sb, nblks, status); + trace_ext4_fc_commit_stop(sb, nblks, status, commit_tid); } /* @@ -1113,13 +1205,13 @@ int ext4_fc_commit(journal_t *journal, tid_t commit_tid) int status = EXT4_FC_STATUS_OK, fc_bufs_before = 0; ktime_t start_time, commit_time; - trace_ext4_fc_commit_start(sb); - - start_time = ktime_get(); - if (!test_opt2(sb, JOURNAL_FAST_COMMIT)) return jbd2_complete_transaction(journal, commit_tid); + trace_ext4_fc_commit_start(sb, commit_tid); + + start_time = ktime_get(); + restart_fc: ret = jbd2_fc_begin_commit(journal, commit_tid); if (ret == -EALREADY) { @@ -1127,14 +1219,16 @@ restart_fc: if (atomic_read(&sbi->s_fc_subtid) <= subtid && commit_tid > journal->j_commit_sequence) goto restart_fc; - ext4_fc_update_stats(sb, EXT4_FC_STATUS_SKIPPED, 0, 0); + ext4_fc_update_stats(sb, EXT4_FC_STATUS_SKIPPED, 0, 0, + commit_tid); return 0; } else if (ret) { /* * Commit couldn't start. Just update stats and perform a * full commit. */ - ext4_fc_update_stats(sb, EXT4_FC_STATUS_FAILED, 0, 0); + ext4_fc_update_stats(sb, EXT4_FC_STATUS_FAILED, 0, 0, + commit_tid); return jbd2_complete_transaction(journal, commit_tid); } @@ -1166,12 +1260,12 @@ restart_fc: * don't react too strongly to vast changes in the commit time */ commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time)); - ext4_fc_update_stats(sb, status, commit_time, nblks); + ext4_fc_update_stats(sb, status, commit_time, nblks, commit_tid); return ret; fallback: ret = jbd2_fc_end_commit_fallback(journal); - ext4_fc_update_stats(sb, status, 0, 0); + ext4_fc_update_stats(sb, status, 0, 0, commit_tid); return ret; } @@ -1179,7 +1273,7 @@ fallback: * Fast commit cleanup routine. This is called after every fast commit and * full commit. full is true if we are called after a full commit. */ -static void ext4_fc_cleanup(journal_t *journal, int full) +static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid) { struct super_block *sb = journal->j_private; struct ext4_sb_info *sbi = EXT4_SB(sb); @@ -1189,6 +1283,7 @@ static void ext4_fc_cleanup(journal_t *journal, int full) if (full && sbi->s_fc_bh) sbi->s_fc_bh = NULL; + trace_ext4_fc_cleanup(journal, full, tid); jbd2_fc_release_bufs(journal); spin_lock(&sbi->s_fc_lock); @@ -1197,7 +1292,8 @@ static void ext4_fc_cleanup(journal_t *journal, int full) list_del_init(&iter->i_fc_list); ext4_clear_inode_state(&iter->vfs_inode, EXT4_STATE_FC_COMMITTING); - ext4_fc_reset_inode(&iter->vfs_inode); + if (iter->i_sync_tid <= tid) + ext4_fc_reset_inode(&iter->vfs_inode); /* Make sure EXT4_STATE_FC_COMMITTING bit is clear */ smp_mb(); #if (BITS_PER_LONG < 64) @@ -1212,6 +1308,7 @@ static void ext4_fc_cleanup(journal_t *journal, int full) struct ext4_fc_dentry_update, fcd_list); list_del_init(&fc_dentry->fcd_list); + list_del_init(&fc_dentry->fcd_dilist); spin_unlock(&sbi->s_fc_lock); if (fc_dentry->fcd_name.name && @@ -1226,8 +1323,10 @@ static void ext4_fc_cleanup(journal_t *journal, int full) list_splice_init(&sbi->s_fc_q[FC_Q_STAGING], &sbi->s_fc_q[FC_Q_MAIN]); - ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING); - ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); + if (tid >= sbi->s_fc_ineligible_tid) { + sbi->s_fc_ineligible_tid = 0; + ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); + } if (full) sbi->s_fc_bytes = 0; @@ -1392,14 +1491,15 @@ static int ext4_fc_record_modified_inode(struct super_block *sb, int ino) if (state->fc_modified_inodes[i] == ino) return 0; if (state->fc_modified_inodes_used == state->fc_modified_inodes_size) { - state->fc_modified_inodes_size += - EXT4_FC_REPLAY_REALLOC_INCREMENT; state->fc_modified_inodes = krealloc( - state->fc_modified_inodes, sizeof(int) * - state->fc_modified_inodes_size, - GFP_KERNEL); + state->fc_modified_inodes, + sizeof(int) * (state->fc_modified_inodes_size + + EXT4_FC_REPLAY_REALLOC_INCREMENT), + GFP_KERNEL); if (!state->fc_modified_inodes) return -ENOMEM; + state->fc_modified_inodes_size += + EXT4_FC_REPLAY_REALLOC_INCREMENT; } state->fc_modified_inodes[state->fc_modified_inodes_used++] = ino; return 0; @@ -1431,7 +1531,9 @@ static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl, } inode = NULL; - ext4_fc_record_modified_inode(sb, ino); + ret = ext4_fc_record_modified_inode(sb, ino); + if (ret) + goto out; raw_fc_inode = (struct ext4_inode *) (val + offsetof(struct ext4_fc_inode, fc_raw_inode)); @@ -1563,16 +1665,23 @@ out: } /* - * Record physical disk regions which are in use as per fast commit area. Our - * simple replay phase allocator excludes these regions from allocation. + * Record physical disk regions which are in use as per fast commit area, + * and used by inodes during replay phase. Our simple replay phase + * allocator excludes these regions from allocation. */ -static int ext4_fc_record_regions(struct super_block *sb, int ino, - ext4_lblk_t lblk, ext4_fsblk_t pblk, int len) +int ext4_fc_record_regions(struct super_block *sb, int ino, + ext4_lblk_t lblk, ext4_fsblk_t pblk, int len, int replay) { struct ext4_fc_replay_state *state; struct ext4_fc_alloc_region *region; state = &EXT4_SB(sb)->s_fc_replay_state; + /* + * during replay phase, the fc_regions_valid may not same as + * fc_regions_used, update it when do new additions. + */ + if (replay && state->fc_regions_used != state->fc_regions_valid) + state->fc_regions_used = state->fc_regions_valid; if (state->fc_regions_used == state->fc_regions_size) { state->fc_regions_size += EXT4_FC_REPLAY_REALLOC_INCREMENT; @@ -1590,6 +1699,9 @@ static int ext4_fc_record_regions(struct super_block *sb, int ino, region->pblk = pblk; region->len = len; + if (replay) + state->fc_regions_valid++; + return 0; } @@ -1621,6 +1733,8 @@ static int ext4_fc_replay_add_range(struct super_block *sb, } ret = ext4_fc_record_modified_inode(sb, inode->i_ino); + if (ret) + goto out; start = le32_to_cpu(ex->ee_block); start_pblk = ext4_ext_pblock(ex); @@ -1638,18 +1752,14 @@ static int ext4_fc_replay_add_range(struct super_block *sb, map.m_pblk = 0; ret = ext4_map_blocks(NULL, inode, &map, 0); - if (ret < 0) { - iput(inode); - return 0; - } + if (ret < 0) + goto out; if (ret == 0) { /* Range is not mapped */ path = ext4_find_extent(inode, cur, NULL, 0); - if (IS_ERR(path)) { - iput(inode); - return 0; - } + if (IS_ERR(path)) + goto out; memset(&newex, 0, sizeof(newex)); newex.ee_block = cpu_to_le32(cur); ext4_ext_store_pblock( @@ -1663,10 +1773,8 @@ static int ext4_fc_replay_add_range(struct super_block *sb, up_write((&EXT4_I(inode)->i_data_sem)); ext4_ext_drop_refs(path); kfree(path); - if (ret) { - iput(inode); - return 0; - } + if (ret) + goto out; goto next; } @@ -1679,10 +1787,8 @@ static int ext4_fc_replay_add_range(struct super_block *sb, ret = ext4_ext_replay_update_ex(inode, cur, map.m_len, ext4_ext_is_unwritten(ex), start_pblk + cur - start); - if (ret) { - iput(inode); - return 0; - } + if (ret) + goto out; /* * Mark the old blocks as free since they aren't used * anymore. We maintain an array of all the modified @@ -1702,10 +1808,8 @@ static int ext4_fc_replay_add_range(struct super_block *sb, ext4_ext_is_unwritten(ex), map.m_pblk); ret = ext4_ext_replay_update_ex(inode, cur, map.m_len, ext4_ext_is_unwritten(ex), map.m_pblk); - if (ret) { - iput(inode); - return 0; - } + if (ret) + goto out; /* * We may have split the extent tree while toggling the state. * Try to shrink the extent tree now. @@ -1717,6 +1821,7 @@ next: } ext4_ext_replay_shrink_inode(inode, i_size_read(inode) >> sb->s_blocksize_bits); +out: iput(inode); return 0; } @@ -1746,6 +1851,8 @@ ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl, } ret = ext4_fc_record_modified_inode(sb, inode->i_ino); + if (ret) + goto out; jbd_debug(1, "DEL_RANGE, inode %ld, lblk %d, len %d\n", inode->i_ino, le32_to_cpu(lrange.fc_lblk), @@ -1755,10 +1862,8 @@ ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl, map.m_len = remaining; ret = ext4_map_blocks(NULL, inode, &map, 0); - if (ret < 0) { - iput(inode); - return 0; - } + if (ret < 0) + goto out; if (ret > 0) { remaining -= ret; cur += ret; @@ -1770,18 +1875,17 @@ ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl, } down_write(&EXT4_I(inode)->i_data_sem); - ret = ext4_ext_remove_space(inode, lrange.fc_lblk, - lrange.fc_lblk + lrange.fc_len - 1); + ret = ext4_ext_remove_space(inode, le32_to_cpu(lrange.fc_lblk), + le32_to_cpu(lrange.fc_lblk) + + le32_to_cpu(lrange.fc_len) - 1); up_write(&EXT4_I(inode)->i_data_sem); - if (ret) { - iput(inode); - return 0; - } + if (ret) + goto out; ext4_ext_replay_shrink_inode(inode, i_size_read(inode) >> sb->s_blocksize_bits); ext4_mark_inode_dirty(NULL, inode); +out: iput(inode); - return 0; } @@ -1852,8 +1956,8 @@ bool ext4_fc_replay_check_excluded(struct super_block *sb, ext4_fsblk_t blk) if (state->fc_regions[i].ino == 0 || state->fc_regions[i].len == 0) continue; - if (blk >= state->fc_regions[i].pblk && - blk < state->fc_regions[i].pblk + state->fc_regions[i].len) + if (in_range(blk, state->fc_regions[i].pblk, + state->fc_regions[i].len)) return true; } return false; @@ -1937,7 +2041,7 @@ static int ext4_fc_replay_scan(journal_t *journal, ret = ext4_fc_record_regions(sb, le32_to_cpu(ext.fc_ino), le32_to_cpu(ex->ee_block), ext4_ext_pblock(ex), - ext4_ext_get_actual_len(ex)); + ext4_ext_get_actual_len(ex), 0); if (ret < 0) break; ret = JBD2_FC_REPLAY_CONTINUE; diff --git a/fs/ext4/fast_commit.h b/fs/ext4/fast_commit.h index 083ad1cb705a..80414dcba6e1 100644 --- a/fs/ext4/fast_commit.h +++ b/fs/ext4/fast_commit.h @@ -93,7 +93,6 @@ enum { EXT4_FC_REASON_RENAME_DIR, EXT4_FC_REASON_FALLOC_RANGE, EXT4_FC_REASON_INODE_JOURNAL_DATA, - EXT4_FC_COMMIT_FAILED, EXT4_FC_REASON_MAX }; @@ -109,6 +108,7 @@ struct ext4_fc_dentry_update { struct qstr fcd_name; /* Dirent name */ unsigned char fcd_iname[DNAME_INLINE_LEN]; /* Dirent name string */ struct list_head fcd_list; + struct list_head fcd_dilist; }; struct ext4_fc_stats { diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 8cc11715518a..8bd66cdc41be 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -36,9 +36,11 @@ #include "acl.h" #include "truncate.h" -static bool ext4_dio_supported(struct inode *inode) +static bool ext4_dio_supported(struct kiocb *iocb, struct iov_iter *iter) { - if (IS_ENABLED(CONFIG_FS_ENCRYPTION) && IS_ENCRYPTED(inode)) + struct inode *inode = file_inode(iocb->ki_filp); + + if (!fscrypt_dio_supported(iocb, iter)) return false; if (fsverity_active(inode)) return false; @@ -61,7 +63,7 @@ static ssize_t ext4_dio_read_iter(struct kiocb *iocb, struct iov_iter *to) inode_lock_shared(inode); } - if (!ext4_dio_supported(inode)) { + if (!ext4_dio_supported(iocb, to)) { inode_unlock_shared(inode); /* * Fallback to buffered I/O if the operation being performed on @@ -509,7 +511,7 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from) } /* Fallback to buffered I/O if the inode does not support direct I/O. */ - if (!ext4_dio_supported(inode)) { + if (!ext4_dio_supported(iocb, from)) { if (ilock_shared) inode_unlock_shared(inode); else diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c index f34f4176c1e7..147b5241dd94 100644 --- a/fs/ext4/hash.c +++ b/fs/ext4/hash.c @@ -290,7 +290,7 @@ static int __ext4fs_dirhash(const struct inode *dir, const char *name, int len, int ext4fs_dirhash(const struct inode *dir, const char *name, int len, struct dx_hash_info *hinfo) { -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) const struct unicode_map *um = dir->i_sb->s_encoding; int r, dlen; unsigned char *buff; diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 89efa78ed4b2..07a8c75b65ed 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -696,7 +696,7 @@ static int ext4_ind_trunc_restart_fn(handle_t *handle, struct inode *inode, * Drop i_data_sem to avoid deadlock with ext4_map_blocks. At this * moment, get_block can be called only for blocks inside i_size since * page cache has been already dropped and writes are blocked by - * i_mutex. So we can safely drop the i_data_sem here. + * i_rwsem. So we can safely drop the i_data_sem here. */ BUG_ON(EXT4_JOURNAL(inode) == NULL); ext4_discard_preallocations(inode, 0); diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 39a1ab129fdc..9c076262770d 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -7,7 +7,7 @@ #include <linux/iomap.h> #include <linux/fiemap.h> #include <linux/iversion.h> -#include <linux/backing-dev.h> +#include <linux/sched/mm.h> #include "ext4_jbd2.h" #include "ext4.h" @@ -911,7 +911,7 @@ int ext4_da_write_inline_data_begin(struct address_space *mapping, struct page **pagep, void **fsdata) { - int ret, inline_size; + int ret; handle_t *handle; struct page *page; struct ext4_iloc iloc; @@ -928,14 +928,9 @@ retry_journal: goto out; } - inline_size = ext4_get_max_inline_size(inode); - - ret = -ENOSPC; - if (inline_size >= pos + len) { - ret = ext4_prepare_inline_data(handle, inode, pos + len); - if (ret && ret != -ENOSPC) - goto out_journal; - } + ret = ext4_prepare_inline_data(handle, inode, pos + len); + if (ret && ret != -ENOSPC) + goto out_journal; /* * We cannot recurse into the filesystem as the transaction @@ -1133,7 +1128,15 @@ static void ext4_restore_inline_data(handle_t *handle, struct inode *inode, struct ext4_iloc *iloc, void *buf, int inline_size) { - ext4_create_inline_data(handle, inode, inline_size); + int ret; + + ret = ext4_create_inline_data(handle, inode, inline_size); + if (ret) { + ext4_msg(inode->i_sb, KERN_EMERG, + "error restoring inline_data for inode -- potential data loss! (inode %lu, error %d)", + inode->i_ino, ret); + return; + } ext4_write_inline_data(inode, iloc, buf, 0, inline_size); ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); } @@ -1780,19 +1783,20 @@ bool empty_inline_dir(struct inode *dir, int *has_inline_data) void *inline_pos; unsigned int offset; struct ext4_dir_entry_2 *de; - bool ret = true; + bool ret = false; err = ext4_get_inode_loc(dir, &iloc); if (err) { EXT4_ERROR_INODE_ERR(dir, -err, "error %d getting inode %lu block", err, dir->i_ino); - return true; + return false; } down_read(&EXT4_I(dir)->xattr_sem); if (!ext4_has_inline_data(dir)) { *has_inline_data = 0; + ret = true; goto out; } @@ -1801,7 +1805,6 @@ bool empty_inline_dir(struct inode *dir, int *has_inline_data) ext4_warning(dir->i_sb, "bad inline directory (dir #%lu) - no `..'", dir->i_ino); - ret = true; goto out; } @@ -1820,16 +1823,15 @@ bool empty_inline_dir(struct inode *dir, int *has_inline_data) dir->i_ino, le32_to_cpu(de->inode), le16_to_cpu(de->rec_len), de->name_len, inline_size); - ret = true; goto out; } if (le32_to_cpu(de->inode)) { - ret = false; goto out; } offset += ext4_rec_len_from_disk(de->rec_len, inline_size); } + ret = true; out: up_read(&EXT4_I(dir)->xattr_sem); brelse(iloc.bh); @@ -1929,8 +1931,7 @@ int ext4_inline_data_truncate(struct inode *inode, int *has_inline) retry: err = ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS); if (err == -ENOMEM) { - cond_resched(); - congestion_wait(BLK_RW_ASYNC, HZ/50); + memalloc_retry_wait(GFP_ATOMIC); goto retry; } if (err) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 9dbeb772de60..1ce13f69fbec 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -41,6 +41,7 @@ #include <linux/bitops.h> #include <linux/iomap.h> #include <linux/iversion.h> +#include <linux/dax.h> #include "ext4_jbd2.h" #include "xattr.h" @@ -136,8 +137,6 @@ static inline int ext4_begin_ordered_truncate(struct inode *inode, new_size); } -static void ext4_invalidatepage(struct page *page, unsigned int offset, - unsigned int length); static int __ext4_journalled_writepage(struct page *page, unsigned int len); static int ext4_meta_trans_blocks(struct inode *inode, int lblocks, int pextents); @@ -185,7 +184,7 @@ void ext4_evict_inode(struct inode *inode) * journal. So although mm thinks everything is clean and * ready for reaping the inode might still have some pages to * write in the running transaction or waiting to be - * checkpointed. Thus calling jbd2_journal_invalidatepage() + * checkpointed. Thus calling jbd2_journal_invalidate_folio() * (via truncate_inode_pages()) to discard these buffers can * cause data loss. Also even if we did not discard these * buffers, we would have no way to find them after the inode @@ -337,7 +336,7 @@ stop_handle: return; no_delete: if (!list_empty(&EXT4_I(inode)->i_fc_list)) - ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM); + ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM, NULL); ext4_clear_inode(inode); /* We must guarantee clearing of inode... */ } @@ -1223,7 +1222,7 @@ retry_journal: /* * __block_write_begin may have instantiated a few blocks * outside i_size. Trim these off again. Don't need - * i_size_read because we hold i_mutex. + * i_size_read because we hold i_rwsem. * * Add inode to orphan list in case we crash before * truncate finishes @@ -1570,16 +1569,18 @@ static void mpage_release_unused_pages(struct mpage_da_data *mpd, break; for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; + struct folio *folio = page_folio(page); - BUG_ON(!PageLocked(page)); - BUG_ON(PageWriteback(page)); + BUG_ON(!folio_test_locked(folio)); + BUG_ON(folio_test_writeback(folio)); if (invalidate) { - if (page_mapped(page)) - clear_page_dirty_for_io(page); - block_invalidatepage(page, 0, PAGE_SIZE); - ClearPageUptodate(page); + if (folio_mapped(folio)) + folio_clear_dirty_for_io(folio); + block_invalidate_folio(folio, 0, + folio_size(folio)); + folio_clear_uptodate(folio); } - unlock_page(page); + folio_unlock(folio); } pagevec_release(&pvec); } @@ -1970,6 +1971,7 @@ out_no_pagelock: static int ext4_writepage(struct page *page, struct writeback_control *wbc) { + struct folio *folio = page_folio(page); int ret = 0; loff_t size; unsigned int len; @@ -1979,8 +1981,8 @@ static int ext4_writepage(struct page *page, bool keep_towrite = false; if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) { - inode->i_mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE); - unlock_page(page); + folio_invalidate(folio, 0, folio_size(folio)); + folio_unlock(folio); return -EIO; } @@ -1992,6 +1994,15 @@ static int ext4_writepage(struct page *page, else len = PAGE_SIZE; + /* Should never happen but for bugs in other kernel subsystems */ + if (!page_has_buffers(page)) { + ext4_warning_inode(inode, + "page %lu does not have buffers attached", page->index); + ClearPageDirty(page); + unlock_page(page); + return 0; + } + page_bufs = page_buffers(page); /* * We cannot do block allocation or other extent handling in this @@ -2593,6 +2604,22 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd) wait_on_page_writeback(page); BUG_ON(PageWriteback(page)); + /* + * Should never happen but for buggy code in + * other subsystems that call + * set_page_dirty() without properly warning + * the file system first. See [1] for more + * information. + * + * [1] https://lore.kernel.org/linux-mm/[email protected] + */ + if (!page_has_buffers(page)) { + ext4_warning_inode(mpd->inode, "page %lu does not have buffers attached", page->index); + ClearPageDirty(page); + unlock_page(page); + continue; + } + if (mpd->map.m_len == 0) mpd->first_page = page->index; mpd->next_page = page->index + 1; @@ -3181,40 +3208,39 @@ static void ext4_readahead(struct readahead_control *rac) ext4_mpage_readpages(inode, rac, NULL); } -static void ext4_invalidatepage(struct page *page, unsigned int offset, - unsigned int length) +static void ext4_invalidate_folio(struct folio *folio, size_t offset, + size_t length) { - trace_ext4_invalidatepage(page, offset, length); + trace_ext4_invalidate_folio(folio, offset, length); /* No journalling happens on data buffers when this function is used */ - WARN_ON(page_has_buffers(page) && buffer_jbd(page_buffers(page))); + WARN_ON(folio_buffers(folio) && buffer_jbd(folio_buffers(folio))); - block_invalidatepage(page, offset, length); + block_invalidate_folio(folio, offset, length); } -static int __ext4_journalled_invalidatepage(struct page *page, - unsigned int offset, - unsigned int length) +static int __ext4_journalled_invalidate_folio(struct folio *folio, + size_t offset, size_t length) { - journal_t *journal = EXT4_JOURNAL(page->mapping->host); + journal_t *journal = EXT4_JOURNAL(folio->mapping->host); - trace_ext4_journalled_invalidatepage(page, offset, length); + trace_ext4_journalled_invalidate_folio(folio, offset, length); /* * If it's a full truncate we just forget about the pending dirtying */ - if (offset == 0 && length == PAGE_SIZE) - ClearPageChecked(page); + if (offset == 0 && length == folio_size(folio)) + folio_clear_checked(folio); - return jbd2_journal_invalidatepage(journal, page, offset, length); + return jbd2_journal_invalidate_folio(journal, folio, offset, length); } /* Wrapper for aops... */ -static void ext4_journalled_invalidatepage(struct page *page, - unsigned int offset, - unsigned int length) +static void ext4_journalled_invalidate_folio(struct folio *folio, + size_t offset, + size_t length) { - WARN_ON(__ext4_journalled_invalidatepage(page, offset, length) < 0); + WARN_ON(__ext4_journalled_invalidate_folio(folio, offset, length) < 0); } static int ext4_releasepage(struct page *page, gfp_t wait) @@ -3253,7 +3279,7 @@ static bool ext4_inode_datasync_dirty(struct inode *inode) static void ext4_set_iomap(struct inode *inode, struct iomap *iomap, struct ext4_map_blocks *map, loff_t offset, - loff_t length) + loff_t length, unsigned int flags) { u8 blkbits = inode->i_blkbits; @@ -3270,8 +3296,10 @@ static void ext4_set_iomap(struct inode *inode, struct iomap *iomap, if (map->m_flags & EXT4_MAP_NEW) iomap->flags |= IOMAP_F_NEW; - iomap->bdev = inode->i_sb->s_bdev; - iomap->dax_dev = EXT4_SB(inode->i_sb)->s_daxdev; + if (flags & IOMAP_DAX) + iomap->dax_dev = EXT4_SB(inode->i_sb)->s_daxdev; + else + iomap->bdev = inode->i_sb->s_bdev; iomap->offset = (u64) map->m_lblk << blkbits; iomap->length = (u64) map->m_len << blkbits; @@ -3291,9 +3319,13 @@ static void ext4_set_iomap(struct inode *inode, struct iomap *iomap, if (map->m_flags & EXT4_MAP_UNWRITTEN) { iomap->type = IOMAP_UNWRITTEN; iomap->addr = (u64) map->m_pblk << blkbits; + if (flags & IOMAP_DAX) + iomap->addr += EXT4_SB(inode->i_sb)->s_dax_part_off; } else if (map->m_flags & EXT4_MAP_MAPPED) { iomap->type = IOMAP_MAPPED; iomap->addr = (u64) map->m_pblk << blkbits; + if (flags & IOMAP_DAX) + iomap->addr += EXT4_SB(inode->i_sb)->s_dax_part_off; } else { iomap->type = IOMAP_HOLE; iomap->addr = IOMAP_NULL_ADDR; @@ -3330,8 +3362,8 @@ retry: * DAX and direct I/O are the only two operations that are currently * supported with IOMAP_WRITE. */ - WARN_ON(!IS_DAX(inode) && !(flags & IOMAP_DIRECT)); - if (IS_DAX(inode)) + WARN_ON(!(flags & (IOMAP_DAX | IOMAP_DIRECT))); + if (flags & IOMAP_DAX) m_flags = EXT4_GET_BLOCKS_CREATE_ZERO; /* * We use i_size instead of i_disksize here because delalloc writeback @@ -3402,7 +3434,14 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length, if (ret < 0) return ret; out: - ext4_set_iomap(inode, iomap, &map, offset, length); + /* + * When inline encryption is enabled, sometimes I/O to an encrypted file + * has to be broken up to guarantee DUN contiguity. Handle this by + * limiting the length of the mapping returned. + */ + map.m_len = fscrypt_limit_io_blocks(inode, map.m_lblk, map.m_len); + + ext4_set_iomap(inode, iomap, &map, offset, length, flags); return 0; } @@ -3522,7 +3561,7 @@ static int ext4_iomap_begin_report(struct inode *inode, loff_t offset, delalloc = ext4_iomap_is_delalloc(inode, &map); set_iomap: - ext4_set_iomap(inode, iomap, &map, offset, length); + ext4_set_iomap(inode, iomap, &map, offset, length, flags); if (delalloc && iomap->type == IOMAP_HOLE) iomap->type = IOMAP_DELALLOC; @@ -3534,29 +3573,32 @@ const struct iomap_ops ext4_iomap_report_ops = { }; /* - * Pages can be marked dirty completely asynchronously from ext4's journalling - * activity. By filemap_sync_pte(), try_to_unmap_one(), etc. We cannot do - * much here because ->set_page_dirty is called under VFS locks. The page is - * not necessarily locked. + * Whenever the folio is being dirtied, corresponding buffers should already + * be attached to the transaction (we take care of this in ext4_page_mkwrite() + * and ext4_write_begin()). However we cannot move buffers to dirty transaction + * lists here because ->dirty_folio is called under VFS locks and the folio + * is not necessarily locked. * - * We cannot just dirty the page and leave attached buffers clean, because the + * We cannot just dirty the folio and leave attached buffers clean, because the * buffers' dirty state is "definitive". We cannot just set the buffers dirty * or jbddirty because all the journalling code will explode. * - * So what we do is to mark the page "pending dirty" and next time writepage + * So what we do is to mark the folio "pending dirty" and next time writepage * is called, propagate that into the buffers appropriately. */ -static int ext4_journalled_set_page_dirty(struct page *page) +static bool ext4_journalled_dirty_folio(struct address_space *mapping, + struct folio *folio) { - SetPageChecked(page); - return __set_page_dirty_nobuffers(page); + WARN_ON_ONCE(!page_has_buffers(&folio->page)); + folio_set_checked(folio); + return filemap_dirty_folio(mapping, folio); } -static int ext4_set_page_dirty(struct page *page) +static bool ext4_dirty_folio(struct address_space *mapping, struct folio *folio) { - WARN_ON_ONCE(!PageLocked(page) && !PageDirty(page)); - WARN_ON_ONCE(!page_has_buffers(page)); - return __set_page_dirty_buffers(page); + WARN_ON_ONCE(!folio_test_locked(folio) && !folio_test_dirty(folio)); + WARN_ON_ONCE(!folio_buffers(folio)); + return block_dirty_folio(mapping, folio); } static int ext4_iomap_swap_activate(struct swap_info_struct *sis, @@ -3573,9 +3615,9 @@ static const struct address_space_operations ext4_aops = { .writepages = ext4_writepages, .write_begin = ext4_write_begin, .write_end = ext4_write_end, - .set_page_dirty = ext4_set_page_dirty, + .dirty_folio = ext4_dirty_folio, .bmap = ext4_bmap, - .invalidatepage = ext4_invalidatepage, + .invalidate_folio = ext4_invalidate_folio, .releasepage = ext4_releasepage, .direct_IO = noop_direct_IO, .migratepage = buffer_migrate_page, @@ -3591,9 +3633,9 @@ static const struct address_space_operations ext4_journalled_aops = { .writepages = ext4_writepages, .write_begin = ext4_write_begin, .write_end = ext4_journalled_write_end, - .set_page_dirty = ext4_journalled_set_page_dirty, + .dirty_folio = ext4_journalled_dirty_folio, .bmap = ext4_bmap, - .invalidatepage = ext4_journalled_invalidatepage, + .invalidate_folio = ext4_journalled_invalidate_folio, .releasepage = ext4_releasepage, .direct_IO = noop_direct_IO, .is_partially_uptodate = block_is_partially_uptodate, @@ -3608,9 +3650,9 @@ static const struct address_space_operations ext4_da_aops = { .writepages = ext4_writepages, .write_begin = ext4_da_write_begin, .write_end = ext4_da_write_end, - .set_page_dirty = ext4_set_page_dirty, + .dirty_folio = ext4_dirty_folio, .bmap = ext4_bmap, - .invalidatepage = ext4_invalidatepage, + .invalidate_folio = ext4_invalidate_folio, .releasepage = ext4_releasepage, .direct_IO = noop_direct_IO, .migratepage = buffer_migrate_page, @@ -3622,9 +3664,8 @@ static const struct address_space_operations ext4_da_aops = { static const struct address_space_operations ext4_dax_aops = { .writepages = ext4_dax_writepages, .direct_IO = noop_direct_IO, - .set_page_dirty = __set_page_dirty_no_writeback, + .dirty_folio = noop_dirty_folio, .bmap = ext4_bmap, - .invalidatepage = noop_invalidatepage, .swap_activate = ext4_iomap_swap_activate, }; @@ -3762,8 +3803,8 @@ static int ext4_block_zero_page_range(handle_t *handle, length = max; if (IS_DAX(inode)) { - return iomap_zero_range(inode, from, length, NULL, - &ext4_iomap_ops); + return dax_zero_range(inode, from, length, NULL, + &ext4_iomap_ops); } return __ext4_block_zero_page_range(handle, mapping, from, length); } @@ -3972,7 +4013,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) } - /* Wait all existing dio workers, newcomers will block on i_mutex */ + /* Wait all existing dio workers, newcomers will block on i_rwsem */ inode_dio_wait(inode); /* @@ -4122,7 +4163,7 @@ int ext4_truncate(struct inode *inode) /* * There is a possibility that we're either freeing the inode * or it's a completely new inode. In those cases we might not - * have i_mutex locked because it's not necessary. + * have i_rwsem locked because it's not necessary. */ if (!(inode->i_state & (I_NEW|I_FREEING))) WARN_ON(!inode_is_locked(inode)); @@ -5197,13 +5238,12 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) } /* - * In data=journal mode ext4_journalled_invalidatepage() may fail to invalidate - * buffers that are attached to a page stradding i_size and are undergoing + * In data=journal mode ext4_journalled_invalidate_folio() may fail to invalidate + * buffers that are attached to a folio straddling i_size and are undergoing * commit. In that case we have to wait for commit to finish and try again. */ static void ext4_wait_for_tail_page_commit(struct inode *inode) { - struct page *page; unsigned offset; journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; tid_t commit_tid = 0; @@ -5211,25 +5251,25 @@ static void ext4_wait_for_tail_page_commit(struct inode *inode) offset = inode->i_size & (PAGE_SIZE - 1); /* - * If the page is fully truncated, we don't need to wait for any commit - * (and we even should not as __ext4_journalled_invalidatepage() may - * strip all buffers from the page but keep the page dirty which can then - * confuse e.g. concurrent ext4_writepage() seeing dirty page without + * If the folio is fully truncated, we don't need to wait for any commit + * (and we even should not as __ext4_journalled_invalidate_folio() may + * strip all buffers from the folio but keep the folio dirty which can then + * confuse e.g. concurrent ext4_writepage() seeing dirty folio without * buffers). Also we don't need to wait for any commit if all buffers in - * the page remain valid. This is most beneficial for the common case of + * the folio remain valid. This is most beneficial for the common case of * blocksize == PAGESIZE. */ if (!offset || offset > (PAGE_SIZE - i_blocksize(inode))) return; while (1) { - page = find_lock_page(inode->i_mapping, + struct folio *folio = filemap_lock_folio(inode->i_mapping, inode->i_size >> PAGE_SHIFT); - if (!page) + if (!folio) return; - ret = __ext4_journalled_invalidatepage(page, offset, - PAGE_SIZE - offset); - unlock_page(page); - put_page(page); + ret = __ext4_journalled_invalidate_folio(folio, offset, + folio_size(folio) - offset); + folio_unlock(folio); + folio_put(folio); if (ret != -EBUSY) return; commit_tid = 0; @@ -5264,7 +5304,7 @@ static void ext4_wait_for_tail_page_commit(struct inode *inode) * transaction are already on disk (truncate waits for pages under * writeback). * - * Called with inode->i_mutex down. + * Called with inode->i_rwsem down. */ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, struct iattr *attr) @@ -5976,7 +6016,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) return PTR_ERR(handle); ext4_fc_mark_ineligible(inode->i_sb, - EXT4_FC_REASON_JOURNAL_FLAG_CHANGE); + EXT4_FC_REASON_JOURNAL_FLAG_CHANGE, handle); err = ext4_mark_inode_dirty(handle, inode); ext4_handle_sync(handle); ext4_journal_stop(handle); diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index bbbedf27b71c..992229ca2d83 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -269,7 +269,7 @@ out: return err ? err : 0; } -/** +/* * Swap memory between @a and @b for @len bytes. * * @a: pointer to first memory area @@ -290,7 +290,7 @@ static void memswap(void *a, void *b, size_t len) } } -/** +/* * Swap i_data and associated attributes between @inode1 and @inode2. * This function is used for the primary swap between inode1 and inode2 * and also to revert this primary swap in case of errors. @@ -344,7 +344,7 @@ void ext4_reset_inode_seed(struct inode *inode) ei->i_csum_seed = ext4_chksum(sbi, csum, (__u8 *)&gen, sizeof(gen)); } -/** +/* * Swap the information from the given @inode and the inode * EXT4_BOOT_LOADER_INO. It will basically swap i_data and all other * important fields of the inodes. @@ -411,7 +411,7 @@ static long swap_inode_boot_loader(struct super_block *sb, err = -EINVAL; goto err_out; } - ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_SWAP_BOOT); + ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_SWAP_BOOT, handle); /* Protect extent tree against block allocations via delalloc */ ext4_double_down_write_data_sem(inode, inode_bl); @@ -1373,7 +1373,7 @@ mext_out: err = ext4_resize_fs(sb, n_blocks_count); if (EXT4_SB(sb)->s_journal) { - ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_RESIZE); + ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_RESIZE, NULL); jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal, 0); jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index cf2fd9fc7d98..252c168454c7 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1000,7 +1000,7 @@ static inline int should_optimize_scan(struct ext4_allocation_context *ac) return 0; if (ac->ac_criteria >= 2) return 0; - if (ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) + if (!ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) return 0; return 1; } @@ -1689,7 +1689,7 @@ static int mb_test_and_clear_bits(void *bm, int cur, int len) return zero_bit; } -void ext4_set_bits(void *bm, int cur, int len) +void mb_set_bits(void *bm, int cur, int len) { __u32 *addr; @@ -1996,7 +1996,7 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex) mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info); mb_update_avg_fragment_size(e4b->bd_sb, e4b->bd_info); - ext4_set_bits(e4b->bd_bitmap, ex->fe_start, len0); + mb_set_bits(e4b->bd_bitmap, ex->fe_start, len0); mb_check_buddy(e4b); return ret; @@ -2834,7 +2834,7 @@ out: static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos) { - struct super_block *sb = PDE_DATA(file_inode(seq->file)); + struct super_block *sb = pde_data(file_inode(seq->file)); ext4_group_t group; if (*pos < 0 || *pos >= ext4_get_groups_count(sb)) @@ -2845,7 +2845,7 @@ static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos) static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos) { - struct super_block *sb = PDE_DATA(file_inode(seq->file)); + struct super_block *sb = pde_data(file_inode(seq->file)); ext4_group_t group; ++*pos; @@ -2857,7 +2857,7 @@ static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos) static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) { - struct super_block *sb = PDE_DATA(file_inode(seq->file)); + struct super_block *sb = pde_data(file_inode(seq->file)); ext4_group_t group = (ext4_group_t) ((unsigned long) v); int i; int err, buddy_loaded = 0; @@ -2985,7 +2985,7 @@ int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset) static void *ext4_mb_seq_structs_summary_start(struct seq_file *seq, loff_t *pos) __acquires(&EXT4_SB(sb)->s_mb_rb_lock) { - struct super_block *sb = PDE_DATA(file_inode(seq->file)); + struct super_block *sb = pde_data(file_inode(seq->file)); unsigned long position; read_lock(&EXT4_SB(sb)->s_mb_rb_lock); @@ -2998,7 +2998,7 @@ __acquires(&EXT4_SB(sb)->s_mb_rb_lock) static void *ext4_mb_seq_structs_summary_next(struct seq_file *seq, void *v, loff_t *pos) { - struct super_block *sb = PDE_DATA(file_inode(seq->file)); + struct super_block *sb = pde_data(file_inode(seq->file)); unsigned long position; ++*pos; @@ -3010,7 +3010,7 @@ static void *ext4_mb_seq_structs_summary_next(struct seq_file *seq, void *v, lof static int ext4_mb_seq_structs_summary_show(struct seq_file *seq, void *v) { - struct super_block *sb = PDE_DATA(file_inode(seq->file)); + struct super_block *sb = pde_data(file_inode(seq->file)); struct ext4_sb_info *sbi = EXT4_SB(sb); unsigned long position = ((unsigned long) v); struct ext4_group_info *grp; @@ -3058,7 +3058,7 @@ static int ext4_mb_seq_structs_summary_show(struct seq_file *seq, void *v) static void ext4_mb_seq_structs_summary_stop(struct seq_file *seq, void *v) __releases(&EXT4_SB(sb)->s_mb_rb_lock) { - struct super_block *sb = PDE_DATA(file_inode(seq->file)); + struct super_block *sb = pde_data(file_inode(seq->file)); read_unlock(&EXT4_SB(sb)->s_mb_rb_lock); } @@ -3825,7 +3825,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, * We leak some of the blocks here. */ ext4_lock_group(sb, ac->ac_b_ex.fe_group); - ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start, + mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len); ext4_unlock_group(sb, ac->ac_b_ex.fe_group); err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); @@ -3844,7 +3844,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, } } #endif - ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start, + mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len); if (ext4_has_group_desc_csum(sb) && (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) { @@ -3899,69 +3899,103 @@ void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block, struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_group_t group; ext4_grpblk_t blkoff; - int i, clen, err; + int i, err; int already; + unsigned int clen, clen_changed, thisgrp_len; - clen = EXT4_B2C(sbi, len); + while (len > 0) { + ext4_get_group_no_and_offset(sb, block, &group, &blkoff); - ext4_get_group_no_and_offset(sb, block, &group, &blkoff); - bitmap_bh = ext4_read_block_bitmap(sb, group); - if (IS_ERR(bitmap_bh)) { - err = PTR_ERR(bitmap_bh); - bitmap_bh = NULL; - goto out_err; - } + /* + * Check to see if we are freeing blocks across a group + * boundary. + * In case of flex_bg, this can happen that (block, len) may + * span across more than one group. In that case we need to + * get the corresponding group metadata to work with. + * For this we have goto again loop. + */ + thisgrp_len = min_t(unsigned int, (unsigned int)len, + EXT4_BLOCKS_PER_GROUP(sb) - EXT4_C2B(sbi, blkoff)); + clen = EXT4_NUM_B2C(sbi, thisgrp_len); + + if (!ext4_sb_block_valid(sb, NULL, block, thisgrp_len)) { + ext4_error(sb, "Marking blocks in system zone - " + "Block = %llu, len = %u", + block, thisgrp_len); + bitmap_bh = NULL; + break; + } - err = -EIO; - gdp = ext4_get_group_desc(sb, group, &gdp_bh); - if (!gdp) - goto out_err; + bitmap_bh = ext4_read_block_bitmap(sb, group); + if (IS_ERR(bitmap_bh)) { + err = PTR_ERR(bitmap_bh); + bitmap_bh = NULL; + break; + } - ext4_lock_group(sb, group); - already = 0; - for (i = 0; i < clen; i++) - if (!mb_test_bit(blkoff + i, bitmap_bh->b_data) == !state) - already++; + err = -EIO; + gdp = ext4_get_group_desc(sb, group, &gdp_bh); + if (!gdp) + break; - if (state) - ext4_set_bits(bitmap_bh->b_data, blkoff, clen); - else - mb_test_and_clear_bits(bitmap_bh->b_data, blkoff, clen); - if (ext4_has_group_desc_csum(sb) && - (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) { - gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); - ext4_free_group_clusters_set(sb, gdp, - ext4_free_clusters_after_init(sb, - group, gdp)); - } - if (state) - clen = ext4_free_group_clusters(sb, gdp) - clen + already; - else - clen = ext4_free_group_clusters(sb, gdp) + clen - already; + ext4_lock_group(sb, group); + already = 0; + for (i = 0; i < clen; i++) + if (!mb_test_bit(blkoff + i, bitmap_bh->b_data) == + !state) + already++; + + clen_changed = clen - already; + if (state) + mb_set_bits(bitmap_bh->b_data, blkoff, clen); + else + mb_clear_bits(bitmap_bh->b_data, blkoff, clen); + if (ext4_has_group_desc_csum(sb) && + (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) { + gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); + ext4_free_group_clusters_set(sb, gdp, + ext4_free_clusters_after_init(sb, group, gdp)); + } + if (state) + clen = ext4_free_group_clusters(sb, gdp) - clen_changed; + else + clen = ext4_free_group_clusters(sb, gdp) + clen_changed; - ext4_free_group_clusters_set(sb, gdp, clen); - ext4_block_bitmap_csum_set(sb, group, gdp, bitmap_bh); - ext4_group_desc_csum_set(sb, group, gdp); + ext4_free_group_clusters_set(sb, gdp, clen); + ext4_block_bitmap_csum_set(sb, group, gdp, bitmap_bh); + ext4_group_desc_csum_set(sb, group, gdp); - ext4_unlock_group(sb, group); + ext4_unlock_group(sb, group); - if (sbi->s_log_groups_per_flex) { - ext4_group_t flex_group = ext4_flex_group(sbi, group); + if (sbi->s_log_groups_per_flex) { + ext4_group_t flex_group = ext4_flex_group(sbi, group); + struct flex_groups *fg = sbi_array_rcu_deref(sbi, + s_flex_groups, flex_group); - atomic64_sub(len, - &sbi_array_rcu_deref(sbi, s_flex_groups, - flex_group)->free_clusters); + if (state) + atomic64_sub(clen_changed, &fg->free_clusters); + else + atomic64_add(clen_changed, &fg->free_clusters); + + } + + err = ext4_handle_dirty_metadata(NULL, NULL, bitmap_bh); + if (err) + break; + sync_dirty_buffer(bitmap_bh); + err = ext4_handle_dirty_metadata(NULL, NULL, gdp_bh); + sync_dirty_buffer(gdp_bh); + if (err) + break; + + block += thisgrp_len; + len -= thisgrp_len; + brelse(bitmap_bh); + BUG_ON(len < 0); } - err = ext4_handle_dirty_metadata(NULL, NULL, bitmap_bh); if (err) - goto out_err; - sync_dirty_buffer(bitmap_bh); - err = ext4_handle_dirty_metadata(NULL, NULL, gdp_bh); - sync_dirty_buffer(gdp_bh); - -out_err: - brelse(bitmap_bh); + brelse(bitmap_bh); } /* @@ -4433,7 +4467,7 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, while (n) { entry = rb_entry(n, struct ext4_free_data, efd_node); - ext4_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count); + mb_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count); n = rb_next(n); } return; @@ -4474,7 +4508,7 @@ void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, if (unlikely(len == 0)) continue; BUG_ON(groupnr != group); - ext4_set_bits(bitmap, start, len); + mb_set_bits(bitmap, start, len); preallocated += len; } mb_debug(sb, "preallocated %d for group %u\n", preallocated, group); @@ -5753,7 +5787,8 @@ static ext4_fsblk_t ext4_mb_new_blocks_simple(handle_t *handle, struct super_block *sb = ar->inode->i_sb; ext4_group_t group; ext4_grpblk_t blkoff; - int i = sb->s_blocksize; + ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb); + ext4_grpblk_t i = 0; ext4_fsblk_t goal, block; struct ext4_super_block *es = EXT4_SB(sb)->s_es; @@ -5775,19 +5810,26 @@ static ext4_fsblk_t ext4_mb_new_blocks_simple(handle_t *handle, ext4_get_group_no_and_offset(sb, max(ext4_group_first_block_no(sb, group), goal), NULL, &blkoff); - i = mb_find_next_zero_bit(bitmap_bh->b_data, sb->s_blocksize, + while (1) { + i = mb_find_next_zero_bit(bitmap_bh->b_data, max, blkoff); + if (i >= max) + break; + if (ext4_fc_replay_check_excluded(sb, + ext4_group_first_block_no(sb, group) + i)) { + blkoff = i + 1; + } else + break; + } brelse(bitmap_bh); - if (i >= sb->s_blocksize) - continue; - if (ext4_fc_replay_check_excluded(sb, - ext4_group_first_block_no(sb, group) + i)) - continue; - break; + if (i < max) + break; } - if (group >= ext4_get_groups_count(sb) && i >= sb->s_blocksize) + if (group >= ext4_get_groups_count(sb) || i >= max) { + *errp = -ENOSPC; return 0; + } block = ext4_group_first_block_no(sb, group) + i; ext4_mb_mark_bb(sb, block, 1, 1); @@ -5838,17 +5880,17 @@ static void ext4_free_blocks_simple(struct inode *inode, ext4_fsblk_t block, } /** - * ext4_free_blocks() -- Free given blocks and update quota + * ext4_mb_clear_bb() -- helper function for freeing blocks. + * Used by ext4_free_blocks() * @handle: handle for this transaction * @inode: inode - * @bh: optional buffer of the block to be freed * @block: starting physical block to be freed * @count: number of blocks to be freed * @flags: flags used by ext4_free_blocks */ -void ext4_free_blocks(handle_t *handle, struct inode *inode, - struct buffer_head *bh, ext4_fsblk_t block, - unsigned long count, int flags) +static void ext4_mb_clear_bb(handle_t *handle, struct inode *inode, + ext4_fsblk_t block, unsigned long count, + int flags) { struct buffer_head *bitmap_bh = NULL; struct super_block *sb = inode->i_sb; @@ -5865,80 +5907,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, sbi = EXT4_SB(sb); - if (sbi->s_mount_state & EXT4_FC_REPLAY) { - ext4_free_blocks_simple(inode, block, count); - return; - } - - might_sleep(); - if (bh) { - if (block) - BUG_ON(block != bh->b_blocknr); - else - block = bh->b_blocknr; - } - - if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) && - !ext4_inode_block_valid(inode, block, count)) { - ext4_error(sb, "Freeing blocks not in datazone - " - "block = %llu, count = %lu", block, count); - goto error_return; - } - - ext4_debug("freeing block %llu\n", block); - trace_ext4_free_blocks(inode, block, count, flags); - - if (bh && (flags & EXT4_FREE_BLOCKS_FORGET)) { - BUG_ON(count > 1); - - ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, - inode, bh, block); - } - - /* - * If the extent to be freed does not begin on a cluster - * boundary, we need to deal with partial clusters at the - * beginning and end of the extent. Normally we will free - * blocks at the beginning or the end unless we are explicitly - * requested to avoid doing so. - */ - overflow = EXT4_PBLK_COFF(sbi, block); - if (overflow) { - if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) { - overflow = sbi->s_cluster_ratio - overflow; - block += overflow; - if (count > overflow) - count -= overflow; - else - return; - } else { - block -= overflow; - count += overflow; - } - } - overflow = EXT4_LBLK_COFF(sbi, count); - if (overflow) { - if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) { - if (count > overflow) - count -= overflow; - else - return; - } else - count += sbi->s_cluster_ratio - overflow; - } - - if (!bh && (flags & EXT4_FREE_BLOCKS_FORGET)) { - int i; - int is_metadata = flags & EXT4_FREE_BLOCKS_METADATA; - - for (i = 0; i < count; i++) { - cond_resched(); - if (is_metadata) - bh = sb_find_get_block(inode->i_sb, block + i); - ext4_forget(handle, is_metadata, inode, bh, block + i); - } - } - do_more: overflow = 0; ext4_get_group_no_and_offset(sb, block, &block_group, &bit); @@ -5969,13 +5937,7 @@ do_more: goto error_return; } - if (in_range(ext4_block_bitmap(sb, gdp), block, count) || - in_range(ext4_inode_bitmap(sb, gdp), block, count) || - in_range(block, ext4_inode_table(sb, gdp), - sbi->s_itb_per_group) || - in_range(block + count - 1, ext4_inode_table(sb, gdp), - sbi->s_itb_per_group)) { - + if (!ext4_inode_block_valid(inode, block, count)) { ext4_error(sb, "Freeing blocks in system zone - " "Block = %llu, count = %lu", block, count); /* err = 0. ext4_std_error should be a no op */ @@ -6046,7 +6008,7 @@ do_more: NULL); if (err && err != -EOPNOTSUPP) ext4_msg(sb, KERN_WARNING, "discard request in" - " group:%d block:%d count:%lu failed" + " group:%u block:%d count:%lu failed" " with %d", block_group, bit, count, err); } else @@ -6107,6 +6069,103 @@ error_return: } /** + * ext4_free_blocks() -- Free given blocks and update quota + * @handle: handle for this transaction + * @inode: inode + * @bh: optional buffer of the block to be freed + * @block: starting physical block to be freed + * @count: number of blocks to be freed + * @flags: flags used by ext4_free_blocks + */ +void ext4_free_blocks(handle_t *handle, struct inode *inode, + struct buffer_head *bh, ext4_fsblk_t block, + unsigned long count, int flags) +{ + struct super_block *sb = inode->i_sb; + unsigned int overflow; + struct ext4_sb_info *sbi; + + sbi = EXT4_SB(sb); + + if (sbi->s_mount_state & EXT4_FC_REPLAY) { + ext4_free_blocks_simple(inode, block, count); + return; + } + + might_sleep(); + if (bh) { + if (block) + BUG_ON(block != bh->b_blocknr); + else + block = bh->b_blocknr; + } + + if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) && + !ext4_inode_block_valid(inode, block, count)) { + ext4_error(sb, "Freeing blocks not in datazone - " + "block = %llu, count = %lu", block, count); + return; + } + + ext4_debug("freeing block %llu\n", block); + trace_ext4_free_blocks(inode, block, count, flags); + + if (bh && (flags & EXT4_FREE_BLOCKS_FORGET)) { + BUG_ON(count > 1); + + ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, + inode, bh, block); + } + + /* + * If the extent to be freed does not begin on a cluster + * boundary, we need to deal with partial clusters at the + * beginning and end of the extent. Normally we will free + * blocks at the beginning or the end unless we are explicitly + * requested to avoid doing so. + */ + overflow = EXT4_PBLK_COFF(sbi, block); + if (overflow) { + if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) { + overflow = sbi->s_cluster_ratio - overflow; + block += overflow; + if (count > overflow) + count -= overflow; + else + return; + } else { + block -= overflow; + count += overflow; + } + } + overflow = EXT4_LBLK_COFF(sbi, count); + if (overflow) { + if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) { + if (count > overflow) + count -= overflow; + else + return; + } else + count += sbi->s_cluster_ratio - overflow; + } + + if (!bh && (flags & EXT4_FREE_BLOCKS_FORGET)) { + int i; + int is_metadata = flags & EXT4_FREE_BLOCKS_METADATA; + + for (i = 0; i < count; i++) { + cond_resched(); + if (is_metadata) + bh = sb_find_get_block(inode->i_sb, block + i); + ext4_forget(handle, is_metadata, inode, bh, block + i); + } + } + + ext4_mb_clear_bb(handle, inode, block, count, flags); + return; +} + +/** * ext4_group_add_blocks() -- Add given blocks to an existing group * @handle: handle to this transaction * @sb: super block @@ -6162,11 +6221,7 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, goto error_return; } - if (in_range(ext4_block_bitmap(sb, desc), block, count) || - in_range(ext4_inode_bitmap(sb, desc), block, count) || - in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) || - in_range(block + count - 1, ext4_inode_table(sb, desc), - sbi->s_itb_per_group)) { + if (!ext4_sb_block_valid(sb, NULL, block, count)) { ext4_error(sb, "Adding blocks in system zones - " "Block = %llu, count = %lu", block, count); diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index ff8916e1d38e..7a5353a8cfd7 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -485,7 +485,7 @@ int ext4_ext_migrate(struct inode *inode) * when we add extents we extent the journal */ /* - * Even though we take i_mutex we can still cause block + * Even though we take i_rwsem we can still cause block * allocation via mmap write to holes. If we have allocated * new blocks we fail migrate. New block allocation will * clear EXT4_STATE_EXT_MIGRATE flag. The flag is updated diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 52c9bd154122..e37da8d5cd0c 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1317,7 +1317,7 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block) dx_set_count(entries, count + 1); } -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) /* * Test whether a case-insensitive directory entry matches the filename * being searched for. If quick is set, assume the name being looked up @@ -1428,7 +1428,7 @@ static bool ext4_match(struct inode *parent, f.crypto_buf = fname->crypto_buf; #endif -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) if (parent->i_sb->s_encoding && IS_CASEFOLDED(parent) && (!IS_ENCRYPTED(parent) || fscrypt_has_encryption_key(parent))) { if (fname->cf_name.name) { @@ -1800,7 +1800,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi } } -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) if (!inode && IS_CASEFOLDED(dir)) { /* Eventually we want to call d_add_ci(dentry, NULL) * for negative dentries in the encoding case as @@ -2308,7 +2308,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, if (fscrypt_is_nokey_name(dentry)) return -ENOKEY; -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) if (sb_has_strict_encoding(sb) && IS_CASEFOLDED(dir) && sb->s_encoding && utf8_validate(sb->s_encoding, &dentry->d_name)) return -EINVAL; @@ -2997,14 +2997,14 @@ bool ext4_empty_dir(struct inode *inode) if (inode->i_size < ext4_dir_rec_len(1, NULL) + ext4_dir_rec_len(2, NULL)) { EXT4_ERROR_INODE(inode, "invalid size"); - return true; + return false; } /* The first directory block must not be a hole, * so treat it as DIRENT_HTREE */ bh = ext4_read_dirblock(inode, 0, DIRENT_HTREE); if (IS_ERR(bh)) - return true; + return false; de = (struct ext4_dir_entry_2 *) bh->b_data; if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, bh->b_size, @@ -3012,7 +3012,7 @@ bool ext4_empty_dir(struct inode *inode) le32_to_cpu(de->inode) != inode->i_ino || strcmp(".", de->name)) { ext4_warning_inode(inode, "directory missing '.'"); brelse(bh); - return true; + return false; } offset = ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize); de = ext4_next_entry(de, sb->s_blocksize); @@ -3021,7 +3021,7 @@ bool ext4_empty_dir(struct inode *inode) le32_to_cpu(de->inode) == 0 || strcmp("..", de->name)) { ext4_warning_inode(inode, "directory missing '..'"); brelse(bh); - return true; + return false; } offset += ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize); while (offset < inode->i_size) { @@ -3035,7 +3035,7 @@ bool ext4_empty_dir(struct inode *inode) continue; } if (IS_ERR(bh)) - return true; + return false; } de = (struct ext4_dir_entry_2 *) (bh->b_data + (offset & (sb->s_blocksize - 1))); @@ -3126,7 +3126,7 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry) ext4_fc_track_unlink(handle, dentry); retval = ext4_mark_inode_dirty(handle, dir); -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) /* VFS negative dentries are incompatible with Encoding and * Case-insensitiveness. Eventually we'll want avoid * invalidating the dentries here, alongside with returning the @@ -3231,7 +3231,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) retval = __ext4_unlink(handle, dir, &dentry->d_name, d_inode(dentry)); if (!retval) ext4_fc_track_unlink(handle, dentry); -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) /* VFS negative dentries are incompatible with Encoding and * Case-insensitiveness. Eventually we'll want avoid * invalidating the dentries here, alongside with returning the @@ -3889,14 +3889,21 @@ static int ext4_rename(struct user_namespace *mnt_userns, struct inode *old_dir, * dirents in directories. */ ext4_fc_mark_ineligible(old.inode->i_sb, - EXT4_FC_REASON_RENAME_DIR); + EXT4_FC_REASON_RENAME_DIR, handle); } else { + struct super_block *sb = old.inode->i_sb; + if (new.inode) ext4_fc_track_unlink(handle, new.dentry); - __ext4_fc_track_link(handle, old.inode, new.dentry); - __ext4_fc_track_unlink(handle, old.inode, old.dentry); - if (whiteout) - __ext4_fc_track_create(handle, whiteout, old.dentry); + if (test_opt2(sb, JOURNAL_FAST_COMMIT) && + !(EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY) && + !(ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE))) { + __ext4_fc_track_link(handle, old.inode, new.dentry); + __ext4_fc_track_unlink(handle, old.inode, old.dentry); + if (whiteout) + __ext4_fc_track_create(handle, whiteout, + old.dentry); + } } if (new.inode) { @@ -4049,7 +4056,7 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry, if (unlikely(retval)) goto end_rename; ext4_fc_mark_ineligible(new.inode->i_sb, - EXT4_FC_REASON_CROSS_RENAME); + EXT4_FC_REASON_CROSS_RENAME, handle); if (old.dir_bh) { retval = ext4_rename_dir_finish(handle, &old, new.dir->i_ino); if (retval) diff --git a/fs/ext4/orphan.c b/fs/ext4/orphan.c index 53adc8f570a3..7de0612eb42d 100644 --- a/fs/ext4/orphan.c +++ b/fs/ext4/orphan.c @@ -93,7 +93,7 @@ static int ext4_orphan_file_add(handle_t *handle, struct inode *inode) * At filesystem recovery time, we walk this list deleting unlinked * inodes and truncating linked inodes in ext4_orphan_cleanup(). * - * Orphan list manipulation functions must be called under i_mutex unless + * Orphan list manipulation functions must be called under i_rwsem unless * we are just creating the inode or deleting it. */ int ext4_orphan_add(handle_t *handle, struct inode *inode) @@ -119,7 +119,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) /* * Orphan handling is only valid for files with data blocks * being truncated, or files being unlinked. Note that we either - * hold i_mutex, or the inode can not be referenced from outside, + * hold i_rwsem, or the inode can not be referenced from outside, * so i_nlink should not be bumped due to race */ ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 9cb261714991..17bb78ebd784 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -24,7 +24,7 @@ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/mm.h> -#include <linux/backing-dev.h> +#include <linux/sched/mm.h> #include "ext4_jbd2.h" #include "xattr.h" @@ -323,10 +323,9 @@ static void ext4_end_bio(struct bio *bio) { ext4_io_end_t *io_end = bio->bi_private; sector_t bi_sector = bio->bi_iter.bi_sector; - char b[BDEVNAME_SIZE]; - if (WARN_ONCE(!io_end, "io_end is NULL: %s: sector %Lu len %u err %d\n", - bio_devname(bio, b), + if (WARN_ONCE(!io_end, "io_end is NULL: %pg: sector %Lu len %u err %d\n", + bio->bi_bdev, (long long) bio->bi_iter.bi_sector, (unsigned) bio_sectors(bio), bio->bi_status)) { @@ -372,10 +371,9 @@ void ext4_io_submit(struct ext4_io_submit *io) struct bio *bio = io->io_bio; if (bio) { - int io_op_flags = io->io_wbc->sync_mode == WB_SYNC_ALL ? - REQ_SYNC : 0; + if (io->io_wbc->sync_mode == WB_SYNC_ALL) + io->io_bio->bi_opf |= REQ_SYNC; io->io_bio->bi_write_hint = io->io_end->inode->i_write_hint; - bio_set_op_attrs(io->io_bio, REQ_OP_WRITE, io_op_flags); submit_bio(io->io_bio); } io->io_bio = NULL; @@ -398,10 +396,9 @@ static void io_submit_init_bio(struct ext4_io_submit *io, * bio_alloc will _always_ be able to allocate a bio if * __GFP_DIRECT_RECLAIM is set, see comments for bio_alloc_bioset(). */ - bio = bio_alloc(GFP_NOIO, BIO_MAX_VECS); + bio = bio_alloc(bh->b_bdev, BIO_MAX_VECS, REQ_OP_WRITE, GFP_NOIO); fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO); bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); - bio_set_dev(bio, bh->b_bdev); bio->bi_end_io = ext4_end_bio; bio->bi_private = ext4_get_io_end(io->io_end); io->io_bio = bio; @@ -523,12 +520,13 @@ int ext4_bio_write_page(struct ext4_io_submit *io, ret = PTR_ERR(bounce_page); if (ret == -ENOMEM && (io->io_bio || wbc->sync_mode == WB_SYNC_ALL)) { - gfp_flags = GFP_NOFS; + gfp_t new_gfp_flags = GFP_NOFS; if (io->io_bio) ext4_io_submit(io); else - gfp_flags |= __GFP_NOFAIL; - congestion_wait(BLK_RW_ASYNC, HZ/50); + new_gfp_flags |= __GFP_NOFAIL; + memalloc_retry_wait(gfp_flags); + gfp_flags = new_gfp_flags; goto retry_encrypt; } diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c index 3db923403505..1aa26d6634fc 100644 --- a/fs/ext4/readpage.c +++ b/fs/ext4/readpage.c @@ -43,7 +43,6 @@ #include <linux/writeback.h> #include <linux/backing-dev.h> #include <linux/pagevec.h> -#include <linux/cleancache.h> #include "ext4.h" @@ -350,11 +349,6 @@ int ext4_mpage_readpages(struct inode *inode, } else if (fully_mapped) { SetPageMappedToDisk(page); } - if (fully_mapped && blocks_per_page == 1 && - !PageUptodate(page) && cleancache_get_page(page) == 0) { - SetPageUptodate(page); - goto confused; - } /* * This page will go to BIO. Do we need to send this @@ -371,15 +365,15 @@ int ext4_mpage_readpages(struct inode *inode, * bio_alloc will _always_ be able to allocate a bio if * __GFP_DIRECT_RECLAIM is set, see bio_alloc_bioset(). */ - bio = bio_alloc(GFP_KERNEL, bio_max_segs(nr_pages)); + bio = bio_alloc(bdev, bio_max_segs(nr_pages), + REQ_OP_READ, GFP_KERNEL); fscrypt_set_bio_crypt_ctx(bio, inode, next_block, GFP_KERNEL); ext4_set_bio_post_read_ctx(bio, inode, page->index); - bio_set_dev(bio, bdev); bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9); bio->bi_end_io = mpage_end_io; - bio_set_op_attrs(bio, REQ_OP_READ, - rac ? REQ_RAHEAD : 0); + if (rac) + bio->bi_opf |= REQ_RAHEAD; } length = first_hole << blkbits; diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index ee8f02f406cb..90a941d20dff 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -14,6 +14,7 @@ #include <linux/errno.h> #include <linux/slab.h> +#include <linux/jiffies.h> #include "ext4_jbd2.h" @@ -483,7 +484,7 @@ static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle, } ext4_debug("mark block bitmap %#04llx (+%llu/%u)\n", first_cluster, first_cluster - start, count2); - ext4_set_bits(bh->b_data, first_cluster - start, count2); + mb_set_bits(bh->b_data, first_cluster - start, count2); err = ext4_handle_dirty_metadata(handle, NULL, bh); brelse(bh); @@ -632,7 +633,7 @@ handle_bb: if (overhead != 0) { ext4_debug("mark backup superblock %#04llx (+0)\n", start); - ext4_set_bits(bh->b_data, 0, + mb_set_bits(bh->b_data, 0, EXT4_NUM_B2C(sbi, overhead)); } ext4_mark_bitmap_end(EXT4_B2C(sbi, group_data[i].blocks_count), @@ -2100,7 +2101,7 @@ retry: */ while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count, flexbg_size)) { - if (jiffies - last_update_time > HZ * 10) { + if (time_is_before_jiffies(last_update_time + HZ * 10)) { if (last_update_time) ext4_msg(sb, KERN_INFO, "resized to %llu blocks", diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 9a936ecbaa3b..81749eaddf4c 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -39,7 +39,6 @@ #include <linux/log2.h> #include <linux/crc16.h> #include <linux/dax.h> -#include <linux/cleancache.h> #include <linux/uaccess.h> #include <linux/iversion.h> #include <linux/unicode.h> @@ -1302,7 +1301,7 @@ static void ext4_put_super(struct super_block *sb) kfree(sbi->s_blockgroup_lock); fs_put_dax(sbi->s_daxdev); fscrypt_free_dummy_policy(&sbi->s_dummy_enc_policy); -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) utf8_unload(sb->s_encoding); #endif kfree(sbi); @@ -1317,7 +1316,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) { struct ext4_inode_info *ei; - ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS); + ei = alloc_inode_sb(sb, ext4_inode_cachep, GFP_NOFS); if (!ei) return NULL; @@ -1962,33 +1961,26 @@ static const struct mount_opts { {Opt_err, 0, 0} }; -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) static const struct ext4_sb_encodings { __u16 magic; char *name; - char *version; + unsigned int version; } ext4_sb_encoding_map[] = { - {EXT4_ENC_UTF8_12_1, "utf8", "12.1.0"}, + {EXT4_ENC_UTF8_12_1, "utf8", UNICODE_AGE(12, 1, 0)}, }; -static int ext4_sb_read_encoding(const struct ext4_super_block *es, - const struct ext4_sb_encodings **encoding, - __u16 *flags) +static const struct ext4_sb_encodings * +ext4_sb_read_encoding(const struct ext4_super_block *es) { __u16 magic = le16_to_cpu(es->s_encoding); int i; for (i = 0; i < ARRAY_SIZE(ext4_sb_encoding_map); i++) if (magic == ext4_sb_encoding_map[i].magic) - break; - - if (i >= ARRAY_SIZE(ext4_sb_encoding_map)) - return -EINVAL; - - *encoding = &ext4_sb_encoding_map[i]; - *flags = le16_to_cpu(es->s_encoding_flags); + return &ext4_sb_encoding_map[i]; - return 0; + return NULL; } #endif @@ -2029,12 +2021,12 @@ static int ext4_set_test_dummy_encryption(struct super_block *sb, char *arg) #define EXT4_SPEC_s_commit_interval (1 << 16) #define EXT4_SPEC_s_fc_debug_max_replay (1 << 17) #define EXT4_SPEC_s_sb_block (1 << 18) +#define EXT4_SPEC_mb_optimize_scan (1 << 19) struct ext4_fs_context { char *s_qf_names[EXT4_MAXQUOTAS]; char *test_dummy_enc_arg; int s_jquota_fmt; /* Format of quota to use */ - int mb_optimize_scan; #ifdef CONFIG_EXT4_DEBUG int s_fc_debug_max_replay; #endif @@ -2053,8 +2045,8 @@ struct ext4_fs_context { unsigned int mask_s_mount_opt; unsigned int vals_s_mount_opt2; unsigned int mask_s_mount_opt2; - unsigned int vals_s_mount_flags; - unsigned int mask_s_mount_flags; + unsigned long vals_s_mount_flags; + unsigned long mask_s_mount_flags; unsigned int opt_flags; /* MOPT flags */ unsigned int spec; u32 s_max_batch_time; @@ -2157,23 +2149,36 @@ static inline void ctx_set_##name(struct ext4_fs_context *ctx, \ { \ ctx->mask_s_##name |= flag; \ ctx->vals_s_##name |= flag; \ -} \ +} + +#define EXT4_CLEAR_CTX(name) \ static inline void ctx_clear_##name(struct ext4_fs_context *ctx, \ unsigned long flag) \ { \ ctx->mask_s_##name |= flag; \ ctx->vals_s_##name &= ~flag; \ -} \ +} + +#define EXT4_TEST_CTX(name) \ static inline unsigned long \ ctx_test_##name(struct ext4_fs_context *ctx, unsigned long flag) \ { \ return (ctx->vals_s_##name & flag); \ -} \ +} -EXT4_SET_CTX(flags); +EXT4_SET_CTX(flags); /* set only */ EXT4_SET_CTX(mount_opt); +EXT4_CLEAR_CTX(mount_opt); +EXT4_TEST_CTX(mount_opt); EXT4_SET_CTX(mount_opt2); -EXT4_SET_CTX(mount_flags); +EXT4_CLEAR_CTX(mount_opt2); +EXT4_TEST_CTX(mount_opt2); + +static inline void ctx_set_mount_flag(struct ext4_fs_context *ctx, int bit) +{ + set_bit(bit, &ctx->mask_s_mount_flags); + set_bit(bit, &ctx->vals_s_mount_flags); +} static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param) { @@ -2243,7 +2248,7 @@ static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param) param->key); return 0; case Opt_abort: - ctx_set_mount_flags(ctx, EXT4_MF_FS_ABORTED); + ctx_set_mount_flag(ctx, EXT4_MF_FS_ABORTED); return 0; case Opt_i_version: ext4_msg(NULL, KERN_WARNING, deprecated_msg, param->key, "5.20"); @@ -2459,12 +2464,17 @@ static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param) ctx_clear_mount_opt(ctx, m->mount_opt); return 0; case Opt_mb_optimize_scan: - if (result.int_32 != 0 && result.int_32 != 1) { + if (result.int_32 == 1) { + ctx_set_mount_opt2(ctx, EXT4_MOUNT2_MB_OPTIMIZE_SCAN); + ctx->spec |= EXT4_SPEC_mb_optimize_scan; + } else if (result.int_32 == 0) { + ctx_clear_mount_opt2(ctx, EXT4_MOUNT2_MB_OPTIMIZE_SCAN); + ctx->spec |= EXT4_SPEC_mb_optimize_scan; + } else { ext4_msg(NULL, KERN_WARNING, "mb_optimize_scan should be set to 0 or 1."); return -EINVAL; } - ctx->mb_optimize_scan = result.int_32; return 0; } @@ -3156,8 +3166,6 @@ done: EXT4_BLOCKS_PER_GROUP(sb), EXT4_INODES_PER_GROUP(sb), sbi->s_mount_opt, sbi->s_mount_opt2); - - cleancache_init_fs(sb); return err; } @@ -3478,8 +3486,9 @@ static loff_t ext4_max_size(int blkbits, int has_huge_files) */ static loff_t ext4_max_bitmap_size(int bits, int has_huge_files) { - unsigned long long upper_limit, res = EXT4_NDIR_BLOCKS; + loff_t upper_limit, res = EXT4_NDIR_BLOCKS; int meta_blocks; + unsigned int ppb = 1 << (bits - 2); /* * This is calculated to be the largest file size for a dense, block @@ -3511,27 +3520,42 @@ static loff_t ext4_max_bitmap_size(int bits, int has_huge_files) } + /* Compute how many blocks we can address by block tree */ + res += ppb; + res += ppb * ppb; + res += ((loff_t)ppb) * ppb * ppb; + /* Compute how many metadata blocks are needed */ + meta_blocks = 1; + meta_blocks += 1 + ppb; + meta_blocks += 1 + ppb + ppb * ppb; + /* Does block tree limit file size? */ + if (res + meta_blocks <= upper_limit) + goto check_lfs; + + res = upper_limit; + /* How many metadata blocks are needed for addressing upper_limit? */ + upper_limit -= EXT4_NDIR_BLOCKS; /* indirect blocks */ meta_blocks = 1; + upper_limit -= ppb; /* double indirect blocks */ - meta_blocks += 1 + (1LL << (bits-2)); - /* tripple indirect blocks */ - meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2))); - - upper_limit -= meta_blocks; - upper_limit <<= bits; - - res += 1LL << (bits-2); - res += 1LL << (2*(bits-2)); - res += 1LL << (3*(bits-2)); + if (upper_limit < ppb * ppb) { + meta_blocks += 1 + DIV_ROUND_UP_ULL(upper_limit, ppb); + res -= meta_blocks; + goto check_lfs; + } + meta_blocks += 1 + ppb; + upper_limit -= ppb * ppb; + /* tripple indirect blocks for the rest */ + meta_blocks += 1 + DIV_ROUND_UP_ULL(upper_limit, ppb) + + DIV_ROUND_UP_ULL(upper_limit, ppb*ppb); + res -= meta_blocks; +check_lfs: res <<= bits; - if (res > upper_limit) - res = upper_limit; - if (res > MAX_LFS_FILESIZE) res = MAX_LFS_FILESIZE; - return (loff_t)res; + return res; } static ext4_fsblk_t descriptor_loc(struct super_block *sb, @@ -3616,7 +3640,7 @@ int ext4_feature_set_ok(struct super_block *sb, int readonly) return 0; } -#ifndef CONFIG_UNICODE +#if !IS_ENABLED(CONFIG_UNICODE) if (ext4_has_feature_casefold(sb)) { ext4_msg(sb, KERN_ERR, "Filesystem with casefold feature cannot be " @@ -4338,7 +4362,7 @@ static struct ext4_sb_info *ext4_alloc_sbi(struct super_block *sb) if (!sbi) return NULL; - sbi->s_daxdev = fs_dax_get_by_bdev(sb->s_bdev); + sbi->s_daxdev = fs_dax_get_by_bdev(sb->s_bdev, &sbi->s_dax_part_off); sbi->s_blockgroup_lock = kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL); @@ -4379,7 +4403,6 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) /* Set defaults for the variables that will be set during parsing */ ctx->journal_ioprio = DEFAULT_JOURNAL_IOPRIO; - ctx->mb_optimize_scan = DEFAULT_MB_OPTIMIZE_SCAN; sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; sbi->s_sectors_written_start = @@ -4620,14 +4643,14 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) if (err < 0) goto failed_mount; -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) if (ext4_has_feature_casefold(sb) && !sb->s_encoding) { const struct ext4_sb_encodings *encoding_info; struct unicode_map *encoding; - __u16 encoding_flags; + __u16 encoding_flags = le16_to_cpu(es->s_encoding_flags); - if (ext4_sb_read_encoding(es, &encoding_info, - &encoding_flags)) { + encoding_info = ext4_sb_read_encoding(es); + if (!encoding_info) { ext4_msg(sb, KERN_ERR, "Encoding requested by superblock is unknown"); goto failed_mount; @@ -4636,15 +4659,21 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) encoding = utf8_load(encoding_info->version); if (IS_ERR(encoding)) { ext4_msg(sb, KERN_ERR, - "can't mount with superblock charset: %s-%s " + "can't mount with superblock charset: %s-%u.%u.%u " "not supported by the kernel. flags: 0x%x.", - encoding_info->name, encoding_info->version, + encoding_info->name, + unicode_major(encoding_info->version), + unicode_minor(encoding_info->version), + unicode_rev(encoding_info->version), encoding_flags); goto failed_mount; } ext4_msg(sb, KERN_INFO,"Using encoding defined by superblock: " - "%s-%s with flags 0x%hx", encoding_info->name, - encoding_info->version?:"\b", encoding_flags); + "%s-%u.%u.%u with flags 0x%hx", encoding_info->name, + unicode_major(encoding_info->version), + unicode_minor(encoding_info->version), + unicode_rev(encoding_info->version), + encoding_flags); sb->s_encoding = encoding; sb->s_encoding_flags = encoding_flags; @@ -4756,9 +4785,12 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) goto failed_mount; } - if (dax_supported(sbi->s_daxdev, sb->s_bdev, blocksize, 0, - bdev_nr_sectors(sb->s_bdev))) - set_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags); + if (sbi->s_daxdev) { + if (blocksize == PAGE_SIZE) + set_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags); + else + ext4_msg(sb, KERN_ERR, "unsupported blocksize for DAX\n"); + } if (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) { if (ext4_has_feature_inline_data(sb)) { @@ -5083,7 +5115,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_STAGING]); sbi->s_fc_bytes = 0; ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); - ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING); + sbi->s_fc_ineligible_tid = 0; spin_lock_init(&sbi->s_fc_lock); memset(&sbi->s_fc_stats, 0, sizeof(sbi->s_fc_stats)); sbi->s_fc_replay_state.fc_regions = NULL; @@ -5321,12 +5353,12 @@ no_journal: * turned off by passing "mb_optimize_scan=0". This can also be * turned on forcefully by passing "mb_optimize_scan=1". */ - if (ctx->mb_optimize_scan == 1) - set_opt2(sb, MB_OPTIMIZE_SCAN); - else if (ctx->mb_optimize_scan == 0) - clear_opt2(sb, MB_OPTIMIZE_SCAN); - else if (sbi->s_groups_count >= MB_DEFAULT_LINEAR_SCAN_THRESHOLD) - set_opt2(sb, MB_OPTIMIZE_SCAN); + if (!(ctx->spec & EXT4_SPEC_mb_optimize_scan)) { + if (sbi->s_groups_count >= MB_DEFAULT_LINEAR_SCAN_THRESHOLD) + set_opt2(sb, MB_OPTIMIZE_SCAN); + else + clear_opt2(sb, MB_OPTIMIZE_SCAN); + } err = ext4_mb_init(sb); if (err) { @@ -5515,7 +5547,7 @@ failed_mount: if (sbi->s_chksum_driver) crypto_free_shash(sbi->s_chksum_driver); -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) utf8_unload(sb->s_encoding); #endif @@ -5541,7 +5573,7 @@ static int ext4_fill_super(struct super_block *sb, struct fs_context *fc) sbi = ext4_alloc_sbi(sb); if (!sbi) - ret = -ENOMEM; + return -ENOMEM; fc->s_fs_info = sbi; diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index f61e65ae27d8..d233c24ea342 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -309,7 +309,7 @@ EXT4_ATTR_FEATURE(meta_bg_resize); EXT4_ATTR_FEATURE(encryption); EXT4_ATTR_FEATURE(test_dummy_encryption_v2); #endif -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) EXT4_ATTR_FEATURE(casefold); #endif #ifdef CONFIG_FS_VERITY @@ -317,7 +317,7 @@ EXT4_ATTR_FEATURE(verity); #endif EXT4_ATTR_FEATURE(metadata_csum_seed); EXT4_ATTR_FEATURE(fast_commit); -#if defined(CONFIG_UNICODE) && defined(CONFIG_FS_ENCRYPTION) +#if IS_ENABLED(CONFIG_UNICODE) && defined(CONFIG_FS_ENCRYPTION) EXT4_ATTR_FEATURE(encrypted_casefold); #endif @@ -329,7 +329,7 @@ static struct attribute *ext4_feat_attrs[] = { ATTR_LIST(encryption), ATTR_LIST(test_dummy_encryption_v2), #endif -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) ATTR_LIST(casefold), #endif #ifdef CONFIG_FS_VERITY @@ -337,7 +337,7 @@ static struct attribute *ext4_feat_attrs[] = { #endif ATTR_LIST(metadata_csum_seed), ATTR_LIST(fast_commit), -#if defined(CONFIG_UNICODE) && defined(CONFIG_FS_ENCRYPTION) +#if IS_ENABLED(CONFIG_UNICODE) && defined(CONFIG_FS_ENCRYPTION) ATTR_LIST(encrypted_casefold), #endif NULL, diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 1e0fc1ed845b..042325349098 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -2408,7 +2408,7 @@ retry_inode: if (IS_SYNC(inode)) ext4_handle_sync(handle); } - ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR); + ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR, handle); cleanup: brelse(is.iloc.bh); @@ -2486,7 +2486,7 @@ retry: if (error == 0) error = error2; } - ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR); + ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR, NULL); return error; } @@ -2920,7 +2920,7 @@ int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode, error); goto cleanup; } - ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR); + ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR, handle); } error = 0; cleanup: |