aboutsummaryrefslogtreecommitdiff
path: root/fs/ext4
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4')
-rw-r--r--fs/ext4/balloc.c2
-rw-r--r--fs/ext4/block_validity.c16
-rw-r--r--fs/ext4/dir.c83
-rw-r--r--fs/ext4/ext4.h168
-rw-r--r--fs/ext4/ext4_jbd2.c4
-rw-r--r--fs/ext4/ext4_jbd2.h9
-rw-r--r--fs/ext4/extents.c42
-rw-r--r--fs/ext4/fast_commit.c272
-rw-r--r--fs/ext4/fast_commit.h82
-rw-r--r--fs/ext4/file.c6
-rw-r--r--fs/ext4/fsmap.c2
-rw-r--r--fs/ext4/fsync.c4
-rw-r--r--fs/ext4/hash.c2
-rw-r--r--fs/ext4/indirect.c4
-rw-r--r--fs/ext4/inline.c1
-rw-r--r--fs/ext4/inode-test.c320
-rw-r--r--fs/ext4/inode.c65
-rw-r--r--fs/ext4/ioctl.c2
-rw-r--r--fs/ext4/mballoc.c45
-rw-r--r--fs/ext4/namei.c107
-rw-r--r--fs/ext4/page-io.c5
-rw-r--r--fs/ext4/super.c513
-rw-r--r--fs/ext4/sysfs.c12
-rw-r--r--fs/ext4/xattr.c1
24 files changed, 877 insertions, 890 deletions
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 1d640b145637..f45f9feebe59 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -185,7 +185,7 @@ static int ext4_init_block_bitmap(struct super_block *sb,
struct ext4_sb_info *sbi = EXT4_SB(sb);
ext4_fsblk_t start, tmp;
- J_ASSERT_BH(bh, buffer_locked(bh));
+ ASSERT(buffer_locked(bh));
/* If checksum is bad mark all blocks used to prevent allocation
* essentially implementing a per-group read-only flag. */
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index 8e6ca23ed172..4666b55b736e 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -176,12 +176,10 @@ static int ext4_protect_reserved_inode(struct super_block *sb,
err = add_system_zone(system_blks, map.m_pblk, n, ino);
if (err < 0) {
if (err == -EFSCORRUPTED) {
- __ext4_error(sb, __func__, __LINE__,
- -err, map.m_pblk,
- "blocks %llu-%llu from inode %u overlap system zone",
- map.m_pblk,
- map.m_pblk + map.m_len - 1,
- ino);
+ EXT4_ERROR_INODE_ERR(inode, -err,
+ "blocks %llu-%llu from inode overlap system zone",
+ map.m_pblk,
+ map.m_pblk + map.m_len - 1);
}
break;
}
@@ -206,7 +204,7 @@ static void ext4_destroy_system_zone(struct rcu_head *rcu)
*
* The update of system_blks pointer in this function is protected by
* sb->s_umount semaphore. However we have to be careful as we can be
- * racing with ext4_data_block_valid() calls reading system_blks rbtree
+ * racing with ext4_inode_block_valid() calls reading system_blks rbtree
* protected only by RCU. That's why we first build the rbtree and then
* swap it in place.
*/
@@ -258,7 +256,7 @@ int ext4_setup_system_zone(struct super_block *sb)
/*
* System blks rbtree complete, announce it once to prevent racing
- * with ext4_data_block_valid() accessing the rbtree at the same
+ * with ext4_inode_block_valid() accessing the rbtree at the same
* time.
*/
rcu_assign_pointer(sbi->s_system_blks, system_blks);
@@ -278,7 +276,7 @@ err:
*
* The update of system_blks pointer in this function is protected by
* sb->s_umount semaphore. However we have to be careful as we can be
- * racing with ext4_data_block_valid() calls reading system_blks rbtree
+ * racing with ext4_inode_block_valid() calls reading system_blks rbtree
* protected only by RCU. So we first clear the system_blks pointer and
* then free the rbtree only after RCU grace period expires.
*/
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 5b81f3b080ee..5ed870614c8d 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -118,11 +118,9 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
struct buffer_head *bh = NULL;
struct fscrypt_str fstr = FSTR_INIT(NULL, 0);
- if (IS_ENCRYPTED(inode)) {
- err = fscrypt_get_encryption_info(inode);
- if (err)
- return err;
- }
+ err = fscrypt_prepare_readdir(inode);
+ if (err)
+ return err;
if (is_dx_dir(inode)) {
err = ext4_dx_readdir(file, ctx);
@@ -616,13 +614,6 @@ finished:
return 0;
}
-static int ext4_dir_open(struct inode * inode, struct file * filp)
-{
- if (IS_ENCRYPTED(inode))
- return fscrypt_get_encryption_info(inode) ? -EACCES : 0;
- return 0;
-}
-
static int ext4_release_dir(struct inode *inode, struct file *filp)
{
if (filp->private_data)
@@ -664,73 +655,5 @@ const struct file_operations ext4_dir_operations = {
.compat_ioctl = ext4_compat_ioctl,
#endif
.fsync = ext4_sync_file,
- .open = ext4_dir_open,
.release = ext4_release_dir,
};
-
-#ifdef CONFIG_UNICODE
-static int ext4_d_compare(const struct dentry *dentry, unsigned int len,
- const char *str, const struct qstr *name)
-{
- struct qstr qstr = {.name = str, .len = len };
- const struct dentry *parent = READ_ONCE(dentry->d_parent);
- const struct inode *inode = d_inode_rcu(parent);
- char strbuf[DNAME_INLINE_LEN];
-
- if (!inode || !IS_CASEFOLDED(inode) ||
- !EXT4_SB(inode->i_sb)->s_encoding) {
- if (len != name->len)
- return -1;
- return memcmp(str, name->name, len);
- }
-
- /*
- * If the dentry name is stored in-line, then it may be concurrently
- * modified by a rename. If this happens, the VFS will eventually retry
- * the lookup, so it doesn't matter what ->d_compare() returns.
- * However, it's unsafe to call utf8_strncasecmp() with an unstable
- * string. Therefore, we have to copy the name into a temporary buffer.
- */
- if (len <= DNAME_INLINE_LEN - 1) {
- memcpy(strbuf, str, len);
- strbuf[len] = 0;
- qstr.name = strbuf;
- /* prevent compiler from optimizing out the temporary buffer */
- barrier();
- }
-
- return ext4_ci_compare(inode, name, &qstr, false);
-}
-
-static int ext4_d_hash(const struct dentry *dentry, struct qstr *str)
-{
- const struct ext4_sb_info *sbi = EXT4_SB(dentry->d_sb);
- const struct unicode_map *um = sbi->s_encoding;
- const struct inode *inode = d_inode_rcu(dentry);
- unsigned char *norm;
- int len, ret = 0;
-
- if (!inode || !IS_CASEFOLDED(inode) || !um)
- return 0;
-
- norm = kmalloc(PATH_MAX, GFP_ATOMIC);
- if (!norm)
- return -ENOMEM;
-
- len = utf8_casefold(um, str, norm, PATH_MAX);
- if (len < 0) {
- if (ext4_has_strict_mode(sbi))
- ret = -EINVAL;
- goto out;
- }
- str->hash = full_name_hash(dentry, norm, len);
-out:
- kfree(norm);
- return ret;
-}
-
-const struct dentry_operations ext4_dentry_ops = {
- .d_hash = ext4_d_hash,
- .d_compare = ext4_d_compare,
-};
-#endif
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 254d1c26bea8..2866d249f3d2 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -98,6 +98,16 @@
#define ext_debug(ino, fmt, ...) no_printk(fmt, ##__VA_ARGS__)
#endif
+#define ASSERT(assert) \
+do { \
+ if (unlikely(!(assert))) { \
+ printk(KERN_EMERG \
+ "Assertion failure in %s() at %s:%d: '%s'\n", \
+ __func__, __FILE__, __LINE__, #assert); \
+ BUG(); \
+ } \
+} while (0)
+
/* data type for block offset of block group */
typedef int ext4_grpblk_t;
@@ -1028,9 +1038,6 @@ struct ext4_inode_info {
* protected by sbi->s_fc_lock.
*/
- /* Fast commit subtid when this inode was committed */
- unsigned int i_fc_committed_subtid;
-
/* Start of lblk range that needs to be committed in this fast commit */
ext4_lblk_t i_fc_lblk_start;
@@ -1166,10 +1173,6 @@ struct ext4_inode_info {
#define EXT4_VALID_FS 0x0001 /* Unmounted cleanly */
#define EXT4_ERROR_FS 0x0002 /* Errors detected */
#define EXT4_ORPHAN_FS 0x0004 /* Orphans being recovered */
-#define EXT4_FC_INELIGIBLE 0x0008 /* Fast commit ineligible */
-#define EXT4_FC_COMMITTING 0x0010 /* File system underoing a fast
- * commit.
- */
#define EXT4_FC_REPLAY 0x0020 /* Fast commit replay ongoing */
/*
@@ -1238,13 +1241,13 @@ struct ext4_inode_info {
blocks */
#define EXT4_MOUNT2_HURD_COMPAT 0x00000004 /* Support HURD-castrated
file systems */
-#define EXT4_MOUNT2_DAX_NEVER 0x00000008 /* Do not allow Direct Access */
-#define EXT4_MOUNT2_DAX_INODE 0x00000010 /* For printing options only */
-
#define EXT4_MOUNT2_EXPLICIT_JOURNAL_CHECKSUM 0x00000008 /* User explicitly
specified journal checksum */
#define EXT4_MOUNT2_JOURNAL_FAST_COMMIT 0x00000010 /* Journal fast commit */
+#define EXT4_MOUNT2_DAX_NEVER 0x00000020 /* Do not allow Direct Access */
+#define EXT4_MOUNT2_DAX_INODE 0x00000040 /* For printing options only */
+
#define clear_opt(sb, opt) EXT4_SB(sb)->s_mount_opt &= \
~EXT4_MOUNT_##opt
@@ -1426,12 +1429,6 @@ struct ext4_super_block {
#ifdef __KERNEL__
-/*
- * run-time mount flags
- */
-#define EXT4_MF_MNTDIR_SAMPLED 0x0001
-#define EXT4_MF_FS_ABORTED 0x0002 /* Fatal error detected */
-
#ifdef CONFIG_FS_ENCRYPTION
#define DUMMY_ENCRYPTION_ENABLED(sbi) ((sbi)->s_dummy_enc_policy.policy != NULL)
#else
@@ -1444,14 +1441,6 @@ struct ext4_super_block {
#define EXT4_ENC_UTF8_12_1 1
/*
- * Flags for ext4_sb_info.s_encoding_flags.
- */
-#define EXT4_ENC_STRICT_MODE_FL (1 << 0)
-
-#define ext4_has_strict_mode(sbi) \
- (sbi->s_encoding_flags & EXT4_ENC_STRICT_MODE_FL)
-
-/*
* fourth extended-fs super-block data in memory
*/
struct ext4_sb_info {
@@ -1474,7 +1463,7 @@ struct ext4_sb_info {
struct buffer_head * __rcu *s_group_desc;
unsigned int s_mount_opt;
unsigned int s_mount_opt2;
- unsigned int s_mount_flags;
+ unsigned long s_mount_flags;
unsigned int s_def_mount_opt;
ext4_fsblk_t s_sb_block;
atomic64_t s_resv_clusters;
@@ -1500,10 +1489,6 @@ struct ext4_sb_info {
struct kobject s_kobj;
struct completion s_kobj_unregister;
struct super_block *s_sb;
-#ifdef CONFIG_UNICODE
- struct unicode_map *s_encoding;
- __u16 s_encoding_flags;
-#endif
/* Journaling */
struct journal_s *s_journal;
@@ -1644,6 +1629,27 @@ struct ext4_sb_info {
errseq_t s_bdev_wb_err;
spinlock_t s_bdev_wb_lock;
+ /* Information about errors that happened during this mount */
+ spinlock_t s_error_lock;
+ int s_add_error_count;
+ int s_first_error_code;
+ __u32 s_first_error_line;
+ __u32 s_first_error_ino;
+ __u64 s_first_error_block;
+ const char *s_first_error_func;
+ time64_t s_first_error_time;
+ int s_last_error_code;
+ __u32 s_last_error_line;
+ __u32 s_last_error_ino;
+ __u64 s_last_error_block;
+ const char *s_last_error_func;
+ time64_t s_last_error_time;
+ /*
+ * If we are in a context where we cannot update error information in
+ * the on-disk superblock, we queue this work to do it.
+ */
+ struct work_struct s_error_work;
+
/* Ext4 fast commit stuff */
atomic_t s_fc_subtid;
atomic_t s_fc_ineligible_updates;
@@ -1707,6 +1713,34 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
})
/*
+ * run-time mount flags
+ */
+enum {
+ EXT4_MF_MNTDIR_SAMPLED,
+ EXT4_MF_FS_ABORTED, /* Fatal error detected */
+ EXT4_MF_FC_INELIGIBLE, /* Fast commit ineligible */
+ EXT4_MF_FC_COMMITTING /* File system underoing a fast
+ * commit.
+ */
+};
+
+static inline void ext4_set_mount_flag(struct super_block *sb, int bit)
+{
+ set_bit(bit, &EXT4_SB(sb)->s_mount_flags);
+}
+
+static inline void ext4_clear_mount_flag(struct super_block *sb, int bit)
+{
+ clear_bit(bit, &EXT4_SB(sb)->s_mount_flags);
+}
+
+static inline int ext4_test_mount_flag(struct super_block *sb, int bit)
+{
+ return test_bit(bit, &EXT4_SB(sb)->s_mount_flags);
+}
+
+
+/*
* Simulate_fail codes
*/
#define EXT4_SIM_BBITMAP_EIO 1
@@ -1855,7 +1889,6 @@ static inline bool ext4_verity_in_progress(struct inode *inode)
#define EXT4_GOOD_OLD_REV 0 /* The good old (original) format */
#define EXT4_DYNAMIC_REV 1 /* V2 format w/ dynamic inode sizes */
-#define EXT4_CURRENT_REV EXT4_GOOD_OLD_REV
#define EXT4_MAX_SUPP_REV EXT4_DYNAMIC_REV
#define EXT4_GOOD_OLD_INODE_SIZE 128
@@ -1875,6 +1908,13 @@ static inline bool ext4_verity_in_progress(struct inode *inode)
#define EXT4_FEATURE_COMPAT_RESIZE_INODE 0x0010
#define EXT4_FEATURE_COMPAT_DIR_INDEX 0x0020
#define EXT4_FEATURE_COMPAT_SPARSE_SUPER2 0x0200
+/*
+ * The reason why "FAST_COMMIT" is a compat feature is that, FS becomes
+ * incompatible only if fast commit blocks are present in the FS. Since we
+ * clear the journal (and thus the fast commit blocks), we don't mark FS as
+ * incompatible. We also have a JBD2 incompat feature, which gets set when
+ * there are fast commit blocks present in the journal.
+ */
#define EXT4_FEATURE_COMPAT_FAST_COMMIT 0x0400
#define EXT4_FEATURE_COMPAT_STABLE_INODES 0x0800
@@ -2685,7 +2725,8 @@ void ext4_insert_dentry(struct inode *inode,
struct ext4_filename *fname);
static inline void ext4_update_dx_flag(struct inode *inode)
{
- if (!ext4_has_feature_dir_index(inode->i_sb)) {
+ if (!ext4_has_feature_dir_index(inode->i_sb) &&
+ ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) {
/* ext4_iget() should have caught this... */
WARN_ON_ONCE(ext4_has_feature_metadata_csum(inode->i_sb));
ext4_clear_inode_flag(inode, EXT4_INODE_INDEX);
@@ -2743,12 +2784,16 @@ extern void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate);
int ext4_fc_info_show(struct seq_file *seq, void *v);
void ext4_fc_init(struct super_block *sb, journal_t *journal);
void ext4_fc_init_inode(struct inode *inode);
-void ext4_fc_track_range(struct inode *inode, ext4_lblk_t start,
+void ext4_fc_track_range(handle_t *handle, struct inode *inode, ext4_lblk_t start,
ext4_lblk_t end);
-void ext4_fc_track_unlink(struct inode *inode, struct dentry *dentry);
-void ext4_fc_track_link(struct inode *inode, struct dentry *dentry);
-void ext4_fc_track_create(struct inode *inode, struct dentry *dentry);
-void ext4_fc_track_inode(struct inode *inode);
+void __ext4_fc_track_unlink(handle_t *handle, struct inode *inode,
+ struct dentry *dentry);
+void __ext4_fc_track_link(handle_t *handle, struct inode *inode,
+ struct dentry *dentry);
+void ext4_fc_track_unlink(handle_t *handle, struct dentry *dentry);
+void ext4_fc_track_link(handle_t *handle, struct dentry *dentry);
+void ext4_fc_track_create(handle_t *handle, struct dentry *dentry);
+void ext4_fc_track_inode(handle_t *handle, struct inode *inode);
void ext4_fc_mark_ineligible(struct super_block *sb, int reason);
void ext4_fc_start_ineligible(struct super_block *sb, int reason);
void ext4_fc_stop_ineligible(struct super_block *sb);
@@ -2937,9 +2982,9 @@ extern void ext4_mark_group_bitmap_corrupted(struct super_block *sb,
ext4_group_t block_group,
unsigned int flags);
-extern __printf(6, 7)
-void __ext4_error(struct super_block *, const char *, unsigned int, int, __u64,
- const char *, ...);
+extern __printf(7, 8)
+void __ext4_error(struct super_block *, const char *, unsigned int, bool,
+ int, __u64, const char *, ...);
extern __printf(6, 7)
void __ext4_error_inode(struct inode *, const char *, unsigned int,
ext4_fsblk_t, int, const char *, ...);
@@ -2948,9 +2993,6 @@ void __ext4_error_file(struct file *, const char *, unsigned int, ext4_fsblk_t,
const char *, ...);
extern void __ext4_std_error(struct super_block *, const char *,
unsigned int, int);
-extern __printf(5, 6)
-void __ext4_abort(struct super_block *, const char *, unsigned int, int,
- const char *, ...);
extern __printf(4, 5)
void __ext4_warning(struct super_block *, const char *, unsigned int,
const char *, ...);
@@ -2980,6 +3022,9 @@ void __ext4_grp_locked_error(const char *, unsigned int,
#define EXT4_ERROR_FILE(file, block, fmt, a...) \
ext4_error_file((file), __func__, __LINE__, (block), (fmt), ## a)
+#define ext4_abort(sb, err, fmt, a...) \
+ __ext4_error((sb), __func__, __LINE__, true, (err), 0, (fmt), ## a)
+
#ifdef CONFIG_PRINTK
#define ext4_error_inode(inode, func, line, block, fmt, ...) \
@@ -2990,11 +3035,11 @@ void __ext4_grp_locked_error(const char *, unsigned int,
#define ext4_error_file(file, func, line, block, fmt, ...) \
__ext4_error_file(file, func, line, block, fmt, ##__VA_ARGS__)
#define ext4_error(sb, fmt, ...) \
- __ext4_error((sb), __func__, __LINE__, 0, 0, (fmt), ##__VA_ARGS__)
+ __ext4_error((sb), __func__, __LINE__, false, 0, 0, (fmt), \
+ ##__VA_ARGS__)
#define ext4_error_err(sb, err, fmt, ...) \
- __ext4_error((sb), __func__, __LINE__, (err), 0, (fmt), ##__VA_ARGS__)
-#define ext4_abort(sb, err, fmt, ...) \
- __ext4_abort((sb), __func__, __LINE__, (err), (fmt), ##__VA_ARGS__)
+ __ext4_error((sb), __func__, __LINE__, false, (err), 0, (fmt), \
+ ##__VA_ARGS__)
#define ext4_warning(sb, fmt, ...) \
__ext4_warning(sb, __func__, __LINE__, fmt, ##__VA_ARGS__)
#define ext4_warning_inode(inode, fmt, ...) \
@@ -3027,17 +3072,12 @@ do { \
#define ext4_error(sb, fmt, ...) \
do { \
no_printk(fmt, ##__VA_ARGS__); \
- __ext4_error(sb, "", 0, 0, 0, " "); \
+ __ext4_error(sb, "", 0, false, 0, 0, " "); \
} while (0)
#define ext4_error_err(sb, err, fmt, ...) \
do { \
no_printk(fmt, ##__VA_ARGS__); \
- __ext4_error(sb, "", 0, err, 0, " "); \
-} while (0)
-#define ext4_abort(sb, err, fmt, ...) \
-do { \
- no_printk(fmt, ##__VA_ARGS__); \
- __ext4_abort(sb, "", 0, err, " "); \
+ __ext4_error(sb, "", 0, false, err, 0, " "); \
} while (0)
#define ext4_warning(sb, fmt, ...) \
do { \
@@ -3346,6 +3386,21 @@ static inline void ext4_unlock_group(struct super_block *sb,
spin_unlock(ext4_group_lock_ptr(sb, group));
}
+#ifdef CONFIG_QUOTA
+static inline bool ext4_quota_capable(struct super_block *sb)
+{
+ return (test_opt(sb, QUOTA) || ext4_has_feature_quota(sb));
+}
+
+static inline bool ext4_is_quota_journalled(struct super_block *sb)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+ return (ext4_has_feature_quota(sb) ||
+ sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]);
+}
+#endif
+
/*
* Block validity checking
*/
@@ -3366,10 +3421,6 @@ static inline void ext4_unlock_group(struct super_block *sb,
/* dir.c */
extern const struct file_operations ext4_dir_operations;
-#ifdef CONFIG_UNICODE
-extern const struct dentry_operations ext4_dentry_ops;
-#endif
-
/* file.c */
extern const struct inode_operations ext4_file_inode_operations;
extern const struct file_operations ext4_file_operations;
@@ -3464,7 +3515,7 @@ extern int ext4_handle_dirty_dirblock(handle_t *handle, struct inode *inode,
extern int ext4_ci_compare(const struct inode *parent,
const struct qstr *fname,
const struct qstr *entry, bool quick);
-extern int __ext4_unlink(struct inode *dir, const struct qstr *d_name,
+extern int __ext4_unlink(handle_t *handle, struct inode *dir, const struct qstr *d_name,
struct inode *inode);
extern int __ext4_link(struct inode *dir, struct inode *inode,
struct dentry *dentry);
@@ -3598,7 +3649,6 @@ extern void ext4_io_submit(struct ext4_io_submit *io);
extern int ext4_bio_write_page(struct ext4_io_submit *io,
struct page *page,
int len,
- struct writeback_control *wbc,
bool keep_towrite);
extern struct ext4_io_end_vec *ext4_alloc_io_end_vec(ext4_io_end_t *io_end);
extern struct ext4_io_end_vec *ext4_last_io_end_vec(ext4_io_end_t *io_end);
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index 0fd0c42a4f7d..1a0a827a7f34 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -296,8 +296,8 @@ int __ext4_forget(const char *where, unsigned int line, handle_t *handle,
if (err) {
ext4_journal_abort_handle(where, line, __func__,
bh, handle, err);
- __ext4_abort(inode->i_sb, where, line, -err,
- "error %d when attempting revoke", err);
+ __ext4_error(inode->i_sb, where, line, true, -err, 0,
+ "error %d when attempting revoke", err);
}
BUFFER_TRACE(bh, "exit");
return err;
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index 00dc668e052b..a124c68b0c75 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -86,17 +86,14 @@
#ifdef CONFIG_QUOTA
/* Amount of blocks needed for quota update - we know that the structure was
* allocated so we need to update only data block */
-#define EXT4_QUOTA_TRANS_BLOCKS(sb) ((test_opt(sb, QUOTA) ||\
- ext4_has_feature_quota(sb)) ? 1 : 0)
+#define EXT4_QUOTA_TRANS_BLOCKS(sb) ((ext4_quota_capable(sb)) ? 1 : 0)
/* Amount of blocks needed for quota insert/delete - we do some block writes
* but inode, sb and group updates are done only once */
-#define EXT4_QUOTA_INIT_BLOCKS(sb) ((test_opt(sb, QUOTA) ||\
- ext4_has_feature_quota(sb)) ?\
+#define EXT4_QUOTA_INIT_BLOCKS(sb) ((ext4_quota_capable(sb)) ?\
(DQUOT_INIT_ALLOC*(EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)\
+3+DQUOT_INIT_REWRITE) : 0)
-#define EXT4_QUOTA_DEL_BLOCKS(sb) ((test_opt(sb, QUOTA) ||\
- ext4_has_feature_quota(sb)) ?\
+#define EXT4_QUOTA_DEL_BLOCKS(sb) ((ext4_quota_capable(sb)) ?\
(DQUOT_DEL_ALLOC*(EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)\
+3+DQUOT_DEL_REWRITE) : 0)
#else
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 559100f3e23c..3960b7ec3ab7 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1471,16 +1471,16 @@ static int ext4_ext_search_left(struct inode *inode,
}
/*
- * search the closest allocated block to the right for *logical
- * and returns it at @logical + it's physical address at @phys
- * if *logical is the largest allocated block, the function
- * returns 0 at @phys
- * return value contains 0 (success) or error code
+ * Search the closest allocated block to the right for *logical
+ * and returns it at @logical + it's physical address at @phys.
+ * If not exists, return 0 and @phys is set to 0. We will return
+ * 1 which means we found an allocated block and ret_ex is valid.
+ * Or return a (< 0) error code.
*/
static int ext4_ext_search_right(struct inode *inode,
struct ext4_ext_path *path,
ext4_lblk_t *logical, ext4_fsblk_t *phys,
- struct ext4_extent **ret_ex)
+ struct ext4_extent *ret_ex)
{
struct buffer_head *bh = NULL;
struct ext4_extent_header *eh;
@@ -1574,10 +1574,11 @@ got_index:
found_extent:
*logical = le32_to_cpu(ex->ee_block);
*phys = ext4_ext_pblock(ex);
- *ret_ex = ex;
+ if (ret_ex)
+ *ret_ex = *ex;
if (bh)
put_bh(bh);
- return 0;
+ return 1;
}
/*
@@ -2868,8 +2869,8 @@ again:
*/
lblk = ex_end + 1;
err = ext4_ext_search_right(inode, path, &lblk, &pblk,
- &ex);
- if (err)
+ NULL);
+ if (err < 0)
goto out;
if (pblk) {
partial.pclu = EXT4_B2C(sbi, pblk);
@@ -3723,7 +3724,6 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
err = ext4_ext_dirty(handle, inode, path + path->p_depth);
out:
ext4_ext_show_leaf(inode, path);
- ext4_fc_track_range(inode, ee_block, ee_block + ee_len - 1);
return err;
}
@@ -3795,7 +3795,6 @@ convert_initialized_extent(handle_t *handle, struct inode *inode,
if (*allocated > map->m_len)
*allocated = map->m_len;
map->m_len = *allocated;
- ext4_fc_track_range(inode, ee_block, ee_block + ee_len - 1);
return 0;
}
@@ -4039,7 +4038,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
struct ext4_map_blocks *map, int flags)
{
struct ext4_ext_path *path = NULL;
- struct ext4_extent newex, *ex, *ex2;
+ struct ext4_extent newex, *ex, ex2;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
ext4_fsblk_t newblock = 0, pblk;
int err = 0, depth, ret;
@@ -4175,15 +4174,14 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
if (err)
goto out;
ar.lright = map->m_lblk;
- ex2 = NULL;
err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright, &ex2);
- if (err)
+ if (err < 0)
goto out;
/* Check if the extent after searching to the right implies a
* cluster we can use. */
- if ((sbi->s_cluster_ratio > 1) && ex2 &&
- get_implied_cluster_alloc(inode->i_sb, map, ex2, path)) {
+ if ((sbi->s_cluster_ratio > 1) && err &&
+ get_implied_cluster_alloc(inode->i_sb, map, &ex2, path)) {
ar.len = allocated = map->m_len;
newblock = map->m_pblk;
goto got_allocated_blocks;
@@ -4329,7 +4327,6 @@ got_allocated_blocks:
map->m_len = ar.len;
allocated = map->m_len;
ext4_ext_show_leaf(inode, path);
- ext4_fc_track_range(inode, map->m_lblk, map->m_lblk + map->m_len - 1);
out:
ext4_ext_drop_refs(path);
kfree(path);
@@ -4602,7 +4599,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
ret = ext4_mark_inode_dirty(handle, inode);
if (unlikely(ret))
goto out_handle;
- ext4_fc_track_range(inode, offset >> inode->i_sb->s_blocksize_bits,
+ ext4_fc_track_range(handle, inode, offset >> inode->i_sb->s_blocksize_bits,
(offset + len - 1) >> inode->i_sb->s_blocksize_bits);
/* Zero out partial block at the edges of the range */
ret = ext4_zero_partial_blocks(handle, inode, offset, len);
@@ -4651,8 +4648,6 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |
FALLOC_FL_INSERT_RANGE))
return -EOPNOTSUPP;
- ext4_fc_track_range(inode, offset >> blkbits,
- (offset + len - 1) >> blkbits);
ext4_fc_start_update(inode);
@@ -5820,8 +5815,8 @@ int ext4_ext_replay_update_ex(struct inode *inode, ext4_lblk_t start,
int ret;
path = ext4_find_extent(inode, start, NULL, 0);
- if (!path)
- return -EINVAL;
+ if (IS_ERR(path))
+ return PTR_ERR(path);
ex = path[path->p_depth].p_ext;
if (!ex) {
ret = -EFSCORRUPTED;
@@ -5993,7 +5988,6 @@ int ext4_ext_replay_set_iblocks(struct inode *inode)
kfree(path);
break;
}
- ex = path2[path2->p_depth].p_ext;
for (i = 0; i <= max(path->p_depth, path2->p_depth); i++) {
cmp1 = cmp2 = 0;
if (i <= path->p_depth)
diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
index 447c8d93f480..4fcc21c25e79 100644
--- a/fs/ext4/fast_commit.c
+++ b/fs/ext4/fast_commit.c
@@ -83,7 +83,7 @@
*
* Atomicity of commits
* --------------------
- * In order to gaurantee atomicity during the commit operation, fast commit
+ * In order to guarantee atomicity during the commit operation, fast commit
* uses "EXT4_FC_TAG_TAIL" tag that marks a fast commit as complete. Tail
* tag contains CRC of the contents and TID of the transaction after which
* this fast commit should be applied. Recovery code replays fast commit
@@ -103,8 +103,69 @@
*
* Replay code should thus check for all the valid tails in the FC area.
*
+ * Fast Commit Replay Idempotence
+ * ------------------------------
+ *
+ * Fast commits tags are idempotent in nature provided the recovery code follows
+ * certain rules. The guiding principle that the commit path follows while
+ * committing is that it stores the result of a particular operation instead of
+ * storing the procedure.
+ *
+ * Let's consider this rename operation: 'mv /a /b'. Let's assume dirent '/a'
+ * was associated with inode 10. During fast commit, instead of storing this
+ * operation as a procedure "rename a to b", we store the resulting file system
+ * state as a "series" of outcomes:
+ *
+ * - Link dirent b to inode 10
+ * - Unlink dirent a
+ * - Inode <10> with valid refcount
+ *
+ * Now when recovery code runs, it needs "enforce" this state on the file
+ * system. This is what guarantees idempotence of fast commit replay.
+ *
+ * Let's take an example of a procedure that is not idempotent and see how fast
+ * commits make it idempotent. Consider following sequence of operations:
+ *
+ * rm A; mv B A; read A
+ * (x) (y) (z)
+ *
+ * (x), (y) and (z) are the points at which we can crash. If we store this
+ * sequence of operations as is then the replay is not idempotent. Let's say
+ * while in replay, we crash at (z). During the second replay, file A (which was
+ * actually created as a result of "mv B A" operation) would get deleted. Thus,
+ * file named A would be absent when we try to read A. So, this sequence of
+ * operations is not idempotent. However, as mentioned above, instead of storing
+ * the procedure fast commits store the outcome of each procedure. Thus the fast
+ * commit log for above procedure would be as follows:
+ *
+ * (Let's assume dirent A was linked to inode 10 and dirent B was linked to
+ * inode 11 before the replay)
+ *
+ * [Unlink A] [Link A to inode 11] [Unlink B] [Inode 11]
+ * (w) (x) (y) (z)
+ *
+ * If we crash at (z), we will have file A linked to inode 11. During the second
+ * replay, we will remove file A (inode 11). But we will create it back and make
+ * it point to inode 11. We won't find B, so we'll just skip that step. At this
+ * point, the refcount for inode 11 is not reliable, but that gets fixed by the
+ * replay of last inode 11 tag. Crashes at points (w), (x) and (y) get handled
+ * similarly. Thus, by converting a non-idempotent procedure into a series of
+ * idempotent outcomes, fast commits ensured idempotence during the replay.
+ *
* TODOs
* -----
+ *
+ * 0) Fast commit replay path hardening: Fast commit replay code should use
+ * journal handles to make sure all the updates it does during the replay
+ * path are atomic. With that if we crash during fast commit replay, after
+ * trying to do recovery again, we will find a file system where fast commit
+ * area is invalid (because new full commit would be found). In order to deal
+ * with that, fast commit replay code should ensure that the "FC_REPLAY"
+ * superblock state is persisted before starting the replay, so that after
+ * the crash, fast commit recovery code can look at that flag and perform
+ * fast commit recovery even if that area is invalidated by later full
+ * commits.
+ *
* 1) Make fast commit atomic updates more fine grained. Today, a fast commit
* eligible update must be protected within ext4_fc_start_update() and
* ext4_fc_stop_update(). These routines are called at much higher
@@ -152,7 +213,31 @@ void ext4_fc_init_inode(struct inode *inode)
INIT_LIST_HEAD(&ei->i_fc_list);
init_waitqueue_head(&ei->i_fc_wait);
atomic_set(&ei->i_fc_updates, 0);
- ei->i_fc_committed_subtid = 0;
+}
+
+/* This function must be called with sbi->s_fc_lock held. */
+static void ext4_fc_wait_committing_inode(struct inode *inode)
+__releases(&EXT4_SB(inode->i_sb)->s_fc_lock)
+{
+ wait_queue_head_t *wq;
+ struct ext4_inode_info *ei = EXT4_I(inode);
+
+#if (BITS_PER_LONG < 64)
+ DEFINE_WAIT_BIT(wait, &ei->i_state_flags,
+ EXT4_STATE_FC_COMMITTING);
+ wq = bit_waitqueue(&ei->i_state_flags,
+ EXT4_STATE_FC_COMMITTING);
+#else
+ DEFINE_WAIT_BIT(wait, &ei->i_flags,
+ EXT4_STATE_FC_COMMITTING);
+ wq = bit_waitqueue(&ei->i_flags,
+ EXT4_STATE_FC_COMMITTING);
+#endif
+ lockdep_assert_held(&EXT4_SB(inode->i_sb)->s_fc_lock);
+ prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
+ spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
+ schedule();
+ finish_wait(wq, &wait.wq_entry);
}
/*
@@ -176,22 +261,7 @@ restart:
goto out;
if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) {
- wait_queue_head_t *wq;
-#if (BITS_PER_LONG < 64)
- DEFINE_WAIT_BIT(wait, &ei->i_state_flags,
- EXT4_STATE_FC_COMMITTING);
- wq = bit_waitqueue(&ei->i_state_flags,
- EXT4_STATE_FC_COMMITTING);
-#else
- DEFINE_WAIT_BIT(wait, &ei->i_flags,
- EXT4_STATE_FC_COMMITTING);
- wq = bit_waitqueue(&ei->i_flags,
- EXT4_STATE_FC_COMMITTING);
-#endif
- prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
- spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
- schedule();
- finish_wait(wq, &wait.wq_entry);
+ ext4_fc_wait_committing_inode(inode);
goto restart;
}
out:
@@ -234,26 +304,10 @@ restart:
}
if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) {
- wait_queue_head_t *wq;
-#if (BITS_PER_LONG < 64)
- DEFINE_WAIT_BIT(wait, &ei->i_state_flags,
- EXT4_STATE_FC_COMMITTING);
- wq = bit_waitqueue(&ei->i_state_flags,
- EXT4_STATE_FC_COMMITTING);
-#else
- DEFINE_WAIT_BIT(wait, &ei->i_flags,
- EXT4_STATE_FC_COMMITTING);
- wq = bit_waitqueue(&ei->i_flags,
- EXT4_STATE_FC_COMMITTING);
-#endif
- prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
- spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
- schedule();
- finish_wait(wq, &wait.wq_entry);
+ ext4_fc_wait_committing_inode(inode);
goto restart;
}
- if (!list_empty(&ei->i_fc_list))
- list_del_init(&ei->i_fc_list);
+ list_del_init(&ei->i_fc_list);
spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
}
@@ -269,7 +323,7 @@ void ext4_fc_mark_ineligible(struct super_block *sb, int reason)
(EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
return;
- sbi->s_mount_state |= EXT4_FC_INELIGIBLE;
+ ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
WARN_ON(reason >= EXT4_FC_REASON_MAX);
sbi->s_fc_stats.fc_ineligible_reason_count[reason]++;
}
@@ -292,7 +346,7 @@ void ext4_fc_start_ineligible(struct super_block *sb, int reason)
}
/*
- * Stop a fast commit ineligible update. We set EXT4_FC_INELIGIBLE flag here
+ * Stop a fast commit ineligible update. We set EXT4_MF_FC_INELIGIBLE flag here
* to ensure that after stopping the ineligible update, at least one full
* commit takes place.
*/
@@ -302,14 +356,14 @@ void ext4_fc_stop_ineligible(struct super_block *sb)
(EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
return;
- EXT4_SB(sb)->s_mount_state |= EXT4_FC_INELIGIBLE;
+ ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
atomic_dec(&EXT4_SB(sb)->s_fc_ineligible_updates);
}
static inline int ext4_fc_is_ineligible(struct super_block *sb)
{
- return (EXT4_SB(sb)->s_mount_state & EXT4_FC_INELIGIBLE) ||
- atomic_read(&EXT4_SB(sb)->s_fc_ineligible_updates);
+ return (ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE) ||
+ atomic_read(&EXT4_SB(sb)->s_fc_ineligible_updates));
}
/*
@@ -323,13 +377,14 @@ static inline int ext4_fc_is_ineligible(struct super_block *sb)
* If enqueue is set, this function enqueues the inode in fast commit list.
*/
static int ext4_fc_track_template(
- struct inode *inode, int (*__fc_track_fn)(struct inode *, void *, bool),
+ handle_t *handle, struct inode *inode,
+ int (*__fc_track_fn)(struct inode *, void *, bool),
void *args, int enqueue)
{
- tid_t running_txn_tid;
bool update = false;
struct ext4_inode_info *ei = EXT4_I(inode);
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ tid_t tid = 0;
int ret;
if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
@@ -339,15 +394,13 @@ static int ext4_fc_track_template(
if (ext4_fc_is_ineligible(inode->i_sb))
return -EINVAL;
- running_txn_tid = sbi->s_journal ?
- sbi->s_journal->j_commit_sequence + 1 : 0;
-
+ tid = handle->h_transaction->t_tid;
mutex_lock(&ei->i_fc_lock);
- if (running_txn_tid == ei->i_sync_tid) {
+ if (tid == ei->i_sync_tid) {
update = true;
} else {
ext4_fc_reset_inode(inode);
- ei->i_sync_tid = running_txn_tid;
+ ei->i_sync_tid = tid;
}
ret = __fc_track_fn(inode, args, update);
mutex_unlock(&ei->i_fc_lock);
@@ -358,7 +411,7 @@ static int ext4_fc_track_template(
spin_lock(&sbi->s_fc_lock);
if (list_empty(&EXT4_I(inode)->i_fc_list))
list_add_tail(&EXT4_I(inode)->i_fc_list,
- (sbi->s_mount_state & EXT4_FC_COMMITTING) ?
+ (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING)) ?
&sbi->s_fc_q[FC_Q_STAGING] :
&sbi->s_fc_q[FC_Q_MAIN]);
spin_unlock(&sbi->s_fc_lock);
@@ -384,7 +437,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
mutex_unlock(&ei->i_fc_lock);
node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS);
if (!node) {
- ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_MEM);
+ ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM);
mutex_lock(&ei->i_fc_lock);
return -ENOMEM;
}
@@ -397,7 +450,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
if (!node->fcd_name.name) {
kmem_cache_free(ext4_fc_dentry_cachep, node);
ext4_fc_mark_ineligible(inode->i_sb,
- EXT4_FC_REASON_MEM);
+ EXT4_FC_REASON_NOMEM);
mutex_lock(&ei->i_fc_lock);
return -ENOMEM;
}
@@ -411,7 +464,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
node->fcd_name.len = dentry->d_name.len;
spin_lock(&sbi->s_fc_lock);
- if (sbi->s_mount_state & EXT4_FC_COMMITTING)
+ if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING))
list_add_tail(&node->fcd_list,
&sbi->s_fc_dentry_q[FC_Q_STAGING]);
else
@@ -422,7 +475,8 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
return 0;
}
-void ext4_fc_track_unlink(struct inode *inode, struct dentry *dentry)
+void __ext4_fc_track_unlink(handle_t *handle,
+ struct inode *inode, struct dentry *dentry)
{
struct __track_dentry_update_args args;
int ret;
@@ -430,12 +484,18 @@ void ext4_fc_track_unlink(struct inode *inode, struct dentry *dentry)
args.dentry = dentry;
args.op = EXT4_FC_TAG_UNLINK;
- ret = ext4_fc_track_template(inode, __track_dentry_update,
+ ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
(void *)&args, 0);
trace_ext4_fc_track_unlink(inode, dentry, ret);
}
-void ext4_fc_track_link(struct inode *inode, struct dentry *dentry)
+void ext4_fc_track_unlink(handle_t *handle, struct dentry *dentry)
+{
+ __ext4_fc_track_unlink(handle, d_inode(dentry), dentry);
+}
+
+void __ext4_fc_track_link(handle_t *handle,
+ struct inode *inode, struct dentry *dentry)
{
struct __track_dentry_update_args args;
int ret;
@@ -443,20 +503,26 @@ void ext4_fc_track_link(struct inode *inode, struct dentry *dentry)
args.dentry = dentry;
args.op = EXT4_FC_TAG_LINK;
- ret = ext4_fc_track_template(inode, __track_dentry_update,
+ ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
(void *)&args, 0);
trace_ext4_fc_track_link(inode, dentry, ret);
}
-void ext4_fc_track_create(struct inode *inode, struct dentry *dentry)
+void ext4_fc_track_link(handle_t *handle, struct dentry *dentry)
+{
+ __ext4_fc_track_link(handle, d_inode(dentry), dentry);
+}
+
+void ext4_fc_track_create(handle_t *handle, struct dentry *dentry)
{
struct __track_dentry_update_args args;
+ struct inode *inode = d_inode(dentry);
int ret;
args.dentry = dentry;
args.op = EXT4_FC_TAG_CREAT;
- ret = ext4_fc_track_template(inode, __track_dentry_update,
+ ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
(void *)&args, 0);
trace_ext4_fc_track_create(inode, dentry, ret);
}
@@ -472,14 +538,20 @@ static int __track_inode(struct inode *inode, void *arg, bool update)
return 0;
}
-void ext4_fc_track_inode(struct inode *inode)
+void ext4_fc_track_inode(handle_t *handle, struct inode *inode)
{
int ret;
if (S_ISDIR(inode->i_mode))
return;
- ret = ext4_fc_track_template(inode, __track_inode, NULL, 1);
+ if (ext4_should_journal_data(inode)) {
+ ext4_fc_mark_ineligible(inode->i_sb,
+ EXT4_FC_REASON_INODE_JOURNAL_DATA);
+ return;
+ }
+
+ ret = ext4_fc_track_template(handle, inode, __track_inode, NULL, 1);
trace_ext4_fc_track_inode(inode, ret);
}
@@ -515,7 +587,7 @@ static int __track_range(struct inode *inode, void *arg, bool update)
return 0;
}
-void ext4_fc_track_range(struct inode *inode, ext4_lblk_t start,
+void ext4_fc_track_range(handle_t *handle, struct inode *inode, ext4_lblk_t start,
ext4_lblk_t end)
{
struct __track_range_args args;
@@ -527,7 +599,7 @@ void ext4_fc_track_range(struct inode *inode, ext4_lblk_t start,
args.start = start;
args.end = end;
- ret = ext4_fc_track_template(inode, __track_range, &args, 1);
+ ret = ext4_fc_track_template(handle, inode, __track_range, &args, 1);
trace_ext4_fc_track_range(inode, start, end, ret);
}
@@ -537,10 +609,11 @@ static void ext4_fc_submit_bh(struct super_block *sb)
int write_flags = REQ_SYNC;
struct buffer_head *bh = EXT4_SB(sb)->s_fc_bh;
+ /* TODO: REQ_FUA | REQ_PREFLUSH is unnecessarily expensive. */
if (test_opt(sb, BARRIER))
write_flags |= REQ_FUA | REQ_PREFLUSH;
lock_buffer(bh);
- clear_buffer_dirty(bh);
+ set_buffer_dirty(bh);
set_buffer_uptodate(bh);
bh->b_end_io = ext4_end_buffer_io_sync;
submit_bh(REQ_OP_WRITE, write_flags, bh);
@@ -846,7 +919,7 @@ static int ext4_fc_submit_inode_data_all(journal_t *journal)
int ret = 0;
spin_lock(&sbi->s_fc_lock);
- sbi->s_mount_state |= EXT4_FC_COMMITTING;
+ ext4_set_mount_flag(sb, EXT4_MF_FC_COMMITTING);
list_for_each(pos, &sbi->s_fc_q[FC_Q_MAIN]) {
ei = list_entry(pos, struct ext4_inode_info, i_fc_list);
ext4_set_inode_state(&ei->vfs_inode, EXT4_STATE_FC_COMMITTING);
@@ -900,6 +973,8 @@ static int ext4_fc_wait_inode_data_all(journal_t *journal)
/* Commit all the directory entry updates */
static int ext4_fc_commit_dentry_updates(journal_t *journal, u32 *crc)
+__acquires(&sbi->s_fc_lock)
+__releases(&sbi->s_fc_lock)
{
struct super_block *sb = (struct super_block *)(journal->j_private);
struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -964,7 +1039,6 @@ static int ext4_fc_commit_dentry_updates(journal_t *journal, u32 *crc)
fc_dentry->fcd_parent, fc_dentry->fcd_ino,
fc_dentry->fcd_name.len,
fc_dentry->fcd_name.name, crc)) {
- spin_lock(&sbi->s_fc_lock);
ret = -ENOSPC;
goto lock_and_exit;
}
@@ -997,6 +1071,13 @@ static int ext4_fc_perform_commit(journal_t *journal)
if (ret)
return ret;
+ /*
+ * If file system device is different from journal device, issue a cache
+ * flush before we start writing fast commit blocks.
+ */
+ if (journal->j_fs_dev != journal->j_dev)
+ blkdev_issue_flush(journal->j_fs_dev, GFP_NOFS);
+
blk_start_plug(&plug);
if (sbi->s_fc_bytes == 0) {
/*
@@ -1032,8 +1113,6 @@ static int ext4_fc_perform_commit(journal_t *journal)
if (ret)
goto out;
spin_lock(&sbi->s_fc_lock);
- EXT4_I(inode)->i_fc_committed_subtid =
- atomic_read(&sbi->s_fc_subtid);
}
spin_unlock(&sbi->s_fc_lock);
@@ -1132,7 +1211,7 @@ out:
"Fast commit ended with blks = %d, reason = %d, subtid - %d",
nblks, reason, subtid);
if (reason == EXT4_FC_REASON_FC_FAILED)
- return jbd2_fc_end_commit_fallback(journal, commit_tid);
+ return jbd2_fc_end_commit_fallback(journal);
if (reason == EXT4_FC_REASON_FC_START_FAILED ||
reason == EXT4_FC_REASON_INELIGIBLE)
return jbd2_complete_transaction(journal, commit_tid);
@@ -1191,8 +1270,8 @@ static void ext4_fc_cleanup(journal_t *journal, int full)
list_splice_init(&sbi->s_fc_q[FC_Q_STAGING],
&sbi->s_fc_q[FC_Q_STAGING]);
- sbi->s_mount_state &= ~EXT4_FC_COMMITTING;
- sbi->s_mount_state &= ~EXT4_FC_INELIGIBLE;
+ ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING);
+ ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
if (full)
sbi->s_fc_bytes = 0;
@@ -1202,18 +1281,6 @@ static void ext4_fc_cleanup(journal_t *journal, int full)
/* Ext4 Replay Path Routines */
-/* Get length of a particular tlv */
-static inline int ext4_fc_tag_len(struct ext4_fc_tl *tl)
-{
- return le16_to_cpu(tl->fc_len);
-}
-
-/* Get a pointer to "value" of a tlv */
-static inline u8 *ext4_fc_tag_val(struct ext4_fc_tl *tl)
-{
- return (u8 *)tl + sizeof(*tl);
-}
-
/* Helper struct for dentry replay routines */
struct dentry_info_args {
int parent_ino, dname_len, ino, inode_len;
@@ -1264,7 +1331,7 @@ static int ext4_fc_replay_unlink(struct super_block *sb, struct ext4_fc_tl *tl)
return 0;
}
- ret = __ext4_unlink(old_parent, &entry, inode);
+ ret = __ext4_unlink(NULL, old_parent, &entry, inode);
/* -ENOENT ok coz it might not exist anymore. */
if (ret == -ENOENT)
ret = 0;
@@ -1617,8 +1684,10 @@ static int ext4_fc_replay_add_range(struct super_block *sb,
if (ret == 0) {
/* Range is not mapped */
path = ext4_find_extent(inode, cur, NULL, 0);
- if (!path)
- continue;
+ if (IS_ERR(path)) {
+ iput(inode);
+ return 0;
+ }
memset(&newex, 0, sizeof(newex));
newex.ee_block = cpu_to_le32(cur);
ext4_ext_store_pblock(
@@ -1750,32 +1819,6 @@ ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl)
return 0;
}
-static inline const char *tag2str(u16 tag)
-{
- switch (tag) {
- case EXT4_FC_TAG_LINK:
- return "TAG_ADD_ENTRY";
- case EXT4_FC_TAG_UNLINK:
- return "TAG_DEL_ENTRY";
- case EXT4_FC_TAG_ADD_RANGE:
- return "TAG_ADD_RANGE";
- case EXT4_FC_TAG_CREAT:
- return "TAG_CREAT_DENTRY";
- case EXT4_FC_TAG_DEL_RANGE:
- return "TAG_DEL_RANGE";
- case EXT4_FC_TAG_INODE:
- return "TAG_INODE";
- case EXT4_FC_TAG_PAD:
- return "TAG_PAD";
- case EXT4_FC_TAG_TAIL:
- return "TAG_TAIL";
- case EXT4_FC_TAG_HEAD:
- return "TAG_HEAD";
- default:
- return "TAG_ERROR";
- }
-}
-
static void ext4_fc_set_bitmaps_and_counters(struct super_block *sb)
{
struct ext4_fc_replay_state *state;
@@ -2087,13 +2130,9 @@ void ext4_fc_init(struct super_block *sb, journal_t *journal)
if (!test_opt2(sb, JOURNAL_FAST_COMMIT))
return;
journal->j_fc_cleanup_callback = ext4_fc_cleanup;
- if (jbd2_fc_init(journal, EXT4_NUM_FC_BLKS)) {
- pr_warn("Error while enabling fast commits, turning off.");
- ext4_clear_feature_fast_commit(sb);
- }
}
-const char *fc_ineligible_reasons[] = {
+static const char *fc_ineligible_reasons[] = {
"Extended attributes changed",
"Cross rename",
"Journal flag changed",
@@ -2102,6 +2141,7 @@ const char *fc_ineligible_reasons[] = {
"Resize",
"Dir renamed",
"Falloc range op",
+ "Data journalling",
"FC Commit Failed"
};
diff --git a/fs/ext4/fast_commit.h b/fs/ext4/fast_commit.h
index 06907d485989..b77f70f55a62 100644
--- a/fs/ext4/fast_commit.h
+++ b/fs/ext4/fast_commit.h
@@ -3,8 +3,10 @@
#ifndef __FAST_COMMIT_H__
#define __FAST_COMMIT_H__
-/* Number of blocks in journal area to allocate for fast commits */
-#define EXT4_NUM_FC_BLKS 256
+/*
+ * Note this file is present in e2fsprogs/lib/ext2fs/fast_commit.h and
+ * linux/fs/ext4/fast_commit.h. These file should always be byte identical.
+ */
/* Fast commit tags */
#define EXT4_FC_TAG_ADD_RANGE 0x0001
@@ -53,7 +55,7 @@ struct ext4_fc_del_range {
struct ext4_fc_dentry_info {
__le32 fc_parent_ino;
__le32 fc_ino;
- u8 fc_dname[0];
+ __u8 fc_dname[0];
};
/* Value structure for EXT4_FC_TAG_INODE and EXT4_FC_TAG_INODE_PARTIAL. */
@@ -69,19 +71,6 @@ struct ext4_fc_tail {
};
/*
- * In memory list of dentry updates that are performed on the file
- * system used by fast commit code.
- */
-struct ext4_fc_dentry_update {
- int fcd_op; /* Type of update create / unlink / link */
- int fcd_parent; /* Parent inode number */
- int fcd_ino; /* Inode number */
- struct qstr fcd_name; /* Dirent name */
- unsigned char fcd_iname[DNAME_INLINE_LEN]; /* Dirent name string */
- struct list_head fcd_list;
-};
-
-/*
* Fast commit reason codes
*/
enum {
@@ -100,15 +89,30 @@ enum {
EXT4_FC_REASON_XATTR = 0,
EXT4_FC_REASON_CROSS_RENAME,
EXT4_FC_REASON_JOURNAL_FLAG_CHANGE,
- EXT4_FC_REASON_MEM,
+ EXT4_FC_REASON_NOMEM,
EXT4_FC_REASON_SWAP_BOOT,
EXT4_FC_REASON_RESIZE,
EXT4_FC_REASON_RENAME_DIR,
EXT4_FC_REASON_FALLOC_RANGE,
+ EXT4_FC_REASON_INODE_JOURNAL_DATA,
EXT4_FC_COMMIT_FAILED,
EXT4_FC_REASON_MAX
};
+#ifdef __KERNEL__
+/*
+ * In memory list of dentry updates that are performed on the file
+ * system used by fast commit code.
+ */
+struct ext4_fc_dentry_update {
+ int fcd_op; /* Type of update create / unlink / link */
+ int fcd_parent; /* Parent inode number */
+ int fcd_ino; /* Inode number */
+ struct qstr fcd_name; /* Dirent name */
+ unsigned char fcd_iname[DNAME_INLINE_LEN]; /* Dirent name string */
+ struct list_head fcd_list;
+};
+
struct ext4_fc_stats {
unsigned int fc_ineligible_reason_count[EXT4_FC_REASON_MAX];
unsigned long fc_num_commits;
@@ -147,13 +151,51 @@ struct ext4_fc_replay_state {
};
#define region_last(__region) (((__region)->lblk) + ((__region)->len) - 1)
+#endif
#define fc_for_each_tl(__start, __end, __tl) \
- for (tl = (struct ext4_fc_tl *)start; \
- (u8 *)tl < (u8 *)end; \
- tl = (struct ext4_fc_tl *)((u8 *)tl + \
+ for (tl = (struct ext4_fc_tl *)(__start); \
+ (__u8 *)tl < (__u8 *)(__end); \
+ tl = (struct ext4_fc_tl *)((__u8 *)tl + \
sizeof(struct ext4_fc_tl) + \
+ le16_to_cpu(tl->fc_len)))
+static inline const char *tag2str(__u16 tag)
+{
+ switch (tag) {
+ case EXT4_FC_TAG_LINK:
+ return "ADD_ENTRY";
+ case EXT4_FC_TAG_UNLINK:
+ return "DEL_ENTRY";
+ case EXT4_FC_TAG_ADD_RANGE:
+ return "ADD_RANGE";
+ case EXT4_FC_TAG_CREAT:
+ return "CREAT_DENTRY";
+ case EXT4_FC_TAG_DEL_RANGE:
+ return "DEL_RANGE";
+ case EXT4_FC_TAG_INODE:
+ return "INODE";
+ case EXT4_FC_TAG_PAD:
+ return "PAD";
+ case EXT4_FC_TAG_TAIL:
+ return "TAIL";
+ case EXT4_FC_TAG_HEAD:
+ return "HEAD";
+ default:
+ return "ERROR";
+ }
+}
+
+/* Get length of a particular tlv */
+static inline int ext4_fc_tag_len(struct ext4_fc_tl *tl)
+{
+ return le16_to_cpu(tl->fc_len);
+}
+
+/* Get a pointer to "value" of a tlv */
+static inline __u8 *ext4_fc_tag_val(struct ext4_fc_tl *tl)
+{
+ return (__u8 *)tl + sizeof(*tl);
+}
#endif /* __FAST_COMMIT_H__ */
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index d85412d12e3a..3ed8c048fb12 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -761,7 +761,6 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
if (!daxdev_mapping_supported(vma, dax_dev))
return -EOPNOTSUPP;
- ext4_fc_start_update(inode);
file_accessed(file);
if (IS_DAX(file_inode(file))) {
vma->vm_ops = &ext4_dax_vm_ops;
@@ -769,7 +768,6 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
} else {
vma->vm_ops = &ext4_file_vm_ops;
}
- ext4_fc_stop_update(inode);
return 0;
}
@@ -782,13 +780,13 @@ static int ext4_sample_last_mounted(struct super_block *sb,
handle_t *handle;
int err;
- if (likely(sbi->s_mount_flags & EXT4_MF_MNTDIR_SAMPLED))
+ if (likely(ext4_test_mount_flag(sb, EXT4_MF_MNTDIR_SAMPLED)))
return 0;
if (sb_rdonly(sb) || !sb_start_intwrite_trylock(sb))
return 0;
- sbi->s_mount_flags |= EXT4_MF_MNTDIR_SAMPLED;
+ ext4_set_mount_flag(sb, EXT4_MF_MNTDIR_SAMPLED);
/*
* Sample where the filesystem has been mounted and
* store it in the superblock for sysadmin convenience
diff --git a/fs/ext4/fsmap.c b/fs/ext4/fsmap.c
index b232c2767534..4c2a9fe30067 100644
--- a/fs/ext4/fsmap.c
+++ b/fs/ext4/fsmap.c
@@ -280,7 +280,7 @@ static int ext4_getfsmap_logdev(struct super_block *sb, struct ext4_fsmap *keys,
/* Fabricate an rmap entry for the external log device. */
irec.fmr_physical = journal->j_blk_offset;
- irec.fmr_length = journal->j_maxlen;
+ irec.fmr_length = journal->j_total_len;
irec.fmr_owner = EXT4_FMR_OWN_LOG;
irec.fmr_flags = 0;
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 81a545fd14a3..113bfb023a4a 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -136,14 +136,14 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
if (unlikely(ext4_forced_shutdown(sbi)))
return -EIO;
- J_ASSERT(ext4_journal_current_handle() == NULL);
+ ASSERT(ext4_journal_current_handle() == NULL);
trace_ext4_sync_file_enter(file, datasync);
if (sb_rdonly(inode->i_sb)) {
/* Make sure that we read updated s_mount_flags value */
smp_rmb();
- if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
+ if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FS_ABORTED))
ret = -EROFS;
goto out;
}
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c
index 2924261226e0..a92eb79de0cc 100644
--- a/fs/ext4/hash.c
+++ b/fs/ext4/hash.c
@@ -275,7 +275,7 @@ int ext4fs_dirhash(const struct inode *dir, const char *name, int len,
struct dx_hash_info *hinfo)
{
#ifdef CONFIG_UNICODE
- const struct unicode_map *um = EXT4_SB(dir->i_sb)->s_encoding;
+ const struct unicode_map *um = dir->i_sb->s_encoding;
int r, dlen;
unsigned char *buff;
struct qstr qstr = {.name = name, .len = len };
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index 05efa682bc2f..1223a18c3ff9 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -534,8 +534,8 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
ext4_fsblk_t first_block = 0;
trace_ext4_ind_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
- J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)));
- J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0);
+ ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)));
+ ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0);
depth = ext4_block_to_path(inode, map->m_lblk, offsets,
&blocks_to_boundary);
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index caa51473207d..b41512d1badc 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -1880,6 +1880,7 @@ int ext4_inline_data_truncate(struct inode *inode, int *has_inline)
ext4_write_lock_xattr(inode, &no_expand);
if (!ext4_has_inline_data(inode)) {
+ ext4_write_unlock_xattr(inode, &no_expand);
*has_inline = 0;
ext4_journal_stop(handle);
return 0;
diff --git a/fs/ext4/inode-test.c b/fs/ext4/inode-test.c
index d62d802c9c12..7935ea6cf92c 100644
--- a/fs/ext4/inode-test.c
+++ b/fs/ext4/inode-test.c
@@ -80,6 +80,145 @@ struct timestamp_expectation {
bool lower_bound;
};
+static const struct timestamp_expectation test_data[] = {
+ {
+ .test_case_name = LOWER_BOUND_NEG_NO_EXTRA_BITS_CASE,
+ .msb_set = true,
+ .lower_bound = true,
+ .extra_bits = 0,
+ .expected = {.tv_sec = -0x80000000LL, .tv_nsec = 0L},
+ },
+
+ {
+ .test_case_name = UPPER_BOUND_NEG_NO_EXTRA_BITS_CASE,
+ .msb_set = true,
+ .lower_bound = false,
+ .extra_bits = 0,
+ .expected = {.tv_sec = -1LL, .tv_nsec = 0L},
+ },
+
+ {
+ .test_case_name = LOWER_BOUND_NONNEG_NO_EXTRA_BITS_CASE,
+ .msb_set = false,
+ .lower_bound = true,
+ .extra_bits = 0,
+ .expected = {0LL, 0L},
+ },
+
+ {
+ .test_case_name = UPPER_BOUND_NONNEG_NO_EXTRA_BITS_CASE,
+ .msb_set = false,
+ .lower_bound = false,
+ .extra_bits = 0,
+ .expected = {.tv_sec = 0x7fffffffLL, .tv_nsec = 0L},
+ },
+
+ {
+ .test_case_name = LOWER_BOUND_NEG_LO_1_CASE,
+ .msb_set = true,
+ .lower_bound = true,
+ .extra_bits = 1,
+ .expected = {.tv_sec = 0x80000000LL, .tv_nsec = 0L},
+ },
+
+ {
+ .test_case_name = UPPER_BOUND_NEG_LO_1_CASE,
+ .msb_set = true,
+ .lower_bound = false,
+ .extra_bits = 1,
+ .expected = {.tv_sec = 0xffffffffLL, .tv_nsec = 0L},
+ },
+
+ {
+ .test_case_name = LOWER_BOUND_NONNEG_LO_1_CASE,
+ .msb_set = false,
+ .lower_bound = true,
+ .extra_bits = 1,
+ .expected = {.tv_sec = 0x100000000LL, .tv_nsec = 0L},
+ },
+
+ {
+ .test_case_name = UPPER_BOUND_NONNEG_LO_1_CASE,
+ .msb_set = false,
+ .lower_bound = false,
+ .extra_bits = 1,
+ .expected = {.tv_sec = 0x17fffffffLL, .tv_nsec = 0L},
+ },
+
+ {
+ .test_case_name = LOWER_BOUND_NEG_HI_1_CASE,
+ .msb_set = true,
+ .lower_bound = true,
+ .extra_bits = 2,
+ .expected = {.tv_sec = 0x180000000LL, .tv_nsec = 0L},
+ },
+
+ {
+ .test_case_name = UPPER_BOUND_NEG_HI_1_CASE,
+ .msb_set = true,
+ .lower_bound = false,
+ .extra_bits = 2,
+ .expected = {.tv_sec = 0x1ffffffffLL, .tv_nsec = 0L},
+ },
+
+ {
+ .test_case_name = LOWER_BOUND_NONNEG_HI_1_CASE,
+ .msb_set = false,
+ .lower_bound = true,
+ .extra_bits = 2,
+ .expected = {.tv_sec = 0x200000000LL, .tv_nsec = 0L},
+ },
+
+ {
+ .test_case_name = UPPER_BOUND_NONNEG_HI_1_CASE,
+ .msb_set = false,
+ .lower_bound = false,
+ .extra_bits = 2,
+ .expected = {.tv_sec = 0x27fffffffLL, .tv_nsec = 0L},
+ },
+
+ {
+ .test_case_name = UPPER_BOUND_NONNEG_HI_1_NS_1_CASE,
+ .msb_set = false,
+ .lower_bound = false,
+ .extra_bits = 6,
+ .expected = {.tv_sec = 0x27fffffffLL, .tv_nsec = 1L},
+ },
+
+ {
+ .test_case_name = LOWER_BOUND_NONNEG_HI_1_NS_MAX_CASE,
+ .msb_set = false,
+ .lower_bound = true,
+ .extra_bits = 0xFFFFFFFF,
+ .expected = {.tv_sec = 0x300000000LL,
+ .tv_nsec = MAX_NANOSECONDS},
+ },
+
+ {
+ .test_case_name = LOWER_BOUND_NONNEG_EXTRA_BITS_1_CASE,
+ .msb_set = false,
+ .lower_bound = true,
+ .extra_bits = 3,
+ .expected = {.tv_sec = 0x300000000LL, .tv_nsec = 0L},
+ },
+
+ {
+ .test_case_name = UPPER_BOUND_NONNEG_EXTRA_BITS_1_CASE,
+ .msb_set = false,
+ .lower_bound = false,
+ .extra_bits = 3,
+ .expected = {.tv_sec = 0x37fffffffLL, .tv_nsec = 0L},
+ }
+};
+
+static void timestamp_expectation_to_desc(const struct timestamp_expectation *t,
+ char *desc)
+{
+ strscpy(desc, t->test_case_name, KUNIT_PARAM_DESC_SIZE);
+}
+
+KUNIT_ARRAY_PARAM(ext4_inode, test_data, timestamp_expectation_to_desc);
+
static time64_t get_32bit_time(const struct timestamp_expectation * const test)
{
if (test->msb_set) {
@@ -101,166 +240,35 @@ static time64_t get_32bit_time(const struct timestamp_expectation * const test)
*/
static void inode_test_xtimestamp_decoding(struct kunit *test)
{
- const struct timestamp_expectation test_data[] = {
- {
- .test_case_name = LOWER_BOUND_NEG_NO_EXTRA_BITS_CASE,
- .msb_set = true,
- .lower_bound = true,
- .extra_bits = 0,
- .expected = {.tv_sec = -0x80000000LL, .tv_nsec = 0L},
- },
-
- {
- .test_case_name = UPPER_BOUND_NEG_NO_EXTRA_BITS_CASE,
- .msb_set = true,
- .lower_bound = false,
- .extra_bits = 0,
- .expected = {.tv_sec = -1LL, .tv_nsec = 0L},
- },
-
- {
- .test_case_name = LOWER_BOUND_NONNEG_NO_EXTRA_BITS_CASE,
- .msb_set = false,
- .lower_bound = true,
- .extra_bits = 0,
- .expected = {0LL, 0L},
- },
-
- {
- .test_case_name = UPPER_BOUND_NONNEG_NO_EXTRA_BITS_CASE,
- .msb_set = false,
- .lower_bound = false,
- .extra_bits = 0,
- .expected = {.tv_sec = 0x7fffffffLL, .tv_nsec = 0L},
- },
-
- {
- .test_case_name = LOWER_BOUND_NEG_LO_1_CASE,
- .msb_set = true,
- .lower_bound = true,
- .extra_bits = 1,
- .expected = {.tv_sec = 0x80000000LL, .tv_nsec = 0L},
- },
-
- {
- .test_case_name = UPPER_BOUND_NEG_LO_1_CASE,
- .msb_set = true,
- .lower_bound = false,
- .extra_bits = 1,
- .expected = {.tv_sec = 0xffffffffLL, .tv_nsec = 0L},
- },
-
- {
- .test_case_name = LOWER_BOUND_NONNEG_LO_1_CASE,
- .msb_set = false,
- .lower_bound = true,
- .extra_bits = 1,
- .expected = {.tv_sec = 0x100000000LL, .tv_nsec = 0L},
- },
-
- {
- .test_case_name = UPPER_BOUND_NONNEG_LO_1_CASE,
- .msb_set = false,
- .lower_bound = false,
- .extra_bits = 1,
- .expected = {.tv_sec = 0x17fffffffLL, .tv_nsec = 0L},
- },
-
- {
- .test_case_name = LOWER_BOUND_NEG_HI_1_CASE,
- .msb_set = true,
- .lower_bound = true,
- .extra_bits = 2,
- .expected = {.tv_sec = 0x180000000LL, .tv_nsec = 0L},
- },
-
- {
- .test_case_name = UPPER_BOUND_NEG_HI_1_CASE,
- .msb_set = true,
- .lower_bound = false,
- .extra_bits = 2,
- .expected = {.tv_sec = 0x1ffffffffLL, .tv_nsec = 0L},
- },
-
- {
- .test_case_name = LOWER_BOUND_NONNEG_HI_1_CASE,
- .msb_set = false,
- .lower_bound = true,
- .extra_bits = 2,
- .expected = {.tv_sec = 0x200000000LL, .tv_nsec = 0L},
- },
-
- {
- .test_case_name = UPPER_BOUND_NONNEG_HI_1_CASE,
- .msb_set = false,
- .lower_bound = false,
- .extra_bits = 2,
- .expected = {.tv_sec = 0x27fffffffLL, .tv_nsec = 0L},
- },
-
- {
- .test_case_name = UPPER_BOUND_NONNEG_HI_1_NS_1_CASE,
- .msb_set = false,
- .lower_bound = false,
- .extra_bits = 6,
- .expected = {.tv_sec = 0x27fffffffLL, .tv_nsec = 1L},
- },
-
- {
- .test_case_name = LOWER_BOUND_NONNEG_HI_1_NS_MAX_CASE,
- .msb_set = false,
- .lower_bound = true,
- .extra_bits = 0xFFFFFFFF,
- .expected = {.tv_sec = 0x300000000LL,
- .tv_nsec = MAX_NANOSECONDS},
- },
-
- {
- .test_case_name = LOWER_BOUND_NONNEG_EXTRA_BITS_1_CASE,
- .msb_set = false,
- .lower_bound = true,
- .extra_bits = 3,
- .expected = {.tv_sec = 0x300000000LL, .tv_nsec = 0L},
- },
-
- {
- .test_case_name = UPPER_BOUND_NONNEG_EXTRA_BITS_1_CASE,
- .msb_set = false,
- .lower_bound = false,
- .extra_bits = 3,
- .expected = {.tv_sec = 0x37fffffffLL, .tv_nsec = 0L},
- }
- };
-
struct timespec64 timestamp;
- int i;
-
- for (i = 0; i < ARRAY_SIZE(test_data); ++i) {
- timestamp.tv_sec = get_32bit_time(&test_data[i]);
- ext4_decode_extra_time(&timestamp,
- cpu_to_le32(test_data[i].extra_bits));
-
- KUNIT_EXPECT_EQ_MSG(test,
- test_data[i].expected.tv_sec,
- timestamp.tv_sec,
- CASE_NAME_FORMAT,
- test_data[i].test_case_name,
- test_data[i].msb_set,
- test_data[i].lower_bound,
- test_data[i].extra_bits);
- KUNIT_EXPECT_EQ_MSG(test,
- test_data[i].expected.tv_nsec,
- timestamp.tv_nsec,
- CASE_NAME_FORMAT,
- test_data[i].test_case_name,
- test_data[i].msb_set,
- test_data[i].lower_bound,
- test_data[i].extra_bits);
- }
+
+ struct timestamp_expectation *test_param =
+ (struct timestamp_expectation *)(test->param_value);
+
+ timestamp.tv_sec = get_32bit_time(test_param);
+ ext4_decode_extra_time(&timestamp,
+ cpu_to_le32(test_param->extra_bits));
+
+ KUNIT_EXPECT_EQ_MSG(test,
+ test_param->expected.tv_sec,
+ timestamp.tv_sec,
+ CASE_NAME_FORMAT,
+ test_param->test_case_name,
+ test_param->msb_set,
+ test_param->lower_bound,
+ test_param->extra_bits);
+ KUNIT_EXPECT_EQ_MSG(test,
+ test_param->expected.tv_nsec,
+ timestamp.tv_nsec,
+ CASE_NAME_FORMAT,
+ test_param->test_case_name,
+ test_param->msb_set,
+ test_param->lower_bound,
+ test_param->extra_bits);
}
static struct kunit_case ext4_inode_test_cases[] = {
- KUNIT_CASE(inode_test_xtimestamp_decoding),
+ KUNIT_CASE_PARAM(inode_test_xtimestamp_decoding, ext4_inode_gen_params),
{}
};
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 03c2253005f0..27946882d4ce 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -175,6 +175,7 @@ void ext4_evict_inode(struct inode *inode)
*/
int extra_credits = 6;
struct ext4_xattr_inode_array *ea_inode_array = NULL;
+ bool freeze_protected = false;
trace_ext4_evict_inode(inode);
@@ -232,9 +233,14 @@ void ext4_evict_inode(struct inode *inode)
/*
* Protect us against freezing - iput() caller didn't have to have any
- * protection against it
+ * protection against it. When we are in a running transaction though,
+ * we are already protected against freezing and we cannot grab further
+ * protection due to lock ordering constraints.
*/
- sb_start_intwrite(inode->i_sb);
+ if (!ext4_journal_current_handle()) {
+ sb_start_intwrite(inode->i_sb);
+ freeze_protected = true;
+ }
if (!IS_NOQUOTA(inode))
extra_credits += EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb);
@@ -253,7 +259,8 @@ void ext4_evict_inode(struct inode *inode)
* cleaned up.
*/
ext4_orphan_del(NULL, inode);
- sb_end_intwrite(inode->i_sb);
+ if (freeze_protected)
+ sb_end_intwrite(inode->i_sb);
goto no_delete;
}
@@ -294,7 +301,8 @@ void ext4_evict_inode(struct inode *inode)
stop_handle:
ext4_journal_stop(handle);
ext4_orphan_del(NULL, inode);
- sb_end_intwrite(inode->i_sb);
+ if (freeze_protected)
+ sb_end_intwrite(inode->i_sb);
ext4_xattr_inode_array_free(ea_inode_array);
goto no_delete;
}
@@ -323,10 +331,13 @@ stop_handle:
else
ext4_free_inode(handle, inode);
ext4_journal_stop(handle);
- sb_end_intwrite(inode->i_sb);
+ if (freeze_protected)
+ sb_end_intwrite(inode->i_sb);
ext4_xattr_inode_array_free(ea_inode_array);
return;
no_delete:
+ if (!list_empty(&EXT4_I(inode)->i_fc_list))
+ ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM);
ext4_clear_inode(inode); /* We must guarantee clearing of inode... */
}
@@ -730,7 +741,7 @@ out_sem:
if (ret)
return ret;
}
- ext4_fc_track_range(inode, map->m_lblk,
+ ext4_fc_track_range(handle, inode, map->m_lblk,
map->m_lblk + map->m_len - 1);
}
@@ -828,8 +839,8 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
int create = map_flags & EXT4_GET_BLOCKS_CREATE;
int err;
- J_ASSERT((EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
- || handle != NULL || create == 0);
+ ASSERT((EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
+ || handle != NULL || create == 0);
map.m_lblk = block;
map.m_len = 1;
@@ -844,9 +855,9 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
if (unlikely(!bh))
return ERR_PTR(-ENOMEM);
if (map.m_flags & EXT4_MAP_NEW) {
- J_ASSERT(create != 0);
- J_ASSERT((EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
- || (handle != NULL));
+ ASSERT(create != 0);
+ ASSERT((EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
+ || (handle != NULL));
/*
* Now that we do not always journal data, we should
@@ -1918,7 +1929,7 @@ static int __ext4_journalled_writepage(struct page *page,
}
if (ret == 0)
ret = err;
- err = ext4_jbd2_inode_add_write(handle, inode, 0, len);
+ err = ext4_jbd2_inode_add_write(handle, inode, page_offset(page), len);
if (ret == 0)
ret = err;
EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid;
@@ -2053,7 +2064,7 @@ static int ext4_writepage(struct page *page,
unlock_page(page);
return -ENOMEM;
}
- ret = ext4_bio_write_page(&io_submit, page, len, wbc, keep_towrite);
+ ret = ext4_bio_write_page(&io_submit, page, len, keep_towrite);
ext4_io_submit(&io_submit);
/* Drop io_end reference we got from init */
ext4_put_io_end_defer(io_submit.io_end);
@@ -2087,7 +2098,7 @@ static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page)
len = size & ~PAGE_MASK;
else
len = PAGE_SIZE;
- err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc, false);
+ err = ext4_bio_write_page(&mpd->io_submit, page, len, false);
if (!err)
mpd->wbc->nr_to_write--;
mpd->first_page++;
@@ -2440,7 +2451,7 @@ static int mpage_map_and_submit_extent(handle_t *handle,
struct super_block *sb = inode->i_sb;
if (ext4_forced_shutdown(EXT4_SB(sb)) ||
- EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)
+ ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED))
goto invalidate_dirty_pages;
/*
* Let the uper layers retry transient errors.
@@ -2674,7 +2685,7 @@ static int ext4_writepages(struct address_space *mapping,
* the stack trace.
*/
if (unlikely(ext4_forced_shutdown(EXT4_SB(mapping->host->i_sb)) ||
- sbi->s_mount_flags & EXT4_MF_FS_ABORTED)) {
+ ext4_test_mount_flag(inode->i_sb, EXT4_MF_FS_ABORTED))) {
ret = -EROFS;
goto out_writepages;
}
@@ -3307,10 +3318,11 @@ static bool ext4_inode_datasync_dirty(struct inode *inode)
if (journal) {
if (jbd2_transaction_committed(journal,
- EXT4_I(inode)->i_datasync_tid))
- return true;
- return atomic_read(&EXT4_SB(inode->i_sb)->s_fc_subtid) >=
- EXT4_I(inode)->i_fc_committed_subtid;
+ EXT4_I(inode)->i_datasync_tid))
+ return false;
+ if (test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT))
+ return !list_empty(&EXT4_I(inode)->i_fc_list);
+ return true;
}
/* Any metadata buffers to write? */
@@ -4107,7 +4119,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
up_write(&EXT4_I(inode)->i_data_sem);
}
- ext4_fc_track_range(inode, first_block, stop_block);
+ ext4_fc_track_range(handle, inode, first_block, stop_block);
if (IS_SYNC(inode))
ext4_handle_sync(handle);
@@ -4607,7 +4619,7 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
(ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))) {
if (flags & EXT4_IGET_HANDLE)
return ERR_PTR(-ESTALE);
- __ext4_error(sb, function, line, EFSCORRUPTED, 0,
+ __ext4_error(sb, function, line, false, EFSCORRUPTED, 0,
"inode #%lu: comm %s: iget: illegal inode #",
ino, current->comm);
return ERR_PTR(-EFSCORRUPTED);
@@ -5440,14 +5452,14 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
}
if (shrink)
- ext4_fc_track_range(inode,
+ ext4_fc_track_range(handle, inode,
(attr->ia_size > 0 ? attr->ia_size - 1 : 0) >>
inode->i_sb->s_blocksize_bits,
(oldsize > 0 ? oldsize - 1 : 0) >>
inode->i_sb->s_blocksize_bits);
else
ext4_fc_track_range(
- inode,
+ handle, inode,
(oldsize > 0 ? oldsize - 1 : oldsize) >>
inode->i_sb->s_blocksize_bits,
(attr->ia_size > 0 ? attr->ia_size - 1 : 0) >>
@@ -5697,7 +5709,7 @@ int ext4_mark_iloc_dirty(handle_t *handle,
put_bh(iloc->bh);
return -EIO;
}
- ext4_fc_track_inode(inode);
+ ext4_fc_track_inode(handle, inode);
if (IS_I_VERSION(inode))
inode_inc_iversion(inode);
@@ -6157,7 +6169,8 @@ retry_alloc:
if (ext4_walk_page_buffers(handle, page_buffers(page),
0, len, NULL, write_end_fn))
goto out_error;
- if (ext4_jbd2_inode_add_write(handle, inode, 0, len))
+ if (ext4_jbd2_inode_add_write(handle, inode,
+ page_offset(page), len))
goto out_error;
ext4_set_inode_state(inode, EXT4_STATE_JDATA);
} else {
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index f0381876a7e5..524e13432447 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -624,7 +624,7 @@ static int ext4_shutdown(struct super_block *sb, unsigned long arg)
case EXT4_GOING_FLAGS_DEFAULT:
freeze_bdev(sb->s_bdev);
set_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags);
- thaw_bdev(sb->s_bdev, sb);
+ thaw_bdev(sb->s_bdev);
break;
case EXT4_GOING_FLAGS_LOGFLUSH:
set_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 85abbfb98cbe..99bf091fee10 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -822,24 +822,6 @@ void ext4_mb_generate_buddy(struct super_block *sb,
spin_unlock(&sbi->s_bal_lock);
}
-static void mb_regenerate_buddy(struct ext4_buddy *e4b)
-{
- int count;
- int order = 1;
- void *buddy;
-
- while ((buddy = mb_find_buddy(e4b, order++, &count))) {
- ext4_set_bits(buddy, 0, count);
- }
- e4b->bd_info->bb_fragments = 0;
- memset(e4b->bd_info->bb_counters, 0,
- sizeof(*e4b->bd_info->bb_counters) *
- (e4b->bd_sb->s_blocksize_bits + 2));
-
- ext4_mb_generate_buddy(e4b->bd_sb, e4b->bd_buddy,
- e4b->bd_bitmap, e4b->bd_group);
-}
-
/* The buddy information is attached the buddy cache inode
* for convenience. The information regarding each group
* is loaded via ext4_mb_load_buddy. The information involve
@@ -1307,22 +1289,18 @@ static void ext4_mb_unload_buddy(struct ext4_buddy *e4b)
static int mb_find_order_for_block(struct ext4_buddy *e4b, int block)
{
- int order = 1;
- int bb_incr = 1 << (e4b->bd_blkbits - 1);
+ int order = 1, max;
void *bb;
BUG_ON(e4b->bd_bitmap == e4b->bd_buddy);
BUG_ON(block >= (1 << (e4b->bd_blkbits + 3)));
- bb = e4b->bd_buddy;
while (order <= e4b->bd_blkbits + 1) {
- block = block >> 1;
- if (!mb_test_bit(block, bb)) {
+ bb = mb_find_buddy(e4b, order, &max);
+ if (!mb_test_bit(block >> order, bb)) {
/* this block is part of buddy of order 'order' */
return order;
}
- bb += bb_incr;
- bb_incr >>= 1;
order++;
}
return 0;
@@ -1512,7 +1490,6 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
sb, e4b->bd_group,
EXT4_GROUP_INFO_BBITMAP_CORRUPT);
}
- mb_regenerate_buddy(e4b);
goto done;
}
@@ -2395,9 +2372,9 @@ repeat:
nr = sbi->s_mb_prefetch;
if (ext4_has_feature_flex_bg(sb)) {
- nr = (group / sbi->s_mb_prefetch) *
- sbi->s_mb_prefetch;
- nr = nr + sbi->s_mb_prefetch - group;
+ nr = 1 << sbi->s_log_groups_per_flex;
+ nr -= group & (nr - 1);
+ nr = min(nr, sbi->s_mb_prefetch);
}
prefetch_grp = ext4_mb_prefetch(sb, group,
nr, &prefetch_ios);
@@ -2733,7 +2710,8 @@ static int ext4_mb_init_backend(struct super_block *sb)
if (ext4_has_feature_flex_bg(sb)) {
/* a single flex group is supposed to be read by a single IO */
- sbi->s_mb_prefetch = 1 << sbi->s_es->s_log_groups_per_flex;
+ sbi->s_mb_prefetch = min(1 << sbi->s_es->s_log_groups_per_flex,
+ BLK_MAX_SEGMENT_SIZE >> (sb->s_blocksize_bits - 9));
sbi->s_mb_prefetch *= 8; /* 8 prefetch IOs in flight at most */
} else {
sbi->s_mb_prefetch = 32;
@@ -4477,7 +4455,7 @@ static inline void ext4_mb_show_pa(struct super_block *sb)
{
ext4_group_t i, ngroups;
- if (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)
+ if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED))
return;
ngroups = ext4_get_groups_count(sb);
@@ -4508,7 +4486,7 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
{
struct super_block *sb = ac->ac_sb;
- if (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)
+ if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED))
return;
mb_debug(sb, "Can't allocate:"
@@ -5126,6 +5104,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
ext4_group_first_block_no(sb, group) +
EXT4_C2B(sbi, cluster),
"Block already on to-be-freed list");
+ kmem_cache_free(ext4_free_data_cachep, new_entry);
return 0;
}
}
@@ -5167,7 +5146,7 @@ static ext4_fsblk_t ext4_mb_new_blocks_simple(handle_t *handle,
struct super_block *sb = ar->inode->i_sb;
ext4_group_t group;
ext4_grpblk_t blkoff;
- int i;
+ int i = sb->s_blocksize;
ext4_fsblk_t goal, block;
struct ext4_super_block *es = EXT4_SB(sb)->s_es;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 5159830dacb8..b17a082b7db1 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -182,10 +182,6 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
return bh;
}
-#ifndef assert
-#define assert(test) J_ASSERT(test)
-#endif
-
#ifdef DX_DEBUG
#define dxtrace(command) command
#else
@@ -643,13 +639,7 @@ static struct stats dx_show_leaf(struct inode *dir,
name = de->name;
len = de->name_len;
- if (IS_ENCRYPTED(dir))
- res = fscrypt_get_encryption_info(dir);
- if (res) {
- printk(KERN_WARNING "Error setting up"
- " fname crypto: %d\n", res);
- }
- if (!fscrypt_has_encryption_key(dir)) {
+ if (!IS_ENCRYPTED(dir)) {
/* Directory is not encrypted */
ext4fs_dirhash(dir, de->name,
de->name_len, &h);
@@ -849,7 +839,7 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
break;
}
}
- assert (at == p - 1);
+ ASSERT(at == p - 1);
}
at = p - 1;
@@ -1010,7 +1000,7 @@ static int htree_dirblock_to_tree(struct file *dir_file,
EXT4_DIR_REC_LEN(0));
/* Check if the directory is encrypted */
if (IS_ENCRYPTED(dir)) {
- err = fscrypt_get_encryption_info(dir);
+ err = fscrypt_prepare_readdir(dir);
if (err < 0) {
brelse(bh);
return err;
@@ -1265,8 +1255,8 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block)
struct dx_entry *old = frame->at, *new = old + 1;
int count = dx_get_count(entries);
- assert(count < dx_get_limit(entries));
- assert(old < entries + count);
+ ASSERT(count < dx_get_limit(entries));
+ ASSERT(old < entries + count);
memmove(new + 1, new, (char *)(entries + count) - (char *)(new));
dx_set_hash(new, hash);
dx_set_block(new, block);
@@ -1285,8 +1275,8 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block)
int ext4_ci_compare(const struct inode *parent, const struct qstr *name,
const struct qstr *entry, bool quick)
{
- const struct ext4_sb_info *sbi = EXT4_SB(parent->i_sb);
- const struct unicode_map *um = sbi->s_encoding;
+ const struct super_block *sb = parent->i_sb;
+ const struct unicode_map *um = sb->s_encoding;
int ret;
if (quick)
@@ -1298,7 +1288,7 @@ int ext4_ci_compare(const struct inode *parent, const struct qstr *name,
/* Handle invalid character sequence as either an error
* or as an opaque byte sequence.
*/
- if (ext4_has_strict_mode(sbi))
+ if (sb_has_strict_encoding(sb))
return -EINVAL;
if (name->len != entry->len)
@@ -1315,7 +1305,7 @@ void ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname,
{
int len;
- if (!IS_CASEFOLDED(dir) || !EXT4_SB(dir->i_sb)->s_encoding) {
+ if (!IS_CASEFOLDED(dir) || !dir->i_sb->s_encoding) {
cf_name->name = NULL;
return;
}
@@ -1324,7 +1314,7 @@ void ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname,
if (!cf_name->name)
return;
- len = utf8_casefold(EXT4_SB(dir->i_sb)->s_encoding,
+ len = utf8_casefold(dir->i_sb->s_encoding,
iname, cf_name->name,
EXT4_NAME_LEN);
if (len <= 0) {
@@ -1361,7 +1351,7 @@ static inline bool ext4_match(const struct inode *parent,
#endif
#ifdef CONFIG_UNICODE
- if (EXT4_SB(parent->i_sb)->s_encoding && IS_CASEFOLDED(parent)) {
+ if (parent->i_sb->s_encoding && IS_CASEFOLDED(parent)) {
if (fname->cf_name.name) {
struct qstr cf = {.name = fname->cf_name.name,
.len = fname->cf_name.len};
@@ -1614,6 +1604,7 @@ static struct buffer_head *ext4_lookup_entry(struct inode *dir,
struct buffer_head *bh;
err = ext4_fname_prepare_lookup(dir, dentry, &fname);
+ generic_set_encrypted_ci_d_ops(dentry);
if (err == -ENOENT)
return NULL;
if (err)
@@ -2180,9 +2171,6 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
struct buffer_head *bh = NULL;
struct ext4_dir_entry_2 *de;
struct super_block *sb;
-#ifdef CONFIG_UNICODE
- struct ext4_sb_info *sbi;
-#endif
struct ext4_filename fname;
int retval;
int dx_fallback=0;
@@ -2198,10 +2186,12 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
if (!dentry->d_name.len)
return -EINVAL;
+ if (fscrypt_is_nokey_name(dentry))
+ return -ENOKEY;
+
#ifdef CONFIG_UNICODE
- sbi = EXT4_SB(sb);
- if (ext4_has_strict_mode(sbi) && IS_CASEFOLDED(dir) &&
- sbi->s_encoding && utf8_validate(sbi->s_encoding, &dentry->d_name))
+ if (sb_has_strict_encoding(sb) && IS_CASEFOLDED(dir) &&
+ sb->s_encoding && utf8_validate(sb->s_encoding, &dentry->d_name))
return -EINVAL;
#endif
@@ -2610,7 +2600,7 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, umode_t mode,
bool excl)
{
handle_t *handle;
- struct inode *inode, *inode_save;
+ struct inode *inode;
int err, credits, retries = 0;
err = dquot_initialize(dir);
@@ -2628,11 +2618,9 @@ retry:
inode->i_op = &ext4_file_inode_operations;
inode->i_fop = &ext4_file_operations;
ext4_set_aops(inode);
- inode_save = inode;
- ihold(inode_save);
err = ext4_add_nondir(handle, dentry, &inode);
- ext4_fc_track_create(inode_save, dentry);
- iput(inode_save);
+ if (!err)
+ ext4_fc_track_create(handle, dentry);
}
if (handle)
ext4_journal_stop(handle);
@@ -2647,7 +2635,7 @@ static int ext4_mknod(struct inode *dir, struct dentry *dentry,
umode_t mode, dev_t rdev)
{
handle_t *handle;
- struct inode *inode, *inode_save;
+ struct inode *inode;
int err, credits, retries = 0;
err = dquot_initialize(dir);
@@ -2664,12 +2652,9 @@ retry:
if (!IS_ERR(inode)) {
init_special_inode(inode, inode->i_mode, rdev);
inode->i_op = &ext4_special_inode_operations;
- inode_save = inode;
- ihold(inode_save);
err = ext4_add_nondir(handle, dentry, &inode);
if (!err)
- ext4_fc_track_create(inode_save, dentry);
- iput(inode_save);
+ ext4_fc_track_create(handle, dentry);
}
if (handle)
ext4_journal_stop(handle);
@@ -2833,7 +2818,6 @@ out_clear_inode:
iput(inode);
goto out_retry;
}
- ext4_fc_track_create(inode, dentry);
ext4_inc_count(dir);
ext4_update_dx_flag(dir);
@@ -2841,6 +2825,7 @@ out_clear_inode:
if (err)
goto out_clear_inode;
d_instantiate_new(dentry, inode);
+ ext4_fc_track_create(handle, dentry);
if (IS_DIRSYNC(dir))
ext4_handle_sync(handle);
@@ -2970,7 +2955,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
* hold i_mutex, or the inode can not be referenced from outside,
* so i_nlink should not be bumped due to race
*/
- J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+ ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);
BUFFER_TRACE(sbi->s_sbh, "get_write_access");
@@ -3175,7 +3160,7 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
goto end_rmdir;
ext4_dec_count(dir);
ext4_update_dx_flag(dir);
- ext4_fc_track_unlink(inode, dentry);
+ ext4_fc_track_unlink(handle, dentry);
retval = ext4_mark_inode_dirty(handle, dir);
#ifdef CONFIG_UNICODE
@@ -3196,13 +3181,12 @@ end_rmdir:
return retval;
}
-int __ext4_unlink(struct inode *dir, const struct qstr *d_name,
+int __ext4_unlink(handle_t *handle, struct inode *dir, const struct qstr *d_name,
struct inode *inode)
{
int retval = -ENOENT;
struct buffer_head *bh;
struct ext4_dir_entry_2 *de;
- handle_t *handle = NULL;
int skip_remove_dentry = 0;
bh = ext4_find_entry(dir, d_name, &de, NULL);
@@ -3221,14 +3205,7 @@ int __ext4_unlink(struct inode *dir, const struct qstr *d_name,
if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
skip_remove_dentry = 1;
else
- goto out_bh;
- }
-
- handle = ext4_journal_start(dir, EXT4_HT_DIR,
- EXT4_DATA_TRANS_BLOCKS(dir->i_sb));
- if (IS_ERR(handle)) {
- retval = PTR_ERR(handle);
- goto out_bh;
+ goto out;
}
if (IS_DIRSYNC(dir))
@@ -3237,12 +3214,12 @@ int __ext4_unlink(struct inode *dir, const struct qstr *d_name,
if (!skip_remove_dentry) {
retval = ext4_delete_entry(handle, dir, de, bh);
if (retval)
- goto out_handle;
+ goto out;
dir->i_ctime = dir->i_mtime = current_time(dir);
ext4_update_dx_flag(dir);
retval = ext4_mark_inode_dirty(handle, dir);
if (retval)
- goto out_handle;
+ goto out;
} else {
retval = 0;
}
@@ -3256,15 +3233,14 @@ int __ext4_unlink(struct inode *dir, const struct qstr *d_name,
inode->i_ctime = current_time(inode);
retval = ext4_mark_inode_dirty(handle, inode);
-out_handle:
- ext4_journal_stop(handle);
-out_bh:
+out:
brelse(bh);
return retval;
}
static int ext4_unlink(struct inode *dir, struct dentry *dentry)
{
+ handle_t *handle;
int retval;
if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb))))
@@ -3282,9 +3258,16 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
if (retval)
goto out_trace;
- retval = __ext4_unlink(dir, &dentry->d_name, d_inode(dentry));
+ handle = ext4_journal_start(dir, EXT4_HT_DIR,
+ EXT4_DATA_TRANS_BLOCKS(dir->i_sb));
+ if (IS_ERR(handle)) {
+ retval = PTR_ERR(handle);
+ goto out_trace;
+ }
+
+ retval = __ext4_unlink(handle, dir, &dentry->d_name, d_inode(dentry));
if (!retval)
- ext4_fc_track_unlink(d_inode(dentry), dentry);
+ ext4_fc_track_unlink(handle, dentry);
#ifdef CONFIG_UNICODE
/* VFS negative dentries are incompatible with Encoding and
* Case-insensitiveness. Eventually we'll want avoid
@@ -3295,6 +3278,8 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
if (IS_CASEFOLDED(dir))
d_invalidate(dentry);
#endif
+ if (handle)
+ ext4_journal_stop(handle);
out_trace:
trace_ext4_unlink_exit(dentry, retval);
@@ -3451,7 +3436,6 @@ retry:
err = ext4_add_entry(handle, dentry, inode);
if (!err) {
- ext4_fc_track_link(inode, dentry);
err = ext4_mark_inode_dirty(handle, inode);
/* this can happen only for tmpfile being
* linked the first time
@@ -3459,6 +3443,7 @@ retry:
if (inode->i_nlink == 1)
ext4_orphan_del(handle, inode);
d_instantiate(dentry, inode);
+ ext4_fc_track_link(handle, dentry);
} else {
drop_nlink(inode);
iput(inode);
@@ -3919,9 +3904,9 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
EXT4_FC_REASON_RENAME_DIR);
} else {
if (new.inode)
- ext4_fc_track_unlink(new.inode, new.dentry);
- ext4_fc_track_link(old.inode, new.dentry);
- ext4_fc_track_unlink(old.inode, old.dentry);
+ ext4_fc_track_unlink(handle, new.dentry);
+ __ext4_fc_track_link(handle, old.inode, new.dentry);
+ __ext4_fc_track_unlink(handle, old.inode, old.dentry);
}
if (new.inode) {
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index defd2e10dfd1..03a44a0de86a 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -111,9 +111,6 @@ static void ext4_finish_bio(struct bio *bio)
unsigned under_io = 0;
unsigned long flags;
- if (!page)
- continue;
-
if (fscrypt_is_bounce_page(page)) {
bounce_page = page;
page = fscrypt_pagecache_page(bounce_page);
@@ -438,7 +435,6 @@ submit_and_retry:
int ext4_bio_write_page(struct ext4_io_submit *io,
struct page *page,
int len,
- struct writeback_control *wbc,
bool keep_towrite)
{
struct page *bounce_page = NULL;
@@ -448,6 +444,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
int ret = 0;
int nr_submitted = 0;
int nr_to_submit = 0;
+ struct writeback_control *wbc = io->io_wbc;
BUG_ON(!PageLocked(page));
BUG_ON(PageWriteback(page));
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 2fe141ff3c7e..21121787c874 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -289,18 +289,7 @@ void ext4_superblock_csum_set(struct super_block *sb)
if (!ext4_has_metadata_csum(sb))
return;
- /*
- * Locking the superblock prevents the scenario
- * where:
- * 1) a first thread pauses during checksum calculation.
- * 2) a second thread updates the superblock, recalculates
- * the checksum, and updates s_checksum
- * 3) the first thread resumes and finishes its checksum calculation
- * and updates s_checksum with a potentially stale or torn value.
- */
- lock_buffer(EXT4_SB(sb)->s_sbh);
es->s_checksum = ext4_superblock_csum(sb, es);
- unlock_buffer(EXT4_SB(sb)->s_sbh);
}
ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
@@ -415,10 +404,8 @@ void ext4_itable_unused_set(struct super_block *sb,
bg->bg_itable_unused_hi = cpu_to_le16(count >> 16);
}
-static void __ext4_update_tstamp(__le32 *lo, __u8 *hi)
+static void __ext4_update_tstamp(__le32 *lo, __u8 *hi, time64_t now)
{
- time64_t now = ktime_get_real_seconds();
-
now = clamp_val(now, 0, (1ull << 40) - 1);
*lo = cpu_to_le32(lower_32_bits(now));
@@ -430,108 +417,11 @@ static time64_t __ext4_get_tstamp(__le32 *lo, __u8 *hi)
return ((time64_t)(*hi) << 32) + le32_to_cpu(*lo);
}
#define ext4_update_tstamp(es, tstamp) \
- __ext4_update_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi)
+ __ext4_update_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi, \
+ ktime_get_real_seconds())
#define ext4_get_tstamp(es, tstamp) \
__ext4_get_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi)
-static void __save_error_info(struct super_block *sb, int error,
- __u32 ino, __u64 block,
- const char *func, unsigned int line)
-{
- struct ext4_super_block *es = EXT4_SB(sb)->s_es;
- int err;
-
- EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
- if (bdev_read_only(sb->s_bdev))
- return;
- es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
- ext4_update_tstamp(es, s_last_error_time);
- strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func));
- es->s_last_error_line = cpu_to_le32(line);
- es->s_last_error_ino = cpu_to_le32(ino);
- es->s_last_error_block = cpu_to_le64(block);
- switch (error) {
- case EIO:
- err = EXT4_ERR_EIO;
- break;
- case ENOMEM:
- err = EXT4_ERR_ENOMEM;
- break;
- case EFSBADCRC:
- err = EXT4_ERR_EFSBADCRC;
- break;
- case 0:
- case EFSCORRUPTED:
- err = EXT4_ERR_EFSCORRUPTED;
- break;
- case ENOSPC:
- err = EXT4_ERR_ENOSPC;
- break;
- case ENOKEY:
- err = EXT4_ERR_ENOKEY;
- break;
- case EROFS:
- err = EXT4_ERR_EROFS;
- break;
- case EFBIG:
- err = EXT4_ERR_EFBIG;
- break;
- case EEXIST:
- err = EXT4_ERR_EEXIST;
- break;
- case ERANGE:
- err = EXT4_ERR_ERANGE;
- break;
- case EOVERFLOW:
- err = EXT4_ERR_EOVERFLOW;
- break;
- case EBUSY:
- err = EXT4_ERR_EBUSY;
- break;
- case ENOTDIR:
- err = EXT4_ERR_ENOTDIR;
- break;
- case ENOTEMPTY:
- err = EXT4_ERR_ENOTEMPTY;
- break;
- case ESHUTDOWN:
- err = EXT4_ERR_ESHUTDOWN;
- break;
- case EFAULT:
- err = EXT4_ERR_EFAULT;
- break;
- default:
- err = EXT4_ERR_UNKNOWN;
- }
- es->s_last_error_errcode = err;
- if (!es->s_first_error_time) {
- es->s_first_error_time = es->s_last_error_time;
- es->s_first_error_time_hi = es->s_last_error_time_hi;
- strncpy(es->s_first_error_func, func,
- sizeof(es->s_first_error_func));
- es->s_first_error_line = cpu_to_le32(line);
- es->s_first_error_ino = es->s_last_error_ino;
- es->s_first_error_block = es->s_last_error_block;
- es->s_first_error_errcode = es->s_last_error_errcode;
- }
- /*
- * Start the daily error reporting function if it hasn't been
- * started already
- */
- if (!es->s_error_count)
- mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ);
- le32_add_cpu(&es->s_error_count, 1);
-}
-
-static void save_error_info(struct super_block *sb, int error,
- __u32 ino, __u64 block,
- const char *func, unsigned int line)
-{
- __save_error_info(sb, error, ino, block, func, line);
- if (!bdev_read_only(sb->s_bdev))
- ext4_commit_super(sb, 1);
-}
-
/*
* The del_gendisk() function uninitializes the disk-specific data
* structures, including the bdi structure, without telling anyone
@@ -660,6 +550,83 @@ static bool system_going_down(void)
|| system_state == SYSTEM_RESTART;
}
+struct ext4_err_translation {
+ int code;
+ int errno;
+};
+
+#define EXT4_ERR_TRANSLATE(err) { .code = EXT4_ERR_##err, .errno = err }
+
+static struct ext4_err_translation err_translation[] = {
+ EXT4_ERR_TRANSLATE(EIO),
+ EXT4_ERR_TRANSLATE(ENOMEM),
+ EXT4_ERR_TRANSLATE(EFSBADCRC),
+ EXT4_ERR_TRANSLATE(EFSCORRUPTED),
+ EXT4_ERR_TRANSLATE(ENOSPC),
+ EXT4_ERR_TRANSLATE(ENOKEY),
+ EXT4_ERR_TRANSLATE(EROFS),
+ EXT4_ERR_TRANSLATE(EFBIG),
+ EXT4_ERR_TRANSLATE(EEXIST),
+ EXT4_ERR_TRANSLATE(ERANGE),
+ EXT4_ERR_TRANSLATE(EOVERFLOW),
+ EXT4_ERR_TRANSLATE(EBUSY),
+ EXT4_ERR_TRANSLATE(ENOTDIR),
+ EXT4_ERR_TRANSLATE(ENOTEMPTY),
+ EXT4_ERR_TRANSLATE(ESHUTDOWN),
+ EXT4_ERR_TRANSLATE(EFAULT),
+};
+
+static int ext4_errno_to_code(int errno)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(err_translation); i++)
+ if (err_translation[i].errno == errno)
+ return err_translation[i].code;
+ return EXT4_ERR_UNKNOWN;
+}
+
+static void __save_error_info(struct super_block *sb, int error,
+ __u32 ino, __u64 block,
+ const char *func, unsigned int line)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+ EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
+ if (bdev_read_only(sb->s_bdev))
+ return;
+ /* We default to EFSCORRUPTED error... */
+ if (error == 0)
+ error = EFSCORRUPTED;
+
+ spin_lock(&sbi->s_error_lock);
+ sbi->s_add_error_count++;
+ sbi->s_last_error_code = error;
+ sbi->s_last_error_line = line;
+ sbi->s_last_error_ino = ino;
+ sbi->s_last_error_block = block;
+ sbi->s_last_error_func = func;
+ sbi->s_last_error_time = ktime_get_real_seconds();
+ if (!sbi->s_first_error_time) {
+ sbi->s_first_error_code = error;
+ sbi->s_first_error_line = line;
+ sbi->s_first_error_ino = ino;
+ sbi->s_first_error_block = block;
+ sbi->s_first_error_func = func;
+ sbi->s_first_error_time = sbi->s_last_error_time;
+ }
+ spin_unlock(&sbi->s_error_lock);
+}
+
+static void save_error_info(struct super_block *sb, int error,
+ __u32 ino, __u64 block,
+ const char *func, unsigned int line)
+{
+ __save_error_info(sb, error, ino, block, func, line);
+ if (!bdev_read_only(sb->s_bdev))
+ ext4_commit_super(sb, 1);
+}
+
/* Deal with the reporting of failure conditions on a filesystem such as
* inconsistencies detected or read IO failures.
*
@@ -673,40 +640,50 @@ static bool system_going_down(void)
* We'll just use the jbd2_journal_abort() error code to record an error in
* the journal instead. On recovery, the journal will complain about
* that error until we've noted it down and cleared it.
+ *
+ * If force_ro is set, we unconditionally force the filesystem into an
+ * ABORT|READONLY state, unless the error response on the fs has been set to
+ * panic in which case we take the easy way out and panic immediately. This is
+ * used to deal with unrecoverable failures such as journal IO errors or ENOMEM
+ * at a critical moment in log management.
*/
-
-static void ext4_handle_error(struct super_block *sb)
+static void ext4_handle_error(struct super_block *sb, bool force_ro)
{
+ journal_t *journal = EXT4_SB(sb)->s_journal;
+
if (test_opt(sb, WARN_ON_ERROR))
WARN_ON_ONCE(1);
- if (sb_rdonly(sb))
+ if (sb_rdonly(sb) || (!force_ro && test_opt(sb, ERRORS_CONT)))
return;
- if (!test_opt(sb, ERRORS_CONT)) {
- journal_t *journal = EXT4_SB(sb)->s_journal;
-
- EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
- if (journal)
- jbd2_journal_abort(journal, -EIO);
- }
+ ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED);
+ if (journal)
+ jbd2_journal_abort(journal, -EIO);
/*
* We force ERRORS_RO behavior when system is rebooting. Otherwise we
* could panic during 'reboot -f' as the underlying device got already
* disabled.
*/
- if (test_opt(sb, ERRORS_RO) || system_going_down()) {
- ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
- /*
- * Make sure updated value of ->s_mount_flags will be visible
- * before ->s_flags update
- */
- smp_wmb();
- sb->s_flags |= SB_RDONLY;
- } else if (test_opt(sb, ERRORS_PANIC)) {
+ if (test_opt(sb, ERRORS_PANIC) && !system_going_down()) {
panic("EXT4-fs (device %s): panic forced after error\n",
sb->s_id);
}
+ ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
+ /*
+ * Make sure updated value of ->s_mount_flags will be visible before
+ * ->s_flags update
+ */
+ smp_wmb();
+ sb->s_flags |= SB_RDONLY;
+}
+
+static void flush_stashed_error_work(struct work_struct *work)
+{
+ struct ext4_sb_info *sbi = container_of(work, struct ext4_sb_info,
+ s_error_work);
+
+ ext4_commit_super(sbi->s_sb, 1);
}
#define ext4_error_ratelimit(sb) \
@@ -714,7 +691,7 @@ static void ext4_handle_error(struct super_block *sb)
"EXT4-fs error")
void __ext4_error(struct super_block *sb, const char *function,
- unsigned int line, int error, __u64 block,
+ unsigned int line, bool force_ro, int error, __u64 block,
const char *fmt, ...)
{
struct va_format vaf;
@@ -734,7 +711,7 @@ void __ext4_error(struct super_block *sb, const char *function,
va_end(args);
}
save_error_info(sb, error, 0, block, function, line);
- ext4_handle_error(sb);
+ ext4_handle_error(sb, force_ro);
}
void __ext4_error_inode(struct inode *inode, const char *function,
@@ -766,7 +743,7 @@ void __ext4_error_inode(struct inode *inode, const char *function,
}
save_error_info(inode->i_sb, error, inode->i_ino, block,
function, line);
- ext4_handle_error(inode->i_sb);
+ ext4_handle_error(inode->i_sb, false);
}
void __ext4_error_file(struct file *file, const char *function,
@@ -805,7 +782,7 @@ void __ext4_error_file(struct file *file, const char *function,
}
save_error_info(inode->i_sb, EFSCORRUPTED, inode->i_ino, block,
function, line);
- ext4_handle_error(inode->i_sb);
+ ext4_handle_error(inode->i_sb, false);
}
const char *ext4_decode_error(struct super_block *sb, int errno,
@@ -873,51 +850,7 @@ void __ext4_std_error(struct super_block *sb, const char *function,
}
save_error_info(sb, -errno, 0, 0, function, line);
- ext4_handle_error(sb);
-}
-
-/*
- * ext4_abort is a much stronger failure handler than ext4_error. The
- * abort function may be used to deal with unrecoverable failures such
- * as journal IO errors or ENOMEM at a critical moment in log management.
- *
- * We unconditionally force the filesystem into an ABORT|READONLY state,
- * unless the error response on the fs has been set to panic in which
- * case we take the easy way out and panic immediately.
- */
-
-void __ext4_abort(struct super_block *sb, const char *function,
- unsigned int line, int error, const char *fmt, ...)
-{
- struct va_format vaf;
- va_list args;
-
- if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
- return;
-
- save_error_info(sb, error, 0, 0, function, line);
- va_start(args, fmt);
- vaf.fmt = fmt;
- vaf.va = &args;
- printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: %pV\n",
- sb->s_id, function, line, &vaf);
- va_end(args);
-
- if (sb_rdonly(sb) == 0) {
- EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
- if (EXT4_SB(sb)->s_journal)
- jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
-
- ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
- /*
- * Make sure updated value of ->s_mount_flags will be visible
- * before ->s_flags update
- */
- smp_wmb();
- sb->s_flags |= SB_RDONLY;
- }
- if (test_opt(sb, ERRORS_PANIC) && !system_going_down())
- panic("EXT4-fs panic from previous error\n");
+ ext4_handle_error(sb, false);
}
void __ext4_msg(struct super_block *sb,
@@ -993,8 +926,6 @@ __acquires(bitlock)
return;
trace_ext4_error(sb, function, line);
- __save_error_info(sb, EFSCORRUPTED, ino, block, function, line);
-
if (ext4_error_ratelimit(sb)) {
va_start(args, fmt);
vaf.fmt = fmt;
@@ -1010,17 +941,16 @@ __acquires(bitlock)
va_end(args);
}
- if (test_opt(sb, WARN_ON_ERROR))
- WARN_ON_ONCE(1);
-
if (test_opt(sb, ERRORS_CONT)) {
- ext4_commit_super(sb, 0);
+ if (test_opt(sb, WARN_ON_ERROR))
+ WARN_ON_ONCE(1);
+ __save_error_info(sb, EFSCORRUPTED, ino, block, function, line);
+ schedule_work(&EXT4_SB(sb)->s_error_work);
return;
}
-
ext4_unlock_group(sb, grp);
- ext4_commit_super(sb, 1);
- ext4_handle_error(sb);
+ save_error_info(sb, EFSCORRUPTED, ino, block, function, line);
+ ext4_handle_error(sb, false);
/*
* We only get here in the ERRORS_RO case; relocking the group
* may be dangerous, but nothing bad will happen since the
@@ -1192,6 +1122,7 @@ static void ext4_put_super(struct super_block *sb)
ext4_unregister_li_request(sb);
ext4_quota_off_umount(sb);
+ flush_work(&sbi->s_error_work);
destroy_workqueue(sbi->rsv_conversion_wq);
/*
@@ -1251,7 +1182,7 @@ static void ext4_put_super(struct super_block *sb)
* in-memory list had better be clean by this point. */
if (!list_empty(&sbi->s_orphan))
dump_orphan_list(sb, sbi);
- J_ASSERT(list_empty(&sbi->s_orphan));
+ ASSERT(list_empty(&sbi->s_orphan));
sync_blockdev(sb->s_bdev);
invalidate_bdev(sb->s_bdev);
@@ -1288,7 +1219,7 @@ static void ext4_put_super(struct super_block *sb)
fs_put_dax(sbi->s_daxdev);
fscrypt_free_dummy_policy(&sbi->s_dummy_enc_policy);
#ifdef CONFIG_UNICODE
- utf8_unload(sbi->s_encoding);
+ utf8_unload(sb->s_encoding);
#endif
kfree(sbi);
}
@@ -1716,11 +1647,10 @@ enum {
Opt_dioread_nolock, Opt_dioread_lock,
Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache,
- Opt_prefetch_block_bitmaps, Opt_no_fc,
+ Opt_prefetch_block_bitmaps,
#ifdef CONFIG_EXT4_DEBUG
- Opt_fc_debug_max_replay,
+ Opt_fc_debug_max_replay, Opt_fc_debug_force
#endif
- Opt_fc_debug_force
};
static const match_table_t tokens = {
@@ -1807,9 +1737,8 @@ static const match_table_t tokens = {
{Opt_init_itable, "init_itable=%u"},
{Opt_init_itable, "init_itable"},
{Opt_noinit_itable, "noinit_itable"},
- {Opt_no_fc, "no_fc"},
- {Opt_fc_debug_force, "fc_debug_force"},
#ifdef CONFIG_EXT4_DEBUG
+ {Opt_fc_debug_force, "fc_debug_force"},
{Opt_fc_debug_max_replay, "fc_debug_max_replay=%u"},
#endif
{Opt_max_dir_size_kb, "max_dir_size_kb=%u"},
@@ -2027,8 +1956,8 @@ static const struct mount_opts {
{Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA |
EXT4_MOUNT_GRPQUOTA | EXT4_MOUNT_PRJQUOTA),
MOPT_CLEAR | MOPT_Q},
- {Opt_usrjquota, 0, MOPT_Q},
- {Opt_grpjquota, 0, MOPT_Q},
+ {Opt_usrjquota, 0, MOPT_Q | MOPT_STRING},
+ {Opt_grpjquota, 0, MOPT_Q | MOPT_STRING},
{Opt_offusrjquota, 0, MOPT_Q},
{Opt_offgrpjquota, 0, MOPT_Q},
{Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT},
@@ -2039,11 +1968,9 @@ static const struct mount_opts {
{Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET},
{Opt_prefetch_block_bitmaps, EXT4_MOUNT_PREFETCH_BLOCK_BITMAPS,
MOPT_SET},
- {Opt_no_fc, EXT4_MOUNT2_JOURNAL_FAST_COMMIT,
- MOPT_CLEAR | MOPT_2 | MOPT_EXT4_ONLY},
+#ifdef CONFIG_EXT4_DEBUG
{Opt_fc_debug_force, EXT4_MOUNT2_JOURNAL_FAST_COMMIT,
MOPT_SET | MOPT_2 | MOPT_EXT4_ONLY},
-#ifdef CONFIG_EXT4_DEBUG
{Opt_fc_debug_max_replay, 0, MOPT_GTE0},
#endif
{Opt_err, 0, 0}
@@ -2153,7 +2080,7 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
ext4_msg(sb, KERN_WARNING, "Ignoring removed %s option", opt);
return 1;
case Opt_abort:
- sbi->s_mount_flags |= EXT4_MF_FS_ABORTED;
+ ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED);
return 1;
case Opt_i_version:
sb->s_flags |= SB_I_VERSION;
@@ -2653,10 +2580,6 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
} else if (test_opt2(sb, DAX_INODE)) {
SEQ_OPTS_PUTS("dax=inode");
}
-
- if (test_opt2(sb, JOURNAL_FAST_COMMIT))
- SEQ_OPTS_PUTS("fast_commit");
-
ext4_show_quota_options(seq, sb);
return 0;
}
@@ -3976,7 +3899,7 @@ int ext4_calculate_overhead(struct super_block *sb)
* loaded or not
*/
if (sbi->s_journal && !sbi->s_journal_bdev)
- overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_maxlen);
+ overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_total_len);
else if (ext4_has_feature_journal(sb) && !sbi->s_journal && j_inum) {
/* j_inum for internal journal is non-zero */
j_inode = ext4_get_journal_inode(sb, j_inum);
@@ -4024,6 +3947,21 @@ static void ext4_set_resv_clusters(struct super_block *sb)
atomic64_set(&sbi->s_resv_clusters, resv_clusters);
}
+static const char *ext4_quota_mode(struct super_block *sb)
+{
+#ifdef CONFIG_QUOTA
+ if (!ext4_quota_capable(sb))
+ return "none";
+
+ if (EXT4_SB(sb)->s_journal && ext4_is_quota_journalled(sb))
+ return "journalled";
+ else
+ return "writeback";
+#else
+ return "disabled";
+#endif
+}
+
static int ext4_fill_super(struct super_block *sb, void *data, int silent)
{
struct dax_device *dax_dev = fs_dax_get_by_bdev(sb->s_bdev);
@@ -4063,9 +4001,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_sb = sb;
sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
sbi->s_sb_block = sb_block;
- if (sb->s_bdev->bd_part)
- sbi->s_sectors_written_start =
- part_stat_read(sb->s_bdev->bd_part, sectors[STAT_WRITE]);
+ sbi->s_sectors_written_start =
+ part_stat_read(sb->s_bdev, sectors[STAT_WRITE]);
/* Cleanup superblock name */
strreplace(sb->s_id, '/', '!');
@@ -4093,7 +4030,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
if (IS_ERR(bh)) {
ext4_msg(sb, KERN_ERR, "unable to read superblock");
ret = PTR_ERR(bh);
- bh = NULL;
goto out_fail;
}
/*
@@ -4207,18 +4143,25 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
*/
sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
- blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
-
- if (blocksize == PAGE_SIZE)
- set_opt(sb, DIOREAD_NOLOCK);
-
- if (blocksize < EXT4_MIN_BLOCK_SIZE ||
- blocksize > EXT4_MAX_BLOCK_SIZE) {
+ if (le32_to_cpu(es->s_log_block_size) >
+ (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
ext4_msg(sb, KERN_ERR,
- "Unsupported filesystem blocksize %d (%d log_block_size)",
- blocksize, le32_to_cpu(es->s_log_block_size));
+ "Invalid log block size: %u",
+ le32_to_cpu(es->s_log_block_size));
goto failed_mount;
}
+ if (le32_to_cpu(es->s_log_cluster_size) >
+ (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
+ ext4_msg(sb, KERN_ERR,
+ "Invalid log cluster size: %u",
+ le32_to_cpu(es->s_log_cluster_size));
+ goto failed_mount;
+ }
+
+ blocksize = EXT4_MIN_BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
+
+ if (blocksize == PAGE_SIZE)
+ set_opt(sb, DIOREAD_NOLOCK);
if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
@@ -4303,7 +4246,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount;
#ifdef CONFIG_UNICODE
- if (ext4_has_feature_casefold(sb) && !sbi->s_encoding) {
+ if (ext4_has_feature_casefold(sb) && !sb->s_encoding) {
const struct ext4_sb_encodings *encoding_info;
struct unicode_map *encoding;
__u16 encoding_flags;
@@ -4334,15 +4277,16 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
"%s-%s with flags 0x%hx", encoding_info->name,
encoding_info->version?:"\b", encoding_flags);
- sbi->s_encoding = encoding;
- sbi->s_encoding_flags = encoding_flags;
+ sb->s_encoding = encoding;
+ sb->s_encoding_flags = encoding_flags;
}
#endif
if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
- printk_once(KERN_WARNING "EXT4-fs: Warning: mounting with data=journal disables delayed allocation, dioread_nolock, and O_DIRECT support!\n");
+ printk_once(KERN_WARNING "EXT4-fs: Warning: mounting with data=journal disables delayed allocation, dioread_nolock, O_DIRECT and fast_commit support!\n");
/* can't mount with both data=journal and dioread_nolock. */
clear_opt(sb, DIOREAD_NOLOCK);
+ clear_opt2(sb, JOURNAL_FAST_COMMIT);
if (test_opt2(sb, EXPLICIT_DELALLOC)) {
ext4_msg(sb, KERN_ERR, "can't mount with "
"both data=journal and delalloc");
@@ -4436,21 +4380,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
if (!ext4_feature_set_ok(sb, (sb_rdonly(sb))))
goto failed_mount;
- if (le32_to_cpu(es->s_log_block_size) >
- (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
- ext4_msg(sb, KERN_ERR,
- "Invalid log block size: %u",
- le32_to_cpu(es->s_log_block_size));
- goto failed_mount;
- }
- if (le32_to_cpu(es->s_log_cluster_size) >
- (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
- ext4_msg(sb, KERN_ERR,
- "Invalid log cluster size: %u",
- le32_to_cpu(es->s_log_cluster_size));
- goto failed_mount;
- }
-
if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (blocksize / 4)) {
ext4_msg(sb, KERN_ERR,
"Number of reserved GDT blocks insanely large: %d",
@@ -4721,7 +4650,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
"can't read group descriptor %d", i);
db_count = i;
ret = PTR_ERR(bh);
- bh = NULL;
goto failed_mount2;
}
rcu_read_lock();
@@ -4736,6 +4664,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
}
timer_setup(&sbi->s_err_report, print_daily_error_info, 0);
+ spin_lock_init(&sbi->s_error_lock);
+ INIT_WORK(&sbi->s_error_work, flush_stashed_error_work);
/* Register extent status tree shrinker */
if (ext4_es_register_shrinker(sbi))
@@ -4777,8 +4707,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_MAIN]);
INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_STAGING]);
sbi->s_fc_bytes = 0;
- sbi->s_mount_state &= ~EXT4_FC_INELIGIBLE;
- sbi->s_mount_state &= ~EXT4_FC_COMMITTING;
+ ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
+ ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING);
spin_lock_init(&sbi->s_fc_lock);
memset(&sbi->s_fc_stats, 0, sizeof(sbi->s_fc_stats));
sbi->s_fc_replay_state.fc_regions = NULL;
@@ -4857,6 +4787,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount_wq;
}
+ if (test_opt2(sb, JOURNAL_FAST_COMMIT) &&
+ !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
+ JBD2_FEATURE_INCOMPAT_FAST_COMMIT)) {
+ ext4_msg(sb, KERN_ERR,
+ "Failed to set fast commit journal feature");
+ goto failed_mount_wq;
+ }
+
/* We have now updated the journal if required, so we can
* validate the data journaling mode. */
switch (test_opt(sb, DATA_FLAGS)) {
@@ -4883,6 +4821,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
"requested data journaling mode");
goto failed_mount_wq;
}
+ break;
default:
break;
}
@@ -4974,11 +4913,6 @@ no_journal:
goto failed_mount4;
}
-#ifdef CONFIG_UNICODE
- if (sbi->s_encoding)
- sb->s_d_op = &ext4_dentry_ops;
-#endif
-
sb->s_root = d_make_root(root);
if (!sb->s_root) {
ext4_msg(sb, KERN_ERR, "get root dentry failed");
@@ -5016,13 +4950,11 @@ no_journal:
block = ext4_count_free_clusters(sb);
ext4_free_blocks_count_set(sbi->s_es,
EXT4_C2B(sbi, block));
- ext4_superblock_csum_set(sb);
err = percpu_counter_init(&sbi->s_freeclusters_counter, block,
GFP_KERNEL);
if (!err) {
unsigned long freei = ext4_count_free_inodes(sb);
sbi->s_es->s_free_inodes_count = cpu_to_le32(freei);
- ext4_superblock_csum_set(sb);
err = percpu_counter_init(&sbi->s_freeinodes_counter, freei,
GFP_KERNEL);
}
@@ -5102,10 +5034,11 @@ no_journal:
if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs mount"))
ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
- "Opts: %.*s%s%s", descr,
+ "Opts: %.*s%s%s. Quota mode: %s.", descr,
(int) sizeof(sbi->s_es->s_mount_opts),
sbi->s_es->s_mount_opts,
- *sbi->s_es->s_mount_opts ? "; " : "", orig_data);
+ *sbi->s_es->s_mount_opts ? "; " : "", orig_data,
+ ext4_quota_mode(sb));
if (es->s_error_count)
mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
@@ -5170,6 +5103,7 @@ failed_mount3a:
ext4_es_unregister_shrinker(sbi);
failed_mount3:
del_timer_sync(&sbi->s_err_report);
+ flush_work(&sbi->s_error_work);
if (sbi->s_mmp_tsk)
kthread_stop(sbi->s_mmp_tsk);
failed_mount2:
@@ -5184,7 +5118,7 @@ failed_mount:
crypto_free_shash(sbi->s_chksum_driver);
#ifdef CONFIG_UNICODE
- utf8_unload(sbi->s_encoding);
+ utf8_unload(sb->s_encoding);
#endif
#ifdef CONFIG_QUOTA
@@ -5496,6 +5430,7 @@ err_out:
static int ext4_commit_super(struct super_block *sb, int sync)
{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = EXT4_SB(sb)->s_es;
struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
int error = 0;
@@ -5515,15 +5450,10 @@ static int ext4_commit_super(struct super_block *sb, int sync)
*/
if (!(sb->s_flags & SB_RDONLY))
ext4_update_tstamp(es, s_wtime);
- if (sb->s_bdev->bd_part)
- es->s_kbytes_written =
- cpu_to_le64(EXT4_SB(sb)->s_kbytes_written +
- ((part_stat_read(sb->s_bdev->bd_part,
- sectors[STAT_WRITE]) -
- EXT4_SB(sb)->s_sectors_written_start) >> 1));
- else
- es->s_kbytes_written =
- cpu_to_le64(EXT4_SB(sb)->s_kbytes_written);
+ es->s_kbytes_written =
+ cpu_to_le64(EXT4_SB(sb)->s_kbytes_written +
+ ((part_stat_read(sb->s_bdev, sectors[STAT_WRITE]) -
+ EXT4_SB(sb)->s_sectors_written_start) >> 1));
if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeclusters_counter))
ext4_free_blocks_count_set(es,
EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive(
@@ -5532,6 +5462,46 @@ static int ext4_commit_super(struct super_block *sb, int sync)
es->s_free_inodes_count =
cpu_to_le32(percpu_counter_sum_positive(
&EXT4_SB(sb)->s_freeinodes_counter));
+ /* Copy error information to the on-disk superblock */
+ spin_lock(&sbi->s_error_lock);
+ if (sbi->s_add_error_count > 0) {
+ es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
+ if (!es->s_first_error_time && !es->s_first_error_time_hi) {
+ __ext4_update_tstamp(&es->s_first_error_time,
+ &es->s_first_error_time_hi,
+ sbi->s_first_error_time);
+ strncpy(es->s_first_error_func, sbi->s_first_error_func,
+ sizeof(es->s_first_error_func));
+ es->s_first_error_line =
+ cpu_to_le32(sbi->s_first_error_line);
+ es->s_first_error_ino =
+ cpu_to_le32(sbi->s_first_error_ino);
+ es->s_first_error_block =
+ cpu_to_le64(sbi->s_first_error_block);
+ es->s_first_error_errcode =
+ ext4_errno_to_code(sbi->s_first_error_code);
+ }
+ __ext4_update_tstamp(&es->s_last_error_time,
+ &es->s_last_error_time_hi,
+ sbi->s_last_error_time);
+ strncpy(es->s_last_error_func, sbi->s_last_error_func,
+ sizeof(es->s_last_error_func));
+ es->s_last_error_line = cpu_to_le32(sbi->s_last_error_line);
+ es->s_last_error_ino = cpu_to_le32(sbi->s_last_error_ino);
+ es->s_last_error_block = cpu_to_le64(sbi->s_last_error_block);
+ es->s_last_error_errcode =
+ ext4_errno_to_code(sbi->s_last_error_code);
+ /*
+ * Start the daily error reporting function if it hasn't been
+ * started already
+ */
+ if (!es->s_error_count)
+ mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ);
+ le32_add_cpu(&es->s_error_count, sbi->s_add_error_count);
+ sbi->s_add_error_count = 0;
+ }
+ spin_unlock(&sbi->s_error_lock);
+
BUFFER_TRACE(sbh, "marking dirty");
ext4_superblock_csum_set(sb);
if (sync)
@@ -5872,7 +5842,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
goto restore_opts;
}
- if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
+ if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED))
ext4_abort(sb, EXT4_ERR_ESHUTDOWN, "Abort forced by user");
sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
@@ -5885,8 +5855,11 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
}
+ /* Flush outstanding errors before changing fs state */
+ flush_work(&sbi->s_error_work);
+
if ((bool)(*flags & SB_RDONLY) != sb_rdonly(sb)) {
- if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) {
+ if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED)) {
err = -EROFS;
goto restore_opts;
}
@@ -6043,7 +6016,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
*/
*flags = (*flags & ~vfs_flags) | (sb->s_flags & vfs_flags);
- ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data);
+ ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s. Quota mode: %s.",
+ orig_data, ext4_quota_mode(sb));
kfree(orig_data);
return 0;
@@ -6222,11 +6196,8 @@ static int ext4_release_dquot(struct dquot *dquot)
static int ext4_mark_dquot_dirty(struct dquot *dquot)
{
struct super_block *sb = dquot->dq_sb;
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- /* Are we journaling quotas? */
- if (ext4_has_feature_quota(sb) ||
- sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
+ if (ext4_is_quota_journalled(sb)) {
dquot_mark_dquot_dirty(dquot);
return ext4_write_dquot(dquot);
} else {
@@ -6560,10 +6531,6 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
brelse(bh);
out:
if (inode->i_size < off + len) {
- ext4_fc_track_range(inode,
- (inode->i_size > 0 ? inode->i_size - 1 : 0)
- >> inode->i_sb->s_blocksize_bits,
- (off + len) >> inode->i_sb->s_blocksize_bits);
i_size_write(inode, off + len);
EXT4_I(inode)->i_disksize = inode->i_size;
err2 = ext4_mark_inode_dirty(handle, inode);
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index 5ff33d18996a..075aa3a19ff5 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -62,11 +62,8 @@ static ssize_t session_write_kbytes_show(struct ext4_sb_info *sbi, char *buf)
{
struct super_block *sb = sbi->s_buddy_cache->i_sb;
- if (!sb->s_bdev->bd_part)
- return snprintf(buf, PAGE_SIZE, "0\n");
return snprintf(buf, PAGE_SIZE, "%lu\n",
- (part_stat_read(sb->s_bdev->bd_part,
- sectors[STAT_WRITE]) -
+ (part_stat_read(sb->s_bdev, sectors[STAT_WRITE]) -
sbi->s_sectors_written_start) >> 1);
}
@@ -74,12 +71,9 @@ static ssize_t lifetime_write_kbytes_show(struct ext4_sb_info *sbi, char *buf)
{
struct super_block *sb = sbi->s_buddy_cache->i_sb;
- if (!sb->s_bdev->bd_part)
- return snprintf(buf, PAGE_SIZE, "0\n");
return snprintf(buf, PAGE_SIZE, "%llu\n",
(unsigned long long)(sbi->s_kbytes_written +
- ((part_stat_read(sb->s_bdev->bd_part,
- sectors[STAT_WRITE]) -
+ ((part_stat_read(sb->s_bdev, sectors[STAT_WRITE]) -
EXT4_SB(sb)->s_sectors_written_start) >> 1)));
}
@@ -315,6 +309,7 @@ EXT4_ATTR_FEATURE(casefold);
EXT4_ATTR_FEATURE(verity);
#endif
EXT4_ATTR_FEATURE(metadata_csum_seed);
+EXT4_ATTR_FEATURE(fast_commit);
static struct attribute *ext4_feat_attrs[] = {
ATTR_LIST(lazy_itable_init),
@@ -331,6 +326,7 @@ static struct attribute *ext4_feat_attrs[] = {
ATTR_LIST(verity),
#endif
ATTR_LIST(metadata_csum_seed),
+ ATTR_LIST(fast_commit),
NULL,
};
ATTRIBUTE_GROUPS(ext4_feat);
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 6127e94ea4f5..4e3b1f8c2e81 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1927,7 +1927,6 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
} else {
/* Allocate a buffer where we construct the new block. */
s->base = kzalloc(sb->s_blocksize, GFP_NOFS);
- /* assert(header == s->base) */
error = -ENOMEM;
if (s->base == NULL)
goto cleanup;