aboutsummaryrefslogtreecommitdiff
path: root/fs/ext4/mballoc.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4/mballoc.c')
-rw-r--r--fs/ext4/mballoc.c202
1 files changed, 113 insertions, 89 deletions
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index f44f668e407f..12b3f196010b 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -564,14 +564,14 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
blocknr += EXT4_C2B(EXT4_SB(sb), first + i);
+ ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
+ EXT4_GROUP_INFO_BBITMAP_CORRUPT);
ext4_grp_locked_error(sb, e4b->bd_group,
inode ? inode->i_ino : 0,
blocknr,
"freeing block already freed "
"(bit %u)",
first + i);
- ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
- EXT4_GROUP_INFO_BBITMAP_CORRUPT);
}
mb_clear_bit(first + i, e4b->bd_info->bb_bitmap);
}
@@ -677,7 +677,7 @@ do { \
} \
} while (0)
-static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
+static void __mb_check_buddy(struct ext4_buddy *e4b, char *file,
const char *function, int line)
{
struct super_block *sb = e4b->bd_sb;
@@ -696,7 +696,7 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
void *buddy2;
if (e4b->bd_info->bb_check_counter++ % 10)
- return 0;
+ return;
while (order > 1) {
buddy = mb_find_buddy(e4b, order, &max);
@@ -758,7 +758,7 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
grp = ext4_get_group_info(sb, e4b->bd_group);
if (!grp)
- return NULL;
+ return;
list_for_each(cur, &grp->bb_prealloc_list) {
ext4_group_t groupnr;
struct ext4_prealloc_space *pa;
@@ -768,7 +768,6 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
for (i = 0; i < pa->pa_len; i++)
MB_CHECK_ASSERT(mb_test_bit(k + i, buddy));
}
- return 0;
}
#undef MB_CHECK_ASSERT
#define mb_check_buddy(e4b) __mb_check_buddy(e4b, \
@@ -842,7 +841,7 @@ mb_update_avg_fragment_size(struct super_block *sb, struct ext4_group_info *grp)
struct ext4_sb_info *sbi = EXT4_SB(sb);
int new_order;
- if (!test_opt2(sb, MB_OPTIMIZE_SCAN) || grp->bb_free == 0)
+ if (!test_opt2(sb, MB_OPTIMIZE_SCAN) || grp->bb_fragments == 0)
return;
new_order = mb_avg_fragment_size_order(sb,
@@ -871,7 +870,7 @@ mb_update_avg_fragment_size(struct super_block *sb, struct ext4_group_info *grp)
* cr level needs an update.
*/
static void ext4_mb_choose_next_group_p2_aligned(struct ext4_allocation_context *ac,
- enum criteria *new_cr, ext4_group_t *group, ext4_group_t ngroups)
+ enum criteria *new_cr, ext4_group_t *group)
{
struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
struct ext4_group_info *iter;
@@ -945,7 +944,7 @@ ext4_mb_find_good_group_avg_frag_lists(struct ext4_allocation_context *ac, int o
* order. Updates *new_cr if cr level needs an update.
*/
static void ext4_mb_choose_next_group_goal_fast(struct ext4_allocation_context *ac,
- enum criteria *new_cr, ext4_group_t *group, ext4_group_t ngroups)
+ enum criteria *new_cr, ext4_group_t *group)
{
struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
struct ext4_group_info *grp = NULL;
@@ -990,7 +989,7 @@ static void ext4_mb_choose_next_group_goal_fast(struct ext4_allocation_context *
* much and fall to CR_GOAL_LEN_SLOW in that case.
*/
static void ext4_mb_choose_next_group_best_avail(struct ext4_allocation_context *ac,
- enum criteria *new_cr, ext4_group_t *group, ext4_group_t ngroups)
+ enum criteria *new_cr, ext4_group_t *group)
{
struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
struct ext4_group_info *grp = NULL;
@@ -1125,11 +1124,11 @@ static void ext4_mb_choose_next_group(struct ext4_allocation_context *ac,
}
if (*new_cr == CR_POWER2_ALIGNED) {
- ext4_mb_choose_next_group_p2_aligned(ac, new_cr, group, ngroups);
+ ext4_mb_choose_next_group_p2_aligned(ac, new_cr, group);
} else if (*new_cr == CR_GOAL_LEN_FAST) {
- ext4_mb_choose_next_group_goal_fast(ac, new_cr, group, ngroups);
+ ext4_mb_choose_next_group_goal_fast(ac, new_cr, group);
} else if (*new_cr == CR_BEST_AVAIL_LEN) {
- ext4_mb_choose_next_group_best_avail(ac, new_cr, group, ngroups);
+ ext4_mb_choose_next_group_best_avail(ac, new_cr, group);
} else {
/*
* TODO: For CR=2, we can arrange groups in an rb tree sorted by
@@ -1233,6 +1232,24 @@ void ext4_mb_generate_buddy(struct super_block *sb,
atomic64_add(period, &sbi->s_mb_generation_time);
}
+static void mb_regenerate_buddy(struct ext4_buddy *e4b)
+{
+ int count;
+ int order = 1;
+ void *buddy;
+
+ while ((buddy = mb_find_buddy(e4b, order++, &count)))
+ mb_set_bits(buddy, 0, count);
+
+ e4b->bd_info->bb_fragments = 0;
+ memset(e4b->bd_info->bb_counters, 0,
+ sizeof(*e4b->bd_info->bb_counters) *
+ (e4b->bd_sb->s_blocksize_bits + 2));
+
+ ext4_mb_generate_buddy(e4b->bd_sb, e4b->bd_buddy,
+ e4b->bd_bitmap, e4b->bd_group, e4b->bd_info);
+}
+
/* The buddy information is attached the buddy cache inode
* for convenience. The information regarding each group
* is loaded via ext4_mb_load_buddy. The information involve
@@ -1891,11 +1908,6 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
mb_check_buddy(e4b);
mb_free_blocks_double(inode, e4b, first, count);
- this_cpu_inc(discard_pa_seq);
- e4b->bd_info->bb_free += count;
- if (first < e4b->bd_info->bb_first_free)
- e4b->bd_info->bb_first_free = first;
-
/* access memory sequentially: check left neighbour,
* clear range and then check right neighbour
*/
@@ -1909,21 +1921,31 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
struct ext4_sb_info *sbi = EXT4_SB(sb);
ext4_fsblk_t blocknr;
+ /*
+ * Fastcommit replay can free already freed blocks which
+ * corrupts allocation info. Regenerate it.
+ */
+ if (sbi->s_mount_state & EXT4_FC_REPLAY) {
+ mb_regenerate_buddy(e4b);
+ goto check;
+ }
+
blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
blocknr += EXT4_C2B(sbi, block);
- if (!(sbi->s_mount_state & EXT4_FC_REPLAY)) {
- ext4_grp_locked_error(sb, e4b->bd_group,
- inode ? inode->i_ino : 0,
- blocknr,
- "freeing already freed block (bit %u); block bitmap corrupt.",
- block);
- ext4_mark_group_bitmap_corrupted(
- sb, e4b->bd_group,
+ ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
EXT4_GROUP_INFO_BBITMAP_CORRUPT);
- }
- goto done;
+ ext4_grp_locked_error(sb, e4b->bd_group,
+ inode ? inode->i_ino : 0, blocknr,
+ "freeing already freed block (bit %u); block bitmap corrupt.",
+ block);
+ return;
}
+ this_cpu_inc(discard_pa_seq);
+ e4b->bd_info->bb_free += count;
+ if (first < e4b->bd_info->bb_first_free)
+ e4b->bd_info->bb_first_free = first;
+
/* let's maintain fragments counter */
if (left_is_free && right_is_free)
e4b->bd_info->bb_fragments--;
@@ -1948,9 +1970,9 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
if (first <= last)
mb_buddy_mark_free(e4b, first >> 1, last >> 1);
-done:
mb_set_largest_free_order(sb, e4b->bd_info);
mb_update_avg_fragment_size(sb, e4b->bd_info);
+check:
mb_check_buddy(e4b);
}
@@ -2276,6 +2298,9 @@ void ext4_mb_try_best_found(struct ext4_allocation_context *ac,
return;
ext4_lock_group(ac->ac_sb, group);
+ if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info)))
+ goto out;
+
max = mb_find_extent(e4b, ex.fe_start, ex.fe_len, &ex);
if (max > 0) {
@@ -2283,6 +2308,7 @@ void ext4_mb_try_best_found(struct ext4_allocation_context *ac,
ext4_mb_use_best_found(ac, e4b);
}
+out:
ext4_unlock_group(ac->ac_sb, group);
ext4_mb_unload_buddy(e4b);
}
@@ -2309,12 +2335,10 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
if (err)
return err;
- if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) {
- ext4_mb_unload_buddy(e4b);
- return 0;
- }
-
ext4_lock_group(ac->ac_sb, group);
+ if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info)))
+ goto out;
+
max = mb_find_extent(e4b, ac->ac_g_ex.fe_start,
ac->ac_g_ex.fe_len, &ex);
ex.fe_logical = 0xDEADFA11; /* debug value */
@@ -2347,6 +2371,7 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
ac->ac_b_ex = ex;
ext4_mb_use_best_found(ac, e4b);
}
+out:
ext4_unlock_group(ac->ac_sb, group);
ext4_mb_unload_buddy(e4b);
@@ -2380,12 +2405,12 @@ void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac,
k = mb_find_next_zero_bit(buddy, max, 0);
if (k >= max) {
+ ext4_mark_group_bitmap_corrupted(ac->ac_sb,
+ e4b->bd_group,
+ EXT4_GROUP_INFO_BBITMAP_CORRUPT);
ext4_grp_locked_error(ac->ac_sb, e4b->bd_group, 0, 0,
"%d free clusters of order %d. But found 0",
grp->bb_counters[i], i);
- ext4_mark_group_bitmap_corrupted(ac->ac_sb,
- e4b->bd_group,
- EXT4_GROUP_INFO_BBITMAP_CORRUPT);
break;
}
ac->ac_found++;
@@ -2436,12 +2461,12 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
* free blocks even though group info says we
* have free blocks
*/
+ ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
+ EXT4_GROUP_INFO_BBITMAP_CORRUPT);
ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
"%d free clusters as per "
"group info. But bitmap says 0",
free);
- ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
- EXT4_GROUP_INFO_BBITMAP_CORRUPT);
break;
}
@@ -2467,12 +2492,12 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
if (WARN_ON(ex.fe_len <= 0))
break;
if (free < ex.fe_len) {
+ ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
+ EXT4_GROUP_INFO_BBITMAP_CORRUPT);
ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
"%d free clusters as per "
"group info. But got %d blocks",
free, ex.fe_len);
- ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
- EXT4_GROUP_INFO_BBITMAP_CORRUPT);
/*
* The number of free blocks differs. This mostly
* indicate that the bitmap is corrupt. So exit
@@ -2990,8 +3015,8 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
{
struct super_block *sb = pde_data(file_inode(seq->file));
ext4_group_t group = (ext4_group_t) ((unsigned long) v);
- int i;
- int err, buddy_loaded = 0;
+ int i, err;
+ char nbuf[16];
struct ext4_buddy e4b;
struct ext4_group_info *grinfo;
unsigned char blocksize_bits = min_t(unsigned char,
@@ -3018,23 +3043,26 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
if (unlikely(EXT4_MB_GRP_NEED_INIT(grinfo))) {
err = ext4_mb_load_buddy(sb, group, &e4b);
if (err) {
- seq_printf(seq, "#%-5u: I/O error\n", group);
+ seq_printf(seq, "#%-5u: %s\n", group, ext4_decode_error(NULL, err, nbuf));
return 0;
}
- buddy_loaded = 1;
+ ext4_mb_unload_buddy(&e4b);
}
+ /*
+ * We care only about free space counters in the group info and
+ * these are safe to access even after the buddy has been unloaded
+ */
memcpy(&sg, grinfo, i);
-
- if (buddy_loaded)
- ext4_mb_unload_buddy(&e4b);
-
seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
sg.info.bb_fragments, sg.info.bb_first_free);
for (i = 0; i <= 13; i++)
seq_printf(seq, " %-5u", i <= blocksize_bits + 1 ?
sg.info.bb_counters[i] : 0);
- seq_puts(seq, " ]\n");
+ seq_puts(seq, " ]");
+ if (EXT4_MB_GRP_BBITMAP_CORRUPT(&sg.info))
+ seq_puts(seq, " Block bitmap corrupted!");
+ seq_puts(seq, "\n");
return 0;
}
@@ -3725,7 +3753,7 @@ static int ext4_mb_cleanup_pa(struct ext4_group_info *grp)
return count;
}
-int ext4_mb_release(struct super_block *sb)
+void ext4_mb_release(struct super_block *sb)
{
ext4_group_t ngroups = ext4_get_groups_count(sb);
ext4_group_t i;
@@ -3801,13 +3829,10 @@ int ext4_mb_release(struct super_block *sb)
}
free_percpu(sbi->s_locality_groups);
-
- return 0;
}
static inline int ext4_issue_discard(struct super_block *sb,
- ext4_group_t block_group, ext4_grpblk_t cluster, int count,
- struct bio **biop)
+ ext4_group_t block_group, ext4_grpblk_t cluster, int count)
{
ext4_fsblk_t discard_block;
@@ -3816,13 +3841,8 @@ static inline int ext4_issue_discard(struct super_block *sb,
count = EXT4_C2B(EXT4_SB(sb), count);
trace_ext4_discard_blocks(sb,
(unsigned long long) discard_block, count);
- if (biop) {
- return __blkdev_issue_discard(sb->s_bdev,
- (sector_t)discard_block << (sb->s_blocksize_bits - 9),
- (sector_t)count << (sb->s_blocksize_bits - 9),
- GFP_NOFS, biop);
- } else
- return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
+
+ return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
}
static void ext4_free_data_in_buddy(struct super_block *sb,
@@ -5146,10 +5166,16 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
.fe_len = ac->ac_orig_goal_len,
};
loff_t orig_goal_end = extent_logical_end(sbi, &ex);
+ loff_t o_ex_end = extent_logical_end(sbi, &ac->ac_o_ex);
- /* we can't allocate as much as normalizer wants.
- * so, found space must get proper lstart
- * to cover original request */
+ /*
+ * We can't allocate as much as normalizer wants, so we try
+ * to get proper lstart to cover the original request, except
+ * when the goal doesn't cover the original request as below:
+ *
+ * orig_ex:2045/2055(10), isize:8417280 -> normalized:0/2048
+ * best_ex:0/200(200) -> adjusted: 1848/2048(200)
+ */
BUG_ON(ac->ac_g_ex.fe_logical > ac->ac_o_ex.fe_logical);
BUG_ON(ac->ac_g_ex.fe_len < ac->ac_o_ex.fe_len);
@@ -5161,7 +5187,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
* 1. Check if best ex can be kept at end of goal (before
* cr_best_avail trimmed it) and still cover original start
* 2. Else, check if best ex can be kept at start of goal and
- * still cover original start
+ * still cover original end
* 3. Else, keep the best ex at start of original request.
*/
ex.fe_len = ac->ac_b_ex.fe_len;
@@ -5171,7 +5197,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
goto adjust_bex;
ex.fe_logical = ac->ac_g_ex.fe_logical;
- if (ac->ac_o_ex.fe_logical < extent_logical_end(sbi, &ex))
+ if (o_ex_end <= extent_logical_end(sbi, &ex))
goto adjust_bex;
ex.fe_logical = ac->ac_o_ex.fe_logical;
@@ -5179,7 +5205,6 @@ adjust_bex:
ac->ac_b_ex.fe_logical = ex.fe_logical;
BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical);
- BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len);
BUG_ON(extent_logical_end(sbi, &ex) > orig_goal_end);
}
@@ -5284,7 +5309,7 @@ static void ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
* the caller MUST hold group/inode locks.
* TODO: optimize the case when there are no in-core structures yet
*/
-static noinline_for_stack int
+static noinline_for_stack void
ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
struct ext4_prealloc_space *pa)
{
@@ -5334,11 +5359,9 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
*/
}
atomic_add(free, &sbi->s_mb_discarded);
-
- return 0;
}
-static noinline_for_stack int
+static noinline_for_stack void
ext4_mb_release_group_pa(struct ext4_buddy *e4b,
struct ext4_prealloc_space *pa)
{
@@ -5352,13 +5375,11 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b,
if (unlikely(group != e4b->bd_group && pa->pa_len != 0)) {
ext4_warning(sb, "bad group: expected %u, group %u, pa_start %llu",
e4b->bd_group, group, pa->pa_pstart);
- return 0;
+ return;
}
mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len);
-
- return 0;
}
/*
@@ -5479,7 +5500,7 @@ out_dbg:
*
* FIXME!! Make sure it is valid at all the call sites
*/
-void ext4_discard_preallocations(struct inode *inode, unsigned int needed)
+void ext4_discard_preallocations(struct inode *inode)
{
struct ext4_inode_info *ei = EXT4_I(inode);
struct super_block *sb = inode->i_sb;
@@ -5491,9 +5512,8 @@ void ext4_discard_preallocations(struct inode *inode, unsigned int needed)
struct rb_node *iter;
int err;
- if (!S_ISREG(inode->i_mode)) {
+ if (!S_ISREG(inode->i_mode))
return;
- }
if (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)
return;
@@ -5501,15 +5521,12 @@ void ext4_discard_preallocations(struct inode *inode, unsigned int needed)
mb_debug(sb, "discard preallocation for inode %lu\n",
inode->i_ino);
trace_ext4_discard_preallocations(inode,
- atomic_read(&ei->i_prealloc_active), needed);
-
- if (needed == 0)
- needed = UINT_MAX;
+ atomic_read(&ei->i_prealloc_active));
repeat:
/* first, collect all pa's in the inode */
write_lock(&ei->i_prealloc_lock);
- for (iter = rb_first(&ei->i_prealloc_node); iter && needed;
+ for (iter = rb_first(&ei->i_prealloc_node); iter;
iter = rb_next(iter)) {
pa = rb_entry(iter, struct ext4_prealloc_space,
pa_node.inode_node);
@@ -5533,7 +5550,6 @@ repeat:
spin_unlock(&pa->pa_lock);
rb_erase(&pa->pa_node.inode_node, &ei->i_prealloc_node);
list_add(&pa->u.pa_tmp_list, &list);
- needed--;
continue;
}
@@ -5943,7 +5959,7 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
/*
* release all resource we used in allocation
*/
-static int ext4_mb_release_context(struct ext4_allocation_context *ac)
+static void ext4_mb_release_context(struct ext4_allocation_context *ac)
{
struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
struct ext4_prealloc_space *pa = ac->ac_pa;
@@ -5980,7 +5996,6 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac)
if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
mutex_unlock(&ac->ac_lg->lg_mutex);
ext4_mb_collect_stats(ac);
- return 0;
}
static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
@@ -6474,8 +6489,14 @@ do_more:
} else {
if (test_opt(sb, DISCARD)) {
err = ext4_issue_discard(sb, block_group, bit,
- count_clusters, NULL);
- if (err && err != -EOPNOTSUPP)
+ count_clusters);
+ /*
+ * Ignore EOPNOTSUPP error. This is consistent with
+ * what happens when using journal.
+ */
+ if (err == -EOPNOTSUPP)
+ err = 0;
+ if (err)
ext4_msg(sb, KERN_WARNING, "discard request in"
" group:%u block:%d count:%lu failed"
" with %d", block_group, bit, count,
@@ -6725,7 +6746,7 @@ __acquires(bitlock)
*/
mb_mark_used(e4b, &ex);
ext4_unlock_group(sb, group);
- ret = ext4_issue_discard(sb, group, start, count, NULL);
+ ret = ext4_issue_discard(sb, group, start, count);
ext4_lock_group(sb, group);
mb_free_blocks(NULL, e4b, start, ex.fe_len);
return ret;
@@ -6761,6 +6782,9 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group))
bool set_trimmed = false;
void *bitmap;
+ if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info)))
+ return 0;
+
last = ext4_last_grp_cluster(sb, e4b->bd_group);
bitmap = e4b->bd_bitmap;
if (start == 0 && max >= last)