aboutsummaryrefslogtreecommitdiff
path: root/fs/xfs/xfs_log_recover.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-09-18 18:32:43 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2019-09-18 18:32:43 -0700
commitb41dae061bbd722b9d7fa828f35d22035b218e18 (patch)
treea5c0bade0c3d221483b54204bfc47e4fdbf09316 /fs/xfs/xfs_log_recover.c
parente6bc9de714972cac34daa1dc1567ee48a47a9342 (diff)
parent14e15f1bcd738dc13dd7c1e78e4800e8bc577980 (diff)
Merge tag 'xfs-5.4-merge-7' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
Pull xfs updates from Darrick Wong: "For this cycle we have the usual pile of cleanups and bug fixes, some performance improvements for online metadata scrubbing, massive speedups in the directory entry creation code, some performance improvement in the file ACL lookup code, a fix for a logging stall during mount, and fixes for concurrency problems. It has survived a couple of weeks of xfstests runs and merges cleanly. Summary: - Remove KM_SLEEP/KM_NOSLEEP. - Ensure that memory buffers for IO are properly sector-aligned to avoid problems that the block layer doesn't check. - Make the bmap scrubber more efficient in its record checking. - Don't crash xfs_db when superblock inode geometry is corrupt. - Fix btree key helper functions. - Remove unneeded error returns for things that can't fail. - Fix buffer logging bugs in repair. - Clean up iterator return values. - Speed up directory entry creation. - Enable allocation of xattr value memory buffer during lookup. - Fix readahead racing with truncate/punch hole. - Other minor cleanups. - Fix one AGI/AGF deadlock with RENAME_WHITEOUT. - More BUG -> WARN whackamole. - Fix various problems with the log failing to advance under certain circumstances, which results in stalls during mount" * tag 'xfs-5.4-merge-7' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (45 commits) xfs: push the grant head when the log head moves forward xfs: push iclog state cleaning into xlog_state_clean_log xfs: factor iclog state processing out of xlog_state_do_callback() xfs: factor callbacks out of xlog_state_do_callback() xfs: factor debug code out of xlog_state_do_callback() xfs: prevent CIL push holdoff in log recovery xfs: fix missed wakeup on l_flush_wait xfs: push the AIL in xlog_grant_head_wake xfs: Use WARN_ON_ONCE for bailout mount-operation xfs: Fix deadlock between AGI and AGF with RENAME_WHITEOUT xfs: define a flags field for the AG geometry ioctl structure xfs: add a xfs_valid_startblock helper xfs: remove the unused XFS_ALLOC_USERDATA flag xfs: cleanup xfs_fsb_to_db xfs: fix the dax supported check in xfs_ioctl_setattr_dax_invalidate xfs: Fix stale data exposure when readahead races with hole punch fs: Export generic_fadvise() mm: Handle MADV_WILLNEED through vfs_fadvise() xfs: allocate xattr buffer on demand xfs: consolidate attribute value copying ...
Diffstat (limited to 'fs/xfs/xfs_log_recover.c')
-rw-r--r--fs/xfs/xfs_log_recover.c50
1 files changed, 32 insertions, 18 deletions
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 13d1d3e95b88..508319039dce 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -97,6 +97,8 @@ xlog_alloc_buffer(
struct xlog *log,
int nbblks)
{
+ int align_mask = xfs_buftarg_dma_alignment(log->l_targ);
+
/*
* Pass log block 0 since we don't have an addr yet, buffer will be
* verified on read.
@@ -125,7 +127,7 @@ xlog_alloc_buffer(
if (nbblks > 1 && log->l_sectBBsize > 1)
nbblks += log->l_sectBBsize;
nbblks = round_up(nbblks, log->l_sectBBsize);
- return kmem_alloc_large(BBTOB(nbblks), KM_MAYFAIL);
+ return kmem_alloc_io(BBTOB(nbblks), align_mask, KM_MAYFAIL);
}
/*
@@ -1960,7 +1962,7 @@ xlog_recover_buffer_pass1(
}
}
- bcp = kmem_alloc(sizeof(struct xfs_buf_cancel), KM_SLEEP);
+ bcp = kmem_alloc(sizeof(struct xfs_buf_cancel), 0);
bcp->bc_blkno = buf_f->blf_blkno;
bcp->bc_len = buf_f->blf_len;
bcp->bc_refcount = 1;
@@ -2930,7 +2932,7 @@ xlog_recover_inode_pass2(
if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) {
in_f = item->ri_buf[0].i_addr;
} else {
- in_f = kmem_alloc(sizeof(struct xfs_inode_log_format), KM_SLEEP);
+ in_f = kmem_alloc(sizeof(struct xfs_inode_log_format), 0);
need_free = 1;
error = xfs_inode_item_format_convert(&item->ri_buf[0], in_f);
if (error)
@@ -4161,7 +4163,7 @@ xlog_recover_add_item(
{
xlog_recover_item_t *item;
- item = kmem_zalloc(sizeof(xlog_recover_item_t), KM_SLEEP);
+ item = kmem_zalloc(sizeof(xlog_recover_item_t), 0);
INIT_LIST_HEAD(&item->ri_list);
list_add_tail(&item->ri_list, head);
}
@@ -4201,7 +4203,7 @@ xlog_recover_add_to_cont_trans(
old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
old_len = item->ri_buf[item->ri_cnt-1].i_len;
- ptr = kmem_realloc(old_ptr, len + old_len, KM_SLEEP);
+ ptr = kmem_realloc(old_ptr, len + old_len, 0);
memcpy(&ptr[old_len], dp, len);
item->ri_buf[item->ri_cnt-1].i_len += len;
item->ri_buf[item->ri_cnt-1].i_addr = ptr;
@@ -4261,7 +4263,7 @@ xlog_recover_add_to_trans(
return 0;
}
- ptr = kmem_alloc(len, KM_SLEEP);
+ ptr = kmem_alloc(len, 0);
memcpy(ptr, dp, len);
in_f = (struct xfs_inode_log_format *)ptr;
@@ -4289,7 +4291,7 @@ xlog_recover_add_to_trans(
item->ri_total = in_f->ilf_size;
item->ri_buf =
kmem_zalloc(item->ri_total * sizeof(xfs_log_iovec_t),
- KM_SLEEP);
+ 0);
}
ASSERT(item->ri_total > item->ri_cnt);
/* Description region is ri_buf[0] */
@@ -4423,7 +4425,7 @@ xlog_recover_ophdr_to_trans(
* This is a new transaction so allocate a new recovery container to
* hold the recovery ops that will follow.
*/
- trans = kmem_zalloc(sizeof(struct xlog_recover), KM_SLEEP);
+ trans = kmem_zalloc(sizeof(struct xlog_recover), 0);
trans->r_log_tid = tid;
trans->r_lsn = be64_to_cpu(rhead->h_lsn);
INIT_LIST_HEAD(&trans->r_itemq);
@@ -5022,16 +5024,27 @@ xlog_recover_process_one_iunlink(
}
/*
- * xlog_iunlink_recover
+ * Recover AGI unlinked lists
+ *
+ * This is called during recovery to process any inodes which we unlinked but
+ * not freed when the system crashed. These inodes will be on the lists in the
+ * AGI blocks. What we do here is scan all the AGIs and fully truncate and free
+ * any inodes found on the lists. Each inode is removed from the lists when it
+ * has been fully truncated and is freed. The freeing of the inode and its
+ * removal from the list must be atomic.
+ *
+ * If everything we touch in the agi processing loop is already in memory, this
+ * loop can hold the cpu for a long time. It runs without lock contention,
+ * memory allocation contention, the need wait for IO, etc, and so will run
+ * until we either run out of inodes to process, run low on memory or we run out
+ * of log space.
*
- * This is called during recovery to process any inodes which
- * we unlinked but not freed when the system crashed. These
- * inodes will be on the lists in the AGI blocks. What we do
- * here is scan all the AGIs and fully truncate and free any
- * inodes found on the lists. Each inode is removed from the
- * lists when it has been fully truncated and is freed. The
- * freeing of the inode and its removal from the list must be
- * atomic.
+ * This behaviour is bad for latency on single CPU and non-preemptible kernels,
+ * and can prevent other filesytem work (such as CIL pushes) from running. This
+ * can lead to deadlocks if the recovery process runs out of log reservation
+ * space. Hence we need to yield the CPU when there is other kernel work
+ * scheduled on this CPU to ensure other scheduled work can run without undue
+ * latency.
*/
STATIC void
xlog_recover_process_iunlinks(
@@ -5078,6 +5091,7 @@ xlog_recover_process_iunlinks(
while (agino != NULLAGINO) {
agino = xlog_recover_process_one_iunlink(mp,
agno, agino, bucket);
+ cond_resched();
}
}
xfs_buf_rele(agibp);
@@ -5527,7 +5541,7 @@ xlog_do_log_recovery(
*/
log->l_buf_cancel_table = kmem_zalloc(XLOG_BC_TABLE_SIZE *
sizeof(struct list_head),
- KM_SLEEP);
+ 0);
for (i = 0; i < XLOG_BC_TABLE_SIZE; i++)
INIT_LIST_HEAD(&log->l_buf_cancel_table[i]);