aboutsummaryrefslogtreecommitdiff
path: root/fs/xfs/xfs_buf.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_buf.c')
-rw-r--r--fs/xfs/xfs_buf.c143
1 files changed, 122 insertions, 21 deletions
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index ac3b4db519df..72f038492ba8 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -33,6 +33,7 @@
#include <linux/migrate.h>
#include <linux/backing-dev.h>
#include <linux/freezer.h>
+#include <linux/sched/mm.h>
#include "xfs_format.h"
#include "xfs_log_format.h"
@@ -96,12 +97,16 @@ static inline void
xfs_buf_ioacct_inc(
struct xfs_buf *bp)
{
- if (bp->b_flags & (XBF_NO_IOACCT|_XBF_IN_FLIGHT))
+ if (bp->b_flags & XBF_NO_IOACCT)
return;
ASSERT(bp->b_flags & XBF_ASYNC);
- bp->b_flags |= _XBF_IN_FLIGHT;
- percpu_counter_inc(&bp->b_target->bt_io_count);
+ spin_lock(&bp->b_lock);
+ if (!(bp->b_state & XFS_BSTATE_IN_FLIGHT)) {
+ bp->b_state |= XFS_BSTATE_IN_FLIGHT;
+ percpu_counter_inc(&bp->b_target->bt_io_count);
+ }
+ spin_unlock(&bp->b_lock);
}
/*
@@ -109,14 +114,24 @@ xfs_buf_ioacct_inc(
* freed and unaccount from the buftarg.
*/
static inline void
-xfs_buf_ioacct_dec(
+__xfs_buf_ioacct_dec(
struct xfs_buf *bp)
{
- if (!(bp->b_flags & _XBF_IN_FLIGHT))
- return;
+ lockdep_assert_held(&bp->b_lock);
+
+ if (bp->b_state & XFS_BSTATE_IN_FLIGHT) {
+ bp->b_state &= ~XFS_BSTATE_IN_FLIGHT;
+ percpu_counter_dec(&bp->b_target->bt_io_count);
+ }
+}
- bp->b_flags &= ~_XBF_IN_FLIGHT;
- percpu_counter_dec(&bp->b_target->bt_io_count);
+static inline void
+xfs_buf_ioacct_dec(
+ struct xfs_buf *bp)
+{
+ spin_lock(&bp->b_lock);
+ __xfs_buf_ioacct_dec(bp);
+ spin_unlock(&bp->b_lock);
}
/*
@@ -148,9 +163,9 @@ xfs_buf_stale(
* unaccounted (released to LRU) before that occurs. Drop in-flight
* status now to preserve accounting consistency.
*/
- xfs_buf_ioacct_dec(bp);
-
spin_lock(&bp->b_lock);
+ __xfs_buf_ioacct_dec(bp);
+
atomic_set(&bp->b_lru_ref, 0);
if (!(bp->b_state & XFS_BSTATE_DISPOSE) &&
(list_lru_del(&bp->b_target->bt_lru, &bp->b_lru)))
@@ -442,17 +457,17 @@ _xfs_buf_map_pages(
bp->b_addr = NULL;
} else {
int retried = 0;
- unsigned noio_flag;
+ unsigned nofs_flag;
/*
* vm_map_ram() will allocate auxillary structures (e.g.
* pagetables) with GFP_KERNEL, yet we are likely to be under
* GFP_NOFS context here. Hence we need to tell memory reclaim
- * that we are in such a context via PF_MEMALLOC_NOIO to prevent
+ * that we are in such a context via PF_MEMALLOC_NOFS to prevent
* memory reclaim re-entering the filesystem here and
* potentially deadlocking.
*/
- noio_flag = memalloc_noio_save();
+ nofs_flag = memalloc_nofs_save();
do {
bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count,
-1, PAGE_KERNEL);
@@ -460,7 +475,7 @@ _xfs_buf_map_pages(
break;
vm_unmap_aliases();
} while (retried++ <= 1);
- memalloc_noio_restore(noio_flag);
+ memalloc_nofs_restore(nofs_flag);
if (!bp->b_addr)
return -ENOMEM;
@@ -758,7 +773,7 @@ xfs_buf_readahead_map(
int nmaps,
const struct xfs_buf_ops *ops)
{
- if (bdi_read_congested(target->bt_bdi))
+ if (bdi_read_congested(target->bt_bdev->bd_bdi))
return;
xfs_buf_read_map(target, map, nmaps,
@@ -978,12 +993,12 @@ xfs_buf_rele(
* ensures the decrement occurs only once per-buf.
*/
if ((atomic_read(&bp->b_hold) == 1) && !list_empty(&bp->b_lru))
- xfs_buf_ioacct_dec(bp);
+ __xfs_buf_ioacct_dec(bp);
goto out_unlock;
}
/* the last reference has been dropped ... */
- xfs_buf_ioacct_dec(bp);
+ __xfs_buf_ioacct_dec(bp);
if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) {
/*
* If the buffer is added to the LRU take a new reference to the
@@ -1078,6 +1093,8 @@ void
xfs_buf_unlock(
struct xfs_buf *bp)
{
+ ASSERT(xfs_buf_islocked(bp));
+
XB_CLEAR_OWNER(bp);
up(&bp->b_sema);
@@ -1177,7 +1194,7 @@ xfs_buf_ioerror_alert(
{
xfs_alert(bp->b_target->bt_mount,
"metadata I/O error: block 0x%llx (\"%s\") error %d numblks %d",
- (__uint64_t)XFS_BUF_ADDR(bp), func, -bp->b_error, bp->b_length);
+ (uint64_t)XFS_BUF_ADDR(bp), func, -bp->b_error, bp->b_length);
}
int
@@ -1210,8 +1227,11 @@ xfs_buf_bio_end_io(
* don't overwrite existing errors - otherwise we can lose errors on
* buffers that require multiple bios to complete.
*/
- if (bio->bi_error)
- cmpxchg(&bp->b_io_error, 0, bio->bi_error);
+ if (bio->bi_status) {
+ int error = blk_status_to_errno(bio->bi_status);
+
+ cmpxchg(&bp->b_io_error, 0, error);
+ }
if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
@@ -1791,7 +1811,6 @@ xfs_alloc_buftarg(
btp->bt_mount = mp;
btp->bt_dev = bdev->bd_dev;
btp->bt_bdev = bdev;
- btp->bt_bdi = blk_get_backing_dev_info(bdev);
if (xfs_setsize_buftarg_early(btp, bdev))
goto error;
@@ -1815,6 +1834,28 @@ error:
}
/*
+ * Cancel a delayed write list.
+ *
+ * Remove each buffer from the list, clear the delwri queue flag and drop the
+ * associated buffer reference.
+ */
+void
+xfs_buf_delwri_cancel(
+ struct list_head *list)
+{
+ struct xfs_buf *bp;
+
+ while (!list_empty(list)) {
+ bp = list_first_entry(list, struct xfs_buf, b_list);
+
+ xfs_buf_lock(bp);
+ bp->b_flags &= ~_XBF_DELWRI_Q;
+ list_del_init(&bp->b_list);
+ xfs_buf_relse(bp);
+ }
+}
+
+/*
* Add a buffer to the delayed write list.
*
* This queues a buffer for writeout if it hasn't already been. Note that
@@ -2009,6 +2050,66 @@ xfs_buf_delwri_submit(
return error;
}
+/*
+ * Push a single buffer on a delwri queue.
+ *
+ * The purpose of this function is to submit a single buffer of a delwri queue
+ * and return with the buffer still on the original queue. The waiting delwri
+ * buffer submission infrastructure guarantees transfer of the delwri queue
+ * buffer reference to a temporary wait list. We reuse this infrastructure to
+ * transfer the buffer back to the original queue.
+ *
+ * Note the buffer transitions from the queued state, to the submitted and wait
+ * listed state and back to the queued state during this call. The buffer
+ * locking and queue management logic between _delwri_pushbuf() and
+ * _delwri_queue() guarantee that the buffer cannot be queued to another list
+ * before returning.
+ */
+int
+xfs_buf_delwri_pushbuf(
+ struct xfs_buf *bp,
+ struct list_head *buffer_list)
+{
+ LIST_HEAD (submit_list);
+ int error;
+
+ ASSERT(bp->b_flags & _XBF_DELWRI_Q);
+
+ trace_xfs_buf_delwri_pushbuf(bp, _RET_IP_);
+
+ /*
+ * Isolate the buffer to a new local list so we can submit it for I/O
+ * independently from the rest of the original list.
+ */
+ xfs_buf_lock(bp);
+ list_move(&bp->b_list, &submit_list);
+ xfs_buf_unlock(bp);
+
+ /*
+ * Delwri submission clears the DELWRI_Q buffer flag and returns with
+ * the buffer on the wait list with an associated reference. Rather than
+ * bounce the buffer from a local wait list back to the original list
+ * after I/O completion, reuse the original list as the wait list.
+ */
+ xfs_buf_delwri_submit_buffers(&submit_list, buffer_list);
+
+ /*
+ * The buffer is now under I/O and wait listed as during typical delwri
+ * submission. Lock the buffer to wait for I/O completion. Rather than
+ * remove the buffer from the wait list and release the reference, we
+ * want to return with the buffer queued to the original list. The
+ * buffer already sits on the original list with a wait list reference,
+ * however. If we let the queue inherit that wait list reference, all we
+ * need to do is reset the DELWRI_Q flag.
+ */
+ xfs_buf_lock(bp);
+ error = bp->b_error;
+ bp->b_flags |= _XBF_DELWRI_Q;
+ xfs_buf_unlock(bp);
+
+ return error;
+}
+
int __init
xfs_buf_init(void)
{