aboutsummaryrefslogtreecommitdiff
path: root/drivers/md/raid5-cache.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/raid5-cache.c')
-rw-r--r--drivers/md/raid5-cache.c242
1 files changed, 233 insertions, 9 deletions
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 02a554434747..19c5af91bd1b 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -20,6 +20,7 @@
#include <linux/random.h>
#include "md.h"
#include "raid5.h"
+#include "bitmap.h"
/*
* metadata/data stored in disk with 4k size unit (a block) regardless
@@ -218,6 +219,43 @@ static void __r5l_set_io_unit_state(struct r5l_io_unit *io,
io->state = state;
}
+static void
+r5c_return_dev_pending_writes(struct r5conf *conf, struct r5dev *dev,
+ struct bio_list *return_bi)
+{
+ struct bio *wbi, *wbi2;
+
+ wbi = dev->written;
+ dev->written = NULL;
+ while (wbi && wbi->bi_iter.bi_sector <
+ dev->sector + STRIPE_SECTORS) {
+ wbi2 = r5_next_bio(wbi, dev->sector);
+ if (!raid5_dec_bi_active_stripes(wbi)) {
+ md_write_end(conf->mddev);
+ bio_list_add(return_bi, wbi);
+ }
+ wbi = wbi2;
+ }
+}
+
+void r5c_handle_cached_data_endio(struct r5conf *conf,
+ struct stripe_head *sh, int disks, struct bio_list *return_bi)
+{
+ int i;
+
+ for (i = sh->disks; i--; ) {
+ if (sh->dev[i].written) {
+ set_bit(R5_UPTODATE, &sh->dev[i].flags);
+ r5c_return_dev_pending_writes(conf, &sh->dev[i],
+ return_bi);
+ bitmap_endwrite(conf->mddev->bitmap, sh->sector,
+ STRIPE_SECTORS,
+ !test_bit(STRIPE_DEGRADED, &sh->state),
+ 0);
+ }
+ }
+}
+
/*
* Put the stripe into writing-out phase by clearing STRIPE_R5C_CACHING.
* This function should only be called in write-back mode.
@@ -231,6 +269,44 @@ static void r5c_make_stripe_write_out(struct stripe_head *sh)
WARN_ON(!test_bit(STRIPE_R5C_CACHING, &sh->state));
clear_bit(STRIPE_R5C_CACHING, &sh->state);
+
+ if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
+ atomic_inc(&conf->preread_active_stripes);
+
+ if (test_and_clear_bit(STRIPE_R5C_PARTIAL_STRIPE, &sh->state)) {
+ BUG_ON(atomic_read(&conf->r5c_cached_partial_stripes) == 0);
+ atomic_dec(&conf->r5c_cached_partial_stripes);
+ }
+
+ if (test_and_clear_bit(STRIPE_R5C_FULL_STRIPE, &sh->state)) {
+ BUG_ON(atomic_read(&conf->r5c_cached_full_stripes) == 0);
+ atomic_dec(&conf->r5c_cached_full_stripes);
+ }
+}
+
+static void r5c_handle_data_cached(struct stripe_head *sh)
+{
+ int i;
+
+ for (i = sh->disks; i--; )
+ if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags)) {
+ set_bit(R5_InJournal, &sh->dev[i].flags);
+ clear_bit(R5_LOCKED, &sh->dev[i].flags);
+ }
+ clear_bit(STRIPE_LOG_TRAPPED, &sh->state);
+}
+
+/*
+ * this journal write must contain full parity,
+ * it may also contain some data pages
+ */
+static void r5c_handle_parity_cached(struct stripe_head *sh)
+{
+ int i;
+
+ for (i = sh->disks; i--; )
+ if (test_bit(R5_InJournal, &sh->dev[i].flags))
+ set_bit(R5_Wantwrite, &sh->dev[i].flags);
}
/*
@@ -250,8 +326,12 @@ static void r5c_finish_cache_stripe(struct stripe_head *sh)
* two parities are written out together.
*/
set_bit(R5_InJournal, &sh->dev[sh->pd_idx].flags);
- } else
- BUG(); /* write-back logic in next patch */
+ } else if (test_bit(STRIPE_R5C_CACHING, &sh->state)) {
+ r5c_handle_data_cached(sh);
+ } else {
+ r5c_handle_parity_cached(sh);
+ set_bit(R5_InJournal, &sh->dev[sh->pd_idx].flags);
+ }
}
static void r5l_io_run_stripes(struct r5l_io_unit *io)
@@ -491,7 +571,8 @@ static int r5l_log_stripe(struct r5l_log *log, struct stripe_head *sh,
io = log->current_io;
for (i = 0; i < sh->disks; i++) {
- if (!test_bit(R5_Wantwrite, &sh->dev[i].flags))
+ if (!test_bit(R5_Wantwrite, &sh->dev[i].flags) ||
+ test_bit(R5_InJournal, &sh->dev[i].flags))
continue;
if (i == sh->pd_idx || i == sh->qd_idx)
continue;
@@ -550,8 +631,10 @@ int r5l_write_stripe(struct r5l_log *log, struct stripe_head *sh)
for (i = 0; i < sh->disks; i++) {
void *addr;
- if (!test_bit(R5_Wantwrite, &sh->dev[i].flags))
+ if (!test_bit(R5_Wantwrite, &sh->dev[i].flags) ||
+ test_bit(R5_InJournal, &sh->dev[i].flags))
continue;
+
write_disks++;
/* checksum is already calculated in last run */
if (test_bit(STRIPE_LOG_TRAPPED, &sh->state))
@@ -817,7 +900,6 @@ static void r5l_write_super_and_discard_space(struct r5l_log *log,
}
}
-
static void r5l_do_reclaim(struct r5l_log *log)
{
sector_t reclaim_target = xchg(&log->reclaim_target, 0);
@@ -1218,12 +1300,80 @@ int r5c_try_caching_write(struct r5conf *conf,
int disks)
{
struct r5l_log *log = conf->log;
+ int i;
+ struct r5dev *dev;
+ int to_cache = 0;
BUG_ON(!r5c_is_writeback(log));
- /* more write-back logic in next patches */
- r5c_make_stripe_write_out(sh);
- return -EAGAIN;
+ if (!test_bit(STRIPE_R5C_CACHING, &sh->state)) {
+ /*
+ * There are two different scenarios here:
+ * 1. The stripe has some data cached, and it is sent to
+ * write-out phase for reclaim
+ * 2. The stripe is clean, and this is the first write
+ *
+ * For 1, return -EAGAIN, so we continue with
+ * handle_stripe_dirtying().
+ *
+ * For 2, set STRIPE_R5C_CACHING and continue with caching
+ * write.
+ */
+
+ /* case 1: anything injournal or anything in written */
+ if (s->injournal > 0 || s->written > 0)
+ return -EAGAIN;
+ /* case 2 */
+ set_bit(STRIPE_R5C_CACHING, &sh->state);
+ }
+
+ for (i = disks; i--; ) {
+ dev = &sh->dev[i];
+ /* if non-overwrite, use writing-out phase */
+ if (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags) &&
+ !test_bit(R5_InJournal, &dev->flags)) {
+ r5c_make_stripe_write_out(sh);
+ return -EAGAIN;
+ }
+ }
+
+ for (i = disks; i--; ) {
+ dev = &sh->dev[i];
+ if (dev->towrite) {
+ set_bit(R5_Wantwrite, &dev->flags);
+ set_bit(R5_Wantdrain, &dev->flags);
+ set_bit(R5_LOCKED, &dev->flags);
+ to_cache++;
+ }
+ }
+
+ if (to_cache) {
+ set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
+ /*
+ * set STRIPE_LOG_TRAPPED, which triggers r5c_cache_data()
+ * in ops_run_io(). STRIPE_LOG_TRAPPED will be cleared in
+ * r5c_handle_data_cached()
+ */
+ set_bit(STRIPE_LOG_TRAPPED, &sh->state);
+ }
+
+ return 0;
+}
+
+/*
+ * free extra pages (orig_page) we allocated for prexor
+ */
+void r5c_release_extra_page(struct stripe_head *sh)
+{
+ int i;
+
+ for (i = sh->disks; i--; )
+ if (sh->dev[i].page != sh->dev[i].orig_page) {
+ struct page *p = sh->dev[i].orig_page;
+
+ sh->dev[i].orig_page = sh->dev[i].page;
+ put_page(p);
+ }
}
/*
@@ -1234,6 +1384,9 @@ void r5c_finish_stripe_write_out(struct r5conf *conf,
struct stripe_head *sh,
struct stripe_head_state *s)
{
+ int i;
+ int do_wakeup = 0;
+
if (!conf->log ||
!test_bit(R5_InJournal, &sh->dev[sh->pd_idx].flags))
return;
@@ -1243,7 +1396,78 @@ void r5c_finish_stripe_write_out(struct r5conf *conf,
if (conf->log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_THROUGH)
return;
- BUG(); /* write-back logic in following patches */
+
+ for (i = sh->disks; i--; ) {
+ clear_bit(R5_InJournal, &sh->dev[i].flags);
+ if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
+ do_wakeup = 1;
+ }
+
+ /*
+ * analyse_stripe() runs before r5c_finish_stripe_write_out(),
+ * We updated R5_InJournal, so we also update s->injournal.
+ */
+ s->injournal = 0;
+
+ if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state))
+ if (atomic_dec_and_test(&conf->pending_full_writes))
+ md_wakeup_thread(conf->mddev->thread);
+
+ if (do_wakeup)
+ wake_up(&conf->wait_for_overlap);
+}
+
+int
+r5c_cache_data(struct r5l_log *log, struct stripe_head *sh,
+ struct stripe_head_state *s)
+{
+ int pages = 0;
+ int reserve;
+ int i;
+ int ret = 0;
+
+ BUG_ON(!log);
+
+ for (i = 0; i < sh->disks; i++) {
+ void *addr;
+
+ if (!test_bit(R5_Wantwrite, &sh->dev[i].flags))
+ continue;
+ addr = kmap_atomic(sh->dev[i].page);
+ sh->dev[i].log_checksum = crc32c_le(log->uuid_checksum,
+ addr, PAGE_SIZE);
+ kunmap_atomic(addr);
+ pages++;
+ }
+ WARN_ON(pages == 0);
+
+ /*
+ * The stripe must enter state machine again to call endio, so
+ * don't delay.
+ */
+ clear_bit(STRIPE_DELAYED, &sh->state);
+ atomic_inc(&sh->count);
+
+ mutex_lock(&log->io_mutex);
+ /* meta + data */
+ reserve = (1 + pages) << (PAGE_SHIFT - 9);
+ if (!r5l_has_free_space(log, reserve)) {
+ spin_lock(&log->no_space_stripes_lock);
+ list_add_tail(&sh->log_list, &log->no_space_stripes);
+ spin_unlock(&log->no_space_stripes_lock);
+
+ r5l_wake_reclaim(log, reserve);
+ } else {
+ ret = r5l_log_stripe(log, sh, pages, 0);
+ if (ret) {
+ spin_lock_irq(&log->io_list_lock);
+ list_add_tail(&sh->log_list, &log->no_mem_stripes);
+ spin_unlock_irq(&log->io_list_lock);
+ }
+ }
+
+ mutex_unlock(&log->io_mutex);
+ return 0;
}