diff options
Diffstat (limited to 'fs/nilfs2/segment.c')
-rw-r--r-- | fs/nilfs2/segment.c | 161 |
1 files changed, 107 insertions, 54 deletions
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index aa5290cb7467..0ca3110d6386 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -136,7 +136,7 @@ static void nilfs_dispose_list(struct the_nilfs *, struct list_head *, int); #define nilfs_cnt32_ge(a, b) \ (typecheck(__u32, a) && typecheck(__u32, b) && \ - ((__s32)(a) - (__s32)(b) >= 0)) + ((__s32)((a) - (b)) >= 0)) static int nilfs_prepare_segment_lock(struct super_block *sb, struct nilfs_transaction_info *ti) @@ -1639,39 +1639,30 @@ static void nilfs_begin_folio_io(struct folio *folio) folio_unlock(folio); } -static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci) +/** + * nilfs_prepare_write_logs - prepare to write logs + * @logs: logs to prepare for writing + * @seed: checksum seed value + * + * nilfs_prepare_write_logs() adds checksums and prepares the block + * buffers/folios for writing logs. In order to stabilize folios of + * memory-mapped file blocks by putting them in writeback state before + * calculating the checksums, first prepare to write payload blocks other + * than segment summary and super root blocks in which the checksums will + * be embedded. + */ +static void nilfs_prepare_write_logs(struct list_head *logs, u32 seed) { struct nilfs_segment_buffer *segbuf; struct folio *bd_folio = NULL, *fs_folio = NULL; + struct buffer_head *bh; - list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { - struct buffer_head *bh; - - list_for_each_entry(bh, &segbuf->sb_segsum_buffers, - b_assoc_buffers) { - if (bh->b_folio != bd_folio) { - if (bd_folio) { - folio_lock(bd_folio); - folio_clear_dirty_for_io(bd_folio); - folio_start_writeback(bd_folio); - folio_unlock(bd_folio); - } - bd_folio = bh->b_folio; - } - } - + /* Prepare to write payload blocks */ + list_for_each_entry(segbuf, logs, sb_list) { list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { - if (bh == segbuf->sb_super_root) { - if (bh->b_folio != bd_folio) { - folio_lock(bd_folio); - folio_clear_dirty_for_io(bd_folio); - folio_start_writeback(bd_folio); - folio_unlock(bd_folio); - bd_folio = bh->b_folio; - } + if (bh == segbuf->sb_super_root) break; - } set_buffer_async_write(bh); if (bh->b_folio != fs_folio) { nilfs_begin_folio_io(fs_folio); @@ -1679,13 +1670,49 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci) } } } + nilfs_begin_folio_io(fs_folio); + + nilfs_add_checksums_on_logs(logs, seed); + + /* Prepare to write segment summary blocks */ + list_for_each_entry(segbuf, logs, sb_list) { + list_for_each_entry(bh, &segbuf->sb_segsum_buffers, + b_assoc_buffers) { + mark_buffer_dirty(bh); + if (bh->b_folio == bd_folio) + continue; + if (bd_folio) { + folio_lock(bd_folio); + folio_wait_writeback(bd_folio); + folio_clear_dirty_for_io(bd_folio); + folio_start_writeback(bd_folio); + folio_unlock(bd_folio); + } + bd_folio = bh->b_folio; + } + } + + /* Prepare to write super root block */ + bh = NILFS_LAST_SEGBUF(logs)->sb_super_root; + if (bh) { + mark_buffer_dirty(bh); + if (bh->b_folio != bd_folio) { + folio_lock(bd_folio); + folio_wait_writeback(bd_folio); + folio_clear_dirty_for_io(bd_folio); + folio_start_writeback(bd_folio); + folio_unlock(bd_folio); + bd_folio = bh->b_folio; + } + } + if (bd_folio) { folio_lock(bd_folio); + folio_wait_writeback(bd_folio); folio_clear_dirty_for_io(bd_folio); folio_start_writeback(bd_folio); folio_unlock(bd_folio); } - nilfs_begin_folio_io(fs_folio); } static int nilfs_segctor_write(struct nilfs_sc_info *sci, @@ -1725,14 +1752,8 @@ static void nilfs_end_folio_io(struct folio *folio, int err) return; } - if (!err) { - if (!nilfs_folio_buffers_clean(folio)) - filemap_dirty_folio(folio->mapping, folio); - folio_clear_error(folio); - } else { + if (err || !nilfs_folio_buffers_clean(folio)) filemap_dirty_folio(folio->mapping, folio); - folio_set_error(folio); - } folio_end_writeback(folio); } @@ -2073,10 +2094,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) nilfs_segctor_update_segusage(sci, nilfs->ns_sufile); /* Write partial segments */ - nilfs_segctor_prepare_write(sci); - - nilfs_add_checksums_on_logs(&sci->sc_segbufs, - nilfs->ns_crc_seed); + nilfs_prepare_write_logs(&sci->sc_segbufs, nilfs->ns_crc_seed); err = nilfs_segctor_write(sci, nilfs); if (unlikely(err)) @@ -2124,8 +2142,10 @@ static void nilfs_segctor_start_timer(struct nilfs_sc_info *sci) { spin_lock(&sci->sc_state_lock); if (!(sci->sc_state & NILFS_SEGCTOR_COMMIT)) { - sci->sc_timer.expires = jiffies + sci->sc_interval; - add_timer(&sci->sc_timer); + if (sci->sc_task) { + sci->sc_timer.expires = jiffies + sci->sc_interval; + add_timer(&sci->sc_timer); + } sci->sc_state |= NILFS_SEGCTOR_COMMIT; } spin_unlock(&sci->sc_state_lock); @@ -2172,19 +2192,36 @@ static int nilfs_segctor_sync(struct nilfs_sc_info *sci) struct nilfs_segctor_wait_request wait_req; int err = 0; - spin_lock(&sci->sc_state_lock); init_wait(&wait_req.wq); wait_req.err = 0; atomic_set(&wait_req.done, 0); + init_waitqueue_entry(&wait_req.wq, current); + + /* + * To prevent a race issue where completion notifications from the + * log writer thread are missed, increment the request sequence count + * "sc_seq_request" and insert a wait queue entry using the current + * sequence number into the "sc_wait_request" queue at the same time + * within the lock section of "sc_state_lock". + */ + spin_lock(&sci->sc_state_lock); wait_req.seq = ++sci->sc_seq_request; + add_wait_queue(&sci->sc_wait_request, &wait_req.wq); spin_unlock(&sci->sc_state_lock); - init_waitqueue_entry(&wait_req.wq, current); - add_wait_queue(&sci->sc_wait_request, &wait_req.wq); - set_current_state(TASK_INTERRUPTIBLE); wake_up(&sci->sc_wait_daemon); for (;;) { + set_current_state(TASK_INTERRUPTIBLE); + + /* + * Synchronize only while the log writer thread is alive. + * Leave flushing out after the log writer thread exits to + * the cleanup work in nilfs_segctor_destroy(). + */ + if (!sci->sc_task) + break; + if (atomic_read(&wait_req.done)) { err = wait_req.err; break; @@ -2200,7 +2237,7 @@ static int nilfs_segctor_sync(struct nilfs_sc_info *sci) return err; } -static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err) +static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err, bool force) { struct nilfs_segctor_wait_request *wrq, *n; unsigned long flags; @@ -2208,7 +2245,7 @@ static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err) spin_lock_irqsave(&sci->sc_wait_request.lock, flags); list_for_each_entry_safe(wrq, n, &sci->sc_wait_request.head, wq.entry) { if (!atomic_read(&wrq->done) && - nilfs_cnt32_ge(sci->sc_seq_done, wrq->seq)) { + (force || nilfs_cnt32_ge(sci->sc_seq_done, wrq->seq))) { wrq->err = err; atomic_set(&wrq->done, 1); } @@ -2326,10 +2363,21 @@ int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode, */ static void nilfs_segctor_accept(struct nilfs_sc_info *sci) { + bool thread_is_alive; + spin_lock(&sci->sc_state_lock); sci->sc_seq_accepted = sci->sc_seq_request; + thread_is_alive = (bool)sci->sc_task; spin_unlock(&sci->sc_state_lock); - del_timer_sync(&sci->sc_timer); + + /* + * This function does not race with the log writer thread's + * termination. Therefore, deleting sc_timer, which should not be + * done after the log writer thread exits, can be done safely outside + * the area protected by sc_state_lock. + */ + if (thread_is_alive) + del_timer_sync(&sci->sc_timer); } /** @@ -2346,7 +2394,7 @@ static void nilfs_segctor_notify(struct nilfs_sc_info *sci, int mode, int err) if (mode == SC_LSEG_SR) { sci->sc_state &= ~NILFS_SEGCTOR_COMMIT; sci->sc_seq_done = sci->sc_seq_accepted; - nilfs_segctor_wakeup(sci, err); + nilfs_segctor_wakeup(sci, err, false); sci->sc_flush_request = 0; } else { if (mode == SC_FLUSH_FILE) @@ -2355,7 +2403,7 @@ static void nilfs_segctor_notify(struct nilfs_sc_info *sci, int mode, int err) sci->sc_flush_request &= ~FLUSH_DAT_BIT; /* re-enable timer if checkpoint creation was not done */ - if ((sci->sc_state & NILFS_SEGCTOR_COMMIT) && + if ((sci->sc_state & NILFS_SEGCTOR_COMMIT) && sci->sc_task && time_before(jiffies, sci->sc_timer.expires)) add_timer(&sci->sc_timer); } @@ -2545,6 +2593,7 @@ static int nilfs_segctor_thread(void *arg) int timeout = 0; sci->sc_timer_task = current; + timer_setup(&sci->sc_timer, nilfs_construction_timeout, 0); /* start sync. */ sci->sc_task = current; @@ -2612,6 +2661,7 @@ static int nilfs_segctor_thread(void *arg) end_thread: /* end sync. */ sci->sc_task = NULL; + timer_shutdown_sync(&sci->sc_timer); wake_up(&sci->sc_wait_task); /* for nilfs_segctor_kill_thread() */ spin_unlock(&sci->sc_state_lock); return 0; @@ -2675,7 +2725,6 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct super_block *sb, INIT_LIST_HEAD(&sci->sc_gc_inodes); INIT_LIST_HEAD(&sci->sc_iput_queue); INIT_WORK(&sci->sc_iput_work, nilfs_iput_work_func); - timer_setup(&sci->sc_timer, nilfs_construction_timeout, 0); sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT; sci->sc_mjcp_freq = HZ * NILFS_SC_DEFAULT_SR_FREQ; @@ -2729,6 +2778,13 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) || sci->sc_seq_request != sci->sc_seq_done); spin_unlock(&sci->sc_state_lock); + /* + * Forcibly wake up tasks waiting in nilfs_segctor_sync(), which can + * be called from delayed iput() via nilfs_evict_inode() and can race + * with the above log writer thread termination. + */ + nilfs_segctor_wakeup(sci, 0, true); + if (flush_work(&sci->sc_iput_work)) flag = true; @@ -2754,7 +2810,6 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) down_write(&nilfs->ns_segctor_sem); - timer_shutdown_sync(&sci->sc_timer); kfree(sci); } @@ -2790,8 +2845,6 @@ int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root) if (!nilfs->ns_writer) return -ENOMEM; - inode_attach_wb(nilfs->ns_bdev->bd_inode, NULL); - err = nilfs_segctor_start_thread(nilfs->ns_writer); if (unlikely(err)) nilfs_detach_log_writer(sb); |