Diffstat (limited to 'mm/page-writeback.c')
-rw-r--r--  mm/page-writeback.c | 487
1 file changed, 271 insertions(+), 216 deletions(-)
| diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 4812a17b288c..2d498bb62248 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -562,12 +562,12 @@ static unsigned long wp_next_time(unsigned long cur_time)  	return cur_time;  } -static void wb_domain_writeout_inc(struct wb_domain *dom, +static void wb_domain_writeout_add(struct wb_domain *dom,  				   struct fprop_local_percpu *completions, -				   unsigned int max_prop_frac) +				   unsigned int max_prop_frac, long nr)  { -	__fprop_inc_percpu_max(&dom->completions, completions, -			       max_prop_frac); +	__fprop_add_percpu_max(&dom->completions, completions, +			       max_prop_frac, nr);  	/* First event after period switching was turned off? */  	if (unlikely(!dom->period_time)) {  		/* @@ -583,20 +583,20 @@ static void wb_domain_writeout_inc(struct wb_domain *dom,  /*   * Increment @wb's writeout completion count and the global writeout - * completion count. Called from test_clear_page_writeback(). + * completion count. Called from __folio_end_writeback().   */ -static inline void __wb_writeout_inc(struct bdi_writeback *wb) +static inline void __wb_writeout_add(struct bdi_writeback *wb, long nr)  {  	struct wb_domain *cgdom; -	inc_wb_stat(wb, WB_WRITTEN); -	wb_domain_writeout_inc(&global_wb_domain, &wb->completions, -			       wb->bdi->max_prop_frac); +	wb_stat_mod(wb, WB_WRITTEN, nr); +	wb_domain_writeout_add(&global_wb_domain, &wb->completions, +			       wb->bdi->max_prop_frac, nr);  	cgdom = mem_cgroup_wb_domain(wb);  	if (cgdom) -		wb_domain_writeout_inc(cgdom, wb_memcg_completions(wb), -				       wb->bdi->max_prop_frac); +		wb_domain_writeout_add(cgdom, wb_memcg_completions(wb), +				       wb->bdi->max_prop_frac, nr);  }  void wb_writeout_inc(struct bdi_writeback *wb) @@ -604,7 +604,7 @@ void wb_writeout_inc(struct bdi_writeback *wb)  	unsigned long flags;  	local_irq_save(flags); -	__wb_writeout_inc(wb); +	__wb_writeout_add(wb, 1);  	local_irq_restore(flags);  }  EXPORT_SYMBOL_GPL(wb_writeout_inc); @@ -1084,7 +1084,7 @@ static void wb_update_write_bandwidth(struct bdi_writeback *wb,  	 * write_bandwidth = ---------------------------------------------------  	 *                                          period  	 * -	 * @written may have decreased due to account_page_redirty(). +	 * @written may have decreased due to folio_account_redirty().  	 * Avoid underflowing @bw calculation.  	 */  	bw = written - min(written, wb->written_stamp); @@ -2366,8 +2366,15 @@ int do_writepages(struct address_space *mapping, struct writeback_control *wbc)  			ret = generic_writepages(mapping, wbc);  		if ((ret != -ENOMEM) || (wbc->sync_mode != WB_SYNC_ALL))  			break; -		cond_resched(); -		congestion_wait(BLK_RW_ASYNC, HZ/50); + +		/* +		 * Lacking an allocation context or the locality or writeback +		 * state of any of the inode's pages, throttle based on +		 * writeback activity on the local node. It's as good a +		 * guess as any. +		 */ +		reclaim_throttle(NODE_DATA(numa_node_id()), +			VMSCAN_THROTTLE_WRITEBACK);  	}  	/*  	 * Usually few pages are written by now from those we've just submitted @@ -2381,44 +2388,44 @@ int do_writepages(struct address_space *mapping, struct writeback_control *wbc)  }  /** - * write_one_page - write out a single page and wait on I/O - * @page: the page to write + * folio_write_one - write out a single folio and wait on I/O. + * @folio: The folio to write.   * - * The page must be locked by the caller and will be unlocked upon return. 
+ * The folio must be locked by the caller and will be unlocked upon return.   *   * Note that the mapping's AS_EIO/AS_ENOSPC flags will be cleared when this   * function returns.   *   * Return: %0 on success, negative error code otherwise   */ -int write_one_page(struct page *page) +int folio_write_one(struct folio *folio)  { -	struct address_space *mapping = page->mapping; +	struct address_space *mapping = folio->mapping;  	int ret = 0;  	struct writeback_control wbc = {  		.sync_mode = WB_SYNC_ALL, -		.nr_to_write = 1, +		.nr_to_write = folio_nr_pages(folio),  	}; -	BUG_ON(!PageLocked(page)); +	BUG_ON(!folio_test_locked(folio)); -	wait_on_page_writeback(page); +	folio_wait_writeback(folio); -	if (clear_page_dirty_for_io(page)) { -		get_page(page); -		ret = mapping->a_ops->writepage(page, &wbc); +	if (folio_clear_dirty_for_io(folio)) { +		folio_get(folio); +		ret = mapping->a_ops->writepage(&folio->page, &wbc);  		if (ret == 0) -			wait_on_page_writeback(page); -		put_page(page); +			folio_wait_writeback(folio); +		folio_put(folio);  	} else { -		unlock_page(page); +		folio_unlock(folio);  	}  	if (!ret)  		ret = filemap_check_errors(mapping);  	return ret;  } -EXPORT_SYMBOL(write_one_page); +EXPORT_SYMBOL(folio_write_one);  /*   * For address_spaces which do not use buffers nor write back. @@ -2438,29 +2445,30 @@ EXPORT_SYMBOL(__set_page_dirty_no_writeback);   *   * NOTE: This relies on being atomic wrt interrupts.   */ -static void account_page_dirtied(struct page *page, +static void folio_account_dirtied(struct folio *folio,  		struct address_space *mapping)  {  	struct inode *inode = mapping->host; -	trace_writeback_dirty_page(page, mapping); +	trace_writeback_dirty_folio(folio, mapping);  	if (mapping_can_writeback(mapping)) {  		struct bdi_writeback *wb; +		long nr = folio_nr_pages(folio); -		inode_attach_wb(inode, page); +		inode_attach_wb(inode, &folio->page);  		wb = inode_to_wb(inode); -		__inc_lruvec_page_state(page, NR_FILE_DIRTY); -		__inc_zone_page_state(page, NR_ZONE_WRITE_PENDING); -		__inc_node_page_state(page, NR_DIRTIED); -		inc_wb_stat(wb, WB_RECLAIMABLE); -		inc_wb_stat(wb, WB_DIRTIED); -		task_io_account_write(PAGE_SIZE); -		current->nr_dirtied++; -		__this_cpu_inc(bdp_ratelimits); +		__lruvec_stat_mod_folio(folio, NR_FILE_DIRTY, nr); +		__zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, nr); +		__node_stat_mod_folio(folio, NR_DIRTIED, nr); +		wb_stat_mod(wb, WB_RECLAIMABLE, nr); +		wb_stat_mod(wb, WB_DIRTIED, nr); +		task_io_account_write(nr * PAGE_SIZE); +		current->nr_dirtied += nr; +		__this_cpu_add(bdp_ratelimits, nr); -		mem_cgroup_track_foreign_dirty(page, wb); +		mem_cgroup_track_foreign_dirty(folio, wb);  	}  } @@ -2469,130 +2477,152 @@ static void account_page_dirtied(struct page *page,   *   * Caller must hold lock_page_memcg().   
*/ -void account_page_cleaned(struct page *page, struct address_space *mapping, +void folio_account_cleaned(struct folio *folio, struct address_space *mapping,  			  struct bdi_writeback *wb)  {  	if (mapping_can_writeback(mapping)) { -		dec_lruvec_page_state(page, NR_FILE_DIRTY); -		dec_zone_page_state(page, NR_ZONE_WRITE_PENDING); -		dec_wb_stat(wb, WB_RECLAIMABLE); -		task_io_account_cancelled_write(PAGE_SIZE); +		long nr = folio_nr_pages(folio); +		lruvec_stat_mod_folio(folio, NR_FILE_DIRTY, -nr); +		zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, -nr); +		wb_stat_mod(wb, WB_RECLAIMABLE, -nr); +		task_io_account_cancelled_write(nr * PAGE_SIZE);  	}  }  /* - * Mark the page dirty, and set it dirty in the page cache, and mark the inode - * dirty. + * Mark the folio dirty, and set it dirty in the page cache, and mark + * the inode dirty.   * - * If warn is true, then emit a warning if the page is not uptodate and has + * If warn is true, then emit a warning if the folio is not uptodate and has   * not been truncated.   *   * The caller must hold lock_page_memcg().   */ -void __set_page_dirty(struct page *page, struct address_space *mapping, +void __folio_mark_dirty(struct folio *folio, struct address_space *mapping,  			     int warn)  {  	unsigned long flags;  	xa_lock_irqsave(&mapping->i_pages, flags); -	if (page->mapping) {	/* Race with truncate? */ -		WARN_ON_ONCE(warn && !PageUptodate(page)); -		account_page_dirtied(page, mapping); -		__xa_set_mark(&mapping->i_pages, page_index(page), +	if (folio->mapping) {	/* Race with truncate? */ +		WARN_ON_ONCE(warn && !folio_test_uptodate(folio)); +		folio_account_dirtied(folio, mapping); +		__xa_set_mark(&mapping->i_pages, folio_index(folio),  				PAGECACHE_TAG_DIRTY);  	}  	xa_unlock_irqrestore(&mapping->i_pages, flags);  } -/* - * For address_spaces which do not use buffers.  Just tag the page as dirty in - * the xarray. +/** + * filemap_dirty_folio - Mark a folio dirty for filesystems which do not use buffer_heads. + * @mapping: Address space this folio belongs to. + * @folio: Folio to be marked as dirty.   * - * This is also used when a single buffer is being dirtied: we want to set the - * page dirty in that case, but not all the buffers.  This is a "bottom-up" - * dirtying, whereas __set_page_dirty_buffers() is a "top-down" dirtying. + * Filesystems which do not use buffer heads should call this function + * from their set_page_dirty address space operation.  It ignores the + * contents of folio_get_private(), so if the filesystem marks individual + * blocks as dirty, the filesystem should handle that itself.   * - * The caller must ensure this doesn't race with truncation.  Most will simply - * hold the page lock, but e.g. zap_pte_range() calls with the page mapped and - * the pte lock held, which also locks out truncation. + * This is also sometimes used by filesystems which use buffer_heads when + * a single buffer is being dirtied: we want to set the folio dirty in + * that case, but not all the buffers.  This is a "bottom-up" dirtying, + * whereas __set_page_dirty_buffers() is a "top-down" dirtying. + * + * The caller must ensure this doesn't race with truncation.  Most will + * simply hold the folio lock, but e.g. zap_pte_range() calls with the + * folio mapped and the pte lock held, which also locks out truncation.   
*/ -int __set_page_dirty_nobuffers(struct page *page) +bool filemap_dirty_folio(struct address_space *mapping, struct folio *folio)  { -	lock_page_memcg(page); -	if (!TestSetPageDirty(page)) { -		struct address_space *mapping = page_mapping(page); +	folio_memcg_lock(folio); +	if (folio_test_set_dirty(folio)) { +		folio_memcg_unlock(folio); +		return false; +	} -		if (!mapping) { -			unlock_page_memcg(page); -			return 1; -		} -		__set_page_dirty(page, mapping, !PagePrivate(page)); -		unlock_page_memcg(page); +	__folio_mark_dirty(folio, mapping, !folio_test_private(folio)); +	folio_memcg_unlock(folio); -		if (mapping->host) { -			/* !PageAnon && !swapper_space */ -			__mark_inode_dirty(mapping->host, I_DIRTY_PAGES); -		} -		return 1; +	if (mapping->host) { +		/* !PageAnon && !swapper_space */ +		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);  	} -	unlock_page_memcg(page); -	return 0; +	return true;  } -EXPORT_SYMBOL(__set_page_dirty_nobuffers); +EXPORT_SYMBOL(filemap_dirty_folio); -/* - * Call this whenever redirtying a page, to de-account the dirty counters - * (NR_DIRTIED, WB_DIRTIED, tsk->nr_dirtied), so that they match the written - * counters (NR_WRITTEN, WB_WRITTEN) in long term. The mismatches will lead to - * systematic errors in balanced_dirty_ratelimit and the dirty pages position - * control. +/** + * folio_account_redirty - Manually account for redirtying a page. + * @folio: The folio which is being redirtied. + * + * Most filesystems should call folio_redirty_for_writepage() instead + * of this fuction.  If your filesystem is doing writeback outside the + * context of a writeback_control(), it can call this when redirtying + * a folio, to de-account the dirty counters (NR_DIRTIED, WB_DIRTIED, + * tsk->nr_dirtied), so that they match the written counters (NR_WRITTEN, + * WB_WRITTEN) in long term. The mismatches will lead to systematic errors + * in balanced_dirty_ratelimit and the dirty pages position control.   */ -void account_page_redirty(struct page *page) +void folio_account_redirty(struct folio *folio)  { -	struct address_space *mapping = page->mapping; +	struct address_space *mapping = folio->mapping;  	if (mapping && mapping_can_writeback(mapping)) {  		struct inode *inode = mapping->host;  		struct bdi_writeback *wb;  		struct wb_lock_cookie cookie = {}; +		long nr = folio_nr_pages(folio);  		wb = unlocked_inode_to_wb_begin(inode, &cookie); -		current->nr_dirtied--; -		dec_node_page_state(page, NR_DIRTIED); -		dec_wb_stat(wb, WB_DIRTIED); +		current->nr_dirtied -= nr; +		node_stat_mod_folio(folio, NR_DIRTIED, -nr); +		wb_stat_mod(wb, WB_DIRTIED, -nr);  		unlocked_inode_to_wb_end(inode, &cookie);  	}  } -EXPORT_SYMBOL(account_page_redirty); +EXPORT_SYMBOL(folio_account_redirty); -/* - * When a writepage implementation decides that it doesn't want to write this - * page for some reason, it should redirty the locked page via - * redirty_page_for_writepage() and it should then unlock the page and return 0 +/** + * folio_redirty_for_writepage - Decline to write a dirty folio. + * @wbc: The writeback control. + * @folio: The folio. + * + * When a writepage implementation decides that it doesn't want to write + * @folio for some reason, it should call this function, unlock @folio and + * return 0. + * + * Return: True if we redirtied the folio.  False if someone else dirtied + * it first.   
*/ -int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page) +bool folio_redirty_for_writepage(struct writeback_control *wbc, +		struct folio *folio)  { -	int ret; +	bool ret; +	long nr = folio_nr_pages(folio); + +	wbc->pages_skipped += nr; +	ret = filemap_dirty_folio(folio->mapping, folio); +	folio_account_redirty(folio); -	wbc->pages_skipped++; -	ret = __set_page_dirty_nobuffers(page); -	account_page_redirty(page);  	return ret;  } -EXPORT_SYMBOL(redirty_page_for_writepage); +EXPORT_SYMBOL(folio_redirty_for_writepage); -/* - * Dirty a page. +/** + * folio_mark_dirty - Mark a folio as being modified. + * @folio: The folio. + * + * For folios with a mapping this should be done under the page lock + * for the benefit of asynchronous memory errors who prefer a consistent + * dirty state. This rule can be broken in some special cases, + * but should be better not to.   * - * For pages with a mapping this should be done under the page lock for the - * benefit of asynchronous memory errors who prefer a consistent dirty state. - * This rule can be broken in some special cases, but should be better not to. + * Return: True if the folio was newly dirtied, false if it was already dirty.   */ -int set_page_dirty(struct page *page) +bool folio_mark_dirty(struct folio *folio)  { -	struct address_space *mapping = page_mapping(page); +	struct address_space *mapping = folio_mapping(folio); -	page = compound_head(page);  	if (likely(mapping)) {  		/*  		 * readahead/lru_deactivate_page could remain @@ -2604,17 +2634,17 @@ int set_page_dirty(struct page *page)  		 * it will confuse readahead and make it restart the size rampup  		 * process. But it's a trivial problem.  		 */ -		if (PageReclaim(page)) -			ClearPageReclaim(page); -		return mapping->a_ops->set_page_dirty(page); +		if (folio_test_reclaim(folio)) +			folio_clear_reclaim(folio); +		return mapping->a_ops->set_page_dirty(&folio->page);  	} -	if (!PageDirty(page)) { -		if (!TestSetPageDirty(page)) -			return 1; +	if (!folio_test_dirty(folio)) { +		if (!folio_test_set_dirty(folio)) +			return true;  	} -	return 0; +	return false;  } -EXPORT_SYMBOL(set_page_dirty); +EXPORT_SYMBOL(folio_mark_dirty);  /*   * set_page_dirty() is racy if the caller has no reference against @@ -2650,49 +2680,49 @@ EXPORT_SYMBOL(set_page_dirty_lock);   * page without actually doing it through the VM. Can you say "ext3 is   * horribly ugly"? Thought you could.   */ -void __cancel_dirty_page(struct page *page) +void __folio_cancel_dirty(struct folio *folio)  { -	struct address_space *mapping = page_mapping(page); +	struct address_space *mapping = folio_mapping(folio);  	if (mapping_can_writeback(mapping)) {  		struct inode *inode = mapping->host;  		struct bdi_writeback *wb;  		struct wb_lock_cookie cookie = {}; -		lock_page_memcg(page); +		folio_memcg_lock(folio);  		wb = unlocked_inode_to_wb_begin(inode, &cookie); -		if (TestClearPageDirty(page)) -			account_page_cleaned(page, mapping, wb); +		if (folio_test_clear_dirty(folio)) +			folio_account_cleaned(folio, mapping, wb);  		unlocked_inode_to_wb_end(inode, &cookie); -		unlock_page_memcg(page); +		folio_memcg_unlock(folio);  	} else { -		ClearPageDirty(page); +		folio_clear_dirty(folio);  	}  } -EXPORT_SYMBOL(__cancel_dirty_page); +EXPORT_SYMBOL(__folio_cancel_dirty);  /* - * Clear a page's dirty flag, while caring for dirty memory accounting. - * Returns true if the page was previously dirty. - * - * This is for preparing to put the page under writeout.  
We leave the page - * tagged as dirty in the xarray so that a concurrent write-for-sync - * can discover it via a PAGECACHE_TAG_DIRTY walk.  The ->writepage - * implementation will run either set_page_writeback() or set_page_dirty(), - * at which stage we bring the page's dirty flag and xarray dirty tag - * back into sync. - * - * This incoherency between the page's dirty flag and xarray tag is - * unfortunate, but it only exists while the page is locked. + * Clear a folio's dirty flag, while caring for dirty memory accounting. + * Returns true if the folio was previously dirty. + * + * This is for preparing to put the folio under writeout.  We leave + * the folio tagged as dirty in the xarray so that a concurrent + * write-for-sync can discover it via a PAGECACHE_TAG_DIRTY walk. + * The ->writepage implementation will run either folio_start_writeback() + * or folio_mark_dirty(), at which stage we bring the folio's dirty flag + * and xarray dirty tag back into sync. + * + * This incoherency between the folio's dirty flag and xarray tag is + * unfortunate, but it only exists while the folio is locked.   */ -int clear_page_dirty_for_io(struct page *page) +bool folio_clear_dirty_for_io(struct folio *folio)  { -	struct address_space *mapping = page_mapping(page); -	int ret = 0; +	struct address_space *mapping = folio_mapping(folio); +	bool ret = false; -	VM_BUG_ON_PAGE(!PageLocked(page), page); +	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);  	if (mapping && mapping_can_writeback(mapping)) {  		struct inode *inode = mapping->host; @@ -2705,48 +2735,49 @@ int clear_page_dirty_for_io(struct page *page)  		 * We use this sequence to make sure that  		 *  (a) we account for dirty stats properly  		 *  (b) we tell the low-level filesystem to -		 *      mark the whole page dirty if it was +		 *      mark the whole folio dirty if it was  		 *      dirty in a pagetable. Only to then -		 *  (c) clean the page again and return 1 to +		 *  (c) clean the folio again and return 1 to  		 *      cause the writeback.  		 *  		 * This way we avoid all nasty races with the  		 * dirty bit in multiple places and clearing  		 * them concurrently from different threads.  		 * -		 * Note! Normally the "set_page_dirty(page)" +		 * Note! Normally the "folio_mark_dirty(folio)"  		 * has no effect on the actual dirty bit - since  		 * that will already usually be set. But we  		 * need the side effects, and it can help us  		 * avoid races.  		 * -		 * We basically use the page "master dirty bit" +		 * We basically use the folio "master dirty bit"  		 * as a serialization point for all the different  		 * threads doing their things.  		 */ -		if (page_mkclean(page)) -			set_page_dirty(page); +		if (folio_mkclean(folio)) +			folio_mark_dirty(folio);  		/*  		 * We carefully synchronise fault handlers against -		 * installing a dirty pte and marking the page dirty +		 * installing a dirty pte and marking the folio dirty  		 * at this point.  We do this by having them hold the -		 * page lock while dirtying the page, and pages are +		 * page lock while dirtying the folio, and folios are  		 * always locked coming in here, so we get the desired  		 * exclusion.  		 
*/  		wb = unlocked_inode_to_wb_begin(inode, &cookie); -		if (TestClearPageDirty(page)) { -			dec_lruvec_page_state(page, NR_FILE_DIRTY); -			dec_zone_page_state(page, NR_ZONE_WRITE_PENDING); -			dec_wb_stat(wb, WB_RECLAIMABLE); -			ret = 1; +		if (folio_test_clear_dirty(folio)) { +			long nr = folio_nr_pages(folio); +			lruvec_stat_mod_folio(folio, NR_FILE_DIRTY, -nr); +			zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, -nr); +			wb_stat_mod(wb, WB_RECLAIMABLE, -nr); +			ret = true;  		}  		unlocked_inode_to_wb_end(inode, &cookie);  		return ret;  	} -	return TestClearPageDirty(page); +	return folio_test_clear_dirty(folio);  } -EXPORT_SYMBOL(clear_page_dirty_for_io); +EXPORT_SYMBOL(folio_clear_dirty_for_io);  static void wb_inode_writeback_start(struct bdi_writeback *wb)  { @@ -2766,27 +2797,28 @@ static void wb_inode_writeback_end(struct bdi_writeback *wb)  	queue_delayed_work(bdi_wq, &wb->bw_dwork, BANDWIDTH_INTERVAL);  } -int test_clear_page_writeback(struct page *page) +bool __folio_end_writeback(struct folio *folio)  { -	struct address_space *mapping = page_mapping(page); -	int ret; +	long nr = folio_nr_pages(folio); +	struct address_space *mapping = folio_mapping(folio); +	bool ret; -	lock_page_memcg(page); +	folio_memcg_lock(folio);  	if (mapping && mapping_use_writeback_tags(mapping)) {  		struct inode *inode = mapping->host;  		struct backing_dev_info *bdi = inode_to_bdi(inode);  		unsigned long flags;  		xa_lock_irqsave(&mapping->i_pages, flags); -		ret = TestClearPageWriteback(page); +		ret = folio_test_clear_writeback(folio);  		if (ret) { -			__xa_clear_mark(&mapping->i_pages, page_index(page), +			__xa_clear_mark(&mapping->i_pages, folio_index(folio),  						PAGECACHE_TAG_WRITEBACK);  			if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT) {  				struct bdi_writeback *wb = inode_to_wb(inode); -				dec_wb_stat(wb, WB_WRITEBACK); -				__wb_writeout_inc(wb); +				wb_stat_mod(wb, WB_WRITEBACK, -nr); +				__wb_writeout_add(wb, nr);  				if (!mapping_tagged(mapping,  						    PAGECACHE_TAG_WRITEBACK))  					wb_inode_writeback_end(wb); @@ -2799,32 +2831,34 @@ int test_clear_page_writeback(struct page *page)  		xa_unlock_irqrestore(&mapping->i_pages, flags);  	} else { -		ret = TestClearPageWriteback(page); +		ret = folio_test_clear_writeback(folio);  	}  	if (ret) { -		dec_lruvec_page_state(page, NR_WRITEBACK); -		dec_zone_page_state(page, NR_ZONE_WRITE_PENDING); -		inc_node_page_state(page, NR_WRITTEN); +		lruvec_stat_mod_folio(folio, NR_WRITEBACK, -nr); +		zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, -nr); +		node_stat_mod_folio(folio, NR_WRITTEN, nr);  	} -	unlock_page_memcg(page); +	folio_memcg_unlock(folio);  	return ret;  } -int __test_set_page_writeback(struct page *page, bool keep_write) +bool __folio_start_writeback(struct folio *folio, bool keep_write)  { -	struct address_space *mapping = page_mapping(page); -	int ret, access_ret; +	long nr = folio_nr_pages(folio); +	struct address_space *mapping = folio_mapping(folio); +	bool ret; +	int access_ret; -	lock_page_memcg(page); +	folio_memcg_lock(folio);  	if (mapping && mapping_use_writeback_tags(mapping)) { -		XA_STATE(xas, &mapping->i_pages, page_index(page)); +		XA_STATE(xas, &mapping->i_pages, folio_index(folio));  		struct inode *inode = mapping->host;  		struct backing_dev_info *bdi = inode_to_bdi(inode);  		unsigned long flags;  		xas_lock_irqsave(&xas, flags);  		xas_load(&xas); -		ret = TestSetPageWriteback(page); +		ret = folio_test_set_writeback(folio);  		if (!ret) {  			bool on_wblist; @@ -2835,84 +2869,105 
@@ int __test_set_page_writeback(struct page *page, bool keep_write)  			if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT) {  				struct bdi_writeback *wb = inode_to_wb(inode); -				inc_wb_stat(wb, WB_WRITEBACK); +				wb_stat_mod(wb, WB_WRITEBACK, nr);  				if (!on_wblist)  					wb_inode_writeback_start(wb);  			}  			/* -			 * We can come through here when swapping anonymous -			 * pages, so we don't necessarily have an inode to track -			 * for sync. +			 * We can come through here when swapping +			 * anonymous folios, so we don't necessarily +			 * have an inode to track for sync.  			 */  			if (mapping->host && !on_wblist)  				sb_mark_inode_writeback(mapping->host);  		} -		if (!PageDirty(page)) +		if (!folio_test_dirty(folio))  			xas_clear_mark(&xas, PAGECACHE_TAG_DIRTY);  		if (!keep_write)  			xas_clear_mark(&xas, PAGECACHE_TAG_TOWRITE);  		xas_unlock_irqrestore(&xas, flags);  	} else { -		ret = TestSetPageWriteback(page); +		ret = folio_test_set_writeback(folio);  	}  	if (!ret) { -		inc_lruvec_page_state(page, NR_WRITEBACK); -		inc_zone_page_state(page, NR_ZONE_WRITE_PENDING); +		lruvec_stat_mod_folio(folio, NR_WRITEBACK, nr); +		zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, nr);  	} -	unlock_page_memcg(page); -	access_ret = arch_make_page_accessible(page); +	folio_memcg_unlock(folio); +	access_ret = arch_make_folio_accessible(folio);  	/*  	 * If writeback has been triggered on a page that cannot be made  	 * accessible, it is too late to recover here.  	 */ -	VM_BUG_ON_PAGE(access_ret != 0, page); +	VM_BUG_ON_FOLIO(access_ret != 0, folio);  	return ret; -  } -EXPORT_SYMBOL(__test_set_page_writeback); +EXPORT_SYMBOL(__folio_start_writeback); -/* - * Wait for a page to complete writeback +/** + * folio_wait_writeback - Wait for a folio to finish writeback. + * @folio: The folio to wait for. + * + * If the folio is currently being written back to storage, wait for the + * I/O to complete. + * + * Context: Sleeps.  Must be called in process context and with + * no spinlocks held.  Caller should hold a reference on the folio. + * If the folio is not locked, writeback may start again after writeback + * has finished.   */ -void wait_on_page_writeback(struct page *page) +void folio_wait_writeback(struct folio *folio)  { -	while (PageWriteback(page)) { -		trace_wait_on_page_writeback(page, page_mapping(page)); -		wait_on_page_bit(page, PG_writeback); +	while (folio_test_writeback(folio)) { +		trace_folio_wait_writeback(folio, folio_mapping(folio)); +		folio_wait_bit(folio, PG_writeback);  	}  } -EXPORT_SYMBOL_GPL(wait_on_page_writeback); +EXPORT_SYMBOL_GPL(folio_wait_writeback); -/* - * Wait for a page to complete writeback.  Returns -EINTR if we get a - * fatal signal while waiting. +/** + * folio_wait_writeback_killable - Wait for a folio to finish writeback. + * @folio: The folio to wait for. + * + * If the folio is currently being written back to storage, wait for the + * I/O to complete or a fatal signal to arrive. + * + * Context: Sleeps.  Must be called in process context and with + * no spinlocks held.  Caller should hold a reference on the folio. + * If the folio is not locked, writeback may start again after writeback + * has finished. + * Return: 0 on success, -EINTR if we get a fatal signal while waiting.   
*/ -int wait_on_page_writeback_killable(struct page *page) +int folio_wait_writeback_killable(struct folio *folio)  { -	while (PageWriteback(page)) { -		trace_wait_on_page_writeback(page, page_mapping(page)); -		if (wait_on_page_bit_killable(page, PG_writeback)) +	while (folio_test_writeback(folio)) { +		trace_folio_wait_writeback(folio, folio_mapping(folio)); +		if (folio_wait_bit_killable(folio, PG_writeback))  			return -EINTR;  	}  	return 0;  } -EXPORT_SYMBOL_GPL(wait_on_page_writeback_killable); +EXPORT_SYMBOL_GPL(folio_wait_writeback_killable);  /** - * wait_for_stable_page() - wait for writeback to finish, if necessary. - * @page:	The page to wait on. + * folio_wait_stable() - wait for writeback to finish, if necessary. + * @folio: The folio to wait on. + * + * This function determines if the given folio is related to a backing + * device that requires folio contents to be held stable during writeback. + * If so, then it will wait for any pending writeback to complete.   * - * This function determines if the given page is related to a backing device - * that requires page contents to be held stable during writeback.  If so, then - * it will wait for any pending writeback to complete. + * Context: Sleeps.  Must be called in process context and with + * no spinlocks held.  Caller should hold a reference on the folio. + * If the folio is not locked, writeback may start again after writeback + * has finished.   */ -void wait_for_stable_page(struct page *page) +void folio_wait_stable(struct folio *folio)  { -	page = thp_head(page); -	if (page->mapping->host->i_sb->s_iflags & SB_I_STABLE_WRITES) -		wait_on_page_writeback(page); +	if (folio->mapping->host->i_sb->s_iflags & SB_I_STABLE_WRITES) +		folio_wait_writeback(folio);  } -EXPORT_SYMBOL_GPL(wait_for_stable_page); +EXPORT_SYMBOL_GPL(folio_wait_stable); |
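
folio_write_one() above replaces write_one_page(): it expects the folio locked on entry, writes out all folio_nr_pages() pages, waits for the I/O and unlocks the folio before returning. A minimal sketch of a synchronous flush helper built on that contract, assuming <linux/pagemap.h> and <linux/writeback.h> supply the declarations and using "foofs" as a purely hypothetical filesystem name (the sketches below reuse the same assumptions):

#include <linux/pagemap.h>
#include <linux/writeback.h>

/* Hypothetical helper, not part of the patch: flush one dirty folio. */
static int foofs_flush_folio(struct folio *folio)
{
	int err;

	folio_lock(folio);
	/* folio_write_one() waits for the writeback and unlocks the folio. */
	err = folio_write_one(folio);
	if (err)
		pr_warn("foofs: writeout failed: %d\n", err);
	return err;
}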
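
The kernel-doc added for filemap_dirty_folio() says that filesystems which do not use buffer heads should call it from their set_page_dirty address space operation. A sketch of how a hypothetical filesystem might wire that up, assuming page_folio() from the same folio series converts the struct page the aop still receives:

static int foofs_set_page_dirty(struct page *page)
{
	struct folio *folio = page_folio(page);

	/* Tag the folio dirty in the page cache and dirty the inode. */
	return filemap_dirty_folio(folio->mapping, folio);
}

static const struct address_space_operations foofs_aops = {
	.set_page_dirty	= foofs_set_page_dirty,
	/* .readpage, .writepage, ... omitted */
};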
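
Likewise, the folio_redirty_for_writepage() kernel-doc describes the bail-out path for a writepage implementation that decides not to write a folio: redirty it, unlock it and return 0. A sketch of that pattern; foofs_would_block() and foofs_do_writepage() are invented placeholders for filesystem-specific logic:

static bool foofs_would_block(struct folio *folio);		/* placeholder */
static int foofs_do_writepage(struct folio *folio,
			      struct writeback_control *wbc);	/* placeholder */

static int foofs_writepage(struct page *page, struct writeback_control *wbc)
{
	struct folio *folio = page_folio(page);

	/*
	 * For non-integrity writeback, decline to write if it would block
	 * (placeholder condition) and keep the folio dirty so a later
	 * writeback pass picks it up.
	 */
	if (wbc->sync_mode != WB_SYNC_ALL && foofs_would_block(folio)) {
		folio_redirty_for_writepage(wbc, folio);
		folio_unlock(folio);
		return 0;
	}

	return foofs_do_writepage(folio, wbc);
}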