Diffstat (limited to 'mm/compaction.c')
-rw-r--r--	mm/compaction.c	189
1 file changed, 181 insertions(+), 8 deletions(-)
diff --git a/mm/compaction.c b/mm/compaction.c
index 86375605faa9..176dcded298e 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -50,6 +50,24 @@ static inline void count_compact_events(enum vm_event_item item, long delta)
 #define pageblock_start_pfn(pfn)	block_start_pfn(pfn, pageblock_order)
 #define pageblock_end_pfn(pfn)		block_end_pfn(pfn, pageblock_order)
 
+/*
+ * Fragmentation score check interval for proactive compaction purposes.
+ */
+static const unsigned int HPAGE_FRAG_CHECK_INTERVAL_MSEC = 500;
+
+/*
+ * Page order with-respect-to which proactive compaction
+ * calculates external fragmentation, which is used as
+ * the "fragmentation score" of a node/zone.
+ */
+#if defined CONFIG_TRANSPARENT_HUGEPAGE
+#define COMPACTION_HPAGE_ORDER	HPAGE_PMD_ORDER
+#elif defined CONFIG_HUGETLBFS
+#define COMPACTION_HPAGE_ORDER	HUGETLB_PAGE_ORDER
+#else
+#define COMPACTION_HPAGE_ORDER	(PMD_SHIFT - PAGE_SHIFT)
+#endif
+
 static unsigned long release_freepages(struct list_head *freelist)
 {
 	struct page *page, *next;
@@ -136,7 +154,7 @@ EXPORT_SYMBOL(__ClearPageMovable);
 
 /*
  * Compaction is deferred when compaction fails to result in a page
- * allocation success. 1 << compact_defer_limit compactions are skipped up
+ * allocation success. 1 << compact_defer_shift, compactions are skipped up
  * to a limit of 1 << COMPACT_MAX_DEFER_SHIFT
  */
 void defer_compaction(struct zone *zone, int order)
@@ -991,7 +1009,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 		del_page_from_lru_list(page, lruvec, page_lru(page));
 		mod_node_page_state(page_pgdat(page),
 				NR_ISOLATED_ANON + page_is_file_lru(page),
-				hpage_nr_pages(page));
+				thp_nr_pages(page));
 
 isolate_success:
 		list_add(&page->lru, &cc->migratepages);
@@ -1459,7 +1477,7 @@ static void isolate_freepages(struct compact_control *cc)
 	 * this pfn aligned down to the pageblock boundary, because we do
 	 * block_start_pfn -= pageblock_nr_pages in the for loop.
 	 * For ending point, take care when isolating in last pageblock of a
-	 * a zone which ends in the middle of a pageblock.
+	 * zone which ends in the middle of a pageblock.
	 * The low boundary is the end of the pageblock the migration scanner
 	 * is using.
 	 */
@@ -1857,6 +1875,76 @@ static inline bool is_via_compact_memory(int order)
 	return order == -1;
 }
 
+static bool kswapd_is_running(pg_data_t *pgdat)
+{
+	return pgdat->kswapd && (pgdat->kswapd->state == TASK_RUNNING);
+}
+
+/*
+ * A zone's fragmentation score is the external fragmentation wrt to the
+ * COMPACTION_HPAGE_ORDER scaled by the zone's size. It returns a value
+ * in the range [0, 100].
+ *
+ * The scaling factor ensures that proactive compaction focuses on larger
+ * zones like ZONE_NORMAL, rather than smaller, specialized zones like
+ * ZONE_DMA32. For smaller zones, the score value remains close to zero,
+ * and thus never exceeds the high threshold for proactive compaction.
+ */
+static unsigned int fragmentation_score_zone(struct zone *zone)
+{
+	unsigned long score;
+
+	score = zone->present_pages *
+			extfrag_for_order(zone, COMPACTION_HPAGE_ORDER);
+	return div64_ul(score, zone->zone_pgdat->node_present_pages + 1);
+}
+
+/*
+ * The per-node proactive (background) compaction process is started by its
+ * corresponding kcompactd thread when the node's fragmentation score
+ * exceeds the high threshold. The compaction process remains active till
+ * the node's score falls below the low threshold, or one of the back-off
+ * conditions is met.
+ */
+static unsigned int fragmentation_score_node(pg_data_t *pgdat)
+{
+	unsigned int score = 0;
+	int zoneid;
+
+	for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
+		struct zone *zone;
+
+		zone = &pgdat->node_zones[zoneid];
+		score += fragmentation_score_zone(zone);
+	}
+
+	return score;
+}
+
+static unsigned int fragmentation_score_wmark(pg_data_t *pgdat, bool low)
+{
+	unsigned int wmark_low;
+
+	/*
+	 * Cap the low watermak to avoid excessive compaction
+	 * activity in case a user sets the proactivess tunable
+	 * close to 100 (maximum).
+	 */
+	wmark_low = max(100U - sysctl_compaction_proactiveness, 5U);
+	return low ? wmark_low : min(wmark_low + 10, 100U);
+}
+
+static bool should_proactive_compact_node(pg_data_t *pgdat)
+{
+	int wmark_high;
+
+	if (!sysctl_compaction_proactiveness || kswapd_is_running(pgdat))
+		return false;
+
+	wmark_high = fragmentation_score_wmark(pgdat, false);
+	return fragmentation_score_node(pgdat) > wmark_high;
+}
+
 static enum compact_result __compact_finished(struct compact_control *cc)
 {
 	unsigned int order;
@@ -1883,6 +1971,25 @@ static enum compact_result __compact_finished(struct compact_control *cc)
 			return COMPACT_PARTIAL_SKIPPED;
 	}
 
+	if (cc->proactive_compaction) {
+		int score, wmark_low;
+		pg_data_t *pgdat;
+
+		pgdat = cc->zone->zone_pgdat;
+		if (kswapd_is_running(pgdat))
+			return COMPACT_PARTIAL_SKIPPED;
+
+		score = fragmentation_score_zone(cc->zone);
+		wmark_low = fragmentation_score_wmark(pgdat, true);
+
+		if (score > wmark_low)
+			ret = COMPACT_CONTINUE;
+		else
+			ret = COMPACT_SUCCESS;
+
+		goto out;
+	}
+
 	if (is_via_compact_memory(cc->order))
 		return COMPACT_CONTINUE;
@@ -1941,6 +2048,7 @@ static enum compact_result __compact_finished(struct compact_control *cc)
 		}
 	}
 
+out:
 	if (cc->contended || fatal_signal_pending(current))
 		ret = COMPACT_CONTENDED;
@@ -2421,6 +2529,41 @@ enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
 	return rc;
 }
 
+/*
+ * Compact all zones within a node till each zone's fragmentation score
+ * reaches within proactive compaction thresholds (as determined by the
+ * proactiveness tunable).
+ *
+ * It is possible that the function returns before reaching score targets
+ * due to various back-off conditions, such as, contention on per-node or
+ * per-zone locks.
+ */
+static void proactive_compact_node(pg_data_t *pgdat)
+{
+	int zoneid;
+	struct zone *zone;
+	struct compact_control cc = {
+		.order = -1,
+		.mode = MIGRATE_SYNC_LIGHT,
+		.ignore_skip_hint = true,
+		.whole_zone = true,
+		.gfp_mask = GFP_KERNEL,
+		.proactive_compaction = true,
+	};
+
+	for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
+		zone = &pgdat->node_zones[zoneid];
+		if (!populated_zone(zone))
+			continue;
+
+		cc.zone = zone;
+
+		compact_zone(&cc, NULL);
+
+		VM_BUG_ON(!list_empty(&cc.freepages));
+		VM_BUG_ON(!list_empty(&cc.migratepages));
+	}
+}
 
 /* Compact all zones within a node */
 static void compact_node(int nid)
@@ -2468,6 +2611,13 @@ static void compact_nodes(void)
 
 int sysctl_compact_memory;
 
 /*
+ * Tunable for proactive compaction. It determines how
+ * aggressively the kernel should compact memory in the
+ * background. It takes values in the range [0, 100].
+ */
+unsigned int __read_mostly sysctl_compaction_proactiveness = 20;
+
+/*
  * This is the entry point for compacting all nodes via
  * /proc/sys/vm/compact_memory
  */
@@ -2646,6 +2796,7 @@ static int kcompactd(void *p)
 {
 	pg_data_t *pgdat = (pg_data_t*)p;
 	struct task_struct *tsk = current;
+	unsigned int proactive_defer = 0;
 
 	const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
@@ -2661,12 +2812,34 @@ static int kcompactd(void *p)
 		unsigned long pflags;
 
 		trace_mm_compaction_kcompactd_sleep(pgdat->node_id);
-		wait_event_freezable(pgdat->kcompactd_wait,
-				kcompactd_work_requested(pgdat));
+		if (wait_event_freezable_timeout(pgdat->kcompactd_wait,
+			kcompactd_work_requested(pgdat),
+			msecs_to_jiffies(HPAGE_FRAG_CHECK_INTERVAL_MSEC))) {
+
+			psi_memstall_enter(&pflags);
+			kcompactd_do_work(pgdat);
+			psi_memstall_leave(&pflags);
+			continue;
+		}
 
-		psi_memstall_enter(&pflags);
-		kcompactd_do_work(pgdat);
-		psi_memstall_leave(&pflags);
+		/* kcompactd wait timeout */
+		if (should_proactive_compact_node(pgdat)) {
+			unsigned int prev_score, score;
+
+			if (proactive_defer) {
+				proactive_defer--;
+				continue;
+			}
+			prev_score = fragmentation_score_node(pgdat);
+			proactive_compact_node(pgdat);
+			score = fragmentation_score_node(pgdat);
+			/*
+			 * Defer proactive compaction if the fragmentation
+			 * score did not go down i.e. no progress made.
+			 */
+			proactive_defer = score < prev_score ?
+					0 : 1 << COMPACT_MAX_DEFER_SHIFT;
+		}
 	}
 
 	return 0;
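As a rough illustration of the thresholds introduced above, the score and watermark arithmetic from fragmentation_score_zone() and fragmentation_score_wmark() can be mirrored in a small standalone C program. This is only a sketch of the math, not kernel code: extfrag_for_order() and the per-zone page counts are kernel internals, so the zone sizes and fragmentation values below are made-up stand-ins.

/*
 * Userspace sketch of the proactive compaction score math from the hunks
 * above. All inputs are made-up stand-ins for values the kernel derives
 * from zone/node state (extfrag_for_order() etc.).
 */
#include <stdio.h>

/* Mirrors fragmentation_score_zone(): extfrag (0..100) scaled by the
 * zone's share of the node's present pages. */
static unsigned int zone_score(unsigned long zone_pages,
			       unsigned long node_pages,
			       unsigned int extfrag)
{
	unsigned long long score = (unsigned long long)zone_pages * extfrag;

	return score / (node_pages + 1);
}

/* Mirrors fragmentation_score_wmark(): thresholds derived from the
 * proactiveness tunable, with the low watermark capped at 5. */
static unsigned int score_wmark(unsigned int proactiveness, int low)
{
	unsigned int wmark_low = 100 - proactiveness;

	if (wmark_low < 5)
		wmark_low = 5;
	if (low)
		return wmark_low;
	return wmark_low + 10 > 100 ? 100 : wmark_low + 10;
}

int main(void)
{
	/* Hypothetical node: 4 GiB ZONE_NORMAL + 1 GiB ZONE_DMA32 with
	 * 4 KiB pages, both 60% fragmented at COMPACTION_HPAGE_ORDER. */
	unsigned long normal = 4UL << 18, dma32 = 1UL << 18;
	unsigned long node = normal + dma32;
	unsigned int proactiveness = 20;	/* default in this patch */
	unsigned int node_score = zone_score(normal, node, 60) +
				  zone_score(dma32, node, 60);

	printf("node score %u, wmark low %u, wmark high %u\n", node_score,
	       score_wmark(proactiveness, 1), score_wmark(proactiveness, 0));
	/*
	 * Prints "node score 58, wmark low 80, wmark high 90": kcompactd
	 * would not start proactive compaction until the node score rose
	 * above 90, and would then keep going until it fell below 80.
	 */
	return 0;
}

With these stand-in numbers, ZONE_DMA32 contributes only 11 of the 58 points despite being 60% fragmented itself, which is the size-scaling behaviour described in the fragmentation_score_zone() comment.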
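On the timing side, the kcompactd wait in the last hunk now times out every HPAGE_FRAG_CHECK_INTERVAL_MSEC (500 ms), so the fragmentation score is re-checked at most twice per second when no regular kcompactd work arrives. When a proactive run makes no progress (the score does not drop), proactive_defer is set to 1 << COMPACT_MAX_DEFER_SHIFT; with that constant at its usual value of 6, the next 64 timeouts, roughly 32 seconds, are skipped before another attempt. The sysctl_compaction_proactiveness variable added here defaults to 20, giving low/high watermarks of 80/90; the sysctl wiring is not part of this file, but assuming the rest of the series exposes it under /proc/sys/vm/, setting the tunable to 0 disables proactive compaction entirely, as the early return in should_proactive_compact_node() shows.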