diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index b2e4c92d0445..60bdf8dc02a3 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -13,6 +13,14 @@
 /* The full zone was compacted */
 #define COMPACT_COMPLETE	4
 
+/* Used to signal whether compaction detected need_resched() or lock contention */
+/* No contention detected */
+#define COMPACT_CONTENDED_NONE	0
+/* Either need_resched() was true or fatal signal pending */
+#define COMPACT_CONTENDED_SCHED	1
+/* Zone lock or lru_lock was contended in async compaction */
+#define COMPACT_CONTENDED_LOCK	2
+
 #ifdef CONFIG_COMPACTION
 extern int sysctl_compact_memory;
 extern int sysctl_compaction_handler(struct ctl_table *table, int write,
@@ -24,7 +32,7 @@ extern int sysctl_extfrag_handler(struct ctl_table *table, int write,
 extern int fragmentation_index(struct zone *zone, unsigned int order);
 extern unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *mask,
-			enum migrate_mode mode, bool *contended,
+			enum migrate_mode mode, int *contended,
 			struct zone **candidate_zone);
 extern void compact_pgdat(pg_data_t *pgdat, int order);
 extern void reset_isolation_suitable(pg_data_t *pgdat);
@@ -94,7 +102,7 @@ static inline bool compaction_restarting(struct zone *zone, int order)
 #else
 static inline unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *nodemask,
-			enum migrate_mode mode, bool *contended,
+			enum migrate_mode mode, int *contended,
 			struct zone **candidate_zone)
 {
 	return COMPACT_CONTINUE;
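
For reference, a minimal userspace sketch of the precedence these constants encode (the two stub variables below stand in for the kernel's need_resched() and spin_is_contended(), which are not modeled here): scheduling contention is reported ahead of lock contention, matching the reworked should_release_lock() in mm/compaction.c below.

#include <stdio.h>

#define COMPACT_CONTENDED_NONE	0
#define COMPACT_CONTENDED_SCHED	1
#define COMPACT_CONTENDED_LOCK	2

/* Stubs standing in for the kernel's need_resched()/spin_is_contended() */
static int need_resched_stub;
static int lock_contended_stub;

/* Mirrors the priority order of the reworked should_release_lock() */
static int classify_contention(void)
{
	if (need_resched_stub)
		return COMPACT_CONTENDED_SCHED;
	else if (lock_contended_stub)
		return COMPACT_CONTENDED_LOCK;

	return COMPACT_CONTENDED_NONE;
}

int main(void)
{
	need_resched_stub = 1;
	lock_contended_stub = 1;
	/* Sched contention wins even when both are true: prints 1 */
	printf("%d\n", classify_contention());
	return 0;
}
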
diff --git a/mm/compaction.c b/mm/compaction.c
index 1067c07cb33d..26bb20ef853d 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -223,9 +223,21 @@ static void update_pageblock_skip(struct compact_control *cc,
 }
 #endif /* CONFIG_COMPACTION */
 
-static inline bool should_release_lock(spinlock_t *lock)
+static int should_release_lock(spinlock_t *lock)
 {
-	return need_resched() || spin_is_contended(lock);
+	/*
+	 * Sched contention has higher priority here as we may potentially
+	 * have to abort the whole compaction ASAP. Returning with lock
+	 * contention means we will try another zone, and further decisions
+	 * are influenced only when all zones are lock contended. That means
+	 * potentially missing a lock contention is less critical.
+	 */
+	if (need_resched())
+		return COMPACT_CONTENDED_SCHED;
+	else if (spin_is_contended(lock))
+		return COMPACT_CONTENDED_LOCK;
+
+	return COMPACT_CONTENDED_NONE;
 }
 
 /*
@@ -240,7 +252,9 @@ static inline bool should_release_lock(spinlock_t *lock)
 static bool compact_checklock_irqsave(spinlock_t *lock, unsigned long *flags,
 				      bool locked, struct compact_control *cc)
 {
-	if (should_release_lock(lock)) {
+	int contended = should_release_lock(lock);
+
+	if (contended) {
 		if (locked) {
 			spin_unlock_irqrestore(lock, *flags);
 			locked = false;
@@ -248,7 +262,7 @@ static bool compact_checklock_irqsave(spinlock_t *lock, unsigned long *flags,
 
 		/* async aborts if taking too long or contended */
 		if (cc->mode == MIGRATE_ASYNC) {
-			cc->contended = true;
+			cc->contended = contended;
 			return false;
 		}
 
@@ -274,7 +288,7 @@ static inline bool compact_should_abort(struct compact_control *cc)
 	/* async compaction aborts if contended */
 	if (need_resched()) {
 		if (cc->mode == MIGRATE_ASYNC) {
-			cc->contended = true;
+			cc->contended = COMPACT_CONTENDED_SCHED;
 			return true;
 		}
 
@@ -1140,7 +1154,7 @@ out:
 }
 
 static unsigned long compact_zone_order(struct zone *zone, int order,
-		gfp_t gfp_mask, enum migrate_mode mode, bool *contended)
+		gfp_t gfp_mask, enum migrate_mode mode, int *contended)
 {
 	unsigned long ret;
 	struct compact_control cc = {
@@ -1172,14 +1186,15 @@ int sysctl_extfrag_threshold = 500;
  * @gfp_mask: The GFP mask of the current allocation
  * @nodemask: The allowed nodes to allocate from
  * @mode: The migration mode for async, sync light, or sync migration
- * @contended: Return value that is true if compaction was aborted due to lock contention
+ * @contended: Return value that indicates whether compaction was aborted due
+ *	       to need_resched() or lock contention
  * @candidate_zone: Return the zone where we think allocation should succeed
  *
  * This is the main entry point for direct page compaction.
  */
 unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *nodemask,
-			enum migrate_mode mode, bool *contended,
+			enum migrate_mode mode, int *contended,
 			struct zone **candidate_zone)
 {
 	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
@@ -1189,6 +1204,9 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
 	struct zone *zone;
 	int rc = COMPACT_DEFERRED;
 	int alloc_flags = 0;
+	int all_zones_contended = COMPACT_CONTENDED_LOCK; /* init for &= op */
+
+	*contended = COMPACT_CONTENDED_NONE;
 
 	/* Check if the GFP flags allow compaction */
 	if (!order || !may_enter_fs || !may_perform_io)
@@ -1202,13 +1220,19 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
 	for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx,
 								nodemask) {
 		int status;
+		int zone_contended;
 
 		if (compaction_deferred(zone, order))
 			continue;
 
 		status = compact_zone_order(zone, order, gfp_mask, mode,
-						contended);
+							&zone_contended);
 		rc = max(status, rc);
+		/*
+		 * It takes at least one zone that wasn't lock contended
+		 * to clear all_zones_contended.
+		 */
+		all_zones_contended &= zone_contended;
 
 		/* If a normal allocation would succeed, stop compacting */
 		if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0,
@@ -1221,8 +1245,21 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			 * succeeds in this zone.
 			 */
 			compaction_defer_reset(zone, order, false);
-			break;
-		} else if (mode != MIGRATE_ASYNC) {
+			/*
+			 * It is possible that async compaction aborted due to
+			 * need_resched() and the watermarks were ok thanks to
+			 * somebody else freeing memory. The allocation can
+			 * however still fail so we better signal the
+			 * need_resched() contention anyway (this will not
+			 * prevent the allocation attempt).
+			 */
+			if (zone_contended == COMPACT_CONTENDED_SCHED)
+				*contended = COMPACT_CONTENDED_SCHED;
+
+			goto break_loop;
+		}
+
+		if (mode != MIGRATE_ASYNC) {
 			/*
 			 * We think that allocation won't succeed in this zone
 			 * so we defer compaction there. If it ends up
@@ -1230,8 +1267,36 @@
 			 */
 			defer_compaction(zone, order);
 		}
+
+		/*
+		 * We might have stopped compacting due to need_resched() in
+		 * async compaction, or due to a fatal signal detected. In that
+		 * case do not try further zones and signal need_resched()
+		 * contention.
+		 */
+		if ((zone_contended == COMPACT_CONTENDED_SCHED)
+					|| fatal_signal_pending(current)) {
+			*contended = COMPACT_CONTENDED_SCHED;
+			goto break_loop;
+		}
+
+		continue;
+break_loop:
+		/*
+		 * We might not have tried all the zones, so be conservative
+		 * and assume they are not all lock contended.
+		 */
+		all_zones_contended = 0;
+		break;
 	}
 
+	/*
+	 * If at least one zone wasn't deferred or skipped, we report if all
+	 * zones that were tried were lock contended.
+	 */
+	if (rc > COMPACT_SKIPPED && all_zones_contended)
+		*contended = COMPACT_CONTENDED_LOCK;
+
 	return rc;
 }
 
diff --git a/mm/internal.h b/mm/internal.h
index 5a0738fa649c..4c1d604c396c 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -144,8 +144,8 @@ struct compact_control {
 	int order;			/* order a direct compactor needs */
 	int migratetype;		/* MOVABLE, RECLAIMABLE etc */
 	struct zone *zone;
-	bool contended;			/* True if a lock was contended, or
-					 * need_resched() true during async
+	int contended;			/* Signal need_resched() or lock
+					 * contention detected during
 					 * compaction
 					 */
 };
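
The all_zones_contended bookkeeping above is compact but subtle. Here is a standalone sketch of the same accumulation, assuming a hypothetical report_contention() helper that condenses the loop and ignores the rc > COMPACT_SKIPPED guard and the early exit on suitable watermarks: the accumulator starts at COMPACT_CONTENDED_LOCK (2) and is &='d with each zone's result, so 2 & 2 stays 2 while a single uncontended zone (0) clears it for good, and a SCHED result takes the break_loop path before the mask could matter.

#include <stdio.h>

#define COMPACT_CONTENDED_NONE	0
#define COMPACT_CONTENDED_SCHED	1
#define COMPACT_CONTENDED_LOCK	2

/* Condensed model of the per-zone bookkeeping in try_to_compact_pages() */
static int report_contention(const int *zone_results, int nr_zones)
{
	int all_zones_contended = COMPACT_CONTENDED_LOCK; /* init for &= op */
	int i;

	for (i = 0; i < nr_zones; i++) {
		if (zone_results[i] == COMPACT_CONTENDED_SCHED)
			return COMPACT_CONTENDED_SCHED; /* kernel: break_loop */
		all_zones_contended &= zone_results[i];
	}

	return all_zones_contended ? COMPACT_CONTENDED_LOCK
				   : COMPACT_CONTENDED_NONE;
}

int main(void)
{
	int all_lock[] = { COMPACT_CONTENDED_LOCK, COMPACT_CONTENDED_LOCK };
	int one_clean[] = { COMPACT_CONTENDED_LOCK, COMPACT_CONTENDED_NONE };

	printf("every zone lock contended -> %d\n",
	       report_contention(all_lock, 2));
	printf("one zone uncontended      -> %d\n",
	       report_contention(one_clean, 2));
	return 0;
}
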
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index dfbf54b51649..313338d74095 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2297,7 +2297,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	struct zonelist *zonelist, enum zone_type high_zoneidx,
 	nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
 	int classzone_idx, int migratetype, enum migrate_mode mode,
-	bool *contended_compaction, bool *deferred_compaction)
+	int *contended_compaction, bool *deferred_compaction)
 {
 	struct zone *last_compact_zone = NULL;
 	unsigned long compact_result;
@@ -2371,7 +2371,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	struct zonelist *zonelist, enum zone_type high_zoneidx,
 	nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
 	int classzone_idx, int migratetype, enum migrate_mode mode,
-	bool *contended_compaction, bool *deferred_compaction)
+	int *contended_compaction, bool *deferred_compaction)
 {
 	return NULL;
 }
@@ -2547,7 +2547,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	unsigned long did_some_progress;
 	enum migrate_mode migration_mode = MIGRATE_ASYNC;
 	bool deferred_compaction = false;
-	bool contended_compaction = false;
+	int contended_compaction = COMPACT_CONTENDED_NONE;
 
 	/*
 	 * In the slowpath, we sanity check order to avoid ever trying to
@@ -2651,15 +2651,36 @@ rebalance:
 	if (page)
 		goto got_pg;
 
-	/*
-	 * If compaction is deferred for high-order allocations, it is because
-	 * sync compaction recently failed. In this is the case and the caller
-	 * requested a movable allocation that does not heavily disrupt the
-	 * system then fail the allocation instead of entering direct reclaim.
-	 */
-	if ((deferred_compaction || contended_compaction) &&
-						(gfp_mask & __GFP_NO_KSWAPD))
-		goto nopage;
+	/* Checks for THP-specific high-order allocations */
+	if ((gfp_mask & GFP_TRANSHUGE) == GFP_TRANSHUGE) {
+		/*
+		 * If compaction is deferred for high-order allocations, it is
+		 * because sync compaction recently failed. If this is the case
+		 * and the caller requested a THP allocation, we do not want
+		 * to heavily disrupt the system, so we fail the allocation
+		 * instead of entering direct reclaim.
+		 */
+		if (deferred_compaction)
+			goto nopage;
+
+		/*
+		 * In all zones where compaction was attempted (and not
+		 * deferred or skipped), lock contention has been detected.
+		 * For THP allocation we do not want to disrupt others,
+		 * so we fall back to base pages instead.
+		 */
+		if (contended_compaction == COMPACT_CONTENDED_LOCK)
+			goto nopage;
+
+		/*
+		 * If compaction was aborted due to need_resched(), we do not
+		 * want to further increase allocation latency, unless it is
+		 * khugepaged trying to collapse.
+		 */
+		if (contended_compaction == COMPACT_CONTENDED_SCHED
+			&& !(current->flags & PF_KTHREAD))
+			goto nopage;
+	}
 
 	/*
 	 * It can become very expensive to allocate transparent hugepages at
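
To make the new fallback policy concrete, here is a compile-and-run userspace sketch of the three THP checks above. The flag values are illustrative placeholders (the real GFP_TRANSHUGE in include/linux/gfp.h is a composite of several __GFP bits, which is why the code tests (gfp_mask & GFP_TRANSHUGE) == GFP_TRANSHUGE rather than a single bit), and is_kthread stands in for current->flags & PF_KTHREAD:

#include <stdio.h>

#define COMPACT_CONTENDED_NONE	0
#define COMPACT_CONTENDED_SCHED	1
#define COMPACT_CONTENDED_LOCK	2

/* Illustrative placeholders, not the real include/linux/gfp.h bits */
#define __GFP_MOVABLE	0x1u
#define __GFP_COMP	0x2u
#define __GFP_NO_KSWAPD	0x4u
#define GFP_TRANSHUGE	(__GFP_MOVABLE | __GFP_COMP | __GFP_NO_KSWAPD)

/* Returns 1 if the allocation should bail out to base pages ("nopage") */
static int thp_should_fail_fast(unsigned int gfp_mask, int deferred,
				int contended, int is_kthread)
{
	/* All bits of the composite mask must match, hence ==, not just & */
	if ((gfp_mask & GFP_TRANSHUGE) != GFP_TRANSHUGE)
		return 0;	/* not a THP allocation: keep going */

	if (deferred)
		return 1;	/* sync compaction recently failed */

	if (contended == COMPACT_CONTENDED_LOCK)
		return 1;	/* all tried zones were lock contended */

	/* khugepaged (a kthread) is allowed to keep trying */
	if (contended == COMPACT_CONTENDED_SCHED && !is_kthread)
		return 1;

	return 0;
}

int main(void)
{
	printf("THP, lock contended: %d\n",
	       thp_should_fail_fast(GFP_TRANSHUGE, 0, COMPACT_CONTENDED_LOCK, 0));
	printf("THP, sched contended, khugepaged: %d\n",
	       thp_should_fail_fast(GFP_TRANSHUGE, 0, COMPACT_CONTENDED_SCHED, 1));
	return 0;
}
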