From c603844bdcb5238980de8d58b393f52d7729d651 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Thu, 19 May 2016 17:13:38 -0700 Subject: mm, page_alloc: convert alloc_flags to unsigned alloc_flags is a bitmask of flags but it is signed which does not necessarily generate the best code depending on the compiler. Even without an impact, it makes more sense that this be unsigned. Signed-off-by: Mel Gorman Acked-by: Vlastimil Babka Cc: Jesper Dangaard Brouer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compaction.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux/compaction.h') diff --git a/include/linux/compaction.h b/include/linux/compaction.h index d7c8de583a23..242b660f64e6 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -39,12 +39,12 @@ extern int sysctl_compact_unevictable_allowed; extern int fragmentation_index(struct zone *zone, unsigned int order); extern unsigned long try_to_compact_pages(gfp_t gfp_mask, unsigned int order, - int alloc_flags, const struct alloc_context *ac, - enum migrate_mode mode, int *contended); + unsigned int alloc_flags, const struct alloc_context *ac, + enum migrate_mode mode, int *contended); extern void compact_pgdat(pg_data_t *pgdat, int order); extern void reset_isolation_suitable(pg_data_t *pgdat); extern unsigned long compaction_suitable(struct zone *zone, int order, - int alloc_flags, int classzone_idx); + unsigned int alloc_flags, int classzone_idx); extern void defer_compaction(struct zone *zone, int order); extern bool compaction_deferred(struct zone *zone, int order); -- cgit From ea7ab982b6bdb7ce218fd3a7850bb2e2b414fdd0 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 20 May 2016 16:56:38 -0700 Subject: mm, compaction: change COMPACT_ constants into enum Compaction code is doing weird dances between COMPACT_FOO -> int -> unsigned long But there doesn't seem to be any reason for that. All functions which return/use one of those constants are not expecting any other value so it really makes sense to define an enum for them and make it clear that no other values are expected. This is a pure cleanup and shouldn't introduce any functional changes. Signed-off-by: Michal Hocko Acked-by: Vlastimil Babka Acked-by: Hillf Danton Cc: David Rientjes Cc: Johannes Weiner Cc: Joonsoo Kim Cc: Mel Gorman Cc: Tetsuo Handa Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compaction.h | 45 +++++++++++++++++++++++++++------------------ mm/compaction.c | 27 ++++++++++++++------------- mm/page_alloc.c | 2 +- 3 files changed, 42 insertions(+), 32 deletions(-) (limited to 'include/linux/compaction.h') diff --git a/include/linux/compaction.h b/include/linux/compaction.h index 242b660f64e6..706cbf00e919 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -2,21 +2,29 @@ #define _LINUX_COMPACTION_H /* Return values for compact_zone() and try_to_compact_pages() */ -/* compaction didn't start as it was deferred due to past failures */ -#define COMPACT_DEFERRED 0 -/* compaction didn't start as it was not possible or direct reclaim was more suitable */ -#define COMPACT_SKIPPED 1 -/* compaction should continue to another pageblock */ -#define COMPACT_CONTINUE 2 -/* direct compaction partially compacted a zone and there are suitable pages */ -#define COMPACT_PARTIAL 3 -/* The full zone was compacted */ -#define COMPACT_COMPLETE 4 -/* For more detailed tracepoint output */ -#define COMPACT_NO_SUITABLE_PAGE 5 -#define COMPACT_NOT_SUITABLE_ZONE 6 -#define COMPACT_CONTENDED 7 /* When adding new states, please adjust include/trace/events/compaction.h */ +enum compact_result { + /* compaction didn't start as it was deferred due to past failures */ + COMPACT_DEFERRED, + /* + * compaction didn't start as it was not possible or direct reclaim + * was more suitable + */ + COMPACT_SKIPPED, + /* compaction should continue to another pageblock */ + COMPACT_CONTINUE, + /* + * direct compaction partially compacted a zone and there are suitable + * pages + */ + COMPACT_PARTIAL, + /* The full zone was compacted */ + COMPACT_COMPLETE, + /* For more detailed tracepoint output */ + COMPACT_NO_SUITABLE_PAGE, + COMPACT_NOT_SUITABLE_ZONE, + COMPACT_CONTENDED, +}; /* Used to signal whether compaction detected need_sched() or lock contention */ /* No contention detected */ @@ -38,12 +46,13 @@ extern int sysctl_extfrag_handler(struct ctl_table *table, int write, extern int sysctl_compact_unevictable_allowed; extern int fragmentation_index(struct zone *zone, unsigned int order); -extern unsigned long try_to_compact_pages(gfp_t gfp_mask, unsigned int order, +extern enum compact_result try_to_compact_pages(gfp_t gfp_mask, + unsigned int order, unsigned int alloc_flags, const struct alloc_context *ac, enum migrate_mode mode, int *contended); extern void compact_pgdat(pg_data_t *pgdat, int order); extern void reset_isolation_suitable(pg_data_t *pgdat); -extern unsigned long compaction_suitable(struct zone *zone, int order, +extern enum compact_result compaction_suitable(struct zone *zone, int order, unsigned int alloc_flags, int classzone_idx); extern void defer_compaction(struct zone *zone, int order); @@ -57,7 +66,7 @@ extern void kcompactd_stop(int nid); extern void wakeup_kcompactd(pg_data_t *pgdat, int order, int classzone_idx); #else -static inline unsigned long try_to_compact_pages(gfp_t gfp_mask, +static inline enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order, int alloc_flags, const struct alloc_context *ac, enum migrate_mode mode, int *contended) @@ -73,7 +82,7 @@ static inline void reset_isolation_suitable(pg_data_t *pgdat) { } -static inline unsigned long compaction_suitable(struct zone *zone, int order, +static inline enum compact_result compaction_suitable(struct zone *zone, int order, int alloc_flags, int classzone_idx) { return COMPACT_SKIPPED; diff --git a/mm/compaction.c b/mm/compaction.c index eda3c2244f30..e721d252c5d2 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -1229,7 +1229,7 @@ static inline bool is_via_compact_memory(int order) return order == -1; } -static int __compact_finished(struct zone *zone, struct compact_control *cc, +static enum compact_result __compact_finished(struct zone *zone, struct compact_control *cc, const int migratetype) { unsigned int order; @@ -1292,8 +1292,9 @@ static int __compact_finished(struct zone *zone, struct compact_control *cc, return COMPACT_NO_SUITABLE_PAGE; } -static int compact_finished(struct zone *zone, struct compact_control *cc, - const int migratetype) +static enum compact_result compact_finished(struct zone *zone, + struct compact_control *cc, + const int migratetype) { int ret; @@ -1312,7 +1313,7 @@ static int compact_finished(struct zone *zone, struct compact_control *cc, * COMPACT_PARTIAL - If the allocation would succeed without compaction * COMPACT_CONTINUE - If compaction should run now */ -static unsigned long __compaction_suitable(struct zone *zone, int order, +static enum compact_result __compaction_suitable(struct zone *zone, int order, unsigned int alloc_flags, int classzone_idx) { @@ -1358,11 +1359,11 @@ static unsigned long __compaction_suitable(struct zone *zone, int order, return COMPACT_CONTINUE; } -unsigned long compaction_suitable(struct zone *zone, int order, +enum compact_result compaction_suitable(struct zone *zone, int order, unsigned int alloc_flags, int classzone_idx) { - unsigned long ret; + enum compact_result ret; ret = __compaction_suitable(zone, order, alloc_flags, classzone_idx); trace_mm_compaction_suitable(zone, order, ret); @@ -1372,9 +1373,9 @@ unsigned long compaction_suitable(struct zone *zone, int order, return ret; } -static int compact_zone(struct zone *zone, struct compact_control *cc) +static enum compact_result compact_zone(struct zone *zone, struct compact_control *cc) { - int ret; + enum compact_result ret; unsigned long start_pfn = zone->zone_start_pfn; unsigned long end_pfn = zone_end_pfn(zone); const int migratetype = gfpflags_to_migratetype(cc->gfp_mask); @@ -1530,11 +1531,11 @@ out: return ret; } -static unsigned long compact_zone_order(struct zone *zone, int order, +static enum compact_result compact_zone_order(struct zone *zone, int order, gfp_t gfp_mask, enum migrate_mode mode, int *contended, unsigned int alloc_flags, int classzone_idx) { - unsigned long ret; + enum compact_result ret; struct compact_control cc = { .nr_freepages = 0, .nr_migratepages = 0, @@ -1572,7 +1573,7 @@ int sysctl_extfrag_threshold = 500; * * This is the main entry point for direct page compaction. */ -unsigned long try_to_compact_pages(gfp_t gfp_mask, unsigned int order, +enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order, unsigned int alloc_flags, const struct alloc_context *ac, enum migrate_mode mode, int *contended) { @@ -1580,7 +1581,7 @@ unsigned long try_to_compact_pages(gfp_t gfp_mask, unsigned int order, int may_perform_io = gfp_mask & __GFP_IO; struct zoneref *z; struct zone *zone; - int rc = COMPACT_DEFERRED; + enum compact_result rc = COMPACT_DEFERRED; int all_zones_contended = COMPACT_CONTENDED_LOCK; /* init for &= op */ *contended = COMPACT_CONTENDED_NONE; @@ -1594,7 +1595,7 @@ unsigned long try_to_compact_pages(gfp_t gfp_mask, unsigned int order, /* Compact each zone in the list */ for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, ac->high_zoneidx, ac->nodemask) { - int status; + enum compact_result status; int zone_contended; if (compaction_deferred(zone, order)) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index edbdf56b3c9b..ed62c4b90598 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3188,7 +3188,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, enum migrate_mode mode, int *contended_compaction, bool *deferred_compaction) { - unsigned long compact_result; + enum compact_result compact_result; struct page *page; if (!order) -- cgit From 1d4746d395975e0ff5103e20ab169d1a95b4ef9e Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 20 May 2016 16:56:44 -0700 Subject: mm, compaction: distinguish COMPACT_DEFERRED from COMPACT_SKIPPED try_to_compact_pages() can currently return COMPACT_SKIPPED even when the compaction is defered for some zone just because zone DMA is skipped in 99% of cases due to watermark checks. This makes COMPACT_DEFERRED basically unusable for the page allocator as a feedback mechanism. Make sure we distinguish those two states properly and switch their ordering in the enum. This would mean that the COMPACT_SKIPPED will be returned only when all eligible zones are skipped. As a result COMPACT_DEFERRED handling for THP in __alloc_pages_slowpath will be more precise and we would bail out rather than reclaim. Signed-off-by: Michal Hocko Acked-by: Vlastimil Babka Acked-by: Hillf Danton Cc: David Rientjes Cc: Johannes Weiner Cc: Joonsoo Kim Cc: Mel Gorman Cc: Tetsuo Handa Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compaction.h | 7 +++++-- include/trace/events/compaction.h | 2 +- mm/compaction.c | 8 +++++--- 3 files changed, 11 insertions(+), 6 deletions(-) (limited to 'include/linux/compaction.h') diff --git a/include/linux/compaction.h b/include/linux/compaction.h index 706cbf00e919..11f228712ed5 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -4,13 +4,16 @@ /* Return values for compact_zone() and try_to_compact_pages() */ /* When adding new states, please adjust include/trace/events/compaction.h */ enum compact_result { - /* compaction didn't start as it was deferred due to past failures */ - COMPACT_DEFERRED, /* * compaction didn't start as it was not possible or direct reclaim * was more suitable */ COMPACT_SKIPPED, + /* compaction didn't start as it was deferred due to past failures */ + COMPACT_DEFERRED, + /* compaction not active last round */ + COMPACT_INACTIVE = COMPACT_DEFERRED, + /* compaction should continue to another pageblock */ COMPACT_CONTINUE, /* diff --git a/include/trace/events/compaction.h b/include/trace/events/compaction.h index e215bf68f521..6ba16c86d7db 100644 --- a/include/trace/events/compaction.h +++ b/include/trace/events/compaction.h @@ -10,8 +10,8 @@ #include #define COMPACTION_STATUS \ - EM( COMPACT_DEFERRED, "deferred") \ EM( COMPACT_SKIPPED, "skipped") \ + EM( COMPACT_DEFERRED, "deferred") \ EM( COMPACT_CONTINUE, "continue") \ EM( COMPACT_PARTIAL, "partial") \ EM( COMPACT_COMPLETE, "complete") \ diff --git a/mm/compaction.c b/mm/compaction.c index 455ecd87f48d..b2b94474dd28 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -1578,7 +1578,7 @@ enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order, int may_perform_io = gfp_mask & __GFP_IO; struct zoneref *z; struct zone *zone; - enum compact_result rc = COMPACT_DEFERRED; + enum compact_result rc = COMPACT_SKIPPED; int all_zones_contended = COMPACT_CONTENDED_LOCK; /* init for &= op */ *contended = COMPACT_CONTENDED_NONE; @@ -1595,8 +1595,10 @@ enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order, enum compact_result status; int zone_contended; - if (compaction_deferred(zone, order)) + if (compaction_deferred(zone, order)) { + rc = max_t(enum compact_result, COMPACT_DEFERRED, rc); continue; + } status = compact_zone_order(zone, order, gfp_mask, mode, &zone_contended, alloc_flags, @@ -1667,7 +1669,7 @@ break_loop: * If at least one zone wasn't deferred or skipped, we report if all * zones that were tried were lock contended. */ - if (rc > COMPACT_SKIPPED && all_zones_contended) + if (rc > COMPACT_INACTIVE && all_zones_contended) *contended = COMPACT_CONTENDED_LOCK; return rc; -- cgit From c8f7de0bfae36e8532e5e25a39d15407f02aca78 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 20 May 2016 16:56:47 -0700 Subject: mm, compaction: distinguish between full and partial COMPACT_COMPLETE COMPACT_COMPLETE now means that compaction and free scanner met. This is not very useful information if somebody just wants to use this feedback and make any decisions based on that. The current caller might be a poor guy who just happened to scan tiny portion of the zone and that could be the reason no suitable pages were compacted. Make sure we distinguish the full and partial zone walks. Consumers should treat COMPACT_PARTIAL_SKIPPED as a potential success and be optimistic in retrying. The existing users of COMPACT_COMPLETE are conservatively changed to use COMPACT_PARTIAL_SKIPPED as well but some of them should be probably reconsidered and only defer the compaction only for COMPACT_COMPLETE with the new semantic. This patch shouldn't introduce any functional changes. Signed-off-by: Michal Hocko Acked-by: Vlastimil Babka Acked-by: Hillf Danton Cc: David Rientjes Cc: Johannes Weiner Cc: Joonsoo Kim Cc: Mel Gorman Cc: Tetsuo Handa Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compaction.h | 10 +++++++++- include/trace/events/compaction.h | 1 + mm/compaction.c | 14 +++++++++++--- mm/internal.h | 1 + 4 files changed, 22 insertions(+), 4 deletions(-) (limited to 'include/linux/compaction.h') diff --git a/include/linux/compaction.h b/include/linux/compaction.h index 11f228712ed5..9b37f9d3f7a8 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -21,7 +21,15 @@ enum compact_result { * pages */ COMPACT_PARTIAL, - /* The full zone was compacted */ + /* + * direct compaction has scanned part of the zone but wasn't successfull + * to compact suitable pages. + */ + COMPACT_PARTIAL_SKIPPED, + /* + * The full zone was compacted scanned but wasn't successfull to compact + * suitable pages. + */ COMPACT_COMPLETE, /* For more detailed tracepoint output */ COMPACT_NO_SUITABLE_PAGE, diff --git a/include/trace/events/compaction.h b/include/trace/events/compaction.h index 6ba16c86d7db..36e2d6fb1360 100644 --- a/include/trace/events/compaction.h +++ b/include/trace/events/compaction.h @@ -14,6 +14,7 @@ EM( COMPACT_DEFERRED, "deferred") \ EM( COMPACT_CONTINUE, "continue") \ EM( COMPACT_PARTIAL, "partial") \ + EM( COMPACT_PARTIAL_SKIPPED, "partial_skipped") \ EM( COMPACT_COMPLETE, "complete") \ EM( COMPACT_NO_SUITABLE_PAGE, "no_suitable_page") \ EM( COMPACT_NOT_SUITABLE_ZONE, "not_suitable_zone") \ diff --git a/mm/compaction.c b/mm/compaction.c index b2b94474dd28..4af1577adb5c 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -1252,7 +1252,10 @@ static enum compact_result __compact_finished(struct zone *zone, struct compact_ if (cc->direct_compaction) zone->compact_blockskip_flush = true; - return COMPACT_COMPLETE; + if (cc->whole_zone) + return COMPACT_COMPLETE; + else + return COMPACT_PARTIAL_SKIPPED; } if (is_via_compact_memory(cc->order)) @@ -1413,6 +1416,10 @@ static enum compact_result compact_zone(struct zone *zone, struct compact_contro zone->compact_cached_migrate_pfn[0] = cc->migrate_pfn; zone->compact_cached_migrate_pfn[1] = cc->migrate_pfn; } + + if (cc->migrate_pfn == start_pfn) + cc->whole_zone = true; + cc->last_migrated_pfn = 0; trace_mm_compaction_begin(start_pfn, cc->migrate_pfn, @@ -1634,7 +1641,8 @@ enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order, goto break_loop; } - if (mode != MIGRATE_ASYNC && status == COMPACT_COMPLETE) { + if (mode != MIGRATE_ASYNC && (status == COMPACT_COMPLETE || + status == COMPACT_PARTIAL_SKIPPED)) { /* * We think that allocation won't succeed in this zone * so we defer compaction there. If it ends up @@ -1881,7 +1889,7 @@ static void kcompactd_do_work(pg_data_t *pgdat) cc.classzone_idx, 0)) { success = true; compaction_defer_reset(zone, cc.order, false); - } else if (status == COMPACT_COMPLETE) { + } else if (status == COMPACT_PARTIAL_SKIPPED || status == COMPACT_COMPLETE) { /* * We use sync migration mode here, so we defer like * sync direct compaction does. diff --git a/mm/internal.h b/mm/internal.h index 3ac544f1963f..f6f3353b0868 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -174,6 +174,7 @@ struct compact_control { enum migrate_mode mode; /* Async or sync migration mode */ bool ignore_skip_hint; /* Scan blocks even if marked skip */ bool direct_compaction; /* False from kcompactd or /proc/... */ + bool whole_zone; /* Whole zone has been scanned */ int order; /* order a direct compactor needs */ const gfp_t gfp_mask; /* gfp mask of a direct compactor */ const unsigned int alloc_flags; /* alloc flags of a direct compactor */ -- cgit From 4f9a358c36fcdad3ea1db263ec4d484a70ad543e Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 20 May 2016 16:56:50 -0700 Subject: mm, compaction: update compaction_result ordering compaction_result will be used as the primary feedback channel for compaction users. At the same time try_to_compact_pages (and potentially others) assume a certain ordering where a more specific feedback takes precendence. This gets a bit awkward when we have conflicting feedback from different zones. E.g one returing COMPACT_COMPLETE meaning the full zone has been scanned without any outcome while other returns with COMPACT_PARTIAL aka made some progress. The caller should get COMPACT_PARTIAL because that means that the compaction still can make some progress. The same applies for COMPACT_PARTIAL vs COMPACT_PARTIAL_SKIPPED. Reorder PARTIAL to be the largest one so the larger the value is the more progress we have done. Signed-off-by: Michal Hocko Acked-by: Vlastimil Babka Acked-by: Hillf Danton Cc: David Rientjes Cc: Johannes Weiner Cc: Joonsoo Kim Cc: Mel Gorman Cc: Tetsuo Handa Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compaction.h | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) (limited to 'include/linux/compaction.h') diff --git a/include/linux/compaction.h b/include/linux/compaction.h index 9b37f9d3f7a8..ff39fa0a1ede 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -4,6 +4,8 @@ /* Return values for compact_zone() and try_to_compact_pages() */ /* When adding new states, please adjust include/trace/events/compaction.h */ enum compact_result { + /* For more detailed tracepoint output - internal to compaction */ + COMPACT_NOT_SUITABLE_ZONE, /* * compaction didn't start as it was not possible or direct reclaim * was more suitable @@ -11,30 +13,34 @@ enum compact_result { COMPACT_SKIPPED, /* compaction didn't start as it was deferred due to past failures */ COMPACT_DEFERRED, + /* compaction not active last round */ COMPACT_INACTIVE = COMPACT_DEFERRED, + /* For more detailed tracepoint output - internal to compaction */ + COMPACT_NO_SUITABLE_PAGE, /* compaction should continue to another pageblock */ COMPACT_CONTINUE, + /* - * direct compaction partially compacted a zone and there are suitable - * pages + * The full zone was compacted scanned but wasn't successfull to compact + * suitable pages. */ - COMPACT_PARTIAL, + COMPACT_COMPLETE, /* * direct compaction has scanned part of the zone but wasn't successfull * to compact suitable pages. */ COMPACT_PARTIAL_SKIPPED, + + /* compaction terminated prematurely due to lock contentions */ + COMPACT_CONTENDED, + /* - * The full zone was compacted scanned but wasn't successfull to compact - * suitable pages. + * direct compaction partially compacted a zone and there might be + * suitable pages */ - COMPACT_COMPLETE, - /* For more detailed tracepoint output */ - COMPACT_NO_SUITABLE_PAGE, - COMPACT_NOT_SUITABLE_ZONE, - COMPACT_CONTENDED, + COMPACT_PARTIAL, }; /* Used to signal whether compaction detected need_sched() or lock contention */ -- cgit From cab1802b5f0dddea30547a7451fda8c7e4c593f0 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 20 May 2016 16:56:56 -0700 Subject: mm, compaction: abstract compaction feedback to helpers Compaction can provide a wild variation of feedback to the caller. Many of them are implementation specific and the caller of the compaction (especially the page allocator) shouldn't be bound to specifics of the current implementation. This patch abstracts the feedback into three basic types: - compaction_made_progress - compaction was active and made some progress. - compaction_failed - compaction failed and further attempts to invoke it would most probably fail and therefore it is not worth retrying - compaction_withdrawn - compaction wasn't invoked for an implementation specific reasons. In the current implementation it means that the compaction was deferred, contended or the page scanners met too early without any progress. Retrying is still worthwhile. [vbabka@suse.cz: do not change thp back off behavior] [akpm@linux-foundation.org: fix typo in comment, per Hillf] Signed-off-by: Michal Hocko Acked-by: Hillf Danton Acked-by: Vlastimil Babka Cc: David Rientjes Cc: Johannes Weiner Cc: Joonsoo Kim Cc: Mel Gorman Cc: Tetsuo Handa Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compaction.h | 79 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) (limited to 'include/linux/compaction.h') diff --git a/include/linux/compaction.h b/include/linux/compaction.h index ff39fa0a1ede..8d8c916fe67a 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -78,6 +78,70 @@ extern void compaction_defer_reset(struct zone *zone, int order, bool alloc_success); extern bool compaction_restarting(struct zone *zone, int order); +/* Compaction has made some progress and retrying makes sense */ +static inline bool compaction_made_progress(enum compact_result result) +{ + /* + * Even though this might sound confusing this in fact tells us + * that the compaction successfully isolated and migrated some + * pageblocks. + */ + if (result == COMPACT_PARTIAL) + return true; + + return false; +} + +/* Compaction has failed and it doesn't make much sense to keep retrying. */ +static inline bool compaction_failed(enum compact_result result) +{ + /* All zones were scanned completely and still not result. */ + if (result == COMPACT_COMPLETE) + return true; + + return false; +} + +/* + * Compaction has backed off for some reason. It might be throttling or + * lock contention. Retrying is still worthwhile. + */ +static inline bool compaction_withdrawn(enum compact_result result) +{ + /* + * Compaction backed off due to watermark checks for order-0 + * so the regular reclaim has to try harder and reclaim something. + */ + if (result == COMPACT_SKIPPED) + return true; + + /* + * If compaction is deferred for high-order allocations, it is + * because sync compaction recently failed. If this is the case + * and the caller requested a THP allocation, we do not want + * to heavily disrupt the system, so we fail the allocation + * instead of entering direct reclaim. + */ + if (result == COMPACT_DEFERRED) + return true; + + /* + * If compaction in async mode encounters contention or blocks higher + * priority task we back off early rather than cause stalls. + */ + if (result == COMPACT_CONTENDED) + return true; + + /* + * Page scanners have met but we haven't scanned full zones so this + * is a back off in fact. + */ + if (result == COMPACT_PARTIAL_SKIPPED) + return true; + + return false; +} + extern int kcompactd_run(int nid); extern void kcompactd_stop(int nid); extern void wakeup_kcompactd(pg_data_t *pgdat, int order, int classzone_idx); @@ -114,6 +178,21 @@ static inline bool compaction_deferred(struct zone *zone, int order) return true; } +static inline bool compaction_made_progress(enum compact_result result) +{ + return false; +} + +static inline bool compaction_failed(enum compact_result result) +{ + return false; +} + +static inline bool compaction_withdrawn(enum compact_result result) +{ + return true; +} + static inline int kcompactd_run(int nid) { return 0; -- cgit From 86a294a81f93d6f36d00ec3ff779d36d218f852d Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 20 May 2016 16:57:12 -0700 Subject: mm, oom, compaction: prevent from should_compact_retry looping for ever for costly orders "mm: consider compaction feedback also for costly allocation" has removed the upper bound for the reclaim/compaction retries based on the number of reclaimed pages for costly orders. While this is desirable the patch did miss a mis interaction between reclaim, compaction and the retry logic. The direct reclaim tries to get zones over min watermark while compaction backs off and returns COMPACT_SKIPPED when all zones are below low watermark + 1< Acked-by: Hillf Danton Acked-by: Vlastimil Babka Cc: David Rientjes Cc: Johannes Weiner Cc: Joonsoo Kim Cc: Mel Gorman Cc: Tetsuo Handa Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compaction.h | 4 ++++ include/linux/mmzone.h | 3 +++ mm/compaction.c | 42 +++++++++++++++++++++++++++++++++++++++--- mm/page_alloc.c | 23 +++++++++++++---------- 4 files changed, 59 insertions(+), 13 deletions(-) (limited to 'include/linux/compaction.h') diff --git a/include/linux/compaction.h b/include/linux/compaction.h index 8d8c916fe67a..a58c852a268f 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -142,6 +142,10 @@ static inline bool compaction_withdrawn(enum compact_result result) return false; } + +bool compaction_zonelist_suitable(struct alloc_context *ac, int order, + int alloc_flags); + extern int kcompactd_run(int nid); extern void kcompactd_stop(int nid); extern void wakeup_kcompactd(pg_data_t *pgdat, int order, int classzone_idx); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index c60db2096fd8..8dd0333b01dc 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -739,6 +739,9 @@ static inline bool is_dev_zone(const struct zone *zone) extern struct mutex zonelists_mutex; void build_all_zonelists(pg_data_t *pgdat, struct zone *zone); void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx); +bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, + int classzone_idx, unsigned int alloc_flags, + long free_pages); bool zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, int classzone_idx, unsigned int alloc_flags); diff --git a/mm/compaction.c b/mm/compaction.c index 4af1577adb5c..d8a20fcf8678 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -1318,7 +1318,8 @@ static enum compact_result compact_finished(struct zone *zone, */ static enum compact_result __compaction_suitable(struct zone *zone, int order, unsigned int alloc_flags, - int classzone_idx) + int classzone_idx, + unsigned long wmark_target) { int fragindex; unsigned long watermark; @@ -1341,7 +1342,8 @@ static enum compact_result __compaction_suitable(struct zone *zone, int order, * allocated and for a short time, the footprint is higher */ watermark += (2UL << order); - if (!zone_watermark_ok(zone, 0, watermark, classzone_idx, alloc_flags)) + if (!__zone_watermark_ok(zone, 0, watermark, classzone_idx, + alloc_flags, wmark_target)) return COMPACT_SKIPPED; /* @@ -1368,7 +1370,8 @@ enum compact_result compaction_suitable(struct zone *zone, int order, { enum compact_result ret; - ret = __compaction_suitable(zone, order, alloc_flags, classzone_idx); + ret = __compaction_suitable(zone, order, alloc_flags, classzone_idx, + zone_page_state(zone, NR_FREE_PAGES)); trace_mm_compaction_suitable(zone, order, ret); if (ret == COMPACT_NOT_SUITABLE_ZONE) ret = COMPACT_SKIPPED; @@ -1376,6 +1379,39 @@ enum compact_result compaction_suitable(struct zone *zone, int order, return ret; } +bool compaction_zonelist_suitable(struct alloc_context *ac, int order, + int alloc_flags) +{ + struct zone *zone; + struct zoneref *z; + + /* + * Make sure at least one zone would pass __compaction_suitable if we continue + * retrying the reclaim. + */ + for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, ac->high_zoneidx, + ac->nodemask) { + unsigned long available; + enum compact_result compact_result; + + /* + * Do not consider all the reclaimable memory because we do not + * want to trash just for a single high order allocation which + * is even not guaranteed to appear even if __compaction_suitable + * is happy about the watermark check. + */ + available = zone_reclaimable_pages(zone) / order; + available += zone_page_state_snapshot(zone, NR_FREE_PAGES); + compact_result = __compaction_suitable(zone, order, alloc_flags, + ac_classzone_idx(ac), available); + if (compact_result != COMPACT_SKIPPED && + compact_result != COMPACT_NOT_SUITABLE_ZONE) + return true; + } + + return false; +} + static enum compact_result compact_zone(struct zone *zone, struct compact_control *cc) { enum compact_result ret; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index dea406a62e3d..089f760ce64a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2750,10 +2750,9 @@ static inline bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order) * one free page of a suitable size. Checking now avoids taking the zone lock * to check in the allocation paths if no pages are free. */ -static bool __zone_watermark_ok(struct zone *z, unsigned int order, - unsigned long mark, int classzone_idx, - unsigned int alloc_flags, - long free_pages) +bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, + int classzone_idx, unsigned int alloc_flags, + long free_pages) { long min = mark; int o; @@ -3256,8 +3255,8 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, } static inline bool -should_compact_retry(unsigned int order, enum compact_result compact_result, - enum migrate_mode *migrate_mode, +should_compact_retry(struct alloc_context *ac, int order, int alloc_flags, + enum compact_result compact_result, enum migrate_mode *migrate_mode, int compaction_retries) { int max_retries = MAX_COMPACT_RETRIES; @@ -3281,9 +3280,11 @@ should_compact_retry(unsigned int order, enum compact_result compact_result, /* * make sure the compaction wasn't deferred or didn't bail out early * due to locks contention before we declare that we should give up. + * But do not retry if the given zonelist is not suitable for + * compaction. */ if (compaction_withdrawn(compact_result)) - return true; + return compaction_zonelist_suitable(ac, order, alloc_flags); /* * !costly requests are much more important than __GFP_REPEAT @@ -3311,7 +3312,8 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, } static inline bool -should_compact_retry(unsigned int order, enum compact_result compact_result, +should_compact_retry(struct alloc_context *ac, unsigned int order, int alloc_flags, + enum compact_result compact_result, enum migrate_mode *migrate_mode, int compaction_retries) { @@ -3706,8 +3708,9 @@ retry: * of free memory (see __compaction_suitable) */ if (did_some_progress > 0 && - should_compact_retry(order, compact_result, - &migration_mode, compaction_retries)) + should_compact_retry(ac, order, alloc_flags, + compact_result, &migration_mode, + compaction_retries)) goto retry; /* Reclaim has failed us, start killing things */ -- cgit