diff options
Diffstat (limited to 'include/linux/mmzone.h')
-rw-r--r-- | include/linux/mmzone.h | 227 |
1 files changed, 136 insertions, 91 deletions
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index c60df9257cc7..f2e4e90621ec 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -85,13 +85,6 @@ extern int page_group_by_mobility_disabled; get_pfnblock_flags_mask(page, page_to_pfn(page), \ PB_migrate_end, MIGRATETYPE_MASK) -static inline int get_pfnblock_migratetype(struct page *page, unsigned long pfn) -{ - BUILD_BUG_ON(PB_migrate_end - PB_migrate != 2); - return get_pfnblock_flags_mask(page, pfn, PB_migrate_end, - MIGRATETYPE_MASK); -} - struct free_area { struct list_head free_list[MIGRATE_TYPES]; unsigned long nr_free; @@ -100,7 +93,7 @@ struct free_area { struct pglist_data; /* - * zone->lock and zone->lru_lock are two of the hottest locks in the kernel. + * zone->lock and the zone lru_lock are two of the hottest locks in the kernel. * So add a wild amount of padding here to ensure that they fall into separate * cachelines. There are very few zone structures in the machine, so space * consumption is not a concern here. @@ -117,36 +110,23 @@ struct zone_padding { enum zone_stat_item { /* First 128 byte cacheline (assuming 64 bit words) */ NR_FREE_PAGES, - NR_ALLOC_BATCH, - NR_LRU_BASE, - NR_INACTIVE_ANON = NR_LRU_BASE, /* must match order of LRU_[IN]ACTIVE */ - NR_ACTIVE_ANON, /* " " " " " */ - NR_INACTIVE_FILE, /* " " " " " */ - NR_ACTIVE_FILE, /* " " " " " */ - NR_UNEVICTABLE, /* " " " " " */ + NR_ZONE_LRU_BASE, /* Used only for compaction and reclaim retry */ + NR_ZONE_INACTIVE_ANON = NR_ZONE_LRU_BASE, + NR_ZONE_ACTIVE_ANON, + NR_ZONE_INACTIVE_FILE, + NR_ZONE_ACTIVE_FILE, + NR_ZONE_UNEVICTABLE, + NR_ZONE_WRITE_PENDING, /* Count of dirty, writeback and unstable pages */ NR_MLOCK, /* mlock()ed pages found and moved off LRU */ - NR_ANON_PAGES, /* Mapped anonymous pages */ - NR_FILE_MAPPED, /* pagecache pages mapped into pagetables. - only modified from process context */ - NR_FILE_PAGES, - NR_FILE_DIRTY, - NR_WRITEBACK, NR_SLAB_RECLAIMABLE, NR_SLAB_UNRECLAIMABLE, NR_PAGETABLE, /* used for pagetables */ - NR_KERNEL_STACK, + NR_KERNEL_STACK_KB, /* measured in KiB */ /* Second 128 byte cacheline */ - NR_UNSTABLE_NFS, /* NFS unstable pages */ NR_BOUNCE, - NR_VMSCAN_WRITE, - NR_VMSCAN_IMMEDIATE, /* Prioritise for reclaim when writeback ends */ - NR_WRITEBACK_TEMP, /* Writeback using temporary buffers */ - NR_ISOLATED_ANON, /* Temporary isolated pages from anon lru */ - NR_ISOLATED_FILE, /* Temporary isolated pages from file lru */ - NR_SHMEM, /* shmem pages (included tmpfs/GEM pages) */ - NR_DIRTIED, /* page dirtyings since bootup */ - NR_WRITTEN, /* page writings since bootup */ - NR_PAGES_SCANNED, /* pages scanned since last reclaim */ +#if IS_ENABLED(CONFIG_ZSMALLOC) + NR_ZSPAGES, /* allocated in zsmalloc */ +#endif #ifdef CONFIG_NUMA NUMA_HIT, /* allocated in intended node */ NUMA_MISS, /* allocated in non intended node */ @@ -155,12 +135,40 @@ enum zone_stat_item { NUMA_LOCAL, /* allocation from local node */ NUMA_OTHER, /* allocation from other node */ #endif + NR_FREE_CMA_PAGES, + NR_VM_ZONE_STAT_ITEMS }; + +enum node_stat_item { + NR_LRU_BASE, + NR_INACTIVE_ANON = NR_LRU_BASE, /* must match order of LRU_[IN]ACTIVE */ + NR_ACTIVE_ANON, /* " " " " " */ + NR_INACTIVE_FILE, /* " " " " " */ + NR_ACTIVE_FILE, /* " " " " " */ + NR_UNEVICTABLE, /* " " " " " */ + NR_ISOLATED_ANON, /* Temporary isolated pages from anon lru */ + NR_ISOLATED_FILE, /* Temporary isolated pages from file lru */ + NR_PAGES_SCANNED, /* pages scanned since last reclaim */ WORKINGSET_REFAULT, WORKINGSET_ACTIVATE, WORKINGSET_NODERECLAIM, - NR_ANON_TRANSPARENT_HUGEPAGES, - NR_FREE_CMA_PAGES, - NR_VM_ZONE_STAT_ITEMS }; + NR_ANON_MAPPED, /* Mapped anonymous pages */ + NR_FILE_MAPPED, /* pagecache pages mapped into pagetables. + only modified from process context */ + NR_FILE_PAGES, + NR_FILE_DIRTY, + NR_WRITEBACK, + NR_WRITEBACK_TEMP, /* Writeback using temporary buffers */ + NR_SHMEM, /* shmem pages (included tmpfs/GEM pages) */ + NR_SHMEM_THPS, + NR_SHMEM_PMDMAPPED, + NR_ANON_THPS, + NR_UNSTABLE_NFS, /* NFS unstable pages */ + NR_VMSCAN_WRITE, + NR_VMSCAN_IMMEDIATE, /* Prioritise for reclaim when writeback ends */ + NR_DIRTIED, /* page dirtyings since bootup */ + NR_WRITTEN, /* page writings since bootup */ + NR_VM_NODE_STAT_ITEMS +}; /* * We do arithmetic on the LRU lists in various places in the code, @@ -217,7 +225,7 @@ struct lruvec { /* Evictions & activations on the inactive file list */ atomic_long_t inactive_age; #ifdef CONFIG_MEMCG - struct zone *zone; + struct pglist_data *pgdat; #endif }; @@ -269,6 +277,11 @@ struct per_cpu_pageset { #endif }; +struct per_cpu_nodestat { + s8 stat_threshold; + s8 vm_node_stat_diff[NR_VM_NODE_STAT_ITEMS]; +}; + #endif /* !__GENERATING_BOUNDS.H */ enum zone_type { @@ -350,22 +363,9 @@ struct zone { #ifdef CONFIG_NUMA int node; #endif - - /* - * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on - * this zone's LRU. Maintained by the pageout code. - */ - unsigned int inactive_ratio; - struct pglist_data *zone_pgdat; struct per_cpu_pageset __percpu *pageset; - /* - * This is a per-zone reserve of pages that are not available - * to userspace allocations. - */ - unsigned long totalreserve_pages; - #ifndef CONFIG_SPARSEMEM /* * Flags for a pageblock_nr_pages block. See pageblock-flags.h. @@ -374,14 +374,6 @@ struct zone { unsigned long *pageblock_flags; #endif /* CONFIG_SPARSEMEM */ -#ifdef CONFIG_NUMA - /* - * zone reclaim becomes active if more unmapped pages exist. - */ - unsigned long min_unmapped_pages; - unsigned long min_slab_pages; -#endif /* CONFIG_NUMA */ - /* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */ unsigned long zone_start_pfn; @@ -474,24 +466,21 @@ struct zone { unsigned long wait_table_hash_nr_entries; unsigned long wait_table_bits; + /* Write-intensive fields used from the page allocator */ ZONE_PADDING(_pad1_) + /* free areas of different sizes */ struct free_area free_area[MAX_ORDER]; /* zone flags, see below */ unsigned long flags; - /* Write-intensive fields used from the page allocator */ + /* Primarily protects free_area */ spinlock_t lock; + /* Write-intensive fields used by compaction and vmstats. */ ZONE_PADDING(_pad2_) - /* Write-intensive fields used by page reclaim */ - - /* Fields commonly accessed by the page reclaim scanner */ - spinlock_t lru_lock; - struct lruvec lruvec; - /* * When free pages are below this point, additional steps are taken * when reading the number of free pages to avoid per-cpu counter @@ -529,20 +518,18 @@ struct zone { atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS]; } ____cacheline_internodealigned_in_smp; -enum zone_flags { - ZONE_RECLAIM_LOCKED, /* prevents concurrent reclaim */ - ZONE_OOM_LOCKED, /* zone is in OOM killer zonelist */ - ZONE_CONGESTED, /* zone has many dirty pages backed by +enum pgdat_flags { + PGDAT_CONGESTED, /* pgdat has many dirty pages backed by * a congested BDI */ - ZONE_DIRTY, /* reclaim scanning has recently found + PGDAT_DIRTY, /* reclaim scanning has recently found * many dirty file pages at the tail * of the LRU. */ - ZONE_WRITEBACK, /* reclaim scanning has recently found + PGDAT_WRITEBACK, /* reclaim scanning has recently found * many pages under writeback */ - ZONE_FAIR_DEPLETED, /* fair zone policy batch depleted */ + PGDAT_RECLAIM_LOCKED, /* prevents concurrent reclaim */ }; static inline unsigned long zone_end_pfn(const struct zone *zone) @@ -666,8 +653,9 @@ typedef struct pglist_data { wait_queue_head_t pfmemalloc_wait; struct task_struct *kswapd; /* Protected by mem_hotplug_begin/end() */ - int kswapd_max_order; - enum zone_type classzone_idx; + int kswapd_order; + enum zone_type kswapd_classzone_idx; + #ifdef CONFIG_COMPACTION int kcompactd_max_order; enum zone_type kcompactd_classzone_idx; @@ -684,6 +672,23 @@ typedef struct pglist_data { /* Number of pages migrated during the rate limiting time interval */ unsigned long numabalancing_migrate_nr_pages; #endif + /* + * This is a per-node reserve of pages that are not available + * to userspace allocations. + */ + unsigned long totalreserve_pages; + +#ifdef CONFIG_NUMA + /* + * zone reclaim becomes active if more unmapped pages exist. + */ + unsigned long min_unmapped_pages; + unsigned long min_slab_pages; +#endif /* CONFIG_NUMA */ + + /* Write-intensive fields used by page reclaim */ + ZONE_PADDING(_pad1_) + spinlock_t lru_lock; #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT /* @@ -698,6 +703,23 @@ typedef struct pglist_data { struct list_head split_queue; unsigned long split_queue_len; #endif + + /* Fields commonly accessed by the page reclaim scanner */ + struct lruvec lruvec; + + /* + * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on + * this node's LRU. Maintained by the pageout code. + */ + unsigned int inactive_ratio; + + unsigned long flags; + + ZONE_PADDING(_pad2_) + + /* Per-node vmstats */ + struct per_cpu_nodestat __percpu *per_cpu_nodestats; + atomic_long_t vm_stat[NR_VM_NODE_STAT_ITEMS]; } pg_data_t; #define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages) @@ -711,6 +733,15 @@ typedef struct pglist_data { #define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) #define node_end_pfn(nid) pgdat_end_pfn(NODE_DATA(nid)) +static inline spinlock_t *zone_lru_lock(struct zone *zone) +{ + return &zone->zone_pgdat->lru_lock; +} + +static inline struct lruvec *node_lruvec(struct pglist_data *pgdat) +{ + return &pgdat->lruvec; +} static inline unsigned long pgdat_end_pfn(pg_data_t *pgdat) { @@ -746,8 +777,12 @@ static inline bool is_dev_zone(const struct zone *zone) extern struct mutex zonelists_mutex; void build_all_zonelists(pg_data_t *pgdat, struct zone *zone); void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx); +bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, + int classzone_idx, unsigned int alloc_flags, + long free_pages); bool zone_watermark_ok(struct zone *z, unsigned int order, - unsigned long mark, int classzone_idx, int alloc_flags); + unsigned long mark, int classzone_idx, + unsigned int alloc_flags); bool zone_watermark_ok_safe(struct zone *z, unsigned int order, unsigned long mark, int classzone_idx); enum memmap_context { @@ -759,12 +794,12 @@ extern int init_currently_empty_zone(struct zone *zone, unsigned long start_pfn, extern void lruvec_init(struct lruvec *lruvec); -static inline struct zone *lruvec_zone(struct lruvec *lruvec) +static inline struct pglist_data *lruvec_pgdat(struct lruvec *lruvec) { #ifdef CONFIG_MEMCG - return lruvec->zone; + return lruvec->pgdat; #else - return container_of(lruvec, struct zone, lruvec); + return container_of(lruvec, struct pglist_data, lruvec); #endif } @@ -828,10 +863,7 @@ static inline int is_highmem_idx(enum zone_type idx) static inline int is_highmem(struct zone *zone) { #ifdef CONFIG_HIGHMEM - int zone_off = (char *)zone - (char *)zone->zone_pgdat->node_zones; - return zone_off == ZONE_HIGHMEM * sizeof(*zone) || - (zone_off == ZONE_MOVABLE * sizeof(*zone) && - zone_movable_is_highmem()); + return is_highmem_idx(zone_idx(zone)); #else return 0; #endif @@ -922,6 +954,10 @@ static inline int zonelist_node_idx(struct zoneref *zoneref) #endif /* CONFIG_NUMA */ } +struct zoneref *__next_zones_zonelist(struct zoneref *z, + enum zone_type highest_zoneidx, + nodemask_t *nodes); + /** * next_zones_zonelist - Returns the next zone at or below highest_zoneidx within the allowed nodemask using a cursor within a zonelist as a starting point * @z - The cursor used as a starting point for the search @@ -934,9 +970,14 @@ static inline int zonelist_node_idx(struct zoneref *zoneref) * being examined. It should be advanced by one before calling * next_zones_zonelist again. */ -struct zoneref *next_zones_zonelist(struct zoneref *z, +static __always_inline struct zoneref *next_zones_zonelist(struct zoneref *z, enum zone_type highest_zoneidx, - nodemask_t *nodes); + nodemask_t *nodes) +{ + if (likely(!nodes && zonelist_zone_idx(z) <= highest_zoneidx)) + return z; + return __next_zones_zonelist(z, highest_zoneidx, nodes); +} /** * first_zones_zonelist - Returns the first zone at or below highest_zoneidx within the allowed nodemask in a zonelist @@ -952,13 +993,10 @@ struct zoneref *next_zones_zonelist(struct zoneref *z, */ static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist, enum zone_type highest_zoneidx, - nodemask_t *nodes, - struct zone **zone) + nodemask_t *nodes) { - struct zoneref *z = next_zones_zonelist(zonelist->_zonerefs, + return next_zones_zonelist(zonelist->_zonerefs, highest_zoneidx, nodes); - *zone = zonelist_zone(z); - return z; } /** @@ -973,10 +1011,17 @@ static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist, * within a given nodemask */ #define for_each_zone_zonelist_nodemask(zone, z, zlist, highidx, nodemask) \ - for (z = first_zones_zonelist(zlist, highidx, nodemask, &zone); \ + for (z = first_zones_zonelist(zlist, highidx, nodemask), zone = zonelist_zone(z); \ zone; \ z = next_zones_zonelist(++z, highidx, nodemask), \ - zone = zonelist_zone(z)) \ + zone = zonelist_zone(z)) + +#define for_next_zone_zonelist_nodemask(zone, z, zlist, highidx, nodemask) \ + for (zone = z->zone; \ + zone; \ + z = next_zones_zonelist(++z, highidx, nodemask), \ + zone = zonelist_zone(z)) + /** * for_each_zone_zonelist - helper macro to iterate over valid zones in a zonelist at or below a given zone index @@ -1056,7 +1101,7 @@ struct mem_section { unsigned long *pageblock_flags; #ifdef CONFIG_PAGE_EXTENSION /* - * If !SPARSEMEM, pgdat doesn't have page_ext pointer. We use + * If SPARSEMEM, pgdat doesn't have page_ext pointer. We use * section. (see page_ext.h about this.) */ struct page_ext *page_ext; |