aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/buffer.c2
-rw-r--r--fs/iomap/buffered-io.c2
-rw-r--r--include/linux/bpf.h57
-rw-r--r--include/linux/memcontrol.h215
-rw-r--r--include/linux/mm.h22
-rw-r--r--include/linux/mm_types.h5
-rw-r--r--include/linux/page-flags.h11
-rw-r--r--include/trace/events/writeback.h2
-rw-r--r--kernel/bpf/arraymap.c30
-rw-r--r--kernel/bpf/bpf_local_storage.c20
-rw-r--r--kernel/bpf/bpf_struct_ops.c19
-rw-r--r--kernel/bpf/core.c22
-rw-r--r--kernel/bpf/cpumap.c37
-rw-r--r--kernel/bpf/devmap.c25
-rw-r--r--kernel/bpf/hashtab.c43
-rw-r--r--kernel/bpf/local_storage.c44
-rw-r--r--kernel/bpf/lpm_trie.c19
-rw-r--r--kernel/bpf/queue_stack_maps.c16
-rw-r--r--kernel/bpf/reuseport_array.c12
-rw-r--r--kernel/bpf/ringbuf.c35
-rw-r--r--kernel/bpf/stackmap.c16
-rw-r--r--kernel/bpf/syscall.c234
-rw-r--r--kernel/fork.c7
-rw-r--r--mm/debug.c4
-rw-r--r--mm/huge_memory.c4
-rw-r--r--mm/memcontrol.c139
-rw-r--r--mm/page_alloc.c8
-rw-r--r--mm/page_io.c6
-rw-r--r--mm/slab.h38
-rw-r--r--mm/workingset.c2
-rw-r--r--net/core/sock_map.c42
-rw-r--r--net/xdp/xskmap.c15
-rw-r--r--samples/bpf/map_perf_test_user.c6
-rw-r--r--samples/bpf/offwaketime_user.c6
-rw-r--r--samples/bpf/sockex2_user.c2
-rw-r--r--samples/bpf/sockex3_user.c2
-rw-r--r--samples/bpf/spintest_user.c6
-rw-r--r--samples/bpf/syscall_tp_user.c2
-rw-r--r--samples/bpf/task_fd_query_user.c6
-rw-r--r--samples/bpf/test_lru_dist.c3
-rw-r--r--samples/bpf/test_map_in_map_user.c6
-rw-r--r--samples/bpf/test_overhead_user.c2
-rw-r--r--samples/bpf/trace_event_user.c2
-rw-r--r--samples/bpf/tracex2_user.c6
-rw-r--r--samples/bpf/tracex3_user.c6
-rw-r--r--samples/bpf/tracex4_user.c6
-rw-r--r--samples/bpf/tracex5_user.c3
-rw-r--r--samples/bpf/tracex6_user.c3
-rw-r--r--samples/bpf/xdp1_user.c6
-rw-r--r--samples/bpf/xdp_adjust_tail_user.c6
-rw-r--r--samples/bpf/xdp_monitor_user.c5
-rw-r--r--samples/bpf/xdp_redirect_cpu_user.c6
-rw-r--r--samples/bpf/xdp_redirect_map_user.c6
-rw-r--r--samples/bpf/xdp_redirect_user.c6
-rw-r--r--samples/bpf/xdp_router_ipv4_user.c6
-rw-r--r--samples/bpf/xdp_rxq_info_user.c6
-rw-r--r--samples/bpf/xdp_sample_pkts_user.c6
-rw-r--r--samples/bpf/xdp_tx_iptunnel_user.c6
-rw-r--r--samples/bpf/xdpsock_user.c7
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c2
-rw-r--r--tools/testing/selftests/bpf/progs/map_ptr_kern.c7
61 files changed, 533 insertions, 762 deletions
diff --git a/fs/buffer.c b/fs/buffer.c
index 23f645657488..b56f99f82b5b 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -657,7 +657,7 @@ int __set_page_dirty_buffers(struct page *page)
} while (bh != head);
}
/*
- * Lock out page->mem_cgroup migration to keep PageDirty
+ * Lock out page's memcg migration to keep PageDirty
* synchronized with per-memcg dirty page counters.
*/
lock_page_memcg(page);
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 10cc7979ce38..16a1e82e3aeb 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -650,7 +650,7 @@ iomap_set_page_dirty(struct page *page)
return !TestSetPageDirty(page);
/*
- * Lock out page->mem_cgroup migration to keep PageDirty
+ * Lock out page's memcg migration to keep PageDirty
* synchronized with per-memcg dirty page counters.
*/
lock_page_memcg(page);
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index e1bcb6d7345c..a9de5711b23f 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -20,6 +20,8 @@
#include <linux/module.h>
#include <linux/kallsyms.h>
#include <linux/capability.h>
+#include <linux/sched/mm.h>
+#include <linux/slab.h>
struct bpf_verifier_env;
struct bpf_verifier_log;
@@ -37,6 +39,7 @@ struct bpf_iter_aux_info;
struct bpf_local_storage;
struct bpf_local_storage_map;
struct kobject;
+struct mem_cgroup;
extern struct idr btf_idr;
extern spinlock_t btf_idr_lock;
@@ -135,11 +138,6 @@ struct bpf_map_ops {
const struct bpf_iter_seq_info *iter_seq_info;
};
-struct bpf_map_memory {
- u32 pages;
- struct user_struct *user;
-};
-
struct bpf_map {
/* The first two cachelines with read-mostly members of which some
* are also accessed in fast-path (e.g. ops, max_entries).
@@ -160,7 +158,9 @@ struct bpf_map {
u32 btf_key_type_id;
u32 btf_value_type_id;
struct btf *btf;
- struct bpf_map_memory memory;
+#ifdef CONFIG_MEMCG_KMEM
+ struct mem_cgroup *memcg;
+#endif
char name[BPF_OBJ_NAME_LEN];
u32 btf_vmlinux_value_type_id;
bool bypass_spec_v1;
@@ -1202,8 +1202,6 @@ void bpf_prog_sub(struct bpf_prog *prog, int i);
void bpf_prog_inc(struct bpf_prog *prog);
struct bpf_prog * __must_check bpf_prog_inc_not_zero(struct bpf_prog *prog);
void bpf_prog_put(struct bpf_prog *prog);
-int __bpf_prog_charge(struct user_struct *user, u32 pages);
-void __bpf_prog_uncharge(struct user_struct *user, u32 pages);
void __bpf_free_used_maps(struct bpf_prog_aux *aux,
struct bpf_map **used_maps, u32 len);
@@ -1218,12 +1216,6 @@ void bpf_map_inc_with_uref(struct bpf_map *map);
struct bpf_map * __must_check bpf_map_inc_not_zero(struct bpf_map *map);
void bpf_map_put_with_uref(struct bpf_map *map);
void bpf_map_put(struct bpf_map *map);
-int bpf_map_charge_memlock(struct bpf_map *map, u32 pages);
-void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages);
-int bpf_map_charge_init(struct bpf_map_memory *mem, u64 size);
-void bpf_map_charge_finish(struct bpf_map_memory *mem);
-void bpf_map_charge_move(struct bpf_map_memory *dst,
- struct bpf_map_memory *src);
void *bpf_map_area_alloc(u64 size, int numa_node);
void *bpf_map_area_mmapable_alloc(u64 size, int numa_node);
void bpf_map_area_free(void *base);
@@ -1240,6 +1232,34 @@ int generic_map_delete_batch(struct bpf_map *map,
struct bpf_map *bpf_map_get_curr_or_next(u32 *id);
struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id);
+#ifdef CONFIG_MEMCG_KMEM
+void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags,
+ int node);
+void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags);
+void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size,
+ size_t align, gfp_t flags);
+#else
+static inline void *
+bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags,
+ int node)
+{
+ return kmalloc_node(size, flags, node);
+}
+
+static inline void *
+bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags)
+{
+ return kzalloc(size, flags);
+}
+
+static inline void __percpu *
+bpf_map_alloc_percpu(const struct bpf_map *map, size_t size, size_t align,
+ gfp_t flags)
+{
+ return __alloc_percpu_gfp(size, align, flags);
+}
+#endif
+
extern int sysctl_unprivileged_bpf_disabled;
static inline bool bpf_allow_ptr_leaks(void)
@@ -1490,15 +1510,6 @@ bpf_prog_inc_not_zero(struct bpf_prog *prog)
return ERR_PTR(-EOPNOTSUPP);
}
-static inline int __bpf_prog_charge(struct user_struct *user, u32 pages)
-{
- return 0;
-}
-
-static inline void __bpf_prog_uncharge(struct user_struct *user, u32 pages)
-{
-}
-
static inline void bpf_link_init(struct bpf_link *link, enum bpf_link_type type,
const struct bpf_link_ops *ops,
struct bpf_prog *prog)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index e391e3c56de5..7c9d43476166 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -343,6 +343,175 @@ struct mem_cgroup {
extern struct mem_cgroup *root_mem_cgroup;
+enum page_memcg_data_flags {
+ /* page->memcg_data is a pointer to an objcgs vector */
+ MEMCG_DATA_OBJCGS = (1UL << 0),
+ /* page has been accounted as a non-slab kernel page */
+ MEMCG_DATA_KMEM = (1UL << 1),
+ /* the next bit after the last actual flag */
+ __NR_MEMCG_DATA_FLAGS = (1UL << 2),
+};
+
+#define MEMCG_DATA_FLAGS_MASK (__NR_MEMCG_DATA_FLAGS - 1)
+
+/*
+ * page_memcg - get the memory cgroup associated with a page
+ * @page: a pointer to the page struct
+ *
+ * Returns a pointer to the memory cgroup associated with the page,
+ * or NULL. This function assumes that the page is known to have a
+ * proper memory cgroup pointer. It's not safe to call this function
+ * against some type of pages, e.g. slab pages or ex-slab pages.
+ *
+ * Any of the following ensures page and memcg binding stability:
+ * - the page lock
+ * - LRU isolation
+ * - lock_page_memcg()
+ * - exclusive reference
+ */
+static inline struct mem_cgroup *page_memcg(struct page *page)
+{
+ unsigned long memcg_data = page->memcg_data;
+
+ VM_BUG_ON_PAGE(PageSlab(page), page);
+ VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_OBJCGS, page);
+
+ return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
+}
+
+/*
+ * page_memcg_rcu - locklessly get the memory cgroup associated with a page
+ * @page: a pointer to the page struct
+ *
+ * Returns a pointer to the memory cgroup associated with the page,
+ * or NULL. This function assumes that the page is known to have a
+ * proper memory cgroup pointer. It's not safe to call this function
+ * against some type of pages, e.g. slab pages or ex-slab pages.
+ */
+static inline struct mem_cgroup *page_memcg_rcu(struct page *page)
+{
+ VM_BUG_ON_PAGE(PageSlab(page), page);
+ WARN_ON_ONCE(!rcu_read_lock_held());
+
+ return (struct mem_cgroup *)(READ_ONCE(page->memcg_data) &
+ ~MEMCG_DATA_FLAGS_MASK);
+}
+
+/*
+ * page_memcg_check - get the memory cgroup associated with a page
+ * @page: a pointer to the page struct
+ *
+ * Returns a pointer to the memory cgroup associated with the page,
+ * or NULL. This function unlike page_memcg() can take any page
+ * as an argument. It has to be used in cases when it's not known if a page
+ * has an associated memory cgroup pointer or an object cgroups vector.
+ *
+ * Any of the following ensures page and memcg binding stability:
+ * - the page lock
+ * - LRU isolation
+ * - lock_page_memcg()
+ * - exclusive reference
+ */
+static inline struct mem_cgroup *page_memcg_check(struct page *page)
+{
+ /*
+ * Because page->memcg_data might be changed asynchronously
+ * for slab pages, READ_ONCE() should be used here.
+ */
+ unsigned long memcg_data = READ_ONCE(page->memcg_data);
+
+ if (memcg_data & MEMCG_DATA_OBJCGS)
+ return NULL;
+
+ return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
+}
+
+/*
+ * PageMemcgKmem - check if the page has MemcgKmem flag set
+ * @page: a pointer to the page struct
+ *
+ * Checks if the page has MemcgKmem flag set. The caller must ensure that
+ * the page has an associated memory cgroup. It's not safe to call this function
+ * against some types of pages, e.g. slab pages.
+ */
+static inline bool PageMemcgKmem(struct page *page)
+{
+ VM_BUG_ON_PAGE(page->memcg_data & MEMCG_DATA_OBJCGS, page);
+ return page->memcg_data & MEMCG_DATA_KMEM;
+}
+
+#ifdef CONFIG_MEMCG_KMEM
+/*
+ * page_objcgs - get the object cgroups vector associated with a page
+ * @page: a pointer to the page struct
+ *
+ * Returns a pointer to the object cgroups vector associated with the page,
+ * or NULL. This function assumes that the page is known to have an
+ * associated object cgroups vector. It's not safe to call this function
+ * against pages, which might have an associated memory cgroup: e.g.
+ * kernel stack pages.
+ */
+static inline struct obj_cgroup **page_objcgs(struct page *page)
+{
+ unsigned long memcg_data = READ_ONCE(page->memcg_data);
+
+ VM_BUG_ON_PAGE(memcg_data && !(memcg_data & MEMCG_DATA_OBJCGS), page);
+ VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, page);
+
+ return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
+}
+
+/*
+ * page_objcgs_check - get the object cgroups vector associated with a page
+ * @page: a pointer to the page struct
+ *
+ * Returns a pointer to the object cgroups vector associated with the page,
+ * or NULL. This function is safe to use if the page can be directly associated
+ * with a memory cgroup.
+ */
+static inline struct obj_cgroup **page_objcgs_check(struct page *page)
+{
+ unsigned long memcg_data = READ_ONCE(page->memcg_data);
+
+ if (!memcg_data || !(memcg_data & MEMCG_DATA_OBJCGS))
+ return NULL;
+
+ VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, page);
+
+ return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
+}
+
+/*
+ * set_page_objcgs - associate a page with a object cgroups vector
+ * @page: a pointer to the page struct
+ * @objcgs: a pointer to the object cgroups vector
+ *
+ * Atomically associates a page with a vector of object cgroups.
+ */
+static inline bool set_page_objcgs(struct page *page,
+ struct obj_cgroup **objcgs)
+{
+ return !cmpxchg(&page->memcg_data, 0, (unsigned long)objcgs |
+ MEMCG_DATA_OBJCGS);
+}
+#else
+static inline struct obj_cgroup **page_objcgs(struct page *page)
+{
+ return NULL;
+}
+
+static inline struct obj_cgroup **page_objcgs_check(struct page *page)
+{
+ return NULL;
+}
+
+static inline bool set_page_objcgs(struct page *page,
+ struct obj_cgroup **objcgs)
+{
+ return true;
+}
+#endif
+
static __always_inline bool memcg_stat_item_in_bytes(int idx)
{
if (idx == MEMCG_PERCPU_B)
@@ -743,15 +912,19 @@ static inline void mod_memcg_state(struct mem_cgroup *memcg,
static inline void __mod_memcg_page_state(struct page *page,
int idx, int val)
{
- if (page->mem_cgroup)
- __mod_memcg_state(page->mem_cgroup, idx, val);
+ struct mem_cgroup *memcg = page_memcg(page);
+
+ if (memcg)
+ __mod_memcg_state(memcg, idx, val);
}
static inline void mod_memcg_page_state(struct page *page,
int idx, int val)
{
- if (page->mem_cgroup)
- mod_memcg_state(page->mem_cgroup, idx, val);
+ struct mem_cgroup *memcg = page_memcg(page);
+
+ if (memcg)
+ mod_memcg_state(memcg, idx, val);
}
static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
@@ -834,16 +1007,17 @@ static inline void __mod_lruvec_page_state(struct page *page,
enum node_stat_item idx, int val)
{
struct page *head = compound_head(page); /* rmap on tail pages */
+ struct mem_cgroup *memcg = page_memcg(head);
pg_data_t *pgdat = page_pgdat(page);
struct lruvec *lruvec;
/* Untracked pages have no memcg, no lruvec. Update only the node */
- if (!head->mem_cgroup) {
+ if (!memcg) {
__mod_node_page_state(pgdat, idx, val);
return;
}
- lruvec = mem_cgroup_lruvec(head->mem_cgroup, pgdat);
+ lruvec = mem_cgroup_lruvec(memcg, pgdat);
__mod_lruvec_state(lruvec, idx, val);
}
@@ -878,8 +1052,10 @@ static inline void count_memcg_events(struct mem_cgroup *memcg,
static inline void count_memcg_page_event(struct page *page,
enum vm_event_item idx)
{
- if (page->mem_cgroup)
- count_memcg_events(page->mem_cgroup, idx, 1);
+ struct mem_cgroup *memcg = page_memcg(page);
+
+ if (memcg)
+ count_memcg_events(memcg, idx, 1);
}
static inline void count_memcg_event_mm(struct mm_struct *mm,
@@ -941,6 +1117,27 @@ void mem_cgroup_split_huge_fixup(struct page *head);
struct mem_cgroup;
+static inline struct mem_cgroup *page_memcg(struct page *page)
+{
+ return NULL;
+}
+
+static inline struct mem_cgroup *page_memcg_rcu(struct page *page)
+{
+ WARN_ON_ONCE(!rcu_read_lock_held());
+ return NULL;
+}
+
+static inline struct mem_cgroup *page_memcg_check(struct page *page)
+{
+ return NULL;
+}
+
+static inline bool PageMemcgKmem(struct page *page)
+{
+ return false;
+}
+
static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
{
return true;
@@ -1430,7 +1627,7 @@ static inline void mem_cgroup_track_foreign_dirty(struct page *page,
if (mem_cgroup_disabled())
return;
- if (unlikely(&page->mem_cgroup->css != wb->memcg_css))
+ if (unlikely(&page_memcg(page)->css != wb->memcg_css))
mem_cgroup_track_foreign_dirty_slowpath(page, wb);
}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index db6ae4d3fb4e..6b0c9d2c1d10 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1484,28 +1484,6 @@ static inline void set_page_links(struct page *page, enum zone_type zone,
#endif
}
-#ifdef CONFIG_MEMCG
-static inline struct mem_cgroup *page_memcg(struct page *page)
-{
- return page->mem_cgroup;
-}
-static inline struct mem_cgroup *page_memcg_rcu(struct page *page)
-{
- WARN_ON_ONCE(!rcu_read_lock_held());
- return READ_ONCE(page->mem_cgroup);
-}
-#else
-static inline struct mem_cgroup *page_memcg(struct page *page)
-{
- return NULL;
-}
-static inline struct mem_cgroup *page_memcg_rcu(struct page *page)
-{
- WARN_ON_ONCE(!rcu_read_lock_held());
- return NULL;
-}
-#endif
-
/*
* Some inline functions in vmstat.h depend on page_zone()
*/
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 5a9238f6caad..80f5d755c037 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -199,10 +199,7 @@ struct page {
atomic_t _refcount;
#ifdef CONFIG_MEMCG
- union {
- struct mem_cgroup *mem_cgroup;
- struct obj_cgroup **obj_cgroups;
- };
+ unsigned long memcg_data;
#endif
/*
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 4f6ba9379112..fc0e1bd48e73 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -715,9 +715,8 @@ PAGEFLAG_FALSE(DoubleMap)
#define PAGE_MAPCOUNT_RESERVE -128
#define PG_buddy 0x00000080
#define PG_offline 0x00000100
-#define PG_kmemcg 0x00000200
-#define PG_table 0x00000400
-#define PG_guard 0x00000800
+#define PG_table 0x00000200
+#define PG_guard 0x00000400
#define PageType(page, flag) \
((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE)
@@ -769,12 +768,6 @@ PAGE_TYPE_OPS(Buddy, buddy)
PAGE_TYPE_OPS(Offline, offline)
/*
- * If kmemcg is enabled, the buddy allocator will set PageKmemcg() on
- * pages allocated with __GFP_ACCOUNT. It gets cleared on page free.
- */
-PAGE_TYPE_OPS(Kmemcg, kmemcg)
-
-/*
* Marks pages in use as page tables.
*/
PAGE_TYPE_OPS(Table, table)
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index e7cbccc7c14c..39a40dfb578a 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -257,7 +257,7 @@ TRACE_EVENT(track_foreign_dirty,
__entry->ino = inode ? inode->i_ino : 0;
__entry->memcg_id = wb->memcg_css->id;
__entry->cgroup_ino = __trace_wb_assign_cgroup(wb);
- __entry->page_cgroup_ino = cgroup_ino(page->mem_cgroup->css.cgroup);
+ __entry->page_cgroup_ino = cgroup_ino(page_memcg(page)->css.cgroup);
),
TP_printk("bdi %s[%llu]: ino=%lu memcg_id=%u cgroup_ino=%lu page_cgroup_ino=%lu",
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index c6c81eceb68f..1f8453343bf2 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -34,8 +34,8 @@ static int bpf_array_alloc_percpu(struct bpf_array *array)
int i;
for (i = 0; i < array->map.max_entries; i++) {
- ptr = __alloc_percpu_gfp(array->elem_size, 8,
- GFP_USER | __GFP_NOWARN);
+ ptr = bpf_map_alloc_percpu(&array->map, array->elem_size, 8,
+ GFP_USER | __GFP_NOWARN);
if (!ptr) {
bpf_array_free_percpu(array);
return -ENOMEM;
@@ -81,11 +81,10 @@ int array_map_alloc_check(union bpf_attr *attr)
static struct bpf_map *array_map_alloc(union bpf_attr *attr)
{
bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
- int ret, numa_node = bpf_map_attr_numa_node(attr);
+ int numa_node = bpf_map_attr_numa_node(attr);
u32 elem_size, index_mask, max_entries;
bool bypass_spec_v1 = bpf_bypass_spec_v1();
- u64 cost, array_size, mask64;
- struct bpf_map_memory mem;
+ u64 array_size, mask64;
struct bpf_array *array;
elem_size = round_up(attr->value_size, 8);
@@ -126,44 +125,29 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
}
}
- /* make sure there is no u32 overflow later in round_up() */
- cost = array_size;
- if (percpu)
- cost += (u64)attr->max_entries * elem_size * num_possible_cpus();
-
- ret = bpf_map_charge_init(&mem, cost);
- if (ret < 0)
- return ERR_PTR(ret);
-
/* allocate all map elements and zero-initialize them */
if (attr->map_flags & BPF_F_MMAPABLE) {
void *data;
/* kmalloc'ed memory can't be mmap'ed, use explicit vmalloc */
data = bpf_map_area_mmapable_alloc(array_size, numa_node);
- if (!data) {
- bpf_map_charge_finish(&mem);
+ if (!data)
return ERR_PTR(-ENOMEM);
- }
array = data + PAGE_ALIGN(sizeof(struct bpf_array))
- offsetof(struct bpf_array, value);
} else {
array = bpf_map_area_alloc(array_size, numa_node);
}
- if (!array) {
- bpf_map_charge_finish(&mem);
+ if (!array)
return ERR_PTR(-ENOMEM);
- }
array->index_mask = index_mask;
array->map.bypass_spec_v1 = bypass_spec_v1;
/* copy mandatory map attributes */
bpf_map_init_from_attr(&array->map, attr);
- bpf_map_charge_move(&array->map.memory, &mem);
array->elem_size = elem_size;
if (percpu && bpf_array_alloc_percpu(array)) {
- bpf_map_charge_finish(&array->map.memory);
bpf_map_area_free(array);
return ERR_PTR(-ENOMEM);
}
@@ -1018,7 +1002,7 @@ static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr)
struct bpf_array_aux *aux;
struct bpf_map *map;
- aux = kzalloc(sizeof(*aux), GFP_KERNEL);
+ aux = kzalloc(sizeof(*aux), GFP_KERNEL_ACCOUNT);
if (!aux)
return ERR_PTR(-ENOMEM);
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index 5d3a7af9ba9b..dd5aedee99e7 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -67,7 +67,8 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
if (charge_mem && mem_charge(smap, owner, smap->elem_size))
return NULL;
- selem = kzalloc(smap->elem_size, GFP_ATOMIC | __GFP_NOWARN);
+ selem = bpf_map_kzalloc(&smap->map, smap->elem_size,
+ GFP_ATOMIC | __GFP_NOWARN);
if (selem) {
if (value)
memcpy(SDATA(selem)->data, value, smap->map.value_size);
@@ -264,7 +265,8 @@ int bpf_local_storage_alloc(void *owner,
if (err)
return err;
- storage = kzalloc(sizeof(*storage), GFP_ATOMIC | __GFP_NOWARN);
+ storage = bpf_map_kzalloc(&smap->map, sizeof(*storage),
+ GFP_ATOMIC | __GFP_NOWARN);
if (!storage) {
err = -ENOMEM;
goto uncharge;
@@ -543,10 +545,8 @@ struct bpf_local_storage_map *bpf_local_storage_map_alloc(union bpf_attr *attr)
struct bpf_local_storage_map *smap;
unsigned int i;
u32 nbuckets;
- u64 cost;
- int ret;
- smap = kzalloc(sizeof(*smap), GFP_USER | __GFP_NOWARN);
+ smap = kzalloc(sizeof(*smap), GFP_USER | __GFP_NOWARN | __GFP_ACCOUNT);
if (!smap)
return ERR_PTR(-ENOMEM);
bpf_map_init_from_attr(&smap->map, attr);
@@ -555,18 +555,10 @@ struct bpf_local_storage_map *bpf_local_storage_map_alloc(union bpf_attr *attr)
/* Use at least 2 buckets, select_bucket() is undefined behavior with 1 bucket */
nbuckets = max_t(u32, 2, nbuckets);
smap->bucket_log = ilog2(nbuckets);
- cost = sizeof(*smap->buckets) * nbuckets + sizeof(*smap);
-
- ret = bpf_map_charge_init(&smap->map.memory, cost);
- if (ret < 0) {
- kfree(smap);
- return ERR_PTR(ret);
- }
smap->buckets = kvcalloc(sizeof(*smap->buckets), nbuckets,
- GFP_USER | __GFP_NOWARN);
+ GFP_USER | __GFP_NOWARN | __GFP_ACCOUNT);
if (!smap->buckets) {
- bpf_map_charge_finish(&smap->map.memory);
kfree(smap);
return ERR_PTR(-ENOMEM);
}
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index 4c3b543bb33b..1a666a975416 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -548,12 +548,10 @@ static int bpf_struct_ops_map_alloc_check(union bpf_attr *attr)
static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
{
const struct bpf_struct_ops *st_ops;
- size_t map_total_size, st_map_size;
+ size_t st_map_size;
struct bpf_struct_ops_map *st_map;
const struct btf_type *t, *vt;
- struct bpf_map_memory mem;
struct bpf_map *map;
- int err;
if (!bpf_capable())
return ERR_PTR(-EPERM);
@@ -573,20 +571,11 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
* struct bpf_struct_ops_tcp_congestions_ops
*/
(vt->size - sizeof(struct bpf_struct_ops_value));
- map_total_size = st_map_size +
- /* uvalue */
- sizeof(vt->size) +
- /* struct bpf_progs **progs */
- btf_type_vlen(t) * sizeof(struct bpf_prog *);
- err = bpf_map_charge_init(&mem, map_total_size);
- if (err < 0)
- return ERR_PTR(err);
st_map = bpf_map_area_alloc(st_map_size, NUMA_NO_NODE);
- if (!st_map) {
- bpf_map_charge_finish(&mem);
+ if (!st_map)
return ERR_PTR(-ENOMEM);
- }
+
st_map->st_ops = st_ops;
map = &st_map->map;
@@ -597,14 +586,12 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
st_map->image = bpf_jit_alloc_exec(PAGE_SIZE);
if (!st_map->uvalue || !st_map->progs || !st_map->image) {
bpf_struct_ops_map_free(map);
- bpf_map_charge_finish(&mem);
return ERR_PTR(-ENOMEM);
}
mutex_init(&st_map->lock);
set_vm_flush_reset_perms(st_map->image);
bpf_map_init_from_attr(map, attr);
- bpf_map_charge_move(&map->memory, &mem);
return map;
}
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index ff55cbcfbab4..261f8692d0d2 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -77,7 +77,7 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns
struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flags)
{
- gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags;
+ gfp_t gfp_flags = GFP_KERNEL_ACCOUNT | __GFP_ZERO | gfp_extra_flags;
struct bpf_prog_aux *aux;
struct bpf_prog *fp;
@@ -86,7 +86,7 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag
if (fp == NULL)
return NULL;
- aux = kzalloc(sizeof(*aux), GFP_KERNEL | gfp_extra_flags);
+ aux = kzalloc(sizeof(*aux), GFP_KERNEL_ACCOUNT | gfp_extra_flags);
if (aux == NULL) {
vfree(fp);
return NULL;
@@ -106,7 +106,7 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag
struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
{
- gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags;
+ gfp_t gfp_flags = GFP_KERNEL_ACCOUNT | __GFP_ZERO | gfp_extra_flags;
struct bpf_prog *prog;
int cpu;
@@ -138,7 +138,7 @@ int bpf_prog_alloc_jited_linfo(struct bpf_prog *prog)
prog->aux->jited_linfo = kcalloc(prog->aux->nr_linfo,
sizeof(*prog->aux->jited_linfo),
- GFP_KERNEL | __GFP_NOWARN);
+ GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
if (!prog->aux->jited_linfo)
return -ENOMEM;
@@ -219,25 +219,17 @@ void bpf_prog_free_linfo(struct bpf_prog *prog)
struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
gfp_t gfp_extra_flags)
{
- gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags;
+ gfp_t gfp_flags = GFP_KERNEL_ACCOUNT | __GFP_ZERO | gfp_extra_flags;
struct bpf_prog *fp;
- u32 pages, delta;
- int ret;
+ u32 pages;
size = round_up(size, PAGE_SIZE);
pages = size / PAGE_SIZE;
if (pages <= fp_old->pages)
return fp_old;
- delta = pages - fp_old->pages;
- ret = __bpf_prog_charge(fp_old->aux->user, delta);
- if (ret)
- return NULL;
-
fp = __vmalloc(size, gfp_flags);
- if (fp == NULL) {
- __bpf_prog_uncharge(fp_old->aux->user, delta);
- } else {
+ if (fp) {
memcpy(fp, fp_old, fp_old->pages * PAGE_SIZE);
fp->pages = pages;
fp->aux->prog = fp;
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index c61a23b564aa..747313698178 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -84,8 +84,6 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
u32 value_size = attr->value_size;
struct bpf_cpu_map *cmap;
int err = -ENOMEM;
- u64 cost;
- int ret;
if (!bpf_capable())
return ERR_PTR(-EPERM);
@@ -97,7 +95,7 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
attr->map_flags & ~BPF_F_NUMA_NODE)
return ERR_PTR(-EINVAL);
- cmap = kzalloc(sizeof(*cmap), GFP_USER);
+ cmap = kzalloc(sizeof(*cmap), GFP_USER | __GFP_ACCOUNT);
if (!cmap)
return ERR_PTR(-ENOMEM);
@@ -109,26 +107,14 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
goto free_cmap;
}
- /* make sure page count doesn't overflow */
- cost = (u64) cmap->map.max_entries * sizeof(struct bpf_cpu_map_entry *);
-
- /* Notice returns -EPERM on if map size is larger than memlock limit */
- ret = bpf_map_charge_init(&cmap->map.memory, cost);
- if (ret) {
- err = ret;
- goto free_cmap;
- }
-
/* Alloc array for possible remote "destination" CPUs */
cmap->cpu_map = bpf_map_area_alloc(cmap->map.max_entries *
sizeof(struct bpf_cpu_map_entry *),
cmap->map.numa_node);
if (!cmap->cpu_map)
- goto free_charge;
+ goto free_cmap;
return &cmap->map;
-free_charge:
- bpf_map_charge_finish(&cmap->map.memory);
free_cmap:
kfree(cmap);
return ERR_PTR(err);
@@ -412,7 +398,8 @@ static int __cpu_map_load_bpf_program(struct bpf_cpu_map_entry *rcpu, int fd)
}
static struct bpf_cpu_map_entry *
-__cpu_map_entry_alloc(struct bpf_cpumap_val *value, u32 cpu, int map_id)
+__cpu_map_entry_alloc(struct bpf_map *map, struct bpf_cpumap_val *value,
+ u32 cpu)
{
int numa, err, i, fd = value->bpf_prog.fd;
gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
@@ -422,13 +409,13 @@ __cpu_map_entry_alloc(struct bpf_cpumap_val *value, u32 cpu, int map_id)
/* Have map->numa_node, but choose node of redirect target CPU */
numa = cpu_to_node(cpu);
- rcpu = kzalloc_node(sizeof(*rcpu), gfp, numa);
+ rcpu = bpf_map_kmalloc_node(map, sizeof(*rcpu), gfp | __GFP_ZERO, numa);
if (!rcpu)
return NULL;
/* Alloc percpu bulkq */
- rcpu->bulkq = __alloc_percpu_gfp(sizeof(*rcpu->bulkq),
- sizeof(void *), gfp);
+ rcpu->bulkq = bpf_map_alloc_percpu(map, sizeof(*rcpu->bulkq),
+ sizeof(void *), gfp);
if (!rcpu->bulkq)
goto free_rcu;
@@ -438,7 +425,8 @@ __cpu_map_entry_alloc(struct bpf_cpumap_val *value, u32 cpu, int map_id)
}
/* Alloc queue */
- rcpu->queue = kzalloc_node(sizeof(*rcpu->queue), gfp, numa);
+ rcpu->queue = bpf_map_kmalloc_node(map, sizeof(*rcpu->queue), gfp,
+ numa);
if (!rcpu->queue)
goto free_bulkq;
@@ -447,7 +435,7 @@ __cpu_map_entry_alloc(struct bpf_cpumap_val *value, u32 cpu, int map_id)
goto free_queue;
rcpu->cpu = cpu;
- rcpu->map_id = map_id;
+ rcpu->map_id = map->id;
rcpu->value.qsize = value->qsize;
if (fd > 0 && __cpu_map_load_bpf_program(rcpu, fd))
@@ -455,7 +443,8 @@ __cpu_map_entry_alloc(struct bpf_cpumap_val *value, u32 cpu, int map_id)
/* Setup kthread */
rcpu->kthread = kthread_create_on_node(cpu_map_kthread_run, rcpu, numa,
- "cpumap/%d/map:%d", cpu, map_id);
+ "cpumap/%d/map:%d", cpu,
+ map->id);
if (IS_ERR(rcpu->kthread))
goto free_prog;
@@ -571,7 +560,7 @@ static int cpu_map_update_elem(struct bpf_map *map, void *key, void *value,
rcpu = NULL; /* Same as deleting */
} else {
/* Updating qsize cause re-allocation of bpf_cpu_map_entry */
- rcpu = __cpu_map_entry_alloc(&cpumap_value, key_cpu, map->id);
+ rcpu = __cpu_map_entry_alloc(map, &cpumap_value, key_cpu);
if (!rcpu)
return -ENOMEM;
rcpu->cmap = cmap;
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 2b5ca93c17de..f6e9c68afdd4 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -109,8 +109,6 @@ static inline struct hlist_head *dev_map_index_hash(struct bpf_dtab *dtab,
static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
{
u32 valsize = attr->value_size;
- u64 cost = 0;
- int err;
/* check sanity of attributes. 2 value sizes supported:
* 4 bytes: ifindex
@@ -135,21 +133,13 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
if (!dtab->n_buckets) /* Overflow check */
return -EINVAL;
- cost += (u64) sizeof(struct hlist_head) * dtab->n_buckets;
- } else {
- cost += (u64) dtab->map.max_entries * sizeof(struct bpf_dtab_netdev *);
}
- /* if map size is larger than memlock limit, reject it */
- err = bpf_map_charge_init(&dtab->map.memory, cost);
- if (err)
- return -EINVAL;
-
if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
dtab->dev_index_head = dev_map_create_hash(dtab->n_buckets,
dtab->map.numa_node);
if (!dtab->dev_index_head)
- goto free_charge;
+ return -ENOMEM;
spin_lock_init(&dtab->index_lock);
} else {
@@ -157,14 +147,10 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
sizeof(struct bpf_dtab_netdev *),
dtab->map.numa_node);
if (!dtab->netdev_map)
- goto free_charge;
+ return -ENOMEM;
}
return 0;
-
-free_charge:
- bpf_map_charge_finish(&dtab->map.memory);
- return -ENOMEM;
}
static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
@@ -175,7 +161,7 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
if (!capable(CAP_NET_ADMIN))
return ERR_PTR(-EPERM);
- dtab = kzalloc(sizeof(*dtab), GFP_USER);
+ dtab = kzalloc(sizeof(*dtab), GFP_USER | __GFP_ACCOUNT);
if (!dtab)
return ERR_PTR(-ENOMEM);
@@ -602,8 +588,9 @@ static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net,
struct bpf_prog *prog = NULL;
struct bpf_dtab_netdev *dev;
- dev = kmalloc_node(sizeof(*dev), GFP_ATOMIC | __GFP_NOWARN,
- dtab->map.numa_node);
+ dev = bpf_map_kmalloc_node(&dtab->map, sizeof(*dev),
+ GFP_ATOMIC | __GFP_NOWARN,
+ dtab->map.numa_node);
if (!dev)
return ERR_PTR(-ENOMEM);
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index ec46266aaf1c..fe7a0733a63a 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -292,7 +292,8 @@ static int prealloc_init(struct bpf_htab *htab)
u32 size = round_up(htab->map.value_size, 8);
void __percpu *pptr;
- pptr = __alloc_percpu_gfp(size, 8, GFP_USER | __GFP_NOWARN);
+ pptr = bpf_map_alloc_percpu(&htab->map, size, 8,
+ GFP_USER | __GFP_NOWARN);
if (!pptr)
goto free_elems;
htab_elem_set_ptr(get_htab_elem(htab, i), htab->map.key_size,
@@ -346,8 +347,8 @@ static int alloc_extra_elems(struct bpf_htab *htab)
struct pcpu_freelist_node *l;
int cpu;
- pptr = __alloc_percpu_gfp(sizeof(struct htab_elem *), 8,
- GFP_USER | __GFP_NOWARN);
+ pptr = bpf_map_alloc_percpu(&htab->map, sizeof(struct htab_elem *), 8,
+ GFP_USER | __GFP_NOWARN);
if (!pptr)
return -ENOMEM;
@@ -442,9 +443,8 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC);
struct bpf_htab *htab;
int err, i;
- u64 cost;
- htab = kzalloc(sizeof(*htab), GFP_USER);
+ htab = kzalloc(sizeof(*htab), GFP_USER | __GFP_ACCOUNT);
if (!htab)
return ERR_PTR(-ENOMEM);
@@ -480,30 +480,18 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
htab->n_buckets > U32_MAX / sizeof(struct bucket))
goto free_htab;
- cost = (u64) htab->n_buckets * sizeof(struct bucket) +
- (u64) htab->elem_size * htab->map.max_entries;
-
- if (percpu)
- cost += (u64) round_up(htab->map.value_size, 8) *
- num_possible_cpus() * htab->map.max_entries;
- else
- cost += (u64) htab->elem_size * num_possible_cpus();
-
- /* if map size is larger than memlock limit, reject it */
- err = bpf_map_charge_init(&htab->map.memory, cost);
- if (err)
- goto free_htab;
-
err = -ENOMEM;
htab->buckets = bpf_map_area_alloc(htab->n_buckets *
sizeof(struct bucket),
htab->map.numa_node);
if (!htab->buckets)
- goto free_charge;
+ goto free_htab;
for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++) {
- htab->map_locked[i] = __alloc_percpu_gfp(sizeof(int),
- sizeof(int), GFP_USER);
+ htab->map_locked[i] = bpf_map_alloc_percpu(&htab->map,
+ sizeof(int),
+ sizeof(int),
+ GFP_USER);
if (!htab->map_locked[i])
goto free_map_locked;
}
@@ -538,8 +526,6 @@ free_map_locked:
for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++)
free_percpu(htab->map_locked[i]);
bpf_map_area_free(htab->buckets);
-free_charge:
- bpf_map_charge_finish(&htab->map.memory);
free_htab:
lockdep_unregister_key(&htab->lockdep_key);
kfree(htab);
@@ -925,8 +911,9 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
l_new = ERR_PTR(-E2BIG);
goto dec_count;
}
- l_new = kmalloc_node(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN,
- htab->map.numa_node);
+ l_new = bpf_map_kmalloc_node(&htab->map, htab->elem_size,
+ GFP_ATOMIC | __GFP_NOWARN,
+ htab->map.numa_node);
if (!l_new) {
l_new = ERR_PTR(-ENOMEM);
goto dec_count;
@@ -942,8 +929,8 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
pptr = htab_elem_get_ptr(l_new, key_size);
} else {
/* alloc_percpu zero-fills */
- pptr = __alloc_percpu_gfp(size, 8,
- GFP_ATOMIC | __GFP_NOWARN);
+ pptr = bpf_map_alloc_percpu(&htab->map, size, 8,
+ GFP_ATOMIC | __GFP_NOWARN);
if (!pptr) {
kfree(l_new);
l_new = ERR_PTR(-ENOMEM);
diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
index 571bb351ed3b..2d4f9ac12377 100644
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@ -164,10 +164,10 @@ static int cgroup_storage_update_elem(struct bpf_map *map, void *key,
return 0;
}
- new = kmalloc_node(sizeof(struct bpf_storage_buffer) +
- map->value_size,
- __GFP_ZERO | GFP_ATOMIC | __GFP_NOWARN,
- map->numa_node);
+ new = bpf_map_kmalloc_node(map, sizeof(struct bpf_storage_buffer) +
+ map->value_size,
+ __GFP_ZERO | GFP_ATOMIC | __GFP_NOWARN,
+ map->numa_node);
if (!new)
return -ENOMEM;
@@ -287,8 +287,6 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
{
int numa_node = bpf_map_attr_numa_node(attr);
struct bpf_cgroup_storage_map *map;
- struct bpf_map_memory mem;
- int ret;
if (attr->key_size != sizeof(struct bpf_cgroup_storage_key) &&
attr->key_size != sizeof(__u64))
@@ -308,18 +306,10 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
/* max_entries is not used and enforced to be 0 */
return ERR_PTR(-EINVAL);
- ret = bpf_map_charge_init(&mem, sizeof(struct bpf_cgroup_storage_map));
- if (ret < 0)
- return ERR_PTR(ret);
-
map = kmalloc_node(sizeof(struct bpf_cgroup_storage_map),
- __GFP_ZERO | GFP_USER, numa_node);
- if (!map) {
- bpf_map_charge_finish(&mem);
+ __GFP_ZERO | GFP_USER | __GFP_ACCOUNT, numa_node);
+ if (!map)
return ERR_PTR(-ENOMEM);
- }
-
- bpf_map_charge_move(&map->map.memory, &mem);
/* copy mandatory map attributes */
bpf_map_init_from_attr(&map->map, attr);
@@ -496,9 +486,9 @@ static size_t bpf_cgroup_storage_calculate_size(struct bpf_map *map, u32 *pages)
struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog,
enum bpf_cgroup_storage_type stype)
{
+ const gfp_t gfp = __GFP_ZERO | GFP_USER;
struct bpf_cgroup_storage *storage;
struct bpf_map *map;
- gfp_t flags;
size_t size;
u32 pages;
@@ -508,23 +498,19 @@ struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog,
size = bpf_cgroup_storage_calculate_size(map, &pages);
- if (bpf_map_charge_memlock(map, pages))
- return ERR_PTR(-EPERM);
-
- storage = kmalloc_node(sizeof(struct bpf_cgroup_storage),
- __GFP_ZERO | GFP_USER, map->numa_node);
+ storage = bpf_map_kmalloc_node(map, sizeof(struct bpf_cgroup_storage),
+ gfp, map->numa_node);
if (!storage)
goto enomem;
- flags = __GFP_ZERO | GFP_USER;
-
if (stype == BPF_CGROUP_STORAGE_SHARED) {
- storage->buf = kmalloc_node(size, flags, map->numa_node);
+ storage->buf = bpf_map_kmalloc_node(map, size, gfp,
+ map->numa_node);
if (!storage->buf)
goto enomem;
check_and_init_map_lock(map, storage->buf->data);
} else {
- storage->percpu_buf = __alloc_percpu_gfp(size, 8, flags);
+ storage->percpu_buf = bpf_map_alloc_percpu(map, size, 8, gfp);
if (!storage->percpu_buf)
goto enomem;
}
@@ -534,7 +520,6 @@ struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog,
return storage;
enomem:
- bpf_map_uncharge_memlock(map, pages);
kfree(storage);
return ERR_PTR(-ENOMEM);
}
@@ -561,16 +546,11 @@ void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage)
{
enum bpf_cgroup_storage_type stype;
struct bpf_map *map;
- u32 pages;
if (!storage)
return;
map = &storage->map->map;
-
- bpf_cgroup_storage_calculate_size(map, &pages);
- bpf_map_uncharge_memlock(map, pages);
-
stype = cgroup_storage_type(map);
if (stype == BPF_CGROUP_STORAGE_SHARED)
call_rcu(&storage->rcu, free_shared_cgroup_storage_rcu);
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index 00e32f2ec3e6..cec792a17e5f 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -282,8 +282,8 @@ static struct lpm_trie_node *lpm_trie_node_alloc(const struct lpm_trie *trie,
if (value)
size += trie->map.value_size;
- node = kmalloc_node(size, GFP_ATOMIC | __GFP_NOWARN,
- trie->map.numa_node);
+ node = bpf_map_kmalloc_node(&trie->map, size, GFP_ATOMIC | __GFP_NOWARN,
+ trie->map.numa_node);
if (!node)
return NULL;
@@ -540,8 +540,6 @@ out:
static struct bpf_map *trie_alloc(union bpf_attr *attr)
{
struct lpm_trie *trie;
- u64 cost = sizeof(*trie), cost_per_node;
- int ret;
if (!bpf_capable())
return ERR_PTR(-EPERM);
@@ -557,7 +555,7 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr)
attr->value_size > LPM_VAL_SIZE_MAX)
return ERR_PTR(-EINVAL);
- trie = kzalloc(sizeof(*trie), GFP_USER | __GFP_NOWARN);
+ trie = kzalloc(sizeof(*trie), GFP_USER | __GFP_NOWARN | __GFP_ACCOUNT);
if (!trie)
return ERR_PTR(-ENOMEM);
@@ -567,20 +565,9 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr)
offsetof(struct bpf_lpm_trie_key, data);
trie->max_prefixlen = trie->data_size * 8;
- cost_per_node = sizeof(struct lpm_trie_node) +
- attr->value_size + trie->data_size;
- cost += (u64) attr->max_entries * cost_per_node;
-
- ret = bpf_map_charge_init(&trie->map.memory, cost);
- if (ret)
- goto out_err;
-
spin_lock_init(&trie->lock);
return &trie->map;
-out_err:
- kfree(trie);
- return ERR_PTR(ret);
}
static void trie_free(struct bpf_map *map)
diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c
index 0ee2347ba510..f9c734aaa990 100644
--- a/kernel/bpf/queue_stack_maps.c
+++ b/kernel/bpf/queue_stack_maps.c
@@ -66,29 +66,21 @@ static int queue_stack_map_alloc_check(union bpf_attr *attr)
static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr)
{
- int ret, numa_node = bpf_map_attr_numa_node(attr);
- struct bpf_map_memory mem = {0};
+ int numa_node = bpf_map_attr_numa_node(attr);
struct bpf_queue_stack *qs;
- u64 size, queue_size, cost;
+ u64 size, queue_size;
size = (u64) attr->max_entries + 1;
- cost = queue_size = sizeof(*qs) + size * attr->value_size;
-
- ret = bpf_map_charge_init(&mem, cost);
- if (ret < 0)
- return ERR_PTR(ret);
+ queue_size = sizeof(*qs) + size * attr->value_size;
qs = bpf_map_area_alloc(queue_size, numa_node);
- if (!qs) {
- bpf_map_charge_finish(&mem);
+ if (!qs)
return ERR_PTR(-ENOMEM);
- }
memset(qs, 0, sizeof(*qs));
bpf_map_init_from_attr(&qs->map, attr);
- bpf_map_charge_move(&qs->map.memory, &mem);
qs->size = size;
raw_spin_lock_init(&qs->lock);
diff --git a/kernel/bpf/reuseport_array.c b/kernel/bpf/reuseport_array.c
index a55cd542f2ce..4838922f723d 100644
--- a/kernel/bpf/reuseport_array.c
+++ b/kernel/bpf/reuseport_array.c
@@ -150,9 +150,8 @@ static void reuseport_array_free(struct bpf_map *map)
static struct bpf_map *reuseport_array_alloc(union bpf_attr *attr)
{
- int err, numa_node = bpf_map_attr_numa_node(attr);
+ int numa_node = bpf_map_attr_numa_node(attr);
struct reuseport_array *array;
- struct bpf_map_memory mem;
u64 array_size;
if (!bpf_capable())
@@ -161,20 +160,13 @@ static struct bpf_map *reuseport_array_alloc(union bpf_attr *attr)
array_size = sizeof(*array);
array_size += (u64)attr->max_entries * sizeof(struct sock *);
- err = bpf_map_charge_init(&mem, array_size);
- if (err)
- return ERR_PTR(err);
-
/* allocate all map elements and zero-initialize them */
array = bpf_map_area_alloc(array_size, numa_node);
- if (!array) {
- bpf_map_charge_finish(&mem);
+ if (!array)
return ERR_PTR(-ENOMEM);
- }
/* copy mandatory map attributes */
bpf_map_init_from_attr(&array->map, attr);
- bpf_map_charge_move(&array->map.memory, &mem);
return &array->map;
}
diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c
index 31cb04a4dd2d..f25b719ac786 100644
--- a/kernel/bpf/ringbuf.c
+++ b/kernel/bpf/ringbuf.c
@@ -48,7 +48,6 @@ struct bpf_ringbuf {
struct bpf_ringbuf_map {
struct bpf_map map;
- struct bpf_map_memory memory;
struct bpf_ringbuf *rb;
};
@@ -60,8 +59,8 @@ struct bpf_ringbuf_hdr {
static struct bpf_ringbuf *bpf_ringbuf_area_alloc(size_t data_sz, int numa_node)
{
- const gfp_t flags = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN |
- __GFP_ZERO;
+ const gfp_t flags = GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL |
+ __GFP_NOWARN | __GFP_ZERO;
int nr_meta_pages = RINGBUF_PGOFF + RINGBUF_POS_PAGES;
int nr_data_pages = data_sz >> PAGE_SHIFT;
int nr_pages = nr_meta_pages + nr_data_pages;
@@ -88,10 +87,7 @@ static struct bpf_ringbuf *bpf_ringbuf_area_alloc(size_t data_sz, int numa_node)
* user-space implementations significantly.
*/
array_size = (nr_meta_pages + 2 * nr_data_pages) * sizeof(*pages);
- if (array_size > PAGE_SIZE)
- pages = vmalloc_node(array_size, numa_node);
- else
- pages = kmalloc_node(array_size, flags, numa_node);
+ pages = bpf_map_area_alloc(array_size, numa_node);
if (!pages)
return NULL;
@@ -134,7 +130,7 @@ static struct bpf_ringbuf *bpf_ringbuf_alloc(size_t data_sz, int numa_node)
rb = bpf_ringbuf_area_alloc(data_sz, numa_node);
if (!rb)
- return ERR_PTR(-ENOMEM);
+ return NULL;
spin_lock_init(&rb->spinlock);
init_waitqueue_head(&rb->waitq);
@@ -150,8 +146,6 @@ static struct bpf_ringbuf *bpf_ringbuf_alloc(size_t data_sz, int numa_node)
static struct bpf_map *ringbuf_map_alloc(union bpf_attr *attr)
{
struct bpf_ringbuf_map *rb_map;
- u64 cost;
- int err;
if (attr->map_flags & ~RINGBUF_CREATE_FLAG_MASK)
return ERR_PTR(-EINVAL);
@@ -167,32 +161,19 @@ static struct bpf_map *ringbuf_map_alloc(union bpf_attr *attr)
return ERR_PTR(-E2BIG);
#endif
- rb_map = kzalloc(sizeof(*rb_map), GFP_USER);
+ rb_map = kzalloc(sizeof(*rb_map), GFP_USER | __GFP_ACCOUNT);
if (!rb_map)
return ERR_PTR(-ENOMEM);
bpf_map_init_from_attr(&rb_map->map, attr);
- cost = sizeof(struct bpf_ringbuf_map) +
- sizeof(struct bpf_ringbuf) +
- attr->max_entries;
- err = bpf_map_charge_init(&rb_map->map.memory, cost);
- if (err)
- goto err_free_map;
-
rb_map->rb = bpf_ringbuf_alloc(attr->max_entries, rb_map->map.numa_node);
- if (IS_ERR(rb_map->rb)) {
- err = PTR_ERR(rb_map->rb);
- goto err_uncharge;
+ if (!rb_map->rb) {
+ kfree(rb_map);
+ return ERR_PTR(-ENOMEM);
}
return &rb_map->map;
-
-err_uncharge:
- bpf_map_charge_finish(&rb_map->map.memory);
-err_free_map:
- kfree(rb_map);
- return ERR_PTR(err);
}
static void bpf_ringbuf_free(struct bpf_ringbuf *rb)
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 06065fa27124..3325add8e629 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -90,7 +90,6 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
{
u32 value_size = attr->value_size;
struct bpf_stack_map *smap;
- struct bpf_map_memory mem;
u64 cost, n_buckets;
int err;
@@ -119,15 +118,9 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap);
cost += n_buckets * (value_size + sizeof(struct stack_map_bucket));
- err = bpf_map_charge_init(&mem, cost);
- if (err)
- return ERR_PTR(err);
-
smap = bpf_map_area_alloc(cost, bpf_map_attr_numa_node(attr));
- if (!smap) {
- bpf_map_charge_finish(&mem);
+ if (!smap)
return ERR_PTR(-ENOMEM);
- }
bpf_map_init_from_attr(&smap->map, attr);
smap->map.value_size = value_size;
@@ -135,20 +128,17 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
err = get_callchain_buffers(sysctl_perf_event_max_stack);
if (err)
- goto free_charge;
+ goto free_smap;
err = prealloc_elems_and_freelist(smap);
if (err)
goto put_buffers;
- bpf_map_charge_move(&smap->map.memory, &mem);
-
return &smap->map;
put_buffers:
put_callchain_buffers();
-free_charge:
- bpf_map_charge_finish(&mem);
+free_smap:
bpf_map_area_free(smap);
return ERR_PTR(err);
}
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index f3fe9f53f93c..d16dd4945100 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -31,6 +31,7 @@
#include <linux/poll.h>
#include <linux/bpf-netns.h>
#include <linux/rcupdate_trace.h>
+#include <linux/memcontrol.h>
#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
(map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
@@ -127,7 +128,7 @@ static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
return map;
}
-static u32 bpf_map_value_size(struct bpf_map *map)
+static u32 bpf_map_value_size(const struct bpf_map *map)
{
if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
@@ -267,6 +268,10 @@ static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value,
return err;
}
+/* Please, do not use this function outside from the map creation path
+ * (e.g. in map update path) without taking care of setting the active
+ * memory cgroup (see at bpf_map_kmalloc_node() for example).
+ */
static void *__bpf_map_area_alloc(u64 size, int numa_node, bool mmapable)
{
/* We really just want to fail instead of triggering OOM killer
@@ -279,7 +284,7 @@ static void *__bpf_map_area_alloc(u64 size, int numa_node, bool mmapable)
* __GFP_RETRY_MAYFAIL to avoid such situations.
*/
- const gfp_t gfp = __GFP_NOWARN | __GFP_ZERO;
+ const gfp_t gfp = __GFP_NOWARN | __GFP_ZERO | __GFP_ACCOUNT;
unsigned int flags = 0;
unsigned long align = 1;
void *area;
@@ -341,77 +346,6 @@ void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr)
map->numa_node = bpf_map_attr_numa_node(attr);
}
-static int bpf_charge_memlock(struct user_struct *user, u32 pages)
-{
- unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-
- if (atomic_long_add_return(pages, &user->locked_vm) > memlock_limit) {
- atomic_long_sub(pages, &user->locked_vm);
- return -EPERM;
- }
- return 0;
-}
-
-static void bpf_uncharge_memlock(struct user_struct *user, u32 pages)
-{
- if (user)
- atomic_long_sub(pages, &user->locked_vm);
-}
-
-int bpf_map_charge_init(struct bpf_map_memory *mem, u64 size)
-{
- u32 pages = round_up(size, PAGE_SIZE) >> PAGE_SHIFT;
- struct user_struct *user;
- int ret;
-
- if (size >= U32_MAX - PAGE_SIZE)
- return -E2BIG;
-
- user = get_current_user();
- ret = bpf_charge_memlock(user, pages);
- if (ret) {
- free_uid(user);
- return ret;
- }
-
- mem->pages = pages;
- mem->user = user;
-
- return 0;
-}
-
-void bpf_map_charge_finish(struct bpf_map_memory *mem)
-{
- bpf_uncharge_memlock(mem->user, mem->pages);
- free_uid(mem->user);
-}
-
-void bpf_map_charge_move(struct bpf_map_memory *dst,
- struct bpf_map_memory *src)
-{
- *dst = *src;
-
- /* Make sure src will not be used for the redundant uncharging. */
- memset(src, 0, sizeof(struct bpf_map_memory));
-}
-
-int bpf_map_charge_memlock(struct bpf_map *map, u32 pages)
-{
- int ret;
-
- ret = bpf_charge_memlock(map->memory.user, pages);
- if (ret)
- return ret;
- map->memory.pages += pages;
- return ret;
-}
-
-void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages)
-{
- bpf_uncharge_memlock(map->memory.user, pages);
- map->memory.pages -= pages;
-}
-
static int bpf_map_alloc_id(struct bpf_map *map)
{
int id;
@@ -456,17 +390,74 @@ void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock)
__release(&map_idr_lock);
}
+#ifdef CONFIG_MEMCG_KMEM
+static void bpf_map_save_memcg(struct bpf_map *map)
+{
+ map->memcg = get_mem_cgroup_from_mm(current->mm);
+}
+
+static void bpf_map_release_memcg(struct bpf_map *map)
+{
+ mem_cgroup_put(map->memcg);
+}
+
+void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags,
+ int node)
+{
+ struct mem_cgroup *old_memcg;
+ void *ptr;
+
+ old_memcg = set_active_memcg(map->memcg);
+ ptr = kmalloc_node(size, flags | __GFP_ACCOUNT, node);
+ set_active_memcg(old_memcg);
+
+ return ptr;
+}
+
+void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags)
+{
+ struct mem_cgroup *old_memcg;
+ void *ptr;
+
+ old_memcg = set_active_memcg(map->memcg);
+ ptr = kzalloc(size, flags | __GFP_ACCOUNT);
+ set_active_memcg(old_memcg);
+
+ return ptr;
+}
+
+void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size,
+ size_t align, gfp_t flags)
+{
+ struct mem_cgroup *old_memcg;
+ void __percpu *ptr;
+
+ old_memcg = set_active_memcg(map->memcg);
+ ptr = __alloc_percpu_gfp(size, align, flags | __GFP_ACCOUNT);
+ set_active_memcg(old_memcg);
+
+ return ptr;
+}
+
+#else
+static void bpf_map_save_memcg(struct bpf_map *map)
+{
+}
+
+static void bpf_map_release_memcg(struct bpf_map *map)
+{
+}
+#endif
+
/* called from workqueue */
static void bpf_map_free_deferred(struct work_struct *work)
{
struct bpf_map *map = container_of(work, struct bpf_map, work);
- struct bpf_map_memory mem;
- bpf_map_charge_move(&mem, &map->memory);
security_bpf_map_free(map);
+ bpf_map_release_memcg(map);
/* implementation dependent freeing */
map->ops->map_free(map);
- bpf_map_charge_finish(&mem);
}
static void bpf_map_put_uref(struct bpf_map *map)
@@ -527,6 +518,19 @@ static fmode_t map_get_sys_perms(struct bpf_map *map, struct fd f)
}
#ifdef CONFIG_PROC_FS
+/* Provides an approximation of the map's memory footprint.
+ * Used only to provide a backward compatibility and display
+ * a reasonable "memlock" info.
+ */
+static unsigned long bpf_map_memory_footprint(const struct bpf_map *map)
+{
+ unsigned long size;
+
+ size = round_up(map->key_size + bpf_map_value_size(map), 8);
+
+ return round_up(map->max_entries * size, PAGE_SIZE);
+}
+
static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
{
const struct bpf_map *map = filp->private_data;
@@ -545,7 +549,7 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
"value_size:\t%u\n"
"max_entries:\t%u\n"
"map_flags:\t%#x\n"
- "memlock:\t%llu\n"
+ "memlock:\t%lu\n"
"map_id:\t%u\n"
"frozen:\t%u\n",
map->map_type,
@@ -553,7 +557,7 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
map->value_size,
map->max_entries,
map->map_flags,
- map->memory.pages * 1ULL << PAGE_SHIFT,
+ bpf_map_memory_footprint(map),
map->id,
READ_ONCE(map->frozen));
if (type) {
@@ -796,7 +800,6 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
static int map_create(union bpf_attr *attr)
{
int numa_node = bpf_map_attr_numa_node(attr);
- struct bpf_map_memory mem;
struct bpf_map *map;
int f_flags;
int err;
@@ -875,6 +878,8 @@ static int map_create(union bpf_attr *attr)
if (err)
goto free_map_sec;
+ bpf_map_save_memcg(map);
+
err = bpf_map_new_fd(map, f_flags);
if (err < 0) {
/* failed to allocate fd.
@@ -893,9 +898,7 @@ free_map_sec:
security_bpf_map_free(map);
free_map:
btf_put(map->btf);
- bpf_map_charge_move(&mem, &map->memory);
map->ops->map_free(map);
- bpf_map_charge_finish(&mem);
return err;
}
@@ -1629,51 +1632,6 @@ static void bpf_audit_prog(const struct bpf_prog *prog, unsigned int op)
audit_log_end(ab);
}
-int __bpf_prog_charge(struct user_struct *user, u32 pages)
-{
- unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
- unsigned long user_bufs;
-
- if (user) {
- user_bufs = atomic_long_add_return(pages, &user->locked_vm);
- if (user_bufs > memlock_limit) {
- atomic_long_sub(pages, &user->locked_vm);
- return -EPERM;
- }
- }
-
- return 0;
-}
-
-void __bpf_prog_uncharge(struct user_struct *user, u32 pages)
-{
- if (user)
- atomic_long_sub(pages, &user->locked_vm);
-}
-
-static int bpf_prog_charge_memlock(struct bpf_prog *prog)
-{
- struct user_struct *user = get_current_user();
- int ret;
-
- ret = __bpf_prog_charge(user, prog->pages);
- if (ret) {
- free_uid(user);
- return ret;
- }
-
- prog->aux->user = user;
- return 0;
-}
-
-static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
-{
- struct user_struct *user = prog->aux->user;
-
- __bpf_prog_uncharge(user, prog->pages);
- free_uid(user);
-}
-
static int bpf_prog_alloc_id(struct bpf_prog *prog)
{
int id;
@@ -1723,7 +1681,7 @@ static void __bpf_prog_put_rcu(struct rcu_head *rcu)
kvfree(aux->func_info);
kfree(aux->func_info_aux);
- bpf_prog_uncharge_memlock(aux->prog);
+ free_uid(aux->user);
security_bpf_prog_free(aux);
bpf_prog_free(aux->prog);
}
@@ -2161,7 +2119,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
dst_prog = bpf_prog_get(attr->attach_prog_fd);
if (IS_ERR(dst_prog)) {
err = PTR_ERR(dst_prog);
- goto free_prog_nouncharge;
+ goto free_prog;
}
prog->aux->dst_prog = dst_prog;
}
@@ -2171,18 +2129,15 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
err = security_bpf_prog_alloc(prog->aux);
if (err)
- goto free_prog_nouncharge;
-
- err = bpf_prog_charge_memlock(prog);
- if (err)
- goto free_prog_sec;
+ goto free_prog;
+ prog->aux->user = get_current_user();
prog->len = attr->insn_cnt;
err = -EFAULT;
if (copy_from_user(prog->insns, u64_to_user_ptr(attr->insns),
bpf_prog_insn_size(prog)) != 0)
- goto free_prog;
+ goto free_prog_sec;
prog->orig_prog = NULL;
prog->jited = 0;
@@ -2193,19 +2148,19 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
if (bpf_prog_is_dev_bound(prog->aux)) {
err = bpf_prog_offload_init(prog, attr);
if (err)
- goto free_prog;
+ goto free_prog_sec;
}
/* find program type: socket_filter vs tracing_filter */
err = find_prog_type(type, prog);
if (err < 0)
- goto free_prog;
+ goto free_prog_sec;
prog->aux->load_time = ktime_get_boottime_ns();
err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name,
sizeof(attr->prog_name));
if (err < 0)
- goto free_prog;
+ goto free_prog_sec;
/* run eBPF verifier */
err = bpf_check(&prog, attr, uattr);
@@ -2250,11 +2205,10 @@ free_used_maps:
*/
__bpf_prog_put_noref(prog, prog->aux->func_cnt);
return err;
-free_prog:
- bpf_prog_uncharge_memlock(prog);
free_prog_sec:
+ free_uid(prog->aux->user);
security_bpf_prog_free(prog->aux);
-free_prog_nouncharge:
+free_prog:
bpf_prog_free(prog);
return err;
}
diff --git a/kernel/fork.c b/kernel/fork.c
index 6d266388d380..cbd4f6f58409 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -404,9 +404,10 @@ static int memcg_charge_kernel_stack(struct task_struct *tsk)
for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
/*
- * If memcg_kmem_charge_page() fails, page->mem_cgroup
- * pointer is NULL, and memcg_kmem_uncharge_page() in
- * free_thread_stack() will ignore this page.
+ * If memcg_kmem_charge_page() fails, page's
+ * memory cgroup pointer is NULL, and
+ * memcg_kmem_uncharge_page() in free_thread_stack()
+ * will ignore this page.
*/
ret = memcg_kmem_charge_page(vm->pages[i], GFP_KERNEL,
0);
diff --git a/mm/debug.c b/mm/debug.c
index ccca576b2899..8a40b3fefbeb 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -182,8 +182,8 @@ hex_only:
pr_warn("page dumped because: %s\n", reason);
#ifdef CONFIG_MEMCG
- if (!page_poisoned && page->mem_cgroup)
- pr_warn("page->mem_cgroup:%px\n", page->mem_cgroup);
+ if (!page_poisoned && page->memcg_data)
+ pr_warn("pages's memcg:%lx\n", page->memcg_data);
#endif
}
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 9474dbc150ed..cedfb3503411 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -470,7 +470,7 @@ pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
#ifdef CONFIG_MEMCG
static inline struct deferred_split *get_deferred_split_queue(struct page *page)
{
- struct mem_cgroup *memcg = compound_head(page)->mem_cgroup;
+ struct mem_cgroup *memcg = page_memcg(compound_head(page));
struct pglist_data *pgdat = NODE_DATA(page_to_nid(page));
if (memcg)
@@ -2765,7 +2765,7 @@ void deferred_split_huge_page(struct page *page)
{
struct deferred_split *ds_queue = get_deferred_split_queue(page);
#ifdef CONFIG_MEMCG
- struct mem_cgroup *memcg = compound_head(page)->mem_cgroup;
+ struct mem_cgroup *memcg = page_memcg(compound_head(page));
#endif
unsigned long flags;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 3dcbf24d2227..e0366e306221 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -533,7 +533,7 @@ struct cgroup_subsys_state *mem_cgroup_css_from_page(struct page *page)
{
struct mem_cgroup *memcg;
- memcg = page->mem_cgroup;
+ memcg = page_memcg(page);
if (!memcg || !cgroup_subsys_on_dfl(memory_cgrp_subsys))
memcg = root_mem_cgroup;
@@ -560,16 +560,7 @@ ino_t page_cgroup_ino(struct page *page)
unsigned long ino = 0;
rcu_read_lock();
- memcg = page->mem_cgroup;
-
- /*
- * The lowest bit set means that memcg isn't a valid
- * memcg pointer, but a obj_cgroups pointer.
- * In this case the page is shared and doesn't belong
- * to any specific memory cgroup.
- */
- if ((unsigned long) memcg & 0x1UL)
- memcg = NULL;
+ memcg = page_memcg_check(page);
while (memcg && !(memcg->css.flags & CSS_ONLINE))
memcg = parent_mem_cgroup(memcg);
@@ -1050,7 +1041,7 @@ EXPORT_SYMBOL(get_mem_cgroup_from_mm);
*/
struct mem_cgroup *get_mem_cgroup_from_page(struct page *page)
{
- struct mem_cgroup *memcg = page->mem_cgroup;
+ struct mem_cgroup *memcg = page_memcg(page);
if (mem_cgroup_disabled())
return NULL;
@@ -1349,7 +1340,7 @@ struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct pglist_data *pgd
goto out;
}
- memcg = page->mem_cgroup;
+ memcg = page_memcg(page);
/*
* Swapcache readahead pages are added to the LRU - and
* possibly migrated - before they are charged.
@@ -2109,7 +2100,7 @@ void mem_cgroup_print_oom_group(struct mem_cgroup *memcg)
}
/**
- * lock_page_memcg - lock a page->mem_cgroup binding
+ * lock_page_memcg - lock a page and memcg binding
* @page: the page
*
* This function protects unlocked LRU pages from being moved to
@@ -2141,7 +2132,7 @@ struct mem_cgroup *lock_page_memcg(struct page *page)
if (mem_cgroup_disabled())
return NULL;
again:
- memcg = head->mem_cgroup;
+ memcg = page_memcg(head);
if (unlikely(!memcg))
return NULL;
@@ -2149,7 +2140,7 @@ again:
return memcg;
spin_lock_irqsave(&memcg->move_lock, flags);
- if (memcg != head->mem_cgroup) {
+ if (memcg != page_memcg(head)) {
spin_unlock_irqrestore(&memcg->move_lock, flags);
goto again;
}
@@ -2187,14 +2178,14 @@ void __unlock_page_memcg(struct mem_cgroup *memcg)
}
/**
- * unlock_page_memcg - unlock a page->mem_cgroup binding
+ * unlock_page_memcg - unlock a page and memcg binding
* @page: the page
*/
void unlock_page_memcg(struct page *page)
{
struct page *head = compound_head(page);
- __unlock_page_memcg(head->mem_cgroup);
+ __unlock_page_memcg(page_memcg(head));
}
EXPORT_SYMBOL(unlock_page_memcg);
@@ -2884,7 +2875,7 @@ static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
static void commit_charge(struct page *page, struct mem_cgroup *memcg)
{
- VM_BUG_ON_PAGE(page->mem_cgroup, page);
+ VM_BUG_ON_PAGE(page_memcg(page), page);
/*
* Any of the following ensures page->mem_cgroup stability:
*
@@ -2893,7 +2884,7 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg)
* - lock_page_memcg()
* - exclusive reference
*/
- page->mem_cgroup = memcg;
+ page->memcg_data = (unsigned long)memcg;
}
#ifdef CONFIG_MEMCG_KMEM
@@ -2908,8 +2899,7 @@ int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s,
if (!vec)
return -ENOMEM;
- if (cmpxchg(&page->obj_cgroups, NULL,
- (struct obj_cgroup **) ((unsigned long)vec | 0x1UL)))
+ if (!set_page_objcgs(page, vec))
kfree(vec);
else
kmemleak_not_leak(vec);
@@ -2920,6 +2910,12 @@ int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s,
/*
* Returns a pointer to the memory cgroup to which the kernel object is charged.
*
+ * A passed kernel object can be a slab object or a generic kernel page, so
+ * different mechanisms for getting the memory cgroup pointer should be used.
+ * In certain cases (e.g. kernel stacks or large kmallocs with SLUB) the caller
+ * can not know for sure how the kernel object is implemented.
+ * mem_cgroup_from_obj() can be safely used in such cases.
+ *
* The caller must ensure the memcg lifetime, e.g. by taking rcu_read_lock(),
* cgroup_mutex, etc.
*/
@@ -2933,35 +2929,30 @@ struct mem_cgroup *mem_cgroup_from_obj(void *p)
page = virt_to_head_page(p);
/*
- * If page->mem_cgroup is set, it's either a simple mem_cgroup pointer
- * or a pointer to obj_cgroup vector. In the latter case the lowest
- * bit of the pointer is set.
- * The page->mem_cgroup pointer can be asynchronously changed
- * from NULL to (obj_cgroup_vec | 0x1UL), but can't be changed
- * from a valid memcg pointer to objcg vector or back.
- */
- if (!page->mem_cgroup)
- return NULL;
-
- /*
* Slab objects are accounted individually, not per-page.
* Memcg membership data for each individual object is saved in
* the page->obj_cgroups.
*/
- if (page_has_obj_cgroups(page)) {
+ if (page_objcgs_check(page)) {
struct obj_cgroup *objcg;
unsigned int off;
off = obj_to_index(page->slab_cache, page, p);
- objcg = page_obj_cgroups(page)[off];
+ objcg = page_objcgs(page)[off];
if (objcg)
return obj_cgroup_memcg(objcg);
return NULL;
}
- /* All other pages use page->mem_cgroup */
- return page->mem_cgroup;
+ /*
+ * page_memcg_check() is used here, because page_has_obj_cgroups()
+ * check above could fail because the object cgroups vector wasn't set
+ * at that moment, but it can be set concurrently.
+ * page_memcg_check(page) will guarantee that a proper memory
+ * cgroup pointer or NULL will be returned.
+ */
+ return page_memcg_check(page);
}
__always_inline struct obj_cgroup *get_obj_cgroup_from_current(void)
@@ -3099,8 +3090,8 @@ int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order)
if (memcg && !mem_cgroup_is_root(memcg)) {
ret = __memcg_kmem_charge(memcg, gfp, 1 << order);
if (!ret) {
- page->mem_cgroup = memcg;
- __SetPageKmemcg(page);
+ page->memcg_data = (unsigned long)memcg |
+ MEMCG_DATA_KMEM;
return 0;
}
css_put(&memcg->css);
@@ -3115,7 +3106,7 @@ int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order)
*/
void __memcg_kmem_uncharge_page(struct page *page, int order)
{
- struct mem_cgroup *memcg = page->mem_cgroup;
+ struct mem_cgroup *memcg = page_memcg(page);
unsigned int nr_pages = 1 << order;
if (!memcg)
@@ -3123,12 +3114,8 @@ void __memcg_kmem_uncharge_page(struct page *page, int order)
VM_BUG_ON_PAGE(mem_cgroup_is_root(memcg), page);
__memcg_kmem_uncharge(memcg, nr_pages);
- page->mem_cgroup = NULL;
+ page->memcg_data = 0;
css_put(&memcg->css);
-
- /* slab pages do not have PageKmemcg flag set */
- if (PageKmemcg(page))
- __ClearPageKmemcg(page);
}
static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
@@ -3274,7 +3261,7 @@ void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size)
*/
void mem_cgroup_split_huge_fixup(struct page *head)
{
- struct mem_cgroup *memcg = head->mem_cgroup;
+ struct mem_cgroup *memcg = page_memcg(head);
int i;
if (mem_cgroup_disabled())
@@ -3282,7 +3269,7 @@ void mem_cgroup_split_huge_fixup(struct page *head)
for (i = 1; i < HPAGE_PMD_NR; i++) {
css_get(&memcg->css);
- head[i].mem_cgroup = memcg;
+ head[i].memcg_data = (unsigned long)memcg;
}
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
@@ -4664,7 +4651,7 @@ void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages,
void mem_cgroup_track_foreign_dirty_slowpath(struct page *page,
struct bdi_writeback *wb)
{
- struct mem_cgroup *memcg = page->mem_cgroup;
+ struct mem_cgroup *memcg = page_memcg(page);
struct memcg_cgwb_frn *frn;
u64 now = get_jiffies_64();
u64 oldest_at = now;
@@ -5641,14 +5628,14 @@ static int mem_cgroup_move_account(struct page *page,
/*
* Prevent mem_cgroup_migrate() from looking at
- * page->mem_cgroup of its source page while we change it.
+ * page's memory cgroup of its source page while we change it.
*/
ret = -EBUSY;
if (!trylock_page(page))
goto out;
ret = -EINVAL;
- if (page->mem_cgroup != from)
+ if (page_memcg(page) != from)
goto out_unlock;
pgdat = page_pgdat(page);
@@ -5703,13 +5690,13 @@ static int mem_cgroup_move_account(struct page *page,
/*
* All state has been migrated, let's switch to the new memcg.
*
- * It is safe to change page->mem_cgroup here because the page
+ * It is safe to change page's memcg here because the page
* is referenced, charged, isolated, and locked: we can't race
* with (un)charging, migration, LRU putback, or anything else
- * that would rely on a stable page->mem_cgroup.
+ * that would rely on a stable page's memory cgroup.
*
* Note that lock_page_memcg is a memcg lock, not a page lock,
- * to save space. As soon as we switch page->mem_cgroup to a
+ * to save space. As soon as we switch page's memory cgroup to a
* new memcg that isn't locked, the above state can change
* concurrently again. Make sure we're truly done with it.
*/
@@ -5718,7 +5705,7 @@ static int mem_cgroup_move_account(struct page *page,
css_get(&to->css);
css_put(&from->css);
- page->mem_cgroup = to;
+ page->memcg_data = (unsigned long)to;
__unlock_page_memcg(from);
@@ -5784,7 +5771,7 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
* mem_cgroup_move_account() checks the page is valid or
* not under LRU exclusion.
*/
- if (page->mem_cgroup == mc.from) {
+ if (page_memcg(page) == mc.from) {
ret = MC_TARGET_PAGE;
if (is_device_private_page(page))
ret = MC_TARGET_DEVICE;
@@ -5828,7 +5815,7 @@ static enum mc_target_type get_mctgt_type_thp(struct vm_area_struct *vma,
VM_BUG_ON_PAGE(!page || !PageHead(page), page);
if (!(mc.flags & MOVE_ANON))
return ret;
- if (page->mem_cgroup == mc.from) {
+ if (page_memcg(page) == mc.from) {
ret = MC_TARGET_PAGE;
if (target) {
get_page(page);
@@ -6774,12 +6761,12 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask)
/*
* Every swap fault against a single page tries to charge the
* page, bail as early as possible. shmem_unuse() encounters
- * already charged pages, too. page->mem_cgroup is protected
- * by the page lock, which serializes swap cache removal, which
- * in turn serializes uncharging.
+ * already charged pages, too. page and memcg binding is
+ * protected by the page lock, which serializes swap cache
+ * removal, which in turn serializes uncharging.
*/
VM_BUG_ON_PAGE(!PageLocked(page), page);
- if (compound_head(page)->mem_cgroup)
+ if (page_memcg(compound_head(page)))
goto out;
id = lookup_swap_cgroup_id(ent);
@@ -6863,21 +6850,21 @@ static void uncharge_page(struct page *page, struct uncharge_gather *ug)
VM_BUG_ON_PAGE(PageLRU(page), page);
- if (!page->mem_cgroup)
+ if (!page_memcg(page))
return;
/*
* Nobody should be changing or seriously looking at
- * page->mem_cgroup at this point, we have fully
+ * page_memcg(page) at this point, we have fully
* exclusive access to the page.
*/
- if (ug->memcg != page->mem_cgroup) {
+ if (ug->memcg != page_memcg(page)) {
if (ug->memcg) {
uncharge_batch(ug);
uncharge_gather_clear(ug);
}
- ug->memcg = page->mem_cgroup;
+ ug->memcg = page_memcg(page);
/* pairs with css_put in uncharge_batch */
css_get(&ug->memcg->css);
@@ -6886,15 +6873,13 @@ static void uncharge_page(struct page *page, struct uncharge_gather *ug)
nr_pages = compound_nr(page);
ug->nr_pages += nr_pages;
- if (!PageKmemcg(page)) {
- ug->pgpgout++;
- } else {
+ if (PageMemcgKmem(page))
ug->nr_kmem += nr_pages;
- __ClearPageKmemcg(page);
- }
+ else
+ ug->pgpgout++;
ug->dummy_page = page;
- page->mem_cgroup = NULL;
+ page->memcg_data = 0;
css_put(&ug->memcg->css);
}
@@ -6937,7 +6922,7 @@ void mem_cgroup_uncharge(struct page *page)
return;
/* Don't touch page->lru of any random page, pre-check: */
- if (!page->mem_cgroup)
+ if (!page_memcg(page))
return;
uncharge_gather_clear(&ug);
@@ -6987,11 +6972,11 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage)
return;
/* Page cache replacement: new page already charged? */
- if (newpage->mem_cgroup)
+ if (page_memcg(newpage))
return;
/* Swapcache readahead pages can get replaced before being charged */
- memcg = oldpage->mem_cgroup;
+ memcg = page_memcg(oldpage);
if (!memcg)
return;
@@ -7186,7 +7171,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
if (cgroup_subsys_on_dfl(memory_cgrp_subsys))
return;
- memcg = page->mem_cgroup;
+ memcg = page_memcg(page);
/* Readahead page, never charged */
if (!memcg)
@@ -7207,7 +7192,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
VM_BUG_ON_PAGE(oldid, page);
mod_memcg_state(swap_memcg, MEMCG_SWAP, nr_entries);
- page->mem_cgroup = NULL;
+ page->memcg_data = 0;
if (!mem_cgroup_is_root(memcg))
page_counter_uncharge(&memcg->memory, nr_entries);
@@ -7250,7 +7235,7 @@ int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry)
if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
return 0;
- memcg = page->mem_cgroup;
+ memcg = page_memcg(page);
/* Readahead page, never charged */
if (!memcg)
@@ -7331,7 +7316,7 @@ bool mem_cgroup_swap_full(struct page *page)
if (cgroup_memory_noswap || !cgroup_subsys_on_dfl(memory_cgrp_subsys))
return false;
- memcg = page->mem_cgroup;
+ memcg = page_memcg(page);
if (!memcg)
return false;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 23f5066bd4a5..3c53018c9c61 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1092,7 +1092,7 @@ static inline bool page_expected_state(struct page *page,
if (unlikely((unsigned long)page->mapping |
page_ref_count(page) |
#ifdef CONFIG_MEMCG
- (unsigned long)page->mem_cgroup |
+ (unsigned long)page_memcg(page) |
#endif
(page->flags & check_flags)))
return false;
@@ -1117,7 +1117,7 @@ static const char *page_bad_reason(struct page *page, unsigned long flags)
bad_reason = "PAGE_FLAGS_CHECK_AT_FREE flag(s) set";
}
#ifdef CONFIG_MEMCG
- if (unlikely(page->mem_cgroup))
+ if (unlikely(page_memcg(page)))
bad_reason = "page still charged to cgroup";
#endif
return bad_reason;
@@ -1214,7 +1214,7 @@ static __always_inline bool free_pages_prepare(struct page *page,
* Do not let hwpoison pages hit pcplists/buddy
* Untie memcg state and reset page's owner
*/
- if (memcg_kmem_enabled() && PageKmemcg(page))
+ if (memcg_kmem_enabled() && PageMemcgKmem(page))
__memcg_kmem_uncharge_page(page, order);
reset_page_owner(page, order);
return false;
@@ -1244,7 +1244,7 @@ static __always_inline bool free_pages_prepare(struct page *page,
}
if (PageMappingFlags(page))
page->mapping = NULL;
- if (memcg_kmem_enabled() && PageKmemcg(page))
+ if (memcg_kmem_enabled() && PageMemcgKmem(page))
__memcg_kmem_uncharge_page(page, order);
if (check_free)
bad += check_free_page(page);
diff --git a/mm/page_io.c b/mm/page_io.c
index 433df1263349..9bca17ecc4df 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -291,12 +291,14 @@ static inline void count_swpout_vm_event(struct page *page)
static void bio_associate_blkg_from_page(struct bio *bio, struct page *page)
{
struct cgroup_subsys_state *css;
+ struct mem_cgroup *memcg;
- if (!page->mem_cgroup)
+ memcg = page_memcg(page);
+ if (!memcg)
return;
rcu_read_lock();
- css = cgroup_e_css(page->mem_cgroup->css.cgroup, &io_cgrp_subsys);
+ css = cgroup_e_css(memcg->css.cgroup, &io_cgrp_subsys);
bio_associate_blkg_from_css(bio, css);
rcu_read_unlock();
}
diff --git a/mm/slab.h b/mm/slab.h
index 6d7c6a5056ba..9a54a0cb5cca 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -239,30 +239,13 @@ static inline bool kmem_cache_debug_flags(struct kmem_cache *s, slab_flags_t fla
}
#ifdef CONFIG_MEMCG_KMEM
-static inline struct obj_cgroup **page_obj_cgroups(struct page *page)
-{
- /*
- * page->mem_cgroup and page->obj_cgroups are sharing the same
- * space. To distinguish between them in case we don't know for sure
- * that the page is a slab page (e.g. page_cgroup_ino()), let's
- * always set the lowest bit of obj_cgroups.
- */
- return (struct obj_cgroup **)
- ((unsigned long)page->obj_cgroups & ~0x1UL);
-}
-
-static inline bool page_has_obj_cgroups(struct page *page)
-{
- return ((unsigned long)page->obj_cgroups & 0x1UL);
-}
-
int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s,
gfp_t gfp);
static inline void memcg_free_page_obj_cgroups(struct page *page)
{
- kfree(page_obj_cgroups(page));
- page->obj_cgroups = NULL;
+ kfree(page_objcgs(page));
+ page->memcg_data = 0;
}
static inline size_t obj_full_size(struct kmem_cache *s)
@@ -323,7 +306,7 @@ static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
if (likely(p[i])) {
page = virt_to_head_page(p[i]);
- if (!page_has_obj_cgroups(page) &&
+ if (!page_objcgs(page) &&
memcg_alloc_page_obj_cgroups(page, s, flags)) {
obj_cgroup_uncharge(objcg, obj_full_size(s));
continue;
@@ -331,7 +314,7 @@ static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
off = obj_to_index(s, page, p[i]);
obj_cgroup_get(objcg);
- page_obj_cgroups(page)[off] = objcg;
+ page_objcgs(page)[off] = objcg;
mod_objcg_state(objcg, page_pgdat(page),
cache_vmstat_idx(s), obj_full_size(s));
} else {
@@ -345,6 +328,7 @@ static inline void memcg_slab_free_hook(struct kmem_cache *s_orig,
void **p, int objects)
{
struct kmem_cache *s;
+ struct obj_cgroup **objcgs;
struct obj_cgroup *objcg;
struct page *page;
unsigned int off;
@@ -358,7 +342,8 @@ static inline void memcg_slab_free_hook(struct kmem_cache *s_orig,
continue;
page = virt_to_head_page(p[i]);
- if (!page_has_obj_cgroups(page))
+ objcgs = page_objcgs(page);
+ if (!objcgs)
continue;
if (!s_orig)
@@ -367,11 +352,11 @@ static inline void memcg_slab_free_hook(struct kmem_cache *s_orig,
s = s_orig;
off = obj_to_index(s, page, p[i]);
- objcg = page_obj_cgroups(page)[off];
+ objcg = objcgs[off];
if (!objcg)
continue;
- page_obj_cgroups(page)[off] = NULL;
+ objcgs[off] = NULL;
obj_cgroup_uncharge(objcg, obj_full_size(s));
mod_objcg_state(objcg, page_pgdat(page), cache_vmstat_idx(s),
-obj_full_size(s));
@@ -380,11 +365,6 @@ static inline void memcg_slab_free_hook(struct kmem_cache *s_orig,
}
#else /* CONFIG_MEMCG_KMEM */
-static inline bool page_has_obj_cgroups(struct page *page)
-{
- return false;
-}
-
static inline struct mem_cgroup *memcg_from_slab_obj(void *ptr)
{
return NULL;
diff --git a/mm/workingset.c b/mm/workingset.c
index 975a4d2dd02e..130348cbf40a 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -257,7 +257,7 @@ void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg)
struct lruvec *lruvec;
int memcgid;
- /* Page is fully exclusive and pins page->mem_cgroup */
+ /* Page is fully exclusive and pins page's memory cgroup pointer */
VM_BUG_ON_PAGE(PageLRU(page), page);
VM_BUG_ON_PAGE(page_count(page), page);
VM_BUG_ON_PAGE(!PageLocked(page), page);
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index ddc899e83313..64b5ec14ff50 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -27,8 +27,6 @@ struct bpf_stab {
static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
{
struct bpf_stab *stab;
- u64 cost;
- int err;
if (!capable(CAP_NET_ADMIN))
return ERR_PTR(-EPERM);
@@ -39,29 +37,22 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
attr->map_flags & ~SOCK_CREATE_FLAG_MASK)
return ERR_PTR(-EINVAL);
- stab = kzalloc(sizeof(*stab), GFP_USER);
+ stab = kzalloc(sizeof(*stab), GFP_USER | __GFP_ACCOUNT);
if (!stab)
return ERR_PTR(-ENOMEM);
bpf_map_init_from_attr(&stab->map, attr);
raw_spin_lock_init(&stab->lock);
- /* Make sure page count doesn't overflow. */
- cost = (u64) stab->map.max_entries * sizeof(struct sock *);
- err = bpf_map_charge_init(&stab->map.memory, cost);
- if (err)
- goto free_stab;
-
stab->sks = bpf_map_area_alloc(stab->map.max_entries *
sizeof(struct sock *),
stab->map.numa_node);
- if (stab->sks)
- return &stab->map;
- err = -ENOMEM;
- bpf_map_charge_finish(&stab->map.memory);
-free_stab:
- kfree(stab);
- return ERR_PTR(err);
+ if (!stab->sks) {
+ kfree(stab);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ return &stab->map;
}
int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog)
@@ -975,8 +966,9 @@ static struct bpf_shtab_elem *sock_hash_alloc_elem(struct bpf_shtab *htab,
}
}
- new = kmalloc_node(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN,
- htab->map.numa_node);
+ new = bpf_map_kmalloc_node(&htab->map, htab->elem_size,
+ GFP_ATOMIC | __GFP_NOWARN,
+ htab->map.numa_node);
if (!new) {
atomic_dec(&htab->count);
return ERR_PTR(-ENOMEM);
@@ -1103,7 +1095,6 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
{
struct bpf_shtab *htab;
int i, err;
- u64 cost;
if (!capable(CAP_NET_ADMIN))
return ERR_PTR(-EPERM);
@@ -1116,7 +1107,7 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
if (attr->key_size > MAX_BPF_STACK)
return ERR_PTR(-E2BIG);
- htab = kzalloc(sizeof(*htab), GFP_USER);
+ htab = kzalloc(sizeof(*htab), GFP_USER | __GFP_ACCOUNT);
if (!htab)
return ERR_PTR(-ENOMEM);
@@ -1131,21 +1122,10 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
goto free_htab;
}
- cost = (u64) htab->buckets_num * sizeof(struct bpf_shtab_bucket) +
- (u64) htab->elem_size * htab->map.max_entries;
- if (cost >= U32_MAX - PAGE_SIZE) {
- err = -EINVAL;
- goto free_htab;
- }
- err = bpf_map_charge_init(&htab->map.memory, cost);
- if (err)
- goto free_htab;
-
htab->buckets = bpf_map_area_alloc(htab->buckets_num *
sizeof(struct bpf_shtab_bucket),
htab->map.numa_node);
if (!htab->buckets) {
- bpf_map_charge_finish(&htab->map.memory);
err = -ENOMEM;
goto free_htab;
}
diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c
index 66231ba6c348..113fd9017203 100644
--- a/net/xdp/xskmap.c
+++ b/net/xdp/xskmap.c
@@ -16,7 +16,8 @@ static struct xsk_map_node *xsk_map_node_alloc(struct xsk_map *map,
{
struct xsk_map_node *node;
- node = kzalloc(sizeof(*node), GFP_ATOMIC | __GFP_NOWARN);
+ node = bpf_map_kzalloc(&map->map, sizeof(*node),
+ GFP_ATOMIC | __GFP_NOWARN);
if (!node)
return ERR_PTR(-ENOMEM);
@@ -57,9 +58,8 @@ static void xsk_map_sock_delete(struct xdp_sock *xs,
static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
{
- struct bpf_map_memory mem;
- int err, numa_node;
struct xsk_map *m;
+ int numa_node;
u64 size;
if (!capable(CAP_NET_ADMIN))
@@ -73,18 +73,11 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
numa_node = bpf_map_attr_numa_node(attr);
size = struct_size(m, xsk_map, attr->max_entries);
- err = bpf_map_charge_init(&mem, size);
- if (err < 0)
- return ERR_PTR(err);
-
m = bpf_map_area_alloc(size, numa_node);
- if (!m) {
- bpf_map_charge_finish(&mem);
+ if (!m)
return ERR_PTR(-ENOMEM);
- }
bpf_map_init_from_attr(&m->map, attr);
- bpf_map_charge_move(&m->map.memory, &mem);
spin_lock_init(&m->lock);
return &m->map;
diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c
index 8b13230b4c46..9db949290a78 100644
--- a/samples/bpf/map_perf_test_user.c
+++ b/samples/bpf/map_perf_test_user.c
@@ -421,7 +421,6 @@ static void fixup_map(struct bpf_object *obj)
int main(int argc, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
int nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
struct bpf_link *links[8];
struct bpf_program *prog;
@@ -430,11 +429,6 @@ int main(int argc, char **argv)
char filename[256];
int i = 0;
- if (setrlimit(RLIMIT_MEMLOCK, &r)) {
- perror("setrlimit(RLIMIT_MEMLOCK)");
- return 1;
- }
-
if (argc > 1)
test_flags = atoi(argv[1]) ? : test_flags;
diff --git a/samples/bpf/offwaketime_user.c b/samples/bpf/offwaketime_user.c
index 5734cfdaaacb..73a986876c1a 100644
--- a/samples/bpf/offwaketime_user.c
+++ b/samples/bpf/offwaketime_user.c
@@ -95,18 +95,12 @@ static void int_exit(int sig)
int main(int argc, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
struct bpf_object *obj = NULL;
struct bpf_link *links[2];
struct bpf_program *prog;
int delay = 1, i = 0;
char filename[256];
- if (setrlimit(RLIMIT_MEMLOCK, &r)) {
- perror("setrlimit(RLIMIT_MEMLOCK)");
- return 1;
- }
-
if (load_kallsyms()) {
printf("failed to process /proc/kallsyms\n");
return 2;
diff --git a/samples/bpf/sockex2_user.c b/samples/bpf/sockex2_user.c
index af925a5afd1d..bafa567b840c 100644
--- a/samples/bpf/sockex2_user.c
+++ b/samples/bpf/sockex2_user.c
@@ -16,7 +16,6 @@ struct pair {
int main(int ac, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
struct bpf_object *obj;
int map_fd, prog_fd;
char filename[256];
@@ -24,7 +23,6 @@ int main(int ac, char **argv)
FILE *f;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- setrlimit(RLIMIT_MEMLOCK, &r);
if (bpf_prog_load(filename, BPF_PROG_TYPE_SOCKET_FILTER,
&obj, &prog_fd))
diff --git a/samples/bpf/sockex3_user.c b/samples/bpf/sockex3_user.c
index 7793f6a6ae7e..6ae99ecc766c 100644
--- a/samples/bpf/sockex3_user.c
+++ b/samples/bpf/sockex3_user.c
@@ -26,7 +26,6 @@ struct pair {
int main(int argc, char **argv)
{
int i, sock, key, fd, main_prog_fd, jmp_table_fd, hash_map_fd;
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
struct bpf_program *prog;
struct bpf_object *obj;
const char *section;
@@ -34,7 +33,6 @@ int main(int argc, char **argv)
FILE *f;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- setrlimit(RLIMIT_MEMLOCK, &r);
obj = bpf_object__open_file(filename, NULL);
if (libbpf_get_error(obj)) {
diff --git a/samples/bpf/spintest_user.c b/samples/bpf/spintest_user.c
index f090d0dc60d6..0d7e1e5a8658 100644
--- a/samples/bpf/spintest_user.c
+++ b/samples/bpf/spintest_user.c
@@ -10,7 +10,6 @@
int main(int ac, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
char filename[256], symbol[256];
struct bpf_object *obj = NULL;
struct bpf_link *links[20];
@@ -20,11 +19,6 @@ int main(int ac, char **argv)
const char *section;
struct ksym *sym;
- if (setrlimit(RLIMIT_MEMLOCK, &r)) {
- perror("setrlimit(RLIMIT_MEMLOCK)");
- return 1;
- }
-
if (load_kallsyms()) {
printf("failed to process /proc/kallsyms\n");
return 2;
diff --git a/samples/bpf/syscall_tp_user.c b/samples/bpf/syscall_tp_user.c
index 76a1d00128fb..a0ebf1833ed3 100644
--- a/samples/bpf/syscall_tp_user.c
+++ b/samples/bpf/syscall_tp_user.c
@@ -115,7 +115,6 @@ cleanup:
int main(int argc, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
int opt, num_progs = 1;
char filename[256];
@@ -131,7 +130,6 @@ int main(int argc, char **argv)
}
}
- setrlimit(RLIMIT_MEMLOCK, &r);
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
return test(filename, num_progs);
diff --git a/samples/bpf/task_fd_query_user.c b/samples/bpf/task_fd_query_user.c
index f6b772faa348..a78025b0026b 100644
--- a/samples/bpf/task_fd_query_user.c
+++ b/samples/bpf/task_fd_query_user.c
@@ -310,7 +310,6 @@ cleanup:
int main(int argc, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
extern char __executable_start;
char filename[256], buf[256];
__u64 uprobe_file_offset;
@@ -318,11 +317,6 @@ int main(int argc, char **argv)
struct bpf_object *obj;
int i = 0, err = -1;
- if (setrlimit(RLIMIT_MEMLOCK, &r)) {
- perror("setrlimit(RLIMIT_MEMLOCK)");
- return err;
- }
-
if (load_kallsyms()) {
printf("failed to process /proc/kallsyms\n");
return err;
diff --git a/samples/bpf/test_lru_dist.c b/samples/bpf/test_lru_dist.c
index b313dba4111b..c92c5c06b965 100644
--- a/samples/bpf/test_lru_dist.c
+++ b/samples/bpf/test_lru_dist.c
@@ -489,7 +489,6 @@ static void test_parallel_lru_loss(int map_type, int map_flags, int nr_tasks)
int main(int argc, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
int map_flags[] = {0, BPF_F_NO_COMMON_LRU};
const char *dist_file;
int nr_tasks = 1;
@@ -508,8 +507,6 @@ int main(int argc, char **argv)
setbuf(stdout, NULL);
- assert(!setrlimit(RLIMIT_MEMLOCK, &r));
-
srand(time(NULL));
nr_cpus = bpf_num_possible_cpus();
diff --git a/samples/bpf/test_map_in_map_user.c b/samples/bpf/test_map_in_map_user.c
index 98656de56b83..472d65c70354 100644
--- a/samples/bpf/test_map_in_map_user.c
+++ b/samples/bpf/test_map_in_map_user.c
@@ -114,17 +114,11 @@ static void test_map_in_map(void)
int main(int argc, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
struct bpf_link *link = NULL;
struct bpf_program *prog;
struct bpf_object *obj;
char filename[256];
- if (setrlimit(RLIMIT_MEMLOCK, &r)) {
- perror("setrlimit(RLIMIT_MEMLOCK)");
- return 1;
- }
-
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
obj = bpf_object__open_file(filename, NULL);
if (libbpf_get_error(obj)) {
diff --git a/samples/bpf/test_overhead_user.c b/samples/bpf/test_overhead_user.c
index 819a6fe86f89..4821f9d99c1f 100644
--- a/samples/bpf/test_overhead_user.c
+++ b/samples/bpf/test_overhead_user.c
@@ -162,13 +162,11 @@ static void unload_progs(void)
int main(int argc, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
int num_cpu = sysconf(_SC_NPROCESSORS_ONLN);
int test_flags = ~0;
char filename[256];
int err = 0;
- setrlimit(RLIMIT_MEMLOCK, &r);
if (argc > 1)
test_flags = atoi(argv[1]) ? : test_flags;
diff --git a/samples/bpf/trace_event_user.c b/samples/bpf/trace_event_user.c
index ac1ba368195c..9664749bf618 100644
--- a/samples/bpf/trace_event_user.c
+++ b/samples/bpf/trace_event_user.c
@@ -294,13 +294,11 @@ static void test_bpf_perf_event(void)
int main(int argc, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
struct bpf_object *obj = NULL;
char filename[256];
int error = 1;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- setrlimit(RLIMIT_MEMLOCK, &r);
signal(SIGINT, err_exit);
signal(SIGTERM, err_exit);
diff --git a/samples/bpf/tracex2_user.c b/samples/bpf/tracex2_user.c
index 3d6eab711d23..1626d51dfffd 100644
--- a/samples/bpf/tracex2_user.c
+++ b/samples/bpf/tracex2_user.c
@@ -116,7 +116,6 @@ static void int_exit(int sig)
int main(int ac, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
long key, next_key, value;
struct bpf_link *links[2];
struct bpf_program *prog;
@@ -125,11 +124,6 @@ int main(int ac, char **argv)
int i, j = 0;
FILE *f;
- if (setrlimit(RLIMIT_MEMLOCK, &r)) {
- perror("setrlimit(RLIMIT_MEMLOCK)");
- return 1;
- }
-
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
obj = bpf_object__open_file(filename, NULL);
if (libbpf_get_error(obj)) {
diff --git a/samples/bpf/tracex3_user.c b/samples/bpf/tracex3_user.c
index 83e0fecbb01a..33e16ba39f25 100644
--- a/samples/bpf/tracex3_user.c
+++ b/samples/bpf/tracex3_user.c
@@ -107,7 +107,6 @@ static void print_hist(int fd)
int main(int ac, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
struct bpf_link *links[2];
struct bpf_program *prog;
struct bpf_object *obj;
@@ -127,11 +126,6 @@ int main(int ac, char **argv)
}
}
- if (setrlimit(RLIMIT_MEMLOCK, &r)) {
- perror("setrlimit(RLIMIT_MEMLOCK)");
- return 1;
- }
-
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
obj = bpf_object__open_file(filename, NULL);
if (libbpf_get_error(obj)) {
diff --git a/samples/bpf/tracex4_user.c b/samples/bpf/tracex4_user.c
index e8faf8f184ae..cea399424bca 100644
--- a/samples/bpf/tracex4_user.c
+++ b/samples/bpf/tracex4_user.c
@@ -48,18 +48,12 @@ static void print_old_objects(int fd)
int main(int ac, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
struct bpf_link *links[2];
struct bpf_program *prog;
struct bpf_object *obj;
char filename[256];
int map_fd, i, j = 0;
- if (setrlimit(RLIMIT_MEMLOCK, &r)) {
- perror("setrlimit(RLIMIT_MEMLOCK, RLIM_INFINITY)");
- return 1;
- }
-
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
obj = bpf_object__open_file(filename, NULL);
if (libbpf_get_error(obj)) {
diff --git a/samples/bpf/tracex5_user.c b/samples/bpf/tracex5_user.c
index c17d3fb5fd64..08dfdc77ad2a 100644
--- a/samples/bpf/tracex5_user.c
+++ b/samples/bpf/tracex5_user.c
@@ -34,7 +34,6 @@ static void install_accept_all_seccomp(void)
int main(int ac, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
struct bpf_link *link = NULL;
struct bpf_program *prog;
struct bpf_object *obj;
@@ -43,8 +42,6 @@ int main(int ac, char **argv)
char filename[256];
FILE *f;
- setrlimit(RLIMIT_MEMLOCK, &r);
-
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
obj = bpf_object__open_file(filename, NULL);
if (libbpf_get_error(obj)) {
diff --git a/samples/bpf/tracex6_user.c b/samples/bpf/tracex6_user.c
index 33df9784775d..28296f40c133 100644
--- a/samples/bpf/tracex6_user.c
+++ b/samples/bpf/tracex6_user.c
@@ -175,15 +175,12 @@ static void test_bpf_perf_event(void)
int main(int argc, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
struct bpf_link *links[2];
struct bpf_program *prog;
struct bpf_object *obj;
char filename[256];
int i = 0;
- setrlimit(RLIMIT_MEMLOCK, &r);
-
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
obj = bpf_object__open_file(filename, NULL);
if (libbpf_get_error(obj)) {
diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c
index c447ad9e3a1d..116e39f6b666 100644
--- a/samples/bpf/xdp1_user.c
+++ b/samples/bpf/xdp1_user.c
@@ -79,7 +79,6 @@ static void usage(const char *prog)
int main(int argc, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
struct bpf_prog_load_attr prog_load_attr = {
.prog_type = BPF_PROG_TYPE_XDP,
};
@@ -117,11 +116,6 @@ int main(int argc, char **argv)
return 1;
}
- if (setrlimit(RLIMIT_MEMLOCK, &r)) {
- perror("setrlimit(RLIMIT_MEMLOCK)");
- return 1;
- }
-
ifindex = if_nametoindex(argv[optind]);
if (!ifindex) {
perror("if_nametoindex");
diff --git a/samples/bpf/xdp_adjust_tail_user.c b/samples/bpf/xdp_adjust_tail_user.c
index ba482dc3da33..a70b094c8ec5 100644
--- a/samples/bpf/xdp_adjust_tail_user.c
+++ b/samples/bpf/xdp_adjust_tail_user.c
@@ -82,7 +82,6 @@ static void usage(const char *cmd)
int main(int argc, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
struct bpf_prog_load_attr prog_load_attr = {
.prog_type = BPF_PROG_TYPE_XDP,
};
@@ -143,11 +142,6 @@ int main(int argc, char **argv)
}
}
- if (setrlimit(RLIMIT_MEMLOCK, &r)) {
- perror("setrlimit(RLIMIT_MEMLOCK, RLIM_INFINITY)");
- return 1;
- }
-
if (!ifindex) {
fprintf(stderr, "Invalid ifname\n");
return 1;
diff --git a/samples/bpf/xdp_monitor_user.c b/samples/bpf/xdp_monitor_user.c
index 03d0a182913f..49ebc49aefc3 100644
--- a/samples/bpf/xdp_monitor_user.c
+++ b/samples/bpf/xdp_monitor_user.c
@@ -687,7 +687,6 @@ static void print_bpf_prog_info(void)
int main(int argc, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
struct bpf_program *prog;
int longindex = 0, opt;
int ret = EXIT_FAILURE;
@@ -719,10 +718,6 @@ int main(int argc, char **argv)
}
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- if (setrlimit(RLIMIT_MEMLOCK, &r)) {
- perror("setrlimit(RLIMIT_MEMLOCK)");
- return ret;
- }
/* Remove tracepoint program when program is interrupted or killed */
signal(SIGINT, int_exit);
diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c
index f78cb18319aa..576411612523 100644
--- a/samples/bpf/xdp_redirect_cpu_user.c
+++ b/samples/bpf/xdp_redirect_cpu_user.c
@@ -765,7 +765,6 @@ static int load_cpumap_prog(char *file_name, char *prog_name,
int main(int argc, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
char *prog_name = "xdp_cpu_map5_lb_hash_ip_pairs";
char *mprog_filename = "xdp_redirect_kern.o";
char *redir_interface = NULL, *redir_map = NULL;
@@ -804,11 +803,6 @@ int main(int argc, char **argv)
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
prog_load_attr.file = filename;
- if (setrlimit(RLIMIT_MEMLOCK, &r)) {
- perror("setrlimit(RLIMIT_MEMLOCK)");
- return 1;
- }
-
if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
return err;
diff --git a/samples/bpf/xdp_redirect_map_user.c b/samples/bpf/xdp_redirect_map_user.c
index 35e16dee613e..31131b6e7782 100644
--- a/samples/bpf/xdp_redirect_map_user.c
+++ b/samples/bpf/xdp_redirect_map_user.c
@@ -96,7 +96,6 @@ static void usage(const char *prog)
int main(int argc, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
struct bpf_prog_load_attr prog_load_attr = {
.prog_type = BPF_PROG_TYPE_XDP,
};
@@ -135,11 +134,6 @@ int main(int argc, char **argv)
return 1;
}
- if (setrlimit(RLIMIT_MEMLOCK, &r)) {
- perror("setrlimit(RLIMIT_MEMLOCK)");
- return 1;
- }
-
ifindex_in = if_nametoindex(argv[optind]);
if (!ifindex_in)
ifindex_in = strtoul(argv[optind], NULL, 0);
diff --git a/samples/bpf/xdp_redirect_user.c b/samples/bpf/xdp_redirect_user.c
index 9ca2bf457cda..41d705c3a1f7 100644
--- a/samples/bpf/xdp_redirect_user.c
+++ b/samples/bpf/xdp_redirect_user.c
@@ -97,7 +97,6 @@ static void usage(const char *prog)
int main(int argc, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
struct bpf_prog_load_attr prog_load_attr = {
.prog_type = BPF_PROG_TYPE_XDP,
};
@@ -136,11 +135,6 @@ int main(int argc, char **argv)
return 1;
}
- if (setrlimit(RLIMIT_MEMLOCK, &r)) {
- perror("setrlimit(RLIMIT_MEMLOCK)");
- return 1;
- }
-
ifindex_in = if_nametoindex(argv[optind]);
if (!ifindex_in)
ifindex_in = strtoul(argv[optind], NULL, 0);
diff --git a/samples/bpf/xdp_router_ipv4_user.c b/samples/bpf/xdp_router_ipv4_user.c
index c2da1b51ff95..b5f03cb17a3c 100644
--- a/samples/bpf/xdp_router_ipv4_user.c
+++ b/samples/bpf/xdp_router_ipv4_user.c
@@ -625,7 +625,6 @@ static void usage(const char *prog)
int main(int ac, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
struct bpf_prog_load_attr prog_load_attr = {
.prog_type = BPF_PROG_TYPE_XDP,
};
@@ -670,11 +669,6 @@ int main(int ac, char **argv)
return 1;
}
- if (setrlimit(RLIMIT_MEMLOCK, &r)) {
- perror("setrlimit(RLIMIT_MEMLOCK)");
- return 1;
- }
-
if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
return 1;
diff --git a/samples/bpf/xdp_rxq_info_user.c b/samples/bpf/xdp_rxq_info_user.c
index 93fa1bc54f13..74a2926eba08 100644
--- a/samples/bpf/xdp_rxq_info_user.c
+++ b/samples/bpf/xdp_rxq_info_user.c
@@ -450,7 +450,6 @@ static void stats_poll(int interval, int action, __u32 cfg_opt)
int main(int argc, char **argv)
{
__u32 cfg_options= NO_TOUCH ; /* Default: Don't touch packet memory */
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
struct bpf_prog_load_attr prog_load_attr = {
.prog_type = BPF_PROG_TYPE_XDP,
};
@@ -474,11 +473,6 @@ int main(int argc, char **argv)
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
prog_load_attr.file = filename;
- if (setrlimit(RLIMIT_MEMLOCK, &r)) {
- perror("setrlimit(RLIMIT_MEMLOCK)");
- return 1;
- }
-
if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
return EXIT_FAIL;
diff --git a/samples/bpf/xdp_sample_pkts_user.c b/samples/bpf/xdp_sample_pkts_user.c
index 4b2a300c750c..706475e004cb 100644
--- a/samples/bpf/xdp_sample_pkts_user.c
+++ b/samples/bpf/xdp_sample_pkts_user.c
@@ -109,7 +109,6 @@ static void usage(const char *prog)
int main(int argc, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
struct bpf_prog_load_attr prog_load_attr = {
.prog_type = BPF_PROG_TYPE_XDP,
};
@@ -143,11 +142,6 @@ int main(int argc, char **argv)
return 1;
}
- if (setrlimit(RLIMIT_MEMLOCK, &r)) {
- perror("setrlimit(RLIMIT_MEMLOCK)");
- return 1;
- }
-
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
prog_load_attr.file = filename;
diff --git a/samples/bpf/xdp_tx_iptunnel_user.c b/samples/bpf/xdp_tx_iptunnel_user.c
index a419bee151a8..1d4f305d02aa 100644
--- a/samples/bpf/xdp_tx_iptunnel_user.c
+++ b/samples/bpf/xdp_tx_iptunnel_user.c
@@ -155,7 +155,6 @@ int main(int argc, char **argv)
struct bpf_prog_load_attr prog_load_attr = {
.prog_type = BPF_PROG_TYPE_XDP,
};
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
int min_port = 0, max_port = 0, vip2tnl_map_fd;
const char *optstr = "i:a:p:s:d:m:T:P:FSNh";
unsigned char opt_flags[256] = {};
@@ -254,11 +253,6 @@ int main(int argc, char **argv)
}
}
- if (setrlimit(RLIMIT_MEMLOCK, &r)) {
- perror("setrlimit(RLIMIT_MEMLOCK, RLIM_INFINITY)");
- return 1;
- }
-
if (!ifindex) {
fprintf(stderr, "Invalid ifname\n");
return 1;
diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c
index 036bd019e400..909f77647deb 100644
--- a/samples/bpf/xdpsock_user.c
+++ b/samples/bpf/xdpsock_user.c
@@ -1489,7 +1489,6 @@ static void apply_setsockopt(struct xsk_socket_info *xsk)
int main(int argc, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
bool rx = false, tx = false;
struct xsk_umem_info *umem;
struct bpf_object *obj;
@@ -1499,12 +1498,6 @@ int main(int argc, char **argv)
parse_command_line(argc, argv);
- if (setrlimit(RLIMIT_MEMLOCK, &r)) {
- fprintf(stderr, "ERROR: setrlimit(RLIMIT_MEMLOCK) \"%s\"\n",
- strerror(errno));
- exit(EXIT_FAILURE);
- }
-
if (opt_num_xsks > 1)
load_xdp_program(argv, &obj);
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
index 08651b23edba..b83b5d2e17dc 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
@@ -23,6 +23,6 @@ int dump_bpf_map(struct bpf_iter__bpf_map *ctx)
BPF_SEQ_PRINTF(seq, "%8u %8ld %8ld %10lu\n", map->id, map->refcnt.counter,
map->usercnt.counter,
- map->memory.user->locked_vm.counter);
+ 0LLU);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/map_ptr_kern.c b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
index c325405751e2..d8850bc6a9f1 100644
--- a/tools/testing/selftests/bpf/progs/map_ptr_kern.c
+++ b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
@@ -26,17 +26,12 @@ __u32 g_line = 0;
return 0; \
})
-struct bpf_map_memory {
- __u32 pages;
-} __attribute__((preserve_access_index));
-
struct bpf_map {
enum bpf_map_type map_type;
__u32 key_size;
__u32 value_size;
__u32 max_entries;
__u32 id;
- struct bpf_map_memory memory;
} __attribute__((preserve_access_index));
static inline int check_bpf_map_fields(struct bpf_map *map, __u32 key_size,
@@ -47,7 +42,6 @@ static inline int check_bpf_map_fields(struct bpf_map *map, __u32 key_size,
VERIFY(map->value_size == value_size);
VERIFY(map->max_entries == max_entries);
VERIFY(map->id > 0);
- VERIFY(map->memory.pages > 0);
return 1;
}
@@ -60,7 +54,6 @@ static inline int check_bpf_map_ptr(struct bpf_map *indirect,
VERIFY(indirect->value_size == direct->value_size);
VERIFY(indirect->max_entries == direct->max_entries);
VERIFY(indirect->id == direct->id);
- VERIFY(indirect->memory.pages == direct->memory.pages);
return 1;
}