about | summary | refs | log | tree | commit | diff
path: root/mm/vmalloc.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/vmalloc.c')
-rw-r--r-- mm/vmalloc.c 201
1 files changed, 145 insertions, 56 deletions
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 4165304d3547..e163372d3967 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -74,7 +74,7 @@ static const bool vmap_allow_huge = false;
bool is_vmalloc_addr(const void *x)
{
- unsigned long addr = (unsigned long)x;
+ unsigned long addr = (unsigned long)kasan_reset_tag(x);
return addr >= VMALLOC_START && addr < VMALLOC_END;
}
@@ -118,7 +118,6 @@ static int vmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
if (size != PAGE_SIZE) {
pte_t entry = pfn_pte(pfn, prot);
- entry = pte_mkhuge(entry);
entry = arch_make_huge_pte(entry, ilog2(size), 0);
set_huge_pte_at(&init_mm, addr, pte, entry);
pfn += PFN_DOWN(size);
@@ -632,7 +631,7 @@ int is_vmalloc_or_module_addr(const void *x)
* just put it in the vmalloc space.
*/
#if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
- unsigned long addr = (unsigned long)x;
+ unsigned long addr = (unsigned long)kasan_reset_tag(x);
if (addr >= MODULES_VADDR && addr < MODULES_END)
return 1;
#endif
@@ -776,23 +775,13 @@ get_subtree_max_size(struct rb_node *node)
return va ? va->subtree_max_size : 0;
}
-/*
- * Gets called when remove the node and rotate.
- */
-static __always_inline unsigned long
-compute_subtree_max_size(struct vmap_area *va)
-{
- return max3(va_size(va),
- get_subtree_max_size(va->rb_node.rb_left),
- get_subtree_max_size(va->rb_node.rb_right));
-}
-
RB_DECLARE_CALLBACKS_MAX(static, free_vmap_area_rb_augment_cb,
struct vmap_area, rb_node, unsigned long, subtree_max_size, va_size)
static void purge_vmap_area_lazy(void);
static BLOCKING_NOTIFIER_HEAD(vmap_notify_list);
-static unsigned long lazy_max_pages(void);
+static void drain_vmap_area_work(struct work_struct *work);
+static DECLARE_WORK(drain_vmap_work, drain_vmap_area_work);
static atomic_long_t nr_vmalloc_pages;
@@ -806,6 +795,8 @@ static struct vmap_area *find_vmap_area_exceed_addr(unsigned long addr)
struct vmap_area *va = NULL;
struct rb_node *n = vmap_area_root.rb_node;
+ addr = (unsigned long)kasan_reset_tag((void *)addr);
+
while (n) {
struct vmap_area *tmp;
@@ -827,6 +818,8 @@ static struct vmap_area *__find_vmap_area(unsigned long addr)
{
struct rb_node *n = vmap_area_root.rb_node;
+ addr = (unsigned long)kasan_reset_tag((void *)addr);
+
while (n) {
struct vmap_area *va;
@@ -973,6 +966,17 @@ unlink_va(struct vmap_area *va, struct rb_root *root)
}
#if DEBUG_AUGMENT_PROPAGATE_CHECK
+/*
+ * Gets called when removing a node and when rotating.
+ */
+static __always_inline unsigned long
+compute_subtree_max_size(struct vmap_area *va)
+{
+ return max3(va_size(va),
+ get_subtree_max_size(va->rb_node.rb_left),
+ get_subtree_max_size(va->rb_node.rb_right));
+}
+
static void
augment_tree_propagate_check(void)
{
@@ -1189,22 +1193,28 @@ is_within_this_va(struct vmap_area *va, unsigned long size,
/*
* Find the first free block(lowest start address) in the tree,
* that will accomplish the request corresponding to passing
- * parameters.
+ * parameters. Please note, with an alignment bigger than PAGE_SIZE,
+ * a search length is adjusted to account for worst case alignment
+ * overhead.
*/
static __always_inline struct vmap_area *
-find_vmap_lowest_match(unsigned long size,
- unsigned long align, unsigned long vstart)
+find_vmap_lowest_match(unsigned long size, unsigned long align,
+ unsigned long vstart, bool adjust_search_size)
{
struct vmap_area *va;
struct rb_node *node;
+ unsigned long length;
/* Start from the root. */
node = free_vmap_area_root.rb_node;
+ /* Adjust the search size for alignment overhead. */
+ length = adjust_search_size ? size + align - 1 : size;
+
while (node) {
va = rb_entry(node, struct vmap_area, rb_node);
- if (get_subtree_max_size(node->rb_left) >= size &&
+ if (get_subtree_max_size(node->rb_left) >= length &&
vstart < va->va_start) {
node = node->rb_left;
} else {
@@ -1214,9 +1224,9 @@ find_vmap_lowest_match(unsigned long size,
/*
* Does not make sense to go deeper towards the right
* sub-tree if it does not have a free block that is
- * equal or bigger to the requested search size.
+ * equal or bigger to the requested search length.
*/
- if (get_subtree_max_size(node->rb_right) >= size) {
+ if (get_subtree_max_size(node->rb_right) >= length) {
node = node->rb_right;
continue;
}
@@ -1232,7 +1242,7 @@ find_vmap_lowest_match(unsigned long size,
if (is_within_this_va(va, size, align, vstart))
return va;
- if (get_subtree_max_size(node->rb_right) >= size &&
+ if (get_subtree_max_size(node->rb_right) >= length &&
vstart <= va->va_start) {
/*
* Shift the vstart forward. Please note, we update it with
@@ -1280,7 +1290,7 @@ find_vmap_lowest_match_check(unsigned long size, unsigned long align)
get_random_bytes(&rnd, sizeof(rnd));
vstart = VMALLOC_START + rnd;
- va_1 = find_vmap_lowest_match(size, align, vstart);
+ va_1 = find_vmap_lowest_match(size, align, vstart, false);
va_2 = find_vmap_lowest_linear_match(size, align, vstart);
if (va_1 != va_2)
@@ -1431,12 +1441,25 @@ static __always_inline unsigned long
__alloc_vmap_area(unsigned long size, unsigned long align,
unsigned long vstart, unsigned long vend)
{
+ bool adjust_search_size = true;
unsigned long nva_start_addr;
struct vmap_area *va;
enum fit_type type;
int ret;
- va = find_vmap_lowest_match(size, align, vstart);
+ /*
+ * Do not adjust when:
+ * a) align <= PAGE_SIZE, because it does not make any sense.
+ * All blocks(their start addresses) are at least PAGE_SIZE
+ * aligned anyway;
+ * b) a short range where a requested size corresponds to exactly
+ * specified [vstart:vend] interval and an alignment > PAGE_SIZE.
+ * With adjusted search length an allocation would not succeed.
+ */
+ if (align <= PAGE_SIZE || (align > PAGE_SIZE && (vend - vstart) == size))
+ adjust_search_size = false;
+
+ va = find_vmap_lowest_match(size, align, vstart, adjust_search_size);
if (unlikely(!va))
return vend;
@@ -1720,18 +1743,6 @@ static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end)
}
/*
- * Kick off a purge of the outstanding lazy areas. Don't bother if somebody
- * is already purging.
- */
-static void try_purge_vmap_area_lazy(void)
-{
- if (mutex_trylock(&vmap_purge_lock)) {
- __purge_vmap_area_lazy(ULONG_MAX, 0);
- mutex_unlock(&vmap_purge_lock);
- }
-}
-
-/*
* Kick off a purge of the outstanding lazy areas.
*/
static void purge_vmap_area_lazy(void)
@@ -1742,6 +1753,20 @@ static void purge_vmap_area_lazy(void)
mutex_unlock(&vmap_purge_lock);
}
+static void drain_vmap_area_work(struct work_struct *work)
+{
+ unsigned long nr_lazy;
+
+ do {
+ mutex_lock(&vmap_purge_lock);
+ __purge_vmap_area_lazy(ULONG_MAX, 0);
+ mutex_unlock(&vmap_purge_lock);
+
+ /* Recheck if further work is required. */
+ nr_lazy = atomic_long_read(&vmap_lazy_nr);
+ } while (nr_lazy > lazy_max_pages());
+}
+
/*
* Free a vmap area, caller ensuring that the area has been unmapped
* and flush_cache_vunmap had been called for the correct range
@@ -1768,7 +1793,7 @@ static void free_vmap_area_noflush(struct vmap_area *va)
/* After this point, we may free va at any time */
if (unlikely(nr_lazy > lazy_max_pages()))
- try_purge_vmap_area_lazy();
+ schedule_work(&drain_vmap_work);
}
/*
@@ -2145,7 +2170,7 @@ EXPORT_SYMBOL_GPL(vm_unmap_aliases);
void vm_unmap_ram(const void *mem, unsigned int count)
{
unsigned long size = (unsigned long)count << PAGE_SHIFT;
- unsigned long addr = (unsigned long)mem;
+ unsigned long addr = (unsigned long)kasan_reset_tag(mem);
struct vmap_area *va;
might_sleep();
@@ -2206,14 +2231,19 @@ void *vm_map_ram(struct page **pages, unsigned int count, int node)
mem = (void *)addr;
}
- kasan_unpoison_vmalloc(mem, size);
-
if (vmap_pages_range(addr, addr + size, PAGE_KERNEL,
pages, PAGE_SHIFT) < 0) {
vm_unmap_ram(mem, count);
return NULL;
}
+ /*
+ * Mark the pages as accessible, now that they are mapped.
+ * With hardware tag-based KASAN, marking is skipped for
+ * non-VM_ALLOC mappings, see __kasan_unpoison_vmalloc().
+ */
+ mem = kasan_unpoison_vmalloc(mem, size, KASAN_VMALLOC_PROT_NORMAL);
+
return mem;
}
EXPORT_SYMBOL(vm_map_ram);
@@ -2439,10 +2469,20 @@ static struct vm_struct *__get_vm_area_node(unsigned long size,
return NULL;
}
- kasan_unpoison_vmalloc((void *)va->va_start, requested_size);
-
setup_vmalloc_vm(area, va, flags, caller);
+ /*
+ * Mark pages for non-VM_ALLOC mappings as accessible. Do it now as a
+ * best-effort approach, as they can be mapped outside of vmalloc code.
+ * For VM_ALLOC mappings, the pages are marked as accessible after
+ * getting mapped in __vmalloc_node_range().
+ * With hardware tag-based KASAN, marking is skipped for
+ * non-VM_ALLOC mappings, see __kasan_unpoison_vmalloc().
+ */
+ if (!(flags & VM_ALLOC))
+ area->addr = kasan_unpoison_vmalloc(area->addr, requested_size,
+ KASAN_VMALLOC_PROT_NORMAL);
+
return area;
}
@@ -2526,7 +2566,7 @@ struct vm_struct *remove_vm_area(const void *addr)
va->vm = NULL;
spin_unlock(&vmap_area_lock);
- kasan_free_shadow(vm);
+ kasan_free_module_shadow(vm);
free_unmap_vmap_area(va);
return vm;
@@ -2925,7 +2965,6 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
int node)
{
const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
- const gfp_t orig_gfp_mask = gfp_mask;
bool nofail = gfp_mask & __GFP_NOFAIL;
unsigned long addr = (unsigned long)area->addr;
unsigned long size = get_vm_area_size(area);
@@ -2949,7 +2988,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
}
if (!area->pages) {
- warn_alloc(orig_gfp_mask, NULL,
+ warn_alloc(gfp_mask, NULL,
"vmalloc error: size %lu, failed to allocated page array size %lu",
nr_small_pages * PAGE_SIZE, array_size);
free_vm_area(area);
@@ -2959,8 +2998,8 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
set_vm_area_page_order(area, page_shift - PAGE_SHIFT);
page_order = vm_area_page_order(area);
- area->nr_pages = vm_area_alloc_pages(gfp_mask, node,
- page_order, nr_small_pages, area->pages);
+ area->nr_pages = vm_area_alloc_pages(gfp_mask | __GFP_NOWARN,
+ node, page_order, nr_small_pages, area->pages);
atomic_long_add(area->nr_pages, &nr_vmalloc_pages);
if (gfp_mask & __GFP_ACCOUNT) {
@@ -2976,7 +3015,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
* allocation request, free them via __vfree() if any.
*/
if (area->nr_pages != nr_small_pages) {
- warn_alloc(orig_gfp_mask, NULL,
+ warn_alloc(gfp_mask, NULL,
"vmalloc error: size %lu, page order %u, failed to allocate pages",
area->nr_pages * PAGE_SIZE, page_order);
goto fail;
@@ -3004,7 +3043,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
memalloc_noio_restore(flags);
if (ret < 0) {
- warn_alloc(orig_gfp_mask, NULL,
+ warn_alloc(gfp_mask, NULL,
"vmalloc error: size %lu, failed to map pages",
area->nr_pages * PAGE_SIZE);
goto fail;
@@ -3051,7 +3090,8 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
const void *caller)
{
struct vm_struct *area;
- void *addr;
+ void *ret;
+ kasan_vmalloc_flags_t kasan_flags = KASAN_VMALLOC_NONE;
unsigned long real_size = size;
unsigned long real_align = align;
unsigned int shift = PAGE_SHIFT;
@@ -3104,11 +3144,51 @@ again:
goto fail;
}
- addr = __vmalloc_area_node(area, gfp_mask, prot, shift, node);
- if (!addr)
+ /*
+ * Prepare arguments for __vmalloc_area_node() and
+ * kasan_unpoison_vmalloc().
+ */
+ if (pgprot_val(prot) == pgprot_val(PAGE_KERNEL)) {
+ if (kasan_hw_tags_enabled()) {
+ /*
+ * Modify protection bits to allow tagging.
+ * This must be done before mapping.
+ */
+ prot = arch_vmap_pgprot_tagged(prot);
+
+ /*
+ * Skip page_alloc poisoning and zeroing for physical
+ * pages backing VM_ALLOC mapping. Memory is instead
+ * poisoned and zeroed by kasan_unpoison_vmalloc().
+ */
+ gfp_mask |= __GFP_SKIP_KASAN_UNPOISON | __GFP_SKIP_ZERO;
+ }
+
+ /* Take note that the mapping is PAGE_KERNEL. */
+ kasan_flags |= KASAN_VMALLOC_PROT_NORMAL;
+ }
+
+ /* Allocate physical pages and map them into vmalloc space. */
+ ret = __vmalloc_area_node(area, gfp_mask, prot, shift, node);
+ if (!ret)
goto fail;
/*
+ * Mark the pages as accessible, now that they are mapped.
+ * The init condition should match the one in post_alloc_hook()
+ * (except for the should_skip_init() check) to make sure that memory
+ * is initialized under the same conditions regardless of the enabled
+ * KASAN mode.
+ * Tag-based KASAN modes only assign tags to normal non-executable
+ * allocations, see __kasan_unpoison_vmalloc().
+ */
+ kasan_flags |= KASAN_VMALLOC_VM_ALLOC;
+ if (!want_init_on_free() && want_init_on_alloc(gfp_mask))
+ kasan_flags |= KASAN_VMALLOC_INIT;
+ /* KASAN_VMALLOC_PROT_NORMAL already set if required. */
+ area->addr = kasan_unpoison_vmalloc(area->addr, real_size, kasan_flags);
+
+ /*
* In this function, newly allocated vm_struct has VM_UNINITIALIZED
* flag. It means that vm_struct is not fully initialized.
* Now, it is fully initialized, so remove this flag here.
@@ -3119,7 +3199,7 @@ again:
if (!(vm_flags & VM_DEFER_KMEMLEAK))
kmemleak_vmalloc(area, size, gfp_mask);
- return addr;
+ return area->addr;
fail:
if (shift > PAGE_SHIFT) {
@@ -3404,6 +3484,8 @@ long vread(char *buf, char *addr, unsigned long count)
unsigned long buflen = count;
unsigned long n;
+ addr = kasan_reset_tag(addr);
+
/* Don't allow overflow */
if ((unsigned long) addr + count < count)
count = -(unsigned long) addr;
@@ -3789,9 +3871,6 @@ retry:
for (area = 0; area < nr_vms; area++) {
if (kasan_populate_vmalloc(vas[area]->va_start, sizes[area]))
goto err_free_shadow;
-
- kasan_unpoison_vmalloc((void *)vas[area]->va_start,
- sizes[area]);
}
/* insert all vm's */
@@ -3804,6 +3883,16 @@ retry:
}
spin_unlock(&vmap_area_lock);
+ /*
+ * Mark allocated areas as accessible. Do it now as a best-effort
+ * approach, as they can be mapped outside of vmalloc code.
+ * With hardware tag-based KASAN, marking is skipped for
+ * non-VM_ALLOC mappings, see __kasan_unpoison_vmalloc().
+ */
+ for (area = 0; area < nr_vms; area++)
+ vms[area]->addr = kasan_unpoison_vmalloc(vms[area]->addr,
+ vms[area]->size, KASAN_VMALLOC_PROT_NORMAL);
+
kfree(vas);
return vms;