aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/configs/debug.config2
-rw-r--r--kernel/dma/swiotlb.c23
-rw-r--r--kernel/fork.c4
-rw-r--r--kernel/sys.c19
-rw-r--r--kernel/sysctl.c7
-rw-r--r--kernel/trace/blktrace.c26
-rw-r--r--kernel/trace/ftrace.c4
-rw-r--r--kernel/trace/trace.c4
-rw-r--r--kernel/trace/trace_events_hist.c6
-rw-r--r--kernel/trace/trace_kprobe.c2
-rw-r--r--kernel/trace/trace_osnoise.c31
-rw-r--r--kernel/user_namespace.c14
-rw-r--r--kernel/watch_queue.c22
13 files changed, 118 insertions, 46 deletions
diff --git a/kernel/configs/debug.config b/kernel/configs/debug.config
index e9ffb0cc1eec..07df6d93c4df 100644
--- a/kernel/configs/debug.config
+++ b/kernel/configs/debug.config
@@ -16,7 +16,7 @@ CONFIG_SYMBOLIC_ERRNAME=y
#
# Compile-time checks and compiler options
#
-CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
CONFIG_DEBUG_SECTION_MISMATCH=y
CONFIG_FRAME_WARN=2048
CONFIG_SECTION_MISMATCH_WARN_ONLY=y
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index bfc56cb21705..6db1c475ec82 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -627,10 +627,14 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
for (i = 0; i < nr_slots(alloc_size + offset); i++)
mem->slots[index + i].orig_addr = slot_addr(orig_addr, i);
tlb_addr = slot_addr(mem->start, index) + offset;
- if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
- (!(attrs & DMA_ATTR_OVERWRITE) || dir == DMA_TO_DEVICE ||
- dir == DMA_BIDIRECTIONAL))
- swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_TO_DEVICE);
+ /*
+ * When dir == DMA_FROM_DEVICE we could omit the copy from the orig
+ * to the tlb buffer, if we knew for sure the device will
+ * overwirte the entire current content. But we don't. Thus
+ * unconditional bounce may prevent leaking swiotlb content (i.e.
+ * kernel memory) to user-space.
+ */
+ swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_TO_DEVICE);
return tlb_addr;
}
@@ -697,10 +701,13 @@ void swiotlb_tbl_unmap_single(struct device *dev, phys_addr_t tlb_addr,
void swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr,
size_t size, enum dma_data_direction dir)
{
- if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
- swiotlb_bounce(dev, tlb_addr, size, DMA_TO_DEVICE);
- else
- BUG_ON(dir != DMA_FROM_DEVICE);
+ /*
+ * Unconditional bounce is necessary to avoid corruption on
+ * sync_*_for_cpu or dma_ummap_* when the device didn't overwrite
+ * the whole lengt of the bounce buffer.
+ */
+ swiotlb_bounce(dev, tlb_addr, size, DMA_TO_DEVICE);
+ BUG_ON(!valid_dma_direction(dir));
}
void swiotlb_sync_single_for_cpu(struct device *dev, phys_addr_t tlb_addr,
diff --git a/kernel/fork.c b/kernel/fork.c
index a024bf6254df..f1e89007f228 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -366,14 +366,14 @@ struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
*new = data_race(*orig);
INIT_LIST_HEAD(&new->anon_vma_chain);
new->vm_next = new->vm_prev = NULL;
- dup_vma_anon_name(orig, new);
+ dup_anon_vma_name(orig, new);
}
return new;
}
void vm_area_free(struct vm_area_struct *vma)
{
- free_vma_anon_name(vma);
+ free_anon_vma_name(vma);
kmem_cache_free(vm_area_cachep, vma);
}
diff --git a/kernel/sys.c b/kernel/sys.c
index 97dc9e5d6bf9..5b0e172c4d47 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -7,6 +7,7 @@
#include <linux/export.h>
#include <linux/mm.h>
+#include <linux/mm_inline.h>
#include <linux/utsname.h>
#include <linux/mman.h>
#include <linux/reboot.h>
@@ -2286,15 +2287,16 @@ static int prctl_set_vma(unsigned long opt, unsigned long addr,
{
struct mm_struct *mm = current->mm;
const char __user *uname;
- char *name, *pch;
+ struct anon_vma_name *anon_name = NULL;
int error;
switch (opt) {
case PR_SET_VMA_ANON_NAME:
uname = (const char __user *)arg;
if (uname) {
- name = strndup_user(uname, ANON_VMA_NAME_MAX_LEN);
+ char *name, *pch;
+ name = strndup_user(uname, ANON_VMA_NAME_MAX_LEN);
if (IS_ERR(name))
return PTR_ERR(name);
@@ -2304,15 +2306,18 @@ static int prctl_set_vma(unsigned long opt, unsigned long addr,
return -EINVAL;
}
}
- } else {
- /* Reset the name */
- name = NULL;
+ /* anon_vma has its own copy */
+ anon_name = anon_vma_name_alloc(name);
+ kfree(name);
+ if (!anon_name)
+ return -ENOMEM;
+
}
mmap_write_lock(mm);
- error = madvise_set_anon_name(mm, addr, size, name);
+ error = madvise_set_anon_name(mm, addr, size, anon_name);
mmap_write_unlock(mm);
- kfree(name);
+ anon_vma_name_put(anon_name);
break;
default:
error = -EINVAL;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 5ae443b2882e..730ab56d9e92 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -180,6 +180,10 @@ static int bpf_stats_handler(struct ctl_table *table, int write,
return ret;
}
+void __weak unpriv_ebpf_notify(int new_state)
+{
+}
+
static int bpf_unpriv_handler(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
@@ -197,6 +201,9 @@ static int bpf_unpriv_handler(struct ctl_table *table, int write,
return -EPERM;
*(int *)table->data = unpriv_enable;
}
+
+ unpriv_ebpf_notify(unpriv_enable);
+
return ret;
}
#endif /* CONFIG_BPF_SYSCALL && CONFIG_SYSCTL */
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index af68a67179b4..21dea90eaa93 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -310,10 +310,20 @@ record_it:
local_irq_restore(flags);
}
-static void blk_trace_free(struct blk_trace *bt)
+static void blk_trace_free(struct request_queue *q, struct blk_trace *bt)
{
relay_close(bt->rchan);
- debugfs_remove(bt->dir);
+
+ /*
+ * If 'bt->dir' is not set, then both 'dropped' and 'msg' are created
+ * under 'q->debugfs_dir', thus lookup and remove them.
+ */
+ if (!bt->dir) {
+ debugfs_remove(debugfs_lookup("dropped", q->debugfs_dir));
+ debugfs_remove(debugfs_lookup("msg", q->debugfs_dir));
+ } else {
+ debugfs_remove(bt->dir);
+ }
free_percpu(bt->sequence);
free_percpu(bt->msg_data);
kfree(bt);
@@ -335,10 +345,10 @@ static void put_probe_ref(void)
mutex_unlock(&blk_probe_mutex);
}
-static void blk_trace_cleanup(struct blk_trace *bt)
+static void blk_trace_cleanup(struct request_queue *q, struct blk_trace *bt)
{
synchronize_rcu();
- blk_trace_free(bt);
+ blk_trace_free(q, bt);
put_probe_ref();
}
@@ -352,7 +362,7 @@ static int __blk_trace_remove(struct request_queue *q)
return -EINVAL;
if (bt->trace_state != Blktrace_running)
- blk_trace_cleanup(bt);
+ blk_trace_cleanup(q, bt);
return 0;
}
@@ -572,7 +582,7 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
ret = 0;
err:
if (ret)
- blk_trace_free(bt);
+ blk_trace_free(q, bt);
return ret;
}
@@ -1616,7 +1626,7 @@ static int blk_trace_remove_queue(struct request_queue *q)
put_probe_ref();
synchronize_rcu();
- blk_trace_free(bt);
+ blk_trace_free(q, bt);
return 0;
}
@@ -1647,7 +1657,7 @@ static int blk_trace_setup_queue(struct request_queue *q,
return 0;
free_bt:
- blk_trace_free(bt);
+ blk_trace_free(q, bt);
return ret;
}
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index a4b462b6f944..6105b7036482 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -7790,7 +7790,7 @@ int ftrace_is_dead(void)
/**
* register_ftrace_function - register a function for profiling
- * @ops - ops structure that holds the function for profiling.
+ * @ops: ops structure that holds the function for profiling.
*
* Register a function to be called by all functions in the
* kernel.
@@ -7817,7 +7817,7 @@ EXPORT_SYMBOL_GPL(register_ftrace_function);
/**
* unregister_ftrace_function - unregister a function for profiling.
- * @ops - ops structure that holds the function to unregister
+ * @ops: ops structure that holds the function to unregister
*
* Unregister a function that was added to be called by ftrace profiling.
*/
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 3050892d1812..eb44418574f9 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -235,7 +235,7 @@ static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
static int __init set_trace_boot_options(char *str)
{
strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
- return 0;
+ return 1;
}
__setup("trace_options=", set_trace_boot_options);
@@ -246,7 +246,7 @@ static int __init set_trace_boot_clock(char *str)
{
strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
trace_boot_clock = trace_boot_clock_buf;
- return 0;
+ return 1;
}
__setup("trace_clock=", set_trace_boot_clock);
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index ada87bfb5bb8..dc7f733b4cb3 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -2289,9 +2289,9 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file,
/*
* For backward compatibility, if field_name
* was "cpu", then we treat this the same as
- * common_cpu.
+ * common_cpu. This also works for "CPU".
*/
- if (strcmp(field_name, "cpu") == 0) {
+ if (field && field->filter_type == FILTER_CPU) {
*flags |= HIST_FIELD_FL_CPU;
} else {
hist_err(tr, HIST_ERR_FIELD_NOT_FOUND,
@@ -4832,7 +4832,7 @@ static int create_tracing_map_fields(struct hist_trigger_data *hist_data)
if (hist_field->flags & HIST_FIELD_FL_STACKTRACE)
cmp_fn = tracing_map_cmp_none;
- else if (!field)
+ else if (!field || hist_field->flags & HIST_FIELD_FL_CPU)
cmp_fn = tracing_map_cmp_num(hist_field->size,
hist_field->is_signed);
else if (is_string_field(field))
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 508f14af4f2c..b62fd785b599 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -32,7 +32,7 @@ static int __init set_kprobe_boot_events(char *str)
strlcpy(kprobe_boot_events_buf, str, COMMAND_LINE_SIZE);
disable_tracing_selftest("running kprobe events");
- return 0;
+ return 1;
}
__setup("kprobe_event=", set_kprobe_boot_events);
diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c
index cfddb30e65ab..5e3c62a08fc0 100644
--- a/kernel/trace/trace_osnoise.c
+++ b/kernel/trace/trace_osnoise.c
@@ -1387,6 +1387,26 @@ static int run_osnoise(void)
}
/*
+ * In some cases, notably when running on a nohz_full CPU with
+ * a stopped tick PREEMPT_RCU has no way to account for QSs.
+ * This will eventually cause unwarranted noise as PREEMPT_RCU
+ * will force preemption as the means of ending the current
+ * grace period. We avoid this problem by calling
+ * rcu_momentary_dyntick_idle(), which performs a zero duration
+ * EQS allowing PREEMPT_RCU to end the current grace period.
+ * This call shouldn't be wrapped inside an RCU critical
+ * section.
+ *
+ * Note that in non PREEMPT_RCU kernels QSs are handled through
+ * cond_resched()
+ */
+ if (IS_ENABLED(CONFIG_PREEMPT_RCU)) {
+ local_irq_disable();
+ rcu_momentary_dyntick_idle();
+ local_irq_enable();
+ }
+
+ /*
* For the non-preemptive kernel config: let threads runs, if
* they so wish.
*/
@@ -2200,6 +2220,17 @@ static void osnoise_workload_stop(void)
if (osnoise_has_registered_instances())
return;
+ /*
+ * If callbacks were already disabled in a previous stop
+ * call, there is no need to disable then again.
+ *
+ * For instance, this happens when tracing is stopped via:
+ * echo 0 > tracing_on
+ * echo nop > current_tracer.
+ */
+ if (!trace_osnoise_callback_enabled)
+ return;
+
trace_osnoise_callback_enabled = false;
/*
* Make sure that ftrace_nmi_enter/exit() see
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 6b2e3ca7ee99..5481ba44a8d6 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -58,6 +58,18 @@ static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
cred->user_ns = user_ns;
}
+static unsigned long enforced_nproc_rlimit(void)
+{
+ unsigned long limit = RLIM_INFINITY;
+
+ /* Is RLIMIT_NPROC currently enforced? */
+ if (!uid_eq(current_uid(), GLOBAL_ROOT_UID) ||
+ (current_user_ns() != &init_user_ns))
+ limit = rlimit(RLIMIT_NPROC);
+
+ return limit;
+}
+
/*
* Create a new user namespace, deriving the creator from the user in the
* passed credentials, and replacing that user with the new root user for the
@@ -122,7 +134,7 @@ int create_user_ns(struct cred *new)
for (i = 0; i < MAX_PER_NAMESPACE_UCOUNTS; i++) {
ns->ucount_max[i] = INT_MAX;
}
- set_rlimit_ucount_max(ns, UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC));
+ set_rlimit_ucount_max(ns, UCOUNT_RLIMIT_NPROC, enforced_nproc_rlimit());
set_rlimit_ucount_max(ns, UCOUNT_RLIMIT_MSGQUEUE, rlimit(RLIMIT_MSGQUEUE));
set_rlimit_ucount_max(ns, UCOUNT_RLIMIT_SIGPENDING, rlimit(RLIMIT_SIGPENDING));
set_rlimit_ucount_max(ns, UCOUNT_RLIMIT_MEMLOCK, rlimit(RLIMIT_MEMLOCK));
diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c
index 9c9eb20dd2c5..00703444a219 100644
--- a/kernel/watch_queue.c
+++ b/kernel/watch_queue.c
@@ -54,6 +54,7 @@ static void watch_queue_pipe_buf_release(struct pipe_inode_info *pipe,
bit += page->index;
set_bit(bit, wqueue->notes_bitmap);
+ generic_pipe_buf_release(pipe, buf);
}
// No try_steal function => no stealing
@@ -112,7 +113,7 @@ static bool post_one_notification(struct watch_queue *wqueue,
buf->offset = offset;
buf->len = len;
buf->flags = PIPE_BUF_FLAG_WHOLE;
- pipe->head = head + 1;
+ smp_store_release(&pipe->head, head + 1); /* vs pipe_read() */
if (!test_and_clear_bit(note, wqueue->notes_bitmap)) {
spin_unlock_irq(&pipe->rd_wait.lock);
@@ -219,7 +220,6 @@ long watch_queue_set_size(struct pipe_inode_info *pipe, unsigned int nr_notes)
struct page **pages;
unsigned long *bitmap;
unsigned long user_bufs;
- unsigned int bmsize;
int ret, i, nr_pages;
if (!wqueue)
@@ -243,7 +243,8 @@ long watch_queue_set_size(struct pipe_inode_info *pipe, unsigned int nr_notes)
goto error;
}
- ret = pipe_resize_ring(pipe, nr_notes);
+ nr_notes = nr_pages * WATCH_QUEUE_NOTES_PER_PAGE;
+ ret = pipe_resize_ring(pipe, roundup_pow_of_two(nr_notes));
if (ret < 0)
goto error;
@@ -258,17 +259,15 @@ long watch_queue_set_size(struct pipe_inode_info *pipe, unsigned int nr_notes)
pages[i]->index = i * WATCH_QUEUE_NOTES_PER_PAGE;
}
- bmsize = (nr_notes + BITS_PER_LONG - 1) / BITS_PER_LONG;
- bmsize *= sizeof(unsigned long);
- bitmap = kmalloc(bmsize, GFP_KERNEL);
+ bitmap = bitmap_alloc(nr_notes, GFP_KERNEL);
if (!bitmap)
goto error_p;
- memset(bitmap, 0xff, bmsize);
+ bitmap_fill(bitmap, nr_notes);
wqueue->notes = pages;
wqueue->notes_bitmap = bitmap;
wqueue->nr_pages = nr_pages;
- wqueue->nr_notes = nr_pages * WATCH_QUEUE_NOTES_PER_PAGE;
+ wqueue->nr_notes = nr_notes;
return 0;
error_p:
@@ -320,7 +319,7 @@ long watch_queue_set_filter(struct pipe_inode_info *pipe,
tf[i].info_mask & WATCH_INFO_LENGTH)
goto err_filter;
/* Ignore any unknown types */
- if (tf[i].type >= sizeof(wfilter->type_filter) * 8)
+ if (tf[i].type >= WATCH_TYPE__NR)
continue;
nr_filter++;
}
@@ -336,7 +335,7 @@ long watch_queue_set_filter(struct pipe_inode_info *pipe,
q = wfilter->filters;
for (i = 0; i < filter.nr_filters; i++) {
- if (tf[i].type >= sizeof(wfilter->type_filter) * BITS_PER_LONG)
+ if (tf[i].type >= WATCH_TYPE__NR)
continue;
q->type = tf[i].type;
@@ -371,6 +370,7 @@ static void __put_watch_queue(struct kref *kref)
for (i = 0; i < wqueue->nr_pages; i++)
__free_page(wqueue->notes[i]);
+ bitmap_free(wqueue->notes_bitmap);
wfilter = rcu_access_pointer(wqueue->filter);
if (wfilter)
@@ -566,7 +566,7 @@ void watch_queue_clear(struct watch_queue *wqueue)
rcu_read_lock();
spin_lock_bh(&wqueue->lock);
- /* Prevent new additions and prevent notifications from happening */
+ /* Prevent new notifications from being stored. */
wqueue->defunct = true;
while (!hlist_empty(&wqueue->watches)) {