Diffstat (limited to 'kernel')
-rw-r--r--  kernel/audit.c                  |  12
-rw-r--r--  kernel/audit_tree.c             |  16
-rw-r--r--  kernel/auditfilter.c            |  23
-rw-r--r--  kernel/auditsc.c                |  49
-rw-r--r--  kernel/debug/debug_core.c       |  52
-rw-r--r--  kernel/debug/kdb/kdb_bp.c       |  37
-rw-r--r--  kernel/debug/kdb/kdb_debugger.c |   4
-rw-r--r--  kernel/debug/kdb/kdb_main.c     | 267
-rw-r--r--  kernel/debug/kdb/kdb_private.h  |   3
-rw-r--r--  kernel/events/core.c            |  23
-rw-r--r--  kernel/events/uprobes.c         |   8
-rw-r--r--  kernel/exit.c                   |  33
-rw-r--r--  kernel/fork.c                   |   4
-rw-r--r--  kernel/gcov/Kconfig             |   5
-rw-r--r--  kernel/groups.c                 |  11
-rw-r--r--  kernel/irq/internals.h          |   4
-rw-r--r--  kernel/irq/irqdesc.c            |  52
-rw-r--r--  kernel/irq/proc.c               |  22
-rw-r--r--  kernel/kexec.c                  |   2
-rw-r--r--  kernel/locking/mutex-debug.c    |   2
-rw-r--r--  kernel/locking/spinlock.c       |   8
-rw-r--r--  kernel/module.c                 | 170
-rw-r--r--  kernel/nsproxy.c                |  10
-rw-r--r--  kernel/params.c                 |  97
-rw-r--r--  kernel/pid.c                    |   5
-rw-r--r--  kernel/pid_namespace.c          |  29
-rw-r--r--  kernel/power/Kconfig            |  16
-rw-r--r--  kernel/printk/printk.c          |  10
-rw-r--r--  kernel/sched/core.c             |  13
-rw-r--r--  kernel/sched/deadline.c         |  25
-rw-r--r--  kernel/sched/fair.c             |   6
-rw-r--r--  kernel/stacktrace.c             |  32
-rw-r--r--  kernel/sys_ni.c                 |   3
-rw-r--r--  kernel/sysctl.c                 |   7
-rw-r--r--  kernel/time/Makefile            |   2
-rw-r--r--  kernel/time/clocksource.c       |  76
-rw-r--r--  kernel/time/tick-sched.c        |   2
-rw-r--r--  kernel/time/time.c              |   1
-rw-r--r--  kernel/time/timecounter.c       | 112
-rw-r--r--  kernel/trace/Makefile           |   2
-rw-r--r--  kernel/trace/blktrace.c         |   3
-rw-r--r--  kernel/trace/trace.c            |  25
-rw-r--r--  kernel/trace/trace.h            |  14
-rw-r--r--  kernel/trace/trace_events.c     |  42
-rw-r--r--  kernel/trace/trace_kdb.c        |   4
-rw-r--r--  kernel/trace/trace_syscalls.c   |   7
-rw-r--r--  kernel/uid16.c                  |   2
-rw-r--r--  kernel/user.c                   |   6
-rw-r--r--  kernel/user_namespace.c         | 153
-rw-r--r--  kernel/utsname.c                |  31
50 files changed, 977 insertions, 565 deletions
diff --git a/kernel/audit.c b/kernel/audit.c
index 1f37f15117e5..72ab759a0b43 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -429,7 +429,7 @@ static void kauditd_send_skb(struct sk_buff *skb)
* This function doesn't consume an skb as might be expected since it has to
* copy it anyways.
*/
-static void kauditd_send_multicast_skb(struct sk_buff *skb)
+static void kauditd_send_multicast_skb(struct sk_buff *skb, gfp_t gfp_mask)
{
struct sk_buff *copy;
struct audit_net *aunet = net_generic(&init_net, audit_net_id);
@@ -448,11 +448,11 @@ static void kauditd_send_multicast_skb(struct sk_buff *skb)
* no reason for new multicast clients to continue with this
* non-compliance.
*/
- copy = skb_copy(skb, GFP_KERNEL);
+ copy = skb_copy(skb, gfp_mask);
if (!copy)
return;
- nlmsg_multicast(sock, copy, 0, AUDIT_NLGRP_READLOG, GFP_KERNEL);
+ nlmsg_multicast(sock, copy, 0, AUDIT_NLGRP_READLOG, gfp_mask);
}
/*
@@ -833,7 +833,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
s.backlog_limit = audit_backlog_limit;
s.lost = atomic_read(&audit_lost);
s.backlog = skb_queue_len(&audit_skb_queue);
- s.version = AUDIT_VERSION_LATEST;
+ s.feature_bitmap = AUDIT_FEATURE_BITMAP_ALL;
s.backlog_wait_time = audit_backlog_wait_time;
audit_send_reply(skb, seq, AUDIT_GET, 0, 0, &s, sizeof(s));
break;
@@ -1100,7 +1100,7 @@ static void audit_receive(struct sk_buff *skb)
}
/* Run custom bind function on netlink socket group connect or bind requests. */
-static int audit_bind(int group)
+static int audit_bind(struct net *net, int group)
{
if (!capable(CAP_AUDIT_READ))
return -EPERM;
@@ -1940,7 +1940,7 @@ void audit_log_end(struct audit_buffer *ab)
struct nlmsghdr *nlh = nlmsg_hdr(ab->skb);
nlh->nlmsg_len = ab->skb->len;
- kauditd_send_multicast_skb(ab->skb);
+ kauditd_send_multicast_skb(ab->skb, ab->gfp_mask);
/*
* The original kaudit unicast socket sends up messages with
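What the gfp_mask plumbing above buys: audit_log_end() can run in atomic context, and the allocation mode chosen at audit_log_start() (stored in ab->gfp_mask) must govern every allocation made on the record's behalf, including the skb copy for multicast delivery. A minimal user-space model of the pattern; the types and GFP values below are local stand-ins, not the kernel's:

    #include <assert.h>
    #include <stddef.h>

    typedef unsigned int gfp_t;
    #define GFP_KERNEL 0x1u   /* may sleep */
    #define GFP_ATOMIC 0x2u   /* must not sleep */

    struct audit_buffer { gfp_t gfp_mask; }; /* recorded at audit_log_start() */

    static int cannot_sleep;                 /* models atomic context */

    static void *model_alloc(gfp_t gfp)
    {
        /* a sleeping allocation in atomic context is the bug being fixed */
        assert(!(cannot_sleep && (gfp & GFP_KERNEL)));
        return NULL;                         /* allocation itself not modeled */
    }

    /* models kauditd_send_multicast_skb(): skb_copy() and nlmsg_multicast()
     * now use the caller's mask instead of a hard-coded GFP_KERNEL */
    static void send_multicast(struct audit_buffer *ab)
    {
        model_alloc(ab->gfp_mask);
    }

    int main(void)
    {
        struct audit_buffer ab = { .gfp_mask = GFP_ATOMIC };
        cannot_sleep = 1;
        send_multicast(&ab);  /* safe: no GFP_KERNEL allocation while atomic */
        return 0;
    }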
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 80f29e015570..2e0c97427b33 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -174,9 +174,9 @@ static void insert_hash(struct audit_chunk *chunk)
struct fsnotify_mark *entry = &chunk->mark;
struct list_head *list;
- if (!entry->i.inode)
+ if (!entry->inode)
return;
- list = chunk_hash(entry->i.inode);
+ list = chunk_hash(entry->inode);
list_add_rcu(&chunk->hash, list);
}
@@ -188,7 +188,7 @@ struct audit_chunk *audit_tree_lookup(const struct inode *inode)
list_for_each_entry_rcu(p, list, hash) {
/* mark.inode may have gone NULL, but who cares? */
- if (p->mark.i.inode == inode) {
+ if (p->mark.inode == inode) {
atomic_long_inc(&p->refs);
return p;
}
@@ -231,7 +231,7 @@ static void untag_chunk(struct node *p)
new = alloc_chunk(size);
spin_lock(&entry->lock);
- if (chunk->dead || !entry->i.inode) {
+ if (chunk->dead || !entry->inode) {
spin_unlock(&entry->lock);
if (new)
free_chunk(new);
@@ -258,7 +258,7 @@ static void untag_chunk(struct node *p)
goto Fallback;
fsnotify_duplicate_mark(&new->mark, entry);
- if (fsnotify_add_mark(&new->mark, new->mark.group, new->mark.i.inode, NULL, 1)) {
+ if (fsnotify_add_mark(&new->mark, new->mark.group, new->mark.inode, NULL, 1)) {
fsnotify_put_mark(&new->mark);
goto Fallback;
}
@@ -386,7 +386,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
chunk_entry = &chunk->mark;
spin_lock(&old_entry->lock);
- if (!old_entry->i.inode) {
+ if (!old_entry->inode) {
/* old_entry is being shot, lets just lie */
spin_unlock(&old_entry->lock);
fsnotify_put_mark(old_entry);
@@ -395,7 +395,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
}
fsnotify_duplicate_mark(chunk_entry, old_entry);
- if (fsnotify_add_mark(chunk_entry, chunk_entry->group, chunk_entry->i.inode, NULL, 1)) {
+ if (fsnotify_add_mark(chunk_entry, chunk_entry->group, chunk_entry->inode, NULL, 1)) {
spin_unlock(&old_entry->lock);
fsnotify_put_mark(chunk_entry);
fsnotify_put_mark(old_entry);
@@ -611,7 +611,7 @@ void audit_trim_trees(void)
list_for_each_entry(node, &tree->chunks, list) {
struct audit_chunk *chunk = find_chunk(node);
/* this could be NULL if the watch is dying else where... */
- struct inode *inode = chunk->mark.i.inode;
+ struct inode *inode = chunk->mark.inode;
node->index |= 1U<<31;
if (iterate_mounts(compare_root, inode, root_mnt))
node->index &= ~(1U<<31);
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 3598e13f2a65..4f68a326d92e 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -442,19 +442,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
if ((f->type == AUDIT_LOGINUID) && (f->val == AUDIT_UID_UNSET)) {
f->type = AUDIT_LOGINUID_SET;
f->val = 0;
- }
-
- if ((f->type == AUDIT_PID) || (f->type == AUDIT_PPID)) {
- struct pid *pid;
- rcu_read_lock();
- pid = find_vpid(f->val);
- if (!pid) {
- rcu_read_unlock();
- err = -ESRCH;
- goto exit_free;
- }
- f->val = pid_nr(pid);
- rcu_read_unlock();
+ entry->rule.pflags |= AUDIT_LOGINUID_LEGACY;
}
err = audit_field_valid(entry, f);
@@ -630,6 +618,13 @@ static struct audit_rule_data *audit_krule_to_data(struct audit_krule *krule)
data->buflen += data->values[i] =
audit_pack_string(&bufp, krule->filterkey);
break;
+ case AUDIT_LOGINUID_SET:
+ if (krule->pflags & AUDIT_LOGINUID_LEGACY && !f->val) {
+ data->fields[i] = AUDIT_LOGINUID;
+ data->values[i] = AUDIT_UID_UNSET;
+ break;
+ }
+ /* fallthrough if set */
default:
data->values[i] = f->val;
}
@@ -646,6 +641,7 @@ static int audit_compare_rule(struct audit_krule *a, struct audit_krule *b)
int i;
if (a->flags != b->flags ||
+ a->pflags != b->pflags ||
a->listnr != b->listnr ||
a->action != b->action ||
a->field_count != b->field_count)
@@ -764,6 +760,7 @@ struct audit_entry *audit_dupe_rule(struct audit_krule *old)
new = &entry->rule;
new->vers_ops = old->vers_ops;
new->flags = old->flags;
+ new->pflags = old->pflags;
new->listnr = old->listnr;
new->action = old->action;
for (i = 0; i < AUDIT_BITMASK_SIZE; i++)
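The new AUDIT_LOGINUID_LEGACY pflag makes the conversion reversible: a rule loaded in the old encoding (AUDIT_LOGINUID with value AUDIT_UID_UNSET) is stored as AUDIT_LOGINUID_SET with value 0 but reported back to userspace exactly as it was sent, which is also why pflags must take part in audit_compare_rule() and audit_dupe_rule(). A sketch of the round-trip; the field numbers are illustrative, not the real uapi values:

    #include <assert.h>

    #define AUDIT_UID_UNSET        0xffffffffu
    #define AUDIT_LOGINUID_LEGACY  0x1
    enum { AUDIT_LOGINUID = 9, AUDIT_LOGINUID_SET = 24 }; /* illustrative */

    struct field { unsigned int type, val; };
    struct krule { unsigned int pflags; };

    /* rule load: normalize the legacy encoding, remember it in pflags */
    static void data_to_entry(struct field *f, struct krule *r)
    {
        if (f->type == AUDIT_LOGINUID && f->val == AUDIT_UID_UNSET) {
            f->type = AUDIT_LOGINUID_SET;
            f->val = 0;
            r->pflags |= AUDIT_LOGINUID_LEGACY;
        }
    }

    /* rule dump: reproduce exactly what userspace originally sent */
    static void krule_to_data(struct field *f, const struct krule *r)
    {
        if (f->type == AUDIT_LOGINUID_SET &&
            (r->pflags & AUDIT_LOGINUID_LEGACY) && !f->val) {
            f->type = AUDIT_LOGINUID;
            f->val = AUDIT_UID_UNSET;
        }
    }

    int main(void)
    {
        struct field f = { AUDIT_LOGINUID, AUDIT_UID_UNSET };
        struct krule r = { 0 };

        data_to_entry(&f, &r);
        krule_to_data(&f, &r);
        assert(f.type == AUDIT_LOGINUID && f.val == AUDIT_UID_UNSET);
        return 0;
    }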
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index c75522a83678..072566dd0caf 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -72,6 +72,8 @@
#include <linux/fs_struct.h>
#include <linux/compat.h>
#include <linux/ctype.h>
+#include <linux/string.h>
+#include <uapi/linux/limits.h>
#include "audit.h"
@@ -1861,8 +1863,7 @@ void __audit_inode(struct filename *name, const struct dentry *dentry,
}
list_for_each_entry_reverse(n, &context->names_list, list) {
- /* does the name pointer match? */
- if (!n->name || n->name->name != name->name)
+ if (!n->name || strcmp(n->name->name, name->name))
continue;
/* match the correct record type */
@@ -1877,12 +1878,48 @@ void __audit_inode(struct filename *name, const struct dentry *dentry,
}
out_alloc:
- /* unable to find the name from a previous getname(). Allocate a new
- * anonymous entry.
- */
- n = audit_alloc_name(context, AUDIT_TYPE_NORMAL);
+ /* unable to find an entry with both a matching name and type */
+ n = audit_alloc_name(context, AUDIT_TYPE_UNKNOWN);
if (!n)
return;
+ /* unfortunately, while we may have a path name to record with the
+ * inode, we can't always rely on the string lasting until the end of
+ * the syscall, so we need to create our own copy; it may fail due to
+ * memory allocation issues, but we do our best */
+ if (name) {
+ /* we can't use getname_kernel() due to size limits */
+ size_t len = strlen(name->name) + 1;
+ struct filename *new = __getname();
+
+ if (unlikely(!new))
+ goto out;
+
+ if (len <= (PATH_MAX - sizeof(*new))) {
+ new->name = (char *)(new) + sizeof(*new);
+ new->separate = false;
+ } else if (len <= PATH_MAX) {
+ /* this looks odd, but is due to final_putname() */
+ struct filename *new2;
+
+ new2 = kmalloc(sizeof(*new2), GFP_KERNEL);
+ if (unlikely(!new2)) {
+ __putname(new);
+ goto out;
+ }
+ new2->name = (char *)new;
+ new2->separate = true;
+ new = new2;
+ } else {
+ /* we should never get here, but let's be safe */
+ __putname(new);
+ goto out;
+ }
+ strlcpy((char *)new->name, name->name, len);
+ new->uptr = NULL;
+ new->aname = n;
+ n->name = new;
+ n->name_put = true;
+ }
out:
if (parent) {
n->name_len = n->name ? parent_len(n->name->name) : AUDIT_NAME_FULL;
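The allocation dance above mirrors how getname() lays out struct filename: a short name is embedded in the same PATH_MAX buffer as the header, while a long name gets a separately allocated header so final_putname() can tell the two cases apart via ->separate. A user-space model of the two layouts (struct filename here is a stand-in, not the kernel definition):

    #include <stdbool.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    #define PATH_MAX 4096

    struct filename {
        char *name;
        bool separate;
    };

    static struct filename *copy_name_model(const char *src)
    {
        size_t len = strlen(src) + 1;
        char *page = malloc(PATH_MAX);          /* models __getname() */
        struct filename *fn;

        if (!page)
            return NULL;
        if (len <= PATH_MAX - sizeof(*fn)) {
            /* short name: header and string share the one buffer */
            fn = (struct filename *)page;
            fn->name = page + sizeof(*fn);
            fn->separate = false;
        } else if (len <= PATH_MAX) {
            /* long name: separate header, the page holds only the string */
            fn = malloc(sizeof(*fn));
            if (!fn) {
                free(page);                     /* models __putname() */
                return NULL;
            }
            fn->name = page;
            fn->separate = true;
        } else {
            free(page);
            return NULL;
        }
        memcpy(fn->name, src, len);
        return fn;
    }

    int main(void)
    {
        struct filename *fn = copy_name_model("/etc/passwd");

        if (fn) {
            printf("%s (separate=%d)\n", fn->name, fn->separate);
            if (fn->separate)
                free(fn->name);
            free(fn);
        }
        return 0;
    }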
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 1adf62b39b96..07ce18ca71e0 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -27,6 +27,9 @@
* version 2. This program is licensed "as is" without any warranty of any
* kind, whether express or implied.
*/
+
+#define pr_fmt(fmt) "KGDB: " fmt
+
#include <linux/pid_namespace.h>
#include <linux/clocksource.h>
#include <linux/serial_core.h>
@@ -196,8 +199,8 @@ int __weak kgdb_validate_break_address(unsigned long addr)
return err;
err = kgdb_arch_remove_breakpoint(&tmp);
if (err)
- printk(KERN_ERR "KGDB: Critical breakpoint error, kernel "
- "memory destroyed at: %lx", addr);
+ pr_err("Critical breakpoint error, kernel memory destroyed at: %lx\n",
+ addr);
return err;
}
@@ -256,8 +259,8 @@ int dbg_activate_sw_breakpoints(void)
error = kgdb_arch_set_breakpoint(&kgdb_break[i]);
if (error) {
ret = error;
- printk(KERN_INFO "KGDB: BP install failed: %lx",
- kgdb_break[i].bpt_addr);
+ pr_info("BP install failed: %lx\n",
+ kgdb_break[i].bpt_addr);
continue;
}
@@ -319,8 +322,8 @@ int dbg_deactivate_sw_breakpoints(void)
continue;
error = kgdb_arch_remove_breakpoint(&kgdb_break[i]);
if (error) {
- printk(KERN_INFO "KGDB: BP remove failed: %lx\n",
- kgdb_break[i].bpt_addr);
+ pr_info("BP remove failed: %lx\n",
+ kgdb_break[i].bpt_addr);
ret = error;
}
@@ -367,7 +370,7 @@ int dbg_remove_all_break(void)
goto setundefined;
error = kgdb_arch_remove_breakpoint(&kgdb_break[i]);
if (error)
- printk(KERN_ERR "KGDB: breakpoint remove failed: %lx\n",
+ pr_err("breakpoint remove failed: %lx\n",
kgdb_break[i].bpt_addr);
setundefined:
kgdb_break[i].state = BP_UNDEFINED;
@@ -400,9 +403,9 @@ static int kgdb_io_ready(int print_wait)
if (print_wait) {
#ifdef CONFIG_KGDB_KDB
if (!dbg_kdb_mode)
- printk(KERN_CRIT "KGDB: waiting... or $3#33 for KDB\n");
+ pr_crit("waiting... or $3#33 for KDB\n");
#else
- printk(KERN_CRIT "KGDB: Waiting for remote debugger\n");
+ pr_crit("Waiting for remote debugger\n");
#endif
}
return 1;
@@ -430,8 +433,7 @@ static int kgdb_reenter_check(struct kgdb_state *ks)
exception_level = 0;
kgdb_skipexception(ks->ex_vector, ks->linux_regs);
dbg_activate_sw_breakpoints();
- printk(KERN_CRIT "KGDB: re-enter error: breakpoint removed %lx\n",
- addr);
+ pr_crit("re-enter error: breakpoint removed %lx\n", addr);
WARN_ON_ONCE(1);
return 1;
@@ -444,7 +446,7 @@ static int kgdb_reenter_check(struct kgdb_state *ks)
panic("Recursive entry to debugger");
}
- printk(KERN_CRIT "KGDB: re-enter exception: ALL breakpoints killed\n");
+ pr_crit("re-enter exception: ALL breakpoints killed\n");
#ifdef CONFIG_KGDB_KDB
/* Allow kdb to debug itself one level */
return 0;
@@ -471,6 +473,7 @@ static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs,
int cpu;
int trace_on = 0;
int online_cpus = num_online_cpus();
+ u64 time_left;
kgdb_info[ks->cpu].enter_kgdb++;
kgdb_info[ks->cpu].exception_state |= exception_state;
@@ -595,9 +598,13 @@ return_normal:
/*
* Wait for the other CPUs to be notified and be waiting for us:
*/
- while (kgdb_do_roundup && (atomic_read(&masters_in_kgdb) +
- atomic_read(&slaves_in_kgdb)) != online_cpus)
+ time_left = loops_per_jiffy * HZ;
+ while (kgdb_do_roundup && --time_left &&
+ (atomic_read(&masters_in_kgdb) + atomic_read(&slaves_in_kgdb)) !=
+ online_cpus)
cpu_relax();
+ if (!time_left)
+ pr_crit("KGDB: Timed out waiting for secondary CPUs.\n");
/*
* At this point the primary processor is completely
@@ -795,15 +802,15 @@ static struct console kgdbcons = {
static void sysrq_handle_dbg(int key)
{
if (!dbg_io_ops) {
- printk(KERN_CRIT "ERROR: No KGDB I/O module available\n");
+ pr_crit("ERROR: No KGDB I/O module available\n");
return;
}
if (!kgdb_connected) {
#ifdef CONFIG_KGDB_KDB
if (!dbg_kdb_mode)
- printk(KERN_CRIT "KGDB or $3#33 for KDB\n");
+ pr_crit("KGDB or $3#33 for KDB\n");
#else
- printk(KERN_CRIT "Entering KGDB\n");
+ pr_crit("Entering KGDB\n");
#endif
}
@@ -945,7 +952,7 @@ static void kgdb_initial_breakpoint(void)
{
kgdb_break_asap = 0;
- printk(KERN_CRIT "kgdb: Waiting for connection from remote gdb...\n");
+ pr_crit("Waiting for connection from remote gdb...\n");
kgdb_breakpoint();
}
@@ -964,8 +971,7 @@ int kgdb_register_io_module(struct kgdb_io *new_dbg_io_ops)
if (dbg_io_ops) {
spin_unlock(&kgdb_registration_lock);
- printk(KERN_ERR "kgdb: Another I/O driver is already "
- "registered with KGDB.\n");
+ pr_err("Another I/O driver is already registered with KGDB\n");
return -EBUSY;
}
@@ -981,8 +987,7 @@ int kgdb_register_io_module(struct kgdb_io *new_dbg_io_ops)
spin_unlock(&kgdb_registration_lock);
- printk(KERN_INFO "kgdb: Registered I/O driver %s.\n",
- new_dbg_io_ops->name);
+ pr_info("Registered I/O driver %s\n", new_dbg_io_ops->name);
/* Arm KGDB now. */
kgdb_register_callbacks();
@@ -1017,8 +1022,7 @@ void kgdb_unregister_io_module(struct kgdb_io *old_dbg_io_ops)
spin_unlock(&kgdb_registration_lock);
- printk(KERN_INFO
- "kgdb: Unregistered I/O driver %s, debugger disabled.\n",
+ pr_info("Unregistered I/O driver %s, debugger disabled\n",
old_dbg_io_ops->name);
}
EXPORT_SYMBOL_GPL(kgdb_unregister_io_module);
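All of the printk conversions above lean on the pr_fmt() convention: the pr_*() macros paste pr_fmt(fmt) into the format string at compile time, so one definition at the top of the file replaces every hand-written "KGDB: " prefix. A stand-alone model of the macro plumbing, with fprintf standing in for printk:

    #include <stdio.h>

    #define pr_fmt(fmt) "KGDB: " fmt
    #define pr_err(fmt, ...)  fprintf(stderr, pr_fmt(fmt), ##__VA_ARGS__)
    #define pr_info(fmt, ...) fprintf(stderr, pr_fmt(fmt), ##__VA_ARGS__)

    int main(void)
    {
        /* prints "KGDB: BP install failed: dead" */
        pr_info("BP install failed: %lx\n", 0xdeadUL);
        return 0;
    }

This is also why messages in the converted file should not carry their own "KGDB: " prefix any more; the macro supplies it.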
diff --git a/kernel/debug/kdb/kdb_bp.c b/kernel/debug/kdb/kdb_bp.c
index b20d544f20c2..e1dbf4a2c69e 100644
--- a/kernel/debug/kdb/kdb_bp.c
+++ b/kernel/debug/kdb/kdb_bp.c
@@ -531,22 +531,29 @@ void __init kdb_initbptab(void)
for (i = 0, bp = kdb_breakpoints; i < KDB_MAXBPT; i++, bp++)
bp->bp_free = 1;
- kdb_register_repeat("bp", kdb_bp, "[<vaddr>]",
- "Set/Display breakpoints", 0, KDB_REPEAT_NO_ARGS);
- kdb_register_repeat("bl", kdb_bp, "[<vaddr>]",
- "Display breakpoints", 0, KDB_REPEAT_NO_ARGS);
+ kdb_register_flags("bp", kdb_bp, "[<vaddr>]",
+ "Set/Display breakpoints", 0,
+ KDB_ENABLE_FLOW_CTRL | KDB_REPEAT_NO_ARGS);
+ kdb_register_flags("bl", kdb_bp, "[<vaddr>]",
+ "Display breakpoints", 0,
+ KDB_ENABLE_FLOW_CTRL | KDB_REPEAT_NO_ARGS);
if (arch_kgdb_ops.flags & KGDB_HW_BREAKPOINT)
- kdb_register_repeat("bph", kdb_bp, "[<vaddr>]",
- "[datar [length]|dataw [length]] Set hw brk", 0, KDB_REPEAT_NO_ARGS);
- kdb_register_repeat("bc", kdb_bc, "<bpnum>",
- "Clear Breakpoint", 0, KDB_REPEAT_NONE);
- kdb_register_repeat("be", kdb_bc, "<bpnum>",
- "Enable Breakpoint", 0, KDB_REPEAT_NONE);
- kdb_register_repeat("bd", kdb_bc, "<bpnum>",
- "Disable Breakpoint", 0, KDB_REPEAT_NONE);
-
- kdb_register_repeat("ss", kdb_ss, "",
- "Single Step", 1, KDB_REPEAT_NO_ARGS);
+ kdb_register_flags("bph", kdb_bp, "[<vaddr>]",
+ "[datar [length]|dataw [length]] Set hw brk", 0,
+ KDB_ENABLE_FLOW_CTRL | KDB_REPEAT_NO_ARGS);
+ kdb_register_flags("bc", kdb_bc, "<bpnum>",
+ "Clear Breakpoint", 0,
+ KDB_ENABLE_FLOW_CTRL);
+ kdb_register_flags("be", kdb_bc, "<bpnum>",
+ "Enable Breakpoint", 0,
+ KDB_ENABLE_FLOW_CTRL);
+ kdb_register_flags("bd", kdb_bc, "<bpnum>",
+ "Disable Breakpoint", 0,
+ KDB_ENABLE_FLOW_CTRL);
+
+ kdb_register_flags("ss", kdb_ss, "",
+ "Single Step", 1,
+ KDB_ENABLE_FLOW_CTRL | KDB_REPEAT_NO_ARGS);
/*
* Architecture dependent initialization.
*/
diff --git a/kernel/debug/kdb/kdb_debugger.c b/kernel/debug/kdb/kdb_debugger.c
index 8859ca34dcfe..15e1a7af5dd0 100644
--- a/kernel/debug/kdb/kdb_debugger.c
+++ b/kernel/debug/kdb/kdb_debugger.c
@@ -129,6 +129,10 @@ int kdb_stub(struct kgdb_state *ks)
ks->pass_exception = 1;
KDB_FLAG_SET(CATASTROPHIC);
}
+ /* set CATASTROPHIC if the system contains unresponsive processors */
+ for_each_online_cpu(i)
+ if (!kgdb_info[i].enter_kgdb)
+ KDB_FLAG_SET(CATASTROPHIC);
if (KDB_STATE(SSBPT) && reason == KDB_REASON_SSTEP) {
KDB_STATE_CLEAR(SSBPT);
KDB_STATE_CLEAR(DOING_SS);
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 379650b984f8..f191bddf64b8 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -12,6 +12,7 @@
*/
#include <linux/ctype.h>
+#include <linux/types.h>
#include <linux/string.h>
#include <linux/kernel.h>
#include <linux/kmsg_dump.h>
@@ -23,6 +24,7 @@
#include <linux/vmalloc.h>
#include <linux/atomic.h>
#include <linux/module.h>
+#include <linux/moduleparam.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/kallsyms.h>
@@ -42,6 +44,12 @@
#include <linux/slab.h>
#include "kdb_private.h"
+#undef MODULE_PARAM_PREFIX
+#define MODULE_PARAM_PREFIX "kdb."
+
+static int kdb_cmd_enabled = CONFIG_KDB_DEFAULT_ENABLE;
+module_param_named(cmd_enable, kdb_cmd_enabled, int, 0600);
+
#define GREP_LEN 256
char kdb_grep_string[GREP_LEN];
int kdb_grepping_flag;
@@ -121,6 +129,7 @@ static kdbmsg_t kdbmsgs[] = {
KDBMSG(BADLENGTH, "Invalid length field"),
KDBMSG(NOBP, "No Breakpoint exists"),
KDBMSG(BADADDR, "Invalid address"),
+ KDBMSG(NOPERM, "Permission denied"),
};
#undef KDBMSG
@@ -188,6 +197,26 @@ struct task_struct *kdb_curr_task(int cpu)
}
/*
+ * Check whether the flags of the current command and the permissions
+ * of the kdb console allow a command to be run.
+ */
+static inline bool kdb_check_flags(kdb_cmdflags_t flags, int permissions,
+ bool no_args)
+{
+ /* permissions comes from userspace so needs massaging slightly */
+ permissions &= KDB_ENABLE_MASK;
+ permissions |= KDB_ENABLE_ALWAYS_SAFE;
+
+ /* some commands change group when launched with no arguments */
+ if (no_args)
+ permissions |= permissions << KDB_ENABLE_NO_ARGS_SHIFT;
+
+ flags |= KDB_ENABLE_ALL;
+
+ return permissions & flags;
+}
+
+/*
* kdbgetenv - This function will return the character string value of
* an environment variable.
* Parameters:
@@ -476,6 +505,15 @@ int kdbgetaddrarg(int argc, const char **argv, int *nextarg,
kdb_symtab_t symtab;
/*
+ * If the enable flags prohibit both arbitrary memory access
+ * and flow control then there are no reasonable grounds to
+ * provide symbol lookup.
+ */
+ if (!kdb_check_flags(KDB_ENABLE_MEM_READ | KDB_ENABLE_FLOW_CTRL,
+ kdb_cmd_enabled, false))
+ return KDB_NOPERM;
+
+ /*
* Process arguments which follow the following syntax:
*
* symbol | numeric-address [+/- numeric-offset]
@@ -641,8 +679,13 @@ static int kdb_defcmd2(const char *cmdstr, const char *argv0)
if (!s->count)
s->usable = 0;
if (s->usable)
- kdb_register(s->name, kdb_exec_defcmd,
- s->usage, s->help, 0);
+ /* macros are always safe because when executed each
+ * internal command re-enters kdb_parse() and is
+ * safety checked individually.
+ */
+ kdb_register_flags(s->name, kdb_exec_defcmd, s->usage,
+ s->help, 0,
+ KDB_ENABLE_ALWAYS_SAFE);
return 0;
}
if (!s->usable)
@@ -1003,25 +1046,22 @@ int kdb_parse(const char *cmdstr)
if (i < kdb_max_commands) {
int result;
+
+ if (!kdb_check_flags(tp->cmd_flags, kdb_cmd_enabled, argc <= 1))
+ return KDB_NOPERM;
+
KDB_STATE_SET(CMD);
result = (*tp->cmd_func)(argc-1, (const char **)argv);
if (result && ignore_errors && result > KDB_CMD_GO)
result = 0;
KDB_STATE_CLEAR(CMD);
- switch (tp->cmd_repeat) {
- case KDB_REPEAT_NONE:
- argc = 0;
- if (argv[0])
- *(argv[0]) = '\0';
- break;
- case KDB_REPEAT_NO_ARGS:
- argc = 1;
- if (argv[1])
- *(argv[1]) = '\0';
- break;
- case KDB_REPEAT_WITH_ARGS:
- break;
- }
+
+ if (tp->cmd_flags & KDB_REPEAT_WITH_ARGS)
+ return result;
+
+ argc = tp->cmd_flags & KDB_REPEAT_NO_ARGS ? 1 : 0;
+ if (argv[argc])
+ *(argv[argc]) = '\0';
return result;
}
@@ -1921,10 +1961,14 @@ static int kdb_rm(int argc, const char **argv)
*/
static int kdb_sr(int argc, const char **argv)
{
+ bool check_mask =
+ !kdb_check_flags(KDB_ENABLE_ALL, kdb_cmd_enabled, false);
+
if (argc != 1)
return KDB_ARGCOUNT;
+
kdb_trap_printk++;
- __handle_sysrq(*argv[1], false);
+ __handle_sysrq(*argv[1], check_mask);
kdb_trap_printk--;
return 0;
@@ -2157,6 +2201,8 @@ static void kdb_cpu_status(void)
for (start_cpu = -1, i = 0; i < NR_CPUS; i++) {
if (!cpu_online(i)) {
state = 'F'; /* cpu is offline */
+ } else if (!kgdb_info[i].enter_kgdb) {
+ state = 'D'; /* cpu is online but unresponsive */
} else {
state = ' '; /* cpu is responding to kdb */
if (kdb_task_state_char(KDB_TSK(i)) == 'I')
@@ -2210,7 +2256,7 @@ static int kdb_cpu(int argc, const char **argv)
/*
* Validate cpunum
*/
- if ((cpunum > NR_CPUS) || !cpu_online(cpunum))
+ if ((cpunum > NR_CPUS) || !kgdb_info[cpunum].enter_kgdb)
return KDB_BADCPUNUM;
dbg_switch_cpu = cpunum;
@@ -2375,6 +2421,8 @@ static int kdb_help(int argc, const char **argv)
return 0;
if (!kt->cmd_name)
continue;
+ if (!kdb_check_flags(kt->cmd_flags, kdb_cmd_enabled, true))
+ continue;
if (strlen(kt->cmd_usage) > 20)
space = "\n ";
kdb_printf("%-15.15s %-20s%s%s\n", kt->cmd_name,
@@ -2629,7 +2677,7 @@ static int kdb_grep_help(int argc, const char **argv)
}
/*
- * kdb_register_repeat - This function is used to register a kernel
+ * kdb_register_flags - This function is used to register a kernel
* debugger command.
* Inputs:
* cmd Command name
@@ -2641,12 +2689,12 @@ static int kdb_grep_help(int argc, const char **argv)
* zero for success, one if a duplicate command.
*/
#define kdb_command_extend 50 /* arbitrary */
-int kdb_register_repeat(char *cmd,
- kdb_func_t func,
- char *usage,
- char *help,
- short minlen,
- kdb_repeat_t repeat)
+int kdb_register_flags(char *cmd,
+ kdb_func_t func,
+ char *usage,
+ char *help,
+ short minlen,
+ kdb_cmdflags_t flags)
{
int i;
kdbtab_t *kp;
@@ -2694,19 +2742,18 @@ int kdb_register_repeat(char *cmd,
kp->cmd_func = func;
kp->cmd_usage = usage;
kp->cmd_help = help;
- kp->cmd_flags = 0;
kp->cmd_minlen = minlen;
- kp->cmd_repeat = repeat;
+ kp->cmd_flags = flags;
return 0;
}
-EXPORT_SYMBOL_GPL(kdb_register_repeat);
+EXPORT_SYMBOL_GPL(kdb_register_flags);
/*
* kdb_register - Compatibility register function for commands that do
* not need to specify a repeat state. Equivalent to
- * kdb_register_repeat with KDB_REPEAT_NONE.
+ * kdb_register_flags with flags set to 0.
* Inputs:
* cmd Command name
* func Function to execute the command
@@ -2721,8 +2768,7 @@ int kdb_register(char *cmd,
char *help,
short minlen)
{
- return kdb_register_repeat(cmd, func, usage, help, minlen,
- KDB_REPEAT_NONE);
+ return kdb_register_flags(cmd, func, usage, help, minlen, 0);
}
EXPORT_SYMBOL_GPL(kdb_register);
@@ -2764,80 +2810,109 @@ static void __init kdb_inittab(void)
for_each_kdbcmd(kp, i)
kp->cmd_name = NULL;
- kdb_register_repeat("md", kdb_md, "<vaddr>",
+ kdb_register_flags("md", kdb_md, "<vaddr>",
"Display Memory Contents, also mdWcN, e.g. md8c1", 1,
- KDB_REPEAT_NO_ARGS);
- kdb_register_repeat("mdr", kdb_md, "<vaddr> <bytes>",
- "Display Raw Memory", 0, KDB_REPEAT_NO_ARGS);
- kdb_register_repeat("mdp", kdb_md, "<paddr> <bytes>",
- "Display Physical Memory", 0, KDB_REPEAT_NO_ARGS);
- kdb_register_repeat("mds", kdb_md, "<vaddr>",
- "Display Memory Symbolically", 0, KDB_REPEAT_NO_ARGS);
- kdb_register_repeat("mm", kdb_mm, "<vaddr> <contents>",
- "Modify Memory Contents", 0, KDB_REPEAT_NO_ARGS);
- kdb_register_repeat("go", kdb_go, "[<vaddr>]",
- "Continue Execution", 1, KDB_REPEAT_NONE);
- kdb_register_repeat("rd", kdb_rd, "",
- "Display Registers", 0, KDB_REPEAT_NONE);
- kdb_register_repeat("rm", kdb_rm, "<reg> <contents>",
- "Modify Registers", 0, KDB_REPEAT_NONE);
- kdb_register_repeat("ef", kdb_ef, "<vaddr>",
- "Display exception frame", 0, KDB_REPEAT_NONE);
- kdb_register_repeat("bt", kdb_bt, "[<vaddr>]",
- "Stack traceback", 1, KDB_REPEAT_NONE);
- kdb_register_repeat("btp", kdb_bt, "<pid>",
- "Display stack for process <pid>", 0, KDB_REPEAT_NONE);
- kdb_register_repeat("bta", kdb_bt, "[D|R|S|T|C|Z|E|U|I|M|A]",
- "Backtrace all processes matching state flag", 0, KDB_REPEAT_NONE);
- kdb_register_repeat("btc", kdb_bt, "",
- "Backtrace current process on each cpu", 0, KDB_REPEAT_NONE);
- kdb_register_repeat("btt", kdb_bt, "<vaddr>",
+ KDB_ENABLE_MEM_READ | KDB_REPEAT_NO_ARGS);
+ kdb_register_flags("mdr", kdb_md, "<vaddr> <bytes>",
+ "Display Raw Memory", 0,
+ KDB_ENABLE_MEM_READ | KDB_REPEAT_NO_ARGS);
+ kdb_register_flags("mdp", kdb_md, "<paddr> <bytes>",
+ "Display Physical Memory", 0,
+ KDB_ENABLE_MEM_READ | KDB_REPEAT_NO_ARGS);
+ kdb_register_flags("mds", kdb_md, "<vaddr>",
+ "Display Memory Symbolically", 0,
+ KDB_ENABLE_MEM_READ | KDB_REPEAT_NO_ARGS);
+ kdb_register_flags("mm", kdb_mm, "<vaddr> <contents>",
+ "Modify Memory Contents", 0,
+ KDB_ENABLE_MEM_WRITE | KDB_REPEAT_NO_ARGS);
+ kdb_register_flags("go", kdb_go, "[<vaddr>]",
+ "Continue Execution", 1,
+ KDB_ENABLE_REG_WRITE | KDB_ENABLE_ALWAYS_SAFE_NO_ARGS);
+ kdb_register_flags("rd", kdb_rd, "",
+ "Display Registers", 0,
+ KDB_ENABLE_REG_READ);
+ kdb_register_flags("rm", kdb_rm, "<reg> <contents>",
+ "Modify Registers", 0,
+ KDB_ENABLE_REG_WRITE);
+ kdb_register_flags("ef", kdb_ef, "<vaddr>",
+ "Display exception frame", 0,
+ KDB_ENABLE_MEM_READ);
+ kdb_register_flags("bt", kdb_bt, "[<vaddr>]",
+ "Stack traceback", 1,
+ KDB_ENABLE_MEM_READ | KDB_ENABLE_INSPECT_NO_ARGS);
+ kdb_register_flags("btp", kdb_bt, "<pid>",
+ "Display stack for process <pid>", 0,
+ KDB_ENABLE_INSPECT);
+ kdb_register_flags("bta", kdb_bt, "[D|R|S|T|C|Z|E|U|I|M|A]",
+ "Backtrace all processes matching state flag", 0,
+ KDB_ENABLE_INSPECT);
+ kdb_register_flags("btc", kdb_bt, "",
+ "Backtrace current process on each cpu", 0,
+ KDB_ENABLE_INSPECT);
+ kdb_register_flags("btt", kdb_bt, "<vaddr>",
"Backtrace process given its struct task address", 0,
- KDB_REPEAT_NONE);
- kdb_register_repeat("env", kdb_env, "",
- "Show environment variables", 0, KDB_REPEAT_NONE);
- kdb_register_repeat("set", kdb_set, "",
- "Set environment variables", 0, KDB_REPEAT_NONE);
- kdb_register_repeat("help", kdb_help, "",
- "Display Help Message", 1, KDB_REPEAT_NONE);
- kdb_register_repeat("?", kdb_help, "",
- "Display Help Message", 0, KDB_REPEAT_NONE);
- kdb_register_repeat("cpu", kdb_cpu, "<cpunum>",
- "Switch to new cpu", 0, KDB_REPEAT_NONE);
- kdb_register_repeat("kgdb", kdb_kgdb, "",
- "Enter kgdb mode", 0, KDB_REPEAT_NONE);
- kdb_register_repeat("ps", kdb_ps, "[<flags>|A]",
- "Display active task list", 0, KDB_REPEAT_NONE);
- kdb_register_repeat("pid", kdb_pid, "<pidnum>",
- "Switch to another task", 0, KDB_REPEAT_NONE);
- kdb_register_repeat("reboot", kdb_reboot, "",
- "Reboot the machine immediately", 0, KDB_REPEAT_NONE);
+ KDB_ENABLE_MEM_READ | KDB_ENABLE_INSPECT_NO_ARGS);
+ kdb_register_flags("env", kdb_env, "",
+ "Show environment variables", 0,
+ KDB_ENABLE_ALWAYS_SAFE);
+ kdb_register_flags("set", kdb_set, "",
+ "Set environment variables", 0,
+ KDB_ENABLE_ALWAYS_SAFE);
+ kdb_register_flags("help", kdb_help, "",
+ "Display Help Message", 1,
+ KDB_ENABLE_ALWAYS_SAFE);
+ kdb_register_flags("?", kdb_help, "",
+ "Display Help Message", 0,
+ KDB_ENABLE_ALWAYS_SAFE);
+ kdb_register_flags("cpu", kdb_cpu, "<cpunum>",
+ "Switch to new cpu", 0,
+ KDB_ENABLE_ALWAYS_SAFE_NO_ARGS);
+ kdb_register_flags("kgdb", kdb_kgdb, "",
+ "Enter kgdb mode", 0, 0);
+ kdb_register_flags("ps", kdb_ps, "[<flags>|A]",
+ "Display active task list", 0,
+ KDB_ENABLE_INSPECT);
+ kdb_register_flags("pid", kdb_pid, "<pidnum>",
+ "Switch to another task", 0,
+ KDB_ENABLE_INSPECT);
+ kdb_register_flags("reboot", kdb_reboot, "",
+ "Reboot the machine immediately", 0,
+ KDB_ENABLE_REBOOT);
#if defined(CONFIG_MODULES)
- kdb_register_repeat("lsmod", kdb_lsmod, "",
- "List loaded kernel modules", 0, KDB_REPEAT_NONE);
+ kdb_register_flags("lsmod", kdb_lsmod, "",
+ "List loaded kernel modules", 0,
+ KDB_ENABLE_INSPECT);
#endif
#if defined(CONFIG_MAGIC_SYSRQ)
- kdb_register_repeat("sr", kdb_sr, "<key>",
- "Magic SysRq key", 0, KDB_REPEAT_NONE);
+ kdb_register_flags("sr", kdb_sr, "<key>",
+ "Magic SysRq key", 0,
+ KDB_ENABLE_ALWAYS_SAFE);
#endif
#if defined(CONFIG_PRINTK)
- kdb_register_repeat("dmesg", kdb_dmesg, "[lines]",
- "Display syslog buffer", 0, KDB_REPEAT_NONE);
+ kdb_register_flags("dmesg", kdb_dmesg, "[lines]",
+ "Display syslog buffer", 0,
+ KDB_ENABLE_ALWAYS_SAFE);
#endif
if (arch_kgdb_ops.enable_nmi) {
- kdb_register_repeat("disable_nmi", kdb_disable_nmi, "",
- "Disable NMI entry to KDB", 0, KDB_REPEAT_NONE);
- }
- kdb_register_repeat("defcmd", kdb_defcmd, "name \"usage\" \"help\"",
- "Define a set of commands, down to endefcmd", 0, KDB_REPEAT_NONE);
- kdb_register_repeat("kill", kdb_kill, "<-signal> <pid>",
- "Send a signal to a process", 0, KDB_REPEAT_NONE);
- kdb_register_repeat("summary", kdb_summary, "",
- "Summarize the system", 4, KDB_REPEAT_NONE);
- kdb_register_repeat("per_cpu", kdb_per_cpu, "<sym> [<bytes>] [<cpu>]",
- "Display per_cpu variables", 3, KDB_REPEAT_NONE);
- kdb_register_repeat("grephelp", kdb_grep_help, "",
- "Display help on | grep", 0, KDB_REPEAT_NONE);
+ kdb_register_flags("disable_nmi", kdb_disable_nmi, "",
+ "Disable NMI entry to KDB", 0,
+ KDB_ENABLE_ALWAYS_SAFE);
+ }
+ kdb_register_flags("defcmd", kdb_defcmd, "name \"usage\" \"help\"",
+ "Define a set of commands, down to endefcmd", 0,
+ KDB_ENABLE_ALWAYS_SAFE);
+ kdb_register_flags("kill", kdb_kill, "<-signal> <pid>",
+ "Send a signal to a process", 0,
+ KDB_ENABLE_SIGNAL);
+ kdb_register_flags("summary", kdb_summary, "",
+ "Summarize the system", 4,
+ KDB_ENABLE_ALWAYS_SAFE);
+ kdb_register_flags("per_cpu", kdb_per_cpu, "<sym> [<bytes>] [<cpu>]",
+ "Display per_cpu variables", 3,
+ KDB_ENABLE_MEM_READ);
+ kdb_register_flags("grephelp", kdb_grep_help, "",
+ "Display help on | grep", 0,
+ KDB_ENABLE_ALWAYS_SAFE);
}
/* Execute any commands defined in kdb_cmds. */
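The whole permission scheme reduces to one bitmask intersection: the sysadmin's kdb.cmd_enable mask is widened with the always-safe bit (and, when the command is run bare, with the no-args variants), every command implicitly carries the catch-all bit, and the command runs iff the masks intersect. A user-space model with illustrative bit assignments (the real layout lives in include/linux/kdb.h):

    #include <assert.h>
    #include <stdbool.h>

    /* illustrative bit layout, not the uapi values */
    #define KDB_ENABLE_ALL           (1 << 0)
    #define KDB_ENABLE_MEM_READ      (1 << 1)
    #define KDB_ENABLE_FLOW_CTRL     (1 << 2)
    #define KDB_ENABLE_ALWAYS_SAFE   (1 << 3)
    #define KDB_ENABLE_MASK          0xf  /* the four bits above */
    #define KDB_ENABLE_NO_ARGS_SHIFT 4    /* *_NO_ARGS variants live above */

    static bool kdb_check_flags(int flags, int permissions, bool no_args)
    {
        permissions &= KDB_ENABLE_MASK;
        permissions |= KDB_ENABLE_ALWAYS_SAFE;
        if (no_args)
            permissions |= permissions << KDB_ENABLE_NO_ARGS_SHIFT;
        flags |= KDB_ENABLE_ALL;
        return permissions & flags;
    }

    int main(void)
    {
        /* locked-down console (cmd_enable grants nothing extra) */
        assert(!kdb_check_flags(KDB_ENABLE_MEM_READ, 0, false));   /* "md" */
        assert(kdb_check_flags(KDB_ENABLE_ALWAYS_SAFE, 0, false)); /* "help" */
        /* cmd_enable = ALL opens everything */
        assert(kdb_check_flags(KDB_ENABLE_MEM_READ, KDB_ENABLE_ALL, false));
        return 0;
    }

With the module parameter defaulting to CONFIG_KDB_DEFAULT_ENABLE, a locked-down console still answers help, env and dmesg but refuses md, mm and friends.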
diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h
index 7afd3c8c41d5..eaacd1693954 100644
--- a/kernel/debug/kdb/kdb_private.h
+++ b/kernel/debug/kdb/kdb_private.h
@@ -172,10 +172,9 @@ typedef struct _kdbtab {
kdb_func_t cmd_func; /* Function to execute command */
char *cmd_usage; /* Usage String for this command */
char *cmd_help; /* Help message for this command */
- short cmd_flags; /* Parsing flags */
short cmd_minlen; /* Minimum legal # command
* chars required */
- kdb_repeat_t cmd_repeat; /* Does command auto repeat on enter? */
+ kdb_cmdflags_t cmd_flags; /* Command behaviour flags */
} kdbtab_t;
extern int kdb_bt(int, const char **); /* KDB display back trace */
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 113b837470cd..882f835a0d85 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4461,18 +4461,14 @@ perf_output_sample_regs(struct perf_output_handle *handle,
}
static void perf_sample_regs_user(struct perf_regs *regs_user,
- struct pt_regs *regs)
+ struct pt_regs *regs,
+ struct pt_regs *regs_user_copy)
{
- if (!user_mode(regs)) {
- if (current->mm)
- regs = task_pt_regs(current);
- else
- regs = NULL;
- }
-
- if (regs) {
- regs_user->abi = perf_reg_abi(current);
+ if (user_mode(regs)) {
+ regs_user->abi = perf_reg_abi(current);
regs_user->regs = regs;
+ } else if (current->mm) {
+ perf_get_regs_user(regs_user, regs, regs_user_copy);
} else {
regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE;
regs_user->regs = NULL;
@@ -4951,7 +4947,8 @@ void perf_prepare_sample(struct perf_event_header *header,
}
if (sample_type & (PERF_SAMPLE_REGS_USER | PERF_SAMPLE_STACK_USER))
- perf_sample_regs_user(&data->regs_user, regs);
+ perf_sample_regs_user(&data->regs_user, regs,
+ &data->regs_user_copy);
if (sample_type & PERF_SAMPLE_REGS_USER) {
/* regs dump ABI info */
@@ -7477,11 +7474,11 @@ SYSCALL_DEFINE5(perf_event_open,
if (move_group) {
synchronize_rcu();
- perf_install_in_context(ctx, group_leader, event->cpu);
+ perf_install_in_context(ctx, group_leader, group_leader->cpu);
get_ctx(ctx);
list_for_each_entry(sibling, &group_leader->sibling_list,
group_entry) {
- perf_install_in_context(ctx, sibling, event->cpu);
+ perf_install_in_context(ctx, sibling, sibling->cpu);
get_ctx(ctx);
}
}
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index ed8f2cde34c5..cb346f26a22d 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -193,7 +193,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
}
flush_cache_page(vma, addr, pte_pfn(*ptep));
- ptep_clear_flush(vma, addr, ptep);
+ ptep_clear_flush_notify(vma, addr, ptep);
set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot));
page_remove_rmap(page);
@@ -724,14 +724,14 @@ build_map_info(struct address_space *mapping, loff_t offset, bool is_register)
int more = 0;
again:
- mutex_lock(&mapping->i_mmap_mutex);
+ i_mmap_lock_read(mapping);
vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
if (!valid_vma(vma, is_register))
continue;
if (!prev && !more) {
/*
- * Needs GFP_NOWAIT to avoid i_mmap_mutex recursion through
+ * Needs GFP_NOWAIT to avoid i_mmap_rwsem recursion through
* reclaim. This is optimistic, no harm done if it fails.
*/
prev = kmalloc(sizeof(struct map_info),
@@ -755,7 +755,7 @@ build_map_info(struct address_space *mapping, loff_t offset, bool is_register)
info->mm = vma->vm_mm;
info->vaddr = offset_to_vaddr(vma, offset);
}
- mutex_unlock(&mapping->i_mmap_mutex);
+ i_mmap_unlock_read(mapping);
if (!more)
goto out;
diff --git a/kernel/exit.c b/kernel/exit.c
index 8714e5ded8b4..6806c55475ee 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -212,27 +212,6 @@ repeat:
}
/*
- * This checks not only the pgrp, but falls back on the pid if no
- * satisfactory pgrp is found. I dunno - gdb doesn't work correctly
- * without this...
- *
- * The caller must hold rcu lock or the tasklist lock.
- */
-struct pid *session_of_pgrp(struct pid *pgrp)
-{
- struct task_struct *p;
- struct pid *sid = NULL;
-
- p = pid_task(pgrp, PIDTYPE_PGID);
- if (p == NULL)
- p = pid_task(pgrp, PIDTYPE_PID);
- if (p != NULL)
- sid = task_session(p);
-
- return sid;
-}
-
-/*
* Determine if a process group is "orphaned", according to the POSIX
* definition in 2.2.2.52. Orphaned process groups are not to be affected
* by terminal-generated stop signals. Newly orphaned process groups are
@@ -1308,9 +1287,15 @@ static int wait_task_continued(struct wait_opts *wo, struct task_struct *p)
static int wait_consider_task(struct wait_opts *wo, int ptrace,
struct task_struct *p)
{
+ /*
+ * We can race with wait_task_zombie() from another thread.
+ * Ensure that EXIT_ZOMBIE -> EXIT_DEAD/EXIT_TRACE transition
+ * can't confuse the checks below.
+ */
+ int exit_state = ACCESS_ONCE(p->exit_state);
int ret;
- if (unlikely(p->exit_state == EXIT_DEAD))
+ if (unlikely(exit_state == EXIT_DEAD))
return 0;
ret = eligible_child(wo, p);
@@ -1331,7 +1316,7 @@ static int wait_consider_task(struct wait_opts *wo, int ptrace,
return 0;
}
- if (unlikely(p->exit_state == EXIT_TRACE)) {
+ if (unlikely(exit_state == EXIT_TRACE)) {
/*
* ptrace == 0 means we are the natural parent. In this case
* we should clear notask_error, debugger will notify us.
@@ -1358,7 +1343,7 @@ static int wait_consider_task(struct wait_opts *wo, int ptrace,
}
/* slay zombie? */
- if (p->exit_state == EXIT_ZOMBIE) {
+ if (exit_state == EXIT_ZOMBIE) {
/* we don't reap group leaders with subthreads */
if (!delay_group_leader(p)) {
/*
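The snapshot matters because p->exit_state can advance from EXIT_ZOMBIE to EXIT_TRACE or EXIT_DEAD between the three tests; reading it once makes the tests mutually consistent. A compressed model of the fixed flow; the state values are illustrative, not the kernel's:

    #include <assert.h>

    #define ACCESS_ONCE(x) (*(volatile __typeof__(x) *)&(x))

    enum { EXIT_ZOMBIE = 1, EXIT_TRACE = 2, EXIT_DEAD = 3 }; /* illustrative */

    struct task { int exit_state; };

    static int wait_consider(struct task *p)
    {
        /* one read: the tests below see one coherent value even if another
         * thread concurrently advances p->exit_state */
        int exit_state = ACCESS_ONCE(p->exit_state);

        if (exit_state == EXIT_DEAD)
            return 0;   /* already reaped */
        if (exit_state == EXIT_TRACE)
            return 1;   /* being reaped by a debugger */
        if (exit_state == EXIT_ZOMBIE)
            return 2;   /* reap it */
        return 3;       /* still running */
    }

    int main(void)
    {
        struct task t = { .exit_state = EXIT_ZOMBIE };

        assert(wait_consider(&t) == 2);
        return 0;
    }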
diff --git a/kernel/fork.c b/kernel/fork.c
index 9ca84189cfc2..4dc2ddade9f1 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -433,7 +433,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
get_file(file);
if (tmp->vm_flags & VM_DENYWRITE)
atomic_dec(&inode->i_writecount);
- mutex_lock(&mapping->i_mmap_mutex);
+ i_mmap_lock_write(mapping);
if (tmp->vm_flags & VM_SHARED)
atomic_inc(&mapping->i_mmap_writable);
flush_dcache_mmap_lock(mapping);
@@ -445,7 +445,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
vma_interval_tree_insert_after(tmp, mpnt,
&mapping->i_mmap);
flush_dcache_mmap_unlock(mapping);
- mutex_unlock(&mapping->i_mmap_mutex);
+ i_mmap_unlock_write(mapping);
}
/*
diff --git a/kernel/gcov/Kconfig b/kernel/gcov/Kconfig
index 3b7408759bdf..c92e44855ddd 100644
--- a/kernel/gcov/Kconfig
+++ b/kernel/gcov/Kconfig
@@ -32,10 +32,13 @@ config GCOV_KERNEL
Note that the debugfs filesystem has to be mounted to access
profiling data.
+config ARCH_HAS_GCOV_PROFILE_ALL
+ def_bool n
+
config GCOV_PROFILE_ALL
bool "Profile entire Kernel"
depends on GCOV_KERNEL
- depends on SUPERH || S390 || X86 || PPC || MICROBLAZE || ARM || ARM64
+ depends on ARCH_HAS_GCOV_PROFILE_ALL
default n
---help---
This options activates profiling for the entire kernel.
diff --git a/kernel/groups.c b/kernel/groups.c
index 451698f86cfa..664411f171b5 100644
--- a/kernel/groups.c
+++ b/kernel/groups.c
@@ -6,6 +6,7 @@
#include <linux/slab.h>
#include <linux/security.h>
#include <linux/syscalls.h>
+#include <linux/user_namespace.h>
#include <asm/uaccess.h>
/* init to 2 - one for init_task, one to ensure it is never freed */
@@ -213,6 +214,14 @@ out:
return i;
}
+bool may_setgroups(void)
+{
+ struct user_namespace *user_ns = current_user_ns();
+
+ return ns_capable(user_ns, CAP_SETGID) &&
+ userns_may_setgroups(user_ns);
+}
+
/*
* SMP: Our groups are copy-on-write. We can set them safely
* without another task interfering.
@@ -223,7 +232,7 @@ SYSCALL_DEFINE2(setgroups, int, gidsetsize, gid_t __user *, grouplist)
struct group_info *group_info;
int retval;
- if (!ns_capable(current_user_ns(), CAP_SETGID))
+ if (!may_setgroups())
return -EPERM;
if ((unsigned)gidsetsize > NGROUPS_MAX)
return -EINVAL;
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 4332d766619d..df553b0af936 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -78,8 +78,12 @@ extern void unmask_threaded_irq(struct irq_desc *desc);
#ifdef CONFIG_SPARSE_IRQ
static inline void irq_mark_irq(unsigned int irq) { }
+extern void irq_lock_sparse(void);
+extern void irq_unlock_sparse(void);
#else
extern void irq_mark_irq(unsigned int irq);
+static inline void irq_lock_sparse(void) { }
+static inline void irq_unlock_sparse(void) { }
#endif
extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr);
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index a1782f88f0af..99793b9b6d23 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -132,6 +132,16 @@ static void free_masks(struct irq_desc *desc)
static inline void free_masks(struct irq_desc *desc) { }
#endif
+void irq_lock_sparse(void)
+{
+ mutex_lock(&sparse_irq_lock);
+}
+
+void irq_unlock_sparse(void)
+{
+ mutex_unlock(&sparse_irq_lock);
+}
+
static struct irq_desc *alloc_desc(int irq, int node, struct module *owner)
{
struct irq_desc *desc;
@@ -168,6 +178,12 @@ static void free_desc(unsigned int irq)
unregister_irq_proc(irq, desc);
+ /*
+ * sparse_irq_lock also protects show_interrupts() and
+ * kstat_irqs_usr(). Once we have deleted the descriptor from the
+ * sparse tree we can free it. Access in proc will then fail to
+ * look up the descriptor.
+ */
mutex_lock(&sparse_irq_lock);
delete_irq_desc(irq);
mutex_unlock(&sparse_irq_lock);
@@ -574,6 +590,15 @@ void kstat_incr_irq_this_cpu(unsigned int irq)
kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq));
}
+/**
+ * kstat_irqs_cpu - Get the statistics for an interrupt on a cpu
+ * @irq: The interrupt number
+ * @cpu: The cpu number
+ *
+ * Returns the sum of interrupt counts on @cpu since boot for
+ * @irq. The caller must ensure that the interrupt is not removed
+ * concurrently.
+ */
unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
{
struct irq_desc *desc = irq_to_desc(irq);
@@ -582,6 +607,14 @@ unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
*per_cpu_ptr(desc->kstat_irqs, cpu) : 0;
}
+/**
+ * kstat_irqs - Get the statistics for an interrupt
+ * @irq: The interrupt number
+ *
+ * Returns the sum of interrupt counts on all cpus since boot for
+ * @irq. The caller must ensure that the interrupt is not removed
+ * concurrently.
+ */
unsigned int kstat_irqs(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
@@ -594,3 +627,22 @@ unsigned int kstat_irqs(unsigned int irq)
sum += *per_cpu_ptr(desc->kstat_irqs, cpu);
return sum;
}
+
+/**
+ * kstat_irqs_usr - Get the statistics for an interrupt
+ * @irq: The interrupt number
+ *
+ * Returns the sum of interrupt counts on all cpus since boot for
+ * @irq. Contrary to kstat_irqs() this can be called from any
+ * preemptible context. It's protected against concurrent removal of
+ * an interrupt descriptor when sparse irqs are enabled.
+ */
+unsigned int kstat_irqs_usr(unsigned int irq)
+{
+ int sum;
+
+ irq_lock_sparse();
+ sum = kstat_irqs(irq);
+ irq_unlock_sparse();
+ return sum;
+}
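kstat_irqs_usr() is the same per-cpu sum as kstat_irqs(), just bracketed by sparse_irq_lock so that a concurrent free_desc() cannot free the descriptor mid-walk; it exists for preemptible /proc readers. The shape of the wrapper, modeled with a pthread mutex:

    #include <pthread.h>

    static pthread_mutex_t sparse_irq_lock = PTHREAD_MUTEX_INITIALIZER;

    /* stands in for the irq_to_desc() lookup plus the per-cpu sum;
     * returns 0 for a descriptor that has been removed */
    static unsigned int kstat_irqs_model(unsigned int irq)
    {
        return irq;  /* placeholder */
    }

    static unsigned int kstat_irqs_usr_model(unsigned int irq)
    {
        unsigned int sum;

        pthread_mutex_lock(&sparse_irq_lock);    /* irq_lock_sparse() */
        sum = kstat_irqs_model(irq);             /* safe against free_desc() */
        pthread_mutex_unlock(&sparse_irq_lock);  /* irq_unlock_sparse() */
        return sum;
    }

    int main(void)
    {
        return (int)kstat_irqs_usr_model(0);
    }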
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index ac1ba2f11032..9dc9bfd8a678 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -15,6 +15,23 @@
#include "internals.h"
+/*
+ * Access rules:
+ *
+ * procfs protects read/write of /proc/irq/N/ files against a
+ * concurrent free of the interrupt descriptor. remove_proc_entry()
+ * immediately prevents new read/writes to happen and waits for
+ * already running read/write functions to complete.
+ *
+ * We remove the proc entries first and then delete the interrupt
+ * descriptor from the radix tree and free it. So it is guaranteed
+ * that irq_to_desc(N) is valid as long as the read/writes are
+ * permitted by procfs.
+ *
+ * The read from /proc/interrupts is a different problem because there
+ * is no protection. So the lookup and the access to irqdesc
+ * information must be protected by sparse_irq_lock.
+ */
static struct proc_dir_entry *root_irq_dir;
#ifdef CONFIG_SMP
@@ -437,9 +454,10 @@ int show_interrupts(struct seq_file *p, void *v)
seq_putc(p, '\n');
}
+ irq_lock_sparse();
desc = irq_to_desc(i);
if (!desc)
- return 0;
+ goto outsparse;
raw_spin_lock_irqsave(&desc->lock, flags);
for_each_online_cpu(j)
@@ -479,6 +497,8 @@ int show_interrupts(struct seq_file *p, void *v)
seq_putc(p, '\n');
out:
raw_spin_unlock_irqrestore(&desc->lock, flags);
+outsparse:
+ irq_unlock_sparse();
return 0;
}
#endif
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 2abf9f6e9a61..9a8a01abbaed 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -600,7 +600,7 @@ kimage_file_alloc_init(struct kimage **rimage, int kernel_fd,
if (!kexec_on_panic) {
image->swap_page = kimage_alloc_control_pages(image, 0);
if (!image->swap_page) {
- pr_err(KERN_ERR "Could not allocate swap buffer\n");
+ pr_err("Could not allocate swap buffer\n");
goto out_free_control_pages;
}
}
diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c
index 5cf6731b98e9..3ef3736002d8 100644
--- a/kernel/locking/mutex-debug.c
+++ b/kernel/locking/mutex-debug.c
@@ -80,13 +80,13 @@ void debug_mutex_unlock(struct mutex *lock)
DEBUG_LOCKS_WARN_ON(lock->owner != current);
DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next);
- mutex_clear_owner(lock);
}
/*
* __mutex_slowpath_needs_to_unlock() is explicitly 0 for debug
* mutexes so that we can do it here after we've verified state.
*/
+ mutex_clear_owner(lock);
atomic_set(&lock->count, 1);
}
diff --git a/kernel/locking/spinlock.c b/kernel/locking/spinlock.c
index 4b082b5cac9e..db3ccb1dd614 100644
--- a/kernel/locking/spinlock.c
+++ b/kernel/locking/spinlock.c
@@ -363,6 +363,14 @@ void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass)
}
EXPORT_SYMBOL(_raw_spin_lock_nested);
+void __lockfunc _raw_spin_lock_bh_nested(raw_spinlock_t *lock, int subclass)
+{
+ __local_bh_disable_ip(_RET_IP_, SOFTIRQ_LOCK_OFFSET);
+ spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
+ LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock);
+}
+EXPORT_SYMBOL(_raw_spin_lock_bh_nested);
+
unsigned long __lockfunc _raw_spin_lock_irqsave_nested(raw_spinlock_t *lock,
int subclass)
{
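The new _raw_spin_lock_bh_nested() backs a spin_lock_bh_nested() wrapper for code that legitimately takes two locks of the same lock class with bottom halves disabled (the rhashtable rehash path was the motivating user). A hedged kernel-style sketch of the intended use; struct bucket and transfer() are made-up names, and this compiles only in a kernel tree:

    #include <linux/spinlock.h>

    struct bucket {              /* made-up example structure */
        spinlock_t lock;
    };

    static void transfer(struct bucket *a, struct bucket *b)
    {
        spin_lock_bh(&a->lock);
        /* same lock class as a->lock: the _nested annotation tells lockdep
         * this second acquisition is an intentional ordering, not a
         * self-deadlock */
        spin_lock_bh_nested(&b->lock, SINGLE_DEPTH_NESTING);
        /* ... move entries from a to b ... */
        spin_unlock_bh(&b->lock);
        spin_unlock_bh(&a->lock);
    }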
diff --git a/kernel/module.c b/kernel/module.c
index e52a8739361a..3965511ae133 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -42,7 +42,6 @@
#include <linux/vermagic.h>
#include <linux/notifier.h>
#include <linux/sched.h>
-#include <linux/stop_machine.h>
#include <linux/device.h>
#include <linux/string.h>
#include <linux/mutex.h>
@@ -98,7 +97,7 @@
* 1) List of modules (also safely readable with preempt_disable),
* 2) module_use links,
* 3) module_addr_min/module_addr_max.
- * (delete uses stop_machine/add uses RCU list operations). */
+ * (delete and add use RCU list operations). */
DEFINE_MUTEX(module_mutex);
EXPORT_SYMBOL_GPL(module_mutex);
static LIST_HEAD(modules);
@@ -158,13 +157,13 @@ static BLOCKING_NOTIFIER_HEAD(module_notify_list);
* Protected by module_mutex. */
static unsigned long module_addr_min = -1UL, module_addr_max = 0;
-int register_module_notifier(struct notifier_block * nb)
+int register_module_notifier(struct notifier_block *nb)
{
return blocking_notifier_chain_register(&module_notify_list, nb);
}
EXPORT_SYMBOL(register_module_notifier);
-int unregister_module_notifier(struct notifier_block * nb)
+int unregister_module_notifier(struct notifier_block *nb)
{
return blocking_notifier_chain_unregister(&module_notify_list, nb);
}
@@ -628,18 +627,23 @@ static char last_unloaded_module[MODULE_NAME_LEN+1];
EXPORT_TRACEPOINT_SYMBOL(module_get);
+/* MODULE_REF_BASE is the base reference count held by the module loader. */
+#define MODULE_REF_BASE 1
+
/* Init the unload section of the module. */
static int module_unload_init(struct module *mod)
{
- mod->refptr = alloc_percpu(struct module_ref);
- if (!mod->refptr)
- return -ENOMEM;
+ /*
+ * Initialize reference counter to MODULE_REF_BASE.
+ * refcnt == 0 means module is going.
+ */
+ atomic_set(&mod->refcnt, MODULE_REF_BASE);
INIT_LIST_HEAD(&mod->source_list);
INIT_LIST_HEAD(&mod->target_list);
/* Hold reference count during initialization. */
- raw_cpu_write(mod->refptr->incs, 1);
+ atomic_inc(&mod->refcnt);
return 0;
}
@@ -721,8 +725,6 @@ static void module_unload_free(struct module *mod)
kfree(use);
}
mutex_unlock(&module_mutex);
-
- free_percpu(mod->refptr);
}
#ifdef CONFIG_MODULE_FORCE_UNLOAD
@@ -740,60 +742,39 @@ static inline int try_force_unload(unsigned int flags)
}
#endif /* CONFIG_MODULE_FORCE_UNLOAD */
-struct stopref
+/* Try to release refcount of module; 0 means success. */
+static int try_release_module_ref(struct module *mod)
{
- struct module *mod;
- int flags;
- int *forced;
-};
+ int ret;
-/* Whole machine is stopped with interrupts off when this runs. */
-static int __try_stop_module(void *_sref)
-{
- struct stopref *sref = _sref;
+ /* Try to decrement refcnt which we set at loading */
+ ret = atomic_sub_return(MODULE_REF_BASE, &mod->refcnt);
+ BUG_ON(ret < 0);
+ if (ret)
+ /* Someone can put this right now, recover with checking */
+ ret = atomic_add_unless(&mod->refcnt, MODULE_REF_BASE, 0);
+
+ return ret;
+}
+static int try_stop_module(struct module *mod, int flags, int *forced)
+{
/* If it's not unused, quit unless we're forcing. */
- if (module_refcount(sref->mod) != 0) {
- if (!(*sref->forced = try_force_unload(sref->flags)))
+ if (try_release_module_ref(mod) != 0) {
+ *forced = try_force_unload(flags);
+ if (!(*forced))
return -EWOULDBLOCK;
}
/* Mark it as dying. */
- sref->mod->state = MODULE_STATE_GOING;
- return 0;
-}
-
-static int try_stop_module(struct module *mod, int flags, int *forced)
-{
- struct stopref sref = { mod, flags, forced };
+ mod->state = MODULE_STATE_GOING;
- return stop_machine(__try_stop_module, &sref, NULL);
+ return 0;
}
unsigned long module_refcount(struct module *mod)
{
- unsigned long incs = 0, decs = 0;
- int cpu;
-
- for_each_possible_cpu(cpu)
- decs += per_cpu_ptr(mod->refptr, cpu)->decs;
- /*
- * ensure the incs are added up after the decs.
- * module_put ensures incs are visible before decs with smp_wmb.
- *
- * This 2-count scheme avoids the situation where the refcount
- * for CPU0 is read, then CPU0 increments the module refcount,
- * then CPU1 drops that refcount, then the refcount for CPU1 is
- * read. We would record a decrement but not its corresponding
- * increment so we would see a low count (disaster).
- *
- * Rare situation? But module_refcount can be preempted, and we
- * might be tallying up 4096+ CPUs. So it is not impossible.
- */
- smp_rmb();
- for_each_possible_cpu(cpu)
- incs += per_cpu_ptr(mod->refptr, cpu)->incs;
- return incs - decs;
+ return (unsigned long)atomic_read(&mod->refcnt) - MODULE_REF_BASE;
}
EXPORT_SYMBOL(module_refcount);
@@ -877,8 +858,10 @@ static inline void print_unload_info(struct seq_file *m, struct module *mod)
seq_printf(m, " %lu ", module_refcount(mod));
- /* Always include a trailing , so userspace can differentiate
- between this and the old multi-field proc format. */
+ /*
+ * Always include a trailing , so userspace can differentiate
+ * between this and the old multi-field proc format.
+ */
list_for_each_entry(use, &mod->source_list, source_list) {
printed_something = 1;
seq_printf(m, "%s,", use->source->name);
@@ -886,11 +869,11 @@ static inline void print_unload_info(struct seq_file *m, struct module *mod)
if (mod->init != NULL && mod->exit == NULL) {
printed_something = 1;
- seq_printf(m, "[permanent],");
+ seq_puts(m, "[permanent],");
}
if (!printed_something)
- seq_printf(m, "-");
+ seq_puts(m, "-");
}
void __symbol_put(const char *symbol)
@@ -935,7 +918,7 @@ void __module_get(struct module *module)
{
if (module) {
preempt_disable();
- __this_cpu_inc(module->refptr->incs);
+ atomic_inc(&module->refcnt);
trace_module_get(module, _RET_IP_);
preempt_enable();
}
@@ -948,11 +931,11 @@ bool try_module_get(struct module *module)
if (module) {
preempt_disable();
-
- if (likely(module_is_live(module))) {
- __this_cpu_inc(module->refptr->incs);
+ /* Note: here, we can fail to get a reference */
+ if (likely(module_is_live(module) &&
+ atomic_inc_not_zero(&module->refcnt) != 0))
trace_module_get(module, _RET_IP_);
- } else
+ else
ret = false;
preempt_enable();
@@ -963,11 +946,12 @@ EXPORT_SYMBOL(try_module_get);
void module_put(struct module *module)
{
+ int ret;
+
if (module) {
preempt_disable();
- smp_wmb(); /* see comment in module_refcount */
- __this_cpu_inc(module->refptr->decs);
-
+ ret = atomic_dec_if_positive(&module->refcnt);
+ WARN_ON(ret < 0); /* Failed to put refcount */
trace_module_put(module, _RET_IP_);
preempt_enable();
}
@@ -978,7 +962,7 @@ EXPORT_SYMBOL(module_put);
static inline void print_unload_info(struct seq_file *m, struct module *mod)
{
/* We don't know the usage count, or what modules are using. */
- seq_printf(m, " - -");
+ seq_puts(m, " - -");
}
static inline void module_unload_free(struct module *mod)
@@ -1131,7 +1115,7 @@ static unsigned long maybe_relocated(unsigned long crc,
static int check_version(Elf_Shdr *sechdrs,
unsigned int versindex,
const char *symname,
- struct module *mod,
+ struct module *mod,
const unsigned long *crc,
const struct module *crc_owner)
{
@@ -1165,7 +1149,7 @@ static int check_version(Elf_Shdr *sechdrs,
return 0;
bad_version:
- printk("%s: disagrees about version of symbol %s\n",
+ pr_warn("%s: disagrees about version of symbol %s\n",
mod->name, symname);
return 0;
}
@@ -1200,7 +1184,7 @@ static inline int same_magic(const char *amagic, const char *bmagic,
static inline int check_version(Elf_Shdr *sechdrs,
unsigned int versindex,
const char *symname,
- struct module *mod,
+ struct module *mod,
const unsigned long *crc,
const struct module *crc_owner)
{
@@ -1288,15 +1272,13 @@ static inline bool sect_empty(const Elf_Shdr *sect)
return !(sect->sh_flags & SHF_ALLOC) || sect->sh_size == 0;
}
-struct module_sect_attr
-{
+struct module_sect_attr {
struct module_attribute mattr;
char *name;
unsigned long address;
};
-struct module_sect_attrs
-{
+struct module_sect_attrs {
struct attribute_group grp;
unsigned int nsections;
struct module_sect_attr attrs[0];
@@ -1550,7 +1532,8 @@ static int module_add_modinfo_attrs(struct module *mod)
(attr->test && attr->test(mod))) {
memcpy(temp_attr, attr, sizeof(*temp_attr));
sysfs_attr_init(&temp_attr->attr);
- error = sysfs_create_file(&mod->mkobj.kobj,&temp_attr->attr);
+ error = sysfs_create_file(&mod->mkobj.kobj,
+ &temp_attr->attr);
++temp_attr;
}
}
@@ -1566,7 +1549,7 @@ static void module_remove_modinfo_attrs(struct module *mod)
/* pick a field to test for end of list */
if (!attr->attr.name)
break;
- sysfs_remove_file(&mod->mkobj.kobj,&attr->attr);
+ sysfs_remove_file(&mod->mkobj.kobj, &attr->attr);
if (attr->free)
attr->free(mod);
}
@@ -1697,18 +1680,6 @@ static void mod_sysfs_teardown(struct module *mod)
mod_sysfs_fini(mod);
}
-/*
- * unlink the module with the whole machine is stopped with interrupts off
- * - this defends against kallsyms not taking locks
- */
-static int __unlink_module(void *_mod)
-{
- struct module *mod = _mod;
- list_del(&mod->list);
- module_bug_cleanup(mod);
- return 0;
-}
-
#ifdef CONFIG_DEBUG_SET_MODULE_RONX
/*
* LKM RO/NX protection: protect module's text/ro-data
@@ -1860,7 +1831,12 @@ static void free_module(struct module *mod)
/* Now we can delete it from the lists */
mutex_lock(&module_mutex);
- stop_machine(__unlink_module, mod, NULL);
+ /* Unlink carefully: kallsyms could be walking list. */
+ list_del_rcu(&mod->list);
+ /* Remove this module from bug list, this uses list_del_rcu */
+ module_bug_cleanup(mod);
+ /* Wait for RCU synchronization before releasing mod->list and buglist. */
+ synchronize_rcu();
mutex_unlock(&module_mutex);
/* This may be NULL, but that's OK */
@@ -1955,7 +1931,7 @@ static int simplify_symbols(struct module *mod, const struct load_info *info)
/* We compiled with -fno-common. These are not
supposed to happen. */
pr_debug("Common symbol: %s\n", name);
- printk("%s: please compile with -fno-common\n",
+ pr_warn("%s: please compile with -fno-common\n",
mod->name);
ret = -ENOEXEC;
break;
@@ -2259,7 +2235,7 @@ static char elf_type(const Elf_Sym *sym, const struct load_info *info)
}
static bool is_core_symbol(const Elf_Sym *src, const Elf_Shdr *sechdrs,
- unsigned int shnum)
+ unsigned int shnum)
{
const Elf_Shdr *sec;
@@ -2735,7 +2711,7 @@ static int find_module_sections(struct module *mod, struct load_info *info)
* This shouldn't happen with same compiler and binutils
* building all parts of the module.
*/
- printk(KERN_WARNING "%s: has both .ctors and .init_array.\n",
+ pr_warn("%s: has both .ctors and .init_array.\n",
mod->name);
return -EINVAL;
}
@@ -3023,8 +2999,10 @@ static int do_init_module(struct module *mod)
if (mod->init != NULL)
ret = do_one_initcall(mod->init);
if (ret < 0) {
- /* Init routine failed: abort. Try to protect us from
- buggy refcounters. */
+ /*
+ * Init routine failed: abort. Try to protect us from
+ * buggy refcounters.
+ */
mod->state = MODULE_STATE_GOING;
synchronize_sched();
module_put(mod);
@@ -3202,7 +3180,7 @@ out:
static int unknown_module_param_cb(char *param, char *val, const char *modname)
{
- /* Check for magic 'dyndbg' arg */
+ /* Check for magic 'dyndbg' arg */
int ret = ddebug_dyndbg_module_param_cb(param, val, modname);
if (ret != 0)
pr_warn("%s: unknown parameter '%s' ignored\n", modname, param);
@@ -3352,6 +3330,8 @@ static int load_module(struct load_info *info, const char __user *uargs,
/* Unlink carefully: kallsyms could be walking list. */
list_del_rcu(&mod->list);
wake_up_all(&module_wq);
+ /* Wait for RCU synchronization before releasing mod->list. */
+ synchronize_rcu();
mutex_unlock(&module_mutex);
free_module:
module_deallocate(mod, info);
@@ -3685,8 +3665,8 @@ static int m_show(struct seq_file *m, void *p)
/* Informative for users. */
seq_printf(m, " %s",
- mod->state == MODULE_STATE_GOING ? "Unloading":
- mod->state == MODULE_STATE_COMING ? "Loading":
+ mod->state == MODULE_STATE_GOING ? "Unloading" :
+ mod->state == MODULE_STATE_COMING ? "Loading" :
"Live");
/* Used by oprofile and other similar tools. */
seq_printf(m, " 0x%pK", mod->module_core);
@@ -3695,7 +3675,7 @@ static int m_show(struct seq_file *m, void *p)
if (mod->taints)
seq_printf(m, " %s", module_flags(mod, buf));
- seq_printf(m, "\n");
+ seq_puts(m, "\n");
return 0;
}
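Note on the module.c changes above: replacing stop_machine() with list_del_rcu() plus synchronize_rcu() only works because every lockless reader runs under rcu_read_lock(). A minimal sketch of such a reader (kallsyms-style; 'modules' is the list head local to kernel/module.c):

#include <linux/module.h>
#include <linux/rcupdate.h>

static void walk_modules_rcu(void)
{
	struct module *mod;

	rcu_read_lock();
	list_for_each_entry_rcu(mod, &modules, list) {
		/* 'mod' stays valid until rcu_read_unlock(): the writer
		 * does list_del_rcu() and waits in synchronize_rcu()
		 * before freeing it. */
		pr_debug("module: %s\n", mod->name);
	}
	rcu_read_unlock();
}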
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index ef42d0ab3115..49746c81ad8d 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -220,11 +220,10 @@ void exit_task_namespaces(struct task_struct *p)
SYSCALL_DEFINE2(setns, int, fd, int, nstype)
{
- const struct proc_ns_operations *ops;
struct task_struct *tsk = current;
struct nsproxy *new_nsproxy;
- struct proc_ns *ei;
struct file *file;
+ struct ns_common *ns;
int err;
file = proc_ns_fget(fd);
@@ -232,9 +231,8 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype)
return PTR_ERR(file);
err = -EINVAL;
- ei = get_proc_ns(file_inode(file));
- ops = ei->ns_ops;
- if (nstype && (ops->type != nstype))
+ ns = get_proc_ns(file_inode(file));
+ if (nstype && (ns->ops->type != nstype))
goto out;
new_nsproxy = create_new_namespaces(0, tsk, current_user_ns(), tsk->fs);
@@ -243,7 +241,7 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype)
goto out;
}
- err = ops->install(new_nsproxy, ei->ns);
+ err = ns->ops->install(new_nsproxy, ns);
if (err) {
free_nsproxy(new_nsproxy);
goto out;
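For reference, the syscall this reworks is exercised from userspace as before; a minimal sketch (the pid and namespace type are arbitrary):

#define _GNU_SOURCE
#include <fcntl.h>
#include <sched.h>
#include <unistd.h>

int main(void)
{
	/* Every /proc/<pid>/ns/* file now backs onto a struct ns_common. */
	int fd = open("/proc/1234/ns/uts", O_RDONLY);

	if (fd < 0)
		return 1;
	/* Fails with EINVAL if fd is not a CLONE_NEWUTS namespace. */
	if (setns(fd, CLONE_NEWUTS) < 0)
		return 1;
	close(fd);
	return 0;
}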
diff --git a/kernel/params.c b/kernel/params.c
index db97b791390f..0af9b2c4e56c 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -603,74 +603,67 @@ static __modinit int add_sysfs_param(struct module_kobject *mk,
const struct kernel_param *kp,
const char *name)
{
- struct module_param_attrs *new;
- struct attribute **attrs;
- int err, num;
+ struct module_param_attrs *new_mp;
+ struct attribute **new_attrs;
+ unsigned int i;
/* We don't bother calling this with invisible parameters. */
BUG_ON(!kp->perm);
if (!mk->mp) {
- num = 0;
- attrs = NULL;
- } else {
- num = mk->mp->num;
- attrs = mk->mp->grp.attrs;
+ /* First allocation. */
+ mk->mp = kzalloc(sizeof(*mk->mp), GFP_KERNEL);
+ if (!mk->mp)
+ return -ENOMEM;
+ mk->mp->grp.name = "parameters";
+ /* NULL-terminated attribute array. */
+ mk->mp->grp.attrs = kzalloc(sizeof(mk->mp->grp.attrs[0]),
+ GFP_KERNEL);
+ /* Caller will clean up via free_module_param_attrs(). */
+ if (!mk->mp->grp.attrs)
+ return -ENOMEM;
}
- /* Enlarge. */
- new = krealloc(mk->mp,
- sizeof(*mk->mp) + sizeof(mk->mp->attrs[0]) * (num+1),
- GFP_KERNEL);
- if (!new) {
- kfree(attrs);
- err = -ENOMEM;
- goto fail;
- }
- /* Despite looking like the typical realloc() bug, this is safe.
- * We *want* the old 'attrs' to be freed either way, and we'll store
- * the new one in the success case. */
- attrs = krealloc(attrs, sizeof(new->grp.attrs[0])*(num+2), GFP_KERNEL);
- if (!attrs) {
- err = -ENOMEM;
- goto fail_free_new;
- }
+ /* Enlarge allocations. */
+ new_mp = krealloc(mk->mp,
+ sizeof(*mk->mp) +
+ sizeof(mk->mp->attrs[0]) * (mk->mp->num + 1),
+ GFP_KERNEL);
+ if (!new_mp)
+ return -ENOMEM;
+ mk->mp = new_mp;
- /* Sysfs wants everything zeroed. */
- memset(new, 0, sizeof(*new));
- memset(&new->attrs[num], 0, sizeof(new->attrs[num]));
- memset(&attrs[num], 0, sizeof(attrs[num]));
- new->grp.name = "parameters";
- new->grp.attrs = attrs;
+ /* Extra pointer for NULL terminator */
+ new_attrs = krealloc(mk->mp->grp.attrs,
+ sizeof(mk->mp->grp.attrs[0]) * (mk->mp->num + 2),
+ GFP_KERNEL);
+ if (!new_attrs)
+ return -ENOMEM;
+ mk->mp->grp.attrs = new_attrs;
/* Tack new one on the end. */
- sysfs_attr_init(&new->attrs[num].mattr.attr);
- new->attrs[num].param = kp;
- new->attrs[num].mattr.show = param_attr_show;
- new->attrs[num].mattr.store = param_attr_store;
- new->attrs[num].mattr.attr.name = (char *)name;
- new->attrs[num].mattr.attr.mode = kp->perm;
- new->num = num+1;
+ sysfs_attr_init(&mk->mp->attrs[mk->mp->num].mattr.attr);
+ mk->mp->attrs[mk->mp->num].param = kp;
+ mk->mp->attrs[mk->mp->num].mattr.show = param_attr_show;
+ /* Do not allow runtime DAC changes to make param writable. */
+ if ((kp->perm & (S_IWUSR | S_IWGRP | S_IWOTH)) != 0)
+ mk->mp->attrs[mk->mp->num].mattr.store = param_attr_store;
+ mk->mp->attrs[mk->mp->num].mattr.attr.name = (char *)name;
+ mk->mp->attrs[mk->mp->num].mattr.attr.mode = kp->perm;
+ mk->mp->num++;
/* Fix up all the pointers, since krealloc can move us */
- for (num = 0; num < new->num; num++)
- new->grp.attrs[num] = &new->attrs[num].mattr.attr;
- new->grp.attrs[num] = NULL;
-
- mk->mp = new;
+ for (i = 0; i < mk->mp->num; i++)
+ mk->mp->grp.attrs[i] = &mk->mp->attrs[i].mattr.attr;
+ mk->mp->grp.attrs[mk->mp->num] = NULL;
return 0;
-
-fail_free_new:
- kfree(new);
-fail:
- mk->mp = NULL;
- return err;
}
#ifdef CONFIG_MODULES
static void free_module_param_attrs(struct module_kobject *mk)
{
- kfree(mk->mp->grp.attrs);
+ if (mk->mp)
+ kfree(mk->mp->grp.attrs);
kfree(mk->mp);
mk->mp = NULL;
}
@@ -695,8 +688,10 @@ int module_param_sysfs_setup(struct module *mod,
if (kparam[i].perm == 0)
continue;
err = add_sysfs_param(&mod->mkobj, &kparam[i], kparam[i].name);
- if (err)
+ if (err) {
+ free_module_param_attrs(&mod->mkobj);
return err;
+ }
params = true;
}
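The early returns in the rewritten add_sysfs_param() rely on a krealloc() property: on failure the old allocation is left untouched, so mk->mp stays consistent and the caller's free_module_param_attrs() can clean up. A generic sketch of the same grow-by-one idiom for a NULL-terminated array (names are illustrative, not kernel API):

#include <linux/slab.h>

/* Grow a NULL-terminated pointer array by one usable slot. */
static int grow_ptr_array(void ***arr, unsigned int nr)
{
	/* nr existing entries + 1 new entry + NULL terminator */
	void **tmp = krealloc(*arr, sizeof(void *) * (nr + 2), GFP_KERNEL);

	if (!tmp)
		return -ENOMEM;	/* *arr is still valid and terminated */
	tmp[nr + 1] = NULL;	/* re-terminate after the new slot */
	*arr = tmp;
	return 0;
}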
diff --git a/kernel/pid.c b/kernel/pid.c
index 82430c858d69..cd36a5e0d173 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -79,7 +79,10 @@ struct pid_namespace init_pid_ns = {
.level = 0,
.child_reaper = &init_task,
.user_ns = &init_user_ns,
- .proc_inum = PROC_PID_INIT_INO,
+ .ns.inum = PROC_PID_INIT_INO,
+#ifdef CONFIG_PID_NS
+ .ns.ops = &pidns_operations,
+#endif
};
EXPORT_SYMBOL_GPL(init_pid_ns);
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index bc6d6a89b6e6..a65ba137fd15 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -105,9 +105,10 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns
if (ns->pid_cachep == NULL)
goto out_free_map;
- err = proc_alloc_inum(&ns->proc_inum);
+ err = ns_alloc_inum(&ns->ns);
if (err)
goto out_free_map;
+ ns->ns.ops = &pidns_operations;
kref_init(&ns->kref);
ns->level = level;
@@ -142,7 +143,7 @@ static void destroy_pid_namespace(struct pid_namespace *ns)
{
int i;
- proc_free_inum(ns->proc_inum);
+ ns_free_inum(&ns->ns);
for (i = 0; i < PIDMAP_ENTRIES; i++)
kfree(ns->pidmap[i].page);
put_user_ns(ns->user_ns);
@@ -333,7 +334,12 @@ int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
return 0;
}
-static void *pidns_get(struct task_struct *task)
+static inline struct pid_namespace *to_pid_ns(struct ns_common *ns)
+{
+ return container_of(ns, struct pid_namespace, ns);
+}
+
+static struct ns_common *pidns_get(struct task_struct *task)
{
struct pid_namespace *ns;
@@ -343,18 +349,18 @@ static void *pidns_get(struct task_struct *task)
get_pid_ns(ns);
rcu_read_unlock();
- return ns;
+ return ns ? &ns->ns : NULL;
}
-static void pidns_put(void *ns)
+static void pidns_put(struct ns_common *ns)
{
- put_pid_ns(ns);
+ put_pid_ns(to_pid_ns(ns));
}
-static int pidns_install(struct nsproxy *nsproxy, void *ns)
+static int pidns_install(struct nsproxy *nsproxy, struct ns_common *ns)
{
struct pid_namespace *active = task_active_pid_ns(current);
- struct pid_namespace *ancestor, *new = ns;
+ struct pid_namespace *ancestor, *new = to_pid_ns(ns);
if (!ns_capable(new->user_ns, CAP_SYS_ADMIN) ||
!ns_capable(current_user_ns(), CAP_SYS_ADMIN))
@@ -382,19 +388,12 @@ static int pidns_install(struct nsproxy *nsproxy, void *ns)
return 0;
}
-static unsigned int pidns_inum(void *ns)
-{
- struct pid_namespace *pid_ns = ns;
- return pid_ns->proc_inum;
-}
-
const struct proc_ns_operations pidns_operations = {
.name = "pid",
.type = CLONE_NEWPID,
.get = pidns_get,
.put = pidns_put,
.install = pidns_install,
- .inum = pidns_inum,
};
static __init int pid_namespaces_init(void)
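The series-wide pattern here: the proc_ns_operations hooks stop taking void * and instead pass the struct ns_common embedded in each namespace, which the owning code converts back with container_of(). The shape of it, shown with a hypothetical namespace type:

#include <linux/ns_common.h>

/* Hypothetical example of the embedded-ns_common conversion. */
struct foo_namespace {
	int payload;
	struct ns_common ns;	/* embedded by value, not a pointer */
};

static inline struct foo_namespace *to_foo_ns(struct ns_common *ns)
{
	/* Recover the containing object from its member. */
	return container_of(ns, struct foo_namespace, ns);
}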
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 6e7708c2c21f..48b28d387c7f 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -94,7 +94,7 @@ config PM_STD_PARTITION
config PM_SLEEP
def_bool y
depends on SUSPEND || HIBERNATE_CALLBACKS
- select PM_RUNTIME
+ select PM
config PM_SLEEP_SMP
def_bool y
@@ -130,23 +130,19 @@ config PM_WAKELOCKS_GC
depends on PM_WAKELOCKS
default y
-config PM_RUNTIME
- bool "Run-time PM core functionality"
+config PM
+ bool "Device power management core functionality"
---help---
Enable functionality allowing I/O devices to be put into energy-saving
- (low power) states at run time (or autosuspended) after a specified
- period of inactivity and woken up in response to a hardware-generated
+ (low power) states, for example after a specified period of inactivity
+ (autosuspended), and woken up in response to a hardware-generated
wake-up event or a driver's request.
Hardware support is generally required for this functionality to work
and the bus type drivers of the buses the devices are on are
- responsible for the actual handling of the autosuspend requests and
+ responsible for the actual handling of device suspend requests and
wake-up events.
-config PM
- def_bool y
- depends on PM_SLEEP || PM_RUNTIME
-
config PM_DEBUG
bool "Power Management Debug Support"
depends on PM
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index f900dc9f6822..02d6b6d28796 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1857,10 +1857,16 @@ asmlinkage __visible int printk(const char *fmt, ...)
int r;
va_start(args, fmt);
- preempt_disable();
+
+ /*
+ * If a caller overrides the per_cpu printk_func, then it needs
+ * to disable preemption when calling printk(). Otherwise
+ * the printk_func should be set to the default. No need to
+ * disable preemption here.
+ */
vprintk_func = this_cpu_read(printk_func);
r = vprintk_func(fmt, args);
- preempt_enable();
+
va_end(args);
return r;
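A sketch of the contract the new comment states: whoever installs a per-cpu printk_func override must hold preemption off for the override's lifetime. Here my_vprintk is an assumed hook and vprintk_default is the stock handler:

#include <linux/percpu.h>
#include <linux/printk.h>

/* Assumed handler matching the printk_func_t signature. */
static int my_vprintk(const char *fmt, va_list args)
{
	return vprintk_default(fmt, args);	/* stand-in body */
}

static void printk_divert_example(void)
{
	preempt_disable();
	this_cpu_write(printk_func, my_vprintk);
	printk("routed through my_vprintk\n");
	this_cpu_write(printk_func, vprintk_default);
	preempt_enable();
}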
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b5797b78add6..c0accc00566e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7113,9 +7113,6 @@ void __init sched_init(void)
#ifdef CONFIG_RT_GROUP_SCHED
alloc_size += 2 * nr_cpu_ids * sizeof(void **);
#endif
-#ifdef CONFIG_CPUMASK_OFFSTACK
- alloc_size += num_possible_cpus() * cpumask_size();
-#endif
if (alloc_size) {
ptr = (unsigned long)kzalloc(alloc_size, GFP_NOWAIT);
@@ -7135,13 +7132,13 @@ void __init sched_init(void)
ptr += nr_cpu_ids * sizeof(void **);
#endif /* CONFIG_RT_GROUP_SCHED */
+ }
#ifdef CONFIG_CPUMASK_OFFSTACK
- for_each_possible_cpu(i) {
- per_cpu(load_balance_mask, i) = (void *)ptr;
- ptr += cpumask_size();
- }
-#endif /* CONFIG_CPUMASK_OFFSTACK */
+ for_each_possible_cpu(i) {
+ per_cpu(load_balance_mask, i) = (cpumask_var_t)kzalloc_node(
+ cpumask_size(), GFP_KERNEL, cpu_to_node(i));
}
+#endif /* CONFIG_CPUMASK_OFFSTACK */
init_rt_bandwidth(&def_rt_bandwidth,
global_rt_period(), global_rt_runtime());
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index e5db8c6feebd..b52092f2636d 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -570,24 +570,7 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se)
static
int dl_runtime_exceeded(struct rq *rq, struct sched_dl_entity *dl_se)
{
- int dmiss = dl_time_before(dl_se->deadline, rq_clock(rq));
- int rorun = dl_se->runtime <= 0;
-
- if (!rorun && !dmiss)
- return 0;
-
- /*
- * If we are beyond our current deadline and we are still
- * executing, then we have already used some of the runtime of
- * the next instance. Thus, if we do not account that, we are
- * stealing bandwidth from the system at each deadline miss!
- */
- if (dmiss) {
- dl_se->runtime = rorun ? dl_se->runtime : 0;
- dl_se->runtime -= rq_clock(rq) - dl_se->deadline;
- }
-
- return 1;
+ return (dl_se->runtime <= 0);
}
extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq);
@@ -826,10 +809,10 @@ enqueue_dl_entity(struct sched_dl_entity *dl_se,
* parameters of the task might need updating. Otherwise,
* we want a replenishment of its runtime.
*/
- if (!dl_se->dl_new && flags & ENQUEUE_REPLENISH)
- replenish_dl_entity(dl_se, pi_se);
- else
+ if (dl_se->dl_new || flags & ENQUEUE_WAKEUP)
update_dl_entity(dl_se, pi_se);
+ else if (flags & ENQUEUE_REPLENISH)
+ replenish_dl_entity(dl_se, pi_se);
__enqueue_dl_entity(dl_se);
}
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index df2cdf77f899..40667cbf371b 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4005,6 +4005,10 @@ void __start_cfs_bandwidth(struct cfs_bandwidth *cfs_b, bool force)
static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
{
+ /* init_cfs_bandwidth() was not called */
+ if (!cfs_b->throttled_cfs_rq.next)
+ return;
+
hrtimer_cancel(&cfs_b->period_timer);
hrtimer_cancel(&cfs_b->slack_timer);
}
@@ -4424,7 +4428,7 @@ static long effective_load(struct task_group *tg, int cpu, long wl, long wg)
* wl = S * s'_i; see (2)
*/
if (W > 0 && w < W)
- wl = (w * tg->shares) / W;
+ wl = (w * (long)tg->shares) / W;
else
wl = tg->shares;
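The (long) cast in effective_load() fixes a signed/unsigned promotion bug: tg->shares is unsigned long, so a negative weight delta w was converted to a huge unsigned value before the multiply. A self-contained illustration:

#include <stdio.h>

int main(void)
{
	long w = -512;			/* negative weight delta */
	unsigned long shares = 1024, W = 2048;

	/* w promoted to unsigned long: prints a huge bogus value */
	printf("%lu\n", (w * shares) / W);
	/* arithmetic stays signed: prints -256 as intended */
	printf("%ld\n", (w * (long)shares) / W);
	return 0;
}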
diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c
index 00fe55cc5a82..b6e4c16377c7 100644
--- a/kernel/stacktrace.c
+++ b/kernel/stacktrace.c
@@ -25,6 +25,38 @@ void print_stack_trace(struct stack_trace *trace, int spaces)
}
EXPORT_SYMBOL_GPL(print_stack_trace);
+int snprint_stack_trace(char *buf, size_t size,
+ struct stack_trace *trace, int spaces)
+{
+ int i;
+ unsigned long ip;
+ int generated;
+ int total = 0;
+
+ if (WARN_ON(!trace->entries))
+ return 0;
+
+ for (i = 0; i < trace->nr_entries; i++) {
+ ip = trace->entries[i];
+ generated = snprintf(buf, size, "%*c[<%p>] %pS\n",
+ 1 + spaces, ' ', (void *) ip, (void *) ip);
+
+ total += generated;
+
+ /* Assume that generated isn't a negative number */
+ if (generated >= size) {
+ buf += size;
+ size = 0;
+ } else {
+ buf += generated;
+ size -= generated;
+ }
+ }
+
+ return total;
+}
+EXPORT_SYMBOL_GPL(snprint_stack_trace);
+
/*
* Architectures that do not implement save_stack_trace_tsk or
* save_stack_trace_regs get this weak alias and a once-per-bootup warning
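A usage sketch for the new snprint_stack_trace() helper: capture the current stack with save_stack_trace() and render it into a fixed-size buffer (sizes are arbitrary):

#include <linux/kernel.h>
#include <linux/stacktrace.h>

static void show_current_stack(void)
{
	unsigned long entries[16];
	char buf[512];
	struct stack_trace trace = {
		.entries	= entries,
		.max_entries	= ARRAY_SIZE(entries),
	};

	save_stack_trace(&trace);
	/* Like snprintf(): returns the would-be length; output is
	 * truncated to fit the buffer. */
	snprint_stack_trace(buf, sizeof(buf), &trace, 1);
	pr_info("%s", buf);
}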
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 61eea02b53f5..5adcb0ae3a58 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -226,3 +226,6 @@ cond_syscall(sys_seccomp);
/* access BPF programs and maps */
cond_syscall(sys_bpf);
+
+/* execveat */
+cond_syscall(sys_execveat);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 7c54ff79afd7..137c7f69b264 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -623,6 +623,13 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+ {
+ .procname = "tracepoint_printk",
+ .data = &tracepoint_printk,
+ .maxlen = sizeof(tracepoint_printk),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
#endif
#ifdef CONFIG_KEXEC
{
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index f622cf28628a..c09c07817d7a 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -1,6 +1,6 @@
obj-y += time.o timer.o hrtimer.o itimer.o posix-timers.o posix-cpu-timers.o
obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o
-obj-y += timeconv.o posix-clock.o alarmtimer.o
+obj-y += timeconv.o timecounter.o posix-clock.o alarmtimer.o
obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o
obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index b79f39bda7e1..4892352f0e49 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -34,82 +34,6 @@
#include "tick-internal.h"
#include "timekeeping_internal.h"
-void timecounter_init(struct timecounter *tc,
- const struct cyclecounter *cc,
- u64 start_tstamp)
-{
- tc->cc = cc;
- tc->cycle_last = cc->read(cc);
- tc->nsec = start_tstamp;
-}
-EXPORT_SYMBOL_GPL(timecounter_init);
-
-/**
- * timecounter_read_delta - get nanoseconds since last call of this function
- * @tc: Pointer to time counter
- *
- * When the underlying cycle counter runs over, this will be handled
- * correctly as long as it does not run over more than once between
- * calls.
- *
- * The first call to this function for a new time counter initializes
- * the time tracking and returns an undefined result.
- */
-static u64 timecounter_read_delta(struct timecounter *tc)
-{
- cycle_t cycle_now, cycle_delta;
- u64 ns_offset;
-
- /* read cycle counter: */
- cycle_now = tc->cc->read(tc->cc);
-
- /* calculate the delta since the last timecounter_read_delta(): */
- cycle_delta = (cycle_now - tc->cycle_last) & tc->cc->mask;
-
- /* convert to nanoseconds: */
- ns_offset = cyclecounter_cyc2ns(tc->cc, cycle_delta);
-
- /* update time stamp of timecounter_read_delta() call: */
- tc->cycle_last = cycle_now;
-
- return ns_offset;
-}
-
-u64 timecounter_read(struct timecounter *tc)
-{
- u64 nsec;
-
- /* increment time by nanoseconds since last call */
- nsec = timecounter_read_delta(tc);
- nsec += tc->nsec;
- tc->nsec = nsec;
-
- return nsec;
-}
-EXPORT_SYMBOL_GPL(timecounter_read);
-
-u64 timecounter_cyc2time(struct timecounter *tc,
- cycle_t cycle_tstamp)
-{
- u64 cycle_delta = (cycle_tstamp - tc->cycle_last) & tc->cc->mask;
- u64 nsec;
-
- /*
- * Instead of always treating cycle_tstamp as more recent
- * than tc->cycle_last, detect when it is too far in the
- * future and treat it as old time stamp instead.
- */
- if (cycle_delta > tc->cc->mask / 2) {
- cycle_delta = (tc->cycle_last - cycle_tstamp) & tc->cc->mask;
- nsec = tc->nsec - cyclecounter_cyc2ns(tc->cc, cycle_delta);
- } else {
- nsec = cyclecounter_cyc2ns(tc->cc, cycle_delta) + tc->nsec;
- }
-
- return nsec;
-}
-EXPORT_SYMBOL_GPL(timecounter_cyc2time);
-
/**
* clocks_calc_mult_shift - calculate mult/shift factors for scaled math of clocks
* @mult: pointer to mult variable
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 4d54b7540585..1363d58f07e9 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -847,7 +847,6 @@ void tick_nohz_idle_enter(void)
local_irq_enable();
}
-EXPORT_SYMBOL_GPL(tick_nohz_idle_enter);
/**
* tick_nohz_irq_exit - update next tick event from interrupt exit
@@ -974,7 +973,6 @@ void tick_nohz_idle_exit(void)
local_irq_enable();
}
-EXPORT_SYMBOL_GPL(tick_nohz_idle_exit);
static int tick_nohz_reprogram(struct tick_sched *ts, ktime_t now)
{
diff --git a/kernel/time/time.c b/kernel/time/time.c
index 65015ff2f07c..6390517e77d4 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -741,6 +741,7 @@ u64 nsecs_to_jiffies64(u64 n)
return div_u64(n * 9, (9ull * NSEC_PER_SEC + HZ / 2) / HZ);
#endif
}
+EXPORT_SYMBOL(nsecs_to_jiffies64);
/**
* nsecs_to_jiffies - Convert nsecs in u64 to jiffies
diff --git a/kernel/time/timecounter.c b/kernel/time/timecounter.c
new file mode 100644
index 000000000000..4687b3104bae
--- /dev/null
+++ b/kernel/time/timecounter.c
@@ -0,0 +1,112 @@
+/*
+ * linux/kernel/time/timecounter.c
+ *
+ * based on code that migrated away from
+ * linux/kernel/time/clocksource.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/export.h>
+#include <linux/timecounter.h>
+
+void timecounter_init(struct timecounter *tc,
+ const struct cyclecounter *cc,
+ u64 start_tstamp)
+{
+ tc->cc = cc;
+ tc->cycle_last = cc->read(cc);
+ tc->nsec = start_tstamp;
+ tc->mask = (1ULL << cc->shift) - 1;
+ tc->frac = 0;
+}
+EXPORT_SYMBOL_GPL(timecounter_init);
+
+/**
+ * timecounter_read_delta - get nanoseconds since last call of this function
+ * @tc: Pointer to time counter
+ *
+ * When the underlying cycle counter runs over, this will be handled
+ * correctly as long as it does not run over more than once between
+ * calls.
+ *
+ * The first call to this function for a new time counter initializes
+ * the time tracking and returns an undefined result.
+ */
+static u64 timecounter_read_delta(struct timecounter *tc)
+{
+ cycle_t cycle_now, cycle_delta;
+ u64 ns_offset;
+
+ /* read cycle counter: */
+ cycle_now = tc->cc->read(tc->cc);
+
+ /* calculate the delta since the last timecounter_read_delta(): */
+ cycle_delta = (cycle_now - tc->cycle_last) & tc->cc->mask;
+
+ /* convert to nanoseconds: */
+ ns_offset = cyclecounter_cyc2ns(tc->cc, cycle_delta,
+ tc->mask, &tc->frac);
+
+ /* update time stamp of timecounter_read_delta() call: */
+ tc->cycle_last = cycle_now;
+
+ return ns_offset;
+}
+
+u64 timecounter_read(struct timecounter *tc)
+{
+ u64 nsec;
+
+ /* increment time by nanoseconds since last call */
+ nsec = timecounter_read_delta(tc);
+ nsec += tc->nsec;
+ tc->nsec = nsec;
+
+ return nsec;
+}
+EXPORT_SYMBOL_GPL(timecounter_read);
+
+/*
+ * This is like cyclecounter_cyc2ns(), but it is used for computing a
+ * time previous to the time stored in the cycle counter.
+ */
+static u64 cc_cyc2ns_backwards(const struct cyclecounter *cc,
+ cycle_t cycles, u64 mask, u64 frac)
+{
+ u64 ns = (u64) cycles;
+
+ ns = ((ns * cc->mult) - frac) >> cc->shift;
+
+ return ns;
+}
+
+u64 timecounter_cyc2time(struct timecounter *tc,
+ cycle_t cycle_tstamp)
+{
+ u64 delta = (cycle_tstamp - tc->cycle_last) & tc->cc->mask;
+ u64 nsec = tc->nsec, frac = tc->frac;
+
+ /*
+ * Instead of always treating cycle_tstamp as more recent
+ * than tc->cycle_last, detect when it is too far in the
+ * future and treat it as old time stamp instead.
+ */
+ if (delta > tc->cc->mask / 2) {
+ delta = (tc->cycle_last - cycle_tstamp) & tc->cc->mask;
+ nsec -= cc_cyc2ns_backwards(tc->cc, delta, tc->mask, frac);
+ } else {
+ nsec += cyclecounter_cyc2ns(tc->cc, delta, tc->mask, &frac);
+ }
+
+ return nsec;
+}
+EXPORT_SYMBOL_GPL(timecounter_cyc2time);
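The new mask/frac pair carries the sub-nanosecond remainder from one conversion to the next, so a long-lived timecounter no longer drops a fraction of a nanosecond per read. The forward conversion this file depends on, cyclecounter_cyc2ns() in the new linux/timecounter.h header, is roughly:

static inline u64 cyclecounter_cyc2ns(const struct cyclecounter *cc,
				      cycle_t cycles, u64 mask, u64 *frac)
{
	u64 ns = (u64) cycles;

	ns = (ns * cc->mult) + *frac;	/* carry the old remainder in */
	*frac = ns & mask;		/* keep the new sub-ns remainder */
	return ns >> cc->shift;
}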
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 67d6369ddf83..979ccde26720 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -55,7 +55,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
obj-$(CONFIG_EVENT_TRACING) += trace_events_trigger.o
obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
obj-$(CONFIG_TRACEPOINTS) += power-traces.o
-ifeq ($(CONFIG_PM_RUNTIME),y)
+ifeq ($(CONFIG_PM),y)
obj-$(CONFIG_TRACEPOINTS) += rpm-traces.o
endif
ifeq ($(CONFIG_TRACING),y)
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 11b9cb36092b..483cecfa5c17 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -1477,9 +1477,6 @@ static int blk_trace_remove_queue(struct request_queue *q)
if (atomic_dec_and_test(&blk_probes_ref))
blk_unregister_tracepoints();
- spin_lock_irq(&running_trace_lock);
- list_del(&bt->running_list);
- spin_unlock_irq(&running_trace_lock);
blk_trace_free(bt);
return 0;
}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index ab76b7bcb36a..2e767972e99c 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -63,6 +63,10 @@ static bool __read_mostly tracing_selftest_running;
*/
bool __read_mostly tracing_selftest_disabled;
+/* Pipe tracepoints to printk */
+struct trace_iterator *tracepoint_print_iter;
+int tracepoint_printk;
+
/* For tracers that don't implement custom flags */
static struct tracer_opt dummy_tracer_opt[] = {
{ }
@@ -193,6 +197,13 @@ static int __init set_trace_boot_clock(char *str)
}
__setup("trace_clock=", set_trace_boot_clock);
+static int __init set_tracepoint_printk(char *str)
+{
+ if (strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0)
+ tracepoint_printk = 1;
+ return 1;
+}
+__setup("tp_printk", set_tracepoint_printk);
unsigned long long ns2usecs(cycle_t nsec)
{
@@ -6898,6 +6909,19 @@ out:
return ret;
}
+void __init trace_init(void)
+{
+ if (tracepoint_printk) {
+ tracepoint_print_iter =
+ kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
+ if (WARN_ON(!tracepoint_print_iter))
+ tracepoint_printk = 0;
+ }
+ tracer_alloc_buffers();
+ init_ftrace_syscalls();
+ trace_event_init();
+}
+
__init static int clear_boot_tracer(void)
{
/*
@@ -6917,6 +6941,5 @@ __init static int clear_boot_tracer(void)
return 0;
}
-early_initcall(tracer_alloc_buffers);
fs_initcall(tracer_init_debugfs);
late_initcall(clear_boot_tracer);
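Since set_tracepoint_printk() receives the raw text following the parameter name (including any '='), the accepted boot-command forms work out to (illustrative):

/* tp_printk        -> str == "":     enable                        */
/* tp_printk=1      -> str == "=1":   enable (anything but =0/=off) */
/* tp_printk=0      -> str == "=0":   leave disabled                */
/* tp_printk=off    -> str == "=off": leave disabled                */

At runtime the same flag is reachable through the kernel.tracepoint_printk sysctl added in kernel/sysctl.c above.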
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 3255dfb054a0..8de48bac1ce2 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -1301,4 +1301,18 @@ int perf_ftrace_event_register(struct ftrace_event_call *call,
#define perf_ftrace_event_register NULL
#endif
+#ifdef CONFIG_FTRACE_SYSCALLS
+void init_ftrace_syscalls(void);
+#else
+static inline void init_ftrace_syscalls(void) { }
+#endif
+
+#ifdef CONFIG_EVENT_TRACING
+void trace_event_init(void);
+#else
+static inline void __init trace_event_init(void) { }
+#endif
+
+extern struct trace_iterator *tracepoint_print_iter;
+
#endif /* _LINUX_KERNEL_TRACE_H */
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index d0e4f92b5eb6..366a78a3e61e 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -212,8 +212,40 @@ void *ftrace_event_buffer_reserve(struct ftrace_event_buffer *fbuffer,
}
EXPORT_SYMBOL_GPL(ftrace_event_buffer_reserve);
+static DEFINE_SPINLOCK(tracepoint_iter_lock);
+
+static void output_printk(struct ftrace_event_buffer *fbuffer)
+{
+ struct ftrace_event_call *event_call;
+ struct trace_event *event;
+ unsigned long flags;
+ struct trace_iterator *iter = tracepoint_print_iter;
+
+ if (!iter)
+ return;
+
+ event_call = fbuffer->ftrace_file->event_call;
+ if (!event_call || !event_call->event.funcs ||
+ !event_call->event.funcs->trace)
+ return;
+
+ event = &fbuffer->ftrace_file->event_call->event;
+
+ spin_lock_irqsave(&tracepoint_iter_lock, flags);
+ trace_seq_init(&iter->seq);
+ iter->ent = fbuffer->entry;
+ event_call->event.funcs->trace(iter, 0, event);
+ trace_seq_putc(&iter->seq, 0);
+ printk("%s", iter->seq.buffer);
+
+ spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
+}
+
void ftrace_event_buffer_commit(struct ftrace_event_buffer *fbuffer)
{
+ if (tracepoint_printk)
+ output_printk(fbuffer);
+
event_trigger_unlock_commit(fbuffer->ftrace_file, fbuffer->buffer,
fbuffer->event, fbuffer->entry,
fbuffer->flags, fbuffer->pc);
@@ -2480,8 +2512,14 @@ static __init int event_trace_init(void)
#endif
return 0;
}
-early_initcall(event_trace_memsetup);
-core_initcall(event_trace_enable);
+
+void __init trace_event_init(void)
+{
+ event_trace_memsetup();
+ init_ftrace_syscalls();
+ event_trace_enable();
+}
+
fs_initcall(event_trace_init);
#ifdef CONFIG_FTRACE_STARTUP_TEST
diff --git a/kernel/trace/trace_kdb.c b/kernel/trace/trace_kdb.c
index b0b1c44e923a..3ccf5c2c1320 100644
--- a/kernel/trace/trace_kdb.c
+++ b/kernel/trace/trace_kdb.c
@@ -132,8 +132,8 @@ static int kdb_ftdump(int argc, const char **argv)
static __init int kdb_ftrace_register(void)
{
- kdb_register_repeat("ftdump", kdb_ftdump, "[skip_#lines] [cpu]",
- "Dump ftrace log", 0, KDB_REPEAT_NONE);
+ kdb_register_flags("ftdump", kdb_ftdump, "[skip_#lines] [cpu]",
+ "Dump ftrace log", 0, KDB_ENABLE_ALWAYS_SAFE);
return 0;
}
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index dfe00a4f3f3e..c6ee36fcbf90 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -514,7 +514,7 @@ unsigned long __init __weak arch_syscall_addr(int nr)
return (unsigned long)sys_call_table[nr];
}
-static int __init init_ftrace_syscalls(void)
+void __init init_ftrace_syscalls(void)
{
struct syscall_metadata *meta;
unsigned long addr;
@@ -524,7 +524,7 @@ static int __init init_ftrace_syscalls(void)
GFP_KERNEL);
if (!syscalls_metadata) {
WARN_ON(1);
- return -ENOMEM;
+ return;
}
for (i = 0; i < NR_syscalls; i++) {
@@ -536,10 +536,7 @@ static int __init init_ftrace_syscalls(void)
meta->syscall_nr = i;
syscalls_metadata[i] = meta;
}
-
- return 0;
}
-early_initcall(init_ftrace_syscalls);
#ifdef CONFIG_PERF_EVENTS
diff --git a/kernel/uid16.c b/kernel/uid16.c
index 602e5bbbceff..d58cc4d8f0d1 100644
--- a/kernel/uid16.c
+++ b/kernel/uid16.c
@@ -176,7 +176,7 @@ SYSCALL_DEFINE2(setgroups16, int, gidsetsize, old_gid_t __user *, grouplist)
struct group_info *group_info;
int retval;
- if (!ns_capable(current_user_ns(), CAP_SETGID))
+ if (!may_setgroups())
return -EPERM;
if ((unsigned)gidsetsize > NGROUPS_MAX)
return -EINVAL;
diff --git a/kernel/user.c b/kernel/user.c
index 4efa39350e44..b069ccbfb0b0 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -50,7 +50,11 @@ struct user_namespace init_user_ns = {
.count = ATOMIC_INIT(3),
.owner = GLOBAL_ROOT_UID,
.group = GLOBAL_ROOT_GID,
- .proc_inum = PROC_USER_INIT_INO,
+ .ns.inum = PROC_USER_INIT_INO,
+#ifdef CONFIG_USER_NS
+ .ns.ops = &userns_operations,
+#endif
+ .flags = USERNS_INIT_FLAGS,
#ifdef CONFIG_PERSISTENT_KEYRINGS
.persistent_keyring_register_sem =
__RWSEM_INITIALIZER(init_user_ns.persistent_keyring_register_sem),
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index aa312b0dc3ec..4109f8320684 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -24,6 +24,7 @@
#include <linux/fs_struct.h>
static struct kmem_cache *user_ns_cachep __read_mostly;
+static DEFINE_MUTEX(userns_state_mutex);
static bool new_idmap_permitted(const struct file *file,
struct user_namespace *ns, int cap_setid,
@@ -86,11 +87,12 @@ int create_user_ns(struct cred *new)
if (!ns)
return -ENOMEM;
- ret = proc_alloc_inum(&ns->proc_inum);
+ ret = ns_alloc_inum(&ns->ns);
if (ret) {
kmem_cache_free(user_ns_cachep, ns);
return ret;
}
+ ns->ns.ops = &userns_operations;
atomic_set(&ns->count, 1);
/* Leave the new->user_ns reference with the new user namespace. */
@@ -99,6 +101,11 @@ int create_user_ns(struct cred *new)
ns->owner = owner;
ns->group = group;
+ /* Inherit USERNS_SETGROUPS_ALLOWED from our parent */
+ mutex_lock(&userns_state_mutex);
+ ns->flags = parent_ns->flags;
+ mutex_unlock(&userns_state_mutex);
+
set_cred_user_ns(new, ns);
#ifdef CONFIG_PERSISTENT_KEYRINGS
@@ -136,7 +143,7 @@ void free_user_ns(struct user_namespace *ns)
#ifdef CONFIG_PERSISTENT_KEYRINGS
key_put(ns->persistent_keyring_register);
#endif
- proc_free_inum(ns->proc_inum);
+ ns_free_inum(&ns->ns);
kmem_cache_free(user_ns_cachep, ns);
ns = parent;
} while (atomic_dec_and_test(&parent->count));
@@ -583,9 +590,6 @@ static bool mappings_overlap(struct uid_gid_map *new_map,
return false;
}
-
-static DEFINE_MUTEX(id_map_mutex);
-
static ssize_t map_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos,
int cap_setid,
@@ -602,7 +606,7 @@ static ssize_t map_write(struct file *file, const char __user *buf,
ssize_t ret = -EINVAL;
/*
- * The id_map_mutex serializes all writes to any given map.
+ * The userns_state_mutex serializes all writes to any given map.
*
* Any map is only ever written once.
*
@@ -620,7 +624,7 @@ static ssize_t map_write(struct file *file, const char __user *buf,
* order and smp_rmb() is guaranteed that we don't have crazy
* architectures returning stale data.
*/
- mutex_lock(&id_map_mutex);
+ mutex_lock(&userns_state_mutex);
ret = -EPERM;
/* Only allow one successful write to the map */
@@ -640,7 +644,7 @@ static ssize_t map_write(struct file *file, const char __user *buf,
if (!page)
goto out;
- /* Only allow <= page size writes at the beginning of the file */
+ /* Only allow < page size writes at the beginning of the file */
ret = -EINVAL;
if ((*ppos != 0) || (count >= PAGE_SIZE))
goto out;
@@ -750,7 +754,7 @@ static ssize_t map_write(struct file *file, const char __user *buf,
*ppos = count;
ret = count;
out:
- mutex_unlock(&id_map_mutex);
+ mutex_unlock(&userns_state_mutex);
if (page)
free_page(page);
return ret;
@@ -812,16 +816,21 @@ static bool new_idmap_permitted(const struct file *file,
struct user_namespace *ns, int cap_setid,
struct uid_gid_map *new_map)
{
- /* Allow mapping to your own filesystem ids */
- if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1)) {
+ const struct cred *cred = file->f_cred;
+ /* Don't allow mappings that would let a process do anything that
+ * would not already be allowed without unprivileged mappings.
+ */
+ if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1) &&
+ uid_eq(ns->owner, cred->euid)) {
u32 id = new_map->extent[0].lower_first;
if (cap_setid == CAP_SETUID) {
kuid_t uid = make_kuid(ns->parent, id);
- if (uid_eq(uid, file->f_cred->fsuid))
+ if (uid_eq(uid, cred->euid))
return true;
} else if (cap_setid == CAP_SETGID) {
kgid_t gid = make_kgid(ns->parent, id);
- if (gid_eq(gid, file->f_cred->fsgid))
+ if (!(ns->flags & USERNS_SETGROUPS_ALLOWED) &&
+ gid_eq(gid, cred->egid))
return true;
}
}
@@ -841,7 +850,106 @@ static bool new_idmap_permitted(const struct file *file,
return false;
}
-static void *userns_get(struct task_struct *task)
+int proc_setgroups_show(struct seq_file *seq, void *v)
+{
+ struct user_namespace *ns = seq->private;
+ unsigned long userns_flags = ACCESS_ONCE(ns->flags);
+
+ seq_printf(seq, "%s\n",
+ (userns_flags & USERNS_SETGROUPS_ALLOWED) ?
+ "allow" : "deny");
+ return 0;
+}
+
+ssize_t proc_setgroups_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct seq_file *seq = file->private_data;
+ struct user_namespace *ns = seq->private;
+ char kbuf[8], *pos;
+ bool setgroups_allowed;
+ ssize_t ret;
+
+ /* Only allow a very narrow range of strings to be written */
+ ret = -EINVAL;
+ if ((*ppos != 0) || (count >= sizeof(kbuf)))
+ goto out;
+
+ /* What was written? */
+ ret = -EFAULT;
+ if (copy_from_user(kbuf, buf, count))
+ goto out;
+ kbuf[count] = '\0';
+ pos = kbuf;
+
+ /* What is being requested? */
+ ret = -EINVAL;
+ if (strncmp(pos, "allow", 5) == 0) {
+ pos += 5;
+ setgroups_allowed = true;
+ } else if (strncmp(pos, "deny", 4) == 0) {
+ pos += 4;
+ setgroups_allowed = false;
+ } else {
+ goto out;
+ }
+
+ /* Verify there is no trailing junk on the line */
+ pos = skip_spaces(pos);
+ if (*pos != '\0')
+ goto out;
+
+ ret = -EPERM;
+ mutex_lock(&userns_state_mutex);
+ if (setgroups_allowed) {
+ /* Enabling setgroups after setgroups has been disabled
+ * is not allowed.
+ */
+ if (!(ns->flags & USERNS_SETGROUPS_ALLOWED))
+ goto out_unlock;
+ } else {
+ /* Permanently disabling setgroups after setgroups has
+ * been enabled by writing the gid_map is not allowed.
+ */
+ if (ns->gid_map.nr_extents != 0)
+ goto out_unlock;
+ ns->flags &= ~USERNS_SETGROUPS_ALLOWED;
+ }
+ mutex_unlock(&userns_state_mutex);
+
+ /* Report a successful write */
+ *ppos = count;
+ ret = count;
+out:
+ return ret;
+out_unlock:
+ mutex_unlock(&userns_state_mutex);
+ goto out;
+}
+
+bool userns_may_setgroups(const struct user_namespace *ns)
+{
+ bool allowed;
+
+ mutex_lock(&userns_state_mutex);
+ /* It is not safe to use setgroups until a gid mapping in
+ * the user namespace has been established.
+ */
+ allowed = ns->gid_map.nr_extents != 0;
+ /* Is setgroups allowed? */
+ allowed = allowed && (ns->flags & USERNS_SETGROUPS_ALLOWED);
+ mutex_unlock(&userns_state_mutex);
+
+ return allowed;
+}
+
+static inline struct user_namespace *to_user_ns(struct ns_common *ns)
+{
+ return container_of(ns, struct user_namespace, ns);
+}
+
+static struct ns_common *userns_get(struct task_struct *task)
{
struct user_namespace *user_ns;
@@ -849,17 +957,17 @@ static void *userns_get(struct task_struct *task)
user_ns = get_user_ns(__task_cred(task)->user_ns);
rcu_read_unlock();
- return user_ns;
+ return user_ns ? &user_ns->ns : NULL;
}
-static void userns_put(void *ns)
+static void userns_put(struct ns_common *ns)
{
- put_user_ns(ns);
+ put_user_ns(to_user_ns(ns));
}
-static int userns_install(struct nsproxy *nsproxy, void *ns)
+static int userns_install(struct nsproxy *nsproxy, struct ns_common *ns)
{
- struct user_namespace *user_ns = ns;
+ struct user_namespace *user_ns = to_user_ns(ns);
struct cred *cred;
/* Don't allow gaining capabilities by reentering
@@ -888,19 +996,12 @@ static int userns_install(struct nsproxy *nsproxy, void *ns)
return commit_creds(cred);
}
-static unsigned int userns_inum(void *ns)
-{
- struct user_namespace *user_ns = ns;
- return user_ns->proc_inum;
-}
-
const struct proc_ns_operations userns_operations = {
.name = "user",
.type = CLONE_NEWUSER,
.get = userns_get,
.put = userns_put,
.install = userns_install,
- .inum = userns_inum,
};
static __init int user_namespaces_init(void)
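Taken together with the may_setgroups() hooks in kernel/groups.c and kernel/uid16.c, the userspace contract becomes: an unprivileged process must write "deny" to /proc/self/setgroups before it may write its own gid_map. A minimal sketch (IDs arbitrary; error handling elided):

#define _GNU_SOURCE
#include <fcntl.h>
#include <sched.h>
#include <string.h>
#include <unistd.h>

static void write_file(const char *path, const char *buf)
{
	int fd = open(path, O_WRONLY);

	if (fd >= 0) {
		write(fd, buf, strlen(buf));
		close(fd);
	}
}

int main(void)
{
	unshare(CLONE_NEWUSER);
	write_file("/proc/self/uid_map", "0 1000 1");
	/* Must precede the gid_map write for an unprivileged process. */
	write_file("/proc/self/setgroups", "deny");
	write_file("/proc/self/gid_map", "0 1000 1");
	return 0;
}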
diff --git a/kernel/utsname.c b/kernel/utsname.c
index 883aaaa7de8a..831ea7108232 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -42,12 +42,14 @@ static struct uts_namespace *clone_uts_ns(struct user_namespace *user_ns,
if (!ns)
return ERR_PTR(-ENOMEM);
- err = proc_alloc_inum(&ns->proc_inum);
+ err = ns_alloc_inum(&ns->ns);
if (err) {
kfree(ns);
return ERR_PTR(err);
}
+ ns->ns.ops = &utsns_operations;
+
down_read(&uts_sem);
memcpy(&ns->name, &old_ns->name, sizeof(ns->name));
ns->user_ns = get_user_ns(user_ns);
@@ -84,11 +86,16 @@ void free_uts_ns(struct kref *kref)
ns = container_of(kref, struct uts_namespace, kref);
put_user_ns(ns->user_ns);
- proc_free_inum(ns->proc_inum);
+ ns_free_inum(&ns->ns);
kfree(ns);
}
-static void *utsns_get(struct task_struct *task)
+static inline struct uts_namespace *to_uts_ns(struct ns_common *ns)
+{
+ return container_of(ns, struct uts_namespace, ns);
+}
+
+static struct ns_common *utsns_get(struct task_struct *task)
{
struct uts_namespace *ns = NULL;
struct nsproxy *nsproxy;
@@ -101,17 +108,17 @@ static void *utsns_get(struct task_struct *task)
}
task_unlock(task);
- return ns;
+ return ns ? &ns->ns : NULL;
}
-static void utsns_put(void *ns)
+static void utsns_put(struct ns_common *ns)
{
- put_uts_ns(ns);
+ put_uts_ns(to_uts_ns(ns));
}
-static int utsns_install(struct nsproxy *nsproxy, void *new)
+static int utsns_install(struct nsproxy *nsproxy, struct ns_common *new)
{
- struct uts_namespace *ns = new;
+ struct uts_namespace *ns = to_uts_ns(new);
if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) ||
!ns_capable(current_user_ns(), CAP_SYS_ADMIN))
@@ -123,18 +130,10 @@ static int utsns_install(struct nsproxy *nsproxy, void *new)
return 0;
}
-static unsigned int utsns_inum(void *vp)
-{
- struct uts_namespace *ns = vp;
-
- return ns->proc_inum;
-}
-
const struct proc_ns_operations utsns_operations = {
.name = "uts",
.type = CLONE_NEWUTS,
.get = utsns_get,
.put = utsns_put,
.install = utsns_install,
- .inum = utsns_inum,
};