aboutsummaryrefslogtreecommitdiff
path: root/arch/s390/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390/kernel')
-rw-r--r--arch/s390/kernel/crash_dump.c2
-rw-r--r--arch/s390/kernel/entry.h2
-rw-r--r--arch/s390/kernel/ipl.c16
-rw-r--r--arch/s390/kernel/module.c3
-rw-r--r--arch/s390/kernel/perf_cpum_cf.c452
-rw-r--r--arch/s390/kernel/perf_cpum_sf.c16
-rw-r--r--arch/s390/kernel/perf_pai_crypto.c19
-rw-r--r--arch/s390/kernel/perf_pai_ext.c23
-rw-r--r--arch/s390/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/s390/kernel/time.c5
-rw-r--r--arch/s390/kernel/uv.c2
11 files changed, 388 insertions, 153 deletions
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index 8a617be28bb4..7af69948b290 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -568,9 +568,9 @@ static size_t get_elfcorehdr_size(int mem_chunk_cnt)
int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size)
{
Elf64_Phdr *phdr_notes, *phdr_loads;
+ size_t alloc_size;
int mem_chunk_cnt;
void *ptr, *hdr;
- u32 alloc_size;
u64 hdr_off;
/* If we are not in kdump or zfcp/nvme dump mode return */
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index 34674e38826b..9f41853f36b9 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -34,14 +34,12 @@ void kernel_stack_overflow(struct pt_regs * regs);
void handle_signal32(struct ksignal *ksig, sigset_t *oldset,
struct pt_regs *regs);
-void __init init_IRQ(void);
void do_io_irq(struct pt_regs *regs);
void do_ext_irq(struct pt_regs *regs);
void do_restart(void *arg);
void __init startup_init(void);
void die(struct pt_regs *regs, const char *str);
int setup_profiling_timer(unsigned int multiplier);
-void __init time_init(void);
unsigned long prepare_ftrace_return(unsigned long parent, unsigned long sp, unsigned long ip);
struct s390_mmap_arg_struct;
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index f44f70de9661..85a00d97a314 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -176,6 +176,8 @@ static bool reipl_fcp_clear;
static bool reipl_ccw_clear;
static bool reipl_eckd_clear;
+static unsigned long os_info_flags;
+
static inline int __diag308(unsigned long subcode, unsigned long addr)
{
union register_pair r1;
@@ -1938,6 +1940,20 @@ static void dump_reipl_run(struct shutdown_trigger *trigger)
struct lowcore *abs_lc;
unsigned int csum;
+ /*
+ * Set REIPL_CLEAR flag in os_info flags entry indicating
+ * 'clear' sysfs attribute has been set on the panicked system
+ * for specified reipl type.
+ * Always set for IPL_TYPE_NSS and IPL_TYPE_UNKNOWN.
+ */
+ if ((reipl_type == IPL_TYPE_CCW && reipl_ccw_clear) ||
+ (reipl_type == IPL_TYPE_ECKD && reipl_eckd_clear) ||
+ (reipl_type == IPL_TYPE_FCP && reipl_fcp_clear) ||
+ (reipl_type == IPL_TYPE_NVME && reipl_nvme_clear) ||
+ reipl_type == IPL_TYPE_NSS ||
+ reipl_type == IPL_TYPE_UNKNOWN)
+ os_info_flags |= OS_INFO_FLAG_REIPL_CLEAR;
+ os_info_entry_add(OS_INFO_FLAGS_ENTRY, &os_info_flags, sizeof(os_info_flags));
csum = (__force unsigned int)
csum_partial(reipl_block_actual, reipl_block_actual->hdr.len, 0);
abs_lc = get_abs_lowcore();
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
index f1b35dcdf3eb..42215f9404af 100644
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@@ -352,7 +352,8 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
rc = apply_rela_bits(loc, val, 0, 64, 0, write);
else if (r_type == R_390_GOTENT ||
r_type == R_390_GOTPLTENT) {
- val += (Elf_Addr) me->mem[MOD_TEXT].base - loc;
+ val += (Elf_Addr)me->mem[MOD_TEXT].base +
+ me->arch.got_offset - loc;
rc = apply_rela_bits(loc, val, 1, 32, 1, write);
}
break;
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index cf1b6e8a708d..90679143534b 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -76,6 +76,7 @@ static inline int ctr_stcctm(enum cpumf_ctr_set set, u64 range, u64 *dest)
}
struct cpu_cf_events {
+ refcount_t refcnt; /* Reference count */
atomic_t ctr_set[CPUMF_CTR_SET_MAX];
u64 state; /* For perf_event_open SVC */
u64 dev_state; /* For /dev/hwctr */
@@ -88,9 +89,6 @@ struct cpu_cf_events {
unsigned int sets; /* # Counter set saved in memory */
};
-/* Per-CPU event structure for the counter facility */
-static DEFINE_PER_CPU(struct cpu_cf_events, cpu_cf_events);
-
static unsigned int cfdiag_cpu_speed; /* CPU speed for CF_DIAG trailer */
static debug_info_t *cf_dbg;
@@ -103,6 +101,221 @@ static debug_info_t *cf_dbg;
*/
static struct cpumf_ctr_info cpumf_ctr_info;
+struct cpu_cf_ptr {
+ struct cpu_cf_events *cpucf;
+};
+
+static struct cpu_cf_root { /* Anchor to per CPU data */
+ refcount_t refcnt; /* Overall active events */
+ struct cpu_cf_ptr __percpu *cfptr;
+} cpu_cf_root;
+
+/*
+ * Serialize event initialization and event removal. Both are called from
+ * user space in task context with perf_event_open() and close()
+ * system calls.
+ *
+ * This mutex serializes functions cpum_cf_alloc_cpu() called at event
+ * initialization via cpumf_pmu_event_init() and function cpum_cf_free_cpu()
+ * called at event removal via call back function hw_perf_event_destroy()
+ * when the event is deleted. They are serialized to enforce correct
+ * bookkeeping of pointer and reference counts anchored by
+ * struct cpu_cf_root and the access to cpu_cf_root::refcnt and the
+ * per CPU pointers stored in cpu_cf_root::cfptr.
+ */
+static DEFINE_MUTEX(pmc_reserve_mutex);
+
+/*
+ * Get pointer to per-cpu structure.
+ *
+ * Function get_cpu_cfhw() is called from
+ * - cfset_copy_all(): This function is protected by cpus_read_lock(), so
+ * CPU hot plug remove can not happen. Event removal requires a close()
+ * first.
+ *
+ * Function this_cpu_cfhw() is called from perf common code functions:
+ * - pmu_{en|dis}able(), pmu_{add|del}()and pmu_{start|stop}():
+ * All functions execute with interrupts disabled on that particular CPU.
+ * - cfset_ioctl_{on|off}, cfset_cpu_read(): see comment cfset_copy_all().
+ *
+ * Therefore it is safe to access the CPU specific pointer to the event.
+ */
+static struct cpu_cf_events *get_cpu_cfhw(int cpu)
+{
+ struct cpu_cf_ptr __percpu *p = cpu_cf_root.cfptr;
+
+ if (p) {
+ struct cpu_cf_ptr *q = per_cpu_ptr(p, cpu);
+
+ return q->cpucf;
+ }
+ return NULL;
+}
+
+static struct cpu_cf_events *this_cpu_cfhw(void)
+{
+ return get_cpu_cfhw(smp_processor_id());
+}
+
+/* Disable counter sets on dedicated CPU */
+static void cpum_cf_reset_cpu(void *flags)
+{
+ lcctl(0);
+}
+
+/* Free per CPU data when the last event is removed. */
+static void cpum_cf_free_root(void)
+{
+ if (!refcount_dec_and_test(&cpu_cf_root.refcnt))
+ return;
+ free_percpu(cpu_cf_root.cfptr);
+ cpu_cf_root.cfptr = NULL;
+ irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+ on_each_cpu(cpum_cf_reset_cpu, NULL, 1);
+ debug_sprintf_event(cf_dbg, 4, "%s2 root.refcnt %u cfptr %px\n",
+ __func__, refcount_read(&cpu_cf_root.refcnt),
+ cpu_cf_root.cfptr);
+}
+
+/*
+ * On initialization of first event also allocate per CPU data dynamically.
+ * Start with an array of pointers, the array size is the maximum number of
+ * CPUs possible, which might be larger than the number of CPUs currently
+ * online.
+ */
+static int cpum_cf_alloc_root(void)
+{
+ int rc = 0;
+
+ if (refcount_inc_not_zero(&cpu_cf_root.refcnt))
+ return rc;
+
+ /* The memory is already zeroed. */
+ cpu_cf_root.cfptr = alloc_percpu(struct cpu_cf_ptr);
+ if (cpu_cf_root.cfptr) {
+ refcount_set(&cpu_cf_root.refcnt, 1);
+ on_each_cpu(cpum_cf_reset_cpu, NULL, 1);
+ irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+ } else {
+ rc = -ENOMEM;
+ }
+
+ return rc;
+}
+
+/* Free CPU counter data structure for a PMU */
+static void cpum_cf_free_cpu(int cpu)
+{
+ struct cpu_cf_events *cpuhw;
+ struct cpu_cf_ptr *p;
+
+ mutex_lock(&pmc_reserve_mutex);
+ /*
+ * When invoked via CPU hotplug handler, there might be no events
+ * installed or that particular CPU might not have an
+ * event installed. This anchor pointer can be NULL!
+ */
+ if (!cpu_cf_root.cfptr)
+ goto out;
+ p = per_cpu_ptr(cpu_cf_root.cfptr, cpu);
+ cpuhw = p->cpucf;
+ /*
+ * Might be zero when called from CPU hotplug handler and no event
+ * installed on that CPU, but on different CPUs.
+ */
+ if (!cpuhw)
+ goto out;
+
+ if (refcount_dec_and_test(&cpuhw->refcnt)) {
+ kfree(cpuhw);
+ p->cpucf = NULL;
+ }
+ cpum_cf_free_root();
+out:
+ mutex_unlock(&pmc_reserve_mutex);
+}
+
+/* Allocate CPU counter data structure for a PMU. Called under mutex lock. */
+static int cpum_cf_alloc_cpu(int cpu)
+{
+ struct cpu_cf_events *cpuhw;
+ struct cpu_cf_ptr *p;
+ int rc;
+
+ mutex_lock(&pmc_reserve_mutex);
+ rc = cpum_cf_alloc_root();
+ if (rc)
+ goto unlock;
+ p = per_cpu_ptr(cpu_cf_root.cfptr, cpu);
+ cpuhw = p->cpucf;
+
+ if (!cpuhw) {
+ cpuhw = kzalloc(sizeof(*cpuhw), GFP_KERNEL);
+ if (cpuhw) {
+ p->cpucf = cpuhw;
+ refcount_set(&cpuhw->refcnt, 1);
+ } else {
+ rc = -ENOMEM;
+ }
+ } else {
+ refcount_inc(&cpuhw->refcnt);
+ }
+ if (rc) {
+ /*
+ * Error in allocation of event, decrement anchor. Since
+ * cpu_cf_event in not created, its destroy() function is not
+ * invoked. Adjust the reference counter for the anchor.
+ */
+ cpum_cf_free_root();
+ }
+unlock:
+ mutex_unlock(&pmc_reserve_mutex);
+ return rc;
+}
+
+/*
+ * Create/delete per CPU data structures for /dev/hwctr interface and events
+ * created by perf_event_open().
+ * If cpu is -1, track task on all available CPUs. This requires
+ * allocation of hardware data structures for all CPUs. This setup handles
+ * perf_event_open() with task context and /dev/hwctr interface.
+ * If cpu is non-zero install event on this CPU only. This setup handles
+ * perf_event_open() with CPU context.
+ */
+static int cpum_cf_alloc(int cpu)
+{
+ cpumask_var_t mask;
+ int rc;
+
+ if (cpu == -1) {
+ if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
+ return -ENOMEM;
+ for_each_online_cpu(cpu) {
+ rc = cpum_cf_alloc_cpu(cpu);
+ if (rc) {
+ for_each_cpu(cpu, mask)
+ cpum_cf_free_cpu(cpu);
+ break;
+ }
+ cpumask_set_cpu(cpu, mask);
+ }
+ free_cpumask_var(mask);
+ } else {
+ rc = cpum_cf_alloc_cpu(cpu);
+ }
+ return rc;
+}
+
+static void cpum_cf_free(int cpu)
+{
+ if (cpu == -1) {
+ for_each_online_cpu(cpu)
+ cpum_cf_free_cpu(cpu);
+ } else {
+ cpum_cf_free_cpu(cpu);
+ }
+}
+
#define CF_DIAG_CTRSET_DEF 0xfeef /* Counter set header mark */
/* interval in seconds */
@@ -451,10 +664,10 @@ static int validate_ctr_version(const u64 config, enum cpumf_ctr_set set)
*/
static void cpumf_pmu_enable(struct pmu *pmu)
{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+ struct cpu_cf_events *cpuhw = this_cpu_cfhw();
int err;
- if (cpuhw->flags & PMU_F_ENABLED)
+ if (!cpuhw || (cpuhw->flags & PMU_F_ENABLED))
return;
err = lcctl(cpuhw->state | cpuhw->dev_state);
@@ -471,11 +684,11 @@ static void cpumf_pmu_enable(struct pmu *pmu)
*/
static void cpumf_pmu_disable(struct pmu *pmu)
{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
- int err;
+ struct cpu_cf_events *cpuhw = this_cpu_cfhw();
u64 inactive;
+ int err;
- if (!(cpuhw->flags & PMU_F_ENABLED))
+ if (!cpuhw || !(cpuhw->flags & PMU_F_ENABLED))
return;
inactive = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1);
@@ -487,58 +700,10 @@ static void cpumf_pmu_disable(struct pmu *pmu)
cpuhw->flags &= ~PMU_F_ENABLED;
}
-#define PMC_INIT 0UL
-#define PMC_RELEASE 1UL
-
-static void cpum_cf_setup_cpu(void *flags)
-{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
-
- switch ((unsigned long)flags) {
- case PMC_INIT:
- cpuhw->flags |= PMU_F_RESERVED;
- break;
-
- case PMC_RELEASE:
- cpuhw->flags &= ~PMU_F_RESERVED;
- break;
- }
-
- /* Disable CPU counter sets */
- lcctl(0);
- debug_sprintf_event(cf_dbg, 5, "%s flags %#x flags %#x state %#llx\n",
- __func__, *(int *)flags, cpuhw->flags,
- cpuhw->state);
-}
-
-/* Initialize the CPU-measurement counter facility */
-static int __kernel_cpumcf_begin(void)
-{
- on_each_cpu(cpum_cf_setup_cpu, (void *)PMC_INIT, 1);
- irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
-
- return 0;
-}
-
-/* Release the CPU-measurement counter facility */
-static void __kernel_cpumcf_end(void)
-{
- on_each_cpu(cpum_cf_setup_cpu, (void *)PMC_RELEASE, 1);
- irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
-}
-
-/* Number of perf events counting hardware events */
-static atomic_t num_events = ATOMIC_INIT(0);
-/* Used to avoid races in calling reserve/release_cpumf_hardware */
-static DEFINE_MUTEX(pmc_reserve_mutex);
-
/* Release the PMU if event is the last perf event */
static void hw_perf_event_destroy(struct perf_event *event)
{
- mutex_lock(&pmc_reserve_mutex);
- if (atomic_dec_return(&num_events) == 0)
- __kernel_cpumcf_end();
- mutex_unlock(&pmc_reserve_mutex);
+ cpum_cf_free(event->cpu);
}
/* CPUMF <-> perf event mappings for kernel+userspace (basic set) */
@@ -562,14 +727,6 @@ static const int cpumf_generic_events_user[] = {
[PERF_COUNT_HW_BUS_CYCLES] = -1,
};
-static void cpumf_hw_inuse(void)
-{
- mutex_lock(&pmc_reserve_mutex);
- if (atomic_inc_return(&num_events) == 1)
- __kernel_cpumcf_begin();
- mutex_unlock(&pmc_reserve_mutex);
-}
-
static int is_userspace_event(u64 ev)
{
return cpumf_generic_events_user[PERF_COUNT_HW_CPU_CYCLES] == ev ||
@@ -653,7 +810,8 @@ static int __hw_perf_event_init(struct perf_event *event, unsigned int type)
}
/* Initialize for using the CPU-measurement counter facility */
- cpumf_hw_inuse();
+ if (cpum_cf_alloc(event->cpu))
+ return -ENOMEM;
event->destroy = hw_perf_event_destroy;
/*
@@ -756,7 +914,7 @@ static void cpumf_pmu_read(struct perf_event *event)
static void cpumf_pmu_start(struct perf_event *event, int flags)
{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+ struct cpu_cf_events *cpuhw = this_cpu_cfhw();
struct hw_perf_event *hwc = &event->hw;
int i;
@@ -830,7 +988,7 @@ static int cfdiag_push_sample(struct perf_event *event,
static void cpumf_pmu_stop(struct perf_event *event, int flags)
{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+ struct cpu_cf_events *cpuhw = this_cpu_cfhw();
struct hw_perf_event *hwc = &event->hw;
int i;
@@ -857,8 +1015,7 @@ static void cpumf_pmu_stop(struct perf_event *event, int flags)
false);
if (cfdiag_diffctr(cpuhw, event->hw.config_base))
cfdiag_push_sample(event, cpuhw);
- } else if (cpuhw->flags & PMU_F_RESERVED) {
- /* Only update when PMU not hotplugged off */
+ } else {
hw_perf_event_update(event);
}
hwc->state |= PERF_HES_UPTODATE;
@@ -867,7 +1024,7 @@ static void cpumf_pmu_stop(struct perf_event *event, int flags)
static int cpumf_pmu_add(struct perf_event *event, int flags)
{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+ struct cpu_cf_events *cpuhw = this_cpu_cfhw();
ctr_set_enable(&cpuhw->state, event->hw.config_base);
event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
@@ -880,7 +1037,7 @@ static int cpumf_pmu_add(struct perf_event *event, int flags)
static void cpumf_pmu_del(struct perf_event *event, int flags)
{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+ struct cpu_cf_events *cpuhw = this_cpu_cfhw();
int i;
cpumf_pmu_stop(event, PERF_EF_UPDATE);
@@ -912,29 +1069,83 @@ static struct pmu cpumf_pmu = {
.read = cpumf_pmu_read,
};
-static int cpum_cf_setup(unsigned int cpu, unsigned long flags)
-{
- local_irq_disable();
- cpum_cf_setup_cpu((void *)flags);
- local_irq_enable();
- return 0;
-}
+static struct cfset_session { /* CPUs and counter set bit mask */
+ struct list_head head; /* Head of list of active processes */
+} cfset_session = {
+ .head = LIST_HEAD_INIT(cfset_session.head)
+};
+
+static refcount_t cfset_opencnt = REFCOUNT_INIT(0); /* Access count */
+/*
+ * Synchronize access to device /dev/hwc. This mutex protects against
+ * concurrent access to functions cfset_open() and cfset_release().
+ * Same for CPU hotplug add and remove events triggering
+ * cpum_cf_online_cpu() and cpum_cf_offline_cpu().
+ * It also serializes concurrent device ioctl access from multiple
+ * processes accessing /dev/hwc.
+ *
+ * The mutex protects concurrent access to the /dev/hwctr session management
+ * struct cfset_session and reference counting variable cfset_opencnt.
+ */
+static DEFINE_MUTEX(cfset_ctrset_mutex);
+/*
+ * CPU hotplug handles only /dev/hwctr device.
+ * For perf_event_open() the CPU hotplug handling is done on kernel common
+ * code:
+ * - CPU add: Nothing is done since a file descriptor can not be created
+ * and returned to the user.
+ * - CPU delete: Handled by common code via pmu_disable(), pmu_stop() and
+ * pmu_delete(). The event itself is removed when the file descriptor is
+ * closed.
+ */
static int cfset_online_cpu(unsigned int cpu);
+
static int cpum_cf_online_cpu(unsigned int cpu)
{
- debug_sprintf_event(cf_dbg, 4, "%s cpu %d in_irq %ld\n", __func__,
- cpu, in_interrupt());
- cpum_cf_setup(cpu, PMC_INIT);
- return cfset_online_cpu(cpu);
+ int rc = 0;
+
+ debug_sprintf_event(cf_dbg, 4, "%s cpu %d root.refcnt %d "
+ "opencnt %d\n", __func__, cpu,
+ refcount_read(&cpu_cf_root.refcnt),
+ refcount_read(&cfset_opencnt));
+ /*
+ * Ignore notification for perf_event_open().
+ * Handle only /dev/hwctr device sessions.
+ */
+ mutex_lock(&cfset_ctrset_mutex);
+ if (refcount_read(&cfset_opencnt)) {
+ rc = cpum_cf_alloc_cpu(cpu);
+ if (!rc)
+ cfset_online_cpu(cpu);
+ }
+ mutex_unlock(&cfset_ctrset_mutex);
+ return rc;
}
static int cfset_offline_cpu(unsigned int cpu);
+
static int cpum_cf_offline_cpu(unsigned int cpu)
{
- debug_sprintf_event(cf_dbg, 4, "%s cpu %d\n", __func__, cpu);
- cfset_offline_cpu(cpu);
- return cpum_cf_setup(cpu, PMC_RELEASE);
+ debug_sprintf_event(cf_dbg, 4, "%s cpu %d root.refcnt %d opencnt %d\n",
+ __func__, cpu, refcount_read(&cpu_cf_root.refcnt),
+ refcount_read(&cfset_opencnt));
+ /*
+ * During task exit processing of grouped perf events triggered by CPU
+ * hotplug processing, pmu_disable() is called as part of perf context
+ * removal process. Therefore do not trigger event removal now for
+ * perf_event_open() created events. Perf common code triggers event
+ * destruction when the event file descriptor is closed.
+ *
+ * Handle only /dev/hwctr device sessions.
+ */
+ mutex_lock(&cfset_ctrset_mutex);
+ if (refcount_read(&cfset_opencnt)) {
+ cfset_offline_cpu(cpu);
+ cpum_cf_free_cpu(cpu);
+ }
+ mutex_unlock(&cfset_ctrset_mutex);
+ return 0;
}
/* Return true if store counter set multiple instruction is available */
@@ -953,13 +1164,13 @@ static void cpumf_measurement_alert(struct ext_code ext_code,
return;
inc_irq_stat(IRQEXT_CMC);
- cpuhw = this_cpu_ptr(&cpu_cf_events);
/*
* Measurement alerts are shared and might happen when the PMU
* is not reserved. Ignore these alerts in this case.
*/
- if (!(cpuhw->flags & PMU_F_RESERVED))
+ cpuhw = this_cpu_cfhw();
+ if (!cpuhw)
return;
/* counter authorization change alert */
@@ -1039,19 +1250,11 @@ out1:
* counter set via normal file operations.
*/
-static atomic_t cfset_opencnt = ATOMIC_INIT(0); /* Access count */
-static DEFINE_MUTEX(cfset_ctrset_mutex);/* Synchronize access to hardware */
struct cfset_call_on_cpu_parm { /* Parm struct for smp_call_on_cpu */
unsigned int sets; /* Counter set bit mask */
atomic_t cpus_ack; /* # CPUs successfully executed func */
};
-static struct cfset_session { /* CPUs and counter set bit mask */
- struct list_head head; /* Head of list of active processes */
-} cfset_session = {
- .head = LIST_HEAD_INIT(cfset_session.head)
-};
-
struct cfset_request { /* CPUs and counter set bit mask */
unsigned long ctrset; /* Bit mask of counter set to read */
cpumask_t mask; /* CPU mask to read from */
@@ -1113,11 +1316,11 @@ static void cfset_session_add(struct cfset_request *p)
/* Stop all counter sets via ioctl interface */
static void cfset_ioctl_off(void *parm)
{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+ struct cpu_cf_events *cpuhw = this_cpu_cfhw();
struct cfset_call_on_cpu_parm *p = parm;
int rc;
- /* Check if any counter set used by /dev/hwc */
+ /* Check if any counter set used by /dev/hwctr */
for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc)
if ((p->sets & cpumf_ctr_ctl[rc])) {
if (!atomic_dec_return(&cpuhw->ctr_set[rc])) {
@@ -1141,7 +1344,7 @@ static void cfset_ioctl_off(void *parm)
/* Start counter sets on particular CPU */
static void cfset_ioctl_on(void *parm)
{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+ struct cpu_cf_events *cpuhw = this_cpu_cfhw();
struct cfset_call_on_cpu_parm *p = parm;
int rc;
@@ -1163,7 +1366,7 @@ static void cfset_ioctl_on(void *parm)
static void cfset_release_cpu(void *p)
{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+ struct cpu_cf_events *cpuhw = this_cpu_cfhw();
int rc;
debug_sprintf_event(cf_dbg, 4, "%s state %#llx dev_state %#llx\n",
@@ -1203,27 +1406,41 @@ static int cfset_release(struct inode *inode, struct file *file)
kfree(file->private_data);
file->private_data = NULL;
}
- if (!atomic_dec_return(&cfset_opencnt))
+ if (refcount_dec_and_test(&cfset_opencnt)) { /* Last close */
on_each_cpu(cfset_release_cpu, NULL, 1);
+ cpum_cf_free(-1);
+ }
mutex_unlock(&cfset_ctrset_mutex);
-
- hw_perf_event_destroy(NULL);
return 0;
}
+/*
+ * Open via /dev/hwctr device. Allocate all per CPU resources on the first
+ * open of the device. The last close releases all per CPU resources.
+ * Parallel perf_event_open system calls also use per CPU resources.
+ * These invocations are handled via reference counting on the per CPU data
+ * structures.
+ */
static int cfset_open(struct inode *inode, struct file *file)
{
- if (!capable(CAP_SYS_ADMIN))
+ int rc = 0;
+
+ if (!perfmon_capable())
return -EPERM;
+ file->private_data = NULL;
+
mutex_lock(&cfset_ctrset_mutex);
- if (atomic_inc_return(&cfset_opencnt) == 1)
- cfset_session_init();
+ if (!refcount_inc_not_zero(&cfset_opencnt)) { /* First open */
+ rc = cpum_cf_alloc(-1);
+ if (!rc) {
+ cfset_session_init();
+ refcount_set(&cfset_opencnt, 1);
+ }
+ }
mutex_unlock(&cfset_ctrset_mutex);
- cpumf_hw_inuse();
- file->private_data = NULL;
/* nonseekable_open() never fails */
- return nonseekable_open(inode, file);
+ return rc ?: nonseekable_open(inode, file);
}
static int cfset_all_start(struct cfset_request *req)
@@ -1280,7 +1497,7 @@ static int cfset_all_copy(unsigned long arg, cpumask_t *mask)
ctrset_read = (struct s390_ctrset_read __user *)arg;
uptr = ctrset_read->data;
for_each_cpu(cpu, mask) {
- struct cpu_cf_events *cpuhw = per_cpu_ptr(&cpu_cf_events, cpu);
+ struct cpu_cf_events *cpuhw = get_cpu_cfhw(cpu);
struct s390_ctrset_cpudata __user *ctrset_cpudata;
ctrset_cpudata = uptr;
@@ -1324,7 +1541,7 @@ static size_t cfset_cpuset_read(struct s390_ctrset_setdata *p, int ctrset,
/* Read all counter sets. */
static void cfset_cpu_read(void *parm)
{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+ struct cpu_cf_events *cpuhw = this_cpu_cfhw();
struct cfset_call_on_cpu_parm *p = parm;
int set, set_size;
size_t space;
@@ -1348,9 +1565,9 @@ static void cfset_cpu_read(void *parm)
cpuhw->used += space;
cpuhw->sets += 1;
}
+ debug_sprintf_event(cf_dbg, 4, "%s sets %d used %zd\n", __func__,
+ cpuhw->sets, cpuhw->used);
}
- debug_sprintf_event(cf_dbg, 4, "%s sets %d used %zd\n", __func__,
- cpuhw->sets, cpuhw->used);
}
static int cfset_all_read(unsigned long arg, struct cfset_request *req)
@@ -1502,6 +1719,7 @@ static struct miscdevice cfset_dev = {
.name = S390_HWCTR_DEVICE,
.minor = MISC_DYNAMIC_MINOR,
.fops = &cfset_fops,
+ .mode = 0666,
};
/* Hotplug add of a CPU. Scan through all active processes and add
@@ -1512,7 +1730,6 @@ static int cfset_online_cpu(unsigned int cpu)
struct cfset_call_on_cpu_parm p;
struct cfset_request *rp;
- mutex_lock(&cfset_ctrset_mutex);
if (!list_empty(&cfset_session.head)) {
list_for_each_entry(rp, &cfset_session.head, node) {
p.sets = rp->ctrset;
@@ -1520,19 +1737,18 @@ static int cfset_online_cpu(unsigned int cpu)
cpumask_set_cpu(cpu, &rp->mask);
}
}
- mutex_unlock(&cfset_ctrset_mutex);
return 0;
}
/* Hotplug remove of a CPU. Scan through all active processes and clear
* that CPU from the list of CPUs supplied with ioctl(..., START, ...).
+ * Adjust reference counts.
*/
static int cfset_offline_cpu(unsigned int cpu)
{
struct cfset_call_on_cpu_parm p;
struct cfset_request *rp;
- mutex_lock(&cfset_ctrset_mutex);
if (!list_empty(&cfset_session.head)) {
list_for_each_entry(rp, &cfset_session.head, node) {
p.sets = rp->ctrset;
@@ -1540,7 +1756,6 @@ static int cfset_offline_cpu(unsigned int cpu)
cpumask_clear_cpu(cpu, &rp->mask);
}
}
- mutex_unlock(&cfset_ctrset_mutex);
return 0;
}
@@ -1618,7 +1833,8 @@ static int cfdiag_event_init(struct perf_event *event)
}
/* Initialize for using the CPU-measurement counter facility */
- cpumf_hw_inuse();
+ if (cpum_cf_alloc(event->cpu))
+ return -ENOMEM;
event->destroy = hw_perf_event_destroy;
err = cfdiag_event_init2(event);
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 7ef72f5ff52e..8ecfbce4ac92 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -1271,16 +1271,6 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
}
}
-static inline __uint128_t __cdsg(__uint128_t *ptr, __uint128_t old, __uint128_t new)
-{
- asm volatile(
- " cdsg %[old],%[new],%[ptr]\n"
- : [old] "+d" (old), [ptr] "+QS" (*ptr)
- : [new] "d" (new)
- : "memory", "cc");
- return old;
-}
-
/* hw_perf_event_update() - Process sampling buffer
* @event: The perf event
* @flush_all: Flag to also flush partially filled sample-data-blocks
@@ -1352,7 +1342,7 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
new.f = 0;
new.a = 1;
new.overflow = 0;
- prev.val = __cdsg(&te->header.val, old.val, new.val);
+ prev.val = cmpxchg128(&te->header.val, old.val, new.val);
} while (prev.val != old.val);
/* Advance to next sample-data-block */
@@ -1562,7 +1552,7 @@ static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index,
}
new.a = 1;
new.overflow = 0;
- prev.val = __cdsg(&te->header.val, old.val, new.val);
+ prev.val = cmpxchg128(&te->header.val, old.val, new.val);
} while (prev.val != old.val);
return true;
}
@@ -1636,7 +1626,7 @@ static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range,
new.a = 1;
else
new.a = 0;
- prev.val = __cdsg(&te->header.val, old.val, new.val);
+ prev.val = cmpxchg128(&te->header.val, old.val, new.val);
} while (prev.val != old.val);
*overflow += orig_overflow;
}
diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c
index a7b339c4fd7c..fe7d1774ded1 100644
--- a/arch/s390/kernel/perf_pai_crypto.c
+++ b/arch/s390/kernel/perf_pai_crypto.c
@@ -36,7 +36,7 @@ struct paicrypt_map {
unsigned long *page; /* Page for CPU to store counters */
struct pai_userdata *save; /* Page to store no-zero counters */
unsigned int active_events; /* # of PAI crypto users */
- unsigned int refcnt; /* Reference count mapped buffers */
+ refcount_t refcnt; /* Reference count mapped buffers */
enum paievt_mode mode; /* Type of event */
struct perf_event *event; /* Perf event for sampling */
};
@@ -57,10 +57,11 @@ static void paicrypt_event_destroy(struct perf_event *event)
static_branch_dec(&pai_key);
mutex_lock(&pai_reserve_mutex);
debug_sprintf_event(cfm_dbg, 5, "%s event %#llx cpu %d users %d"
- " mode %d refcnt %d\n", __func__,
+ " mode %d refcnt %u\n", __func__,
event->attr.config, event->cpu,
- cpump->active_events, cpump->mode, cpump->refcnt);
- if (!--cpump->refcnt) {
+ cpump->active_events, cpump->mode,
+ refcount_read(&cpump->refcnt));
+ if (refcount_dec_and_test(&cpump->refcnt)) {
debug_sprintf_event(cfm_dbg, 4, "%s page %#lx save %p\n",
__func__, (unsigned long)cpump->page,
cpump->save);
@@ -149,8 +150,10 @@ static int paicrypt_busy(struct perf_event_attr *a, struct paicrypt_map *cpump)
/* Allocate memory for counter page and counter extraction.
* Only the first counting event has to allocate a page.
*/
- if (cpump->page)
+ if (cpump->page) {
+ refcount_inc(&cpump->refcnt);
goto unlock;
+ }
rc = -ENOMEM;
cpump->page = (unsigned long *)get_zeroed_page(GFP_KERNEL);
@@ -164,18 +167,18 @@ static int paicrypt_busy(struct perf_event_attr *a, struct paicrypt_map *cpump)
goto unlock;
}
rc = 0;
+ refcount_set(&cpump->refcnt, 1);
unlock:
/* If rc is non-zero, do not set mode and reference count */
if (!rc) {
- cpump->refcnt++;
cpump->mode = a->sample_period ? PAI_MODE_SAMPLING
: PAI_MODE_COUNTING;
}
debug_sprintf_event(cfm_dbg, 5, "%s sample_period %#llx users %d"
- " mode %d refcnt %d page %#lx save %p rc %d\n",
+ " mode %d refcnt %u page %#lx save %p rc %d\n",
__func__, a->sample_period, cpump->active_events,
- cpump->mode, cpump->refcnt,
+ cpump->mode, refcount_read(&cpump->refcnt),
(unsigned long)cpump->page, cpump->save, rc);
mutex_unlock(&pai_reserve_mutex);
return rc;
diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c
index fcea307d7529..3b4f384f77f7 100644
--- a/arch/s390/kernel/perf_pai_ext.c
+++ b/arch/s390/kernel/perf_pai_ext.c
@@ -50,7 +50,7 @@ struct paiext_map {
struct pai_userdata *save; /* Area to store non-zero counters */
enum paievt_mode mode; /* Type of event */
unsigned int active_events; /* # of PAI Extension users */
- unsigned int refcnt;
+ refcount_t refcnt;
struct perf_event *event; /* Perf event for sampling */
struct paiext_cb *paiext_cb; /* PAI extension control block area */
};
@@ -60,14 +60,14 @@ struct paiext_mapptr {
};
static struct paiext_root { /* Anchor to per CPU data */
- int refcnt; /* Overall active events */
+ refcount_t refcnt; /* Overall active events */
struct paiext_mapptr __percpu *mapptr;
} paiext_root;
/* Free per CPU data when the last event is removed. */
static void paiext_root_free(void)
{
- if (!--paiext_root.refcnt) {
+ if (refcount_dec_and_test(&paiext_root.refcnt)) {
free_percpu(paiext_root.mapptr);
paiext_root.mapptr = NULL;
}
@@ -80,7 +80,7 @@ static void paiext_root_free(void)
*/
static int paiext_root_alloc(void)
{
- if (++paiext_root.refcnt == 1) {
+ if (!refcount_inc_not_zero(&paiext_root.refcnt)) {
/* The memory is already zeroed. */
paiext_root.mapptr = alloc_percpu(struct paiext_mapptr);
if (!paiext_root.mapptr) {
@@ -91,6 +91,7 @@ static int paiext_root_alloc(void)
*/
return -ENOMEM;
}
+ refcount_set(&paiext_root.refcnt, 1);
}
return 0;
}
@@ -122,7 +123,7 @@ static void paiext_event_destroy(struct perf_event *event)
mutex_lock(&paiext_reserve_mutex);
cpump->event = NULL;
- if (!--cpump->refcnt) /* Last reference gone */
+ if (refcount_dec_and_test(&cpump->refcnt)) /* Last reference gone */
paiext_free(mp);
paiext_root_free();
mutex_unlock(&paiext_reserve_mutex);
@@ -163,7 +164,7 @@ static int paiext_alloc(struct perf_event_attr *a, struct perf_event *event)
rc = -ENOMEM;
cpump = kzalloc(sizeof(*cpump), GFP_KERNEL);
if (!cpump)
- goto unlock;
+ goto undo;
/* Allocate memory for counter area and counter extraction.
* These are
@@ -183,8 +184,9 @@ static int paiext_alloc(struct perf_event_attr *a, struct perf_event *event)
GFP_KERNEL);
if (!cpump->save || !cpump->area || !cpump->paiext_cb) {
paiext_free(mp);
- goto unlock;
+ goto undo;
}
+ refcount_set(&cpump->refcnt, 1);
cpump->mode = a->sample_period ? PAI_MODE_SAMPLING
: PAI_MODE_COUNTING;
} else {
@@ -195,15 +197,15 @@ static int paiext_alloc(struct perf_event_attr *a, struct perf_event *event)
if (cpump->mode == PAI_MODE_SAMPLING ||
(cpump->mode == PAI_MODE_COUNTING && a->sample_period)) {
rc = -EBUSY;
- goto unlock;
+ goto undo;
}
+ refcount_inc(&cpump->refcnt);
}
rc = 0;
cpump->event = event;
- ++cpump->refcnt;
-unlock:
+undo:
if (rc) {
/* Error in allocation of event, decrement anchor. Since
* the event in not created, its destroy() function is never
@@ -211,6 +213,7 @@ unlock:
*/
paiext_root_free();
}
+unlock:
mutex_unlock(&paiext_reserve_mutex);
/* If rc is non-zero, no increment of counter/sampler was done. */
return rc;
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index b68f47541169..a6935af2235c 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -453,3 +453,4 @@
448 common process_mrelease sys_process_mrelease sys_process_mrelease
449 common futex_waitv sys_futex_waitv sys_futex_waitv
450 common set_mempolicy_home_node sys_set_mempolicy_home_node sys_set_mempolicy_home_node
+451 common cachestat sys_cachestat sys_cachestat
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 6b7b6d5e3632..276278199c44 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -102,6 +102,11 @@ void __init time_early_init(void)
((long) qui.old_leap * 4096000000L);
}
+unsigned long long noinstr sched_clock_noinstr(void)
+{
+ return tod_to_ns(__get_tod_clock_monotonic());
+}
+
/*
* Scheduler clock - returns current time in nanosec units.
*/
diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
index cb2ee06df286..3c62d1b218b1 100644
--- a/arch/s390/kernel/uv.c
+++ b/arch/s390/kernel/uv.c
@@ -294,6 +294,8 @@ again:
rc = -ENXIO;
ptep = get_locked_pte(gmap->mm, uaddr, &ptelock);
+ if (!ptep)
+ goto out;
if (pte_present(*ptep) && !(pte_val(*ptep) & _PAGE_INVALID) && pte_write(*ptep)) {
page = pte_page(*ptep);
rc = -EAGAIN;