Diffstat (limited to 'arch/s390/kernel')
-rw-r--r--   arch/s390/kernel/crash_dump.c          |   2
-rw-r--r--   arch/s390/kernel/entry.h               |   2
-rw-r--r--   arch/s390/kernel/ipl.c                 |  16
-rw-r--r--   arch/s390/kernel/module.c              |   3
-rw-r--r--   arch/s390/kernel/perf_cpum_cf.c        | 452
-rw-r--r--   arch/s390/kernel/perf_cpum_sf.c        |  16
-rw-r--r--   arch/s390/kernel/perf_pai_crypto.c     |  19
-rw-r--r--   arch/s390/kernel/perf_pai_ext.c        |  23
-rw-r--r--   arch/s390/kernel/syscalls/syscall.tbl  |   1
-rw-r--r--   arch/s390/kernel/time.c                |   5
-rw-r--r--   arch/s390/kernel/uv.c                  |   2
11 files changed, 388 insertions, 153 deletions
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index 8a617be28bb4..7af69948b290 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -568,9 +568,9 @@ static size_t get_elfcorehdr_size(int mem_chunk_cnt)
 int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size)
 {
 	Elf64_Phdr *phdr_notes, *phdr_loads;
+	size_t alloc_size;
 	int mem_chunk_cnt;
 	void *ptr, *hdr;
-	u32 alloc_size;
 	u64 hdr_off;
 
 	/* If we are not in kdump or zfcp/nvme dump mode return */
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index 34674e38826b..9f41853f36b9 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -34,14 +34,12 @@ void kernel_stack_overflow(struct pt_regs * regs);
 void handle_signal32(struct ksignal *ksig, sigset_t *oldset,
 		     struct pt_regs *regs);
 
-void __init init_IRQ(void);
 void do_io_irq(struct pt_regs *regs);
 void do_ext_irq(struct pt_regs *regs);
 void do_restart(void *arg);
 void __init startup_init(void);
 void die(struct pt_regs *regs, const char *str);
 int setup_profiling_timer(unsigned int multiplier);
-void __init time_init(void);
 unsigned long prepare_ftrace_return(unsigned long parent, unsigned long sp,
 				    unsigned long ip);
 struct s390_mmap_arg_struct;
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index f44f70de9661..85a00d97a314 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -176,6 +176,8 @@ static bool reipl_fcp_clear;
 static bool reipl_ccw_clear;
 static bool reipl_eckd_clear;
 
+static unsigned long os_info_flags;
+
 static inline int __diag308(unsigned long subcode, unsigned long addr)
 {
 	union register_pair r1;
@@ -1938,6 +1940,20 @@ static void dump_reipl_run(struct shutdown_trigger *trigger)
 	struct lowcore *abs_lc;
 	unsigned int csum;
 
+	/*
+	 * Set REIPL_CLEAR flag in os_info flags entry indicating
+	 * 'clear' sysfs attribute has been set on the panicked system
+	 * for specified reipl type.
+	 * Always set for IPL_TYPE_NSS and IPL_TYPE_UNKNOWN.
+	 */
+	if ((reipl_type == IPL_TYPE_CCW && reipl_ccw_clear) ||
+	    (reipl_type == IPL_TYPE_ECKD && reipl_eckd_clear) ||
+	    (reipl_type == IPL_TYPE_FCP && reipl_fcp_clear) ||
+	    (reipl_type == IPL_TYPE_NVME && reipl_nvme_clear) ||
+	    reipl_type == IPL_TYPE_NSS ||
+	    reipl_type == IPL_TYPE_UNKNOWN)
+		os_info_flags |= OS_INFO_FLAG_REIPL_CLEAR;
+	os_info_entry_add(OS_INFO_FLAGS_ENTRY, &os_info_flags, sizeof(os_info_flags));
 	csum = (__force unsigned int)
 	       csum_partial(reipl_block_actual, reipl_block_actual->hdr.len, 0);
 	abs_lc = get_abs_lowcore();
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
index f1b35dcdf3eb..42215f9404af 100644
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@@ -352,7 +352,8 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
 			rc = apply_rela_bits(loc, val, 0, 64, 0, write);
 		else if (r_type == R_390_GOTENT ||
 			 r_type == R_390_GOTPLTENT) {
-			val += (Elf_Addr) me->mem[MOD_TEXT].base - loc;
+			val += (Elf_Addr)me->mem[MOD_TEXT].base +
+				me->arch.got_offset - loc;
 			rc = apply_rela_bits(loc, val, 1, 32, 1, write);
 		}
 		break;
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index cf1b6e8a708d..90679143534b 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -76,6 +76,7 @@ static inline int ctr_stcctm(enum cpumf_ctr_set set, u64 range, u64 *dest)
 }
 
 struct cpu_cf_events {
+	refcount_t refcnt;		/* Reference count */
 	atomic_t ctr_set[CPUMF_CTR_SET_MAX];
 	u64 state;			/* For perf_event_open SVC */
 	u64 dev_state;			/* For /dev/hwctr */
@@ -88,9 +89,6 @@ struct cpu_cf_events {
 	unsigned int sets;		/* # Counter set saved in memory */
 };
 
-/* Per-CPU event structure for the counter facility */
-static DEFINE_PER_CPU(struct cpu_cf_events, cpu_cf_events);
-
 static unsigned int cfdiag_cpu_speed;	/* CPU speed for CF_DIAG trailer */
 static debug_info_t *cf_dbg;
 
@@ -103,6 +101,221 @@ static debug_info_t *cf_dbg;
  */
 static struct cpumf_ctr_info	cpumf_ctr_info;
 
+struct cpu_cf_ptr {
+	struct cpu_cf_events *cpucf;
+};
+
+static struct cpu_cf_root {		/* Anchor to per CPU data */
+	refcount_t refcnt;		/* Overall active events */
+	struct cpu_cf_ptr __percpu *cfptr;
+} cpu_cf_root;
+
+/*
+ * Serialize event initialization and event removal. Both are called from
+ * user space in task context with perf_event_open() and close()
+ * system calls.
+ *
+ * This mutex serializes functions cpum_cf_alloc_cpu() called at event
+ * initialization via cpumf_pmu_event_init() and function cpum_cf_free_cpu()
+ * called at event removal via call back function hw_perf_event_destroy()
+ * when the event is deleted. They are serialized to enforce correct
+ * bookkeeping of pointer and reference counts anchored by
+ * struct cpu_cf_root and the access to cpu_cf_root::refcnt and the
+ * per CPU pointers stored in cpu_cf_root::cfptr.
+ */
+static DEFINE_MUTEX(pmc_reserve_mutex);
+
+/*
+ * Get pointer to per-cpu structure.
+ *
+ * Function get_cpu_cfhw() is called from
+ * - cfset_copy_all(): This function is protected by cpus_read_lock(), so
+ *   CPU hot plug remove can not happen. Event removal requires a close()
+ *   first.
+ *
+ * Function this_cpu_cfhw() is called from perf common code functions:
+ * - pmu_{en|dis}able(), pmu_{add|del}()and pmu_{start|stop}():
+ *   All functions execute with interrupts disabled on that particular CPU.
+ * - cfset_ioctl_{on|off}, cfset_cpu_read(): see comment cfset_copy_all().
+ *
+ * Therefore it is safe to access the CPU specific pointer to the event.
+ */
+static struct cpu_cf_events *get_cpu_cfhw(int cpu)
+{
+	struct cpu_cf_ptr __percpu *p = cpu_cf_root.cfptr;
+
+	if (p) {
+		struct cpu_cf_ptr *q = per_cpu_ptr(p, cpu);
+
+		return q->cpucf;
+	}
+	return NULL;
+}
+
+static struct cpu_cf_events *this_cpu_cfhw(void)
+{
+	return get_cpu_cfhw(smp_processor_id());
+}
+
+/* Disable counter sets on dedicated CPU */
+static void cpum_cf_reset_cpu(void *flags)
+{
+	lcctl(0);
+}
+
+/* Free per CPU data when the last event is removed. */
+static void cpum_cf_free_root(void)
+{
+	if (!refcount_dec_and_test(&cpu_cf_root.refcnt))
+		return;
+	free_percpu(cpu_cf_root.cfptr);
+	cpu_cf_root.cfptr = NULL;
+	irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+	on_each_cpu(cpum_cf_reset_cpu, NULL, 1);
+	debug_sprintf_event(cf_dbg, 4, "%s2 root.refcnt %u cfptr %px\n",
+			    __func__, refcount_read(&cpu_cf_root.refcnt),
+			    cpu_cf_root.cfptr);
+}
+
+/*
+ * On initialization of first event also allocate per CPU data dynamically.
+ * Start with an array of pointers, the array size is the maximum number of
+ * CPUs possible, which might be larger than the number of CPUs currently
+ * online.
+ */
+static int cpum_cf_alloc_root(void)
+{
+	int rc = 0;
+
+	if (refcount_inc_not_zero(&cpu_cf_root.refcnt))
+		return rc;
+
+	/* The memory is already zeroed. */
+	cpu_cf_root.cfptr = alloc_percpu(struct cpu_cf_ptr);
+	if (cpu_cf_root.cfptr) {
+		refcount_set(&cpu_cf_root.refcnt, 1);
+		on_each_cpu(cpum_cf_reset_cpu, NULL, 1);
+		irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+	} else {
+		rc = -ENOMEM;
+	}
+
+	return rc;
+}
+
+/* Free CPU counter data structure for a PMU */
+static void cpum_cf_free_cpu(int cpu)
+{
+	struct cpu_cf_events *cpuhw;
+	struct cpu_cf_ptr *p;
+
+	mutex_lock(&pmc_reserve_mutex);
+	/*
+	 * When invoked via CPU hotplug handler, there might be no events
+	 * installed or that particular CPU might not have an
+	 * event installed. This anchor pointer can be NULL!
+	 */
+	if (!cpu_cf_root.cfptr)
+		goto out;
+	p = per_cpu_ptr(cpu_cf_root.cfptr, cpu);
+	cpuhw = p->cpucf;
+	/*
+	 * Might be zero when called from CPU hotplug handler and no event
+	 * installed on that CPU, but on different CPUs.
+	 */
+	if (!cpuhw)
+		goto out;
+
+	if (refcount_dec_and_test(&cpuhw->refcnt)) {
+		kfree(cpuhw);
+		p->cpucf = NULL;
+	}
+	cpum_cf_free_root();
out:
+	mutex_unlock(&pmc_reserve_mutex);
+}
+
+/* Allocate CPU counter data structure for a PMU. Called under mutex lock. */
+static int cpum_cf_alloc_cpu(int cpu)
+{
+	struct cpu_cf_events *cpuhw;
+	struct cpu_cf_ptr *p;
+	int rc;
+
+	mutex_lock(&pmc_reserve_mutex);
+	rc = cpum_cf_alloc_root();
+	if (rc)
+		goto unlock;
+	p = per_cpu_ptr(cpu_cf_root.cfptr, cpu);
+	cpuhw = p->cpucf;
+
+	if (!cpuhw) {
+		cpuhw = kzalloc(sizeof(*cpuhw), GFP_KERNEL);
+		if (cpuhw) {
+			p->cpucf = cpuhw;
+			refcount_set(&cpuhw->refcnt, 1);
+		} else {
+			rc = -ENOMEM;
+		}
+	} else {
+		refcount_inc(&cpuhw->refcnt);
+	}
+	if (rc) {
+		/*
+		 * Error in allocation of event, decrement anchor. Since
+		 * cpu_cf_event in not created, its destroy() function is not
+		 * invoked. Adjust the reference counter for the anchor.
+		 */
+		cpum_cf_free_root();
+	}
unlock:
+	mutex_unlock(&pmc_reserve_mutex);
+	return rc;
+}
+
+/*
+ * Create/delete per CPU data structures for /dev/hwctr interface and events
+ * created by perf_event_open().
+ * If cpu is -1, track task on all available CPUs. This requires
+ * allocation of hardware data structures for all CPUs. This setup handles
+ * perf_event_open() with task context and /dev/hwctr interface.
+ * If cpu is non-zero install event on this CPU only. This setup handles
+ * perf_event_open() with CPU context.
+ */
+static int cpum_cf_alloc(int cpu)
+{
+	cpumask_var_t mask;
+	int rc;
+
+	if (cpu == -1) {
+		if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
+			return -ENOMEM;
+		for_each_online_cpu(cpu) {
+			rc = cpum_cf_alloc_cpu(cpu);
+			if (rc) {
+				for_each_cpu(cpu, mask)
+					cpum_cf_free_cpu(cpu);
+				break;
+			}
+			cpumask_set_cpu(cpu, mask);
+		}
+		free_cpumask_var(mask);
+	} else {
+		rc = cpum_cf_alloc_cpu(cpu);
+	}
+	return rc;
+}
+
+static void cpum_cf_free(int cpu)
+{
+	if (cpu == -1) {
+		for_each_online_cpu(cpu)
+			cpum_cf_free_cpu(cpu);
+	} else {
+		cpum_cf_free_cpu(cpu);
+	}
+}
+
 #define	CF_DIAG_CTRSET_DEF		0xfeef	/* Counter set header mark */
 						/* interval in seconds */
@@ -451,10 +664,10 @@ static int validate_ctr_version(const u64 config, enum cpumf_ctr_set set)
  */
 static void cpumf_pmu_enable(struct pmu *pmu)
 {
-	struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+	struct cpu_cf_events *cpuhw = this_cpu_cfhw();
 	int err;
 
-	if (cpuhw->flags & PMU_F_ENABLED)
+	if (!cpuhw || (cpuhw->flags & PMU_F_ENABLED))
 		return;
 
 	err = lcctl(cpuhw->state | cpuhw->dev_state);
@@ -471,11 +684,11 @@ static void cpumf_pmu_enable(struct pmu *pmu)
  */
 static void cpumf_pmu_disable(struct pmu *pmu)
 {
-	struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
-	int err;
+	struct cpu_cf_events *cpuhw = this_cpu_cfhw();
 	u64 inactive;
+	int err;
 
-	if (!(cpuhw->flags & PMU_F_ENABLED))
+	if (!cpuhw || !(cpuhw->flags & PMU_F_ENABLED))
 		return;
 
 	inactive = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1);
@@ -487,58 +700,10 @@ static void cpumf_pmu_disable(struct pmu *pmu)
 	cpuhw->flags &= ~PMU_F_ENABLED;
 }
 
-#define PMC_INIT      0UL
-#define PMC_RELEASE   1UL
-
-static void cpum_cf_setup_cpu(void *flags)
-{
-	struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
-
-	switch ((unsigned long)flags) {
-	case PMC_INIT:
-		cpuhw->flags |= PMU_F_RESERVED;
-		break;
-
-	case PMC_RELEASE:
-		cpuhw->flags &= ~PMU_F_RESERVED;
-		break;
-	}
-
-	/* Disable CPU counter sets */
-	lcctl(0);
-	debug_sprintf_event(cf_dbg, 5, "%s flags %#x flags %#x state %#llx\n",
-			    __func__, *(int *)flags, cpuhw->flags,
-			    cpuhw->state);
-}
-
-/* Initialize the CPU-measurement counter facility */
-static int __kernel_cpumcf_begin(void)
-{
-	on_each_cpu(cpum_cf_setup_cpu, (void *)PMC_INIT, 1);
-	irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
-
-	return 0;
-}
-
-/* Release the CPU-measurement counter facility */
-static void __kernel_cpumcf_end(void)
-{
-	on_each_cpu(cpum_cf_setup_cpu, (void *)PMC_RELEASE, 1);
-	irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
-}
-
-/* Number of perf events counting hardware events */
-static atomic_t num_events = ATOMIC_INIT(0);
-/* Used to avoid races in calling reserve/release_cpumf_hardware */
-static DEFINE_MUTEX(pmc_reserve_mutex);
-
 /* Release the PMU if event is the last perf event */
 static void hw_perf_event_destroy(struct perf_event *event)
 {
-	mutex_lock(&pmc_reserve_mutex);
-	if (atomic_dec_return(&num_events) == 0)
-		__kernel_cpumcf_end();
-	mutex_unlock(&pmc_reserve_mutex);
+	cpum_cf_free(event->cpu);
 }
 
 /* CPUMF <-> perf event mappings for kernel+userspace (basic set) */
@@ -562,14 +727,6 @@ static const int cpumf_generic_events_user[] = {
 	[PERF_COUNT_HW_BUS_CYCLES]	    = -1,
 };
 
-static void cpumf_hw_inuse(void)
-{
-	mutex_lock(&pmc_reserve_mutex);
-	if (atomic_inc_return(&num_events) == 1)
-		__kernel_cpumcf_begin();
-	mutex_unlock(&pmc_reserve_mutex);
-}
-
 static int is_userspace_event(u64 ev)
 {
 	return cpumf_generic_events_user[PERF_COUNT_HW_CPU_CYCLES] == ev ||
@@ -653,7 +810,8 @@ static int __hw_perf_event_init(struct perf_event *event, unsigned int type)
 	}
 
 	/* Initialize for using the CPU-measurement counter facility */
-	cpumf_hw_inuse();
+	if (cpum_cf_alloc(event->cpu))
+		return -ENOMEM;
 	event->destroy = hw_perf_event_destroy;
 
 	/*
@@ -756,7 +914,7 @@ static void cpumf_pmu_read(struct perf_event *event)
 
 static void cpumf_pmu_start(struct perf_event *event, int flags)
 {
-	struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+	struct cpu_cf_events *cpuhw = this_cpu_cfhw();
 	struct hw_perf_event *hwc = &event->hw;
 	int i;
 
@@ -830,7 +988,7 @@ static int cfdiag_push_sample(struct perf_event *event,
 
 static void cpumf_pmu_stop(struct perf_event *event, int flags)
 {
-	struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+	struct cpu_cf_events *cpuhw = this_cpu_cfhw();
 	struct hw_perf_event *hwc = &event->hw;
 	int i;
 
@@ -857,8 +1015,7 @@ static void cpumf_pmu_stop(struct perf_event *event, int flags)
 							      false);
 			if (cfdiag_diffctr(cpuhw, event->hw.config_base))
 				cfdiag_push_sample(event, cpuhw);
-		} else if (cpuhw->flags & PMU_F_RESERVED) {
-			/* Only update when PMU not hotplugged off */
+		} else {
 			hw_perf_event_update(event);
 		}
 		hwc->state |= PERF_HES_UPTODATE;
@@ -867,7 +1024,7 @@
 
 static int cpumf_pmu_add(struct perf_event *event, int flags)
 {
-	struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+	struct cpu_cf_events *cpuhw = this_cpu_cfhw();
 
 	ctr_set_enable(&cpuhw->state, event->hw.config_base);
 	event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
@@ -880,7 +1037,7 @@ static int cpumf_pmu_add(struct perf_event *event, int flags)
 
 static void cpumf_pmu_del(struct perf_event *event, int flags)
 {
-	struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+	struct cpu_cf_events *cpuhw = this_cpu_cfhw();
 	int i;
 
 	cpumf_pmu_stop(event, PERF_EF_UPDATE);
@@ -912,29 +1069,83 @@ static struct pmu cpumf_pmu = {
 	.read	      = cpumf_pmu_read,
 };
 
-static int cpum_cf_setup(unsigned int cpu, unsigned long flags)
-{
-	local_irq_disable();
-	cpum_cf_setup_cpu((void *)flags);
-	local_irq_enable();
-	return 0;
-}
+static struct cfset_session {		/* CPUs and counter set bit mask */
+	struct list_head head;		/* Head of list of active processes */
+} cfset_session = {
+	.head = LIST_HEAD_INIT(cfset_session.head)
+};
+
+static refcount_t cfset_opencnt = REFCOUNT_INIT(0);	/* Access count */
+/*
+ * Synchronize access to device /dev/hwc. This mutex protects against
+ * concurrent access to functions cfset_open() and cfset_release().
+ * Same for CPU hotplug add and remove events triggering
+ * cpum_cf_online_cpu() and cpum_cf_offline_cpu().
+ * It also serializes concurrent device ioctl access from multiple
+ * processes accessing /dev/hwc.
+ *
+ * The mutex protects concurrent access to the /dev/hwctr session management
+ * struct cfset_session and reference counting variable cfset_opencnt.
+ */
+static DEFINE_MUTEX(cfset_ctrset_mutex);
+/*
+ * CPU hotplug handles only /dev/hwctr device.
+ * For perf_event_open() the CPU hotplug handling is done on kernel common
+ * code:
+ * - CPU add: Nothing is done since a file descriptor can not be created
+ *   and returned to the user.
+ * - CPU delete: Handled by common code via pmu_disable(), pmu_stop() and
+ *   pmu_delete(). The event itself is removed when the file descriptor is
+ *   closed.
+ */
 static int cfset_online_cpu(unsigned int cpu);
+
 static int cpum_cf_online_cpu(unsigned int cpu)
 {
-	debug_sprintf_event(cf_dbg, 4, "%s cpu %d in_irq %ld\n", __func__,
-			    cpu, in_interrupt());
-	cpum_cf_setup(cpu, PMC_INIT);
-	return cfset_online_cpu(cpu);
+	int rc = 0;
+
+	debug_sprintf_event(cf_dbg, 4, "%s cpu %d root.refcnt %d "
+			    "opencnt %d\n", __func__, cpu,
+			    refcount_read(&cpu_cf_root.refcnt),
+			    refcount_read(&cfset_opencnt));
+	/*
+	 * Ignore notification for perf_event_open().
+	 * Handle only /dev/hwctr device sessions.
+	 */
+	mutex_lock(&cfset_ctrset_mutex);
+	if (refcount_read(&cfset_opencnt)) {
+		rc = cpum_cf_alloc_cpu(cpu);
+		if (!rc)
+			cfset_online_cpu(cpu);
+	}
+	mutex_unlock(&cfset_ctrset_mutex);
+	return rc;
 }
 
 static int cfset_offline_cpu(unsigned int cpu);
+
 static int cpum_cf_offline_cpu(unsigned int cpu)
 {
-	debug_sprintf_event(cf_dbg, 4, "%s cpu %d\n", __func__, cpu);
-	cfset_offline_cpu(cpu);
-	return cpum_cf_setup(cpu, PMC_RELEASE);
+	debug_sprintf_event(cf_dbg, 4, "%s cpu %d root.refcnt %d opencnt %d\n",
+			    __func__, cpu, refcount_read(&cpu_cf_root.refcnt),
+			    refcount_read(&cfset_opencnt));
+	/*
+	 * During task exit processing of grouped perf events triggered by CPU
+	 * hotplug processing, pmu_disable() is called as part of perf context
+	 * removal process. Therefore do not trigger event removal now for
+	 * perf_event_open() created events. Perf common code triggers event
+	 * destruction when the event file descriptor is closed.
+	 *
+	 * Handle only /dev/hwctr device sessions.
+	 */
+	mutex_lock(&cfset_ctrset_mutex);
+	if (refcount_read(&cfset_opencnt)) {
+		cfset_offline_cpu(cpu);
+		cpum_cf_free_cpu(cpu);
+	}
+	mutex_unlock(&cfset_ctrset_mutex);
+	return 0;
 }
 
 /* Return true if store counter set multiple instruction is available */
@@ -953,13 +1164,13 @@ static void cpumf_measurement_alert(struct ext_code ext_code,
 		return;
 
 	inc_irq_stat(IRQEXT_CMC);
-	cpuhw = this_cpu_ptr(&cpu_cf_events);
 
 	/*
 	 * Measurement alerts are shared and might happen when the PMU
 	 * is not reserved. Ignore these alerts in this case.
 	 */
-	if (!(cpuhw->flags & PMU_F_RESERVED))
+	cpuhw = this_cpu_cfhw();
+	if (!cpuhw)
 		return;
 
 	/* counter authorization change alert */
@@ -1039,19 +1250,11 @@ out1:
  * counter set via normal file operations.
  */
-static atomic_t cfset_opencnt = ATOMIC_INIT(0);		/* Access count */
-static DEFINE_MUTEX(cfset_ctrset_mutex);/* Synchronize access to hardware */
 
 struct cfset_call_on_cpu_parm {		/* Parm struct for smp_call_on_cpu */
 	unsigned int sets;		/* Counter set bit mask */
 	atomic_t cpus_ack;		/* # CPUs successfully executed func */
 };
 
-static struct cfset_session {		/* CPUs and counter set bit mask */
-	struct list_head head;		/* Head of list of active processes */
-} cfset_session = {
-	.head = LIST_HEAD_INIT(cfset_session.head)
-};
-
 struct cfset_request {			/* CPUs and counter set bit mask */
 	unsigned long ctrset;		/* Bit mask of counter set to read */
 	cpumask_t mask;			/* CPU mask to read from */
@@ -1113,11 +1316,11 @@ static void cfset_session_add(struct cfset_request *p)
 /* Stop all counter sets via ioctl interface */
 static void cfset_ioctl_off(void *parm)
 {
-	struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+	struct cpu_cf_events *cpuhw = this_cpu_cfhw();
 	struct cfset_call_on_cpu_parm *p = parm;
 	int rc;
 
-	/* Check if any counter set used by /dev/hwc */
+	/* Check if any counter set used by /dev/hwctr */
 	for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc)
 		if ((p->sets & cpumf_ctr_ctl[rc])) {
 			if (!atomic_dec_return(&cpuhw->ctr_set[rc])) {
@@ -1141,7 +1344,7 @@ static void cfset_ioctl_off(void *parm)
 /* Start counter sets on particular CPU */
 static void cfset_ioctl_on(void *parm)
 {
-	struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+	struct cpu_cf_events *cpuhw = this_cpu_cfhw();
 	struct cfset_call_on_cpu_parm *p = parm;
 	int rc;
 
@@ -1163,7 +1366,7 @@ static void cfset_ioctl_on(void *parm)
 
 static void cfset_release_cpu(void *p)
 {
-	struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+	struct cpu_cf_events *cpuhw = this_cpu_cfhw();
 	int rc;
 
 	debug_sprintf_event(cf_dbg, 4, "%s state %#llx dev_state %#llx\n",
@@ -1203,27 +1406,41 @@ static int cfset_release(struct inode *inode, struct file *file)
 		kfree(file->private_data);
 		file->private_data = NULL;
 	}
-	if (!atomic_dec_return(&cfset_opencnt))
+	if (refcount_dec_and_test(&cfset_opencnt)) {	/* Last close */
 		on_each_cpu(cfset_release_cpu, NULL, 1);
+		cpum_cf_free(-1);
+	}
 	mutex_unlock(&cfset_ctrset_mutex);
-
-	hw_perf_event_destroy(NULL);
 	return 0;
 }
 
+/*
+ * Open via /dev/hwctr device. Allocate all per CPU resources on the first
+ * open of the device. The last close releases all per CPU resources.
+ * Parallel perf_event_open system calls also use per CPU resources.
+ * These invocations are handled via reference counting on the per CPU data
+ * structures.
+ */
 static int cfset_open(struct inode *inode, struct file *file)
 {
-	if (!capable(CAP_SYS_ADMIN))
+	int rc = 0;
+
+	if (!perfmon_capable())
 		return -EPERM;
+	file->private_data = NULL;
+
 	mutex_lock(&cfset_ctrset_mutex);
-	if (atomic_inc_return(&cfset_opencnt) == 1)
-		cfset_session_init();
+	if (!refcount_inc_not_zero(&cfset_opencnt)) {	/* First open */
+		rc = cpum_cf_alloc(-1);
+		if (!rc) {
+			cfset_session_init();
+			refcount_set(&cfset_opencnt, 1);
+		}
+	}
 	mutex_unlock(&cfset_ctrset_mutex);
-	cpumf_hw_inuse();
-	file->private_data = NULL;
 	/* nonseekable_open() never fails */
-	return nonseekable_open(inode, file);
+	return rc ?: nonseekable_open(inode, file);
 }
 
 static int cfset_all_start(struct cfset_request *req)
@@ -1280,7 +1497,7 @@ static int cfset_all_copy(unsigned long arg, cpumask_t *mask)
 	ctrset_read = (struct s390_ctrset_read __user *)arg;
 	uptr = ctrset_read->data;
 	for_each_cpu(cpu, mask) {
-		struct cpu_cf_events *cpuhw = per_cpu_ptr(&cpu_cf_events, cpu);
+		struct cpu_cf_events *cpuhw = get_cpu_cfhw(cpu);
 		struct s390_ctrset_cpudata __user *ctrset_cpudata;
 
 		ctrset_cpudata = uptr;
@@ -1324,7 +1541,7 @@ static size_t cfset_cpuset_read(struct s390_ctrset_setdata *p, int ctrset,
 /* Read all counter sets. */
 static void cfset_cpu_read(void *parm)
 {
-	struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+	struct cpu_cf_events *cpuhw = this_cpu_cfhw();
 	struct cfset_call_on_cpu_parm *p = parm;
 	int set, set_size;
 	size_t space;
@@ -1348,9 +1565,9 @@ static void cfset_cpu_read(void *parm)
 			cpuhw->used += space;
 			cpuhw->sets += 1;
 		}
+		debug_sprintf_event(cf_dbg, 4, "%s sets %d used %zd\n", __func__,
+				    cpuhw->sets, cpuhw->used);
 	}
-	debug_sprintf_event(cf_dbg, 4, "%s sets %d used %zd\n", __func__,
-			    cpuhw->sets, cpuhw->used);
 }
 
 static int cfset_all_read(unsigned long arg, struct cfset_request *req)
@@ -1502,6 +1719,7 @@ static struct miscdevice cfset_dev = {
 	.name	= S390_HWCTR_DEVICE,
 	.minor	= MISC_DYNAMIC_MINOR,
 	.fops	= &cfset_fops,
+	.mode	= 0666,
 };
 
 /* Hotplug add of a CPU. Scan through all active processes and add
@@ -1512,7 +1730,6 @@ static int cfset_online_cpu(unsigned int cpu)
 	struct cfset_call_on_cpu_parm p;
 	struct cfset_request *rp;
 
-	mutex_lock(&cfset_ctrset_mutex);
 	if (!list_empty(&cfset_session.head)) {
 		list_for_each_entry(rp, &cfset_session.head, node) {
 			p.sets = rp->ctrset;
@@ -1520,19 +1737,18 @@ static int cfset_online_cpu(unsigned int cpu)
 			cpumask_set_cpu(cpu, &rp->mask);
 		}
 	}
-	mutex_unlock(&cfset_ctrset_mutex);
 	return 0;
 }
 
 /* Hotplug remove of a CPU. Scan through all active processes and clear
  * that CPU from the list of CPUs supplied with ioctl(..., START, ...).
+ * Adjust reference counts.
  */
 static int cfset_offline_cpu(unsigned int cpu)
 {
 	struct cfset_call_on_cpu_parm p;
 	struct cfset_request *rp;
 
-	mutex_lock(&cfset_ctrset_mutex);
 	if (!list_empty(&cfset_session.head)) {
 		list_for_each_entry(rp, &cfset_session.head, node) {
 			p.sets = rp->ctrset;
@@ -1540,7 +1756,6 @@ static int cfset_offline_cpu(unsigned int cpu)
 			cpumask_clear_cpu(cpu, &rp->mask);
 		}
 	}
-	mutex_unlock(&cfset_ctrset_mutex);
 	return 0;
 }
 
@@ -1618,7 +1833,8 @@ static int cfdiag_event_init(struct perf_event *event)
 	}
 
 	/* Initialize for using the CPU-measurement counter facility */
-	cpumf_hw_inuse();
+	if (cpum_cf_alloc(event->cpu))
+		return -ENOMEM;
 	event->destroy = hw_perf_event_destroy;
 
 	err = cfdiag_event_init2(event);
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 7ef72f5ff52e..8ecfbce4ac92 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -1271,16 +1271,6 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
 	}
 }
 
-static inline __uint128_t __cdsg(__uint128_t *ptr, __uint128_t old, __uint128_t new)
-{
-	asm volatile(
-		"	cdsg	%[old],%[new],%[ptr]\n"
-		: [old] "+d" (old), [ptr] "+QS" (*ptr)
-		: [new] "d" (new)
-		: "memory", "cc");
-	return old;
-}
-
 /* hw_perf_event_update() - Process sampling buffer
  * @event:	The perf event
  * @flush_all:	Flag to also flush partially filled sample-data-blocks
@@ -1352,7 +1342,7 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
 		new.f = 0;
 		new.a = 1;
 		new.overflow = 0;
-		prev.val = __cdsg(&te->header.val, old.val, new.val);
+		prev.val = cmpxchg128(&te->header.val, old.val, new.val);
 	} while (prev.val != old.val);
 
 	/* Advance to next sample-data-block */
@@ -1562,7 +1552,7 @@ static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index,
 		}
 		new.a = 1;
 		new.overflow = 0;
-		prev.val = __cdsg(&te->header.val, old.val, new.val);
+		prev.val = cmpxchg128(&te->header.val, old.val, new.val);
 	} while (prev.val != old.val);
 	return true;
 }
@@ -1636,7 +1626,7 @@ static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range,
 			new.a = 1;
 		else
 			new.a = 0;
-		prev.val = __cdsg(&te->header.val, old.val, new.val);
+		prev.val = cmpxchg128(&te->header.val, old.val, new.val);
 	} while (prev.val != old.val);
 	*overflow += orig_overflow;
 }
diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c
index a7b339c4fd7c..fe7d1774ded1 100644
--- a/arch/s390/kernel/perf_pai_crypto.c
+++ b/arch/s390/kernel/perf_pai_crypto.c
@@ -36,7 +36,7 @@ struct paicrypt_map {
 	unsigned long *page;		/* Page for CPU to store counters */
 	struct pai_userdata *save;	/* Page to store no-zero counters */
 	unsigned int active_events;	/* # of PAI crypto users */
-	unsigned int refcnt;		/* Reference count mapped buffers */
+	refcount_t refcnt;		/* Reference count mapped buffers */
 	enum paievt_mode mode;		/* Type of event */
 	struct perf_event *event;	/* Perf event for sampling */
 };
@@ -57,10 +57,11 @@ static void paicrypt_event_destroy(struct perf_event *event)
 	static_branch_dec(&pai_key);
 	mutex_lock(&pai_reserve_mutex);
 	debug_sprintf_event(cfm_dbg, 5, "%s event %#llx cpu %d users %d"
-			    " mode %d refcnt %d\n", __func__,
+			    " mode %d refcnt %u\n", __func__,
 			    event->attr.config, event->cpu,
-			    cpump->active_events, cpump->mode, cpump->refcnt);
-	if (!--cpump->refcnt) {
+			    cpump->active_events, cpump->mode,
+			    refcount_read(&cpump->refcnt));
+	if (refcount_dec_and_test(&cpump->refcnt)) {
 		debug_sprintf_event(cfm_dbg, 4, "%s page %#lx save %p\n",
 				    __func__, (unsigned long)cpump->page,
 				    cpump->save);
@@ -149,8 +150,10 @@ static int paicrypt_busy(struct perf_event_attr *a, struct paicrypt_map *cpump)
 	/* Allocate memory for counter page and counter extraction.
 	 * Only the first counting event has to allocate a page.
 	 */
-	if (cpump->page)
+	if (cpump->page) {
+		refcount_inc(&cpump->refcnt);
 		goto unlock;
+	}
 
 	rc = -ENOMEM;
 	cpump->page = (unsigned long *)get_zeroed_page(GFP_KERNEL);
@@ -164,18 +167,18 @@ static int paicrypt_busy(struct perf_event_attr *a, struct paicrypt_map *cpump)
 		goto unlock;
 	}
 	rc = 0;
+	refcount_set(&cpump->refcnt, 1);
 
 unlock:
 	/* If rc is non-zero, do not set mode and reference count */
 	if (!rc) {
-		cpump->refcnt++;
 		cpump->mode = a->sample_period ? PAI_MODE_SAMPLING
 					       : PAI_MODE_COUNTING;
 	}
 	debug_sprintf_event(cfm_dbg, 5, "%s sample_period %#llx users %d"
-			    " mode %d refcnt %d page %#lx save %p rc %d\n",
+			    " mode %d refcnt %u page %#lx save %p rc %d\n",
 			    __func__, a->sample_period, cpump->active_events,
-			    cpump->mode, cpump->refcnt,
+			    cpump->mode, refcount_read(&cpump->refcnt),
 			    (unsigned long)cpump->page, cpump->save, rc);
 	mutex_unlock(&pai_reserve_mutex);
 	return rc;
diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c
index fcea307d7529..3b4f384f77f7 100644
--- a/arch/s390/kernel/perf_pai_ext.c
+++ b/arch/s390/kernel/perf_pai_ext.c
@@ -50,7 +50,7 @@ struct paiext_map {
 	struct pai_userdata *save;	/* Area to store non-zero counters */
 	enum paievt_mode mode;		/* Type of event */
 	unsigned int active_events;	/* # of PAI Extension users */
-	unsigned int refcnt;
+	refcount_t refcnt;
 	struct perf_event *event;	/* Perf event for sampling */
 	struct paiext_cb *paiext_cb;	/* PAI extension control block area */
 };
@@ -60,14 +60,14 @@ struct paiext_mapptr {
 };
 
 static struct paiext_root {		/* Anchor to per CPU data */
-	int refcnt;			/* Overall active events */
+	refcount_t refcnt;		/* Overall active events */
 	struct paiext_mapptr __percpu *mapptr;
 } paiext_root;
 
 /* Free per CPU data when the last event is removed. */
 static void paiext_root_free(void)
 {
-	if (!--paiext_root.refcnt) {
+	if (refcount_dec_and_test(&paiext_root.refcnt)) {
 		free_percpu(paiext_root.mapptr);
 		paiext_root.mapptr = NULL;
 	}
@@ -80,7 +80,7 @@ static void paiext_root_free(void)
  */
 static int paiext_root_alloc(void)
 {
-	if (++paiext_root.refcnt == 1) {
+	if (!refcount_inc_not_zero(&paiext_root.refcnt)) {
 		/* The memory is already zeroed. */
 		paiext_root.mapptr = alloc_percpu(struct paiext_mapptr);
 		if (!paiext_root.mapptr) {
@@ -91,6 +91,7 @@ static int paiext_root_alloc(void)
 			 */
 			return -ENOMEM;
 		}
+		refcount_set(&paiext_root.refcnt, 1);
 	}
 	return 0;
 }
@@ -122,7 +123,7 @@ static void paiext_event_destroy(struct perf_event *event)
 
 	mutex_lock(&paiext_reserve_mutex);
 	cpump->event = NULL;
-	if (!--cpump->refcnt)		/* Last reference gone */
+	if (refcount_dec_and_test(&cpump->refcnt))	/* Last reference gone */
 		paiext_free(mp);
 	paiext_root_free();
 	mutex_unlock(&paiext_reserve_mutex);
@@ -163,7 +164,7 @@ static int paiext_alloc(struct perf_event_attr *a, struct perf_event *event)
 		rc = -ENOMEM;
 		cpump = kzalloc(sizeof(*cpump), GFP_KERNEL);
 		if (!cpump)
-			goto unlock;
+			goto undo;
 
 		/* Allocate memory for counter area and counter extraction.
 		 * These are
@@ -183,8 +184,9 @@ static int paiext_alloc(struct perf_event_attr *a, struct perf_event *event)
 				     GFP_KERNEL);
 		if (!cpump->save || !cpump->area || !cpump->paiext_cb) {
 			paiext_free(mp);
-			goto unlock;
+			goto undo;
 		}
+		refcount_set(&cpump->refcnt, 1);
 		cpump->mode = a->sample_period ?
 			       PAI_MODE_SAMPLING : PAI_MODE_COUNTING;
 	} else {
@@ -195,15 +197,15 @@ static int paiext_alloc(struct perf_event_attr *a, struct perf_event *event)
 		if (cpump->mode == PAI_MODE_SAMPLING ||
 		    (cpump->mode == PAI_MODE_COUNTING && a->sample_period)) {
 			rc = -EBUSY;
-			goto unlock;
+			goto undo;
 		}
+		refcount_inc(&cpump->refcnt);
 	}
 
 	rc = 0;
 	cpump->event = event;
-	++cpump->refcnt;
 
-unlock:
+undo:
 	if (rc) {
 		/* Error in allocation of event, decrement anchor. Since
 		 * the event in not created, its destroy() function is never
@@ -211,6 +213,7 @@ unlock:
 		 */
 		paiext_root_free();
 	}
+unlock:
 	mutex_unlock(&paiext_reserve_mutex);
 	/* If rc is non-zero, no increment of counter/sampler was done. */
 	return rc;
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index b68f47541169..a6935af2235c 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -453,3 +453,4 @@
 448  common	process_mrelease	sys_process_mrelease		sys_process_mrelease
 449  common	futex_waitv		sys_futex_waitv			sys_futex_waitv
 450  common	set_mempolicy_home_node	sys_set_mempolicy_home_node	sys_set_mempolicy_home_node
+451  common	cachestat		sys_cachestat			sys_cachestat
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 6b7b6d5e3632..276278199c44 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -102,6 +102,11 @@ void __init time_early_init(void)
 			((long) qui.old_leap * 4096000000L);
 }
 
+unsigned long long noinstr sched_clock_noinstr(void)
+{
+	return tod_to_ns(__get_tod_clock_monotonic());
+}
+
 /*
  * Scheduler clock - returns current time in nanosec units.
  */
diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
index cb2ee06df286..3c62d1b218b1 100644
--- a/arch/s390/kernel/uv.c
+++ b/arch/s390/kernel/uv.c
@@ -294,6 +294,8 @@ again:
 	rc = -ENXIO;
 	ptep = get_locked_pte(gmap->mm, uaddr, &ptelock);
+	if (!ptep)
+		goto out;
 	if (pte_present(*ptep) && !(pte_val(*ptep) & _PAGE_INVALID) && pte_write(*ptep)) {
 		page = pte_page(*ptep);
 		rc = -EAGAIN;