From 9fa45070a2e59a871e1cd3370173369f3a4f61e2 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 4 Sep 2018 11:48:26 +0100 Subject: locking/atomics: Switch to generated fallbacks As a step to ensuring the atomic* APIs are consistent, switch to fallbacks generated by gen-atomic-fallback.sh. These are checked in rather than generated with Kbuild, since: * This allows inspection of the atomics with git grep and ctags on a pristine tree, which Linus strongly prefers being able to do. * The fallbacks are not affected by machine details or configuration options, so it is not necessary to regenerate them to take these into account. * These are included by files required *very* early in the build process (e.g. for generating bounds.h), and we'd rather not complicate the top-level Kbuild file with dependencies. The new fallback header should be equivalent to the old fallbacks in , but: * It is formatted a little differently due to scripting ensuring things are more regular than they used to be. * Fallbacks are now expanded in-place as static inline functions rather than macros. * The prototypes for fallbacks are arragned consistently with the return type on a separate line to try to keep to a sensible line length. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Signed-off-by: Peter Zijlstra (Intel) Acked-by: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: catalin.marinas@arm.com Cc: linuxdrivers@attotech.com Cc: dvyukov@google.com Cc: Boqun Feng Cc: arnd@arndb.de Cc: aryabinin@virtuozzo.com Cc: glider@google.com Link: http://lkml.kernel.org/r/20180904104830.2975-3-mark.rutland@arm.com Signed-off-by: Ingo Molnar --- include/linux/atomic-fallback.h | 2294 +++++++++++++++++++++++++++++++++++++++ include/linux/atomic.h | 1241 +-------------------- 2 files changed, 2295 insertions(+), 1240 deletions(-) create mode 100644 include/linux/atomic-fallback.h (limited to 'include/linux') diff --git a/include/linux/atomic-fallback.h b/include/linux/atomic-fallback.h new file mode 100644 index 000000000000..1c02c0112fbb --- /dev/null +++ b/include/linux/atomic-fallback.h @@ -0,0 +1,2294 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Generated by scripts/atomic/gen-atomic-fallback.sh +// DO NOT MODIFY THIS FILE DIRECTLY + +#ifndef _LINUX_ATOMIC_FALLBACK_H +#define _LINUX_ATOMIC_FALLBACK_H + +#ifndef xchg_relaxed +#define xchg_relaxed xchg +#define xchg_acquire xchg +#define xchg_release xchg +#else /* xchg_relaxed */ + +#ifndef xchg_acquire +#define xchg_acquire(...) \ + __atomic_op_acquire(xchg, __VA_ARGS__) +#endif + +#ifndef xchg_release +#define xchg_release(...) \ + __atomic_op_release(xchg, __VA_ARGS__) +#endif + +#ifndef xchg +#define xchg(...) \ + __atomic_op_fence(xchg, __VA_ARGS__) +#endif + +#endif /* xchg_relaxed */ + +#ifndef cmpxchg_relaxed +#define cmpxchg_relaxed cmpxchg +#define cmpxchg_acquire cmpxchg +#define cmpxchg_release cmpxchg +#else /* cmpxchg_relaxed */ + +#ifndef cmpxchg_acquire +#define cmpxchg_acquire(...) \ + __atomic_op_acquire(cmpxchg, __VA_ARGS__) +#endif + +#ifndef cmpxchg_release +#define cmpxchg_release(...) \ + __atomic_op_release(cmpxchg, __VA_ARGS__) +#endif + +#ifndef cmpxchg +#define cmpxchg(...) \ + __atomic_op_fence(cmpxchg, __VA_ARGS__) +#endif + +#endif /* cmpxchg_relaxed */ + +#ifndef cmpxchg64_relaxed +#define cmpxchg64_relaxed cmpxchg64 +#define cmpxchg64_acquire cmpxchg64 +#define cmpxchg64_release cmpxchg64 +#else /* cmpxchg64_relaxed */ + +#ifndef cmpxchg64_acquire +#define cmpxchg64_acquire(...) \ + __atomic_op_acquire(cmpxchg64, __VA_ARGS__) +#endif + +#ifndef cmpxchg64_release +#define cmpxchg64_release(...) \ + __atomic_op_release(cmpxchg64, __VA_ARGS__) +#endif + +#ifndef cmpxchg64 +#define cmpxchg64(...) \ + __atomic_op_fence(cmpxchg64, __VA_ARGS__) +#endif + +#endif /* cmpxchg64_relaxed */ + +#ifndef atomic_read_acquire +static inline int +atomic_read_acquire(const atomic_t *v) +{ + return smp_load_acquire(&(v)->counter); +} +#define atomic_read_acquire atomic_read_acquire +#endif + +#ifndef atomic_set_release +static inline void +atomic_set_release(atomic_t *v, int i) +{ + smp_store_release(&(v)->counter, i); +} +#define atomic_set_release atomic_set_release +#endif + +#ifndef atomic_add_return_relaxed +#define atomic_add_return_acquire atomic_add_return +#define atomic_add_return_release atomic_add_return +#define atomic_add_return_relaxed atomic_add_return +#else /* atomic_add_return_relaxed */ + +#ifndef atomic_add_return_acquire +static inline int +atomic_add_return_acquire(int i, atomic_t *v) +{ + int ret = atomic_add_return_relaxed(i, v); + __atomic_acquire_fence(); + return ret; +} +#define atomic_add_return_acquire atomic_add_return_acquire +#endif + +#ifndef atomic_add_return_release +static inline int +atomic_add_return_release(int i, atomic_t *v) +{ + __atomic_release_fence(); + return atomic_add_return_relaxed(i, v); +} +#define atomic_add_return_release atomic_add_return_release +#endif + +#ifndef atomic_add_return +static inline int +atomic_add_return(int i, atomic_t *v) +{ + int ret; + __atomic_pre_full_fence(); + ret = atomic_add_return_relaxed(i, v); + __atomic_post_full_fence(); + return ret; +} +#define atomic_add_return atomic_add_return +#endif + +#endif /* atomic_add_return_relaxed */ + +#ifndef atomic_fetch_add_relaxed +#define atomic_fetch_add_acquire atomic_fetch_add +#define atomic_fetch_add_release atomic_fetch_add +#define atomic_fetch_add_relaxed atomic_fetch_add +#else /* atomic_fetch_add_relaxed */ + +#ifndef atomic_fetch_add_acquire +static inline int +atomic_fetch_add_acquire(int i, atomic_t *v) +{ + int ret = atomic_fetch_add_relaxed(i, v); + __atomic_acquire_fence(); + return ret; +} +#define atomic_fetch_add_acquire atomic_fetch_add_acquire +#endif + +#ifndef atomic_fetch_add_release +static inline int +atomic_fetch_add_release(int i, atomic_t *v) +{ + __atomic_release_fence(); + return atomic_fetch_add_relaxed(i, v); +} +#define atomic_fetch_add_release atomic_fetch_add_release +#endif + +#ifndef atomic_fetch_add +static inline int +atomic_fetch_add(int i, atomic_t *v) +{ + int ret; + __atomic_pre_full_fence(); + ret = atomic_fetch_add_relaxed(i, v); + __atomic_post_full_fence(); + return ret; +} +#define atomic_fetch_add atomic_fetch_add +#endif + +#endif /* atomic_fetch_add_relaxed */ + +#ifndef atomic_sub_return_relaxed +#define atomic_sub_return_acquire atomic_sub_return +#define atomic_sub_return_release atomic_sub_return +#define atomic_sub_return_relaxed atomic_sub_return +#else /* atomic_sub_return_relaxed */ + +#ifndef atomic_sub_return_acquire +static inline int +atomic_sub_return_acquire(int i, atomic_t *v) +{ + int ret = atomic_sub_return_relaxed(i, v); + __atomic_acquire_fence(); + return ret; +} +#define atomic_sub_return_acquire atomic_sub_return_acquire +#endif + +#ifndef atomic_sub_return_release +static inline int +atomic_sub_return_release(int i, atomic_t *v) +{ + __atomic_release_fence(); + return atomic_sub_return_relaxed(i, v); +} +#define atomic_sub_return_release atomic_sub_return_release +#endif + +#ifndef atomic_sub_return +static inline int +atomic_sub_return(int i, atomic_t *v) +{ + int ret; + __atomic_pre_full_fence(); + ret = atomic_sub_return_relaxed(i, v); + __atomic_post_full_fence(); + return ret; +} +#define atomic_sub_return atomic_sub_return +#endif + +#endif /* atomic_sub_return_relaxed */ + +#ifndef atomic_fetch_sub_relaxed +#define atomic_fetch_sub_acquire atomic_fetch_sub +#define atomic_fetch_sub_release atomic_fetch_sub +#define atomic_fetch_sub_relaxed atomic_fetch_sub +#else /* atomic_fetch_sub_relaxed */ + +#ifndef atomic_fetch_sub_acquire +static inline int +atomic_fetch_sub_acquire(int i, atomic_t *v) +{ + int ret = atomic_fetch_sub_relaxed(i, v); + __atomic_acquire_fence(); + return ret; +} +#define atomic_fetch_sub_acquire atomic_fetch_sub_acquire +#endif + +#ifndef atomic_fetch_sub_release +static inline int +atomic_fetch_sub_release(int i, atomic_t *v) +{ + __atomic_release_fence(); + return atomic_fetch_sub_relaxed(i, v); +} +#define atomic_fetch_sub_release atomic_fetch_sub_release +#endif + +#ifndef atomic_fetch_sub +static inline int +atomic_fetch_sub(int i, atomic_t *v) +{ + int ret; + __atomic_pre_full_fence(); + ret = atomic_fetch_sub_relaxed(i, v); + __atomic_post_full_fence(); + return ret; +} +#define atomic_fetch_sub atomic_fetch_sub +#endif + +#endif /* atomic_fetch_sub_relaxed */ + +#ifndef atomic_inc +static inline void +atomic_inc(atomic_t *v) +{ + atomic_add(1, v); +} +#define atomic_inc atomic_inc +#endif + +#ifndef atomic_inc_return_relaxed +#ifdef atomic_inc_return +#define atomic_inc_return_acquire atomic_inc_return +#define atomic_inc_return_release atomic_inc_return +#define atomic_inc_return_relaxed atomic_inc_return +#endif /* atomic_inc_return */ + +#ifndef atomic_inc_return +static inline int +atomic_inc_return(atomic_t *v) +{ + return atomic_add_return(1, v); +} +#define atomic_inc_return atomic_inc_return +#endif + +#ifndef atomic_inc_return_acquire +static inline int +atomic_inc_return_acquire(atomic_t *v) +{ + return atomic_add_return_acquire(1, v); +} +#define atomic_inc_return_acquire atomic_inc_return_acquire +#endif + +#ifndef atomic_inc_return_release +static inline int +atomic_inc_return_release(atomic_t *v) +{ + return atomic_add_return_release(1, v); +} +#define atomic_inc_return_release atomic_inc_return_release +#endif + +#ifndef atomic_inc_return_relaxed +static inline int +atomic_inc_return_relaxed(atomic_t *v) +{ + return atomic_add_return_relaxed(1, v); +} +#define atomic_inc_return_relaxed atomic_inc_return_relaxed +#endif + +#else /* atomic_inc_return_relaxed */ + +#ifndef atomic_inc_return_acquire +static inline int +atomic_inc_return_acquire(atomic_t *v) +{ + int ret = atomic_inc_return_relaxed(v); + __atomic_acquire_fence(); + return ret; +} +#define atomic_inc_return_acquire atomic_inc_return_acquire +#endif + +#ifndef atomic_inc_return_release +static inline int +atomic_inc_return_release(atomic_t *v) +{ + __atomic_release_fence(); + return atomic_inc_return_relaxed(v); +} +#define atomic_inc_return_release atomic_inc_return_release +#endif + +#ifndef atomic_inc_return +static inline int +atomic_inc_return(atomic_t *v) +{ + int ret; + __atomic_pre_full_fence(); + ret = atomic_inc_return_relaxed(v); + __atomic_post_full_fence(); + return ret; +} +#define atomic_inc_return atomic_inc_return +#endif + +#endif /* atomic_inc_return_relaxed */ + +#ifndef atomic_fetch_inc_relaxed +#ifdef atomic_fetch_inc +#define atomic_fetch_inc_acquire atomic_fetch_inc +#define atomic_fetch_inc_release atomic_fetch_inc +#define atomic_fetch_inc_relaxed atomic_fetch_inc +#endif /* atomic_fetch_inc */ + +#ifndef atomic_fetch_inc +static inline int +atomic_fetch_inc(atomic_t *v) +{ + return atomic_fetch_add(1, v); +} +#define atomic_fetch_inc atomic_fetch_inc +#endif + +#ifndef atomic_fetch_inc_acquire +static inline int +atomic_fetch_inc_acquire(atomic_t *v) +{ + return atomic_fetch_add_acquire(1, v); +} +#define atomic_fetch_inc_acquire atomic_fetch_inc_acquire +#endif + +#ifndef atomic_fetch_inc_release +static inline int +atomic_fetch_inc_release(atomic_t *v) +{ + return atomic_fetch_add_release(1, v); +} +#define atomic_fetch_inc_release atomic_fetch_inc_release +#endif + +#ifndef atomic_fetch_inc_relaxed +static inline int +atomic_fetch_inc_relaxed(atomic_t *v) +{ + return atomic_fetch_add_relaxed(1, v); +} +#define atomic_fetch_inc_relaxed atomic_fetch_inc_relaxed +#endif + +#else /* atomic_fetch_inc_relaxed */ + +#ifndef atomic_fetch_inc_acquire +static inline int +atomic_fetch_inc_acquire(atomic_t *v) +{ + int ret = atomic_fetch_inc_relaxed(v); + __atomic_acquire_fence(); + return ret; +} +#define atomic_fetch_inc_acquire atomic_fetch_inc_acquire +#endif + +#ifndef atomic_fetch_inc_release +static inline int +atomic_fetch_inc_release(atomic_t *v) +{ + __atomic_release_fence(); + return atomic_fetch_inc_relaxed(v); +} +#define atomic_fetch_inc_release atomic_fetch_inc_release +#endif + +#ifndef atomic_fetch_inc +static inline int +atomic_fetch_inc(atomic_t *v) +{ + int ret; + __atomic_pre_full_fence(); + ret = atomic_fetch_inc_relaxed(v); + __atomic_post_full_fence(); + return ret; +} +#define atomic_fetch_inc atomic_fetch_inc +#endif + +#endif /* atomic_fetch_inc_relaxed */ + +#ifndef atomic_dec +static inline void +atomic_dec(atomic_t *v) +{ + atomic_sub(1, v); +} +#define atomic_dec atomic_dec +#endif + +#ifndef atomic_dec_return_relaxed +#ifdef atomic_dec_return +#define atomic_dec_return_acquire atomic_dec_return +#define atomic_dec_return_release atomic_dec_return +#define atomic_dec_return_relaxed atomic_dec_return +#endif /* atomic_dec_return */ + +#ifndef atomic_dec_return +static inline int +atomic_dec_return(atomic_t *v) +{ + return atomic_sub_return(1, v); +} +#define atomic_dec_return atomic_dec_return +#endif + +#ifndef atomic_dec_return_acquire +static inline int +atomic_dec_return_acquire(atomic_t *v) +{ + return atomic_sub_return_acquire(1, v); +} +#define atomic_dec_return_acquire atomic_dec_return_acquire +#endif + +#ifndef atomic_dec_return_release +static inline int +atomic_dec_return_release(atomic_t *v) +{ + return atomic_sub_return_release(1, v); +} +#define atomic_dec_return_release atomic_dec_return_release +#endif + +#ifndef atomic_dec_return_relaxed +static inline int +atomic_dec_return_relaxed(atomic_t *v) +{ + return atomic_sub_return_relaxed(1, v); +} +#define atomic_dec_return_relaxed atomic_dec_return_relaxed +#endif + +#else /* atomic_dec_return_relaxed */ + +#ifndef atomic_dec_return_acquire +static inline int +atomic_dec_return_acquire(atomic_t *v) +{ + int ret = atomic_dec_return_relaxed(v); + __atomic_acquire_fence(); + return ret; +} +#define atomic_dec_return_acquire atomic_dec_return_acquire +#endif + +#ifndef atomic_dec_return_release +static inline int +atomic_dec_return_release(atomic_t *v) +{ + __atomic_release_fence(); + return atomic_dec_return_relaxed(v); +} +#define atomic_dec_return_release atomic_dec_return_release +#endif + +#ifndef atomic_dec_return +static inline int +atomic_dec_return(atomic_t *v) +{ + int ret; + __atomic_pre_full_fence(); + ret = atomic_dec_return_relaxed(v); + __atomic_post_full_fence(); + return ret; +} +#define atomic_dec_return atomic_dec_return +#endif + +#endif /* atomic_dec_return_relaxed */ + +#ifndef atomic_fetch_dec_relaxed +#ifdef atomic_fetch_dec +#define atomic_fetch_dec_acquire atomic_fetch_dec +#define atomic_fetch_dec_release atomic_fetch_dec +#define atomic_fetch_dec_relaxed atomic_fetch_dec +#endif /* atomic_fetch_dec */ + +#ifndef atomic_fetch_dec +static inline int +atomic_fetch_dec(atomic_t *v) +{ + return atomic_fetch_sub(1, v); +} +#define atomic_fetch_dec atomic_fetch_dec +#endif + +#ifndef atomic_fetch_dec_acquire +static inline int +atomic_fetch_dec_acquire(atomic_t *v) +{ + return atomic_fetch_sub_acquire(1, v); +} +#define atomic_fetch_dec_acquire atomic_fetch_dec_acquire +#endif + +#ifndef atomic_fetch_dec_release +static inline int +atomic_fetch_dec_release(atomic_t *v) +{ + return atomic_fetch_sub_release(1, v); +} +#define atomic_fetch_dec_release atomic_fetch_dec_release +#endif + +#ifndef atomic_fetch_dec_relaxed +static inline int +atomic_fetch_dec_relaxed(atomic_t *v) +{ + return atomic_fetch_sub_relaxed(1, v); +} +#define atomic_fetch_dec_relaxed atomic_fetch_dec_relaxed +#endif + +#else /* atomic_fetch_dec_relaxed */ + +#ifndef atomic_fetch_dec_acquire +static inline int +atomic_fetch_dec_acquire(atomic_t *v) +{ + int ret = atomic_fetch_dec_relaxed(v); + __atomic_acquire_fence(); + return ret; +} +#define atomic_fetch_dec_acquire atomic_fetch_dec_acquire +#endif + +#ifndef atomic_fetch_dec_release +static inline int +atomic_fetch_dec_release(atomic_t *v) +{ + __atomic_release_fence(); + return atomic_fetch_dec_relaxed(v); +} +#define atomic_fetch_dec_release atomic_fetch_dec_release +#endif + +#ifndef atomic_fetch_dec +static inline int +atomic_fetch_dec(atomic_t *v) +{ + int ret; + __atomic_pre_full_fence(); + ret = atomic_fetch_dec_relaxed(v); + __atomic_post_full_fence(); + return ret; +} +#define atomic_fetch_dec atomic_fetch_dec +#endif + +#endif /* atomic_fetch_dec_relaxed */ + +#ifndef atomic_fetch_and_relaxed +#define atomic_fetch_and_acquire atomic_fetch_and +#define atomic_fetch_and_release atomic_fetch_and +#define atomic_fetch_and_relaxed atomic_fetch_and +#else /* atomic_fetch_and_relaxed */ + +#ifndef atomic_fetch_and_acquire +static inline int +atomic_fetch_and_acquire(int i, atomic_t *v) +{ + int ret = atomic_fetch_and_relaxed(i, v); + __atomic_acquire_fence(); + return ret; +} +#define atomic_fetch_and_acquire atomic_fetch_and_acquire +#endif + +#ifndef atomic_fetch_and_release +static inline int +atomic_fetch_and_release(int i, atomic_t *v) +{ + __atomic_release_fence(); + return atomic_fetch_and_relaxed(i, v); +} +#define atomic_fetch_and_release atomic_fetch_and_release +#endif + +#ifndef atomic_fetch_and +static inline int +atomic_fetch_and(int i, atomic_t *v) +{ + int ret; + __atomic_pre_full_fence(); + ret = atomic_fetch_and_relaxed(i, v); + __atomic_post_full_fence(); + return ret; +} +#define atomic_fetch_and atomic_fetch_and +#endif + +#endif /* atomic_fetch_and_relaxed */ + +#ifndef atomic_andnot +static inline void +atomic_andnot(int i, atomic_t *v) +{ + atomic_and(~i, v); +} +#define atomic_andnot atomic_andnot +#endif + +#ifndef atomic_fetch_andnot_relaxed +#ifdef atomic_fetch_andnot +#define atomic_fetch_andnot_acquire atomic_fetch_andnot +#define atomic_fetch_andnot_release atomic_fetch_andnot +#define atomic_fetch_andnot_relaxed atomic_fetch_andnot +#endif /* atomic_fetch_andnot */ + +#ifndef atomic_fetch_andnot +static inline int +atomic_fetch_andnot(int i, atomic_t *v) +{ + return atomic_fetch_and(~i, v); +} +#define atomic_fetch_andnot atomic_fetch_andnot +#endif + +#ifndef atomic_fetch_andnot_acquire +static inline int +atomic_fetch_andnot_acquire(int i, atomic_t *v) +{ + return atomic_fetch_and_acquire(~i, v); +} +#define atomic_fetch_andnot_acquire atomic_fetch_andnot_acquire +#endif + +#ifndef atomic_fetch_andnot_release +static inline int +atomic_fetch_andnot_release(int i, atomic_t *v) +{ + return atomic_fetch_and_release(~i, v); +} +#define atomic_fetch_andnot_release atomic_fetch_andnot_release +#endif + +#ifndef atomic_fetch_andnot_relaxed +static inline int +atomic_fetch_andnot_relaxed(int i, atomic_t *v) +{ + return atomic_fetch_and_relaxed(~i, v); +} +#define atomic_fetch_andnot_relaxed atomic_fetch_andnot_relaxed +#endif + +#else /* atomic_fetch_andnot_relaxed */ + +#ifndef atomic_fetch_andnot_acquire +static inline int +atomic_fetch_andnot_acquire(int i, atomic_t *v) +{ + int ret = atomic_fetch_andnot_relaxed(i, v); + __atomic_acquire_fence(); + return ret; +} +#define atomic_fetch_andnot_acquire atomic_fetch_andnot_acquire +#endif + +#ifndef atomic_fetch_andnot_release +static inline int +atomic_fetch_andnot_release(int i, atomic_t *v) +{ + __atomic_release_fence(); + return atomic_fetch_andnot_relaxed(i, v); +} +#define atomic_fetch_andnot_release atomic_fetch_andnot_release +#endif + +#ifndef atomic_fetch_andnot +static inline int +atomic_fetch_andnot(int i, atomic_t *v) +{ + int ret; + __atomic_pre_full_fence(); + ret = atomic_fetch_andnot_relaxed(i, v); + __atomic_post_full_fence(); + return ret; +} +#define atomic_fetch_andnot atomic_fetch_andnot +#endif + +#endif /* atomic_fetch_andnot_relaxed */ + +#ifndef atomic_fetch_or_relaxed +#define atomic_fetch_or_acquire atomic_fetch_or +#define atomic_fetch_or_release atomic_fetch_or +#define atomic_fetch_or_relaxed atomic_fetch_or +#else /* atomic_fetch_or_relaxed */ + +#ifndef atomic_fetch_or_acquire +static inline int +atomic_fetch_or_acquire(int i, atomic_t *v) +{ + int ret = atomic_fetch_or_relaxed(i, v); + __atomic_acquire_fence(); + return ret; +} +#define atomic_fetch_or_acquire atomic_fetch_or_acquire +#endif + +#ifndef atomic_fetch_or_release +static inline int +atomic_fetch_or_release(int i, atomic_t *v) +{ + __atomic_release_fence(); + return atomic_fetch_or_relaxed(i, v); +} +#define atomic_fetch_or_release atomic_fetch_or_release +#endif + +#ifndef atomic_fetch_or +static inline int +atomic_fetch_or(int i, atomic_t *v) +{ + int ret; + __atomic_pre_full_fence(); + ret = atomic_fetch_or_relaxed(i, v); + __atomic_post_full_fence(); + return ret; +} +#define atomic_fetch_or atomic_fetch_or +#endif + +#endif /* atomic_fetch_or_relaxed */ + +#ifndef atomic_fetch_xor_relaxed +#define atomic_fetch_xor_acquire atomic_fetch_xor +#define atomic_fetch_xor_release atomic_fetch_xor +#define atomic_fetch_xor_relaxed atomic_fetch_xor +#else /* atomic_fetch_xor_relaxed */ + +#ifndef atomic_fetch_xor_acquire +static inline int +atomic_fetch_xor_acquire(int i, atomic_t *v) +{ + int ret = atomic_fetch_xor_relaxed(i, v); + __atomic_acquire_fence(); + return ret; +} +#define atomic_fetch_xor_acquire atomic_fetch_xor_acquire +#endif + +#ifndef atomic_fetch_xor_release +static inline int +atomic_fetch_xor_release(int i, atomic_t *v) +{ + __atomic_release_fence(); + return atomic_fetch_xor_relaxed(i, v); +} +#define atomic_fetch_xor_release atomic_fetch_xor_release +#endif + +#ifndef atomic_fetch_xor +static inline int +atomic_fetch_xor(int i, atomic_t *v) +{ + int ret; + __atomic_pre_full_fence(); + ret = atomic_fetch_xor_relaxed(i, v); + __atomic_post_full_fence(); + return ret; +} +#define atomic_fetch_xor atomic_fetch_xor +#endif + +#endif /* atomic_fetch_xor_relaxed */ + +#ifndef atomic_xchg_relaxed +#define atomic_xchg_acquire atomic_xchg +#define atomic_xchg_release atomic_xchg +#define atomic_xchg_relaxed atomic_xchg +#else /* atomic_xchg_relaxed */ + +#ifndef atomic_xchg_acquire +static inline int +atomic_xchg_acquire(atomic_t *v, int i) +{ + int ret = atomic_xchg_relaxed(v, i); + __atomic_acquire_fence(); + return ret; +} +#define atomic_xchg_acquire atomic_xchg_acquire +#endif + +#ifndef atomic_xchg_release +static inline int +atomic_xchg_release(atomic_t *v, int i) +{ + __atomic_release_fence(); + return atomic_xchg_relaxed(v, i); +} +#define atomic_xchg_release atomic_xchg_release +#endif + +#ifndef atomic_xchg +static inline int +atomic_xchg(atomic_t *v, int i) +{ + int ret; + __atomic_pre_full_fence(); + ret = atomic_xchg_relaxed(v, i); + __atomic_post_full_fence(); + return ret; +} +#define atomic_xchg atomic_xchg +#endif + +#endif /* atomic_xchg_relaxed */ + +#ifndef atomic_cmpxchg_relaxed +#define atomic_cmpxchg_acquire atomic_cmpxchg +#define atomic_cmpxchg_release atomic_cmpxchg +#define atomic_cmpxchg_relaxed atomic_cmpxchg +#else /* atomic_cmpxchg_relaxed */ + +#ifndef atomic_cmpxchg_acquire +static inline int +atomic_cmpxchg_acquire(atomic_t *v, int old, int new) +{ + int ret = atomic_cmpxchg_relaxed(v, old, new); + __atomic_acquire_fence(); + return ret; +} +#define atomic_cmpxchg_acquire atomic_cmpxchg_acquire +#endif + +#ifndef atomic_cmpxchg_release +static inline int +atomic_cmpxchg_release(atomic_t *v, int old, int new) +{ + __atomic_release_fence(); + return atomic_cmpxchg_relaxed(v, old, new); +} +#define atomic_cmpxchg_release atomic_cmpxchg_release +#endif + +#ifndef atomic_cmpxchg +static inline int +atomic_cmpxchg(atomic_t *v, int old, int new) +{ + int ret; + __atomic_pre_full_fence(); + ret = atomic_cmpxchg_relaxed(v, old, new); + __atomic_post_full_fence(); + return ret; +} +#define atomic_cmpxchg atomic_cmpxchg +#endif + +#endif /* atomic_cmpxchg_relaxed */ + +#ifndef atomic_try_cmpxchg_relaxed +#ifdef atomic_try_cmpxchg +#define atomic_try_cmpxchg_acquire atomic_try_cmpxchg +#define atomic_try_cmpxchg_release atomic_try_cmpxchg +#define atomic_try_cmpxchg_relaxed atomic_try_cmpxchg +#endif /* atomic_try_cmpxchg */ + +#ifndef atomic_try_cmpxchg +static inline bool +atomic_try_cmpxchg(atomic_t *v, int *old, int new) +{ + int r, o = *old; + r = atomic_cmpxchg(v, o, new); + if (unlikely(r != o)) + *old = r; + return likely(r == o); +} +#define atomic_try_cmpxchg atomic_try_cmpxchg +#endif + +#ifndef atomic_try_cmpxchg_acquire +static inline bool +atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new) +{ + int r, o = *old; + r = atomic_cmpxchg_acquire(v, o, new); + if (unlikely(r != o)) + *old = r; + return likely(r == o); +} +#define atomic_try_cmpxchg_acquire atomic_try_cmpxchg_acquire +#endif + +#ifndef atomic_try_cmpxchg_release +static inline bool +atomic_try_cmpxchg_release(atomic_t *v, int *old, int new) +{ + int r, o = *old; + r = atomic_cmpxchg_release(v, o, new); + if (unlikely(r != o)) + *old = r; + return likely(r == o); +} +#define atomic_try_cmpxchg_release atomic_try_cmpxchg_release +#endif + +#ifndef atomic_try_cmpxchg_relaxed +static inline bool +atomic_try_cmpxchg_relaxed(atomic_t *v, int *old, int new) +{ + int r, o = *old; + r = atomic_cmpxchg_relaxed(v, o, new); + if (unlikely(r != o)) + *old = r; + return likely(r == o); +} +#define atomic_try_cmpxchg_relaxed atomic_try_cmpxchg_relaxed +#endif + +#else /* atomic_try_cmpxchg_relaxed */ + +#ifndef atomic_try_cmpxchg_acquire +static inline bool +atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new) +{ + bool ret = atomic_try_cmpxchg_relaxed(v, old, new); + __atomic_acquire_fence(); + return ret; +} +#define atomic_try_cmpxchg_acquire atomic_try_cmpxchg_acquire +#endif + +#ifndef atomic_try_cmpxchg_release +static inline bool +atomic_try_cmpxchg_release(atomic_t *v, int *old, int new) +{ + __atomic_release_fence(); + return atomic_try_cmpxchg_relaxed(v, old, new); +} +#define atomic_try_cmpxchg_release atomic_try_cmpxchg_release +#endif + +#ifndef atomic_try_cmpxchg +static inline bool +atomic_try_cmpxchg(atomic_t *v, int *old, int new) +{ + bool ret; + __atomic_pre_full_fence(); + ret = atomic_try_cmpxchg_relaxed(v, old, new); + __atomic_post_full_fence(); + return ret; +} +#define atomic_try_cmpxchg atomic_try_cmpxchg +#endif + +#endif /* atomic_try_cmpxchg_relaxed */ + +#ifndef atomic_sub_and_test +/** + * atomic_sub_and_test - subtract value from variable and test result + * @i: integer value to subtract + * @v: pointer of type atomic_t + * + * Atomically subtracts @i from @v and returns + * true if the result is zero, or false for all + * other cases. + */ +static inline bool +atomic_sub_and_test(int i, atomic_t *v) +{ + return atomic_sub_return(i, v) == 0; +} +#define atomic_sub_and_test atomic_sub_and_test +#endif + +#ifndef atomic_dec_and_test +/** + * atomic_dec_and_test - decrement and test + * @v: pointer of type atomic_t + * + * Atomically decrements @v by 1 and + * returns true if the result is 0, or false for all other + * cases. + */ +static inline bool +atomic_dec_and_test(atomic_t *v) +{ + return atomic_dec_return(v) == 0; +} +#define atomic_dec_and_test atomic_dec_and_test +#endif + +#ifndef atomic_inc_and_test +/** + * atomic_inc_and_test - increment and test + * @v: pointer of type atomic_t + * + * Atomically increments @v by 1 + * and returns true if the result is zero, or false for all + * other cases. + */ +static inline bool +atomic_inc_and_test(atomic_t *v) +{ + return atomic_inc_return(v) == 0; +} +#define atomic_inc_and_test atomic_inc_and_test +#endif + +#ifndef atomic_add_negative +/** + * atomic_add_negative - add and test if negative + * @i: integer value to add + * @v: pointer of type atomic_t + * + * Atomically adds @i to @v and returns true + * if the result is negative, or false when + * result is greater than or equal to zero. + */ +static inline bool +atomic_add_negative(int i, atomic_t *v) +{ + return atomic_add_return(i, v) < 0; +} +#define atomic_add_negative atomic_add_negative +#endif + +#ifndef atomic_fetch_add_unless +/** + * atomic_fetch_add_unless - add unless the number is already a given value + * @v: pointer of type atomic_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, so long as @v was not already @u. + * Returns original value of @v + */ +static inline int +atomic_fetch_add_unless(atomic_t *v, int a, int u) +{ + int c = atomic_read(v); + + do { + if (unlikely(c == u)) + break; + } while (!atomic_try_cmpxchg(v, &c, c + a)); + + return c; +} +#define atomic_fetch_add_unless atomic_fetch_add_unless +#endif + +#ifndef atomic_add_unless +/** + * atomic_add_unless - add unless the number is already a given value + * @v: pointer of type atomic_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, if @v was not already @u. + * Returns true if the addition was done. + */ +static inline bool +atomic_add_unless(atomic_t *v, int a, int u) +{ + return atomic_fetch_add_unless(v, a, u) != u; +} +#define atomic_add_unless atomic_add_unless +#endif + +#ifndef atomic_inc_not_zero +/** + * atomic_inc_not_zero - increment unless the number is zero + * @v: pointer of type atomic_t + * + * Atomically increments @v by 1, if @v is non-zero. + * Returns true if the increment was done. + */ +static inline bool +atomic_inc_not_zero(atomic_t *v) +{ + return atomic_add_unless(v, 1, 0); +} +#define atomic_inc_not_zero atomic_inc_not_zero +#endif + +#ifndef atomic_inc_unless_negative +static inline bool +atomic_inc_unless_negative(atomic_t *v) +{ + int c = atomic_read(v); + + do { + if (unlikely(c < 0)) + return false; + } while (!atomic_try_cmpxchg(v, &c, c + 1)); + + return true; +} +#define atomic_inc_unless_negative atomic_inc_unless_negative +#endif + +#ifndef atomic_dec_unless_positive +static inline bool +atomic_dec_unless_positive(atomic_t *v) +{ + int c = atomic_read(v); + + do { + if (unlikely(c > 0)) + return false; + } while (!atomic_try_cmpxchg(v, &c, c - 1)); + + return true; +} +#define atomic_dec_unless_positive atomic_dec_unless_positive +#endif + +#ifndef atomic_dec_if_positive +static inline int +atomic_dec_if_positive(atomic_t *v) +{ + int dec, c = atomic_read(v); + + do { + dec = c - 1; + if (unlikely(dec < 0)) + break; + } while (!atomic_try_cmpxchg(v, &c, dec)); + + return dec; +} +#define atomic_dec_if_positive atomic_dec_if_positive +#endif + +#define atomic_cond_read_acquire(v, c) smp_cond_load_acquire(&(v)->counter, (c)) +#define atomic_cond_read_relaxed(v, c) smp_cond_load_relaxed(&(v)->counter, (c)) + +#ifdef CONFIG_GENERIC_ATOMIC64 +#include +#endif + +#ifndef atomic64_read_acquire +static inline s64 +atomic64_read_acquire(const atomic64_t *v) +{ + return smp_load_acquire(&(v)->counter); +} +#define atomic64_read_acquire atomic64_read_acquire +#endif + +#ifndef atomic64_set_release +static inline void +atomic64_set_release(atomic64_t *v, s64 i) +{ + smp_store_release(&(v)->counter, i); +} +#define atomic64_set_release atomic64_set_release +#endif + +#ifndef atomic64_add_return_relaxed +#define atomic64_add_return_acquire atomic64_add_return +#define atomic64_add_return_release atomic64_add_return +#define atomic64_add_return_relaxed atomic64_add_return +#else /* atomic64_add_return_relaxed */ + +#ifndef atomic64_add_return_acquire +static inline s64 +atomic64_add_return_acquire(s64 i, atomic64_t *v) +{ + s64 ret = atomic64_add_return_relaxed(i, v); + __atomic_acquire_fence(); + return ret; +} +#define atomic64_add_return_acquire atomic64_add_return_acquire +#endif + +#ifndef atomic64_add_return_release +static inline s64 +atomic64_add_return_release(s64 i, atomic64_t *v) +{ + __atomic_release_fence(); + return atomic64_add_return_relaxed(i, v); +} +#define atomic64_add_return_release atomic64_add_return_release +#endif + +#ifndef atomic64_add_return +static inline s64 +atomic64_add_return(s64 i, atomic64_t *v) +{ + s64 ret; + __atomic_pre_full_fence(); + ret = atomic64_add_return_relaxed(i, v); + __atomic_post_full_fence(); + return ret; +} +#define atomic64_add_return atomic64_add_return +#endif + +#endif /* atomic64_add_return_relaxed */ + +#ifndef atomic64_fetch_add_relaxed +#define atomic64_fetch_add_acquire atomic64_fetch_add +#define atomic64_fetch_add_release atomic64_fetch_add +#define atomic64_fetch_add_relaxed atomic64_fetch_add +#else /* atomic64_fetch_add_relaxed */ + +#ifndef atomic64_fetch_add_acquire +static inline s64 +atomic64_fetch_add_acquire(s64 i, atomic64_t *v) +{ + s64 ret = atomic64_fetch_add_relaxed(i, v); + __atomic_acquire_fence(); + return ret; +} +#define atomic64_fetch_add_acquire atomic64_fetch_add_acquire +#endif + +#ifndef atomic64_fetch_add_release +static inline s64 +atomic64_fetch_add_release(s64 i, atomic64_t *v) +{ + __atomic_release_fence(); + return atomic64_fetch_add_relaxed(i, v); +} +#define atomic64_fetch_add_release atomic64_fetch_add_release +#endif + +#ifndef atomic64_fetch_add +static inline s64 +atomic64_fetch_add(s64 i, atomic64_t *v) +{ + s64 ret; + __atomic_pre_full_fence(); + ret = atomic64_fetch_add_relaxed(i, v); + __atomic_post_full_fence(); + return ret; +} +#define atomic64_fetch_add atomic64_fetch_add +#endif + +#endif /* atomic64_fetch_add_relaxed */ + +#ifndef atomic64_sub_return_relaxed +#define atomic64_sub_return_acquire atomic64_sub_return +#define atomic64_sub_return_release atomic64_sub_return +#define atomic64_sub_return_relaxed atomic64_sub_return +#else /* atomic64_sub_return_relaxed */ + +#ifndef atomic64_sub_return_acquire +static inline s64 +atomic64_sub_return_acquire(s64 i, atomic64_t *v) +{ + s64 ret = atomic64_sub_return_relaxed(i, v); + __atomic_acquire_fence(); + return ret; +} +#define atomic64_sub_return_acquire atomic64_sub_return_acquire +#endif + +#ifndef atomic64_sub_return_release +static inline s64 +atomic64_sub_return_release(s64 i, atomic64_t *v) +{ + __atomic_release_fence(); + return atomic64_sub_return_relaxed(i, v); +} +#define atomic64_sub_return_release atomic64_sub_return_release +#endif + +#ifndef atomic64_sub_return +static inline s64 +atomic64_sub_return(s64 i, atomic64_t *v) +{ + s64 ret; + __atomic_pre_full_fence(); + ret = atomic64_sub_return_relaxed(i, v); + __atomic_post_full_fence(); + return ret; +} +#define atomic64_sub_return atomic64_sub_return +#endif + +#endif /* atomic64_sub_return_relaxed */ + +#ifndef atomic64_fetch_sub_relaxed +#define atomic64_fetch_sub_acquire atomic64_fetch_sub +#define atomic64_fetch_sub_release atomic64_fetch_sub +#define atomic64_fetch_sub_relaxed atomic64_fetch_sub +#else /* atomic64_fetch_sub_relaxed */ + +#ifndef atomic64_fetch_sub_acquire +static inline s64 +atomic64_fetch_sub_acquire(s64 i, atomic64_t *v) +{ + s64 ret = atomic64_fetch_sub_relaxed(i, v); + __atomic_acquire_fence(); + return ret; +} +#define atomic64_fetch_sub_acquire atomic64_fetch_sub_acquire +#endif + +#ifndef atomic64_fetch_sub_release +static inline s64 +atomic64_fetch_sub_release(s64 i, atomic64_t *v) +{ + __atomic_release_fence(); + return atomic64_fetch_sub_relaxed(i, v); +} +#define atomic64_fetch_sub_release atomic64_fetch_sub_release +#endif + +#ifndef atomic64_fetch_sub +static inline s64 +atomic64_fetch_sub(s64 i, atomic64_t *v) +{ + s64 ret; + __atomic_pre_full_fence(); + ret = atomic64_fetch_sub_relaxed(i, v); + __atomic_post_full_fence(); + return ret; +} +#define atomic64_fetch_sub atomic64_fetch_sub +#endif + +#endif /* atomic64_fetch_sub_relaxed */ + +#ifndef atomic64_inc +static inline void +atomic64_inc(atomic64_t *v) +{ + atomic64_add(1, v); +} +#define atomic64_inc atomic64_inc +#endif + +#ifndef atomic64_inc_return_relaxed +#ifdef atomic64_inc_return +#define atomic64_inc_return_acquire atomic64_inc_return +#define atomic64_inc_return_release atomic64_inc_return +#define atomic64_inc_return_relaxed atomic64_inc_return +#endif /* atomic64_inc_return */ + +#ifndef atomic64_inc_return +static inline s64 +atomic64_inc_return(atomic64_t *v) +{ + return atomic64_add_return(1, v); +} +#define atomic64_inc_return atomic64_inc_return +#endif + +#ifndef atomic64_inc_return_acquire +static inline s64 +atomic64_inc_return_acquire(atomic64_t *v) +{ + return atomic64_add_return_acquire(1, v); +} +#define atomic64_inc_return_acquire atomic64_inc_return_acquire +#endif + +#ifndef atomic64_inc_return_release +static inline s64 +atomic64_inc_return_release(atomic64_t *v) +{ + return atomic64_add_return_release(1, v); +} +#define atomic64_inc_return_release atomic64_inc_return_release +#endif + +#ifndef atomic64_inc_return_relaxed +static inline s64 +atomic64_inc_return_relaxed(atomic64_t *v) +{ + return atomic64_add_return_relaxed(1, v); +} +#define atomic64_inc_return_relaxed atomic64_inc_return_relaxed +#endif + +#else /* atomic64_inc_return_relaxed */ + +#ifndef atomic64_inc_return_acquire +static inline s64 +atomic64_inc_return_acquire(atomic64_t *v) +{ + s64 ret = atomic64_inc_return_relaxed(v); + __atomic_acquire_fence(); + return ret; +} +#define atomic64_inc_return_acquire atomic64_inc_return_acquire +#endif + +#ifndef atomic64_inc_return_release +static inline s64 +atomic64_inc_return_release(atomic64_t *v) +{ + __atomic_release_fence(); + return atomic64_inc_return_relaxed(v); +} +#define atomic64_inc_return_release atomic64_inc_return_release +#endif + +#ifndef atomic64_inc_return +static inline s64 +atomic64_inc_return(atomic64_t *v) +{ + s64 ret; + __atomic_pre_full_fence(); + ret = atomic64_inc_return_relaxed(v); + __atomic_post_full_fence(); + return ret; +} +#define atomic64_inc_return atomic64_inc_return +#endif + +#endif /* atomic64_inc_return_relaxed */ + +#ifndef atomic64_fetch_inc_relaxed +#ifdef atomic64_fetch_inc +#define atomic64_fetch_inc_acquire atomic64_fetch_inc +#define atomic64_fetch_inc_release atomic64_fetch_inc +#define atomic64_fetch_inc_relaxed atomic64_fetch_inc +#endif /* atomic64_fetch_inc */ + +#ifndef atomic64_fetch_inc +static inline s64 +atomic64_fetch_inc(atomic64_t *v) +{ + return atomic64_fetch_add(1, v); +} +#define atomic64_fetch_inc atomic64_fetch_inc +#endif + +#ifndef atomic64_fetch_inc_acquire +static inline s64 +atomic64_fetch_inc_acquire(atomic64_t *v) +{ + return atomic64_fetch_add_acquire(1, v); +} +#define atomic64_fetch_inc_acquire atomic64_fetch_inc_acquire +#endif + +#ifndef atomic64_fetch_inc_release +static inline s64 +atomic64_fetch_inc_release(atomic64_t *v) +{ + return atomic64_fetch_add_release(1, v); +} +#define atomic64_fetch_inc_release atomic64_fetch_inc_release +#endif + +#ifndef atomic64_fetch_inc_relaxed +static inline s64 +atomic64_fetch_inc_relaxed(atomic64_t *v) +{ + return atomic64_fetch_add_relaxed(1, v); +} +#define atomic64_fetch_inc_relaxed atomic64_fetch_inc_relaxed +#endif + +#else /* atomic64_fetch_inc_relaxed */ + +#ifndef atomic64_fetch_inc_acquire +static inline s64 +atomic64_fetch_inc_acquire(atomic64_t *v) +{ + s64 ret = atomic64_fetch_inc_relaxed(v); + __atomic_acquire_fence(); + return ret; +} +#define atomic64_fetch_inc_acquire atomic64_fetch_inc_acquire +#endif + +#ifndef atomic64_fetch_inc_release +static inline s64 +atomic64_fetch_inc_release(atomic64_t *v) +{ + __atomic_release_fence(); + return atomic64_fetch_inc_relaxed(v); +} +#define atomic64_fetch_inc_release atomic64_fetch_inc_release +#endif + +#ifndef atomic64_fetch_inc +static inline s64 +atomic64_fetch_inc(atomic64_t *v) +{ + s64 ret; + __atomic_pre_full_fence(); + ret = atomic64_fetch_inc_relaxed(v); + __atomic_post_full_fence(); + return ret; +} +#define atomic64_fetch_inc atomic64_fetch_inc +#endif + +#endif /* atomic64_fetch_inc_relaxed */ + +#ifndef atomic64_dec +static inline void +atomic64_dec(atomic64_t *v) +{ + atomic64_sub(1, v); +} +#define atomic64_dec atomic64_dec +#endif + +#ifndef atomic64_dec_return_relaxed +#ifdef atomic64_dec_return +#define atomic64_dec_return_acquire atomic64_dec_return +#define atomic64_dec_return_release atomic64_dec_return +#define atomic64_dec_return_relaxed atomic64_dec_return +#endif /* atomic64_dec_return */ + +#ifndef atomic64_dec_return +static inline s64 +atomic64_dec_return(atomic64_t *v) +{ + return atomic64_sub_return(1, v); +} +#define atomic64_dec_return atomic64_dec_return +#endif + +#ifndef atomic64_dec_return_acquire +static inline s64 +atomic64_dec_return_acquire(atomic64_t *v) +{ + return atomic64_sub_return_acquire(1, v); +} +#define atomic64_dec_return_acquire atomic64_dec_return_acquire +#endif + +#ifndef atomic64_dec_return_release +static inline s64 +atomic64_dec_return_release(atomic64_t *v) +{ + return atomic64_sub_return_release(1, v); +} +#define atomic64_dec_return_release atomic64_dec_return_release +#endif + +#ifndef atomic64_dec_return_relaxed +static inline s64 +atomic64_dec_return_relaxed(atomic64_t *v) +{ + return atomic64_sub_return_relaxed(1, v); +} +#define atomic64_dec_return_relaxed atomic64_dec_return_relaxed +#endif + +#else /* atomic64_dec_return_relaxed */ + +#ifndef atomic64_dec_return_acquire +static inline s64 +atomic64_dec_return_acquire(atomic64_t *v) +{ + s64 ret = atomic64_dec_return_relaxed(v); + __atomic_acquire_fence(); + return ret; +} +#define atomic64_dec_return_acquire atomic64_dec_return_acquire +#endif + +#ifndef atomic64_dec_return_release +static inline s64 +atomic64_dec_return_release(atomic64_t *v) +{ + __atomic_release_fence(); + return atomic64_dec_return_relaxed(v); +} +#define atomic64_dec_return_release atomic64_dec_return_release +#endif + +#ifndef atomic64_dec_return +static inline s64 +atomic64_dec_return(atomic64_t *v) +{ + s64 ret; + __atomic_pre_full_fence(); + ret = atomic64_dec_return_relaxed(v); + __atomic_post_full_fence(); + return ret; +} +#define atomic64_dec_return atomic64_dec_return +#endif + +#endif /* atomic64_dec_return_relaxed */ + +#ifndef atomic64_fetch_dec_relaxed +#ifdef atomic64_fetch_dec +#define atomic64_fetch_dec_acquire atomic64_fetch_dec +#define atomic64_fetch_dec_release atomic64_fetch_dec +#define atomic64_fetch_dec_relaxed atomic64_fetch_dec +#endif /* atomic64_fetch_dec */ + +#ifndef atomic64_fetch_dec +static inline s64 +atomic64_fetch_dec(atomic64_t *v) +{ + return atomic64_fetch_sub(1, v); +} +#define atomic64_fetch_dec atomic64_fetch_dec +#endif + +#ifndef atomic64_fetch_dec_acquire +static inline s64 +atomic64_fetch_dec_acquire(atomic64_t *v) +{ + return atomic64_fetch_sub_acquire(1, v); +} +#define atomic64_fetch_dec_acquire atomic64_fetch_dec_acquire +#endif + +#ifndef atomic64_fetch_dec_release +static inline s64 +atomic64_fetch_dec_release(atomic64_t *v) +{ + return atomic64_fetch_sub_release(1, v); +} +#define atomic64_fetch_dec_release atomic64_fetch_dec_release +#endif + +#ifndef atomic64_fetch_dec_relaxed +static inline s64 +atomic64_fetch_dec_relaxed(atomic64_t *v) +{ + return atomic64_fetch_sub_relaxed(1, v); +} +#define atomic64_fetch_dec_relaxed atomic64_fetch_dec_relaxed +#endif + +#else /* atomic64_fetch_dec_relaxed */ + +#ifndef atomic64_fetch_dec_acquire +static inline s64 +atomic64_fetch_dec_acquire(atomic64_t *v) +{ + s64 ret = atomic64_fetch_dec_relaxed(v); + __atomic_acquire_fence(); + return ret; +} +#define atomic64_fetch_dec_acquire atomic64_fetch_dec_acquire +#endif + +#ifndef atomic64_fetch_dec_release +static inline s64 +atomic64_fetch_dec_release(atomic64_t *v) +{ + __atomic_release_fence(); + return atomic64_fetch_dec_relaxed(v); +} +#define atomic64_fetch_dec_release atomic64_fetch_dec_release +#endif + +#ifndef atomic64_fetch_dec +static inline s64 +atomic64_fetch_dec(atomic64_t *v) +{ + s64 ret; + __atomic_pre_full_fence(); + ret = atomic64_fetch_dec_relaxed(v); + __atomic_post_full_fence(); + return ret; +} +#define atomic64_fetch_dec atomic64_fetch_dec +#endif + +#endif /* atomic64_fetch_dec_relaxed */ + +#ifndef atomic64_fetch_and_relaxed +#define atomic64_fetch_and_acquire atomic64_fetch_and +#define atomic64_fetch_and_release atomic64_fetch_and +#define atomic64_fetch_and_relaxed atomic64_fetch_and +#else /* atomic64_fetch_and_relaxed */ + +#ifndef atomic64_fetch_and_acquire +static inline s64 +atomic64_fetch_and_acquire(s64 i, atomic64_t *v) +{ + s64 ret = atomic64_fetch_and_relaxed(i, v); + __atomic_acquire_fence(); + return ret; +} +#define atomic64_fetch_and_acquire atomic64_fetch_and_acquire +#endif + +#ifndef atomic64_fetch_and_release +static inline s64 +atomic64_fetch_and_release(s64 i, atomic64_t *v) +{ + __atomic_release_fence(); + return atomic64_fetch_and_relaxed(i, v); +} +#define atomic64_fetch_and_release atomic64_fetch_and_release +#endif + +#ifndef atomic64_fetch_and +static inline s64 +atomic64_fetch_and(s64 i, atomic64_t *v) +{ + s64 ret; + __atomic_pre_full_fence(); + ret = atomic64_fetch_and_relaxed(i, v); + __atomic_post_full_fence(); + return ret; +} +#define atomic64_fetch_and atomic64_fetch_and +#endif + +#endif /* atomic64_fetch_and_relaxed */ + +#ifndef atomic64_andnot +static inline void +atomic64_andnot(s64 i, atomic64_t *v) +{ + atomic64_and(~i, v); +} +#define atomic64_andnot atomic64_andnot +#endif + +#ifndef atomic64_fetch_andnot_relaxed +#ifdef atomic64_fetch_andnot +#define atomic64_fetch_andnot_acquire atomic64_fetch_andnot +#define atomic64_fetch_andnot_release atomic64_fetch_andnot +#define atomic64_fetch_andnot_relaxed atomic64_fetch_andnot +#endif /* atomic64_fetch_andnot */ + +#ifndef atomic64_fetch_andnot +static inline s64 +atomic64_fetch_andnot(s64 i, atomic64_t *v) +{ + return atomic64_fetch_and(~i, v); +} +#define atomic64_fetch_andnot atomic64_fetch_andnot +#endif + +#ifndef atomic64_fetch_andnot_acquire +static inline s64 +atomic64_fetch_andnot_acquire(s64 i, atomic64_t *v) +{ + return atomic64_fetch_and_acquire(~i, v); +} +#define atomic64_fetch_andnot_acquire atomic64_fetch_andnot_acquire +#endif + +#ifndef atomic64_fetch_andnot_release +static inline s64 +atomic64_fetch_andnot_release(s64 i, atomic64_t *v) +{ + return atomic64_fetch_and_release(~i, v); +} +#define atomic64_fetch_andnot_release atomic64_fetch_andnot_release +#endif + +#ifndef atomic64_fetch_andnot_relaxed +static inline s64 +atomic64_fetch_andnot_relaxed(s64 i, atomic64_t *v) +{ + return atomic64_fetch_and_relaxed(~i, v); +} +#define atomic64_fetch_andnot_relaxed atomic64_fetch_andnot_relaxed +#endif + +#else /* atomic64_fetch_andnot_relaxed */ + +#ifndef atomic64_fetch_andnot_acquire +static inline s64 +atomic64_fetch_andnot_acquire(s64 i, atomic64_t *v) +{ + s64 ret = atomic64_fetch_andnot_relaxed(i, v); + __atomic_acquire_fence(); + return ret; +} +#define atomic64_fetch_andnot_acquire atomic64_fetch_andnot_acquire +#endif + +#ifndef atomic64_fetch_andnot_release +static inline s64 +atomic64_fetch_andnot_release(s64 i, atomic64_t *v) +{ + __atomic_release_fence(); + return atomic64_fetch_andnot_relaxed(i, v); +} +#define atomic64_fetch_andnot_release atomic64_fetch_andnot_release +#endif + +#ifndef atomic64_fetch_andnot +static inline s64 +atomic64_fetch_andnot(s64 i, atomic64_t *v) +{ + s64 ret; + __atomic_pre_full_fence(); + ret = atomic64_fetch_andnot_relaxed(i, v); + __atomic_post_full_fence(); + return ret; +} +#define atomic64_fetch_andnot atomic64_fetch_andnot +#endif + +#endif /* atomic64_fetch_andnot_relaxed */ + +#ifndef atomic64_fetch_or_relaxed +#define atomic64_fetch_or_acquire atomic64_fetch_or +#define atomic64_fetch_or_release atomic64_fetch_or +#define atomic64_fetch_or_relaxed atomic64_fetch_or +#else /* atomic64_fetch_or_relaxed */ + +#ifndef atomic64_fetch_or_acquire +static inline s64 +atomic64_fetch_or_acquire(s64 i, atomic64_t *v) +{ + s64 ret = atomic64_fetch_or_relaxed(i, v); + __atomic_acquire_fence(); + return ret; +} +#define atomic64_fetch_or_acquire atomic64_fetch_or_acquire +#endif + +#ifndef atomic64_fetch_or_release +static inline s64 +atomic64_fetch_or_release(s64 i, atomic64_t *v) +{ + __atomic_release_fence(); + return atomic64_fetch_or_relaxed(i, v); +} +#define atomic64_fetch_or_release atomic64_fetch_or_release +#endif + +#ifndef atomic64_fetch_or +static inline s64 +atomic64_fetch_or(s64 i, atomic64_t *v) +{ + s64 ret; + __atomic_pre_full_fence(); + ret = atomic64_fetch_or_relaxed(i, v); + __atomic_post_full_fence(); + return ret; +} +#define atomic64_fetch_or atomic64_fetch_or +#endif + +#endif /* atomic64_fetch_or_relaxed */ + +#ifndef atomic64_fetch_xor_relaxed +#define atomic64_fetch_xor_acquire atomic64_fetch_xor +#define atomic64_fetch_xor_release atomic64_fetch_xor +#define atomic64_fetch_xor_relaxed atomic64_fetch_xor +#else /* atomic64_fetch_xor_relaxed */ + +#ifndef atomic64_fetch_xor_acquire +static inline s64 +atomic64_fetch_xor_acquire(s64 i, atomic64_t *v) +{ + s64 ret = atomic64_fetch_xor_relaxed(i, v); + __atomic_acquire_fence(); + return ret; +} +#define atomic64_fetch_xor_acquire atomic64_fetch_xor_acquire +#endif + +#ifndef atomic64_fetch_xor_release +static inline s64 +atomic64_fetch_xor_release(s64 i, atomic64_t *v) +{ + __atomic_release_fence(); + return atomic64_fetch_xor_relaxed(i, v); +} +#define atomic64_fetch_xor_release atomic64_fetch_xor_release +#endif + +#ifndef atomic64_fetch_xor +static inline s64 +atomic64_fetch_xor(s64 i, atomic64_t *v) +{ + s64 ret; + __atomic_pre_full_fence(); + ret = atomic64_fetch_xor_relaxed(i, v); + __atomic_post_full_fence(); + return ret; +} +#define atomic64_fetch_xor atomic64_fetch_xor +#endif + +#endif /* atomic64_fetch_xor_relaxed */ + +#ifndef atomic64_xchg_relaxed +#define atomic64_xchg_acquire atomic64_xchg +#define atomic64_xchg_release atomic64_xchg +#define atomic64_xchg_relaxed atomic64_xchg +#else /* atomic64_xchg_relaxed */ + +#ifndef atomic64_xchg_acquire +static inline s64 +atomic64_xchg_acquire(atomic64_t *v, s64 i) +{ + s64 ret = atomic64_xchg_relaxed(v, i); + __atomic_acquire_fence(); + return ret; +} +#define atomic64_xchg_acquire atomic64_xchg_acquire +#endif + +#ifndef atomic64_xchg_release +static inline s64 +atomic64_xchg_release(atomic64_t *v, s64 i) +{ + __atomic_release_fence(); + return atomic64_xchg_relaxed(v, i); +} +#define atomic64_xchg_release atomic64_xchg_release +#endif + +#ifndef atomic64_xchg +static inline s64 +atomic64_xchg(atomic64_t *v, s64 i) +{ + s64 ret; + __atomic_pre_full_fence(); + ret = atomic64_xchg_relaxed(v, i); + __atomic_post_full_fence(); + return ret; +} +#define atomic64_xchg atomic64_xchg +#endif + +#endif /* atomic64_xchg_relaxed */ + +#ifndef atomic64_cmpxchg_relaxed +#define atomic64_cmpxchg_acquire atomic64_cmpxchg +#define atomic64_cmpxchg_release atomic64_cmpxchg +#define atomic64_cmpxchg_relaxed atomic64_cmpxchg +#else /* atomic64_cmpxchg_relaxed */ + +#ifndef atomic64_cmpxchg_acquire +static inline s64 +atomic64_cmpxchg_acquire(atomic64_t *v, s64 old, s64 new) +{ + s64 ret = atomic64_cmpxchg_relaxed(v, old, new); + __atomic_acquire_fence(); + return ret; +} +#define atomic64_cmpxchg_acquire atomic64_cmpxchg_acquire +#endif + +#ifndef atomic64_cmpxchg_release +static inline s64 +atomic64_cmpxchg_release(atomic64_t *v, s64 old, s64 new) +{ + __atomic_release_fence(); + return atomic64_cmpxchg_relaxed(v, old, new); +} +#define atomic64_cmpxchg_release atomic64_cmpxchg_release +#endif + +#ifndef atomic64_cmpxchg +static inline s64 +atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new) +{ + s64 ret; + __atomic_pre_full_fence(); + ret = atomic64_cmpxchg_relaxed(v, old, new); + __atomic_post_full_fence(); + return ret; +} +#define atomic64_cmpxchg atomic64_cmpxchg +#endif + +#endif /* atomic64_cmpxchg_relaxed */ + +#ifndef atomic64_try_cmpxchg_relaxed +#ifdef atomic64_try_cmpxchg +#define atomic64_try_cmpxchg_acquire atomic64_try_cmpxchg +#define atomic64_try_cmpxchg_release atomic64_try_cmpxchg +#define atomic64_try_cmpxchg_relaxed atomic64_try_cmpxchg +#endif /* atomic64_try_cmpxchg */ + +#ifndef atomic64_try_cmpxchg +static inline bool +atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new) +{ + s64 r, o = *old; + r = atomic64_cmpxchg(v, o, new); + if (unlikely(r != o)) + *old = r; + return likely(r == o); +} +#define atomic64_try_cmpxchg atomic64_try_cmpxchg +#endif + +#ifndef atomic64_try_cmpxchg_acquire +static inline bool +atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new) +{ + s64 r, o = *old; + r = atomic64_cmpxchg_acquire(v, o, new); + if (unlikely(r != o)) + *old = r; + return likely(r == o); +} +#define atomic64_try_cmpxchg_acquire atomic64_try_cmpxchg_acquire +#endif + +#ifndef atomic64_try_cmpxchg_release +static inline bool +atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new) +{ + s64 r, o = *old; + r = atomic64_cmpxchg_release(v, o, new); + if (unlikely(r != o)) + *old = r; + return likely(r == o); +} +#define atomic64_try_cmpxchg_release atomic64_try_cmpxchg_release +#endif + +#ifndef atomic64_try_cmpxchg_relaxed +static inline bool +atomic64_try_cmpxchg_relaxed(atomic64_t *v, s64 *old, s64 new) +{ + s64 r, o = *old; + r = atomic64_cmpxchg_relaxed(v, o, new); + if (unlikely(r != o)) + *old = r; + return likely(r == o); +} +#define atomic64_try_cmpxchg_relaxed atomic64_try_cmpxchg_relaxed +#endif + +#else /* atomic64_try_cmpxchg_relaxed */ + +#ifndef atomic64_try_cmpxchg_acquire +static inline bool +atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new) +{ + bool ret = atomic64_try_cmpxchg_relaxed(v, old, new); + __atomic_acquire_fence(); + return ret; +} +#define atomic64_try_cmpxchg_acquire atomic64_try_cmpxchg_acquire +#endif + +#ifndef atomic64_try_cmpxchg_release +static inline bool +atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new) +{ + __atomic_release_fence(); + return atomic64_try_cmpxchg_relaxed(v, old, new); +} +#define atomic64_try_cmpxchg_release atomic64_try_cmpxchg_release +#endif + +#ifndef atomic64_try_cmpxchg +static inline bool +atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new) +{ + bool ret; + __atomic_pre_full_fence(); + ret = atomic64_try_cmpxchg_relaxed(v, old, new); + __atomic_post_full_fence(); + return ret; +} +#define atomic64_try_cmpxchg atomic64_try_cmpxchg +#endif + +#endif /* atomic64_try_cmpxchg_relaxed */ + +#ifndef atomic64_sub_and_test +/** + * atomic64_sub_and_test - subtract value from variable and test result + * @i: integer value to subtract + * @v: pointer of type atomic64_t + * + * Atomically subtracts @i from @v and returns + * true if the result is zero, or false for all + * other cases. + */ +static inline bool +atomic64_sub_and_test(s64 i, atomic64_t *v) +{ + return atomic64_sub_return(i, v) == 0; +} +#define atomic64_sub_and_test atomic64_sub_and_test +#endif + +#ifndef atomic64_dec_and_test +/** + * atomic64_dec_and_test - decrement and test + * @v: pointer of type atomic64_t + * + * Atomically decrements @v by 1 and + * returns true if the result is 0, or false for all other + * cases. + */ +static inline bool +atomic64_dec_and_test(atomic64_t *v) +{ + return atomic64_dec_return(v) == 0; +} +#define atomic64_dec_and_test atomic64_dec_and_test +#endif + +#ifndef atomic64_inc_and_test +/** + * atomic64_inc_and_test - increment and test + * @v: pointer of type atomic64_t + * + * Atomically increments @v by 1 + * and returns true if the result is zero, or false for all + * other cases. + */ +static inline bool +atomic64_inc_and_test(atomic64_t *v) +{ + return atomic64_inc_return(v) == 0; +} +#define atomic64_inc_and_test atomic64_inc_and_test +#endif + +#ifndef atomic64_add_negative +/** + * atomic64_add_negative - add and test if negative + * @i: integer value to add + * @v: pointer of type atomic64_t + * + * Atomically adds @i to @v and returns true + * if the result is negative, or false when + * result is greater than or equal to zero. + */ +static inline bool +atomic64_add_negative(s64 i, atomic64_t *v) +{ + return atomic64_add_return(i, v) < 0; +} +#define atomic64_add_negative atomic64_add_negative +#endif + +#ifndef atomic64_fetch_add_unless +/** + * atomic64_fetch_add_unless - add unless the number is already a given value + * @v: pointer of type atomic64_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, so long as @v was not already @u. + * Returns original value of @v + */ +static inline s64 +atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u) +{ + s64 c = atomic64_read(v); + + do { + if (unlikely(c == u)) + break; + } while (!atomic64_try_cmpxchg(v, &c, c + a)); + + return c; +} +#define atomic64_fetch_add_unless atomic64_fetch_add_unless +#endif + +#ifndef atomic64_add_unless +/** + * atomic64_add_unless - add unless the number is already a given value + * @v: pointer of type atomic64_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, if @v was not already @u. + * Returns true if the addition was done. + */ +static inline bool +atomic64_add_unless(atomic64_t *v, s64 a, s64 u) +{ + return atomic64_fetch_add_unless(v, a, u) != u; +} +#define atomic64_add_unless atomic64_add_unless +#endif + +#ifndef atomic64_inc_not_zero +/** + * atomic64_inc_not_zero - increment unless the number is zero + * @v: pointer of type atomic64_t + * + * Atomically increments @v by 1, if @v is non-zero. + * Returns true if the increment was done. + */ +static inline bool +atomic64_inc_not_zero(atomic64_t *v) +{ + return atomic64_add_unless(v, 1, 0); +} +#define atomic64_inc_not_zero atomic64_inc_not_zero +#endif + +#ifndef atomic64_inc_unless_negative +static inline bool +atomic64_inc_unless_negative(atomic64_t *v) +{ + s64 c = atomic64_read(v); + + do { + if (unlikely(c < 0)) + return false; + } while (!atomic64_try_cmpxchg(v, &c, c + 1)); + + return true; +} +#define atomic64_inc_unless_negative atomic64_inc_unless_negative +#endif + +#ifndef atomic64_dec_unless_positive +static inline bool +atomic64_dec_unless_positive(atomic64_t *v) +{ + s64 c = atomic64_read(v); + + do { + if (unlikely(c > 0)) + return false; + } while (!atomic64_try_cmpxchg(v, &c, c - 1)); + + return true; +} +#define atomic64_dec_unless_positive atomic64_dec_unless_positive +#endif + +#ifndef atomic64_dec_if_positive +static inline s64 +atomic64_dec_if_positive(atomic64_t *v) +{ + s64 dec, c = atomic64_read(v); + + do { + dec = c - 1; + if (unlikely(dec < 0)) + break; + } while (!atomic64_try_cmpxchg(v, &c, dec)); + + return dec; +} +#define atomic64_dec_if_positive atomic64_dec_if_positive +#endif + +#define atomic64_cond_read_acquire(v, c) smp_cond_load_acquire(&(v)->counter, (c)) +#define atomic64_cond_read_relaxed(v, c) smp_cond_load_relaxed(&(v)->counter, (c)) + +#endif /* _LINUX_ATOMIC_FALLBACK_H */ diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 1e8e88bdaf09..4c0d009a46f0 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -25,14 +25,6 @@ * See Documentation/memory-barriers.txt for ACQUIRE/RELEASE definitions. */ -#ifndef atomic_read_acquire -#define atomic_read_acquire(v) smp_load_acquire(&(v)->counter) -#endif - -#ifndef atomic_set_release -#define atomic_set_release(v, i) smp_store_release(&(v)->counter, (i)) -#endif - /* * The idea here is to build acquire/release variants by adding explicit * barriers on top of the relaxed variant. In the case where the relaxed @@ -79,1238 +71,7 @@ __ret; \ }) -/* atomic_add_return_relaxed */ -#ifndef atomic_add_return_relaxed -#define atomic_add_return_relaxed atomic_add_return -#define atomic_add_return_acquire atomic_add_return -#define atomic_add_return_release atomic_add_return - -#else /* atomic_add_return_relaxed */ - -#ifndef atomic_add_return_acquire -#define atomic_add_return_acquire(...) \ - __atomic_op_acquire(atomic_add_return, __VA_ARGS__) -#endif - -#ifndef atomic_add_return_release -#define atomic_add_return_release(...) \ - __atomic_op_release(atomic_add_return, __VA_ARGS__) -#endif - -#ifndef atomic_add_return -#define atomic_add_return(...) \ - __atomic_op_fence(atomic_add_return, __VA_ARGS__) -#endif -#endif /* atomic_add_return_relaxed */ - -#ifndef atomic_inc -#define atomic_inc(v) atomic_add(1, (v)) -#endif - -/* atomic_inc_return_relaxed */ -#ifndef atomic_inc_return_relaxed - -#ifndef atomic_inc_return -#define atomic_inc_return(v) atomic_add_return(1, (v)) -#define atomic_inc_return_relaxed(v) atomic_add_return_relaxed(1, (v)) -#define atomic_inc_return_acquire(v) atomic_add_return_acquire(1, (v)) -#define atomic_inc_return_release(v) atomic_add_return_release(1, (v)) -#else /* atomic_inc_return */ -#define atomic_inc_return_relaxed atomic_inc_return -#define atomic_inc_return_acquire atomic_inc_return -#define atomic_inc_return_release atomic_inc_return -#endif /* atomic_inc_return */ - -#else /* atomic_inc_return_relaxed */ - -#ifndef atomic_inc_return_acquire -#define atomic_inc_return_acquire(...) \ - __atomic_op_acquire(atomic_inc_return, __VA_ARGS__) -#endif - -#ifndef atomic_inc_return_release -#define atomic_inc_return_release(...) \ - __atomic_op_release(atomic_inc_return, __VA_ARGS__) -#endif - -#ifndef atomic_inc_return -#define atomic_inc_return(...) \ - __atomic_op_fence(atomic_inc_return, __VA_ARGS__) -#endif -#endif /* atomic_inc_return_relaxed */ - -/* atomic_sub_return_relaxed */ -#ifndef atomic_sub_return_relaxed -#define atomic_sub_return_relaxed atomic_sub_return -#define atomic_sub_return_acquire atomic_sub_return -#define atomic_sub_return_release atomic_sub_return - -#else /* atomic_sub_return_relaxed */ - -#ifndef atomic_sub_return_acquire -#define atomic_sub_return_acquire(...) \ - __atomic_op_acquire(atomic_sub_return, __VA_ARGS__) -#endif - -#ifndef atomic_sub_return_release -#define atomic_sub_return_release(...) \ - __atomic_op_release(atomic_sub_return, __VA_ARGS__) -#endif - -#ifndef atomic_sub_return -#define atomic_sub_return(...) \ - __atomic_op_fence(atomic_sub_return, __VA_ARGS__) -#endif -#endif /* atomic_sub_return_relaxed */ - -#ifndef atomic_dec -#define atomic_dec(v) atomic_sub(1, (v)) -#endif - -/* atomic_dec_return_relaxed */ -#ifndef atomic_dec_return_relaxed - -#ifndef atomic_dec_return -#define atomic_dec_return(v) atomic_sub_return(1, (v)) -#define atomic_dec_return_relaxed(v) atomic_sub_return_relaxed(1, (v)) -#define atomic_dec_return_acquire(v) atomic_sub_return_acquire(1, (v)) -#define atomic_dec_return_release(v) atomic_sub_return_release(1, (v)) -#else /* atomic_dec_return */ -#define atomic_dec_return_relaxed atomic_dec_return -#define atomic_dec_return_acquire atomic_dec_return -#define atomic_dec_return_release atomic_dec_return -#endif /* atomic_dec_return */ - -#else /* atomic_dec_return_relaxed */ - -#ifndef atomic_dec_return_acquire -#define atomic_dec_return_acquire(...) \ - __atomic_op_acquire(atomic_dec_return, __VA_ARGS__) -#endif - -#ifndef atomic_dec_return_release -#define atomic_dec_return_release(...) \ - __atomic_op_release(atomic_dec_return, __VA_ARGS__) -#endif - -#ifndef atomic_dec_return -#define atomic_dec_return(...) \ - __atomic_op_fence(atomic_dec_return, __VA_ARGS__) -#endif -#endif /* atomic_dec_return_relaxed */ - - -/* atomic_fetch_add_relaxed */ -#ifndef atomic_fetch_add_relaxed -#define atomic_fetch_add_relaxed atomic_fetch_add -#define atomic_fetch_add_acquire atomic_fetch_add -#define atomic_fetch_add_release atomic_fetch_add - -#else /* atomic_fetch_add_relaxed */ - -#ifndef atomic_fetch_add_acquire -#define atomic_fetch_add_acquire(...) \ - __atomic_op_acquire(atomic_fetch_add, __VA_ARGS__) -#endif - -#ifndef atomic_fetch_add_release -#define atomic_fetch_add_release(...) \ - __atomic_op_release(atomic_fetch_add, __VA_ARGS__) -#endif - -#ifndef atomic_fetch_add -#define atomic_fetch_add(...) \ - __atomic_op_fence(atomic_fetch_add, __VA_ARGS__) -#endif -#endif /* atomic_fetch_add_relaxed */ - -/* atomic_fetch_inc_relaxed */ -#ifndef atomic_fetch_inc_relaxed - -#ifndef atomic_fetch_inc -#define atomic_fetch_inc(v) atomic_fetch_add(1, (v)) -#define atomic_fetch_inc_relaxed(v) atomic_fetch_add_relaxed(1, (v)) -#define atomic_fetch_inc_acquire(v) atomic_fetch_add_acquire(1, (v)) -#define atomic_fetch_inc_release(v) atomic_fetch_add_release(1, (v)) -#else /* atomic_fetch_inc */ -#define atomic_fetch_inc_relaxed atomic_fetch_inc -#define atomic_fetch_inc_acquire atomic_fetch_inc -#define atomic_fetch_inc_release atomic_fetch_inc -#endif /* atomic_fetch_inc */ - -#else /* atomic_fetch_inc_relaxed */ - -#ifndef atomic_fetch_inc_acquire -#define atomic_fetch_inc_acquire(...) \ - __atomic_op_acquire(atomic_fetch_inc, __VA_ARGS__) -#endif - -#ifndef atomic_fetch_inc_release -#define atomic_fetch_inc_release(...) \ - __atomic_op_release(atomic_fetch_inc, __VA_ARGS__) -#endif - -#ifndef atomic_fetch_inc -#define atomic_fetch_inc(...) \ - __atomic_op_fence(atomic_fetch_inc, __VA_ARGS__) -#endif -#endif /* atomic_fetch_inc_relaxed */ - -/* atomic_fetch_sub_relaxed */ -#ifndef atomic_fetch_sub_relaxed -#define atomic_fetch_sub_relaxed atomic_fetch_sub -#define atomic_fetch_sub_acquire atomic_fetch_sub -#define atomic_fetch_sub_release atomic_fetch_sub - -#else /* atomic_fetch_sub_relaxed */ - -#ifndef atomic_fetch_sub_acquire -#define atomic_fetch_sub_acquire(...) \ - __atomic_op_acquire(atomic_fetch_sub, __VA_ARGS__) -#endif - -#ifndef atomic_fetch_sub_release -#define atomic_fetch_sub_release(...) \ - __atomic_op_release(atomic_fetch_sub, __VA_ARGS__) -#endif - -#ifndef atomic_fetch_sub -#define atomic_fetch_sub(...) \ - __atomic_op_fence(atomic_fetch_sub, __VA_ARGS__) -#endif -#endif /* atomic_fetch_sub_relaxed */ - -/* atomic_fetch_dec_relaxed */ -#ifndef atomic_fetch_dec_relaxed - -#ifndef atomic_fetch_dec -#define atomic_fetch_dec(v) atomic_fetch_sub(1, (v)) -#define atomic_fetch_dec_relaxed(v) atomic_fetch_sub_relaxed(1, (v)) -#define atomic_fetch_dec_acquire(v) atomic_fetch_sub_acquire(1, (v)) -#define atomic_fetch_dec_release(v) atomic_fetch_sub_release(1, (v)) -#else /* atomic_fetch_dec */ -#define atomic_fetch_dec_relaxed atomic_fetch_dec -#define atomic_fetch_dec_acquire atomic_fetch_dec -#define atomic_fetch_dec_release atomic_fetch_dec -#endif /* atomic_fetch_dec */ - -#else /* atomic_fetch_dec_relaxed */ - -#ifndef atomic_fetch_dec_acquire -#define atomic_fetch_dec_acquire(...) \ - __atomic_op_acquire(atomic_fetch_dec, __VA_ARGS__) -#endif - -#ifndef atomic_fetch_dec_release -#define atomic_fetch_dec_release(...) \ - __atomic_op_release(atomic_fetch_dec, __VA_ARGS__) -#endif - -#ifndef atomic_fetch_dec -#define atomic_fetch_dec(...) \ - __atomic_op_fence(atomic_fetch_dec, __VA_ARGS__) -#endif -#endif /* atomic_fetch_dec_relaxed */ - -/* atomic_fetch_or_relaxed */ -#ifndef atomic_fetch_or_relaxed -#define atomic_fetch_or_relaxed atomic_fetch_or -#define atomic_fetch_or_acquire atomic_fetch_or -#define atomic_fetch_or_release atomic_fetch_or - -#else /* atomic_fetch_or_relaxed */ - -#ifndef atomic_fetch_or_acquire -#define atomic_fetch_or_acquire(...) \ - __atomic_op_acquire(atomic_fetch_or, __VA_ARGS__) -#endif - -#ifndef atomic_fetch_or_release -#define atomic_fetch_or_release(...) \ - __atomic_op_release(atomic_fetch_or, __VA_ARGS__) -#endif - -#ifndef atomic_fetch_or -#define atomic_fetch_or(...) \ - __atomic_op_fence(atomic_fetch_or, __VA_ARGS__) -#endif -#endif /* atomic_fetch_or_relaxed */ - -/* atomic_fetch_and_relaxed */ -#ifndef atomic_fetch_and_relaxed -#define atomic_fetch_and_relaxed atomic_fetch_and -#define atomic_fetch_and_acquire atomic_fetch_and -#define atomic_fetch_and_release atomic_fetch_and - -#else /* atomic_fetch_and_relaxed */ - -#ifndef atomic_fetch_and_acquire -#define atomic_fetch_and_acquire(...) \ - __atomic_op_acquire(atomic_fetch_and, __VA_ARGS__) -#endif - -#ifndef atomic_fetch_and_release -#define atomic_fetch_and_release(...) \ - __atomic_op_release(atomic_fetch_and, __VA_ARGS__) -#endif - -#ifndef atomic_fetch_and -#define atomic_fetch_and(...) \ - __atomic_op_fence(atomic_fetch_and, __VA_ARGS__) -#endif -#endif /* atomic_fetch_and_relaxed */ - -#ifndef atomic_andnot -#define atomic_andnot(i, v) atomic_and(~(int)(i), (v)) -#endif - -#ifndef atomic_fetch_andnot_relaxed - -#ifndef atomic_fetch_andnot -#define atomic_fetch_andnot(i, v) atomic_fetch_and(~(int)(i), (v)) -#define atomic_fetch_andnot_relaxed(i, v) atomic_fetch_and_relaxed(~(int)(i), (v)) -#define atomic_fetch_andnot_acquire(i, v) atomic_fetch_and_acquire(~(int)(i), (v)) -#define atomic_fetch_andnot_release(i, v) atomic_fetch_and_release(~(int)(i), (v)) -#else /* atomic_fetch_andnot */ -#define atomic_fetch_andnot_relaxed atomic_fetch_andnot -#define atomic_fetch_andnot_acquire atomic_fetch_andnot -#define atomic_fetch_andnot_release atomic_fetch_andnot -#endif /* atomic_fetch_andnot */ - -#else /* atomic_fetch_andnot_relaxed */ - -#ifndef atomic_fetch_andnot_acquire -#define atomic_fetch_andnot_acquire(...) \ - __atomic_op_acquire(atomic_fetch_andnot, __VA_ARGS__) -#endif - -#ifndef atomic_fetch_andnot_release -#define atomic_fetch_andnot_release(...) \ - __atomic_op_release(atomic_fetch_andnot, __VA_ARGS__) -#endif - -#ifndef atomic_fetch_andnot -#define atomic_fetch_andnot(...) \ - __atomic_op_fence(atomic_fetch_andnot, __VA_ARGS__) -#endif -#endif /* atomic_fetch_andnot_relaxed */ - -/* atomic_fetch_xor_relaxed */ -#ifndef atomic_fetch_xor_relaxed -#define atomic_fetch_xor_relaxed atomic_fetch_xor -#define atomic_fetch_xor_acquire atomic_fetch_xor -#define atomic_fetch_xor_release atomic_fetch_xor - -#else /* atomic_fetch_xor_relaxed */ - -#ifndef atomic_fetch_xor_acquire -#define atomic_fetch_xor_acquire(...) \ - __atomic_op_acquire(atomic_fetch_xor, __VA_ARGS__) -#endif - -#ifndef atomic_fetch_xor_release -#define atomic_fetch_xor_release(...) \ - __atomic_op_release(atomic_fetch_xor, __VA_ARGS__) -#endif - -#ifndef atomic_fetch_xor -#define atomic_fetch_xor(...) \ - __atomic_op_fence(atomic_fetch_xor, __VA_ARGS__) -#endif -#endif /* atomic_fetch_xor_relaxed */ - - -/* atomic_xchg_relaxed */ -#ifndef atomic_xchg_relaxed -#define atomic_xchg_relaxed atomic_xchg -#define atomic_xchg_acquire atomic_xchg -#define atomic_xchg_release atomic_xchg - -#else /* atomic_xchg_relaxed */ - -#ifndef atomic_xchg_acquire -#define atomic_xchg_acquire(...) \ - __atomic_op_acquire(atomic_xchg, __VA_ARGS__) -#endif - -#ifndef atomic_xchg_release -#define atomic_xchg_release(...) \ - __atomic_op_release(atomic_xchg, __VA_ARGS__) -#endif - -#ifndef atomic_xchg -#define atomic_xchg(...) \ - __atomic_op_fence(atomic_xchg, __VA_ARGS__) -#endif -#endif /* atomic_xchg_relaxed */ - -/* atomic_cmpxchg_relaxed */ -#ifndef atomic_cmpxchg_relaxed -#define atomic_cmpxchg_relaxed atomic_cmpxchg -#define atomic_cmpxchg_acquire atomic_cmpxchg -#define atomic_cmpxchg_release atomic_cmpxchg - -#else /* atomic_cmpxchg_relaxed */ - -#ifndef atomic_cmpxchg_acquire -#define atomic_cmpxchg_acquire(...) \ - __atomic_op_acquire(atomic_cmpxchg, __VA_ARGS__) -#endif - -#ifndef atomic_cmpxchg_release -#define atomic_cmpxchg_release(...) \ - __atomic_op_release(atomic_cmpxchg, __VA_ARGS__) -#endif - -#ifndef atomic_cmpxchg -#define atomic_cmpxchg(...) \ - __atomic_op_fence(atomic_cmpxchg, __VA_ARGS__) -#endif -#endif /* atomic_cmpxchg_relaxed */ - -#ifndef atomic_try_cmpxchg - -#define __atomic_try_cmpxchg(type, _p, _po, _n) \ -({ \ - typeof(_po) __po = (_po); \ - typeof(*(_po)) __r, __o = *__po; \ - __r = atomic_cmpxchg##type((_p), __o, (_n)); \ - if (unlikely(__r != __o)) \ - *__po = __r; \ - likely(__r == __o); \ -}) - -#define atomic_try_cmpxchg(_p, _po, _n) __atomic_try_cmpxchg(, _p, _po, _n) -#define atomic_try_cmpxchg_relaxed(_p, _po, _n) __atomic_try_cmpxchg(_relaxed, _p, _po, _n) -#define atomic_try_cmpxchg_acquire(_p, _po, _n) __atomic_try_cmpxchg(_acquire, _p, _po, _n) -#define atomic_try_cmpxchg_release(_p, _po, _n) __atomic_try_cmpxchg(_release, _p, _po, _n) - -#else /* atomic_try_cmpxchg */ -#define atomic_try_cmpxchg_relaxed atomic_try_cmpxchg -#define atomic_try_cmpxchg_acquire atomic_try_cmpxchg -#define atomic_try_cmpxchg_release atomic_try_cmpxchg -#endif /* atomic_try_cmpxchg */ - -/* cmpxchg_relaxed */ -#ifndef cmpxchg_relaxed -#define cmpxchg_relaxed cmpxchg -#define cmpxchg_acquire cmpxchg -#define cmpxchg_release cmpxchg - -#else /* cmpxchg_relaxed */ - -#ifndef cmpxchg_acquire -#define cmpxchg_acquire(...) \ - __atomic_op_acquire(cmpxchg, __VA_ARGS__) -#endif - -#ifndef cmpxchg_release -#define cmpxchg_release(...) \ - __atomic_op_release(cmpxchg, __VA_ARGS__) -#endif - -#ifndef cmpxchg -#define cmpxchg(...) \ - __atomic_op_fence(cmpxchg, __VA_ARGS__) -#endif -#endif /* cmpxchg_relaxed */ - -/* cmpxchg64_relaxed */ -#ifndef cmpxchg64_relaxed -#define cmpxchg64_relaxed cmpxchg64 -#define cmpxchg64_acquire cmpxchg64 -#define cmpxchg64_release cmpxchg64 - -#else /* cmpxchg64_relaxed */ - -#ifndef cmpxchg64_acquire -#define cmpxchg64_acquire(...) \ - __atomic_op_acquire(cmpxchg64, __VA_ARGS__) -#endif - -#ifndef cmpxchg64_release -#define cmpxchg64_release(...) \ - __atomic_op_release(cmpxchg64, __VA_ARGS__) -#endif - -#ifndef cmpxchg64 -#define cmpxchg64(...) \ - __atomic_op_fence(cmpxchg64, __VA_ARGS__) -#endif -#endif /* cmpxchg64_relaxed */ - -/* xchg_relaxed */ -#ifndef xchg_relaxed -#define xchg_relaxed xchg -#define xchg_acquire xchg -#define xchg_release xchg - -#else /* xchg_relaxed */ - -#ifndef xchg_acquire -#define xchg_acquire(...) __atomic_op_acquire(xchg, __VA_ARGS__) -#endif - -#ifndef xchg_release -#define xchg_release(...) __atomic_op_release(xchg, __VA_ARGS__) -#endif - -#ifndef xchg -#define xchg(...) __atomic_op_fence(xchg, __VA_ARGS__) -#endif -#endif /* xchg_relaxed */ - -/** - * atomic_fetch_add_unless - add unless the number is already a given value - * @v: pointer of type atomic_t - * @a: the amount to add to v... - * @u: ...unless v is equal to u. - * - * Atomically adds @a to @v, if @v was not already @u. - * Returns the original value of @v. - */ -#ifndef atomic_fetch_add_unless -static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u) -{ - int c = atomic_read(v); - - do { - if (unlikely(c == u)) - break; - } while (!atomic_try_cmpxchg(v, &c, c + a)); - - return c; -} -#endif - -/** - * atomic_add_unless - add unless the number is already a given value - * @v: pointer of type atomic_t - * @a: the amount to add to v... - * @u: ...unless v is equal to u. - * - * Atomically adds @a to @v, if @v was not already @u. - * Returns true if the addition was done. - */ -static inline bool atomic_add_unless(atomic_t *v, int a, int u) -{ - return atomic_fetch_add_unless(v, a, u) != u; -} - -/** - * atomic_inc_not_zero - increment unless the number is zero - * @v: pointer of type atomic_t - * - * Atomically increments @v by 1, if @v is non-zero. - * Returns true if the increment was done. - */ -#ifndef atomic_inc_not_zero -#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) -#endif - -/** - * atomic_inc_and_test - increment and test - * @v: pointer of type atomic_t - * - * Atomically increments @v by 1 - * and returns true if the result is zero, or false for all - * other cases. - */ -#ifndef atomic_inc_and_test -static inline bool atomic_inc_and_test(atomic_t *v) -{ - return atomic_inc_return(v) == 0; -} -#endif - -/** - * atomic_dec_and_test - decrement and test - * @v: pointer of type atomic_t - * - * Atomically decrements @v by 1 and - * returns true if the result is 0, or false for all other - * cases. - */ -#ifndef atomic_dec_and_test -static inline bool atomic_dec_and_test(atomic_t *v) -{ - return atomic_dec_return(v) == 0; -} -#endif - -/** - * atomic_sub_and_test - subtract value from variable and test result - * @i: integer value to subtract - * @v: pointer of type atomic_t - * - * Atomically subtracts @i from @v and returns - * true if the result is zero, or false for all - * other cases. - */ -#ifndef atomic_sub_and_test -static inline bool atomic_sub_and_test(int i, atomic_t *v) -{ - return atomic_sub_return(i, v) == 0; -} -#endif - -/** - * atomic_add_negative - add and test if negative - * @i: integer value to add - * @v: pointer of type atomic_t - * - * Atomically adds @i to @v and returns true - * if the result is negative, or false when - * result is greater than or equal to zero. - */ -#ifndef atomic_add_negative -static inline bool atomic_add_negative(int i, atomic_t *v) -{ - return atomic_add_return(i, v) < 0; -} -#endif - -#ifndef atomic_inc_unless_negative -static inline bool atomic_inc_unless_negative(atomic_t *v) -{ - int c = atomic_read(v); - - do { - if (unlikely(c < 0)) - return false; - } while (!atomic_try_cmpxchg(v, &c, c + 1)); - - return true; -} -#endif - -#ifndef atomic_dec_unless_positive -static inline bool atomic_dec_unless_positive(atomic_t *v) -{ - int c = atomic_read(v); - - do { - if (unlikely(c > 0)) - return false; - } while (!atomic_try_cmpxchg(v, &c, c - 1)); - - return true; -} -#endif - -/* - * atomic_dec_if_positive - decrement by 1 if old value positive - * @v: pointer of type atomic_t - * - * The function returns the old value of *v minus 1, even if - * the atomic variable, v, was not decremented. - */ -#ifndef atomic_dec_if_positive -static inline int atomic_dec_if_positive(atomic_t *v) -{ - int dec, c = atomic_read(v); - - do { - dec = c - 1; - if (unlikely(dec < 0)) - break; - } while (!atomic_try_cmpxchg(v, &c, dec)); - - return dec; -} -#endif - -#define atomic_cond_read_relaxed(v, c) smp_cond_load_relaxed(&(v)->counter, (c)) -#define atomic_cond_read_acquire(v, c) smp_cond_load_acquire(&(v)->counter, (c)) - -#ifdef CONFIG_GENERIC_ATOMIC64 -#include -#endif - -#ifndef atomic64_read_acquire -#define atomic64_read_acquire(v) smp_load_acquire(&(v)->counter) -#endif - -#ifndef atomic64_set_release -#define atomic64_set_release(v, i) smp_store_release(&(v)->counter, (i)) -#endif - -/* atomic64_add_return_relaxed */ -#ifndef atomic64_add_return_relaxed -#define atomic64_add_return_relaxed atomic64_add_return -#define atomic64_add_return_acquire atomic64_add_return -#define atomic64_add_return_release atomic64_add_return - -#else /* atomic64_add_return_relaxed */ - -#ifndef atomic64_add_return_acquire -#define atomic64_add_return_acquire(...) \ - __atomic_op_acquire(atomic64_add_return, __VA_ARGS__) -#endif - -#ifndef atomic64_add_return_release -#define atomic64_add_return_release(...) \ - __atomic_op_release(atomic64_add_return, __VA_ARGS__) -#endif - -#ifndef atomic64_add_return -#define atomic64_add_return(...) \ - __atomic_op_fence(atomic64_add_return, __VA_ARGS__) -#endif -#endif /* atomic64_add_return_relaxed */ - -#ifndef atomic64_inc -#define atomic64_inc(v) atomic64_add(1, (v)) -#endif - -/* atomic64_inc_return_relaxed */ -#ifndef atomic64_inc_return_relaxed - -#ifndef atomic64_inc_return -#define atomic64_inc_return(v) atomic64_add_return(1, (v)) -#define atomic64_inc_return_relaxed(v) atomic64_add_return_relaxed(1, (v)) -#define atomic64_inc_return_acquire(v) atomic64_add_return_acquire(1, (v)) -#define atomic64_inc_return_release(v) atomic64_add_return_release(1, (v)) -#else /* atomic64_inc_return */ -#define atomic64_inc_return_relaxed atomic64_inc_return -#define atomic64_inc_return_acquire atomic64_inc_return -#define atomic64_inc_return_release atomic64_inc_return -#endif /* atomic64_inc_return */ - -#else /* atomic64_inc_return_relaxed */ - -#ifndef atomic64_inc_return_acquire -#define atomic64_inc_return_acquire(...) \ - __atomic_op_acquire(atomic64_inc_return, __VA_ARGS__) -#endif - -#ifndef atomic64_inc_return_release -#define atomic64_inc_return_release(...) \ - __atomic_op_release(atomic64_inc_return, __VA_ARGS__) -#endif - -#ifndef atomic64_inc_return -#define atomic64_inc_return(...) \ - __atomic_op_fence(atomic64_inc_return, __VA_ARGS__) -#endif -#endif /* atomic64_inc_return_relaxed */ - - -/* atomic64_sub_return_relaxed */ -#ifndef atomic64_sub_return_relaxed -#define atomic64_sub_return_relaxed atomic64_sub_return -#define atomic64_sub_return_acquire atomic64_sub_return -#define atomic64_sub_return_release atomic64_sub_return - -#else /* atomic64_sub_return_relaxed */ - -#ifndef atomic64_sub_return_acquire -#define atomic64_sub_return_acquire(...) \ - __atomic_op_acquire(atomic64_sub_return, __VA_ARGS__) -#endif - -#ifndef atomic64_sub_return_release -#define atomic64_sub_return_release(...) \ - __atomic_op_release(atomic64_sub_return, __VA_ARGS__) -#endif - -#ifndef atomic64_sub_return -#define atomic64_sub_return(...) \ - __atomic_op_fence(atomic64_sub_return, __VA_ARGS__) -#endif -#endif /* atomic64_sub_return_relaxed */ - -#ifndef atomic64_dec -#define atomic64_dec(v) atomic64_sub(1, (v)) -#endif - -/* atomic64_dec_return_relaxed */ -#ifndef atomic64_dec_return_relaxed - -#ifndef atomic64_dec_return -#define atomic64_dec_return(v) atomic64_sub_return(1, (v)) -#define atomic64_dec_return_relaxed(v) atomic64_sub_return_relaxed(1, (v)) -#define atomic64_dec_return_acquire(v) atomic64_sub_return_acquire(1, (v)) -#define atomic64_dec_return_release(v) atomic64_sub_return_release(1, (v)) -#else /* atomic64_dec_return */ -#define atomic64_dec_return_relaxed atomic64_dec_return -#define atomic64_dec_return_acquire atomic64_dec_return -#define atomic64_dec_return_release atomic64_dec_return -#endif /* atomic64_dec_return */ - -#else /* atomic64_dec_return_relaxed */ - -#ifndef atomic64_dec_return_acquire -#define atomic64_dec_return_acquire(...) \ - __atomic_op_acquire(atomic64_dec_return, __VA_ARGS__) -#endif - -#ifndef atomic64_dec_return_release -#define atomic64_dec_return_release(...) \ - __atomic_op_release(atomic64_dec_return, __VA_ARGS__) -#endif - -#ifndef atomic64_dec_return -#define atomic64_dec_return(...) \ - __atomic_op_fence(atomic64_dec_return, __VA_ARGS__) -#endif -#endif /* atomic64_dec_return_relaxed */ - - -/* atomic64_fetch_add_relaxed */ -#ifndef atomic64_fetch_add_relaxed -#define atomic64_fetch_add_relaxed atomic64_fetch_add -#define atomic64_fetch_add_acquire atomic64_fetch_add -#define atomic64_fetch_add_release atomic64_fetch_add - -#else /* atomic64_fetch_add_relaxed */ - -#ifndef atomic64_fetch_add_acquire -#define atomic64_fetch_add_acquire(...) \ - __atomic_op_acquire(atomic64_fetch_add, __VA_ARGS__) -#endif - -#ifndef atomic64_fetch_add_release -#define atomic64_fetch_add_release(...) \ - __atomic_op_release(atomic64_fetch_add, __VA_ARGS__) -#endif - -#ifndef atomic64_fetch_add -#define atomic64_fetch_add(...) \ - __atomic_op_fence(atomic64_fetch_add, __VA_ARGS__) -#endif -#endif /* atomic64_fetch_add_relaxed */ - -/* atomic64_fetch_inc_relaxed */ -#ifndef atomic64_fetch_inc_relaxed - -#ifndef atomic64_fetch_inc -#define atomic64_fetch_inc(v) atomic64_fetch_add(1, (v)) -#define atomic64_fetch_inc_relaxed(v) atomic64_fetch_add_relaxed(1, (v)) -#define atomic64_fetch_inc_acquire(v) atomic64_fetch_add_acquire(1, (v)) -#define atomic64_fetch_inc_release(v) atomic64_fetch_add_release(1, (v)) -#else /* atomic64_fetch_inc */ -#define atomic64_fetch_inc_relaxed atomic64_fetch_inc -#define atomic64_fetch_inc_acquire atomic64_fetch_inc -#define atomic64_fetch_inc_release atomic64_fetch_inc -#endif /* atomic64_fetch_inc */ - -#else /* atomic64_fetch_inc_relaxed */ - -#ifndef atomic64_fetch_inc_acquire -#define atomic64_fetch_inc_acquire(...) \ - __atomic_op_acquire(atomic64_fetch_inc, __VA_ARGS__) -#endif - -#ifndef atomic64_fetch_inc_release -#define atomic64_fetch_inc_release(...) \ - __atomic_op_release(atomic64_fetch_inc, __VA_ARGS__) -#endif - -#ifndef atomic64_fetch_inc -#define atomic64_fetch_inc(...) \ - __atomic_op_fence(atomic64_fetch_inc, __VA_ARGS__) -#endif -#endif /* atomic64_fetch_inc_relaxed */ - -/* atomic64_fetch_sub_relaxed */ -#ifndef atomic64_fetch_sub_relaxed -#define atomic64_fetch_sub_relaxed atomic64_fetch_sub -#define atomic64_fetch_sub_acquire atomic64_fetch_sub -#define atomic64_fetch_sub_release atomic64_fetch_sub - -#else /* atomic64_fetch_sub_relaxed */ - -#ifndef atomic64_fetch_sub_acquire -#define atomic64_fetch_sub_acquire(...) \ - __atomic_op_acquire(atomic64_fetch_sub, __VA_ARGS__) -#endif - -#ifndef atomic64_fetch_sub_release -#define atomic64_fetch_sub_release(...) \ - __atomic_op_release(atomic64_fetch_sub, __VA_ARGS__) -#endif - -#ifndef atomic64_fetch_sub -#define atomic64_fetch_sub(...) \ - __atomic_op_fence(atomic64_fetch_sub, __VA_ARGS__) -#endif -#endif /* atomic64_fetch_sub_relaxed */ - -/* atomic64_fetch_dec_relaxed */ -#ifndef atomic64_fetch_dec_relaxed - -#ifndef atomic64_fetch_dec -#define atomic64_fetch_dec(v) atomic64_fetch_sub(1, (v)) -#define atomic64_fetch_dec_relaxed(v) atomic64_fetch_sub_relaxed(1, (v)) -#define atomic64_fetch_dec_acquire(v) atomic64_fetch_sub_acquire(1, (v)) -#define atomic64_fetch_dec_release(v) atomic64_fetch_sub_release(1, (v)) -#else /* atomic64_fetch_dec */ -#define atomic64_fetch_dec_relaxed atomic64_fetch_dec -#define atomic64_fetch_dec_acquire atomic64_fetch_dec -#define atomic64_fetch_dec_release atomic64_fetch_dec -#endif /* atomic64_fetch_dec */ - -#else /* atomic64_fetch_dec_relaxed */ - -#ifndef atomic64_fetch_dec_acquire -#define atomic64_fetch_dec_acquire(...) \ - __atomic_op_acquire(atomic64_fetch_dec, __VA_ARGS__) -#endif - -#ifndef atomic64_fetch_dec_release -#define atomic64_fetch_dec_release(...) \ - __atomic_op_release(atomic64_fetch_dec, __VA_ARGS__) -#endif - -#ifndef atomic64_fetch_dec -#define atomic64_fetch_dec(...) \ - __atomic_op_fence(atomic64_fetch_dec, __VA_ARGS__) -#endif -#endif /* atomic64_fetch_dec_relaxed */ - -/* atomic64_fetch_or_relaxed */ -#ifndef atomic64_fetch_or_relaxed -#define atomic64_fetch_or_relaxed atomic64_fetch_or -#define atomic64_fetch_or_acquire atomic64_fetch_or -#define atomic64_fetch_or_release atomic64_fetch_or - -#else /* atomic64_fetch_or_relaxed */ - -#ifndef atomic64_fetch_or_acquire -#define atomic64_fetch_or_acquire(...) \ - __atomic_op_acquire(atomic64_fetch_or, __VA_ARGS__) -#endif - -#ifndef atomic64_fetch_or_release -#define atomic64_fetch_or_release(...) \ - __atomic_op_release(atomic64_fetch_or, __VA_ARGS__) -#endif - -#ifndef atomic64_fetch_or -#define atomic64_fetch_or(...) \ - __atomic_op_fence(atomic64_fetch_or, __VA_ARGS__) -#endif -#endif /* atomic64_fetch_or_relaxed */ - -/* atomic64_fetch_and_relaxed */ -#ifndef atomic64_fetch_and_relaxed -#define atomic64_fetch_and_relaxed atomic64_fetch_and -#define atomic64_fetch_and_acquire atomic64_fetch_and -#define atomic64_fetch_and_release atomic64_fetch_and - -#else /* atomic64_fetch_and_relaxed */ - -#ifndef atomic64_fetch_and_acquire -#define atomic64_fetch_and_acquire(...) \ - __atomic_op_acquire(atomic64_fetch_and, __VA_ARGS__) -#endif - -#ifndef atomic64_fetch_and_release -#define atomic64_fetch_and_release(...) \ - __atomic_op_release(atomic64_fetch_and, __VA_ARGS__) -#endif - -#ifndef atomic64_fetch_and -#define atomic64_fetch_and(...) \ - __atomic_op_fence(atomic64_fetch_and, __VA_ARGS__) -#endif -#endif /* atomic64_fetch_and_relaxed */ - -#ifndef atomic64_andnot -#define atomic64_andnot(i, v) atomic64_and(~(long long)(i), (v)) -#endif - -#ifndef atomic64_fetch_andnot_relaxed - -#ifndef atomic64_fetch_andnot -#define atomic64_fetch_andnot(i, v) atomic64_fetch_and(~(long long)(i), (v)) -#define atomic64_fetch_andnot_relaxed(i, v) atomic64_fetch_and_relaxed(~(long long)(i), (v)) -#define atomic64_fetch_andnot_acquire(i, v) atomic64_fetch_and_acquire(~(long long)(i), (v)) -#define atomic64_fetch_andnot_release(i, v) atomic64_fetch_and_release(~(long long)(i), (v)) -#else /* atomic64_fetch_andnot */ -#define atomic64_fetch_andnot_relaxed atomic64_fetch_andnot -#define atomic64_fetch_andnot_acquire atomic64_fetch_andnot -#define atomic64_fetch_andnot_release atomic64_fetch_andnot -#endif /* atomic64_fetch_andnot */ - -#else /* atomic64_fetch_andnot_relaxed */ - -#ifndef atomic64_fetch_andnot_acquire -#define atomic64_fetch_andnot_acquire(...) \ - __atomic_op_acquire(atomic64_fetch_andnot, __VA_ARGS__) -#endif - -#ifndef atomic64_fetch_andnot_release -#define atomic64_fetch_andnot_release(...) \ - __atomic_op_release(atomic64_fetch_andnot, __VA_ARGS__) -#endif - -#ifndef atomic64_fetch_andnot -#define atomic64_fetch_andnot(...) \ - __atomic_op_fence(atomic64_fetch_andnot, __VA_ARGS__) -#endif -#endif /* atomic64_fetch_andnot_relaxed */ - -/* atomic64_fetch_xor_relaxed */ -#ifndef atomic64_fetch_xor_relaxed -#define atomic64_fetch_xor_relaxed atomic64_fetch_xor -#define atomic64_fetch_xor_acquire atomic64_fetch_xor -#define atomic64_fetch_xor_release atomic64_fetch_xor - -#else /* atomic64_fetch_xor_relaxed */ - -#ifndef atomic64_fetch_xor_acquire -#define atomic64_fetch_xor_acquire(...) \ - __atomic_op_acquire(atomic64_fetch_xor, __VA_ARGS__) -#endif - -#ifndef atomic64_fetch_xor_release -#define atomic64_fetch_xor_release(...) \ - __atomic_op_release(atomic64_fetch_xor, __VA_ARGS__) -#endif - -#ifndef atomic64_fetch_xor -#define atomic64_fetch_xor(...) \ - __atomic_op_fence(atomic64_fetch_xor, __VA_ARGS__) -#endif -#endif /* atomic64_fetch_xor_relaxed */ - - -/* atomic64_xchg_relaxed */ -#ifndef atomic64_xchg_relaxed -#define atomic64_xchg_relaxed atomic64_xchg -#define atomic64_xchg_acquire atomic64_xchg -#define atomic64_xchg_release atomic64_xchg - -#else /* atomic64_xchg_relaxed */ - -#ifndef atomic64_xchg_acquire -#define atomic64_xchg_acquire(...) \ - __atomic_op_acquire(atomic64_xchg, __VA_ARGS__) -#endif - -#ifndef atomic64_xchg_release -#define atomic64_xchg_release(...) \ - __atomic_op_release(atomic64_xchg, __VA_ARGS__) -#endif - -#ifndef atomic64_xchg -#define atomic64_xchg(...) \ - __atomic_op_fence(atomic64_xchg, __VA_ARGS__) -#endif -#endif /* atomic64_xchg_relaxed */ - -/* atomic64_cmpxchg_relaxed */ -#ifndef atomic64_cmpxchg_relaxed -#define atomic64_cmpxchg_relaxed atomic64_cmpxchg -#define atomic64_cmpxchg_acquire atomic64_cmpxchg -#define atomic64_cmpxchg_release atomic64_cmpxchg - -#else /* atomic64_cmpxchg_relaxed */ - -#ifndef atomic64_cmpxchg_acquire -#define atomic64_cmpxchg_acquire(...) \ - __atomic_op_acquire(atomic64_cmpxchg, __VA_ARGS__) -#endif - -#ifndef atomic64_cmpxchg_release -#define atomic64_cmpxchg_release(...) \ - __atomic_op_release(atomic64_cmpxchg, __VA_ARGS__) -#endif - -#ifndef atomic64_cmpxchg -#define atomic64_cmpxchg(...) \ - __atomic_op_fence(atomic64_cmpxchg, __VA_ARGS__) -#endif -#endif /* atomic64_cmpxchg_relaxed */ - -#ifndef atomic64_try_cmpxchg - -#define __atomic64_try_cmpxchg(type, _p, _po, _n) \ -({ \ - typeof(_po) __po = (_po); \ - typeof(*(_po)) __r, __o = *__po; \ - __r = atomic64_cmpxchg##type((_p), __o, (_n)); \ - if (unlikely(__r != __o)) \ - *__po = __r; \ - likely(__r == __o); \ -}) - -#define atomic64_try_cmpxchg(_p, _po, _n) __atomic64_try_cmpxchg(, _p, _po, _n) -#define atomic64_try_cmpxchg_relaxed(_p, _po, _n) __atomic64_try_cmpxchg(_relaxed, _p, _po, _n) -#define atomic64_try_cmpxchg_acquire(_p, _po, _n) __atomic64_try_cmpxchg(_acquire, _p, _po, _n) -#define atomic64_try_cmpxchg_release(_p, _po, _n) __atomic64_try_cmpxchg(_release, _p, _po, _n) - -#else /* atomic64_try_cmpxchg */ -#define atomic64_try_cmpxchg_relaxed atomic64_try_cmpxchg -#define atomic64_try_cmpxchg_acquire atomic64_try_cmpxchg -#define atomic64_try_cmpxchg_release atomic64_try_cmpxchg -#endif /* atomic64_try_cmpxchg */ - -/** - * atomic64_fetch_add_unless - add unless the number is already a given value - * @v: pointer of type atomic64_t - * @a: the amount to add to v... - * @u: ...unless v is equal to u. - * - * Atomically adds @a to @v, if @v was not already @u. - * Returns the original value of @v. - */ -#ifndef atomic64_fetch_add_unless -static inline long long atomic64_fetch_add_unless(atomic64_t *v, long long a, - long long u) -{ - long long c = atomic64_read(v); - - do { - if (unlikely(c == u)) - break; - } while (!atomic64_try_cmpxchg(v, &c, c + a)); - - return c; -} -#endif - -/** - * atomic64_add_unless - add unless the number is already a given value - * @v: pointer of type atomic_t - * @a: the amount to add to v... - * @u: ...unless v is equal to u. - * - * Atomically adds @a to @v, if @v was not already @u. - * Returns true if the addition was done. - */ -static inline bool atomic64_add_unless(atomic64_t *v, long long a, long long u) -{ - return atomic64_fetch_add_unless(v, a, u) != u; -} - -/** - * atomic64_inc_not_zero - increment unless the number is zero - * @v: pointer of type atomic64_t - * - * Atomically increments @v by 1, if @v is non-zero. - * Returns true if the increment was done. - */ -#ifndef atomic64_inc_not_zero -#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) -#endif - -/** - * atomic64_inc_and_test - increment and test - * @v: pointer of type atomic64_t - * - * Atomically increments @v by 1 - * and returns true if the result is zero, or false for all - * other cases. - */ -#ifndef atomic64_inc_and_test -static inline bool atomic64_inc_and_test(atomic64_t *v) -{ - return atomic64_inc_return(v) == 0; -} -#endif - -/** - * atomic64_dec_and_test - decrement and test - * @v: pointer of type atomic64_t - * - * Atomically decrements @v by 1 and - * returns true if the result is 0, or false for all other - * cases. - */ -#ifndef atomic64_dec_and_test -static inline bool atomic64_dec_and_test(atomic64_t *v) -{ - return atomic64_dec_return(v) == 0; -} -#endif - -/** - * atomic64_sub_and_test - subtract value from variable and test result - * @i: integer value to subtract - * @v: pointer of type atomic64_t - * - * Atomically subtracts @i from @v and returns - * true if the result is zero, or false for all - * other cases. - */ -#ifndef atomic64_sub_and_test -static inline bool atomic64_sub_and_test(long long i, atomic64_t *v) -{ - return atomic64_sub_return(i, v) == 0; -} -#endif - -/** - * atomic64_add_negative - add and test if negative - * @i: integer value to add - * @v: pointer of type atomic64_t - * - * Atomically adds @i to @v and returns true - * if the result is negative, or false when - * result is greater than or equal to zero. - */ -#ifndef atomic64_add_negative -static inline bool atomic64_add_negative(long long i, atomic64_t *v) -{ - return atomic64_add_return(i, v) < 0; -} -#endif - -#ifndef atomic64_inc_unless_negative -static inline bool atomic64_inc_unless_negative(atomic64_t *v) -{ - long long c = atomic64_read(v); - - do { - if (unlikely(c < 0)) - return false; - } while (!atomic64_try_cmpxchg(v, &c, c + 1)); - - return true; -} -#endif - -#ifndef atomic64_dec_unless_positive -static inline bool atomic64_dec_unless_positive(atomic64_t *v) -{ - long long c = atomic64_read(v); - - do { - if (unlikely(c > 0)) - return false; - } while (!atomic64_try_cmpxchg(v, &c, c - 1)); - - return true; -} -#endif - -/* - * atomic64_dec_if_positive - decrement by 1 if old value positive - * @v: pointer of type atomic64_t - * - * The function returns the old value of *v minus 1, even if - * the atomic64 variable, v, was not decremented. - */ -#ifndef atomic64_dec_if_positive -static inline long long atomic64_dec_if_positive(atomic64_t *v) -{ - long long dec, c = atomic64_read(v); - - do { - dec = c - 1; - if (unlikely(dec < 0)) - break; - } while (!atomic64_try_cmpxchg(v, &c, dec)); - - return dec; -} -#endif - -#define atomic64_cond_read_relaxed(v, c) smp_cond_load_relaxed(&(v)->counter, (c)) -#define atomic64_cond_read_acquire(v, c) smp_cond_load_acquire(&(v)->counter, (c)) +#include #include -- cgit From b312d8ca3a7cebe19941d969a51f2b7f899b81e2 Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 14 Nov 2018 16:11:06 +0100 Subject: dma-buf: make fence sequence numbers 64 bit v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For a lot of use cases we need 64bit sequence numbers. Currently drivers overload the dma_fence structure to store the additional bits. Stop doing that and make the sequence number in the dma_fence always 64bit. For compatibility with hardware which can do only 32bit sequences the comparisons in __dma_fence_is_later only takes the lower 32bits as significant when the upper 32bits are all zero. v2: change the logic in __dma_fence_is_later Signed-off-by: Christian König Reviewed-by: Chunming Zhou Link: https://patchwork.freedesktop.org/patch/266927/ --- drivers/dma-buf/dma-fence.c | 2 +- drivers/dma-buf/sw_sync.c | 2 +- drivers/dma-buf/sync_file.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c | 2 +- drivers/gpu/drm/i915/i915_sw_fence.c | 2 +- drivers/gpu/drm/i915/intel_engine_cs.c | 2 +- drivers/gpu/drm/vgem/vgem_fence.c | 4 ++-- include/linux/dma-fence.h | 22 +++++++++++++++------- 8 files changed, 24 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c index 136ec04d683f..3aa8733f832a 100644 --- a/drivers/dma-buf/dma-fence.c +++ b/drivers/dma-buf/dma-fence.c @@ -649,7 +649,7 @@ EXPORT_SYMBOL(dma_fence_wait_any_timeout); */ void dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops, - spinlock_t *lock, u64 context, unsigned seqno) + spinlock_t *lock, u64 context, u64 seqno) { BUG_ON(!lock); BUG_ON(!ops || !ops->get_driver_name || !ops->get_timeline_name); diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c index 53c1d6d36a64..32dcf7b4c935 100644 --- a/drivers/dma-buf/sw_sync.c +++ b/drivers/dma-buf/sw_sync.c @@ -172,7 +172,7 @@ static bool timeline_fence_enable_signaling(struct dma_fence *fence) static void timeline_fence_value_str(struct dma_fence *fence, char *str, int size) { - snprintf(str, size, "%d", fence->seqno); + snprintf(str, size, "%lld", fence->seqno); } static void timeline_fence_timeline_value_str(struct dma_fence *fence, diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c index 35dd06479867..4f6305ca52c8 100644 --- a/drivers/dma-buf/sync_file.c +++ b/drivers/dma-buf/sync_file.c @@ -144,7 +144,7 @@ char *sync_file_get_name(struct sync_file *sync_file, char *buf, int len) } else { struct dma_fence *fence = sync_file->fence; - snprintf(buf, len, "%s-%s%llu-%d", + snprintf(buf, len, "%s-%s%llu-%lld", fence->ops->get_driver_name(fence), fence->ops->get_timeline_name(fence), fence->context, @@ -258,7 +258,7 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a, i_b++; } else { - if (pt_a->seqno - pt_b->seqno <= INT_MAX) + if (__dma_fence_is_later(pt_a->seqno, pt_b->seqno)) add_fence(fences, &i, pt_a); else add_fence(fences, &i, pt_b); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c index 12f2bf97611f..bfaf5c6323be 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c @@ -388,7 +388,7 @@ void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager, soffset, eoffset, eoffset - soffset); if (i->fence) - seq_printf(m, " protected by 0x%08x on context %llu", + seq_printf(m, " protected by 0x%016llx on context %llu", i->fence->seqno, i->fence->context); seq_printf(m, "\n"); diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c index 6dbeed079ae5..11bcdabd5177 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/i915_sw_fence.c @@ -393,7 +393,7 @@ static void timer_i915_sw_fence_wake(struct timer_list *t) if (!fence) return; - pr_notice("Asynchronous wait on fence %s:%s:%x timed out (hint:%pS)\n", + pr_notice("Asynchronous wait on fence %s:%s:%llx timed out (hint:%pS)\n", cb->dma->ops->get_driver_name(cb->dma), cb->dma->ops->get_timeline_name(cb->dma), cb->dma->seqno, diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 759c0fd58f8c..dfafa79171df 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1239,7 +1239,7 @@ static void print_request(struct drm_printer *m, x = print_sched_attr(rq->i915, &rq->sched.attr, buf, x, sizeof(buf)); - drm_printf(m, "%s%x%s [%llx:%x]%s @ %dms: %s\n", + drm_printf(m, "%s%x%s [%llx:%llx]%s @ %dms: %s\n", prefix, rq->global_seqno, i915_request_completed(rq) ? "!" : "", diff --git a/drivers/gpu/drm/vgem/vgem_fence.c b/drivers/gpu/drm/vgem/vgem_fence.c index c1c420afe2dd..eb17c0cd3727 100644 --- a/drivers/gpu/drm/vgem/vgem_fence.c +++ b/drivers/gpu/drm/vgem/vgem_fence.c @@ -53,13 +53,13 @@ static void vgem_fence_release(struct dma_fence *base) static void vgem_fence_value_str(struct dma_fence *fence, char *str, int size) { - snprintf(str, size, "%u", fence->seqno); + snprintf(str, size, "%llu", fence->seqno); } static void vgem_fence_timeline_value_str(struct dma_fence *fence, char *str, int size) { - snprintf(str, size, "%u", + snprintf(str, size, "%llu", dma_fence_is_signaled(fence) ? fence->seqno : 0); } diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index 999e4b104410..6b788467b2e3 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -77,7 +77,7 @@ struct dma_fence { struct list_head cb_list; spinlock_t *lock; u64 context; - unsigned seqno; + u64 seqno; unsigned long flags; ktime_t timestamp; int error; @@ -244,7 +244,7 @@ struct dma_fence_ops { }; void dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops, - spinlock_t *lock, u64 context, unsigned seqno); + spinlock_t *lock, u64 context, u64 seqno); void dma_fence_release(struct kref *kref); void dma_fence_free(struct dma_fence *fence); @@ -414,9 +414,17 @@ dma_fence_is_signaled(struct dma_fence *fence) * Returns true if f1 is chronologically later than f2. Both fences must be * from the same context, since a seqno is not common across contexts. */ -static inline bool __dma_fence_is_later(u32 f1, u32 f2) +static inline bool __dma_fence_is_later(u64 f1, u64 f2) { - return (int)(f1 - f2) > 0; + /* This is for backward compatibility with drivers which can only handle + * 32bit sequence numbers. Use a 64bit compare when any of the higher + * bits are none zero, otherwise use a 32bit compare with wrap around + * handling. + */ + if (upper_32_bits(f1) || upper_32_bits(f2)) + return f1 > f2; + + return (int)(lower_32_bits(f1) - lower_32_bits(f2)) > 0; } /** @@ -548,21 +556,21 @@ u64 dma_fence_context_alloc(unsigned num); do { \ struct dma_fence *__ff = (f); \ if (IS_ENABLED(CONFIG_DMA_FENCE_TRACE)) \ - pr_info("f %llu#%u: " fmt, \ + pr_info("f %llu#%llu: " fmt, \ __ff->context, __ff->seqno, ##args); \ } while (0) #define DMA_FENCE_WARN(f, fmt, args...) \ do { \ struct dma_fence *__ff = (f); \ - pr_warn("f %llu#%u: " fmt, __ff->context, __ff->seqno, \ + pr_warn("f %llu#%llu: " fmt, __ff->context, __ff->seqno,\ ##args); \ } while (0) #define DMA_FENCE_ERR(f, fmt, args...) \ do { \ struct dma_fence *__ff = (f); \ - pr_err("f %llu#%u: " fmt, __ff->context, __ff->seqno, \ + pr_err("f %llu#%llu: " fmt, __ff->context, __ff->seqno, \ ##args); \ } while (0) -- cgit From ebc40be2b8eec093abbbd87658a6726ff84a61f5 Mon Sep 17 00:00:00 2001 From: Fabien Dessenne Date: Wed, 7 Nov 2018 11:18:34 +0100 Subject: remoteproc: fix kernel-doc comment for parse_fw Fix the kernel-doc comment for "parse_fw" and fix a typo. Signed-off-by: Fabien Dessenne Signed-off-by: Bjorn Andersson --- include/linux/remoteproc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index 507a2b524208..68e72f33c705 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -345,9 +345,9 @@ struct firmware; * @stop: power off the device * @kick: kick a virtqueue (virtqueue id given as a parameter) * @da_to_va: optional platform hook to perform address translations - * @load_rsc_table: load resource table from firmware image + * @parse_fw: parse firmware to extract information (e.g. resource table) * @find_loaded_rsc_table: find the loaded resouce table - * @load: load firmeware to memory, where the remote processor + * @load: load firmware to memory, where the remote processor * expects to find it * @sanity_check: sanity check the fw image * @get_boot_addr: get boot address to entry point specified in firmware -- cgit From d7dba6be0f31ae61f5f3296aa130f45d57d30f74 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 7 Jan 2019 13:07:36 +0200 Subject: dmaengine: dw: Remove misleading is_private property The commit a9ddb575d6d6 ("dmaengine: dw_dmac: Enhance device tree support") introduces is_private property in uncertain understanding what does it mean. First of all, documentation defines DMA_PRIVATE capability as Documentation/crypto/async-tx-api.txt: The DMA_PRIVATE capability flag is used to tag dma devices that should not be used by the general-purpose allocator. It can be set at initialization time if it is known that a channel will always be private. Alternatively, it is set when dma_request_channel() finds an unused "public" channel. A couple caveats to note when implementing a driver and consumer: 1/ Once a channel has been privately allocated it will no longer be considered by the general-purpose allocator even after a call to dma_release_channel(). 2/ Since capabilities are specified at the device level a dma_device with multiple channels will either have all channels public, or all channels private. Documentation/driver-api/dmaengine/provider.rst: - DMA_PRIVATE The devices only supports slave transfers, and as such isn't available for async transfers. The capability had been introduced by the commit 59b5ec21446b ("dmaengine: introduce dma_request_channel and private channels") and some code didn't changed from that times ever. Taking into consideration above and the fact that on all known platforms Synopsys DesignWare DMA engine is attached to serve slave transfers, the DMA_PRIVATE capability must be enabled for this device unconditionally. Otherwise, as rightfully noticed in drivers/dma/at_xdmac.c: /* * Without DMA_PRIVATE the driver is not able to allocate more than * one channel, second allocation fails in private_candidate. */ because of of a caveats mentioned in above documentation excerpts. So, remove conditional around DMA_PRIVATE followed by removal leftovers. If someone wonders, DMA_PRIVATE can be not used if and only if the all channels of the DMA controller are supposed to serve memory-to-memory like operations. For example, EP93xx has two controllers, one of which can only perform memory-to-memory transfers Note, this change doesn't affect dmatest to be able to test such controllers. Cc: Greg Kroah-Hartman (maintainer:SERIAL DRIVERS) Cc: Dan Williams Signed-off-by: Andy Shevchenko Acked-by: Greg Kroah-Hartman Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/dma/snps-dma.txt | 2 -- drivers/dma/dw/core.c | 4 +--- drivers/dma/dw/pci.c | 1 - drivers/dma/dw/platform.c | 3 --- drivers/tty/serial/8250/8250_lpss.c | 1 - include/linux/platform_data/dma-dw.h | 3 --- 6 files changed, 1 insertion(+), 13 deletions(-) (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/dma/snps-dma.txt b/Documentation/devicetree/bindings/dma/snps-dma.txt index db757df7057d..0bedceed1963 100644 --- a/Documentation/devicetree/bindings/dma/snps-dma.txt +++ b/Documentation/devicetree/bindings/dma/snps-dma.txt @@ -23,8 +23,6 @@ Deprecated properties: Optional properties: -- is_private: The device channels should be marked as private and not for by the - general purpose DMA channel allocator. False if not passed. - multi-block: Multi block transfers supported by hardware. Array property with one cell per channel. 0: not supported, 1 (default): supported. - snps,dma-protection-control: AHB HPROT[3:1] protection setting. diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c index dc053e62f894..e25503986680 100644 --- a/drivers/dma/dw/core.c +++ b/drivers/dma/dw/core.c @@ -1227,7 +1227,6 @@ int dw_dma_probe(struct dw_dma_chip *chip) pdata->block_size = dma_readl(dw, MAX_BLK_SIZE); /* Fill platform data with the default values */ - pdata->is_private = true; pdata->is_memcpy = true; pdata->chan_allocation_order = CHAN_ALLOCATION_ASCENDING; pdata->chan_priority = CHAN_PRIORITY_ASCENDING; @@ -1340,8 +1339,7 @@ int dw_dma_probe(struct dw_dma_chip *chip) /* Set capabilities */ dma_cap_set(DMA_SLAVE, dw->dma.cap_mask); - if (pdata->is_private) - dma_cap_set(DMA_PRIVATE, dw->dma.cap_mask); + dma_cap_set(DMA_PRIVATE, dw->dma.cap_mask); if (pdata->is_memcpy) dma_cap_set(DMA_MEMCPY, dw->dma.cap_mask); diff --git a/drivers/dma/dw/pci.c b/drivers/dma/dw/pci.c index 313ba10c6224..570498faadc3 100644 --- a/drivers/dma/dw/pci.c +++ b/drivers/dma/dw/pci.c @@ -17,7 +17,6 @@ static struct dw_dma_platform_data mrfld_pdata = { .nr_channels = 8, - .is_private = true, .is_memcpy = true, .is_idma32 = true, .chan_allocation_order = CHAN_ALLOCATION_ASCENDING, diff --git a/drivers/dma/dw/platform.c b/drivers/dma/dw/platform.c index 31ff8113c3de..6dd8cd1820c1 100644 --- a/drivers/dma/dw/platform.c +++ b/drivers/dma/dw/platform.c @@ -128,9 +128,6 @@ dw_dma_parse_dt(struct platform_device *pdev) pdata->nr_masters = nr_masters; pdata->nr_channels = nr_channels; - if (of_property_read_bool(np, "is_private")) - pdata->is_private = true; - /* * All known devices, which use DT for configuration, support * memory-to-memory transfers. So enable it by default. diff --git a/drivers/tty/serial/8250/8250_lpss.c b/drivers/tty/serial/8250/8250_lpss.c index 98dbc796353f..53ca9ba6ab4b 100644 --- a/drivers/tty/serial/8250/8250_lpss.c +++ b/drivers/tty/serial/8250/8250_lpss.c @@ -153,7 +153,6 @@ static int byt_serial_setup(struct lpss8250 *lpss, struct uart_port *port) #ifdef CONFIG_SERIAL_8250_DMA static const struct dw_dma_platform_data qrk_serial_dma_pdata = { .nr_channels = 2, - .is_private = true, .chan_allocation_order = CHAN_ALLOCATION_ASCENDING, .chan_priority = CHAN_PRIORITY_ASCENDING, .block_size = 4095, diff --git a/include/linux/platform_data/dma-dw.h b/include/linux/platform_data/dma-dw.h index 1a1d58ebffbf..d443025c5c72 100644 --- a/include/linux/platform_data/dma-dw.h +++ b/include/linux/platform_data/dma-dw.h @@ -38,8 +38,6 @@ struct dw_dma_slave { /** * struct dw_dma_platform_data - Controller configuration parameters * @nr_channels: Number of channels supported by hardware (max 8) - * @is_private: The device channels should be marked as private and not for - * by the general purpose DMA channel allocator. * @is_memcpy: The device channels do support memory-to-memory transfers. * @is_idma32: The type of the DMA controller is iDMA32 * @chan_allocation_order: Allocate channels starting from 0 or 7 @@ -53,7 +51,6 @@ struct dw_dma_slave { */ struct dw_dma_platform_data { unsigned int nr_channels; - bool is_private; bool is_memcpy; bool is_idma32; #define CHAN_ALLOCATION_ASCENDING 0 /* zero to seven */ -- cgit From 078165779608873e7b6eae1316a39c73af9f3edc Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 7 Jan 2019 13:07:37 +0200 Subject: dmaengine: dw: Remove unused internal property All known devices, which use DT for configuration, support memory-to-memory transfers. So enable it by default. The rest two cases, i.e. Intel Quark and PPC460ex, instantiate DMA driver and use its channels exclusively for hardware, which means there is no available channel for any other purposes anyway. Signed-off-by: Andy Shevchenko Signed-off-by: Vinod Koul --- drivers/dma/dw/core.c | 4 +--- drivers/dma/dw/pci.c | 1 - drivers/dma/dw/platform.c | 6 ------ include/linux/platform_data/dma-dw.h | 2 -- 4 files changed, 1 insertion(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c index e25503986680..4982e443869c 100644 --- a/drivers/dma/dw/core.c +++ b/drivers/dma/dw/core.c @@ -1227,7 +1227,6 @@ int dw_dma_probe(struct dw_dma_chip *chip) pdata->block_size = dma_readl(dw, MAX_BLK_SIZE); /* Fill platform data with the default values */ - pdata->is_memcpy = true; pdata->chan_allocation_order = CHAN_ALLOCATION_ASCENDING; pdata->chan_priority = CHAN_PRIORITY_ASCENDING; } else if (chip->pdata->nr_channels > DW_DMA_MAX_NR_CHANNELS) { @@ -1340,8 +1339,7 @@ int dw_dma_probe(struct dw_dma_chip *chip) /* Set capabilities */ dma_cap_set(DMA_SLAVE, dw->dma.cap_mask); dma_cap_set(DMA_PRIVATE, dw->dma.cap_mask); - if (pdata->is_memcpy) - dma_cap_set(DMA_MEMCPY, dw->dma.cap_mask); + dma_cap_set(DMA_MEMCPY, dw->dma.cap_mask); dw->dma.dev = chip->dev; dw->dma.device_alloc_chan_resources = dwc_alloc_chan_resources; diff --git a/drivers/dma/dw/pci.c b/drivers/dma/dw/pci.c index 570498faadc3..66d98d7ccad0 100644 --- a/drivers/dma/dw/pci.c +++ b/drivers/dma/dw/pci.c @@ -17,7 +17,6 @@ static struct dw_dma_platform_data mrfld_pdata = { .nr_channels = 8, - .is_memcpy = true, .is_idma32 = true, .chan_allocation_order = CHAN_ALLOCATION_ASCENDING, .chan_priority = CHAN_PRIORITY_ASCENDING, diff --git a/drivers/dma/dw/platform.c b/drivers/dma/dw/platform.c index 6dd8cd1820c1..58fc1ba02a1e 100644 --- a/drivers/dma/dw/platform.c +++ b/drivers/dma/dw/platform.c @@ -128,12 +128,6 @@ dw_dma_parse_dt(struct platform_device *pdev) pdata->nr_masters = nr_masters; pdata->nr_channels = nr_channels; - /* - * All known devices, which use DT for configuration, support - * memory-to-memory transfers. So enable it by default. - */ - pdata->is_memcpy = true; - if (!of_property_read_u32(np, "chan_allocation_order", &tmp)) pdata->chan_allocation_order = (unsigned char)tmp; diff --git a/include/linux/platform_data/dma-dw.h b/include/linux/platform_data/dma-dw.h index d443025c5c72..1c85eeee4171 100644 --- a/include/linux/platform_data/dma-dw.h +++ b/include/linux/platform_data/dma-dw.h @@ -38,7 +38,6 @@ struct dw_dma_slave { /** * struct dw_dma_platform_data - Controller configuration parameters * @nr_channels: Number of channels supported by hardware (max 8) - * @is_memcpy: The device channels do support memory-to-memory transfers. * @is_idma32: The type of the DMA controller is iDMA32 * @chan_allocation_order: Allocate channels starting from 0 or 7 * @chan_priority: Set channel priority increasing from 0 to 7 or 7 to 0. @@ -51,7 +50,6 @@ struct dw_dma_slave { */ struct dw_dma_platform_data { unsigned int nr_channels; - bool is_memcpy; bool is_idma32; #define CHAN_ALLOCATION_ASCENDING 0 /* zero to seven */ #define CHAN_ALLOCATION_DESCENDING 1 /* seven to zero */ -- cgit From 69da8be90d5e85e60b5377c47384154b9dabf592 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 7 Jan 2019 13:07:38 +0200 Subject: dmaengine: dw: Split DW and iDMA 32-bit operations Here is a kinda big refactoring that should have been done in the first place, when Intel iDMA 32-bit support appeared. It splits operations which are different to Synopsys DesignWare and Intel iDMA 32-bit controllers. No functional change intended. Signed-off-by: Andy Shevchenko Signed-off-by: Vinod Koul --- drivers/dma/dw/Makefile | 2 +- drivers/dma/dw/core.c | 190 +++++++---------------------------- drivers/dma/dw/dw.c | 112 +++++++++++++++++++++ drivers/dma/dw/idma32.c | 138 +++++++++++++++++++++++++ drivers/dma/dw/internal.h | 10 +- drivers/dma/dw/pci.c | 45 ++++++--- drivers/dma/dw/platform.c | 8 +- drivers/dma/dw/regs.h | 13 +++ include/linux/dma/dw.h | 4 + include/linux/platform_data/dma-dw.h | 2 - 10 files changed, 343 insertions(+), 181 deletions(-) create mode 100644 drivers/dma/dw/dw.c create mode 100644 drivers/dma/dw/idma32.c (limited to 'include/linux') diff --git a/drivers/dma/dw/Makefile b/drivers/dma/dw/Makefile index 2b949c2e4504..63ed895c09aa 100644 --- a/drivers/dma/dw/Makefile +++ b/drivers/dma/dw/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_DW_DMAC_CORE) += dw_dmac_core.o -dw_dmac_core-objs := core.o +dw_dmac_core-objs := core.o dw.o idma32.o obj-$(CONFIG_DW_DMAC) += dw_dmac.o dw_dmac-objs := platform.o diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c index 4982e443869c..8a581d86ea8d 100644 --- a/drivers/dma/dw/core.c +++ b/drivers/dma/dw/core.c @@ -138,44 +138,6 @@ static void dwc_desc_put(struct dw_dma_chan *dwc, struct dw_desc *desc) dwc->descs_allocated--; } -static void dwc_initialize_chan_idma32(struct dw_dma_chan *dwc) -{ - u32 cfghi = 0; - u32 cfglo = 0; - - /* Set default burst alignment */ - cfglo |= IDMA32C_CFGL_DST_BURST_ALIGN | IDMA32C_CFGL_SRC_BURST_ALIGN; - - /* Low 4 bits of the request lines */ - cfghi |= IDMA32C_CFGH_DST_PER(dwc->dws.dst_id & 0xf); - cfghi |= IDMA32C_CFGH_SRC_PER(dwc->dws.src_id & 0xf); - - /* Request line extension (2 bits) */ - cfghi |= IDMA32C_CFGH_DST_PER_EXT(dwc->dws.dst_id >> 4 & 0x3); - cfghi |= IDMA32C_CFGH_SRC_PER_EXT(dwc->dws.src_id >> 4 & 0x3); - - channel_writel(dwc, CFG_LO, cfglo); - channel_writel(dwc, CFG_HI, cfghi); -} - -static void dwc_initialize_chan_dw(struct dw_dma_chan *dwc) -{ - struct dw_dma *dw = to_dw_dma(dwc->chan.device); - u32 cfghi = DWC_CFGH_FIFO_MODE; - u32 cfglo = DWC_CFGL_CH_PRIOR(dwc->priority); - bool hs_polarity = dwc->dws.hs_polarity; - - cfghi |= DWC_CFGH_DST_PER(dwc->dws.dst_id); - cfghi |= DWC_CFGH_SRC_PER(dwc->dws.src_id); - cfghi |= DWC_CFGH_PROTCTL(dw->pdata->protctl); - - /* Set polarity of handshake interface */ - cfglo |= hs_polarity ? DWC_CFGL_HS_DST_POL | DWC_CFGL_HS_SRC_POL : 0; - - channel_writel(dwc, CFG_LO, cfglo); - channel_writel(dwc, CFG_HI, cfghi); -} - static void dwc_initialize(struct dw_dma_chan *dwc) { struct dw_dma *dw = to_dw_dma(dwc->chan.device); @@ -183,10 +145,7 @@ static void dwc_initialize(struct dw_dma_chan *dwc) if (test_bit(DW_DMA_IS_INITIALIZED, &dwc->flags)) return; - if (dw->pdata->is_idma32) - dwc_initialize_chan_idma32(dwc); - else - dwc_initialize_chan_dw(dwc); + dw->initialize_chan(dwc); /* Enable interrupts */ channel_set_bit(dw, MASK.XFER, dwc->mask); @@ -215,37 +174,6 @@ static inline void dwc_chan_disable(struct dw_dma *dw, struct dw_dma_chan *dwc) cpu_relax(); } -static u32 bytes2block(struct dw_dma_chan *dwc, size_t bytes, - unsigned int width, size_t *len) -{ - struct dw_dma *dw = to_dw_dma(dwc->chan.device); - u32 block; - - /* Always in bytes for iDMA 32-bit */ - if (dw->pdata->is_idma32) - width = 0; - - if ((bytes >> width) > dwc->block_size) { - block = dwc->block_size; - *len = block << width; - } else { - block = bytes >> width; - *len = bytes; - } - - return block; -} - -static size_t block2bytes(struct dw_dma_chan *dwc, u32 block, u32 width) -{ - struct dw_dma *dw = to_dw_dma(dwc->chan.device); - - if (dw->pdata->is_idma32) - return IDMA32C_CTLH_BLOCK_TS(block); - - return DWC_CTLH_BLOCK_TS(block) << width; -} - /*----------------------------------------------------------------------*/ /* Perform single block transfer */ @@ -391,10 +319,11 @@ static void dwc_complete_all(struct dw_dma *dw, struct dw_dma_chan *dwc) /* Returns how many bytes were already received from source */ static inline u32 dwc_get_sent(struct dw_dma_chan *dwc) { + struct dw_dma *dw = to_dw_dma(dwc->chan.device); u32 ctlhi = channel_readl(dwc, CTL_HI); u32 ctllo = channel_readl(dwc, CTL_LO); - return block2bytes(dwc, ctlhi, ctllo >> 4 & 7); + return dw->block2bytes(dwc, ctlhi, ctllo >> 4 & 7); } static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc) @@ -651,7 +580,7 @@ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, unsigned int src_width; unsigned int dst_width; unsigned int data_width = dw->pdata->data_width[m_master]; - u32 ctllo; + u32 ctllo, ctlhi; u8 lms = DWC_LLP_LMS(m_master); dev_vdbg(chan2dev(chan), @@ -680,10 +609,12 @@ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, if (!desc) goto err_desc_get; + ctlhi = dw->bytes2block(dwc, len - offset, src_width, &xfer_count); + lli_write(desc, sar, src + offset); lli_write(desc, dar, dest + offset); lli_write(desc, ctllo, ctllo); - lli_write(desc, ctlhi, bytes2block(dwc, len - offset, src_width, &xfer_count)); + lli_write(desc, ctlhi, ctlhi); desc->len = xfer_count; if (!first) { @@ -721,7 +652,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, struct dma_slave_config *sconfig = &dwc->dma_sconfig; struct dw_desc *prev; struct dw_desc *first; - u32 ctllo; + u32 ctllo, ctlhi; u8 m_master = dwc->dws.m_master; u8 lms = DWC_LLP_LMS(m_master); dma_addr_t reg; @@ -768,9 +699,11 @@ slave_sg_todev_fill_desc: if (!desc) goto err_desc_get; + ctlhi = dw->bytes2block(dwc, len, mem_width, &dlen); + lli_write(desc, sar, mem); lli_write(desc, dar, reg); - lli_write(desc, ctlhi, bytes2block(dwc, len, mem_width, &dlen)); + lli_write(desc, ctlhi, ctlhi); lli_write(desc, ctllo, ctllo | DWC_CTLL_SRC_WIDTH(mem_width)); desc->len = dlen; @@ -814,9 +747,11 @@ slave_sg_fromdev_fill_desc: if (!desc) goto err_desc_get; + ctlhi = dw->bytes2block(dwc, len, reg_width, &dlen); + lli_write(desc, sar, reg); lli_write(desc, dar, mem); - lli_write(desc, ctlhi, bytes2block(dwc, len, reg_width, &dlen)); + lli_write(desc, ctlhi, ctlhi); mem_width = __ffs(data_width | mem | dlen); lli_write(desc, ctllo, ctllo | DWC_CTLL_DST_WIDTH(mem_width)); desc->len = dlen; @@ -876,22 +811,12 @@ EXPORT_SYMBOL_GPL(dw_dma_filter); static int dwc_config(struct dma_chan *chan, struct dma_slave_config *sconfig) { struct dw_dma_chan *dwc = to_dw_dma_chan(chan); - struct dma_slave_config *sc = &dwc->dma_sconfig; struct dw_dma *dw = to_dw_dma(chan->device); - /* - * Fix sconfig's burst size according to dw_dmac. We need to convert - * them as: - * 1 -> 0, 4 -> 1, 8 -> 2, 16 -> 3. - * - * NOTE: burst size 2 is not supported by DesignWare controller. - * iDMA 32-bit supports it. - */ - u32 s = dw->pdata->is_idma32 ? 1 : 2; memcpy(&dwc->dma_sconfig, sconfig, sizeof(*sconfig)); - sc->src_maxburst = sc->src_maxburst > 1 ? fls(sc->src_maxburst) - s : 0; - sc->dst_maxburst = sc->dst_maxburst > 1 ? fls(sc->dst_maxburst) - s : 0; + dw->encode_maxburst(dwc, &dwc->dma_sconfig.src_maxburst); + dw->encode_maxburst(dwc, &dwc->dma_sconfig.dst_maxburst); return 0; } @@ -900,16 +825,9 @@ static void dwc_chan_pause(struct dw_dma_chan *dwc, bool drain) { struct dw_dma *dw = to_dw_dma(dwc->chan.device); unsigned int count = 20; /* timeout iterations */ - u32 cfglo; - cfglo = channel_readl(dwc, CFG_LO); - if (dw->pdata->is_idma32) { - if (drain) - cfglo |= IDMA32C_CFGL_CH_DRAIN; - else - cfglo &= ~IDMA32C_CFGL_CH_DRAIN; - } - channel_writel(dwc, CFG_LO, cfglo | DWC_CFGL_CH_SUSP); + dw->suspend_chan(dwc, drain); + while (!(channel_readl(dwc, CFG_LO) & DWC_CFGL_FIFO_EMPTY) && count--) udelay(2); @@ -1058,33 +976,7 @@ static void dwc_issue_pending(struct dma_chan *chan) /*----------------------------------------------------------------------*/ -/* - * Program FIFO size of channels. - * - * By default full FIFO (512 bytes) is assigned to channel 0. Here we - * slice FIFO on equal parts between channels. - */ -static void idma32_fifo_partition(struct dw_dma *dw) -{ - u64 value = IDMA32C_FP_PSIZE_CH0(64) | IDMA32C_FP_PSIZE_CH1(64) | - IDMA32C_FP_UPDATE; - u64 fifo_partition = 0; - - if (!dw->pdata->is_idma32) - return; - - /* Fill FIFO_PARTITION low bits (Channels 0..1, 4..5) */ - fifo_partition |= value << 0; - - /* Fill FIFO_PARTITION high bits (Channels 2..3, 6..7) */ - fifo_partition |= value << 32; - - /* Program FIFO Partition registers - 64 bytes per channel */ - idma32_writeq(dw, FIFO_PARTITION1, fifo_partition); - idma32_writeq(dw, FIFO_PARTITION0, fifo_partition); -} - -static void dw_dma_off(struct dw_dma *dw) +void do_dw_dma_off(struct dw_dma *dw) { unsigned int i; @@ -1103,7 +995,7 @@ static void dw_dma_off(struct dw_dma *dw) clear_bit(DW_DMA_IS_INITIALIZED, &dw->chan[i].flags); } -static void dw_dma_on(struct dw_dma *dw) +void do_dw_dma_on(struct dw_dma *dw) { dma_writel(dw, CFG, DW_CFG_DMA_EN); } @@ -1139,7 +1031,7 @@ static int dwc_alloc_chan_resources(struct dma_chan *chan) /* Enable controller here if needed */ if (!dw->in_use) - dw_dma_on(dw); + do_dw_dma_on(dw); dw->in_use |= dwc->mask; return 0; @@ -1177,30 +1069,25 @@ static void dwc_free_chan_resources(struct dma_chan *chan) /* Disable controller in case it was a last user */ dw->in_use &= ~dwc->mask; if (!dw->in_use) - dw_dma_off(dw); + do_dw_dma_off(dw); dev_vdbg(chan2dev(chan), "%s: done\n", __func__); } -int dw_dma_probe(struct dw_dma_chip *chip) +int do_dma_probe(struct dw_dma_chip *chip) { + struct dw_dma *dw = chip->dw; struct dw_dma_platform_data *pdata; - struct dw_dma *dw; bool autocfg = false; unsigned int dw_params; unsigned int i; int err; - dw = devm_kzalloc(chip->dev, sizeof(*dw), GFP_KERNEL); - if (!dw) - return -ENOMEM; - dw->pdata = devm_kzalloc(chip->dev, sizeof(*dw->pdata), GFP_KERNEL); if (!dw->pdata) return -ENOMEM; dw->regs = chip->regs; - chip->dw = dw; pm_runtime_get_sync(chip->dev); @@ -1250,15 +1137,10 @@ int dw_dma_probe(struct dw_dma_chip *chip) dw->all_chan_mask = (1 << pdata->nr_channels) - 1; /* Force dma off, just in case */ - dw_dma_off(dw); - - idma32_fifo_partition(dw); + dw->disable(dw); /* Device and instance ID for IRQ and DMA pool */ - if (pdata->is_idma32) - snprintf(dw->name, sizeof(dw->name), "idma32:dmac%d", chip->id); - else - snprintf(dw->name, sizeof(dw->name), "dw:dmac%d", chip->id); + dw->set_device_name(dw, chip->id); /* Create a pool of consistent memory blocks for hardware descriptors */ dw->desc_pool = dmam_pool_create(dw->name, chip->dev, @@ -1380,16 +1262,15 @@ err_pdata: pm_runtime_put_sync_suspend(chip->dev); return err; } -EXPORT_SYMBOL_GPL(dw_dma_probe); -int dw_dma_remove(struct dw_dma_chip *chip) +int do_dma_remove(struct dw_dma_chip *chip) { struct dw_dma *dw = chip->dw; struct dw_dma_chan *dwc, *_dwc; pm_runtime_get_sync(chip->dev); - dw_dma_off(dw); + do_dw_dma_off(dw); dma_async_device_unregister(&dw->dma); free_irq(chip->irq, dw); @@ -1404,27 +1285,24 @@ int dw_dma_remove(struct dw_dma_chip *chip) pm_runtime_put_sync_suspend(chip->dev); return 0; } -EXPORT_SYMBOL_GPL(dw_dma_remove); -int dw_dma_disable(struct dw_dma_chip *chip) +int do_dw_dma_disable(struct dw_dma_chip *chip) { struct dw_dma *dw = chip->dw; - dw_dma_off(dw); + dw->disable(dw); return 0; } -EXPORT_SYMBOL_GPL(dw_dma_disable); +EXPORT_SYMBOL_GPL(do_dw_dma_disable); -int dw_dma_enable(struct dw_dma_chip *chip) +int do_dw_dma_enable(struct dw_dma_chip *chip) { struct dw_dma *dw = chip->dw; - idma32_fifo_partition(dw); - - dw_dma_on(dw); + dw->enable(dw); return 0; } -EXPORT_SYMBOL_GPL(dw_dma_enable); +EXPORT_SYMBOL_GPL(do_dw_dma_enable); MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("Synopsys DesignWare DMA Controller core driver"); diff --git a/drivers/dma/dw/dw.c b/drivers/dma/dw/dw.c new file mode 100644 index 000000000000..977aa28bf81d --- /dev/null +++ b/drivers/dma/dw/dw.c @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2007-2008 Atmel Corporation +// Copyright (C) 2010-2011 ST Microelectronics +// Copyright (C) 2013,2018 Intel Corporation + +#include +#include +#include +#include + +#include "internal.h" + +static void dw_dma_initialize_chan(struct dw_dma_chan *dwc) +{ + struct dw_dma *dw = to_dw_dma(dwc->chan.device); + u32 cfghi = DWC_CFGH_FIFO_MODE; + u32 cfglo = DWC_CFGL_CH_PRIOR(dwc->priority); + bool hs_polarity = dwc->dws.hs_polarity; + + cfghi |= DWC_CFGH_DST_PER(dwc->dws.dst_id); + cfghi |= DWC_CFGH_SRC_PER(dwc->dws.src_id); + cfghi |= DWC_CFGH_PROTCTL(dw->pdata->protctl); + + /* Set polarity of handshake interface */ + cfglo |= hs_polarity ? DWC_CFGL_HS_DST_POL | DWC_CFGL_HS_SRC_POL : 0; + + channel_writel(dwc, CFG_LO, cfglo); + channel_writel(dwc, CFG_HI, cfghi); +} + +static void dw_dma_suspend_chan(struct dw_dma_chan *dwc, bool drain) +{ + u32 cfglo = channel_readl(dwc, CFG_LO); + + channel_writel(dwc, CFG_LO, cfglo | DWC_CFGL_CH_SUSP); +} + +static u32 dw_dma_bytes2block(struct dw_dma_chan *dwc, + size_t bytes, unsigned int width, size_t *len) +{ + u32 block; + + if ((bytes >> width) > dwc->block_size) { + block = dwc->block_size; + *len = dwc->block_size << width; + } else { + block = bytes >> width; + *len = bytes; + } + + return block; +} + +static size_t dw_dma_block2bytes(struct dw_dma_chan *dwc, u32 block, u32 width) +{ + return DWC_CTLH_BLOCK_TS(block) << width; +} + +static void dw_dma_encode_maxburst(struct dw_dma_chan *dwc, u32 *maxburst) +{ + /* + * Fix burst size according to dw_dmac. We need to convert them as: + * 1 -> 0, 4 -> 1, 8 -> 2, 16 -> 3. + */ + *maxburst = *maxburst > 1 ? fls(*maxburst) - 2 : 0; +} + +static void dw_dma_set_device_name(struct dw_dma *dw, int id) +{ + snprintf(dw->name, sizeof(dw->name), "dw:dmac%d", id); +} + +static void dw_dma_disable(struct dw_dma *dw) +{ + do_dw_dma_off(dw); +} + +static void dw_dma_enable(struct dw_dma *dw) +{ + do_dw_dma_on(dw); +} + +int dw_dma_probe(struct dw_dma_chip *chip) +{ + struct dw_dma *dw; + + dw = devm_kzalloc(chip->dev, sizeof(*dw), GFP_KERNEL); + if (!dw) + return -ENOMEM; + + /* Channel operations */ + dw->initialize_chan = dw_dma_initialize_chan; + dw->suspend_chan = dw_dma_suspend_chan; + dw->encode_maxburst = dw_dma_encode_maxburst; + dw->bytes2block = dw_dma_bytes2block; + dw->block2bytes = dw_dma_block2bytes; + + /* Device operations */ + dw->set_device_name = dw_dma_set_device_name; + dw->disable = dw_dma_disable; + dw->enable = dw_dma_enable; + + chip->dw = dw; + return do_dma_probe(chip); +} +EXPORT_SYMBOL_GPL(dw_dma_probe); + +int dw_dma_remove(struct dw_dma_chip *chip) +{ + return do_dma_remove(chip); +} +EXPORT_SYMBOL_GPL(dw_dma_remove); diff --git a/drivers/dma/dw/idma32.c b/drivers/dma/dw/idma32.c new file mode 100644 index 000000000000..8707830f39ad --- /dev/null +++ b/drivers/dma/dw/idma32.c @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2013,2018 Intel Corporation + +#include +#include +#include +#include + +#include "internal.h" + +static void idma32_initialize_chan(struct dw_dma_chan *dwc) +{ + u32 cfghi = 0; + u32 cfglo = 0; + + /* Set default burst alignment */ + cfglo |= IDMA32C_CFGL_DST_BURST_ALIGN | IDMA32C_CFGL_SRC_BURST_ALIGN; + + /* Low 4 bits of the request lines */ + cfghi |= IDMA32C_CFGH_DST_PER(dwc->dws.dst_id & 0xf); + cfghi |= IDMA32C_CFGH_SRC_PER(dwc->dws.src_id & 0xf); + + /* Request line extension (2 bits) */ + cfghi |= IDMA32C_CFGH_DST_PER_EXT(dwc->dws.dst_id >> 4 & 0x3); + cfghi |= IDMA32C_CFGH_SRC_PER_EXT(dwc->dws.src_id >> 4 & 0x3); + + channel_writel(dwc, CFG_LO, cfglo); + channel_writel(dwc, CFG_HI, cfghi); +} + +static void idma32_suspend_chan(struct dw_dma_chan *dwc, bool drain) +{ + u32 cfglo = channel_readl(dwc, CFG_LO); + + if (drain) + cfglo |= IDMA32C_CFGL_CH_DRAIN; + else + cfglo &= ~IDMA32C_CFGL_CH_DRAIN; + + channel_writel(dwc, CFG_LO, cfglo | DWC_CFGL_CH_SUSP); +} + +static u32 idma32_bytes2block(struct dw_dma_chan *dwc, + size_t bytes, unsigned int width, size_t *len) +{ + u32 block; + + if (bytes > dwc->block_size) { + block = dwc->block_size; + *len = dwc->block_size; + } else { + block = bytes; + *len = bytes; + } + + return block; +} + +static size_t idma32_block2bytes(struct dw_dma_chan *dwc, u32 block, u32 width) +{ + return IDMA32C_CTLH_BLOCK_TS(block); +} + +static void idma32_encode_maxburst(struct dw_dma_chan *dwc, u32 *maxburst) +{ + *maxburst = *maxburst > 1 ? fls(*maxburst) - 1 : 0; +} + +static void idma32_set_device_name(struct dw_dma *dw, int id) +{ + snprintf(dw->name, sizeof(dw->name), "idma32:dmac%d", id); +} + +/* + * Program FIFO size of channels. + * + * By default full FIFO (512 bytes) is assigned to channel 0. Here we + * slice FIFO on equal parts between channels. + */ +static void idma32_fifo_partition(struct dw_dma *dw) +{ + u64 value = IDMA32C_FP_PSIZE_CH0(64) | IDMA32C_FP_PSIZE_CH1(64) | + IDMA32C_FP_UPDATE; + u64 fifo_partition = 0; + + /* Fill FIFO_PARTITION low bits (Channels 0..1, 4..5) */ + fifo_partition |= value << 0; + + /* Fill FIFO_PARTITION high bits (Channels 2..3, 6..7) */ + fifo_partition |= value << 32; + + /* Program FIFO Partition registers - 64 bytes per channel */ + idma32_writeq(dw, FIFO_PARTITION1, fifo_partition); + idma32_writeq(dw, FIFO_PARTITION0, fifo_partition); +} + +static void idma32_disable(struct dw_dma *dw) +{ + do_dw_dma_off(dw); + idma32_fifo_partition(dw); +} + +static void idma32_enable(struct dw_dma *dw) +{ + idma32_fifo_partition(dw); + do_dw_dma_on(dw); +} + +int idma32_dma_probe(struct dw_dma_chip *chip) +{ + struct dw_dma *dw; + + dw = devm_kzalloc(chip->dev, sizeof(*dw), GFP_KERNEL); + if (!dw) + return -ENOMEM; + + /* Channel operations */ + dw->initialize_chan = idma32_initialize_chan; + dw->suspend_chan = idma32_suspend_chan; + dw->encode_maxburst = idma32_encode_maxburst; + dw->bytes2block = idma32_bytes2block; + dw->block2bytes = idma32_block2bytes; + + /* Device operations */ + dw->set_device_name = idma32_set_device_name; + dw->disable = idma32_disable; + dw->enable = idma32_enable; + + chip->dw = dw; + return do_dma_probe(chip); +} +EXPORT_SYMBOL_GPL(idma32_dma_probe); + +int idma32_dma_remove(struct dw_dma_chip *chip) +{ + return do_dma_remove(chip); +} +EXPORT_SYMBOL_GPL(idma32_dma_remove); diff --git a/drivers/dma/dw/internal.h b/drivers/dma/dw/internal.h index 41439732ff6b..fdcac21ea665 100644 --- a/drivers/dma/dw/internal.h +++ b/drivers/dma/dw/internal.h @@ -15,8 +15,14 @@ #include "regs.h" -int dw_dma_disable(struct dw_dma_chip *chip); -int dw_dma_enable(struct dw_dma_chip *chip); +int do_dma_probe(struct dw_dma_chip *chip); +int do_dma_remove(struct dw_dma_chip *chip); + +void do_dw_dma_on(struct dw_dma *dw); +void do_dw_dma_off(struct dw_dma *dw); + +int do_dw_dma_disable(struct dw_dma_chip *chip); +int do_dw_dma_enable(struct dw_dma_chip *chip); extern bool dw_dma_filter(struct dma_chan *chan, void *param); diff --git a/drivers/dma/dw/pci.c b/drivers/dma/dw/pci.c index 66d98d7ccad0..e9ba25b4f950 100644 --- a/drivers/dma/dw/pci.c +++ b/drivers/dma/dw/pci.c @@ -15,9 +15,17 @@ #include "internal.h" -static struct dw_dma_platform_data mrfld_pdata = { +struct dw_dma_pci_data { + const struct dw_dma_platform_data *pdata; + int (*probe)(struct dw_dma_chip *chip); +}; + +static const struct dw_dma_pci_data dw_pci_data = { + .probe = dw_dma_probe, +}; + +static const struct dw_dma_platform_data idma32_pdata = { .nr_channels = 8, - .is_idma32 = true, .chan_allocation_order = CHAN_ALLOCATION_ASCENDING, .chan_priority = CHAN_PRIORITY_ASCENDING, .block_size = 131071, @@ -26,9 +34,14 @@ static struct dw_dma_platform_data mrfld_pdata = { .multi_block = {1, 1, 1, 1, 1, 1, 1, 1}, }; +static const struct dw_dma_pci_data idma32_pci_data = { + .pdata = &idma32_pdata, + .probe = idma32_dma_probe, +}; + static int dw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *pid) { - const struct dw_dma_platform_data *pdata = (void *)pid->driver_data; + const struct dw_dma_pci_data *data = (void *)pid->driver_data; struct dw_dma_chip *chip; int ret; @@ -61,9 +74,9 @@ static int dw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *pid) chip->id = pdev->devfn; chip->regs = pcim_iomap_table(pdev)[0]; chip->irq = pdev->irq; - chip->pdata = pdata; + chip->pdata = data->pdata; - ret = dw_dma_probe(chip); + ret = data->probe(chip); if (ret) return ret; @@ -89,7 +102,7 @@ static int dw_pci_suspend_late(struct device *dev) struct pci_dev *pci = to_pci_dev(dev); struct dw_dma_chip *chip = pci_get_drvdata(pci); - return dw_dma_disable(chip); + return do_dw_dma_disable(chip); }; static int dw_pci_resume_early(struct device *dev) @@ -97,7 +110,7 @@ static int dw_pci_resume_early(struct device *dev) struct pci_dev *pci = to_pci_dev(dev); struct dw_dma_chip *chip = pci_get_drvdata(pci); - return dw_dma_enable(chip); + return do_dw_dma_enable(chip); }; #endif /* CONFIG_PM_SLEEP */ @@ -108,24 +121,24 @@ static const struct dev_pm_ops dw_pci_dev_pm_ops = { static const struct pci_device_id dw_pci_id_table[] = { /* Medfield (GPDMA) */ - { PCI_VDEVICE(INTEL, 0x0827) }, + { PCI_VDEVICE(INTEL, 0x0827), (kernel_ulong_t)&dw_pci_data }, /* BayTrail */ - { PCI_VDEVICE(INTEL, 0x0f06) }, - { PCI_VDEVICE(INTEL, 0x0f40) }, + { PCI_VDEVICE(INTEL, 0x0f06), (kernel_ulong_t)&dw_pci_data }, + { PCI_VDEVICE(INTEL, 0x0f40), (kernel_ulong_t)&dw_pci_data }, - /* Merrifield iDMA 32-bit (GPDMA) */ - { PCI_VDEVICE(INTEL, 0x11a2), (kernel_ulong_t)&mrfld_pdata }, + /* Merrifield */ + { PCI_VDEVICE(INTEL, 0x11a2), (kernel_ulong_t)&idma32_pci_data }, /* Braswell */ - { PCI_VDEVICE(INTEL, 0x2286) }, - { PCI_VDEVICE(INTEL, 0x22c0) }, + { PCI_VDEVICE(INTEL, 0x2286), (kernel_ulong_t)&dw_pci_data }, + { PCI_VDEVICE(INTEL, 0x22c0), (kernel_ulong_t)&dw_pci_data }, /* Haswell */ - { PCI_VDEVICE(INTEL, 0x9c60) }, + { PCI_VDEVICE(INTEL, 0x9c60), (kernel_ulong_t)&dw_pci_data }, /* Broadwell */ - { PCI_VDEVICE(INTEL, 0x9ce0) }, + { PCI_VDEVICE(INTEL, 0x9ce0), (kernel_ulong_t)&dw_pci_data }, { } }; diff --git a/drivers/dma/dw/platform.c b/drivers/dma/dw/platform.c index 58fc1ba02a1e..d5196c97e4f4 100644 --- a/drivers/dma/dw/platform.c +++ b/drivers/dma/dw/platform.c @@ -255,7 +255,7 @@ static void dw_shutdown(struct platform_device *pdev) struct dw_dma_chip *chip = platform_get_drvdata(pdev); /* - * We have to call dw_dma_disable() to stop any ongoing transfer. On + * We have to call do_dw_dma_disable() to stop any ongoing transfer. On * some platforms we can't do that since DMA device is powered off. * Moreover we have no possibility to check if the platform is affected * or not. That's why we call pm_runtime_get_sync() / pm_runtime_put() @@ -264,7 +264,7 @@ static void dw_shutdown(struct platform_device *pdev) * used by the driver. */ pm_runtime_get_sync(chip->dev); - dw_dma_disable(chip); + do_dw_dma_disable(chip); pm_runtime_put_sync_suspend(chip->dev); clk_disable_unprepare(chip->clk); @@ -294,7 +294,7 @@ static int dw_suspend_late(struct device *dev) { struct dw_dma_chip *chip = dev_get_drvdata(dev); - dw_dma_disable(chip); + do_dw_dma_disable(chip); clk_disable_unprepare(chip->clk); return 0; @@ -309,7 +309,7 @@ static int dw_resume_early(struct device *dev) if (ret) return ret; - return dw_dma_enable(chip); + return do_dw_dma_enable(chip); } #endif /* CONFIG_PM_SLEEP */ diff --git a/drivers/dma/dw/regs.h b/drivers/dma/dw/regs.h index 646c9c960c07..66aa8b227248 100644 --- a/drivers/dma/dw/regs.h +++ b/drivers/dma/dw/regs.h @@ -312,6 +312,19 @@ struct dw_dma { u8 all_chan_mask; u8 in_use; + /* Channel operations */ + void (*initialize_chan)(struct dw_dma_chan *dwc); + void (*suspend_chan)(struct dw_dma_chan *dwc, bool drain); + void (*encode_maxburst)(struct dw_dma_chan *dwc, u32 *maxburst); + u32 (*bytes2block)(struct dw_dma_chan *dwc, size_t bytes, + unsigned int width, size_t *len); + size_t (*block2bytes)(struct dw_dma_chan *dwc, u32 block, u32 width); + + /* Device operations */ + void (*set_device_name)(struct dw_dma *dw, int id); + void (*disable)(struct dw_dma *dw); + void (*enable)(struct dw_dma *dw); + /* platform data */ struct dw_dma_platform_data *pdata; }; diff --git a/include/linux/dma/dw.h b/include/linux/dma/dw.h index e166cac8e870..d643d331c20e 100644 --- a/include/linux/dma/dw.h +++ b/include/linux/dma/dw.h @@ -45,9 +45,13 @@ struct dw_dma_chip { #if IS_ENABLED(CONFIG_DW_DMAC_CORE) int dw_dma_probe(struct dw_dma_chip *chip); int dw_dma_remove(struct dw_dma_chip *chip); +int idma32_dma_probe(struct dw_dma_chip *chip); +int idma32_dma_remove(struct dw_dma_chip *chip); #else static inline int dw_dma_probe(struct dw_dma_chip *chip) { return -ENODEV; } static inline int dw_dma_remove(struct dw_dma_chip *chip) { return 0; } +static inline int idma32_dma_probe(struct dw_dma_chip *chip) { return -ENODEV; } +static inline int idma32_dma_remove(struct dw_dma_chip *chip) { return 0; } #endif /* CONFIG_DW_DMAC_CORE */ #endif /* _DMA_DW_H */ diff --git a/include/linux/platform_data/dma-dw.h b/include/linux/platform_data/dma-dw.h index 1c85eeee4171..576048433809 100644 --- a/include/linux/platform_data/dma-dw.h +++ b/include/linux/platform_data/dma-dw.h @@ -38,7 +38,6 @@ struct dw_dma_slave { /** * struct dw_dma_platform_data - Controller configuration parameters * @nr_channels: Number of channels supported by hardware (max 8) - * @is_idma32: The type of the DMA controller is iDMA32 * @chan_allocation_order: Allocate channels starting from 0 or 7 * @chan_priority: Set channel priority increasing from 0 to 7 or 7 to 0. * @block_size: Maximum block size supported by the controller @@ -50,7 +49,6 @@ struct dw_dma_slave { */ struct dw_dma_platform_data { unsigned int nr_channels; - bool is_idma32; #define CHAN_ALLOCATION_ASCENDING 0 /* zero to seven */ #define CHAN_ALLOCATION_DESCENDING 1 /* seven to zero */ unsigned char chan_allocation_order; -- cgit From b466a37fbcc99ef79ea59e40ef6aa8391430b0d8 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 7 Jan 2019 13:07:41 +0200 Subject: dmaengine: dw: convert to SPDX identifiers This patch updates license to use SPDX-License-Identifier instead of verbose license text. Signed-off-by: Andy Shevchenko Signed-off-by: Vinod Koul --- drivers/dma/dw/Kconfig | 2 ++ drivers/dma/dw/core.c | 5 +---- drivers/dma/dw/internal.h | 5 +---- drivers/dma/dw/pci.c | 5 +---- drivers/dma/dw/platform.c | 5 +---- drivers/dma/dw/regs.h | 5 +---- include/linux/dma/dw.h | 5 +---- include/linux/platform_data/dma-dw.h | 5 +---- 8 files changed, 9 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma/dw/Kconfig b/drivers/dma/dw/Kconfig index 04b9728c1d26..e5162690de8f 100644 --- a/drivers/dma/dw/Kconfig +++ b/drivers/dma/dw/Kconfig @@ -1,3 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0 + # # DMA engine configuration for dw # diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c index b7e4dab28f8a..3a1ab52ffae0 100644 --- a/drivers/dma/dw/core.c +++ b/drivers/dma/dw/core.c @@ -1,13 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Core driver for the Synopsys DesignWare DMA Controller * * Copyright (C) 2007-2008 Atmel Corporation * Copyright (C) 2010-2011 ST Microelectronics * Copyright (C) 2013 Intel Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include diff --git a/drivers/dma/dw/internal.h b/drivers/dma/dw/internal.h index fdcac21ea665..1dd7a4e6dd23 100644 --- a/drivers/dma/dw/internal.h +++ b/drivers/dma/dw/internal.h @@ -1,11 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Driver for the Synopsys DesignWare DMA Controller * * Copyright (C) 2013 Intel Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #ifndef _DMA_DW_INTERNAL_H diff --git a/drivers/dma/dw/pci.c b/drivers/dma/dw/pci.c index e9ba25b4f950..e79a75db0852 100644 --- a/drivers/dma/dw/pci.c +++ b/drivers/dma/dw/pci.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * PCI driver for the Synopsys DesignWare DMA Controller * * Copyright (C) 2013 Intel Corporation * Author: Andy Shevchenko - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include diff --git a/drivers/dma/dw/platform.c b/drivers/dma/dw/platform.c index d5196c97e4f4..382dfd9e9600 100644 --- a/drivers/dma/dw/platform.c +++ b/drivers/dma/dw/platform.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Platform driver for the Synopsys DesignWare DMA Controller * @@ -6,10 +7,6 @@ * Copyright (C) 2013 Intel Corporation * * Some parts of this driver are derived from the original dw_dmac. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include diff --git a/drivers/dma/dw/regs.h b/drivers/dma/dw/regs.h index 07f91325e559..3fce66ecee7a 100644 --- a/drivers/dma/dw/regs.h +++ b/drivers/dma/dw/regs.h @@ -1,13 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Driver for the Synopsys DesignWare AHB DMA Controller * * Copyright (C) 2005-2007 Atmel Corporation * Copyright (C) 2010-2011 ST Microelectronics * Copyright (C) 2016 Intel Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include diff --git a/include/linux/dma/dw.h b/include/linux/dma/dw.h index d643d331c20e..9752f3745f76 100644 --- a/include/linux/dma/dw.h +++ b/include/linux/dma/dw.h @@ -1,13 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Driver for the Synopsys DesignWare DMA Controller * * Copyright (C) 2007 Atmel Corporation * Copyright (C) 2010-2011 ST Microelectronics * Copyright (C) 2014 Intel Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #ifndef _DMA_DW_H #define _DMA_DW_H diff --git a/include/linux/platform_data/dma-dw.h b/include/linux/platform_data/dma-dw.h index 576048433809..f3eaf9ec00a1 100644 --- a/include/linux/platform_data/dma-dw.h +++ b/include/linux/platform_data/dma-dw.h @@ -1,12 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Driver for the Synopsys DesignWare DMA Controller * * Copyright (C) 2007 Atmel Corporation * Copyright (C) 2010-2011 ST Microelectronics - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #ifndef _PLATFORM_DATA_DMA_DW_H #define _PLATFORM_DATA_DMA_DW_H -- cgit From d0dcde6426ce071ad447fb9d91c85ab649026114 Mon Sep 17 00:00:00 2001 From: Otto Sabart Date: Sun, 6 Jan 2019 00:29:15 +0100 Subject: doc: networking: convert offload files into RST and update references This patch renames offload files. This is necessary for Sphinx. Also update reference to checksum-offloads.rst file. Whole kernel code was grepped for references using: $ grep -r "\(segmentation\|checksum\)-offloads.txt" . There should be no other references to {segmentation,checksum}-offloads.txt files. Signed-off-by: Otto Sabart Acked-by: David S. Miller Signed-off-by: Jonathan Corbet --- Documentation/networking/checksum-offloads.rst | 143 ++++++++++++++++ Documentation/networking/checksum-offloads.txt | 143 ---------------- Documentation/networking/segmentation-offloads.rst | 184 +++++++++++++++++++++ Documentation/networking/segmentation-offloads.txt | 184 --------------------- include/linux/skbuff.h | 2 +- 5 files changed, 328 insertions(+), 328 deletions(-) create mode 100644 Documentation/networking/checksum-offloads.rst delete mode 100644 Documentation/networking/checksum-offloads.txt create mode 100644 Documentation/networking/segmentation-offloads.rst delete mode 100644 Documentation/networking/segmentation-offloads.txt (limited to 'include/linux') diff --git a/Documentation/networking/checksum-offloads.rst b/Documentation/networking/checksum-offloads.rst new file mode 100644 index 000000000000..1a1cd94a3f6d --- /dev/null +++ b/Documentation/networking/checksum-offloads.rst @@ -0,0 +1,143 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=============================================== +Checksum Offloads in the Linux Networking Stack +=============================================== + + +Introduction +============ + +This document describes a set of techniques in the Linux networking stack to +take advantage of checksum offload capabilities of various NICs. + +The following technologies are described: + +* TX Checksum Offload +* LCO: Local Checksum Offload +* RCO: Remote Checksum Offload + +Things that should be documented here but aren't yet: + +* RX Checksum Offload +* CHECKSUM_UNNECESSARY conversion + + +TX Checksum Offload +=================== + +The interface for offloading a transmit checksum to a device is explained in +detail in comments near the top of include/linux/skbuff.h. + +In brief, it allows to request the device fill in a single ones-complement +checksum defined by the sk_buff fields skb->csum_start and skb->csum_offset. +The device should compute the 16-bit ones-complement checksum (i.e. the +'IP-style' checksum) from csum_start to the end of the packet, and fill in the +result at (csum_start + csum_offset). + +Because csum_offset cannot be negative, this ensures that the previous value of +the checksum field is included in the checksum computation, thus it can be used +to supply any needed corrections to the checksum (such as the sum of the +pseudo-header for UDP or TCP). + +This interface only allows a single checksum to be offloaded. Where +encapsulation is used, the packet may have multiple checksum fields in +different header layers, and the rest will have to be handled by another +mechanism such as LCO or RCO. + +CRC32c can also be offloaded using this interface, by means of filling +skb->csum_start and skb->csum_offset as described above, and setting +skb->csum_not_inet: see skbuff.h comment (section 'D') for more details. + +No offloading of the IP header checksum is performed; it is always done in +software. This is OK because when we build the IP header, we obviously have it +in cache, so summing it isn't expensive. It's also rather short. + +The requirements for GSO are more complicated, because when segmenting an +encapsulated packet both the inner and outer checksums may need to be edited or +recomputed for each resulting segment. See the skbuff.h comment (section 'E') +for more details. + +A driver declares its offload capabilities in netdev->hw_features; see +Documentation/networking/netdev-features.txt for more. Note that a device +which only advertises NETIF_F_IP[V6]_CSUM must still obey the csum_start and +csum_offset given in the SKB; if it tries to deduce these itself in hardware +(as some NICs do) the driver should check that the values in the SKB match +those which the hardware will deduce, and if not, fall back to checksumming in +software instead (with skb_csum_hwoffload_help() or one of the +skb_checksum_help() / skb_crc32c_csum_help functions, as mentioned in +include/linux/skbuff.h). + +The stack should, for the most part, assume that checksum offload is supported +by the underlying device. The only place that should check is +validate_xmit_skb(), and the functions it calls directly or indirectly. That +function compares the offload features requested by the SKB (which may include +other offloads besides TX Checksum Offload) and, if they are not supported or +enabled on the device (determined by netdev->features), performs the +corresponding offload in software. In the case of TX Checksum Offload, that +means calling skb_csum_hwoffload_help(skb, features). + + +LCO: Local Checksum Offload +=========================== + +LCO is a technique for efficiently computing the outer checksum of an +encapsulated datagram when the inner checksum is due to be offloaded. + +The ones-complement sum of a correctly checksummed TCP or UDP packet is equal +to the complement of the sum of the pseudo header, because everything else gets +'cancelled out' by the checksum field. This is because the sum was +complemented before being written to the checksum field. + +More generally, this holds in any case where the 'IP-style' ones complement +checksum is used, and thus any checksum that TX Checksum Offload supports. + +That is, if we have set up TX Checksum Offload with a start/offset pair, we +know that after the device has filled in that checksum, the ones complement sum +from csum_start to the end of the packet will be equal to the complement of +whatever value we put in the checksum field beforehand. This allows us to +compute the outer checksum without looking at the payload: we simply stop +summing when we get to csum_start, then add the complement of the 16-bit word +at (csum_start + csum_offset). + +Then, when the true inner checksum is filled in (either by hardware or by +skb_checksum_help()), the outer checksum will become correct by virtue of the +arithmetic. + +LCO is performed by the stack when constructing an outer UDP header for an +encapsulation such as VXLAN or GENEVE, in udp_set_csum(). Similarly for the +IPv6 equivalents, in udp6_set_csum(). + +It is also performed when constructing an IPv4 GRE header, in +net/ipv4/ip_gre.c:build_header(). It is *not* currently performed when +constructing an IPv6 GRE header; the GRE checksum is computed over the whole +packet in net/ipv6/ip6_gre.c:ip6gre_xmit2(), but it should be possible to use +LCO here as IPv6 GRE still uses an IP-style checksum. + +All of the LCO implementations use a helper function lco_csum(), in +include/linux/skbuff.h. + +LCO can safely be used for nested encapsulations; in this case, the outer +encapsulation layer will sum over both its own header and the 'middle' header. +This does mean that the 'middle' header will get summed multiple times, but +there doesn't seem to be a way to avoid that without incurring bigger costs +(e.g. in SKB bloat). + + +RCO: Remote Checksum Offload +============================ + +RCO is a technique for eliding the inner checksum of an encapsulated datagram, +allowing the outer checksum to be offloaded. It does, however, involve a +change to the encapsulation protocols, which the receiver must also support. +For this reason, it is disabled by default. + +RCO is detailed in the following Internet-Drafts: + +* https://tools.ietf.org/html/draft-herbert-remotecsumoffload-00 +* https://tools.ietf.org/html/draft-herbert-vxlan-rco-00 + +In Linux, RCO is implemented individually in each encapsulation protocol, and +most tunnel types have flags controlling its use. For instance, VXLAN has the +flag VXLAN_F_REMCSUM_TX (per struct vxlan_rdst) to indicate that RCO should be +used when transmitting to a given remote destination. diff --git a/Documentation/networking/checksum-offloads.txt b/Documentation/networking/checksum-offloads.txt deleted file mode 100644 index 1a1cd94a3f6d..000000000000 --- a/Documentation/networking/checksum-offloads.txt +++ /dev/null @@ -1,143 +0,0 @@ -.. SPDX-License-Identifier: GPL-2.0 - -=============================================== -Checksum Offloads in the Linux Networking Stack -=============================================== - - -Introduction -============ - -This document describes a set of techniques in the Linux networking stack to -take advantage of checksum offload capabilities of various NICs. - -The following technologies are described: - -* TX Checksum Offload -* LCO: Local Checksum Offload -* RCO: Remote Checksum Offload - -Things that should be documented here but aren't yet: - -* RX Checksum Offload -* CHECKSUM_UNNECESSARY conversion - - -TX Checksum Offload -=================== - -The interface for offloading a transmit checksum to a device is explained in -detail in comments near the top of include/linux/skbuff.h. - -In brief, it allows to request the device fill in a single ones-complement -checksum defined by the sk_buff fields skb->csum_start and skb->csum_offset. -The device should compute the 16-bit ones-complement checksum (i.e. the -'IP-style' checksum) from csum_start to the end of the packet, and fill in the -result at (csum_start + csum_offset). - -Because csum_offset cannot be negative, this ensures that the previous value of -the checksum field is included in the checksum computation, thus it can be used -to supply any needed corrections to the checksum (such as the sum of the -pseudo-header for UDP or TCP). - -This interface only allows a single checksum to be offloaded. Where -encapsulation is used, the packet may have multiple checksum fields in -different header layers, and the rest will have to be handled by another -mechanism such as LCO or RCO. - -CRC32c can also be offloaded using this interface, by means of filling -skb->csum_start and skb->csum_offset as described above, and setting -skb->csum_not_inet: see skbuff.h comment (section 'D') for more details. - -No offloading of the IP header checksum is performed; it is always done in -software. This is OK because when we build the IP header, we obviously have it -in cache, so summing it isn't expensive. It's also rather short. - -The requirements for GSO are more complicated, because when segmenting an -encapsulated packet both the inner and outer checksums may need to be edited or -recomputed for each resulting segment. See the skbuff.h comment (section 'E') -for more details. - -A driver declares its offload capabilities in netdev->hw_features; see -Documentation/networking/netdev-features.txt for more. Note that a device -which only advertises NETIF_F_IP[V6]_CSUM must still obey the csum_start and -csum_offset given in the SKB; if it tries to deduce these itself in hardware -(as some NICs do) the driver should check that the values in the SKB match -those which the hardware will deduce, and if not, fall back to checksumming in -software instead (with skb_csum_hwoffload_help() or one of the -skb_checksum_help() / skb_crc32c_csum_help functions, as mentioned in -include/linux/skbuff.h). - -The stack should, for the most part, assume that checksum offload is supported -by the underlying device. The only place that should check is -validate_xmit_skb(), and the functions it calls directly or indirectly. That -function compares the offload features requested by the SKB (which may include -other offloads besides TX Checksum Offload) and, if they are not supported or -enabled on the device (determined by netdev->features), performs the -corresponding offload in software. In the case of TX Checksum Offload, that -means calling skb_csum_hwoffload_help(skb, features). - - -LCO: Local Checksum Offload -=========================== - -LCO is a technique for efficiently computing the outer checksum of an -encapsulated datagram when the inner checksum is due to be offloaded. - -The ones-complement sum of a correctly checksummed TCP or UDP packet is equal -to the complement of the sum of the pseudo header, because everything else gets -'cancelled out' by the checksum field. This is because the sum was -complemented before being written to the checksum field. - -More generally, this holds in any case where the 'IP-style' ones complement -checksum is used, and thus any checksum that TX Checksum Offload supports. - -That is, if we have set up TX Checksum Offload with a start/offset pair, we -know that after the device has filled in that checksum, the ones complement sum -from csum_start to the end of the packet will be equal to the complement of -whatever value we put in the checksum field beforehand. This allows us to -compute the outer checksum without looking at the payload: we simply stop -summing when we get to csum_start, then add the complement of the 16-bit word -at (csum_start + csum_offset). - -Then, when the true inner checksum is filled in (either by hardware or by -skb_checksum_help()), the outer checksum will become correct by virtue of the -arithmetic. - -LCO is performed by the stack when constructing an outer UDP header for an -encapsulation such as VXLAN or GENEVE, in udp_set_csum(). Similarly for the -IPv6 equivalents, in udp6_set_csum(). - -It is also performed when constructing an IPv4 GRE header, in -net/ipv4/ip_gre.c:build_header(). It is *not* currently performed when -constructing an IPv6 GRE header; the GRE checksum is computed over the whole -packet in net/ipv6/ip6_gre.c:ip6gre_xmit2(), but it should be possible to use -LCO here as IPv6 GRE still uses an IP-style checksum. - -All of the LCO implementations use a helper function lco_csum(), in -include/linux/skbuff.h. - -LCO can safely be used for nested encapsulations; in this case, the outer -encapsulation layer will sum over both its own header and the 'middle' header. -This does mean that the 'middle' header will get summed multiple times, but -there doesn't seem to be a way to avoid that without incurring bigger costs -(e.g. in SKB bloat). - - -RCO: Remote Checksum Offload -============================ - -RCO is a technique for eliding the inner checksum of an encapsulated datagram, -allowing the outer checksum to be offloaded. It does, however, involve a -change to the encapsulation protocols, which the receiver must also support. -For this reason, it is disabled by default. - -RCO is detailed in the following Internet-Drafts: - -* https://tools.ietf.org/html/draft-herbert-remotecsumoffload-00 -* https://tools.ietf.org/html/draft-herbert-vxlan-rco-00 - -In Linux, RCO is implemented individually in each encapsulation protocol, and -most tunnel types have flags controlling its use. For instance, VXLAN has the -flag VXLAN_F_REMCSUM_TX (per struct vxlan_rdst) to indicate that RCO should be -used when transmitting to a given remote destination. diff --git a/Documentation/networking/segmentation-offloads.rst b/Documentation/networking/segmentation-offloads.rst new file mode 100644 index 000000000000..1794bfe98196 --- /dev/null +++ b/Documentation/networking/segmentation-offloads.rst @@ -0,0 +1,184 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=================================================== +Segmentation Offloads in the Linux Networking Stack +=================================================== + + +Introduction +============ + +This document describes a set of techniques in the Linux networking stack +to take advantage of segmentation offload capabilities of various NICs. + +The following technologies are described: + * TCP Segmentation Offload - TSO + * UDP Fragmentation Offload - UFO + * IPIP, SIT, GRE, and UDP Tunnel Offloads + * Generic Segmentation Offload - GSO + * Generic Receive Offload - GRO + * Partial Generic Segmentation Offload - GSO_PARTIAL + * SCTP accelleration with GSO - GSO_BY_FRAGS + + +TCP Segmentation Offload +======================== + +TCP segmentation allows a device to segment a single frame into multiple +frames with a data payload size specified in skb_shinfo()->gso_size. +When TCP segmentation requested the bit for either SKB_GSO_TCPV4 or +SKB_GSO_TCPV6 should be set in skb_shinfo()->gso_type and +skb_shinfo()->gso_size should be set to a non-zero value. + +TCP segmentation is dependent on support for the use of partial checksum +offload. For this reason TSO is normally disabled if the Tx checksum +offload for a given device is disabled. + +In order to support TCP segmentation offload it is necessary to populate +the network and transport header offsets of the skbuff so that the device +drivers will be able determine the offsets of the IP or IPv6 header and the +TCP header. In addition as CHECKSUM_PARTIAL is required csum_start should +also point to the TCP header of the packet. + +For IPv4 segmentation we support one of two types in terms of the IP ID. +The default behavior is to increment the IP ID with every segment. If the +GSO type SKB_GSO_TCP_FIXEDID is specified then we will not increment the IP +ID and all segments will use the same IP ID. If a device has +NETIF_F_TSO_MANGLEID set then the IP ID can be ignored when performing TSO +and we will either increment the IP ID for all frames, or leave it at a +static value based on driver preference. + + +UDP Fragmentation Offload +========================= + +UDP fragmentation offload allows a device to fragment an oversized UDP +datagram into multiple IPv4 fragments. Many of the requirements for UDP +fragmentation offload are the same as TSO. However the IPv4 ID for +fragments should not increment as a single IPv4 datagram is fragmented. + +UFO is deprecated: modern kernels will no longer generate UFO skbs, but can +still receive them from tuntap and similar devices. Offload of UDP-based +tunnel protocols is still supported. + + +IPIP, SIT, GRE, UDP Tunnel, and Remote Checksum Offloads +======================================================== + +In addition to the offloads described above it is possible for a frame to +contain additional headers such as an outer tunnel. In order to account +for such instances an additional set of segmentation offload types were +introduced including SKB_GSO_IPXIP4, SKB_GSO_IPXIP6, SKB_GSO_GRE, and +SKB_GSO_UDP_TUNNEL. These extra segmentation types are used to identify +cases where there are more than just 1 set of headers. For example in the +case of IPIP and SIT we should have the network and transport headers moved +from the standard list of headers to "inner" header offsets. + +Currently only two levels of headers are supported. The convention is to +refer to the tunnel headers as the outer headers, while the encapsulated +data is normally referred to as the inner headers. Below is the list of +calls to access the given headers: + +IPIP/SIT Tunnel:: + + Outer Inner + MAC skb_mac_header + Network skb_network_header skb_inner_network_header + Transport skb_transport_header + +UDP/GRE Tunnel:: + + Outer Inner + MAC skb_mac_header skb_inner_mac_header + Network skb_network_header skb_inner_network_header + Transport skb_transport_header skb_inner_transport_header + +In addition to the above tunnel types there are also SKB_GSO_GRE_CSUM and +SKB_GSO_UDP_TUNNEL_CSUM. These two additional tunnel types reflect the +fact that the outer header also requests to have a non-zero checksum +included in the outer header. + +Finally there is SKB_GSO_TUNNEL_REMCSUM which indicates that a given tunnel +header has requested a remote checksum offload. In this case the inner +headers will be left with a partial checksum and only the outer header +checksum will be computed. + + +Generic Segmentation Offload +============================ + +Generic segmentation offload is a pure software offload that is meant to +deal with cases where device drivers cannot perform the offloads described +above. What occurs in GSO is that a given skbuff will have its data broken +out over multiple skbuffs that have been resized to match the MSS provided +via skb_shinfo()->gso_size. + +Before enabling any hardware segmentation offload a corresponding software +offload is required in GSO. Otherwise it becomes possible for a frame to +be re-routed between devices and end up being unable to be transmitted. + + +Generic Receive Offload +======================= + +Generic receive offload is the complement to GSO. Ideally any frame +assembled by GRO should be segmented to create an identical sequence of +frames using GSO, and any sequence of frames segmented by GSO should be +able to be reassembled back to the original by GRO. The only exception to +this is IPv4 ID in the case that the DF bit is set for a given IP header. +If the value of the IPv4 ID is not sequentially incrementing it will be +altered so that it is when a frame assembled via GRO is segmented via GSO. + + +Partial Generic Segmentation Offload +==================================== + +Partial generic segmentation offload is a hybrid between TSO and GSO. What +it effectively does is take advantage of certain traits of TCP and tunnels +so that instead of having to rewrite the packet headers for each segment +only the inner-most transport header and possibly the outer-most network +header need to be updated. This allows devices that do not support tunnel +offloads or tunnel offloads with checksum to still make use of segmentation. + +With the partial offload what occurs is that all headers excluding the +inner transport header are updated such that they will contain the correct +values for if the header was simply duplicated. The one exception to this +is the outer IPv4 ID field. It is up to the device drivers to guarantee +that the IPv4 ID field is incremented in the case that a given header does +not have the DF bit set. + + +SCTP accelleration with GSO +=========================== + +SCTP - despite the lack of hardware support - can still take advantage of +GSO to pass one large packet through the network stack, rather than +multiple small packets. + +This requires a different approach to other offloads, as SCTP packets +cannot be just segmented to (P)MTU. Rather, the chunks must be contained in +IP segments, padding respected. So unlike regular GSO, SCTP can't just +generate a big skb, set gso_size to the fragmentation point and deliver it +to IP layer. + +Instead, the SCTP protocol layer builds an skb with the segments correctly +padded and stored as chained skbs, and skb_segment() splits based on those. +To signal this, gso_size is set to the special value GSO_BY_FRAGS. + +Therefore, any code in the core networking stack must be aware of the +possibility that gso_size will be GSO_BY_FRAGS and handle that case +appropriately. + +There are some helpers to make this easier: + +- skb_is_gso(skb) && skb_is_gso_sctp(skb) is the best way to see if + an skb is an SCTP GSO skb. + +- For size checks, the skb_gso_validate_*_len family of helpers correctly + considers GSO_BY_FRAGS. + +- For manipulating packets, skb_increase_gso_size and skb_decrease_gso_size + will check for GSO_BY_FRAGS and WARN if asked to manipulate these skbs. + +This also affects drivers with the NETIF_F_FRAGLIST & NETIF_F_GSO_SCTP bits +set. Note also that NETIF_F_GSO_SCTP is included in NETIF_F_GSO_SOFTWARE. diff --git a/Documentation/networking/segmentation-offloads.txt b/Documentation/networking/segmentation-offloads.txt deleted file mode 100644 index 1794bfe98196..000000000000 --- a/Documentation/networking/segmentation-offloads.txt +++ /dev/null @@ -1,184 +0,0 @@ -.. SPDX-License-Identifier: GPL-2.0 - -=================================================== -Segmentation Offloads in the Linux Networking Stack -=================================================== - - -Introduction -============ - -This document describes a set of techniques in the Linux networking stack -to take advantage of segmentation offload capabilities of various NICs. - -The following technologies are described: - * TCP Segmentation Offload - TSO - * UDP Fragmentation Offload - UFO - * IPIP, SIT, GRE, and UDP Tunnel Offloads - * Generic Segmentation Offload - GSO - * Generic Receive Offload - GRO - * Partial Generic Segmentation Offload - GSO_PARTIAL - * SCTP accelleration with GSO - GSO_BY_FRAGS - - -TCP Segmentation Offload -======================== - -TCP segmentation allows a device to segment a single frame into multiple -frames with a data payload size specified in skb_shinfo()->gso_size. -When TCP segmentation requested the bit for either SKB_GSO_TCPV4 or -SKB_GSO_TCPV6 should be set in skb_shinfo()->gso_type and -skb_shinfo()->gso_size should be set to a non-zero value. - -TCP segmentation is dependent on support for the use of partial checksum -offload. For this reason TSO is normally disabled if the Tx checksum -offload for a given device is disabled. - -In order to support TCP segmentation offload it is necessary to populate -the network and transport header offsets of the skbuff so that the device -drivers will be able determine the offsets of the IP or IPv6 header and the -TCP header. In addition as CHECKSUM_PARTIAL is required csum_start should -also point to the TCP header of the packet. - -For IPv4 segmentation we support one of two types in terms of the IP ID. -The default behavior is to increment the IP ID with every segment. If the -GSO type SKB_GSO_TCP_FIXEDID is specified then we will not increment the IP -ID and all segments will use the same IP ID. If a device has -NETIF_F_TSO_MANGLEID set then the IP ID can be ignored when performing TSO -and we will either increment the IP ID for all frames, or leave it at a -static value based on driver preference. - - -UDP Fragmentation Offload -========================= - -UDP fragmentation offload allows a device to fragment an oversized UDP -datagram into multiple IPv4 fragments. Many of the requirements for UDP -fragmentation offload are the same as TSO. However the IPv4 ID for -fragments should not increment as a single IPv4 datagram is fragmented. - -UFO is deprecated: modern kernels will no longer generate UFO skbs, but can -still receive them from tuntap and similar devices. Offload of UDP-based -tunnel protocols is still supported. - - -IPIP, SIT, GRE, UDP Tunnel, and Remote Checksum Offloads -======================================================== - -In addition to the offloads described above it is possible for a frame to -contain additional headers such as an outer tunnel. In order to account -for such instances an additional set of segmentation offload types were -introduced including SKB_GSO_IPXIP4, SKB_GSO_IPXIP6, SKB_GSO_GRE, and -SKB_GSO_UDP_TUNNEL. These extra segmentation types are used to identify -cases where there are more than just 1 set of headers. For example in the -case of IPIP and SIT we should have the network and transport headers moved -from the standard list of headers to "inner" header offsets. - -Currently only two levels of headers are supported. The convention is to -refer to the tunnel headers as the outer headers, while the encapsulated -data is normally referred to as the inner headers. Below is the list of -calls to access the given headers: - -IPIP/SIT Tunnel:: - - Outer Inner - MAC skb_mac_header - Network skb_network_header skb_inner_network_header - Transport skb_transport_header - -UDP/GRE Tunnel:: - - Outer Inner - MAC skb_mac_header skb_inner_mac_header - Network skb_network_header skb_inner_network_header - Transport skb_transport_header skb_inner_transport_header - -In addition to the above tunnel types there are also SKB_GSO_GRE_CSUM and -SKB_GSO_UDP_TUNNEL_CSUM. These two additional tunnel types reflect the -fact that the outer header also requests to have a non-zero checksum -included in the outer header. - -Finally there is SKB_GSO_TUNNEL_REMCSUM which indicates that a given tunnel -header has requested a remote checksum offload. In this case the inner -headers will be left with a partial checksum and only the outer header -checksum will be computed. - - -Generic Segmentation Offload -============================ - -Generic segmentation offload is a pure software offload that is meant to -deal with cases where device drivers cannot perform the offloads described -above. What occurs in GSO is that a given skbuff will have its data broken -out over multiple skbuffs that have been resized to match the MSS provided -via skb_shinfo()->gso_size. - -Before enabling any hardware segmentation offload a corresponding software -offload is required in GSO. Otherwise it becomes possible for a frame to -be re-routed between devices and end up being unable to be transmitted. - - -Generic Receive Offload -======================= - -Generic receive offload is the complement to GSO. Ideally any frame -assembled by GRO should be segmented to create an identical sequence of -frames using GSO, and any sequence of frames segmented by GSO should be -able to be reassembled back to the original by GRO. The only exception to -this is IPv4 ID in the case that the DF bit is set for a given IP header. -If the value of the IPv4 ID is not sequentially incrementing it will be -altered so that it is when a frame assembled via GRO is segmented via GSO. - - -Partial Generic Segmentation Offload -==================================== - -Partial generic segmentation offload is a hybrid between TSO and GSO. What -it effectively does is take advantage of certain traits of TCP and tunnels -so that instead of having to rewrite the packet headers for each segment -only the inner-most transport header and possibly the outer-most network -header need to be updated. This allows devices that do not support tunnel -offloads or tunnel offloads with checksum to still make use of segmentation. - -With the partial offload what occurs is that all headers excluding the -inner transport header are updated such that they will contain the correct -values for if the header was simply duplicated. The one exception to this -is the outer IPv4 ID field. It is up to the device drivers to guarantee -that the IPv4 ID field is incremented in the case that a given header does -not have the DF bit set. - - -SCTP accelleration with GSO -=========================== - -SCTP - despite the lack of hardware support - can still take advantage of -GSO to pass one large packet through the network stack, rather than -multiple small packets. - -This requires a different approach to other offloads, as SCTP packets -cannot be just segmented to (P)MTU. Rather, the chunks must be contained in -IP segments, padding respected. So unlike regular GSO, SCTP can't just -generate a big skb, set gso_size to the fragmentation point and deliver it -to IP layer. - -Instead, the SCTP protocol layer builds an skb with the segments correctly -padded and stored as chained skbs, and skb_segment() splits based on those. -To signal this, gso_size is set to the special value GSO_BY_FRAGS. - -Therefore, any code in the core networking stack must be aware of the -possibility that gso_size will be GSO_BY_FRAGS and handle that case -appropriately. - -There are some helpers to make this easier: - -- skb_is_gso(skb) && skb_is_gso_sctp(skb) is the best way to see if - an skb is an SCTP GSO skb. - -- For size checks, the skb_gso_validate_*_len family of helpers correctly - considers GSO_BY_FRAGS. - -- For manipulating packets, skb_increase_gso_size and skb_decrease_gso_size - will check for GSO_BY_FRAGS and WARN if asked to manipulate these skbs. - -This also affects drivers with the NETIF_F_FRAGLIST & NETIF_F_GSO_SCTP bits -set. Note also that NETIF_F_GSO_SCTP is included in NETIF_F_GSO_SOFTWARE. diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 93f56fddd92a..4e671b46e767 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4296,7 +4296,7 @@ static inline bool skb_head_is_locked(const struct sk_buff *skb) /* Local Checksum Offload. * Compute outer checksum based on the assumption that the * inner checksum will be offloaded later. - * See Documentation/networking/checksum-offloads.txt for + * See Documentation/networking/checksum-offloads.rst for * explanation of how this works. * Fill in outer checksum adjustment (e.g. with sum of outer * pseudo-header) before calling. -- cgit From 9ac6cb5fbb1781d120ca0ad29d014d35c9c3f0c4 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 19 Dec 2018 17:48:17 +0100 Subject: i2c: add suspended flag and accessors for i2c adapters A few drivers open code the handling of suspended adapters. It could be handled by the core, though, to ensure generic handling. This patch adds the flag and accessor functions. The usage of these helpers is optional, though. See the kerneldoc in this patch. Using the new flag, we now reject further transfers if the adapter is already marked suspended. Signed-off-by: Wolfram Sang Signed-off-by: Wolfram Sang --- Documentation/i2c/fault-codes | 4 ++++ drivers/i2c/i2c-core-base.c | 3 +++ include/linux/i2c.h | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 41 insertions(+) (limited to 'include/linux') diff --git a/Documentation/i2c/fault-codes b/Documentation/i2c/fault-codes index 47c25abb7d52..0cee0fc545b4 100644 --- a/Documentation/i2c/fault-codes +++ b/Documentation/i2c/fault-codes @@ -112,6 +112,10 @@ EPROTO case is when the length of an SMBus block data response (from the SMBus slave) is outside the range 1-32 bytes. +ESHUTDOWN + Returned when a transfer was requested using an adapter + which is already suspended. + ETIMEDOUT This is returned by drivers when an operation took too much time, and was aborted before it completed. diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index 28460f6a60cc..926ca0a7477f 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -1232,6 +1232,7 @@ static int i2c_register_adapter(struct i2c_adapter *adap) if (!adap->lock_ops) adap->lock_ops = &i2c_adapter_lock_ops; + adap->locked_flags = 0; rt_mutex_init(&adap->bus_lock); rt_mutex_init(&adap->mux_lock); mutex_init(&adap->userspace_clients_lock); @@ -1865,6 +1866,8 @@ int __i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) if (WARN_ON(!msgs || num < 1)) return -EINVAL; + if (WARN_ON(test_bit(I2C_ALF_IS_SUSPENDED, &adap->locked_flags))) + return -ESHUTDOWN; if (adap->quirks && i2c_check_for_quirks(adap, msgs, num)) return -EOPNOTSUPP; diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 65b4eaed1d96..cba59d66c00d 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -680,6 +680,8 @@ struct i2c_adapter { int timeout; /* in jiffies */ int retries; struct device dev; /* the adapter device */ + unsigned long locked_flags; /* owned by the I2C core */ +#define I2C_ALF_IS_SUSPENDED 0 int nr; char name[48]; @@ -762,6 +764,38 @@ i2c_unlock_bus(struct i2c_adapter *adapter, unsigned int flags) adapter->lock_ops->unlock_bus(adapter, flags); } +/** + * i2c_mark_adapter_suspended - Report suspended state of the adapter to the core + * @adap: Adapter to mark as suspended + * + * When using this helper to mark an adapter as suspended, the core will reject + * further transfers to this adapter. The usage of this helper is optional but + * recommended for devices having distinct handlers for system suspend and + * runtime suspend. More complex devices are free to implement custom solutions + * to reject transfers when suspended. + */ +static inline void i2c_mark_adapter_suspended(struct i2c_adapter *adap) +{ + i2c_lock_bus(adap, I2C_LOCK_ROOT_ADAPTER); + set_bit(I2C_ALF_IS_SUSPENDED, &adap->locked_flags); + i2c_unlock_bus(adap, I2C_LOCK_ROOT_ADAPTER); +} + +/** + * i2c_mark_adapter_resumed - Report resumed state of the adapter to the core + * @adap: Adapter to mark as resumed + * + * When using this helper to mark an adapter as resumed, the core will allow + * further transfers to this adapter. See also further notes to + * @i2c_mark_adapter_suspended(). + */ +static inline void i2c_mark_adapter_resumed(struct i2c_adapter *adap) +{ + i2c_lock_bus(adap, I2C_LOCK_ROOT_ADAPTER); + clear_bit(I2C_ALF_IS_SUSPENDED, &adap->locked_flags); + i2c_unlock_bus(adap, I2C_LOCK_ROOT_ADAPTER); +} + /*flags for the client struct: */ #define I2C_CLIENT_PEC 0x04 /* Use Packet Error Checking */ #define I2C_CLIENT_TEN 0x10 /* we have a ten bit chip address */ -- cgit From 47008e5161fa097ce9b848dee194b43262b743a5 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 19 Sep 2018 16:13:25 -0700 Subject: LSM: Introduce LSM_FLAG_LEGACY_MAJOR This adds a flag for the current "major" LSMs to distinguish them when we have a universal method for ordering all LSMs. It's called "legacy" since the distinction of "major" will go away in the blob-sharing world. Signed-off-by: Kees Cook Reviewed-by: Casey Schaufler Reviewed-by: John Johansen --- include/linux/lsm_hooks.h | 3 +++ security/apparmor/lsm.c | 1 + security/selinux/hooks.c | 1 + security/smack/smack_lsm.c | 1 + security/tomoyo/tomoyo.c | 1 + 5 files changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 9a0bdf91e646..318d93f918c3 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -2042,8 +2042,11 @@ extern char *lsm_names; extern void security_add_hooks(struct security_hook_list *hooks, int count, char *lsm); +#define LSM_FLAG_LEGACY_MAJOR BIT(0) + struct lsm_info { const char *name; /* Required. */ + unsigned long flags; /* Optional: flags describing LSM */ int (*init)(void); /* Required. */ }; diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 2c010874329f..e49c50e0d5ab 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -1729,5 +1729,6 @@ alloc_out: DEFINE_LSM(apparmor) = { .name = "apparmor", + .flags = LSM_FLAG_LEGACY_MAJOR, .init = apparmor_init, }; diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index f0e36c3492ba..41908d2d6149 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -6999,6 +6999,7 @@ void selinux_complete_init(void) all processes and objects when they are created. */ DEFINE_LSM(selinux) = { .name = "selinux", + .flags = LSM_FLAG_LEGACY_MAJOR, .init = selinux_init, }; diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 430d4f35e55c..d72d215d7fde 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -4812,5 +4812,6 @@ static __init int smack_init(void) */ DEFINE_LSM(smack) = { .name = "smack", + .flags = LSM_FLAG_LEGACY_MAJOR, .init = smack_init, }; diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c index 1b5b5097efd7..09f7af130d3a 100644 --- a/security/tomoyo/tomoyo.c +++ b/security/tomoyo/tomoyo.c @@ -552,5 +552,6 @@ static int __init tomoyo_init(void) DEFINE_LSM(tomoyo) = { .name = "tomoyo", + .flags = LSM_FLAG_LEGACY_MAJOR, .init = tomoyo_init, }; -- cgit From c5459b829b716dafd226ad270f25c9a3050f7586 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 13 Sep 2018 22:28:48 -0700 Subject: LSM: Plumb visibility into optional "enabled" state In preparation for lifting the "is this LSM enabled?" logic out of the individual LSMs, pass in any special enabled state tracking (as needed for SELinux, AppArmor, and LoadPin). This should be an "int" to include handling any future cases where "enabled" is exposed via sysctl which has no "bool" type. Signed-off-by: Kees Cook Reviewed-by: Casey Schaufler Reviewed-by: John Johansen --- include/linux/lsm_hooks.h | 1 + security/apparmor/lsm.c | 5 +++-- security/selinux/hooks.c | 1 + 3 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 318d93f918c3..7bbe5e287161 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -2047,6 +2047,7 @@ extern void security_add_hooks(struct security_hook_list *hooks, int count, struct lsm_info { const char *name; /* Required. */ unsigned long flags; /* Optional: flags describing LSM */ + int *enabled; /* Optional: NULL means enabled. */ int (*init)(void); /* Required. */ }; diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index e49c50e0d5ab..a4652ff622cf 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -1333,8 +1333,8 @@ bool aa_g_paranoid_load = true; module_param_named(paranoid_load, aa_g_paranoid_load, aabool, S_IRUGO); /* Boot time disable flag */ -static bool apparmor_enabled = CONFIG_SECURITY_APPARMOR_BOOTPARAM_VALUE; -module_param_named(enabled, apparmor_enabled, bool, S_IRUGO); +static int apparmor_enabled = CONFIG_SECURITY_APPARMOR_BOOTPARAM_VALUE; +module_param_named(enabled, apparmor_enabled, int, 0444); static int __init apparmor_enabled_setup(char *str) { @@ -1730,5 +1730,6 @@ alloc_out: DEFINE_LSM(apparmor) = { .name = "apparmor", .flags = LSM_FLAG_LEGACY_MAJOR, + .enabled = &apparmor_enabled, .init = apparmor_init, }; diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 41908d2d6149..f847514d6f03 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -7000,6 +7000,7 @@ void selinux_complete_init(void) DEFINE_LSM(selinux) = { .name = "selinux", .flags = LSM_FLAG_LEGACY_MAJOR, + .enabled = &selinux_enabled, .init = selinux_init, }; -- cgit From f4941d75b9cba5e1fae1aebe0139dcca0703a294 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 13 Sep 2018 23:17:50 -0700 Subject: LSM: Lift LSM selection out of individual LSMs As a prerequisite to adjusting LSM selection logic in the future, this moves the selection logic up out of the individual major LSMs, making their init functions only run when actually enabled. This considers all LSMs enabled by default unless they specified an external "enable" variable. Signed-off-by: Kees Cook Reviewed-by: Casey Schaufler Reviewed-by: John Johansen --- include/linux/lsm_hooks.h | 1 - security/apparmor/lsm.c | 6 --- security/security.c | 102 +++++++++++++++++++++++++++++++-------------- security/selinux/hooks.c | 10 ----- security/smack/smack_lsm.c | 3 -- security/tomoyo/tomoyo.c | 2 - 6 files changed, 71 insertions(+), 53 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 7bbe5e287161..be1581d18e3e 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -2088,7 +2088,6 @@ static inline void security_delete_hooks(struct security_hook_list *hooks, #define __lsm_ro_after_init __ro_after_init #endif /* CONFIG_SECURITY_WRITABLE_HOOKS */ -extern int __init security_module_enable(const char *module); extern void __init capability_add_hooks(void); #ifdef CONFIG_SECURITY_YAMA extern void __init yama_add_hooks(void); diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index a4652ff622cf..dfc5fbf8ba82 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -1663,12 +1663,6 @@ static int __init apparmor_init(void) { int error; - if (!apparmor_enabled || !security_module_enable("apparmor")) { - aa_info_message("AppArmor disabled by boot time parameter"); - apparmor_enabled = false; - return 0; - } - aa_secids_init(); error = aa_setup_dfa_engine(); diff --git a/security/security.c b/security/security.c index 6bc591f77b1a..c900d7a1441a 100644 --- a/security/security.c +++ b/security/security.c @@ -52,33 +52,96 @@ static __initdata bool debug; pr_info(__VA_ARGS__); \ } while (0) +static bool __init is_enabled(struct lsm_info *lsm) +{ + if (!lsm->enabled || *lsm->enabled) + return true; + + return false; +} + +/* Mark an LSM's enabled flag. */ +static int lsm_enabled_true __initdata = 1; +static int lsm_enabled_false __initdata = 0; +static void __init set_enabled(struct lsm_info *lsm, bool enabled) +{ + /* + * When an LSM hasn't configured an enable variable, we can use + * a hard-coded location for storing the default enabled state. + */ + if (!lsm->enabled) { + if (enabled) + lsm->enabled = &lsm_enabled_true; + else + lsm->enabled = &lsm_enabled_false; + } else if (lsm->enabled == &lsm_enabled_true) { + if (!enabled) + lsm->enabled = &lsm_enabled_false; + } else if (lsm->enabled == &lsm_enabled_false) { + if (enabled) + lsm->enabled = &lsm_enabled_true; + } else { + *lsm->enabled = enabled; + } +} + +/* Is an LSM allowed to be initialized? */ +static bool __init lsm_allowed(struct lsm_info *lsm) +{ + /* Skip if the LSM is disabled. */ + if (!is_enabled(lsm)) + return false; + + /* Skip major-specific checks if not a major LSM. */ + if ((lsm->flags & LSM_FLAG_LEGACY_MAJOR) == 0) + return true; + + /* Disabled if this LSM isn't the chosen one. */ + if (strcmp(lsm->name, chosen_lsm) != 0) + return false; + + return true; +} + +/* Check if LSM should be initialized. */ +static void __init maybe_initialize_lsm(struct lsm_info *lsm) +{ + int enabled = lsm_allowed(lsm); + + /* Record enablement (to handle any following exclusive LSMs). */ + set_enabled(lsm, enabled); + + /* If selected, initialize the LSM. */ + if (enabled) { + int ret; + + init_debug("initializing %s\n", lsm->name); + ret = lsm->init(); + WARN(ret, "%s failed to initialize: %d\n", lsm->name, ret); + } +} + static void __init ordered_lsm_init(void) { struct lsm_info *lsm; - int ret; for (lsm = __start_lsm_info; lsm < __end_lsm_info; lsm++) { if ((lsm->flags & LSM_FLAG_LEGACY_MAJOR) != 0) continue; - init_debug("initializing %s\n", lsm->name); - ret = lsm->init(); - WARN(ret, "%s failed to initialize: %d\n", lsm->name, ret); + maybe_initialize_lsm(lsm); } } static void __init major_lsm_init(void) { struct lsm_info *lsm; - int ret; for (lsm = __start_lsm_info; lsm < __end_lsm_info; lsm++) { if ((lsm->flags & LSM_FLAG_LEGACY_MAJOR) == 0) continue; - init_debug("initializing %s\n", lsm->name); - ret = lsm->init(); - WARN(ret, "%s failed to initialize: %d\n", lsm->name, ret); + maybe_initialize_lsm(lsm); } } @@ -168,29 +231,6 @@ static int lsm_append(char *new, char **result) return 0; } -/** - * security_module_enable - Load given security module on boot ? - * @module: the name of the module - * - * Each LSM must pass this method before registering its own operations - * to avoid security registration races. This method may also be used - * to check if your LSM is currently loaded during kernel initialization. - * - * Returns: - * - * true if: - * - * - The passed LSM is the one chosen by user at boot time, - * - or the passed LSM is configured as the default and the user did not - * choose an alternate LSM at boot time. - * - * Otherwise, return false. - */ -int __init security_module_enable(const char *module) -{ - return !strcmp(module, chosen_lsm); -} - /** * security_add_hooks - Add a modules hooks to the hook lists. * @hooks: the hooks to add diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index f847514d6f03..0f8ae2fbd14a 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -6928,16 +6928,6 @@ static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = { static __init int selinux_init(void) { - if (!security_module_enable("selinux")) { - selinux_enabled = 0; - return 0; - } - - if (!selinux_enabled) { - pr_info("SELinux: Disabled at boot.\n"); - return 0; - } - pr_info("SELinux: Initializing.\n"); memset(&selinux_state, 0, sizeof(selinux_state)); diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index d72d215d7fde..580e9d6e5680 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -4762,9 +4762,6 @@ static __init int smack_init(void) struct cred *cred; struct task_smack *tsp; - if (!security_module_enable("smack")) - return 0; - smack_inode_cache = KMEM_CACHE(inode_smack, 0); if (!smack_inode_cache) return -ENOMEM; diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c index 09f7af130d3a..a46f6bc1e97c 100644 --- a/security/tomoyo/tomoyo.c +++ b/security/tomoyo/tomoyo.c @@ -540,8 +540,6 @@ static int __init tomoyo_init(void) { struct cred *cred = (struct cred *) current_cred(); - if (!security_module_enable("tomoyo")) - return 0; /* register ourselves with the security framework */ security_add_hooks(tomoyo_hooks, ARRAY_SIZE(tomoyo_hooks), "tomoyo"); printk(KERN_INFO "TOMOYO Linux initialized\n"); -- cgit From a8027fb0d188599ccdb2096f49f708bae04d86c4 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 9 Oct 2018 14:42:57 -0700 Subject: LSM: Tie enabling logic to presence in ordered list Until now, any LSM without an enable storage variable was considered enabled. This inverts the logic and sets defaults to true only if the LSM gets added to the ordered initialization list. (And an exception continues for the major LSMs until they are integrated into the ordered initialization in a later patch.) Signed-off-by: Kees Cook --- include/linux/lsm_hooks.h | 2 +- security/security.c | 14 +++++++++++--- 2 files changed, 12 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index be1581d18e3e..e28a3aa639e8 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -2047,7 +2047,7 @@ extern void security_add_hooks(struct security_hook_list *hooks, int count, struct lsm_info { const char *name; /* Required. */ unsigned long flags; /* Optional: flags describing LSM */ - int *enabled; /* Optional: NULL means enabled. */ + int *enabled; /* Optional: controlled by CONFIG_LSM */ int (*init)(void); /* Required. */ }; diff --git a/security/security.c b/security/security.c index 2e1f48e8a6f2..b6d3456978a4 100644 --- a/security/security.c +++ b/security/security.c @@ -63,10 +63,10 @@ static __initdata bool debug; static bool __init is_enabled(struct lsm_info *lsm) { - if (!lsm->enabled || *lsm->enabled) - return true; + if (!lsm->enabled) + return false; - return false; + return *lsm->enabled; } /* Mark an LSM's enabled flag. */ @@ -117,7 +117,11 @@ static void __init append_ordered_lsm(struct lsm_info *lsm, const char *from) if (WARN(last_lsm == LSM_COUNT, "%s: out of LSM slots!?\n", from)) return; + /* Enable this LSM, if it is not already set. */ + if (!lsm->enabled) + lsm->enabled = &lsm_enabled_true; ordered_lsms[last_lsm++] = lsm; + init_debug("%s ordering: %s (%sabled)\n", from, lsm->name, is_enabled(lsm) ? "en" : "dis"); } @@ -210,6 +214,10 @@ static void __init major_lsm_init(void) if ((lsm->flags & LSM_FLAG_LEGACY_MAJOR) == 0) continue; + /* Enable this LSM, if it is not already set. */ + if (!lsm->enabled) + lsm->enabled = &lsm_enabled_true; + maybe_initialize_lsm(lsm); } } -- cgit From 14bd99c821f7ace0e8110a1bfdfaa27e1788e20f Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 19 Sep 2018 19:57:06 -0700 Subject: LSM: Separate idea of "major" LSM from "exclusive" LSM In order to both support old "security=" Legacy Major LSM selection, and handling real exclusivity, this creates LSM_FLAG_EXCLUSIVE and updates the selection logic to handle them. Signed-off-by: Kees Cook Reviewed-by: Casey Schaufler --- include/linux/lsm_hooks.h | 1 + security/apparmor/lsm.c | 2 +- security/security.c | 12 ++++++++++++ security/selinux/hooks.c | 2 +- security/smack/smack_lsm.c | 2 +- security/tomoyo/tomoyo.c | 2 +- 6 files changed, 17 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index e28a3aa639e8..c3843b33da9e 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -2043,6 +2043,7 @@ extern void security_add_hooks(struct security_hook_list *hooks, int count, char *lsm); #define LSM_FLAG_LEGACY_MAJOR BIT(0) +#define LSM_FLAG_EXCLUSIVE BIT(1) struct lsm_info { const char *name; /* Required. */ diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index dfc5fbf8ba82..149a3e16b5da 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -1723,7 +1723,7 @@ alloc_out: DEFINE_LSM(apparmor) = { .name = "apparmor", - .flags = LSM_FLAG_LEGACY_MAJOR, + .flags = LSM_FLAG_LEGACY_MAJOR | LSM_FLAG_EXCLUSIVE, .enabled = &apparmor_enabled, .init = apparmor_init, }; diff --git a/security/security.c b/security/security.c index 88de6b073246..a8dd7defe30a 100644 --- a/security/security.c +++ b/security/security.c @@ -49,6 +49,7 @@ static __initconst const char * const builtin_lsm_order = CONFIG_LSM; /* Ordered list of LSMs to initialize. */ static __initdata struct lsm_info **ordered_lsms; +static __initdata struct lsm_info *exclusive; static __initdata bool debug; #define init_debug(...) \ @@ -129,6 +130,12 @@ static bool __init lsm_allowed(struct lsm_info *lsm) if (!is_enabled(lsm)) return false; + /* Not allowed if another exclusive LSM already initialized. */ + if ((lsm->flags & LSM_FLAG_EXCLUSIVE) && exclusive) { + init_debug("exclusive disabled: %s\n", lsm->name); + return false; + } + return true; } @@ -144,6 +151,11 @@ static void __init maybe_initialize_lsm(struct lsm_info *lsm) if (enabled) { int ret; + if ((lsm->flags & LSM_FLAG_EXCLUSIVE) && !exclusive) { + exclusive = lsm; + init_debug("exclusive chosen: %s\n", lsm->name); + } + init_debug("initializing %s\n", lsm->name); ret = lsm->init(); WARN(ret, "%s failed to initialize: %d\n", lsm->name, ret); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 0f8ae2fbd14a..49865f119b16 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -6989,7 +6989,7 @@ void selinux_complete_init(void) all processes and objects when they are created. */ DEFINE_LSM(selinux) = { .name = "selinux", - .flags = LSM_FLAG_LEGACY_MAJOR, + .flags = LSM_FLAG_LEGACY_MAJOR | LSM_FLAG_EXCLUSIVE, .enabled = &selinux_enabled, .init = selinux_init, }; diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 580e9d6e5680..780733341d02 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -4809,6 +4809,6 @@ static __init int smack_init(void) */ DEFINE_LSM(smack) = { .name = "smack", - .flags = LSM_FLAG_LEGACY_MAJOR, + .flags = LSM_FLAG_LEGACY_MAJOR | LSM_FLAG_EXCLUSIVE, .init = smack_init, }; diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c index a46f6bc1e97c..daff7d7897ad 100644 --- a/security/tomoyo/tomoyo.c +++ b/security/tomoyo/tomoyo.c @@ -550,6 +550,6 @@ static int __init tomoyo_init(void) DEFINE_LSM(tomoyo) = { .name = "tomoyo", - .flags = LSM_FLAG_LEGACY_MAJOR, + .flags = LSM_FLAG_LEGACY_MAJOR | LSM_FLAG_EXCLUSIVE, .init = tomoyo_init, }; -- cgit From 70b62c25665f636c9f6c700b26af7df296b0887e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 14 Sep 2018 15:26:37 -0700 Subject: LoadPin: Initialize as ordered LSM This converts LoadPin from being a direct "minor" LSM into an ordered LSM. Signed-off-by: Kees Cook Reviewed-by: Casey Schaufler --- include/linux/lsm_hooks.h | 5 ----- security/Kconfig | 39 +-------------------------------------- security/loadpin/loadpin.c | 8 +++++++- security/security.c | 1 - 4 files changed, 8 insertions(+), 45 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index c3843b33da9e..fb1a653ccfcb 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -2095,10 +2095,5 @@ extern void __init yama_add_hooks(void); #else static inline void __init yama_add_hooks(void) { } #endif -#ifdef CONFIG_SECURITY_LOADPIN -void __init loadpin_add_hooks(void); -#else -static inline void loadpin_add_hooks(void) { }; -#endif #endif /* ! __LINUX_LSM_HOOKS_H */ diff --git a/security/Kconfig b/security/Kconfig index cedf69e8a22c..2cd737ba7660 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -239,46 +239,9 @@ source "security/yama/Kconfig" source "security/integrity/Kconfig" -choice - prompt "Default security module" - default DEFAULT_SECURITY_SELINUX if SECURITY_SELINUX - default DEFAULT_SECURITY_SMACK if SECURITY_SMACK - default DEFAULT_SECURITY_TOMOYO if SECURITY_TOMOYO - default DEFAULT_SECURITY_APPARMOR if SECURITY_APPARMOR - default DEFAULT_SECURITY_DAC - - help - Select the security module that will be used by default if the - kernel parameter security= is not specified. - - config DEFAULT_SECURITY_SELINUX - bool "SELinux" if SECURITY_SELINUX=y - - config DEFAULT_SECURITY_SMACK - bool "Simplified Mandatory Access Control" if SECURITY_SMACK=y - - config DEFAULT_SECURITY_TOMOYO - bool "TOMOYO" if SECURITY_TOMOYO=y - - config DEFAULT_SECURITY_APPARMOR - bool "AppArmor" if SECURITY_APPARMOR=y - - config DEFAULT_SECURITY_DAC - bool "Unix Discretionary Access Controls" - -endchoice - -config DEFAULT_SECURITY - string - default "selinux" if DEFAULT_SECURITY_SELINUX - default "smack" if DEFAULT_SECURITY_SMACK - default "tomoyo" if DEFAULT_SECURITY_TOMOYO - default "apparmor" if DEFAULT_SECURITY_APPARMOR - default "" if DEFAULT_SECURITY_DAC - config LSM string "Ordered list of enabled LSMs" - default "integrity" + default "loadpin,integrity,selinux,smack,tomoyo,apparmor" help A comma-separated list of LSMs, in initialization order. Any LSMs left off this list will be ignored. This can be diff --git a/security/loadpin/loadpin.c b/security/loadpin/loadpin.c index 48f39631b370..055fb0a64169 100644 --- a/security/loadpin/loadpin.c +++ b/security/loadpin/loadpin.c @@ -187,13 +187,19 @@ static struct security_hook_list loadpin_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(kernel_load_data, loadpin_load_data), }; -void __init loadpin_add_hooks(void) +static int __init loadpin_init(void) { pr_info("ready to pin (currently %senforcing)\n", enforce ? "" : "not "); security_add_hooks(loadpin_hooks, ARRAY_SIZE(loadpin_hooks), "loadpin"); + return 0; } +DEFINE_LSM(loadpin) = { + .name = "loadpin", + .init = loadpin_init, +}; + /* Should not be mutable after boot, so not listed in sysfs (perm == 0). */ module_param(enforce, int, 0); MODULE_PARM_DESC(enforce, "Enforce module/firmware pinning"); diff --git a/security/security.c b/security/security.c index 46c5b0fa515e..b8d75f5a948d 100644 --- a/security/security.c +++ b/security/security.c @@ -275,7 +275,6 @@ int __init security_init(void) */ capability_add_hooks(); yama_add_hooks(); - loadpin_add_hooks(); /* Load LSMs in specified order. */ ordered_lsm_init(); -- cgit From d6aed64b74b73b64278c059eacd59d87167aa968 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 14 Sep 2018 15:37:20 -0700 Subject: Yama: Initialize as ordered LSM This converts Yama from being a direct "minor" LSM into an ordered LSM. Signed-off-by: Kees Cook Reviewed-by: Casey Schaufler --- include/linux/lsm_hooks.h | 5 ----- security/Kconfig | 2 +- security/security.c | 1 - security/yama/yama_lsm.c | 8 +++++++- 4 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index fb1a653ccfcb..2849e9b2c01d 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -2090,10 +2090,5 @@ static inline void security_delete_hooks(struct security_hook_list *hooks, #endif /* CONFIG_SECURITY_WRITABLE_HOOKS */ extern void __init capability_add_hooks(void); -#ifdef CONFIG_SECURITY_YAMA -extern void __init yama_add_hooks(void); -#else -static inline void __init yama_add_hooks(void) { } -#endif #endif /* ! __LINUX_LSM_HOOKS_H */ diff --git a/security/Kconfig b/security/Kconfig index 2cd737ba7660..78dc12b7eeb3 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -241,7 +241,7 @@ source "security/integrity/Kconfig" config LSM string "Ordered list of enabled LSMs" - default "loadpin,integrity,selinux,smack,tomoyo,apparmor" + default "yama,loadpin,integrity,selinux,smack,tomoyo,apparmor" help A comma-separated list of LSMs, in initialization order. Any LSMs left off this list will be ignored. This can be diff --git a/security/security.c b/security/security.c index b8d75f5a948d..35f93b7c585b 100644 --- a/security/security.c +++ b/security/security.c @@ -274,7 +274,6 @@ int __init security_init(void) * Load minor LSMs, with the capability module always first. */ capability_add_hooks(); - yama_add_hooks(); /* Load LSMs in specified order. */ ordered_lsm_init(); diff --git a/security/yama/yama_lsm.c b/security/yama/yama_lsm.c index ffda91a4a1aa..eb1da1303d2e 100644 --- a/security/yama/yama_lsm.c +++ b/security/yama/yama_lsm.c @@ -477,9 +477,15 @@ static void __init yama_init_sysctl(void) static inline void yama_init_sysctl(void) { } #endif /* CONFIG_SYSCTL */ -void __init yama_add_hooks(void) +static int __init yama_init(void) { pr_info("Yama: becoming mindful.\n"); security_add_hooks(yama_hooks, ARRAY_SIZE(yama_hooks), "yama"); yama_init_sysctl(); + return 0; } + +DEFINE_LSM(yama) = { + .name = "yama", + .init = yama_init, +}; -- cgit From e2bc445b66cad25b0627391df8138a83d0e48f97 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 19 Sep 2018 17:48:21 -0700 Subject: LSM: Introduce enum lsm_order In preparation for distinguishing the "capability" LSM from other LSMs, it must be ordered first. This introduces LSM_ORDER_MUTABLE for the general LSMs and LSM_ORDER_FIRST for capability. In the future LSM_ORDER_LAST for could be added for anything that must run last (e.g. Landlock may use this). Signed-off-by: Kees Cook --- include/linux/lsm_hooks.h | 6 ++++++ security/security.c | 9 ++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 2849e9b2c01d..27d4db9588bb 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -2045,8 +2045,14 @@ extern void security_add_hooks(struct security_hook_list *hooks, int count, #define LSM_FLAG_LEGACY_MAJOR BIT(0) #define LSM_FLAG_EXCLUSIVE BIT(1) +enum lsm_order { + LSM_ORDER_FIRST = -1, /* This is only for capabilities. */ + LSM_ORDER_MUTABLE = 0, +}; + struct lsm_info { const char *name; /* Required. */ + enum lsm_order order; /* Optional: default is LSM_ORDER_MUTABLE */ unsigned long flags; /* Optional: flags describing LSM */ int *enabled; /* Optional: controlled by CONFIG_LSM */ int (*init)(void); /* Required. */ diff --git a/security/security.c b/security/security.c index 35f93b7c585b..8b673bb2a0dd 100644 --- a/security/security.c +++ b/security/security.c @@ -174,6 +174,12 @@ static void __init ordered_lsm_parse(const char *order, const char *origin) struct lsm_info *lsm; char *sep, *name, *next; + /* LSM_ORDER_FIRST is always first. */ + for (lsm = __start_lsm_info; lsm < __end_lsm_info; lsm++) { + if (lsm->order == LSM_ORDER_FIRST) + append_ordered_lsm(lsm, "first"); + } + /* Process "security=", if given. */ if (chosen_major_lsm) { struct lsm_info *major; @@ -202,7 +208,8 @@ static void __init ordered_lsm_parse(const char *order, const char *origin) bool found = false; for (lsm = __start_lsm_info; lsm < __end_lsm_info; lsm++) { - if (strcmp(lsm->name, name) == 0) { + if (lsm->order == LSM_ORDER_MUTABLE && + strcmp(lsm->name, name) == 0) { append_ordered_lsm(lsm, origin); found = true; } -- cgit From d117a154e6128abac5409d3f173584e7b25981a2 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 14 Sep 2018 15:40:45 -0700 Subject: capability: Initialize as LSM_ORDER_FIRST This converts capabilities to use the new LSM_ORDER_FIRST position. Signed-off-by: Kees Cook Reviewed-by: Casey Schaufler --- include/linux/lsm_hooks.h | 2 -- security/commoncap.c | 9 ++++++++- security/security.c | 5 ----- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 27d4db9588bb..0c908c091a03 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -2095,6 +2095,4 @@ static inline void security_delete_hooks(struct security_hook_list *hooks, #define __lsm_ro_after_init __ro_after_init #endif /* CONFIG_SECURITY_WRITABLE_HOOKS */ -extern void __init capability_add_hooks(void); - #endif /* ! __LINUX_LSM_HOOKS_H */ diff --git a/security/commoncap.c b/security/commoncap.c index 232db019f051..52e04136bfa8 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -1362,10 +1362,17 @@ struct security_hook_list capability_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(vm_enough_memory, cap_vm_enough_memory), }; -void __init capability_add_hooks(void) +static int __init capability_init(void) { security_add_hooks(capability_hooks, ARRAY_SIZE(capability_hooks), "capability"); + return 0; } +DEFINE_LSM(capability) = { + .name = "capability", + .order = LSM_ORDER_FIRST, + .init = capability_init, +}; + #endif /* CONFIG_SECURITY */ diff --git a/security/security.c b/security/security.c index 8b673bb2a0dd..9411f659454b 100644 --- a/security/security.c +++ b/security/security.c @@ -277,11 +277,6 @@ int __init security_init(void) i++) INIT_HLIST_HEAD(&list[i]); - /* - * Load minor LSMs, with the capability module always first. - */ - capability_add_hooks(); - /* Load LSMs in specified order. */ ordered_lsm_init(); -- cgit From 6d9c939dbe4d0bcea09cd4b410f624cde1acb678 Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Fri, 21 Sep 2018 17:16:59 -0700 Subject: procfs: add smack subdir to attrs Back in 2007 I made what turned out to be a rather serious mistake in the implementation of the Smack security module. The SELinux module used an interface in /proc to manipulate the security context on processes. Rather than use a similar interface, I used the same interface. The AppArmor team did likewise. Now /proc/.../attr/current will tell you the security "context" of the process, but it will be different depending on the security module you're using. This patch provides a subdirectory in /proc/.../attr for Smack. Smack user space can use the "current" file in this subdirectory and never have to worry about getting SELinux attributes by mistake. Programs that use the old interface will continue to work (or fail, as the case may be) as before. The proposed S.A.R.A security module is dependent on the mechanism to create its own attr subdirectory. The original implementation is by Kees Cook. Signed-off-by: Casey Schaufler Reviewed-by: Kees Cook Signed-off-by: Kees Cook --- Documentation/admin-guide/LSM/index.rst | 13 +++++-- fs/proc/base.c | 64 ++++++++++++++++++++++++++++----- fs/proc/internal.h | 1 + include/linux/security.h | 15 +++++--- security/security.c | 24 ++++++++++--- 5 files changed, 96 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/Documentation/admin-guide/LSM/index.rst b/Documentation/admin-guide/LSM/index.rst index c980dfe9abf1..9842e21afd4a 100644 --- a/Documentation/admin-guide/LSM/index.rst +++ b/Documentation/admin-guide/LSM/index.rst @@ -17,9 +17,8 @@ MAC extensions, other extensions can be built using the LSM to provide specific changes to system operation when these tweaks are not available in the core functionality of Linux itself. -Without a specific LSM built into the kernel, the default LSM will be the -Linux capabilities system. Most LSMs choose to extend the capabilities -system, building their checks on top of the defined capability hooks. +The Linux capabilities modules will always be included. This may be +followed by any number of "minor" modules and at most one "major" module. For more details on capabilities, see ``capabilities(7)`` in the Linux man-pages project. @@ -30,6 +29,14 @@ order in which checks are made. The capability module will always be first, followed by any "minor" modules (e.g. Yama) and then the one "major" module (e.g. SELinux) if there is one configured. +Process attributes associated with "major" security modules should +be accessed and maintained using the special files in ``/proc/.../attr``. +A security module may maintain a module specific subdirectory there, +named after the module. ``/proc/.../attr/smack`` is provided by the Smack +security module and contains all its special files. The files directly +in ``/proc/.../attr`` remain as legacy interfaces for modules that provide +subdirectories. + .. toctree:: :maxdepth: 1 diff --git a/fs/proc/base.c b/fs/proc/base.c index 633a63462573..c9d775fd24ef 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -140,9 +140,13 @@ struct pid_entry { #define REG(NAME, MODE, fops) \ NOD(NAME, (S_IFREG|(MODE)), NULL, &fops, {}) #define ONE(NAME, MODE, show) \ - NOD(NAME, (S_IFREG|(MODE)), \ + NOD(NAME, (S_IFREG|(MODE)), \ NULL, &proc_single_file_operations, \ { .proc_show = show } ) +#define ATTR(LSM, NAME, MODE) \ + NOD(NAME, (S_IFREG|(MODE)), \ + NULL, &proc_pid_attr_operations, \ + { .lsm = LSM }) /* * Count the number of hardlinks for the pid_entry table, excluding the . @@ -2525,7 +2529,7 @@ static ssize_t proc_pid_attr_read(struct file * file, char __user * buf, if (!task) return -ESRCH; - length = security_getprocattr(task, + length = security_getprocattr(task, PROC_I(inode)->op.lsm, (char*)file->f_path.dentry->d_name.name, &p); put_task_struct(task); @@ -2574,7 +2578,9 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, if (rv < 0) goto out_free; - rv = security_setprocattr(file->f_path.dentry->d_name.name, page, count); + rv = security_setprocattr(PROC_I(inode)->op.lsm, + file->f_path.dentry->d_name.name, page, + count); mutex_unlock(¤t->signal->cred_guard_mutex); out_free: kfree(page); @@ -2588,13 +2594,53 @@ static const struct file_operations proc_pid_attr_operations = { .llseek = generic_file_llseek, }; +#define LSM_DIR_OPS(LSM) \ +static int proc_##LSM##_attr_dir_iterate(struct file *filp, \ + struct dir_context *ctx) \ +{ \ + return proc_pident_readdir(filp, ctx, \ + LSM##_attr_dir_stuff, \ + ARRAY_SIZE(LSM##_attr_dir_stuff)); \ +} \ +\ +static const struct file_operations proc_##LSM##_attr_dir_ops = { \ + .read = generic_read_dir, \ + .iterate = proc_##LSM##_attr_dir_iterate, \ + .llseek = default_llseek, \ +}; \ +\ +static struct dentry *proc_##LSM##_attr_dir_lookup(struct inode *dir, \ + struct dentry *dentry, unsigned int flags) \ +{ \ + return proc_pident_lookup(dir, dentry, \ + LSM##_attr_dir_stuff, \ + ARRAY_SIZE(LSM##_attr_dir_stuff)); \ +} \ +\ +static const struct inode_operations proc_##LSM##_attr_dir_inode_ops = { \ + .lookup = proc_##LSM##_attr_dir_lookup, \ + .getattr = pid_getattr, \ + .setattr = proc_setattr, \ +} + +#ifdef CONFIG_SECURITY_SMACK +static const struct pid_entry smack_attr_dir_stuff[] = { + ATTR("smack", "current", 0666), +}; +LSM_DIR_OPS(smack); +#endif + static const struct pid_entry attr_dir_stuff[] = { - REG("current", S_IRUGO|S_IWUGO, proc_pid_attr_operations), - REG("prev", S_IRUGO, proc_pid_attr_operations), - REG("exec", S_IRUGO|S_IWUGO, proc_pid_attr_operations), - REG("fscreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations), - REG("keycreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations), - REG("sockcreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations), + ATTR(NULL, "current", 0666), + ATTR(NULL, "prev", 0444), + ATTR(NULL, "exec", 0666), + ATTR(NULL, "fscreate", 0666), + ATTR(NULL, "keycreate", 0666), + ATTR(NULL, "sockcreate", 0666), +#ifdef CONFIG_SECURITY_SMACK + DIR("smack", 0555, + proc_smack_attr_dir_inode_ops, proc_smack_attr_dir_ops), +#endif }; static int proc_attr_dir_readdir(struct file *file, struct dir_context *ctx) diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 5185d7f6a51e..d4f9989063d0 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -81,6 +81,7 @@ union proc_op { int (*proc_show)(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task); + const char *lsm; }; struct proc_inode { diff --git a/include/linux/security.h b/include/linux/security.h index dbfb5a66babb..b2c5333ed4b5 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -366,8 +366,10 @@ int security_sem_semctl(struct kern_ipc_perm *sma, int cmd); int security_sem_semop(struct kern_ipc_perm *sma, struct sembuf *sops, unsigned nsops, int alter); void security_d_instantiate(struct dentry *dentry, struct inode *inode); -int security_getprocattr(struct task_struct *p, char *name, char **value); -int security_setprocattr(const char *name, void *value, size_t size); +int security_getprocattr(struct task_struct *p, const char *lsm, char *name, + char **value); +int security_setprocattr(const char *lsm, const char *name, void *value, + size_t size); int security_netlink_send(struct sock *sk, struct sk_buff *skb); int security_ismaclabel(const char *name); int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen); @@ -1112,15 +1114,18 @@ static inline int security_sem_semop(struct kern_ipc_perm *sma, return 0; } -static inline void security_d_instantiate(struct dentry *dentry, struct inode *inode) +static inline void security_d_instantiate(struct dentry *dentry, + struct inode *inode) { } -static inline int security_getprocattr(struct task_struct *p, char *name, char **value) +static inline int security_getprocattr(struct task_struct *p, const char *lsm, + char *name, char **value) { return -EINVAL; } -static inline int security_setprocattr(char *name, void *value, size_t size) +static inline int security_setprocattr(const char *lsm, char *name, + void *value, size_t size) { return -EINVAL; } diff --git a/security/security.c b/security/security.c index 9411f659454b..60b39db95c2f 100644 --- a/security/security.c +++ b/security/security.c @@ -1485,14 +1485,30 @@ void security_d_instantiate(struct dentry *dentry, struct inode *inode) } EXPORT_SYMBOL(security_d_instantiate); -int security_getprocattr(struct task_struct *p, char *name, char **value) +int security_getprocattr(struct task_struct *p, const char *lsm, char *name, + char **value) { - return call_int_hook(getprocattr, -EINVAL, p, name, value); + struct security_hook_list *hp; + + hlist_for_each_entry(hp, &security_hook_heads.getprocattr, list) { + if (lsm != NULL && strcmp(lsm, hp->lsm)) + continue; + return hp->hook.getprocattr(p, name, value); + } + return -EINVAL; } -int security_setprocattr(const char *name, void *value, size_t size) +int security_setprocattr(const char *lsm, const char *name, void *value, + size_t size) { - return call_int_hook(setprocattr, -EINVAL, name, value, size); + struct security_hook_list *hp; + + hlist_for_each_entry(hp, &security_hook_heads.setprocattr, list) { + if (lsm != NULL && strcmp(lsm, hp->lsm)) + continue; + return hp->hook.setprocattr(name, value, size); + } + return -EINVAL; } int security_netlink_send(struct sock *sk, struct sk_buff *skb) -- cgit From 3d252529480c68bfd6a6774652df7c8968b28e41 Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Fri, 21 Sep 2018 17:17:34 -0700 Subject: SELinux: Remove unused selinux_is_enabled There are no longer users of selinux_is_enabled(). Remove it. As selinux_is_enabled() is the only reason for include/linux/selinux.h remove that as well. Signed-off-by: Casey Schaufler Reviewed-by: Kees Cook Signed-off-by: Kees Cook --- include/linux/cred.h | 1 - include/linux/selinux.h | 35 ----------------------------------- security/selinux/Makefile | 2 +- security/selinux/exports.c | 23 ----------------------- security/selinux/hooks.c | 1 - security/selinux/include/audit.h | 3 --- security/selinux/ss/services.c | 1 - 7 files changed, 1 insertion(+), 65 deletions(-) delete mode 100644 include/linux/selinux.h delete mode 100644 security/selinux/exports.c (limited to 'include/linux') diff --git a/include/linux/cred.h b/include/linux/cred.h index 4907c9df86b3..ddd45bb74887 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/include/linux/selinux.h b/include/linux/selinux.h deleted file mode 100644 index 44f459612690..000000000000 --- a/include/linux/selinux.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * SELinux services exported to the rest of the kernel. - * - * Author: James Morris - * - * Copyright (C) 2005 Red Hat, Inc., James Morris - * Copyright (C) 2006 Trusted Computer Solutions, Inc. - * Copyright (C) 2006 IBM Corporation, Timothy R. Chavez - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2, - * as published by the Free Software Foundation. - */ -#ifndef _LINUX_SELINUX_H -#define _LINUX_SELINUX_H - -struct selinux_audit_rule; -struct audit_context; -struct kern_ipc_perm; - -#ifdef CONFIG_SECURITY_SELINUX - -/** - * selinux_is_enabled - is SELinux enabled? - */ -bool selinux_is_enabled(void); -#else - -static inline bool selinux_is_enabled(void) -{ - return false; -} -#endif /* CONFIG_SECURITY_SELINUX */ - -#endif /* _LINUX_SELINUX_H */ diff --git a/security/selinux/Makefile b/security/selinux/Makefile index c7161f8792b2..ccf950409384 100644 --- a/security/selinux/Makefile +++ b/security/selinux/Makefile @@ -6,7 +6,7 @@ obj-$(CONFIG_SECURITY_SELINUX) := selinux.o selinux-y := avc.o hooks.o selinuxfs.o netlink.o nlmsgtab.o netif.o \ - netnode.o netport.o ibpkey.o exports.o \ + netnode.o netport.o ibpkey.o \ ss/ebitmap.o ss/hashtab.o ss/symtab.o ss/sidtab.o ss/avtab.o \ ss/policydb.o ss/services.o ss/conditional.o ss/mls.o ss/status.o diff --git a/security/selinux/exports.c b/security/selinux/exports.c deleted file mode 100644 index e75dd94e2d2b..000000000000 --- a/security/selinux/exports.c +++ /dev/null @@ -1,23 +0,0 @@ -/* - * SELinux services exported to the rest of the kernel. - * - * Author: James Morris - * - * Copyright (C) 2005 Red Hat, Inc., James Morris - * Copyright (C) 2006 Trusted Computer Solutions, Inc. - * Copyright (C) 2006 IBM Corporation, Timothy R. Chavez - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2, - * as published by the Free Software Foundation. - */ -#include -#include - -#include "security.h" - -bool selinux_is_enabled(void) -{ - return selinux_enabled; -} -EXPORT_SYMBOL_GPL(selinux_is_enabled); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index ad227177550b..169cf5b3334b 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -79,7 +79,6 @@ #include #include #include -#include #include #include #include diff --git a/security/selinux/include/audit.h b/security/selinux/include/audit.h index 1bdf973433cc..36e1d44c0209 100644 --- a/security/selinux/include/audit.h +++ b/security/selinux/include/audit.h @@ -1,9 +1,6 @@ /* * SELinux support for the Audit LSM hooks * - * Most of below header was moved from include/linux/selinux.h which - * is released under below copyrights: - * * Author: James Morris * * Copyright (C) 2005 Red Hat, Inc., James Morris diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index dd44126c8d14..d6e7b4856d93 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -49,7 +49,6 @@ #include #include #include -#include #include #include #include -- cgit From bbd3662a834813730912a58efb44dd6df6d952e6 Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Mon, 12 Nov 2018 09:30:56 -0800 Subject: Infrastructure management of the cred security blob Move management of the cred security blob out of the security modules and into the security infrastructre. Instead of allocating and freeing space the security modules tell the infrastructure how much space they require. Signed-off-by: Casey Schaufler Reviewed-by: Kees Cook [kees: adjusted for ordered init series] Signed-off-by: Kees Cook --- include/linux/lsm_hooks.h | 12 ++++++ security/apparmor/include/cred.h | 4 +- security/apparmor/include/lib.h | 4 ++ security/apparmor/lsm.c | 9 ++++ security/security.c | 89 ++++++++++++++++++++++++++++++++++++++- security/selinux/hooks.c | 51 +++++----------------- security/selinux/include/objsec.h | 4 +- security/smack/smack.h | 3 +- security/smack/smack_lsm.c | 79 +++++++++++----------------------- security/tomoyo/common.h | 3 +- security/tomoyo/tomoyo.c | 6 +++ 11 files changed, 162 insertions(+), 102 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 0c908c091a03..dd33666567bc 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -2027,6 +2027,13 @@ struct security_hook_list { char *lsm; } __randomize_layout; +/* + * Security blob size or offset data. + */ +struct lsm_blob_sizes { + int lbs_cred; +}; + /* * Initializing a security_hook_list structure takes * up a lot of space in a source file. This macro takes @@ -2056,6 +2063,7 @@ struct lsm_info { unsigned long flags; /* Optional: flags describing LSM */ int *enabled; /* Optional: controlled by CONFIG_LSM */ int (*init)(void); /* Required. */ + struct lsm_blob_sizes *blobs; /* Optional: for blob sharing. */ }; extern struct lsm_info __start_lsm_info[], __end_lsm_info[]; @@ -2095,4 +2103,8 @@ static inline void security_delete_hooks(struct security_hook_list *hooks, #define __lsm_ro_after_init __ro_after_init #endif /* CONFIG_SECURITY_WRITABLE_HOOKS */ +#ifdef CONFIG_SECURITY +void __init lsm_early_cred(struct cred *cred); +#endif + #endif /* ! __LINUX_LSM_HOOKS_H */ diff --git a/security/apparmor/include/cred.h b/security/apparmor/include/cred.h index a757370f2a0c..b9504a05fddc 100644 --- a/security/apparmor/include/cred.h +++ b/security/apparmor/include/cred.h @@ -25,7 +25,7 @@ static inline struct aa_label *cred_label(const struct cred *cred) { - struct aa_label **blob = cred->security; + struct aa_label **blob = cred->security + apparmor_blob_sizes.lbs_cred; AA_BUG(!blob); return *blob; @@ -34,7 +34,7 @@ static inline struct aa_label *cred_label(const struct cred *cred) static inline void set_cred_label(const struct cred *cred, struct aa_label *label) { - struct aa_label **blob = cred->security; + struct aa_label **blob = cred->security + apparmor_blob_sizes.lbs_cred; AA_BUG(!blob); *blob = label; diff --git a/security/apparmor/include/lib.h b/security/apparmor/include/lib.h index 6505e1ad9e23..bbe9b384d71d 100644 --- a/security/apparmor/include/lib.h +++ b/security/apparmor/include/lib.h @@ -16,6 +16,7 @@ #include #include +#include #include "match.h" @@ -55,6 +56,9 @@ const char *aa_splitn_fqname(const char *fqname, size_t n, const char **ns_name, size_t *ns_len); void aa_info_message(const char *str); +/* Security blob offsets */ +extern struct lsm_blob_sizes apparmor_blob_sizes; + /** * aa_strneq - compare null terminated @str to a non null terminated substring * @str: a null terminated string diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 8c2cb4b1a6c3..d5e4a384f205 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -1151,6 +1151,13 @@ static int apparmor_inet_conn_request(struct sock *sk, struct sk_buff *skb, } #endif +/* + * The cred blob is a pointer to, not an instance of, an aa_task_ctx. + */ +struct lsm_blob_sizes apparmor_blob_sizes __lsm_ro_after_init = { + .lbs_cred = sizeof(struct aa_task_ctx *), +}; + static struct security_hook_list apparmor_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(ptrace_access_check, apparmor_ptrace_access_check), LSM_HOOK_INIT(ptrace_traceme, apparmor_ptrace_traceme), @@ -1485,6 +1492,7 @@ static int __init set_init_ctx(void) if (!ctx) return -ENOMEM; + lsm_early_cred(cred); set_cred_label(cred, aa_get_label(ns_unconfined(root_ns))); task_ctx(current) = ctx; @@ -1725,5 +1733,6 @@ DEFINE_LSM(apparmor) = { .name = "apparmor", .flags = LSM_FLAG_LEGACY_MAJOR | LSM_FLAG_EXCLUSIVE, .enabled = &apparmor_enabled, + .blobs = &apparmor_blob_sizes, .init = apparmor_init, }; diff --git a/security/security.c b/security/security.c index 60b39db95c2f..09be8ce007a2 100644 --- a/security/security.c +++ b/security/security.c @@ -41,6 +41,8 @@ struct security_hook_heads security_hook_heads __lsm_ro_after_init; static ATOMIC_NOTIFIER_HEAD(lsm_notifier_chain); char *lsm_names; +static struct lsm_blob_sizes blob_sizes __lsm_ro_after_init; + /* Boot-time LSM user choice */ static __initdata const char *chosen_lsm_order; static __initdata const char *chosen_major_lsm; @@ -139,6 +141,25 @@ static bool __init lsm_allowed(struct lsm_info *lsm) return true; } +static void __init lsm_set_blob_size(int *need, int *lbs) +{ + int offset; + + if (*need > 0) { + offset = *lbs; + *lbs += *need; + *need = offset; + } +} + +static void __init lsm_set_blob_sizes(struct lsm_blob_sizes *needed) +{ + if (!needed) + return; + + lsm_set_blob_size(&needed->lbs_cred, &blob_sizes.lbs_cred); +} + /* Prepare LSM for initialization. */ static void __init prepare_lsm(struct lsm_info *lsm) { @@ -153,6 +174,8 @@ static void __init prepare_lsm(struct lsm_info *lsm) exclusive = lsm; init_debug("exclusive chosen: %s\n", lsm->name); } + + lsm_set_blob_sizes(lsm->blobs); } } @@ -255,6 +278,8 @@ static void __init ordered_lsm_init(void) for (lsm = ordered_lsms; *lsm; lsm++) prepare_lsm(*lsm); + init_debug("cred blob size = %d\n", blob_sizes.lbs_cred); + for (lsm = ordered_lsms; *lsm; lsm++) initialize_lsm(*lsm); @@ -382,6 +407,47 @@ int unregister_lsm_notifier(struct notifier_block *nb) } EXPORT_SYMBOL(unregister_lsm_notifier); +/** + * lsm_cred_alloc - allocate a composite cred blob + * @cred: the cred that needs a blob + * @gfp: allocation type + * + * Allocate the cred blob for all the modules + * + * Returns 0, or -ENOMEM if memory can't be allocated. + */ +static int lsm_cred_alloc(struct cred *cred, gfp_t gfp) +{ + if (blob_sizes.lbs_cred == 0) { + cred->security = NULL; + return 0; + } + + cred->security = kzalloc(blob_sizes.lbs_cred, gfp); + if (cred->security == NULL) + return -ENOMEM; + return 0; +} + +/** + * lsm_early_cred - during initialization allocate a composite cred blob + * @cred: the cred that needs a blob + * + * Allocate the cred blob for all the modules if it's not already there + */ +void __init lsm_early_cred(struct cred *cred) +{ + int rc; + + if (cred == NULL) + panic("%s: NULL cred.\n", __func__); + if (cred->security != NULL) + return; + rc = lsm_cred_alloc(cred, GFP_KERNEL); + if (rc) + panic("%s: Early cred alloc failed.\n", __func__); +} + /* * Hook list operation macros. * @@ -1195,17 +1261,36 @@ void security_task_free(struct task_struct *task) int security_cred_alloc_blank(struct cred *cred, gfp_t gfp) { - return call_int_hook(cred_alloc_blank, 0, cred, gfp); + int rc = lsm_cred_alloc(cred, gfp); + + if (rc) + return rc; + + rc = call_int_hook(cred_alloc_blank, 0, cred, gfp); + if (rc) + security_cred_free(cred); + return rc; } void security_cred_free(struct cred *cred) { call_void_hook(cred_free, cred); + + kfree(cred->security); + cred->security = NULL; } int security_prepare_creds(struct cred *new, const struct cred *old, gfp_t gfp) { - return call_int_hook(cred_prepare, 0, new, old, gfp); + int rc = lsm_cred_alloc(new, gfp); + + if (rc) + return rc; + + rc = call_int_hook(cred_prepare, 0, new, old, gfp); + if (rc) + security_cred_free(new); + return rc; } void security_transfer_creds(struct cred *new, const struct cred *old) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 169cf5b3334b..239b13b442e7 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -210,12 +210,9 @@ static void cred_init_security(void) struct cred *cred = (struct cred *) current->real_cred; struct task_security_struct *tsec; - tsec = kzalloc(sizeof(struct task_security_struct), GFP_KERNEL); - if (!tsec) - panic("SELinux: Failed to initialize initial task.\n"); - + lsm_early_cred(cred); + tsec = selinux_cred(cred); tsec->osid = tsec->sid = SECINITSID_KERNEL; - cred->security = tsec; } /* @@ -3685,47 +3682,16 @@ static int selinux_task_alloc(struct task_struct *task, sid, sid, SECCLASS_PROCESS, PROCESS__FORK, NULL); } -/* - * allocate the SELinux part of blank credentials - */ -static int selinux_cred_alloc_blank(struct cred *cred, gfp_t gfp) -{ - struct task_security_struct *tsec; - - tsec = kzalloc(sizeof(struct task_security_struct), gfp); - if (!tsec) - return -ENOMEM; - - cred->security = tsec; - return 0; -} - -/* - * detach and free the LSM part of a set of credentials - */ -static void selinux_cred_free(struct cred *cred) -{ - struct task_security_struct *tsec = selinux_cred(cred); - - kfree(tsec); -} - /* * prepare a new set of credentials for modification */ static int selinux_cred_prepare(struct cred *new, const struct cred *old, gfp_t gfp) { - const struct task_security_struct *old_tsec; - struct task_security_struct *tsec; - - old_tsec = selinux_cred(old); - - tsec = kmemdup(old_tsec, sizeof(struct task_security_struct), gfp); - if (!tsec) - return -ENOMEM; + const struct task_security_struct *old_tsec = selinux_cred(old); + struct task_security_struct *tsec = selinux_cred(new); - new->security = tsec; + *tsec = *old_tsec; return 0; } @@ -6678,6 +6644,10 @@ static void selinux_bpf_prog_free(struct bpf_prog_aux *aux) } #endif +struct lsm_blob_sizes selinux_blob_sizes __lsm_ro_after_init = { + .lbs_cred = sizeof(struct task_security_struct), +}; + static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(binder_set_context_mgr, selinux_binder_set_context_mgr), LSM_HOOK_INIT(binder_transaction, selinux_binder_transaction), @@ -6761,8 +6731,6 @@ static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(file_open, selinux_file_open), LSM_HOOK_INIT(task_alloc, selinux_task_alloc), - LSM_HOOK_INIT(cred_alloc_blank, selinux_cred_alloc_blank), - LSM_HOOK_INIT(cred_free, selinux_cred_free), LSM_HOOK_INIT(cred_prepare, selinux_cred_prepare), LSM_HOOK_INIT(cred_transfer, selinux_cred_transfer), LSM_HOOK_INIT(cred_getsecid, selinux_cred_getsecid), @@ -6981,6 +6949,7 @@ DEFINE_LSM(selinux) = { .name = "selinux", .flags = LSM_FLAG_LEGACY_MAJOR | LSM_FLAG_EXCLUSIVE, .enabled = &selinux_enabled, + .blobs = &selinux_blob_sizes, .init = selinux_init, }; diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h index 734b6833bdff..c2974b031d05 100644 --- a/security/selinux/include/objsec.h +++ b/security/selinux/include/objsec.h @@ -25,6 +25,7 @@ #include #include #include +#include #include #include "flask.h" #include "avc.h" @@ -158,9 +159,10 @@ struct bpf_security_struct { u32 sid; /*SID of bpf obj creater*/ }; +extern struct lsm_blob_sizes selinux_blob_sizes; static inline struct task_security_struct *selinux_cred(const struct cred *cred) { - return cred->security; + return cred->security + selinux_blob_sizes.lbs_cred; } #endif /* _SELINUX_OBJSEC_H_ */ diff --git a/security/smack/smack.h b/security/smack/smack.h index 01a922856eba..b27eb252e953 100644 --- a/security/smack/smack.h +++ b/security/smack/smack.h @@ -336,6 +336,7 @@ extern struct smack_known *smack_syslog_label; extern struct smack_known *smack_unconfined; #endif extern int smack_ptrace_rule; +extern struct lsm_blob_sizes smack_blob_sizes; extern struct smack_known smack_known_floor; extern struct smack_known smack_known_hat; @@ -358,7 +359,7 @@ extern struct hlist_head smack_known_hash[SMACK_HASH_SLOTS]; static inline struct task_smack *smack_cred(const struct cred *cred) { - return cred->security; + return cred->security + smack_blob_sizes.lbs_cred; } /* diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 9a050ca17296..bad27a8e1631 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -326,29 +326,20 @@ static struct inode_smack *new_inode_smack(struct smack_known *skp) } /** - * new_task_smack - allocate a task security blob + * init_task_smack - initialize a task security blob + * @tsp: blob to initialize * @task: a pointer to the Smack label for the running task * @forked: a pointer to the Smack label for the forked task - * @gfp: type of the memory for the allocation * - * Returns the new blob or NULL if there's no memory available */ -static struct task_smack *new_task_smack(struct smack_known *task, - struct smack_known *forked, gfp_t gfp) +static void init_task_smack(struct task_smack *tsp, struct smack_known *task, + struct smack_known *forked) { - struct task_smack *tsp; - - tsp = kzalloc(sizeof(struct task_smack), gfp); - if (tsp == NULL) - return NULL; - tsp->smk_task = task; tsp->smk_forked = forked; INIT_LIST_HEAD(&tsp->smk_rules); INIT_LIST_HEAD(&tsp->smk_relabel); mutex_init(&tsp->smk_rules_lock); - - return tsp; } /** @@ -1881,14 +1872,7 @@ static int smack_file_open(struct file *file) */ static int smack_cred_alloc_blank(struct cred *cred, gfp_t gfp) { - struct task_smack *tsp; - - tsp = new_task_smack(NULL, NULL, gfp); - if (tsp == NULL) - return -ENOMEM; - - cred->security = tsp; - + init_task_smack(smack_cred(cred), NULL, NULL); return 0; } @@ -1905,10 +1889,6 @@ static void smack_cred_free(struct cred *cred) struct list_head *l; struct list_head *n; - if (tsp == NULL) - return; - cred->security = NULL; - smk_destroy_label_list(&tsp->smk_relabel); list_for_each_safe(l, n, &tsp->smk_rules) { @@ -1916,7 +1896,6 @@ static void smack_cred_free(struct cred *cred) list_del(&rp->list); kfree(rp); } - kfree(tsp); } /** @@ -1931,14 +1910,10 @@ static int smack_cred_prepare(struct cred *new, const struct cred *old, gfp_t gfp) { struct task_smack *old_tsp = smack_cred(old); - struct task_smack *new_tsp; + struct task_smack *new_tsp = smack_cred(new); int rc; - new_tsp = new_task_smack(old_tsp->smk_task, old_tsp->smk_task, gfp); - if (new_tsp == NULL) - return -ENOMEM; - - new->security = new_tsp; + init_task_smack(new_tsp, old_tsp->smk_task, old_tsp->smk_task); rc = smk_copy_rules(&new_tsp->smk_rules, &old_tsp->smk_rules, gfp); if (rc != 0) @@ -1946,10 +1921,7 @@ static int smack_cred_prepare(struct cred *new, const struct cred *old, rc = smk_copy_relabel(&new_tsp->smk_relabel, &old_tsp->smk_relabel, gfp); - if (rc != 0) - return rc; - - return 0; + return rc; } /** @@ -4581,6 +4553,10 @@ static int smack_dentry_create_files_as(struct dentry *dentry, int mode, return 0; } +struct lsm_blob_sizes smack_blob_sizes __lsm_ro_after_init = { + .lbs_cred = sizeof(struct task_smack), +}; + static struct security_hook_list smack_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(ptrace_access_check, smack_ptrace_access_check), LSM_HOOK_INIT(ptrace_traceme, smack_ptrace_traceme), @@ -4758,20 +4734,25 @@ static __init void init_smack_known_list(void) */ static __init int smack_init(void) { - struct cred *cred; + struct cred *cred = (struct cred *) current->cred; struct task_smack *tsp; smack_inode_cache = KMEM_CACHE(inode_smack, 0); if (!smack_inode_cache) return -ENOMEM; - tsp = new_task_smack(&smack_known_floor, &smack_known_floor, - GFP_KERNEL); - if (tsp == NULL) { - kmem_cache_destroy(smack_inode_cache); - return -ENOMEM; - } + lsm_early_cred(cred); + /* + * Set the security state for the initial task. + */ + tsp = smack_cred(cred); + init_task_smack(tsp, &smack_known_floor, &smack_known_floor); + + /* + * Register with LSM + */ + security_add_hooks(smack_hooks, ARRAY_SIZE(smack_hooks), "smack"); smack_enabled = 1; pr_info("Smack: Initializing.\n"); @@ -4785,20 +4766,9 @@ static __init int smack_init(void) pr_info("Smack: IPv6 Netfilter enabled.\n"); #endif - /* - * Set the security state for the initial task. - */ - cred = (struct cred *) current->cred; - cred->security = tsp; - /* initialize the smack_known_list */ init_smack_known_list(); - /* - * Register with LSM - */ - security_add_hooks(smack_hooks, ARRAY_SIZE(smack_hooks), "smack"); - return 0; } @@ -4809,5 +4779,6 @@ static __init int smack_init(void) DEFINE_LSM(smack) = { .name = "smack", .flags = LSM_FLAG_LEGACY_MAJOR | LSM_FLAG_EXCLUSIVE, + .blobs = &smack_blob_sizes, .init = smack_init, }; diff --git a/security/tomoyo/common.h b/security/tomoyo/common.h index 41898613d93b..4fc17294a12d 100644 --- a/security/tomoyo/common.h +++ b/security/tomoyo/common.h @@ -1087,6 +1087,7 @@ extern struct tomoyo_domain_info tomoyo_kernel_domain; extern struct tomoyo_policy_namespace tomoyo_kernel_namespace; extern unsigned int tomoyo_memory_quota[TOMOYO_MAX_MEMORY_STAT]; extern unsigned int tomoyo_memory_used[TOMOYO_MAX_MEMORY_STAT]; +extern struct lsm_blob_sizes tomoyo_blob_sizes; /********** Inlined functions. **********/ @@ -1206,7 +1207,7 @@ static inline void tomoyo_put_group(struct tomoyo_group *group) */ static inline struct tomoyo_domain_info **tomoyo_cred(const struct cred *cred) { - return (struct tomoyo_domain_info **)&cred->security; + return cred->security + tomoyo_blob_sizes.lbs_cred; } /** diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c index 15864307925d..9094cf41a247 100644 --- a/security/tomoyo/tomoyo.c +++ b/security/tomoyo/tomoyo.c @@ -509,6 +509,10 @@ static int tomoyo_socket_sendmsg(struct socket *sock, struct msghdr *msg, return tomoyo_socket_sendmsg_permission(sock, msg, size); } +struct lsm_blob_sizes tomoyo_blob_sizes __lsm_ro_after_init = { + .lbs_cred = sizeof(struct tomoyo_domain_info *), +}; + /* * tomoyo_security_ops is a "struct security_operations" which is used for * registering TOMOYO. @@ -562,6 +566,7 @@ static int __init tomoyo_init(void) /* register ourselves with the security framework */ security_add_hooks(tomoyo_hooks, ARRAY_SIZE(tomoyo_hooks), "tomoyo"); printk(KERN_INFO "TOMOYO Linux initialized\n"); + lsm_early_cred(cred); blob = tomoyo_cred(cred); *blob = &tomoyo_kernel_domain; tomoyo_mm_init(); @@ -573,5 +578,6 @@ DEFINE_LSM(tomoyo) = { .name = "tomoyo", .enabled = &tomoyo_enabled, .flags = LSM_FLAG_LEGACY_MAJOR | LSM_FLAG_EXCLUSIVE, + .blobs = &tomoyo_blob_sizes, .init = tomoyo_init, }; -- cgit From 33bf60cabcc7687b194a689b068b65e9ecd556be Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Mon, 12 Nov 2018 12:02:49 -0800 Subject: LSM: Infrastructure management of the file security Move management of the file->f_security blob out of the individual security modules and into the infrastructure. The modules no longer allocate or free the data, instead they tell the infrastructure how much space they require. Signed-off-by: Casey Schaufler Reviewed-by: Kees Cook [kees: adjusted for ordered init series] Signed-off-by: Kees Cook --- include/linux/lsm_hooks.h | 1 + security/apparmor/include/file.h | 5 +++- security/apparmor/lsm.c | 19 +++++++------- security/security.c | 54 ++++++++++++++++++++++++++++++++++++--- security/selinux/hooks.c | 25 ++---------------- security/selinux/include/objsec.h | 2 +- security/smack/smack.h | 3 ++- security/smack/smack_lsm.c | 14 +--------- 8 files changed, 72 insertions(+), 51 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index dd33666567bc..e8cef019b645 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -2032,6 +2032,7 @@ struct security_hook_list { */ struct lsm_blob_sizes { int lbs_cred; + int lbs_file; }; /* diff --git a/security/apparmor/include/file.h b/security/apparmor/include/file.h index 4c2c8ac8842f..8be09208cf7c 100644 --- a/security/apparmor/include/file.h +++ b/security/apparmor/include/file.h @@ -32,7 +32,10 @@ struct path; AA_MAY_CHMOD | AA_MAY_CHOWN | AA_MAY_LOCK | \ AA_EXEC_MMAP | AA_MAY_LINK) -#define file_ctx(X) ((struct aa_file_ctx *)(X)->f_security) +static inline struct aa_file_ctx *file_ctx(struct file *file) +{ + return file->f_security + apparmor_blob_sizes.lbs_file; +} /* struct aa_file_ctx - the AppArmor context the file was opened in * @lock: lock to update the ctx diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index d5e4a384f205..6821187b06ad 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -434,21 +434,21 @@ static int apparmor_file_open(struct file *file) static int apparmor_file_alloc_security(struct file *file) { - int error = 0; - - /* freed by apparmor_file_free_security */ + struct aa_file_ctx *ctx = file_ctx(file); struct aa_label *label = begin_current_label_crit_section(); - file->f_security = aa_alloc_file_ctx(label, GFP_KERNEL); - if (!file_ctx(file)) - error = -ENOMEM; - end_current_label_crit_section(label); - return error; + spin_lock_init(&ctx->lock); + rcu_assign_pointer(ctx->label, aa_get_label(label)); + end_current_label_crit_section(label); + return 0; } static void apparmor_file_free_security(struct file *file) { - aa_free_file_ctx(file_ctx(file)); + struct aa_file_ctx *ctx = file_ctx(file); + + if (ctx) + aa_put_label(rcu_access_pointer(ctx->label)); } static int common_file_perm(const char *op, struct file *file, u32 mask) @@ -1156,6 +1156,7 @@ static int apparmor_inet_conn_request(struct sock *sk, struct sk_buff *skb, */ struct lsm_blob_sizes apparmor_blob_sizes __lsm_ro_after_init = { .lbs_cred = sizeof(struct aa_task_ctx *), + .lbs_file = sizeof(struct aa_file_ctx), }; static struct security_hook_list apparmor_hooks[] __lsm_ro_after_init = { diff --git a/security/security.c b/security/security.c index 09be8ce007a2..f32d7d2075c6 100644 --- a/security/security.c +++ b/security/security.c @@ -40,6 +40,8 @@ struct security_hook_heads security_hook_heads __lsm_ro_after_init; static ATOMIC_NOTIFIER_HEAD(lsm_notifier_chain); +static struct kmem_cache *lsm_file_cache; + char *lsm_names; static struct lsm_blob_sizes blob_sizes __lsm_ro_after_init; @@ -158,6 +160,7 @@ static void __init lsm_set_blob_sizes(struct lsm_blob_sizes *needed) return; lsm_set_blob_size(&needed->lbs_cred, &blob_sizes.lbs_cred); + lsm_set_blob_size(&needed->lbs_file, &blob_sizes.lbs_file); } /* Prepare LSM for initialization. */ @@ -279,6 +282,15 @@ static void __init ordered_lsm_init(void) prepare_lsm(*lsm); init_debug("cred blob size = %d\n", blob_sizes.lbs_cred); + init_debug("file blob size = %d\n", blob_sizes.lbs_file); + + /* + * Create any kmem_caches needed for blobs + */ + if (blob_sizes.lbs_file) + lsm_file_cache = kmem_cache_create("lsm_file_cache", + blob_sizes.lbs_file, 0, + SLAB_PANIC, NULL); for (lsm = ordered_lsms; *lsm; lsm++) initialize_lsm(*lsm); @@ -448,6 +460,27 @@ void __init lsm_early_cred(struct cred *cred) panic("%s: Early cred alloc failed.\n", __func__); } +/** + * lsm_file_alloc - allocate a composite file blob + * @file: the file that needs a blob + * + * Allocate the file blob for all the modules + * + * Returns 0, or -ENOMEM if memory can't be allocated. + */ +static int lsm_file_alloc(struct file *file) +{ + if (!lsm_file_cache) { + file->f_security = NULL; + return 0; + } + + file->f_security = kmem_cache_zalloc(lsm_file_cache, GFP_KERNEL); + if (file->f_security == NULL) + return -ENOMEM; + return 0; +} + /* * Hook list operation macros. * @@ -1144,12 +1177,27 @@ int security_file_permission(struct file *file, int mask) int security_file_alloc(struct file *file) { - return call_int_hook(file_alloc_security, 0, file); + int rc = lsm_file_alloc(file); + + if (rc) + return rc; + rc = call_int_hook(file_alloc_security, 0, file); + if (unlikely(rc)) + security_file_free(file); + return rc; } void security_file_free(struct file *file) { + void *blob; + call_void_hook(file_free_security, file); + + blob = file->f_security; + if (blob) { + file->f_security = NULL; + kmem_cache_free(lsm_file_cache, blob); + } } int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) @@ -1267,7 +1315,7 @@ int security_cred_alloc_blank(struct cred *cred, gfp_t gfp) return rc; rc = call_int_hook(cred_alloc_blank, 0, cred, gfp); - if (rc) + if (unlikely(rc)) security_cred_free(cred); return rc; } @@ -1288,7 +1336,7 @@ int security_prepare_creds(struct cred *new, const struct cred *old, gfp_t gfp) return rc; rc = call_int_hook(cred_prepare, 0, new, old, gfp); - if (rc) + if (unlikely(rc)) security_cred_free(new); return rc; } diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 620be0367c0b..632813821da6 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -146,7 +146,6 @@ static int __init checkreqprot_setup(char *str) __setup("checkreqprot=", checkreqprot_setup); static struct kmem_cache *sel_inode_cache; -static struct kmem_cache *file_security_cache; /** * selinux_secmark_enabled - Check to see if SECMARK is currently enabled @@ -378,27 +377,15 @@ static void inode_free_security(struct inode *inode) static int file_alloc_security(struct file *file) { - struct file_security_struct *fsec; + struct file_security_struct *fsec = selinux_file(file); u32 sid = current_sid(); - fsec = kmem_cache_zalloc(file_security_cache, GFP_KERNEL); - if (!fsec) - return -ENOMEM; - fsec->sid = sid; fsec->fown_sid = sid; - file->f_security = fsec; return 0; } -static void file_free_security(struct file *file) -{ - struct file_security_struct *fsec = selinux_file(file); - file->f_security = NULL; - kmem_cache_free(file_security_cache, fsec); -} - static int superblock_alloc_security(struct super_block *sb) { struct superblock_security_struct *sbsec; @@ -3345,11 +3332,6 @@ static int selinux_file_alloc_security(struct file *file) return file_alloc_security(file); } -static void selinux_file_free_security(struct file *file) -{ - file_free_security(file); -} - /* * Check whether a task has the ioctl permission and cmd * operation to an inode. @@ -6646,6 +6628,7 @@ static void selinux_bpf_prog_free(struct bpf_prog_aux *aux) struct lsm_blob_sizes selinux_blob_sizes __lsm_ro_after_init = { .lbs_cred = sizeof(struct task_security_struct), + .lbs_file = sizeof(struct file_security_struct), }; static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = { @@ -6717,7 +6700,6 @@ static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(file_permission, selinux_file_permission), LSM_HOOK_INIT(file_alloc_security, selinux_file_alloc_security), - LSM_HOOK_INIT(file_free_security, selinux_file_free_security), LSM_HOOK_INIT(file_ioctl, selinux_file_ioctl), LSM_HOOK_INIT(mmap_file, selinux_mmap_file), LSM_HOOK_INIT(mmap_addr, selinux_mmap_addr), @@ -6902,9 +6884,6 @@ static __init int selinux_init(void) sel_inode_cache = kmem_cache_create("selinux_inode_security", sizeof(struct inode_security_struct), 0, SLAB_PANIC, NULL); - file_security_cache = kmem_cache_create("selinux_file_security", - sizeof(struct file_security_struct), - 0, SLAB_PANIC, NULL); avc_init(); avtab_cache_init(); diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h index e0ac2992e059..96374dbf4ace 100644 --- a/security/selinux/include/objsec.h +++ b/security/selinux/include/objsec.h @@ -167,7 +167,7 @@ static inline struct task_security_struct *selinux_cred(const struct cred *cred) static inline struct file_security_struct *selinux_file(const struct file *file) { - return file->f_security; + return file->f_security + selinux_blob_sizes.lbs_file; } #endif /* _SELINUX_OBJSEC_H_ */ diff --git a/security/smack/smack.h b/security/smack/smack.h index 50854969a391..2007d38d0e46 100644 --- a/security/smack/smack.h +++ b/security/smack/smack.h @@ -364,7 +364,8 @@ static inline struct task_smack *smack_cred(const struct cred *cred) static inline struct smack_known **smack_file(const struct file *file) { - return (struct smack_known **)&file->f_security; + return (struct smack_known **)(file->f_security + + smack_blob_sizes.lbs_file); } /* diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 8f72641f94ab..7c76668ea3a6 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -1495,18 +1495,6 @@ static int smack_file_alloc_security(struct file *file) return 0; } -/** - * smack_file_free_security - clear a file security blob - * @file: the object - * - * The security blob for a file is a pointer to the master - * label list, so no memory is freed. - */ -static void smack_file_free_security(struct file *file) -{ - file->f_security = NULL; -} - /** * smack_file_ioctl - Smack check on ioctls * @file: the object @@ -4559,6 +4547,7 @@ static int smack_dentry_create_files_as(struct dentry *dentry, int mode, struct lsm_blob_sizes smack_blob_sizes __lsm_ro_after_init = { .lbs_cred = sizeof(struct task_smack), + .lbs_file = sizeof(struct smack_known *), }; static struct security_hook_list smack_hooks[] __lsm_ro_after_init = { @@ -4595,7 +4584,6 @@ static struct security_hook_list smack_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(inode_getsecid, smack_inode_getsecid), LSM_HOOK_INIT(file_alloc_security, smack_file_alloc_security), - LSM_HOOK_INIT(file_free_security, smack_file_free_security), LSM_HOOK_INIT(file_ioctl, smack_file_ioctl), LSM_HOOK_INIT(file_lock, smack_file_lock), LSM_HOOK_INIT(file_fcntl, smack_file_fcntl), -- cgit From afb1cbe37440c7f38b9cf46fc331cc9dfd5cce21 Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Fri, 21 Sep 2018 17:19:29 -0700 Subject: LSM: Infrastructure management of the inode security Move management of the inode->i_security blob out of the individual security modules and into the security infrastructure. Instead of allocating the blobs from within the modules the modules tell the infrastructure how much space is required, and the space is allocated there. Signed-off-by: Casey Schaufler Reviewed-by: Kees Cook [kees: adjusted for ordered init series] Signed-off-by: Kees Cook --- include/linux/lsm_hooks.h | 3 ++ security/security.c | 64 +++++++++++++++++++++++++++++++-- security/selinux/hooks.c | 37 ++++--------------- security/selinux/include/objsec.h | 9 +++-- security/smack/smack.h | 2 +- security/smack/smack_lsm.c | 76 +++++++++------------------------------ 6 files changed, 93 insertions(+), 98 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index e8cef019b645..1c798e842de2 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -2033,6 +2033,7 @@ struct security_hook_list { struct lsm_blob_sizes { int lbs_cred; int lbs_file; + int lbs_inode; }; /* @@ -2104,6 +2105,8 @@ static inline void security_delete_hooks(struct security_hook_list *hooks, #define __lsm_ro_after_init __ro_after_init #endif /* CONFIG_SECURITY_WRITABLE_HOOKS */ +extern int lsm_inode_alloc(struct inode *inode); + #ifdef CONFIG_SECURITY void __init lsm_early_cred(struct cred *cred); #endif diff --git a/security/security.c b/security/security.c index f32d7d2075c6..4989fb65e662 100644 --- a/security/security.c +++ b/security/security.c @@ -41,6 +41,7 @@ struct security_hook_heads security_hook_heads __lsm_ro_after_init; static ATOMIC_NOTIFIER_HEAD(lsm_notifier_chain); static struct kmem_cache *lsm_file_cache; +static struct kmem_cache *lsm_inode_cache; char *lsm_names; static struct lsm_blob_sizes blob_sizes __lsm_ro_after_init; @@ -161,6 +162,13 @@ static void __init lsm_set_blob_sizes(struct lsm_blob_sizes *needed) lsm_set_blob_size(&needed->lbs_cred, &blob_sizes.lbs_cred); lsm_set_blob_size(&needed->lbs_file, &blob_sizes.lbs_file); + /* + * The inode blob gets an rcu_head in addition to + * what the modules might need. + */ + if (needed->lbs_inode && blob_sizes.lbs_inode == 0) + blob_sizes.lbs_inode = sizeof(struct rcu_head); + lsm_set_blob_size(&needed->lbs_inode, &blob_sizes.lbs_inode); } /* Prepare LSM for initialization. */ @@ -283,6 +291,7 @@ static void __init ordered_lsm_init(void) init_debug("cred blob size = %d\n", blob_sizes.lbs_cred); init_debug("file blob size = %d\n", blob_sizes.lbs_file); + init_debug("inode blob size = %d\n", blob_sizes.lbs_inode); /* * Create any kmem_caches needed for blobs @@ -291,6 +300,10 @@ static void __init ordered_lsm_init(void) lsm_file_cache = kmem_cache_create("lsm_file_cache", blob_sizes.lbs_file, 0, SLAB_PANIC, NULL); + if (blob_sizes.lbs_inode) + lsm_inode_cache = kmem_cache_create("lsm_inode_cache", + blob_sizes.lbs_inode, 0, + SLAB_PANIC, NULL); for (lsm = ordered_lsms; *lsm; lsm++) initialize_lsm(*lsm); @@ -481,6 +494,27 @@ static int lsm_file_alloc(struct file *file) return 0; } +/** + * lsm_inode_alloc - allocate a composite inode blob + * @inode: the inode that needs a blob + * + * Allocate the inode blob for all the modules + * + * Returns 0, or -ENOMEM if memory can't be allocated. + */ +int lsm_inode_alloc(struct inode *inode) +{ + if (!lsm_inode_cache) { + inode->i_security = NULL; + return 0; + } + + inode->i_security = kmem_cache_zalloc(lsm_inode_cache, GFP_NOFS); + if (inode->i_security == NULL) + return -ENOMEM; + return 0; +} + /* * Hook list operation macros. * @@ -740,14 +774,40 @@ EXPORT_SYMBOL(security_add_mnt_opt); int security_inode_alloc(struct inode *inode) { - inode->i_security = NULL; - return call_int_hook(inode_alloc_security, 0, inode); + int rc = lsm_inode_alloc(inode); + + if (unlikely(rc)) + return rc; + rc = call_int_hook(inode_alloc_security, 0, inode); + if (unlikely(rc)) + security_inode_free(inode); + return rc; +} + +static void inode_free_by_rcu(struct rcu_head *head) +{ + /* + * The rcu head is at the start of the inode blob + */ + kmem_cache_free(lsm_inode_cache, head); } void security_inode_free(struct inode *inode) { integrity_inode_free(inode); call_void_hook(inode_free_security, inode); + /* + * The inode may still be referenced in a path walk and + * a call to security_inode_permission() can be made + * after inode_free_security() is called. Ideally, the VFS + * wouldn't do this, but fixing that is a much harder + * job. For now, simply free the i_security via RCU, and + * leave the current inode->i_security pointer intact. + * The inode will be freed after the RCU grace period too. + */ + if (inode->i_security) + call_rcu((struct rcu_head *)inode->i_security, + inode_free_by_rcu); } int security_dentry_init_security(struct dentry *dentry, int mode, diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 2d691e8dfbbf..23da46cd6e37 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -145,8 +145,6 @@ static int __init checkreqprot_setup(char *str) } __setup("checkreqprot=", checkreqprot_setup); -static struct kmem_cache *sel_inode_cache; - /** * selinux_secmark_enabled - Check to see if SECMARK is currently enabled * @@ -242,13 +240,9 @@ static inline u32 task_sid(const struct task_struct *task) static int inode_alloc_security(struct inode *inode) { - struct inode_security_struct *isec; + struct inode_security_struct *isec = selinux_inode(inode); u32 sid = current_sid(); - isec = kmem_cache_zalloc(sel_inode_cache, GFP_NOFS); - if (!isec) - return -ENOMEM; - spin_lock_init(&isec->lock); INIT_LIST_HEAD(&isec->list); isec->inode = inode; @@ -256,7 +250,6 @@ static int inode_alloc_security(struct inode *inode) isec->sclass = SECCLASS_FILE; isec->task_sid = sid; isec->initialized = LABEL_INVALID; - inode->i_security = isec; return 0; } @@ -334,19 +327,14 @@ static struct inode_security_struct *backing_inode_security(struct dentry *dentr return selinux_inode(inode); } -static void inode_free_rcu(struct rcu_head *head) -{ - struct inode_security_struct *isec; - - isec = container_of(head, struct inode_security_struct, rcu); - kmem_cache_free(sel_inode_cache, isec); -} - static void inode_free_security(struct inode *inode) { struct inode_security_struct *isec = selinux_inode(inode); - struct superblock_security_struct *sbsec = inode->i_sb->s_security; + struct superblock_security_struct *sbsec; + if (!isec) + return; + sbsec = inode->i_sb->s_security; /* * As not all inode security structures are in a list, we check for * empty list outside of the lock to make sure that we won't waste @@ -362,17 +350,6 @@ static void inode_free_security(struct inode *inode) list_del_init(&isec->list); spin_unlock(&sbsec->isec_lock); } - - /* - * The inode may still be referenced in a path walk and - * a call to selinux_inode_permission() can be made - * after inode_free_security() is called. Ideally, the VFS - * wouldn't do this, but fixing that is a much harder - * job. For now, simply free the i_security via RCU, and - * leave the current inode->i_security pointer intact. - * The inode will be freed after the RCU grace period too. - */ - call_rcu(&isec->rcu, inode_free_rcu); } static int file_alloc_security(struct file *file) @@ -6629,6 +6606,7 @@ static void selinux_bpf_prog_free(struct bpf_prog_aux *aux) struct lsm_blob_sizes selinux_blob_sizes __lsm_ro_after_init = { .lbs_cred = sizeof(struct task_security_struct), .lbs_file = sizeof(struct file_security_struct), + .lbs_inode = sizeof(struct inode_security_struct), }; static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = { @@ -6881,9 +6859,6 @@ static __init int selinux_init(void) default_noexec = !(VM_DATA_DEFAULT_FLAGS & VM_EXEC); - sel_inode_cache = kmem_cache_create("selinux_inode_security", - sizeof(struct inode_security_struct), - 0, SLAB_PANIC, NULL); avc_init(); avtab_cache_init(); diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h index 26b4ff6b4d81..562fad58c56b 100644 --- a/security/selinux/include/objsec.h +++ b/security/selinux/include/objsec.h @@ -57,10 +57,7 @@ enum label_initialized { struct inode_security_struct { struct inode *inode; /* back pointer to inode object */ - union { - struct list_head list; /* list of inode_security_struct */ - struct rcu_head rcu; /* for freeing the inode_security_struct */ - }; + struct list_head list; /* list of inode_security_struct */ u32 task_sid; /* SID of creating task */ u32 sid; /* SID of this object */ u16 sclass; /* security class of this object */ @@ -173,7 +170,9 @@ static inline struct file_security_struct *selinux_file(const struct file *file) static inline struct inode_security_struct *selinux_inode( const struct inode *inode) { - return inode->i_security; + if (unlikely(!inode->i_security)) + return NULL; + return inode->i_security + selinux_blob_sizes.lbs_inode; } #endif /* _SELINUX_OBJSEC_H_ */ diff --git a/security/smack/smack.h b/security/smack/smack.h index 436231dfae33..bf0abc35ca1c 100644 --- a/security/smack/smack.h +++ b/security/smack/smack.h @@ -370,7 +370,7 @@ static inline struct smack_known **smack_file(const struct file *file) static inline struct inode_smack *smack_inode(const struct inode *inode) { - return inode->i_security; + return inode->i_security + smack_blob_sizes.lbs_inode; } /* diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index ddffda39d107..804897c82810 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -305,24 +305,18 @@ static struct smack_known *smk_fetch(const char *name, struct inode *ip, } /** - * new_inode_smack - allocate an inode security blob + * init_inode_smack - initialize an inode security blob + * @isp: the blob to initialize * @skp: a pointer to the Smack label entry to use in the blob * - * Returns the new blob or NULL if there's no memory available */ -static struct inode_smack *new_inode_smack(struct smack_known *skp) +static void init_inode_smack(struct inode *inode, struct smack_known *skp) { - struct inode_smack *isp; - - isp = kmem_cache_zalloc(smack_inode_cache, GFP_NOFS); - if (isp == NULL) - return NULL; + struct inode_smack *isp = smack_inode(inode); isp->smk_inode = skp; isp->smk_flags = 0; mutex_init(&isp->smk_lock); - - return isp; } /** @@ -709,6 +703,13 @@ static int smack_set_mnt_opts(struct super_block *sb, if (sp->smk_flags & SMK_SB_INITIALIZED) return 0; + if (inode->i_security == NULL) { + int rc = lsm_inode_alloc(inode); + + if (rc) + return rc; + } + if (!smack_privileged(CAP_MAC_ADMIN)) { /* * Unprivileged mounts don't get to specify Smack values. @@ -773,17 +774,12 @@ static int smack_set_mnt_opts(struct super_block *sb, /* * Initialize the root inode. */ - isp = smack_inode(inode); - if (isp == NULL) { - isp = new_inode_smack(sp->smk_root); - if (isp == NULL) - return -ENOMEM; - inode->i_security = isp; - } else - isp->smk_inode = sp->smk_root; + init_inode_smack(inode, sp->smk_root); - if (transmute) + if (transmute) { + isp = smack_inode(inode); isp->smk_flags |= SMK_INODE_TRANSMUTE; + } return 0; } @@ -881,48 +877,10 @@ static int smack_inode_alloc_security(struct inode *inode) { struct smack_known *skp = smk_of_current(); - inode->i_security = new_inode_smack(skp); - if (inode->i_security == NULL) - return -ENOMEM; + init_inode_smack(inode, skp); return 0; } -/** - * smack_inode_free_rcu - Free inode_smack blob from cache - * @head: the rcu_head for getting inode_smack pointer - * - * Call back function called from call_rcu() to free - * the i_security blob pointer in inode - */ -static void smack_inode_free_rcu(struct rcu_head *head) -{ - struct inode_smack *issp; - - issp = container_of(head, struct inode_smack, smk_rcu); - kmem_cache_free(smack_inode_cache, issp); -} - -/** - * smack_inode_free_security - free an inode blob using call_rcu() - * @inode: the inode with a blob - * - * Clears the blob pointer in inode using RCU - */ -static void smack_inode_free_security(struct inode *inode) -{ - struct inode_smack *issp = smack_inode(inode); - - /* - * The inode may still be referenced in a path walk and - * a call to smack_inode_permission() can be made - * after smack_inode_free_security() is called. - * To avoid race condition free the i_security via RCU - * and leave the current inode->i_security pointer intact. - * The inode will be freed after the RCU grace period too. - */ - call_rcu(&issp->smk_rcu, smack_inode_free_rcu); -} - /** * smack_inode_init_security - copy out the smack from an inode * @inode: the newly created inode @@ -4548,6 +4506,7 @@ static int smack_dentry_create_files_as(struct dentry *dentry, int mode, struct lsm_blob_sizes smack_blob_sizes __lsm_ro_after_init = { .lbs_cred = sizeof(struct task_smack), .lbs_file = sizeof(struct smack_known *), + .lbs_inode = sizeof(struct inode_smack), }; static struct security_hook_list smack_hooks[] __lsm_ro_after_init = { @@ -4565,7 +4524,6 @@ static struct security_hook_list smack_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(bprm_set_creds, smack_bprm_set_creds), LSM_HOOK_INIT(inode_alloc_security, smack_inode_alloc_security), - LSM_HOOK_INIT(inode_free_security, smack_inode_free_security), LSM_HOOK_INIT(inode_init_security, smack_inode_init_security), LSM_HOOK_INIT(inode_link, smack_inode_link), LSM_HOOK_INIT(inode_unlink, smack_inode_unlink), -- cgit From f4ad8f2c40769b3cc9497ba0883bbaf823f7752f Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Fri, 21 Sep 2018 17:19:37 -0700 Subject: LSM: Infrastructure management of the task security Move management of the task_struct->security blob out of the individual security modules and into the security infrastructure. Instead of allocating the blobs from within the modules the modules tell the infrastructure how much space is required, and the space is allocated there. The only user of this blob is AppArmor. The AppArmor use is abstracted to avoid future conflict. Signed-off-by: Casey Schaufler Reviewed-by: Kees Cook [kees: adjusted for ordered init series] Signed-off-by: Kees Cook --- include/linux/lsm_hooks.h | 2 ++ security/apparmor/include/task.h | 18 +++----------- security/apparmor/lsm.c | 15 +++-------- security/security.c | 54 +++++++++++++++++++++++++++++++++++++++- 4 files changed, 62 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 1c798e842de2..9b39fefa88c4 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -2034,6 +2034,7 @@ struct lsm_blob_sizes { int lbs_cred; int lbs_file; int lbs_inode; + int lbs_task; }; /* @@ -2109,6 +2110,7 @@ extern int lsm_inode_alloc(struct inode *inode); #ifdef CONFIG_SECURITY void __init lsm_early_cred(struct cred *cred); +void __init lsm_early_task(struct task_struct *task); #endif #endif /* ! __LINUX_LSM_HOOKS_H */ diff --git a/security/apparmor/include/task.h b/security/apparmor/include/task.h index 55edaa1d83f8..039c1e60887a 100644 --- a/security/apparmor/include/task.h +++ b/security/apparmor/include/task.h @@ -14,7 +14,10 @@ #ifndef __AA_TASK_H #define __AA_TASK_H -#define task_ctx(X) ((X)->security) +static inline struct aa_task_ctx *task_ctx(struct task_struct *task) +{ + return task->security; +} /* * struct aa_task_ctx - information for current task label change @@ -36,17 +39,6 @@ int aa_set_current_hat(struct aa_label *label, u64 token); int aa_restore_previous_label(u64 cookie); struct aa_label *aa_get_task_label(struct task_struct *task); -/** - * aa_alloc_task_ctx - allocate a new task_ctx - * @flags: gfp flags for allocation - * - * Returns: allocated buffer or NULL on failure - */ -static inline struct aa_task_ctx *aa_alloc_task_ctx(gfp_t flags) -{ - return kzalloc(sizeof(struct aa_task_ctx), flags); -} - /** * aa_free_task_ctx - free a task_ctx * @ctx: task_ctx to free (MAYBE NULL) @@ -57,8 +49,6 @@ static inline void aa_free_task_ctx(struct aa_task_ctx *ctx) aa_put_label(ctx->nnp); aa_put_label(ctx->previous); aa_put_label(ctx->onexec); - - kzfree(ctx); } } diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 6821187b06ad..60ef71268ccf 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -94,19 +94,14 @@ static void apparmor_task_free(struct task_struct *task) { aa_free_task_ctx(task_ctx(task)); - task_ctx(task) = NULL; } static int apparmor_task_alloc(struct task_struct *task, unsigned long clone_flags) { - struct aa_task_ctx *new = aa_alloc_task_ctx(GFP_KERNEL); - - if (!new) - return -ENOMEM; + struct aa_task_ctx *new = task_ctx(task); aa_dup_task_ctx(new, task_ctx(current)); - task_ctx(task) = new; return 0; } @@ -1157,6 +1152,7 @@ static int apparmor_inet_conn_request(struct sock *sk, struct sk_buff *skb, struct lsm_blob_sizes apparmor_blob_sizes __lsm_ro_after_init = { .lbs_cred = sizeof(struct aa_task_ctx *), .lbs_file = sizeof(struct aa_file_ctx), + .lbs_task = sizeof(struct aa_task_ctx), }; static struct security_hook_list apparmor_hooks[] __lsm_ro_after_init = { @@ -1487,15 +1483,10 @@ static int param_set_mode(const char *val, const struct kernel_param *kp) static int __init set_init_ctx(void) { struct cred *cred = (struct cred *)current->real_cred; - struct aa_task_ctx *ctx; - - ctx = aa_alloc_task_ctx(GFP_KERNEL); - if (!ctx) - return -ENOMEM; lsm_early_cred(cred); + lsm_early_task(current); set_cred_label(cred, aa_get_label(ns_unconfined(root_ns))); - task_ctx(current) = ctx; return 0; } diff --git a/security/security.c b/security/security.c index 4989fb65e662..e59a1e1514ee 100644 --- a/security/security.c +++ b/security/security.c @@ -169,6 +169,7 @@ static void __init lsm_set_blob_sizes(struct lsm_blob_sizes *needed) if (needed->lbs_inode && blob_sizes.lbs_inode == 0) blob_sizes.lbs_inode = sizeof(struct rcu_head); lsm_set_blob_size(&needed->lbs_inode, &blob_sizes.lbs_inode); + lsm_set_blob_size(&needed->lbs_task, &blob_sizes.lbs_task); } /* Prepare LSM for initialization. */ @@ -292,6 +293,7 @@ static void __init ordered_lsm_init(void) init_debug("cred blob size = %d\n", blob_sizes.lbs_cred); init_debug("file blob size = %d\n", blob_sizes.lbs_file); init_debug("inode blob size = %d\n", blob_sizes.lbs_inode); + init_debug("task blob size = %d\n", blob_sizes.lbs_task); /* * Create any kmem_caches needed for blobs @@ -515,6 +517,46 @@ int lsm_inode_alloc(struct inode *inode) return 0; } +/** + * lsm_task_alloc - allocate a composite task blob + * @task: the task that needs a blob + * + * Allocate the task blob for all the modules + * + * Returns 0, or -ENOMEM if memory can't be allocated. + */ +int lsm_task_alloc(struct task_struct *task) +{ + if (blob_sizes.lbs_task == 0) { + task->security = NULL; + return 0; + } + + task->security = kzalloc(blob_sizes.lbs_task, GFP_KERNEL); + if (task->security == NULL) + return -ENOMEM; + return 0; +} + +/** + * lsm_early_task - during initialization allocate a composite task blob + * @task: the task that needs a blob + * + * Allocate the task blob for all the modules if it's not already there + */ +void __init lsm_early_task(struct task_struct *task) +{ + int rc; + + if (task == NULL) + panic("%s: task cred.\n", __func__); + if (task->security != NULL) + return; + rc = lsm_task_alloc(task); + if (rc) + panic("%s: Early task alloc failed.\n", __func__); +} + /* * Hook list operation macros. * @@ -1359,12 +1401,22 @@ int security_file_open(struct file *file) int security_task_alloc(struct task_struct *task, unsigned long clone_flags) { - return call_int_hook(task_alloc, 0, task, clone_flags); + int rc = lsm_task_alloc(task); + + if (rc) + return rc; + rc = call_int_hook(task_alloc, 0, task, clone_flags); + if (unlikely(rc)) + security_task_free(task); + return rc; } void security_task_free(struct task_struct *task) { call_void_hook(task_free, task); + + kfree(task->security); + task->security = NULL; } int security_cred_alloc_blank(struct cred *cred, gfp_t gfp) -- cgit From ecd5f82e05ddd9b06c258167ec7467ac79741d77 Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Tue, 20 Nov 2018 11:55:02 -0800 Subject: LSM: Infrastructure management of the ipc security blob Move management of the kern_ipc_perm->security and msg_msg->security blobs out of the individual security modules and into the security infrastructure. Instead of allocating the blobs from within the modules the modules tell the infrastructure how much space is required, and the space is allocated there. Signed-off-by: Casey Schaufler Reviewed-by: Kees Cook [kees: adjusted for ordered init series] Signed-off-by: Kees Cook --- include/linux/lsm_hooks.h | 2 + security/security.c | 91 ++++++++++++++++++++++++++++++++++-- security/selinux/hooks.c | 98 ++++++--------------------------------- security/selinux/include/objsec.h | 4 +- security/smack/smack.h | 4 +- security/smack/smack_lsm.c | 32 ++----------- 6 files changed, 110 insertions(+), 121 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 9b39fefa88c4..40511a8a5ae6 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -2034,6 +2034,8 @@ struct lsm_blob_sizes { int lbs_cred; int lbs_file; int lbs_inode; + int lbs_ipc; + int lbs_msg_msg; int lbs_task; }; diff --git a/security/security.c b/security/security.c index e59a1e1514ee..953fc3ea18a9 100644 --- a/security/security.c +++ b/security/security.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #define MAX_LSM_EVM_XATTR 2 @@ -169,6 +170,8 @@ static void __init lsm_set_blob_sizes(struct lsm_blob_sizes *needed) if (needed->lbs_inode && blob_sizes.lbs_inode == 0) blob_sizes.lbs_inode = sizeof(struct rcu_head); lsm_set_blob_size(&needed->lbs_inode, &blob_sizes.lbs_inode); + lsm_set_blob_size(&needed->lbs_ipc, &blob_sizes.lbs_ipc); + lsm_set_blob_size(&needed->lbs_msg_msg, &blob_sizes.lbs_msg_msg); lsm_set_blob_size(&needed->lbs_task, &blob_sizes.lbs_task); } @@ -293,6 +296,8 @@ static void __init ordered_lsm_init(void) init_debug("cred blob size = %d\n", blob_sizes.lbs_cred); init_debug("file blob size = %d\n", blob_sizes.lbs_file); init_debug("inode blob size = %d\n", blob_sizes.lbs_inode); + init_debug("ipc blob size = %d\n", blob_sizes.lbs_ipc); + init_debug("msg_msg blob size = %d\n", blob_sizes.lbs_msg_msg); init_debug("task blob size = %d\n", blob_sizes.lbs_task); /* @@ -538,6 +543,48 @@ int lsm_task_alloc(struct task_struct *task) return 0; } +/** + * lsm_ipc_alloc - allocate a composite ipc blob + * @kip: the ipc that needs a blob + * + * Allocate the ipc blob for all the modules + * + * Returns 0, or -ENOMEM if memory can't be allocated. + */ +int lsm_ipc_alloc(struct kern_ipc_perm *kip) +{ + if (blob_sizes.lbs_ipc == 0) { + kip->security = NULL; + return 0; + } + + kip->security = kzalloc(blob_sizes.lbs_ipc, GFP_KERNEL); + if (kip->security == NULL) + return -ENOMEM; + return 0; +} + +/** + * lsm_msg_msg_alloc - allocate a composite msg_msg blob + * @mp: the msg_msg that needs a blob + * + * Allocate the ipc blob for all the modules + * + * Returns 0, or -ENOMEM if memory can't be allocated. + */ +int lsm_msg_msg_alloc(struct msg_msg *mp) +{ + if (blob_sizes.lbs_msg_msg == 0) { + mp->security = NULL; + return 0; + } + + mp->security = kzalloc(blob_sizes.lbs_msg_msg, GFP_KERNEL); + if (mp->security == NULL) + return -ENOMEM; + return 0; +} + /** * lsm_early_task - during initialization allocate a composite task blob * @task: the task that needs a blob @@ -1631,22 +1678,40 @@ void security_ipc_getsecid(struct kern_ipc_perm *ipcp, u32 *secid) int security_msg_msg_alloc(struct msg_msg *msg) { - return call_int_hook(msg_msg_alloc_security, 0, msg); + int rc = lsm_msg_msg_alloc(msg); + + if (unlikely(rc)) + return rc; + rc = call_int_hook(msg_msg_alloc_security, 0, msg); + if (unlikely(rc)) + security_msg_msg_free(msg); + return rc; } void security_msg_msg_free(struct msg_msg *msg) { call_void_hook(msg_msg_free_security, msg); + kfree(msg->security); + msg->security = NULL; } int security_msg_queue_alloc(struct kern_ipc_perm *msq) { - return call_int_hook(msg_queue_alloc_security, 0, msq); + int rc = lsm_ipc_alloc(msq); + + if (unlikely(rc)) + return rc; + rc = call_int_hook(msg_queue_alloc_security, 0, msq); + if (unlikely(rc)) + security_msg_queue_free(msq); + return rc; } void security_msg_queue_free(struct kern_ipc_perm *msq) { call_void_hook(msg_queue_free_security, msq); + kfree(msq->security); + msq->security = NULL; } int security_msg_queue_associate(struct kern_ipc_perm *msq, int msqflg) @@ -1673,12 +1738,21 @@ int security_msg_queue_msgrcv(struct kern_ipc_perm *msq, struct msg_msg *msg, int security_shm_alloc(struct kern_ipc_perm *shp) { - return call_int_hook(shm_alloc_security, 0, shp); + int rc = lsm_ipc_alloc(shp); + + if (unlikely(rc)) + return rc; + rc = call_int_hook(shm_alloc_security, 0, shp); + if (unlikely(rc)) + security_shm_free(shp); + return rc; } void security_shm_free(struct kern_ipc_perm *shp) { call_void_hook(shm_free_security, shp); + kfree(shp->security); + shp->security = NULL; } int security_shm_associate(struct kern_ipc_perm *shp, int shmflg) @@ -1698,12 +1772,21 @@ int security_shm_shmat(struct kern_ipc_perm *shp, char __user *shmaddr, int shmf int security_sem_alloc(struct kern_ipc_perm *sma) { - return call_int_hook(sem_alloc_security, 0, sma); + int rc = lsm_ipc_alloc(sma); + + if (unlikely(rc)) + return rc; + rc = call_int_hook(sem_alloc_security, 0, sma); + if (unlikely(rc)) + security_sem_free(sma); + return rc; } void security_sem_free(struct kern_ipc_perm *sma) { call_void_hook(sem_free_security, sma); + kfree(sma->security); + sma->security = NULL; } int security_sem_associate(struct kern_ipc_perm *sma, int semflg) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 4b64ad31326f..d98e1d8d18f6 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -5626,51 +5626,22 @@ static int selinux_netlink_send(struct sock *sk, struct sk_buff *skb) return selinux_nlmsg_perm(sk, skb); } -static int ipc_alloc_security(struct kern_ipc_perm *perm, - u16 sclass) +static void ipc_init_security(struct ipc_security_struct *isec, u16 sclass) { - struct ipc_security_struct *isec; - - isec = kzalloc(sizeof(struct ipc_security_struct), GFP_KERNEL); - if (!isec) - return -ENOMEM; - isec->sclass = sclass; isec->sid = current_sid(); - perm->security = isec; - - return 0; -} - -static void ipc_free_security(struct kern_ipc_perm *perm) -{ - struct ipc_security_struct *isec = perm->security; - perm->security = NULL; - kfree(isec); } static int msg_msg_alloc_security(struct msg_msg *msg) { struct msg_security_struct *msec; - msec = kzalloc(sizeof(struct msg_security_struct), GFP_KERNEL); - if (!msec) - return -ENOMEM; - + msec = selinux_msg_msg(msg); msec->sid = SECINITSID_UNLABELED; - msg->security = msec; return 0; } -static void msg_msg_free_security(struct msg_msg *msg) -{ - struct msg_security_struct *msec = msg->security; - - msg->security = NULL; - kfree(msec); -} - static int ipc_has_perm(struct kern_ipc_perm *ipc_perms, u32 perms) { @@ -5692,11 +5663,6 @@ static int selinux_msg_msg_alloc_security(struct msg_msg *msg) return msg_msg_alloc_security(msg); } -static void selinux_msg_msg_free_security(struct msg_msg *msg) -{ - msg_msg_free_security(msg); -} - /* message queue security operations */ static int selinux_msg_queue_alloc_security(struct kern_ipc_perm *msq) { @@ -5705,11 +5671,8 @@ static int selinux_msg_queue_alloc_security(struct kern_ipc_perm *msq) u32 sid = current_sid(); int rc; - rc = ipc_alloc_security(msq, SECCLASS_MSGQ); - if (rc) - return rc; - - isec = msq->security; + isec = selinux_ipc(msq); + ipc_init_security(isec, SECCLASS_MSGQ); ad.type = LSM_AUDIT_DATA_IPC; ad.u.ipc_id = msq->key; @@ -5717,16 +5680,7 @@ static int selinux_msg_queue_alloc_security(struct kern_ipc_perm *msq) rc = avc_has_perm(&selinux_state, sid, isec->sid, SECCLASS_MSGQ, MSGQ__CREATE, &ad); - if (rc) { - ipc_free_security(msq); - return rc; - } - return 0; -} - -static void selinux_msg_queue_free_security(struct kern_ipc_perm *msq) -{ - ipc_free_security(msq); + return rc; } static int selinux_msg_queue_associate(struct kern_ipc_perm *msq, int msqflg) @@ -5856,11 +5810,8 @@ static int selinux_shm_alloc_security(struct kern_ipc_perm *shp) u32 sid = current_sid(); int rc; - rc = ipc_alloc_security(shp, SECCLASS_SHM); - if (rc) - return rc; - - isec = shp->security; + isec = selinux_ipc(shp); + ipc_init_security(isec, SECCLASS_SHM); ad.type = LSM_AUDIT_DATA_IPC; ad.u.ipc_id = shp->key; @@ -5868,16 +5819,7 @@ static int selinux_shm_alloc_security(struct kern_ipc_perm *shp) rc = avc_has_perm(&selinux_state, sid, isec->sid, SECCLASS_SHM, SHM__CREATE, &ad); - if (rc) { - ipc_free_security(shp); - return rc; - } - return 0; -} - -static void selinux_shm_free_security(struct kern_ipc_perm *shp) -{ - ipc_free_security(shp); + return rc; } static int selinux_shm_associate(struct kern_ipc_perm *shp, int shmflg) @@ -5953,11 +5895,8 @@ static int selinux_sem_alloc_security(struct kern_ipc_perm *sma) u32 sid = current_sid(); int rc; - rc = ipc_alloc_security(sma, SECCLASS_SEM); - if (rc) - return rc; - - isec = sma->security; + isec = selinux_ipc(sma); + ipc_init_security(isec, SECCLASS_SEM); ad.type = LSM_AUDIT_DATA_IPC; ad.u.ipc_id = sma->key; @@ -5965,16 +5904,7 @@ static int selinux_sem_alloc_security(struct kern_ipc_perm *sma) rc = avc_has_perm(&selinux_state, sid, isec->sid, SECCLASS_SEM, SEM__CREATE, &ad); - if (rc) { - ipc_free_security(sma); - return rc; - } - return 0; -} - -static void selinux_sem_free_security(struct kern_ipc_perm *sma) -{ - ipc_free_security(sma); + return rc; } static int selinux_sem_associate(struct kern_ipc_perm *sma, int semflg) @@ -6607,6 +6537,8 @@ struct lsm_blob_sizes selinux_blob_sizes __lsm_ro_after_init = { .lbs_cred = sizeof(struct task_security_struct), .lbs_file = sizeof(struct file_security_struct), .lbs_inode = sizeof(struct inode_security_struct), + .lbs_ipc = sizeof(struct ipc_security_struct), + .lbs_msg_msg = sizeof(struct msg_security_struct), }; static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = { @@ -6718,24 +6650,20 @@ static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(ipc_getsecid, selinux_ipc_getsecid), LSM_HOOK_INIT(msg_msg_alloc_security, selinux_msg_msg_alloc_security), - LSM_HOOK_INIT(msg_msg_free_security, selinux_msg_msg_free_security), LSM_HOOK_INIT(msg_queue_alloc_security, selinux_msg_queue_alloc_security), - LSM_HOOK_INIT(msg_queue_free_security, selinux_msg_queue_free_security), LSM_HOOK_INIT(msg_queue_associate, selinux_msg_queue_associate), LSM_HOOK_INIT(msg_queue_msgctl, selinux_msg_queue_msgctl), LSM_HOOK_INIT(msg_queue_msgsnd, selinux_msg_queue_msgsnd), LSM_HOOK_INIT(msg_queue_msgrcv, selinux_msg_queue_msgrcv), LSM_HOOK_INIT(shm_alloc_security, selinux_shm_alloc_security), - LSM_HOOK_INIT(shm_free_security, selinux_shm_free_security), LSM_HOOK_INIT(shm_associate, selinux_shm_associate), LSM_HOOK_INIT(shm_shmctl, selinux_shm_shmctl), LSM_HOOK_INIT(shm_shmat, selinux_shm_shmat), LSM_HOOK_INIT(sem_alloc_security, selinux_sem_alloc_security), - LSM_HOOK_INIT(sem_free_security, selinux_sem_free_security), LSM_HOOK_INIT(sem_associate, selinux_sem_associate), LSM_HOOK_INIT(sem_semctl, selinux_sem_semctl), LSM_HOOK_INIT(sem_semop, selinux_sem_semop), diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h index 539cacf4a572..231262d8eac9 100644 --- a/security/selinux/include/objsec.h +++ b/security/selinux/include/objsec.h @@ -179,13 +179,13 @@ static inline struct inode_security_struct *selinux_inode( static inline struct msg_security_struct *selinux_msg_msg( const struct msg_msg *msg_msg) { - return msg_msg->security; + return msg_msg->security + selinux_blob_sizes.lbs_msg_msg; } static inline struct ipc_security_struct *selinux_ipc( const struct kern_ipc_perm *ipc) { - return ipc->security; + return ipc->security + selinux_blob_sizes.lbs_ipc; } #endif /* _SELINUX_OBJSEC_H_ */ diff --git a/security/smack/smack.h b/security/smack/smack.h index 0adddbeecc62..9c7c95a5c497 100644 --- a/security/smack/smack.h +++ b/security/smack/smack.h @@ -376,12 +376,12 @@ static inline struct inode_smack *smack_inode(const struct inode *inode) static inline struct smack_known **smack_msg_msg(const struct msg_msg *msg) { - return (struct smack_known **)&msg->security; + return msg->security + smack_blob_sizes.lbs_msg_msg; } static inline struct smack_known **smack_ipc(const struct kern_ipc_perm *ipc) { - return (struct smack_known **)&ipc->security; + return ipc->security + smack_blob_sizes.lbs_ipc; } /* diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 154521b6843b..0b848b1f6366 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -2809,23 +2809,12 @@ static int smack_flags_to_may(int flags) */ static int smack_msg_msg_alloc_security(struct msg_msg *msg) { - struct smack_known *skp = smk_of_current(); + struct smack_known **blob = smack_msg_msg(msg); - msg->security = skp; + *blob = smk_of_current(); return 0; } -/** - * smack_msg_msg_free_security - Clear the security blob for msg_msg - * @msg: the object - * - * Clears the blob pointer - */ -static void smack_msg_msg_free_security(struct msg_msg *msg) -{ - msg->security = NULL; -} - /** * smack_of_ipc - the smack pointer for the ipc * @isp: the object @@ -2853,17 +2842,6 @@ static int smack_ipc_alloc_security(struct kern_ipc_perm *isp) return 0; } -/** - * smack_ipc_free_security - Clear the security blob for ipc - * @isp: the object - * - * Clears the blob pointer - */ -static void smack_ipc_free_security(struct kern_ipc_perm *isp) -{ - isp->security = NULL; -} - /** * smk_curacc_shm : check if current has access on shm * @isp : the object @@ -4511,6 +4489,8 @@ struct lsm_blob_sizes smack_blob_sizes __lsm_ro_after_init = { .lbs_cred = sizeof(struct task_smack), .lbs_file = sizeof(struct smack_known *), .lbs_inode = sizeof(struct inode_smack), + .lbs_ipc = sizeof(struct smack_known *), + .lbs_msg_msg = sizeof(struct smack_known *), }; static struct security_hook_list smack_hooks[] __lsm_ro_after_init = { @@ -4581,23 +4561,19 @@ static struct security_hook_list smack_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(ipc_getsecid, smack_ipc_getsecid), LSM_HOOK_INIT(msg_msg_alloc_security, smack_msg_msg_alloc_security), - LSM_HOOK_INIT(msg_msg_free_security, smack_msg_msg_free_security), LSM_HOOK_INIT(msg_queue_alloc_security, smack_ipc_alloc_security), - LSM_HOOK_INIT(msg_queue_free_security, smack_ipc_free_security), LSM_HOOK_INIT(msg_queue_associate, smack_msg_queue_associate), LSM_HOOK_INIT(msg_queue_msgctl, smack_msg_queue_msgctl), LSM_HOOK_INIT(msg_queue_msgsnd, smack_msg_queue_msgsnd), LSM_HOOK_INIT(msg_queue_msgrcv, smack_msg_queue_msgrcv), LSM_HOOK_INIT(shm_alloc_security, smack_ipc_alloc_security), - LSM_HOOK_INIT(shm_free_security, smack_ipc_free_security), LSM_HOOK_INIT(shm_associate, smack_shm_associate), LSM_HOOK_INIT(shm_shmctl, smack_shm_shmctl), LSM_HOOK_INIT(shm_shmat, smack_shm_shmat), LSM_HOOK_INIT(sem_alloc_security, smack_ipc_alloc_security), - LSM_HOOK_INIT(sem_free_security, smack_ipc_free_security), LSM_HOOK_INIT(sem_associate, smack_sem_associate), LSM_HOOK_INIT(sem_semctl, smack_sem_semctl), LSM_HOOK_INIT(sem_semop, smack_sem_semop), -- cgit From 0ada768517dafa1504ef5986ba04f118b7436960 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 8 Jan 2019 16:07:27 +0200 Subject: RDMA/mlx5: Delete declaration of already removed function The implementation of mlx5_core_page_fault_resume() was removed in commit d5d284b829a6 ("{net,IB}/mlx5: Move Page fault EQ and ODP logic to RDMA"). This patch removes declaration too. Fixes: d5d284b829a6 ("{net,IB}/mlx5: Move Page fault EQ and ODP logic to RDMA") Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/linux/mlx5/driver.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 54299251d40d..b6f5839f129a 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -939,10 +939,6 @@ int mlx5_query_odp_caps(struct mlx5_core_dev *dev, struct mlx5_odp_caps *odp_caps); int mlx5_core_query_ib_ppcnt(struct mlx5_core_dev *dev, u8 port_num, void *out, size_t sz); -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING -int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 token, - u32 wq_num, u8 type, int error); -#endif int mlx5_init_rl_table(struct mlx5_core_dev *dev); void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev); -- cgit From 7b5618f4b834330a052958db934c3dffad4a15c2 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 7 Jan 2019 12:15:53 +0100 Subject: ACPI / PMIC: Add support for executing PMIC MIPI sequence elements DSI LCD panels describe an initialization sequence in the Video BIOS Tables using so called MIPI sequences. One possible element in these sequences is a PMIC specific element of 15 bytes. Although this is not really an ACPI opregion, the ACPI opregion code is the closest thing we have. We need to have support for these PMIC specific MIPI sequence elements somwhere. Since we already instantiate a special platform device for Intel PMICs for the ACPI PMIC OpRegion handler to bind to, with PMIC specific implementations of the OpRegion, the handling of MIPI sequence PMIC elements fits very well in the ACPI PMIC OpRegion code. This commit adds a new intel_soc_pmic_exec_mipi_pmic_seq_element() function, which is to be backed by a PMIC specific exec_mipi_pmic_seq_element callback. This function will be called by the i915 code to execture MIPI sequence PMIC elements. Reviewed-by: Mika Westerberg Signed-off-by: Hans de Goede Link: https://patchwork.freedesktop.org/patch/msgid/20190107111556.4510-2-hdegoede@redhat.com --- drivers/acpi/pmic/intel_pmic.c | 52 ++++++++++++++++++++++++++++++++++++++ drivers/acpi/pmic/intel_pmic.h | 2 ++ include/linux/mfd/intel_soc_pmic.h | 3 +++ 3 files changed, 57 insertions(+) (limited to 'include/linux') diff --git a/drivers/acpi/pmic/intel_pmic.c b/drivers/acpi/pmic/intel_pmic.c index ca18e0d23df9..471afeea87c2 100644 --- a/drivers/acpi/pmic/intel_pmic.c +++ b/drivers/acpi/pmic/intel_pmic.c @@ -15,6 +15,7 @@ #include #include +#include #include #include #include "intel_pmic.h" @@ -36,6 +37,8 @@ struct intel_pmic_opregion { struct intel_pmic_regs_handler_ctx ctx; }; +static struct intel_pmic_opregion *intel_pmic_opregion; + static int pmic_get_reg_bit(int address, struct pmic_table *table, int count, int *reg, int *bit) { @@ -304,6 +307,7 @@ int intel_pmic_install_opregion_handler(struct device *dev, acpi_handle handle, } opregion->data = d; + intel_pmic_opregion = opregion; return 0; out_remove_thermal_handler: @@ -319,3 +323,51 @@ out_error: return ret; } EXPORT_SYMBOL_GPL(intel_pmic_install_opregion_handler); + +/** + * intel_soc_pmic_exec_mipi_pmic_seq_element - Execute PMIC MIPI sequence + * @i2c_address: I2C client address for the PMIC + * @reg_address: PMIC register address + * @value: New value for the register bits to change + * @mask: Mask indicating which register bits to change + * + * DSI LCD panels describe an initialization sequence in the i915 VBT (Video + * BIOS Tables) using so called MIPI sequences. One possible element in these + * sequences is a PMIC specific element of 15 bytes. + * + * This function executes these PMIC specific elements sending the embedded + * commands to the PMIC. + * + * Return 0 on success, < 0 on failure. + */ +int intel_soc_pmic_exec_mipi_pmic_seq_element(u16 i2c_address, u32 reg_address, + u32 value, u32 mask) +{ + struct intel_pmic_opregion_data *d; + int ret; + + if (!intel_pmic_opregion) { + pr_warn("%s: No PMIC registered\n", __func__); + return -ENXIO; + } + + d = intel_pmic_opregion->data; + + mutex_lock(&intel_pmic_opregion->lock); + + if (d->exec_mipi_pmic_seq_element) { + ret = d->exec_mipi_pmic_seq_element(intel_pmic_opregion->regmap, + i2c_address, reg_address, + value, mask); + } else { + pr_warn("%s: Not implemented\n", __func__); + pr_warn("%s: i2c-addr: 0x%x reg-addr 0x%x value 0x%x mask 0x%x\n", + __func__, i2c_address, reg_address, value, mask); + ret = -EOPNOTSUPP; + } + + mutex_unlock(&intel_pmic_opregion->lock); + + return ret; +} +EXPORT_SYMBOL_GPL(intel_soc_pmic_exec_mipi_pmic_seq_element); diff --git a/drivers/acpi/pmic/intel_pmic.h b/drivers/acpi/pmic/intel_pmic.h index 095afc96952e..5cd195fabca8 100644 --- a/drivers/acpi/pmic/intel_pmic.h +++ b/drivers/acpi/pmic/intel_pmic.h @@ -15,6 +15,8 @@ struct intel_pmic_opregion_data { int (*update_aux)(struct regmap *r, int reg, int raw_temp); int (*get_policy)(struct regmap *r, int reg, int bit, u64 *value); int (*update_policy)(struct regmap *r, int reg, int bit, int enable); + int (*exec_mipi_pmic_seq_element)(struct regmap *r, u16 i2c_address, + u32 reg_address, u32 value, u32 mask); struct pmic_table *power_table; int power_table_count; struct pmic_table *thermal_table; diff --git a/include/linux/mfd/intel_soc_pmic.h b/include/linux/mfd/intel_soc_pmic.h index ed1dfba5e5f9..bfecd6bd4990 100644 --- a/include/linux/mfd/intel_soc_pmic.h +++ b/include/linux/mfd/intel_soc_pmic.h @@ -26,4 +26,7 @@ struct intel_soc_pmic { struct device *dev; }; +int intel_soc_pmic_exec_mipi_pmic_seq_element(u16 i2c_address, u32 reg_address, + u32 value, u32 mask); + #endif /* __INTEL_SOC_PMIC_H__ */ -- cgit From 5e6acc3e678ed3db746ab4fb53a980861cd711b6 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 12 Dec 2018 15:51:47 -0800 Subject: bcm2835-pm: Move bcm2835-watchdog's DT probe to an MFD. The PM block that the wdt driver was binding to actually has multiple features we want to expose (power domains, reset, watchdog). Move the DT attachment to a MFD driver and make WDT probe against MFD. Signed-off-by: Eric Anholt Reviewed-by: Guenter Roeck Acked-by: Stefan Wahren Signed-off-by: Stefan Wahren --- arch/arm/mach-bcm/Kconfig | 1 + drivers/mfd/Makefile | 1 + drivers/mfd/bcm2835-pm.c | 64 ++++++++++++++++++++++++++++++++++++++++++ drivers/watchdog/bcm2835_wdt.c | 26 ++++++----------- include/linux/mfd/bcm2835-pm.h | 13 +++++++++ 5 files changed, 88 insertions(+), 17 deletions(-) create mode 100644 drivers/mfd/bcm2835-pm.c create mode 100644 include/linux/mfd/bcm2835-pm.h (limited to 'include/linux') diff --git a/arch/arm/mach-bcm/Kconfig b/arch/arm/mach-bcm/Kconfig index a067adf9f1ee..4ef1e55f4a0b 100644 --- a/arch/arm/mach-bcm/Kconfig +++ b/arch/arm/mach-bcm/Kconfig @@ -167,6 +167,7 @@ config ARCH_BCM2835 select BCM2835_TIMER select PINCTRL select PINCTRL_BCM2835 + select MFD_CORE help This enables support for the Broadcom BCM2835 and BCM2836 SoCs. This SoC is used in the Raspberry Pi and Roku 2 devices. diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index 12980a4ad460..ee6fb6af655e 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -10,6 +10,7 @@ obj-$(CONFIG_MFD_88PM805) += 88pm805.o 88pm80x.o obj-$(CONFIG_MFD_ACT8945A) += act8945a.o obj-$(CONFIG_MFD_SM501) += sm501.o obj-$(CONFIG_MFD_ASIC3) += asic3.o tmio_core.o +obj-$(CONFIG_ARCH_BCM2835) += bcm2835-pm.o obj-$(CONFIG_MFD_BCM590XX) += bcm590xx.o obj-$(CONFIG_MFD_BD9571MWV) += bd9571mwv.o cros_ec_core-objs := cros_ec.o diff --git a/drivers/mfd/bcm2835-pm.c b/drivers/mfd/bcm2835-pm.c new file mode 100644 index 000000000000..53839e6a81e7 --- /dev/null +++ b/drivers/mfd/bcm2835-pm.c @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * PM MFD driver for Broadcom BCM2835 + * + * This driver binds to the PM block and creates the MFD device for + * the WDT driver. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static const struct mfd_cell bcm2835_pm_devs[] = { + { .name = "bcm2835-wdt" }, +}; + +static int bcm2835_pm_probe(struct platform_device *pdev) +{ + struct resource *res; + struct device *dev = &pdev->dev; + struct bcm2835_pm *pm; + + pm = devm_kzalloc(dev, sizeof(*pm), GFP_KERNEL); + if (!pm) + return -ENOMEM; + platform_set_drvdata(pdev, pm); + + pm->dev = dev; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + pm->base = devm_ioremap_resource(dev, res); + if (IS_ERR(pm->base)) + return PTR_ERR(pm->base); + + return devm_mfd_add_devices(dev, -1, + bcm2835_pm_devs, ARRAY_SIZE(bcm2835_pm_devs), + NULL, 0, NULL); +} + +static const struct of_device_id bcm2835_pm_of_match[] = { + { .compatible = "brcm,bcm2835-pm-wdt", }, + {}, +}; +MODULE_DEVICE_TABLE(of, bcm2835_pm_of_match); + +static struct platform_driver bcm2835_pm_driver = { + .probe = bcm2835_pm_probe, + .driver = { + .name = "bcm2835-pm", + .of_match_table = bcm2835_pm_of_match, + }, +}; +module_platform_driver(bcm2835_pm_driver); + +MODULE_AUTHOR("Eric Anholt "); +MODULE_DESCRIPTION("Driver for Broadcom BCM2835 PM MFD"); +MODULE_LICENSE("GPL"); diff --git a/drivers/watchdog/bcm2835_wdt.c b/drivers/watchdog/bcm2835_wdt.c index ed05514cc2dc..1834524ae373 100644 --- a/drivers/watchdog/bcm2835_wdt.c +++ b/drivers/watchdog/bcm2835_wdt.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include @@ -47,6 +48,8 @@ struct bcm2835_wdt { spinlock_t lock; }; +static struct bcm2835_wdt *bcm2835_power_off_wdt; + static unsigned int heartbeat; static bool nowayout = WATCHDOG_NOWAYOUT; @@ -148,10 +151,7 @@ static struct watchdog_device bcm2835_wdt_wdd = { */ static void bcm2835_power_off(void) { - struct device_node *np = - of_find_compatible_node(NULL, NULL, "brcm,bcm2835-pm-wdt"); - struct platform_device *pdev = of_find_device_by_node(np); - struct bcm2835_wdt *wdt = platform_get_drvdata(pdev); + struct bcm2835_wdt *wdt = bcm2835_power_off_wdt; u32 val; /* @@ -169,7 +169,7 @@ static void bcm2835_power_off(void) static int bcm2835_wdt_probe(struct platform_device *pdev) { - struct resource *res; + struct bcm2835_pm *pm = dev_get_drvdata(pdev->dev.parent); struct device *dev = &pdev->dev; struct bcm2835_wdt *wdt; int err; @@ -181,10 +181,7 @@ static int bcm2835_wdt_probe(struct platform_device *pdev) spin_lock_init(&wdt->lock); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - wdt->base = devm_ioremap_resource(dev, res); - if (IS_ERR(wdt->base)) - return PTR_ERR(wdt->base); + wdt->base = pm->base; watchdog_set_drvdata(&bcm2835_wdt_wdd, wdt); watchdog_init_timeout(&bcm2835_wdt_wdd, heartbeat, dev); @@ -211,8 +208,10 @@ static int bcm2835_wdt_probe(struct platform_device *pdev) return err; } - if (pm_power_off == NULL) + if (pm_power_off == NULL) { pm_power_off = bcm2835_power_off; + bcm2835_power_off_wdt = wdt; + } dev_info(dev, "Broadcom BCM2835 watchdog timer"); return 0; @@ -226,18 +225,11 @@ static int bcm2835_wdt_remove(struct platform_device *pdev) return 0; } -static const struct of_device_id bcm2835_wdt_of_match[] = { - { .compatible = "brcm,bcm2835-pm-wdt", }, - {}, -}; -MODULE_DEVICE_TABLE(of, bcm2835_wdt_of_match); - static struct platform_driver bcm2835_wdt_driver = { .probe = bcm2835_wdt_probe, .remove = bcm2835_wdt_remove, .driver = { .name = "bcm2835-wdt", - .of_match_table = bcm2835_wdt_of_match, }, }; module_platform_driver(bcm2835_wdt_driver); diff --git a/include/linux/mfd/bcm2835-pm.h b/include/linux/mfd/bcm2835-pm.h new file mode 100644 index 000000000000..b7d0ee1feffa --- /dev/null +++ b/include/linux/mfd/bcm2835-pm.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + +#ifndef BCM2835_MFD_PM_H +#define BCM2835_MFD_PM_H + +#include + +struct bcm2835_pm { + struct device *dev; + void __iomem *base; +}; + +#endif /* BCM2835_MFD_PM_H */ -- cgit From 670c672608a1ffcbc7ac0f872734843593bb8b15 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 12 Dec 2018 15:51:48 -0800 Subject: soc: bcm: bcm2835-pm: Add support for power domains under a new binding. This provides a free software alternative to raspberrypi-power.c's firmware calls to manage power domains. It also exposes a reset line, where previously the vc4 driver had to try to force power off the domain in order to trigger a reset. Signed-off-by: Eric Anholt Acked-by: Rob Herring Acked-by: Stefan Wahren Signed-off-by: Stefan Wahren --- drivers/mfd/bcm2835-pm.c | 36 +- drivers/soc/bcm/Kconfig | 11 + drivers/soc/bcm/Makefile | 1 + drivers/soc/bcm/bcm2835-power.c | 661 +++++++++++++++++++++++++++++++++++ include/dt-bindings/soc/bcm2835-pm.h | 28 ++ include/linux/mfd/bcm2835-pm.h | 1 + 6 files changed, 734 insertions(+), 4 deletions(-) create mode 100644 drivers/soc/bcm/bcm2835-power.c create mode 100644 include/dt-bindings/soc/bcm2835-pm.h (limited to 'include/linux') diff --git a/drivers/mfd/bcm2835-pm.c b/drivers/mfd/bcm2835-pm.c index 53839e6a81e7..42fe67f1538e 100644 --- a/drivers/mfd/bcm2835-pm.c +++ b/drivers/mfd/bcm2835-pm.c @@ -3,7 +3,7 @@ * PM MFD driver for Broadcom BCM2835 * * This driver binds to the PM block and creates the MFD device for - * the WDT driver. + * the WDT and power drivers. */ #include @@ -21,11 +21,16 @@ static const struct mfd_cell bcm2835_pm_devs[] = { { .name = "bcm2835-wdt" }, }; +static const struct mfd_cell bcm2835_power_devs[] = { + { .name = "bcm2835-power" }, +}; + static int bcm2835_pm_probe(struct platform_device *pdev) { struct resource *res; struct device *dev = &pdev->dev; struct bcm2835_pm *pm; + int ret; pm = devm_kzalloc(dev, sizeof(*pm), GFP_KERNEL); if (!pm) @@ -39,13 +44,36 @@ static int bcm2835_pm_probe(struct platform_device *pdev) if (IS_ERR(pm->base)) return PTR_ERR(pm->base); - return devm_mfd_add_devices(dev, -1, - bcm2835_pm_devs, ARRAY_SIZE(bcm2835_pm_devs), - NULL, 0, NULL); + ret = devm_mfd_add_devices(dev, -1, + bcm2835_pm_devs, ARRAY_SIZE(bcm2835_pm_devs), + NULL, 0, NULL); + if (ret) + return ret; + + /* We'll use the presence of the AXI ASB regs in the + * bcm2835-pm binding as the key for whether we can reference + * the full PM register range and support power domains. + */ + res = platform_get_resource(pdev, IORESOURCE_MEM, 1); + if (res) { + pm->asb = devm_ioremap_resource(dev, res); + if (IS_ERR(pm->asb)) + return PTR_ERR(pm->asb); + + ret = devm_mfd_add_devices(dev, -1, + bcm2835_power_devs, + ARRAY_SIZE(bcm2835_power_devs), + NULL, 0, NULL); + if (ret) + return ret; + } + + return 0; } static const struct of_device_id bcm2835_pm_of_match[] = { { .compatible = "brcm,bcm2835-pm-wdt", }, + { .compatible = "brcm,bcm2835-pm", }, {}, }; MODULE_DEVICE_TABLE(of, bcm2835_pm_of_match); diff --git a/drivers/soc/bcm/Kconfig b/drivers/soc/bcm/Kconfig index 055a845ed979..fe1af29560e9 100644 --- a/drivers/soc/bcm/Kconfig +++ b/drivers/soc/bcm/Kconfig @@ -1,5 +1,16 @@ menu "Broadcom SoC drivers" +config BCM2835_POWER + bool "BCM2835 power domain driver" + depends on ARCH_BCM2835 || (COMPILE_TEST && OF) + select PM_GENERIC_DOMAINS if PM + select RESET_CONTROLLER + help + This enables support for the BCM2835 power domains and reset + controller. Any usage of power domains by the Raspberry Pi + firmware means that Linux usage of the same power domain + must be accessed using the RASPBERRYPI_POWER driver + config RASPBERRYPI_POWER bool "Raspberry Pi power domain driver" depends on ARCH_BCM2835 || (COMPILE_TEST && OF) diff --git a/drivers/soc/bcm/Makefile b/drivers/soc/bcm/Makefile index dc4fced72d21..c81df4b2403c 100644 --- a/drivers/soc/bcm/Makefile +++ b/drivers/soc/bcm/Makefile @@ -1,2 +1,3 @@ +obj-$(CONFIG_BCM2835_POWER) += bcm2835-power.o obj-$(CONFIG_RASPBERRYPI_POWER) += raspberrypi-power.o obj-$(CONFIG_SOC_BRCMSTB) += brcmstb/ diff --git a/drivers/soc/bcm/bcm2835-power.c b/drivers/soc/bcm/bcm2835-power.c new file mode 100644 index 000000000000..48412957ec7a --- /dev/null +++ b/drivers/soc/bcm/bcm2835-power.c @@ -0,0 +1,661 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Power domain driver for Broadcom BCM2835 + * + * Copyright (C) 2018 Broadcom + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define PM_GNRIC 0x00 +#define PM_AUDIO 0x04 +#define PM_STATUS 0x18 +#define PM_RSTC 0x1c +#define PM_RSTS 0x20 +#define PM_WDOG 0x24 +#define PM_PADS0 0x28 +#define PM_PADS2 0x2c +#define PM_PADS3 0x30 +#define PM_PADS4 0x34 +#define PM_PADS5 0x38 +#define PM_PADS6 0x3c +#define PM_CAM0 0x44 +#define PM_CAM0_LDOHPEN BIT(2) +#define PM_CAM0_LDOLPEN BIT(1) +#define PM_CAM0_CTRLEN BIT(0) + +#define PM_CAM1 0x48 +#define PM_CAM1_LDOHPEN BIT(2) +#define PM_CAM1_LDOLPEN BIT(1) +#define PM_CAM1_CTRLEN BIT(0) + +#define PM_CCP2TX 0x4c +#define PM_CCP2TX_LDOEN BIT(1) +#define PM_CCP2TX_CTRLEN BIT(0) + +#define PM_DSI0 0x50 +#define PM_DSI0_LDOHPEN BIT(2) +#define PM_DSI0_LDOLPEN BIT(1) +#define PM_DSI0_CTRLEN BIT(0) + +#define PM_DSI1 0x54 +#define PM_DSI1_LDOHPEN BIT(2) +#define PM_DSI1_LDOLPEN BIT(1) +#define PM_DSI1_CTRLEN BIT(0) + +#define PM_HDMI 0x58 +#define PM_HDMI_RSTDR BIT(19) +#define PM_HDMI_LDOPD BIT(1) +#define PM_HDMI_CTRLEN BIT(0) + +#define PM_USB 0x5c +/* The power gates must be enabled with this bit before enabling the LDO in the + * USB block. + */ +#define PM_USB_CTRLEN BIT(0) + +#define PM_PXLDO 0x60 +#define PM_PXBG 0x64 +#define PM_DFT 0x68 +#define PM_SMPS 0x6c +#define PM_XOSC 0x70 +#define PM_SPAREW 0x74 +#define PM_SPARER 0x78 +#define PM_AVS_RSTDR 0x7c +#define PM_AVS_STAT 0x80 +#define PM_AVS_EVENT 0x84 +#define PM_AVS_INTEN 0x88 +#define PM_DUMMY 0xfc + +#define PM_IMAGE 0x108 +#define PM_GRAFX 0x10c +#define PM_PROC 0x110 +#define PM_ENAB BIT(12) +#define PM_ISPRSTN BIT(8) +#define PM_H264RSTN BIT(7) +#define PM_PERIRSTN BIT(6) +#define PM_V3DRSTN BIT(6) +#define PM_ISFUNC BIT(5) +#define PM_MRDONE BIT(4) +#define PM_MEMREP BIT(3) +#define PM_ISPOW BIT(2) +#define PM_POWOK BIT(1) +#define PM_POWUP BIT(0) +#define PM_INRUSH_SHIFT 13 +#define PM_INRUSH_3_5_MA 0 +#define PM_INRUSH_5_MA 1 +#define PM_INRUSH_10_MA 2 +#define PM_INRUSH_20_MA 3 +#define PM_INRUSH_MASK (3 << PM_INRUSH_SHIFT) + +#define PM_PASSWORD 0x5a000000 + +#define PM_WDOG_TIME_SET 0x000fffff +#define PM_RSTC_WRCFG_CLR 0xffffffcf +#define PM_RSTS_HADWRH_SET 0x00000040 +#define PM_RSTC_WRCFG_SET 0x00000030 +#define PM_RSTC_WRCFG_FULL_RESET 0x00000020 +#define PM_RSTC_RESET 0x00000102 + +#define PM_READ(reg) readl(power->base + (reg)) +#define PM_WRITE(reg, val) writel(PM_PASSWORD | (val), power->base + (reg)) + +#define ASB_BRDG_VERSION 0x00 +#define ASB_CPR_CTRL 0x04 + +#define ASB_V3D_S_CTRL 0x08 +#define ASB_V3D_M_CTRL 0x0c +#define ASB_ISP_S_CTRL 0x10 +#define ASB_ISP_M_CTRL 0x14 +#define ASB_H264_S_CTRL 0x18 +#define ASB_H264_M_CTRL 0x1c + +#define ASB_REQ_STOP BIT(0) +#define ASB_ACK BIT(1) +#define ASB_EMPTY BIT(2) +#define ASB_FULL BIT(3) + +#define ASB_AXI_BRDG_ID 0x20 + +#define ASB_READ(reg) readl(power->asb + (reg)) +#define ASB_WRITE(reg, val) writel(PM_PASSWORD | (val), power->asb + (reg)) + +struct bcm2835_power_domain { + struct generic_pm_domain base; + struct bcm2835_power *power; + u32 domain; + struct clk *clk; +}; + +struct bcm2835_power { + struct device *dev; + /* PM registers. */ + void __iomem *base; + /* AXI Async bridge registers. */ + void __iomem *asb; + + struct genpd_onecell_data pd_xlate; + struct bcm2835_power_domain domains[BCM2835_POWER_DOMAIN_COUNT]; + struct reset_controller_dev reset; +}; + +static int bcm2835_asb_enable(struct bcm2835_power *power, u32 reg) +{ + u64 start = ktime_get_ns(); + + /* Enable the module's async AXI bridges. */ + ASB_WRITE(reg, ASB_READ(reg) & ~ASB_REQ_STOP); + while (ASB_READ(reg) & ASB_ACK) { + cpu_relax(); + if (ktime_get_ns() - start >= 1000) + return -ETIMEDOUT; + } + + return 0; +} + +static int bcm2835_asb_disable(struct bcm2835_power *power, u32 reg) +{ + u64 start = ktime_get_ns(); + + /* Enable the module's async AXI bridges. */ + ASB_WRITE(reg, ASB_READ(reg) | ASB_REQ_STOP); + while (!(ASB_READ(reg) & ASB_ACK)) { + cpu_relax(); + if (ktime_get_ns() - start >= 1000) + return -ETIMEDOUT; + } + + return 0; +} + +static int bcm2835_power_power_off(struct bcm2835_power_domain *pd, u32 pm_reg) +{ + struct bcm2835_power *power = pd->power; + + /* Enable functional isolation */ + PM_WRITE(pm_reg, PM_READ(pm_reg) & ~PM_ISFUNC); + + /* Enable electrical isolation */ + PM_WRITE(pm_reg, PM_READ(pm_reg) & ~PM_ISPOW); + + /* Open the power switches. */ + PM_WRITE(pm_reg, PM_READ(pm_reg) & ~PM_POWUP); + + return 0; +} + +static int bcm2835_power_power_on(struct bcm2835_power_domain *pd, u32 pm_reg) +{ + struct bcm2835_power *power = pd->power; + struct device *dev = power->dev; + u64 start; + int ret; + int inrush; + bool powok; + + /* If it was already powered on by the fw, leave it that way. */ + if (PM_READ(pm_reg) & PM_POWUP) + return 0; + + /* Enable power. Allowing too much current at once may result + * in POWOK never getting set, so start low and ramp it up as + * necessary to succeed. + */ + powok = false; + for (inrush = PM_INRUSH_3_5_MA; inrush <= PM_INRUSH_20_MA; inrush++) { + PM_WRITE(pm_reg, + (PM_READ(pm_reg) & ~PM_INRUSH_MASK) | + (inrush << PM_INRUSH_SHIFT) | + PM_POWUP); + + start = ktime_get_ns(); + while (!(powok = !!(PM_READ(pm_reg) & PM_POWOK))) { + cpu_relax(); + if (ktime_get_ns() - start >= 3000) + break; + } + } + if (!powok) { + dev_err(dev, "Timeout waiting for %s power OK\n", + pd->base.name); + ret = -ETIMEDOUT; + goto err_disable_powup; + } + + /* Disable electrical isolation */ + PM_WRITE(pm_reg, PM_READ(pm_reg) | PM_ISPOW); + + /* Repair memory */ + PM_WRITE(pm_reg, PM_READ(pm_reg) | PM_MEMREP); + start = ktime_get_ns(); + while (!(PM_READ(pm_reg) & PM_MRDONE)) { + cpu_relax(); + if (ktime_get_ns() - start >= 1000) { + dev_err(dev, "Timeout waiting for %s memory repair\n", + pd->base.name); + ret = -ETIMEDOUT; + goto err_disable_ispow; + } + } + + /* Disable functional isolation */ + PM_WRITE(pm_reg, PM_READ(pm_reg) | PM_ISFUNC); + + return 0; + +err_disable_ispow: + PM_WRITE(pm_reg, PM_READ(pm_reg) & ~PM_ISPOW); +err_disable_powup: + PM_WRITE(pm_reg, PM_READ(pm_reg) & ~(PM_POWUP | PM_INRUSH_MASK)); + return ret; +} + +static int bcm2835_asb_power_on(struct bcm2835_power_domain *pd, + u32 pm_reg, + u32 asb_m_reg, + u32 asb_s_reg, + u32 reset_flags) +{ + struct bcm2835_power *power = pd->power; + int ret; + + ret = clk_prepare_enable(pd->clk); + if (ret) { + dev_err(power->dev, "Failed to enable clock for %s\n", + pd->base.name); + return ret; + } + + /* Wait 32 clocks for reset to propagate, 1 us will be enough */ + udelay(1); + + clk_disable_unprepare(pd->clk); + + /* Deassert the resets. */ + PM_WRITE(pm_reg, PM_READ(pm_reg) | reset_flags); + + ret = clk_prepare_enable(pd->clk); + if (ret) { + dev_err(power->dev, "Failed to enable clock for %s\n", + pd->base.name); + goto err_enable_resets; + } + + ret = bcm2835_asb_enable(power, asb_m_reg); + if (ret) { + dev_err(power->dev, "Failed to enable ASB master for %s\n", + pd->base.name); + goto err_disable_clk; + } + ret = bcm2835_asb_enable(power, asb_s_reg); + if (ret) { + dev_err(power->dev, "Failed to enable ASB slave for %s\n", + pd->base.name); + goto err_disable_asb_master; + } + + return 0; + +err_disable_asb_master: + bcm2835_asb_disable(power, asb_m_reg); +err_disable_clk: + clk_disable_unprepare(pd->clk); +err_enable_resets: + PM_WRITE(pm_reg, PM_READ(pm_reg) & ~reset_flags); + return ret; +} + +static int bcm2835_asb_power_off(struct bcm2835_power_domain *pd, + u32 pm_reg, + u32 asb_m_reg, + u32 asb_s_reg, + u32 reset_flags) +{ + struct bcm2835_power *power = pd->power; + int ret; + + ret = bcm2835_asb_disable(power, asb_s_reg); + if (ret) { + dev_warn(power->dev, "Failed to disable ASB slave for %s\n", + pd->base.name); + return ret; + } + ret = bcm2835_asb_disable(power, asb_m_reg); + if (ret) { + dev_warn(power->dev, "Failed to disable ASB master for %s\n", + pd->base.name); + bcm2835_asb_enable(power, asb_s_reg); + return ret; + } + + clk_disable_unprepare(pd->clk); + + /* Assert the resets. */ + PM_WRITE(pm_reg, PM_READ(pm_reg) & ~reset_flags); + + return 0; +} + +static int bcm2835_power_pd_power_on(struct generic_pm_domain *domain) +{ + struct bcm2835_power_domain *pd = + container_of(domain, struct bcm2835_power_domain, base); + struct bcm2835_power *power = pd->power; + + switch (pd->domain) { + case BCM2835_POWER_DOMAIN_GRAFX: + return bcm2835_power_power_on(pd, PM_GRAFX); + + case BCM2835_POWER_DOMAIN_GRAFX_V3D: + return bcm2835_asb_power_on(pd, PM_GRAFX, + ASB_V3D_M_CTRL, ASB_V3D_S_CTRL, + PM_V3DRSTN); + + case BCM2835_POWER_DOMAIN_IMAGE: + return bcm2835_power_power_on(pd, PM_IMAGE); + + case BCM2835_POWER_DOMAIN_IMAGE_PERI: + return bcm2835_asb_power_on(pd, PM_IMAGE, + 0, 0, + PM_PERIRSTN); + + case BCM2835_POWER_DOMAIN_IMAGE_ISP: + return bcm2835_asb_power_on(pd, PM_IMAGE, + ASB_ISP_M_CTRL, ASB_ISP_S_CTRL, + PM_ISPRSTN); + + case BCM2835_POWER_DOMAIN_IMAGE_H264: + return bcm2835_asb_power_on(pd, PM_IMAGE, + ASB_H264_M_CTRL, ASB_H264_S_CTRL, + PM_H264RSTN); + + case BCM2835_POWER_DOMAIN_USB: + PM_WRITE(PM_USB, PM_USB_CTRLEN); + return 0; + + case BCM2835_POWER_DOMAIN_DSI0: + PM_WRITE(PM_DSI0, PM_DSI0_CTRLEN); + PM_WRITE(PM_DSI0, PM_DSI0_CTRLEN | PM_DSI0_LDOHPEN); + return 0; + + case BCM2835_POWER_DOMAIN_DSI1: + PM_WRITE(PM_DSI1, PM_DSI1_CTRLEN); + PM_WRITE(PM_DSI1, PM_DSI1_CTRLEN | PM_DSI1_LDOHPEN); + return 0; + + case BCM2835_POWER_DOMAIN_CCP2TX: + PM_WRITE(PM_CCP2TX, PM_CCP2TX_CTRLEN); + PM_WRITE(PM_CCP2TX, PM_CCP2TX_CTRLEN | PM_CCP2TX_LDOEN); + return 0; + + case BCM2835_POWER_DOMAIN_HDMI: + PM_WRITE(PM_HDMI, PM_READ(PM_HDMI) | PM_HDMI_RSTDR); + PM_WRITE(PM_HDMI, PM_READ(PM_HDMI) | PM_HDMI_CTRLEN); + PM_WRITE(PM_HDMI, PM_READ(PM_HDMI) & ~PM_HDMI_LDOPD); + usleep_range(100, 200); + PM_WRITE(PM_HDMI, PM_READ(PM_HDMI) & ~PM_HDMI_RSTDR); + return 0; + + default: + dev_err(power->dev, "Invalid domain %d\n", pd->domain); + return -EINVAL; + } +} + +static int bcm2835_power_pd_power_off(struct generic_pm_domain *domain) +{ + struct bcm2835_power_domain *pd = + container_of(domain, struct bcm2835_power_domain, base); + struct bcm2835_power *power = pd->power; + + switch (pd->domain) { + case BCM2835_POWER_DOMAIN_GRAFX: + return bcm2835_power_power_off(pd, PM_GRAFX); + + case BCM2835_POWER_DOMAIN_GRAFX_V3D: + return bcm2835_asb_power_off(pd, PM_GRAFX, + ASB_V3D_M_CTRL, ASB_V3D_S_CTRL, + PM_V3DRSTN); + + case BCM2835_POWER_DOMAIN_IMAGE: + return bcm2835_power_power_off(pd, PM_IMAGE); + + case BCM2835_POWER_DOMAIN_IMAGE_PERI: + return bcm2835_asb_power_off(pd, PM_IMAGE, + 0, 0, + PM_PERIRSTN); + + case BCM2835_POWER_DOMAIN_IMAGE_ISP: + return bcm2835_asb_power_off(pd, PM_IMAGE, + ASB_ISP_M_CTRL, ASB_ISP_S_CTRL, + PM_ISPRSTN); + + case BCM2835_POWER_DOMAIN_IMAGE_H264: + return bcm2835_asb_power_off(pd, PM_IMAGE, + ASB_H264_M_CTRL, ASB_H264_S_CTRL, + PM_H264RSTN); + + case BCM2835_POWER_DOMAIN_USB: + PM_WRITE(PM_USB, 0); + return 0; + + case BCM2835_POWER_DOMAIN_DSI0: + PM_WRITE(PM_DSI0, PM_DSI0_CTRLEN); + PM_WRITE(PM_DSI0, 0); + return 0; + + case BCM2835_POWER_DOMAIN_DSI1: + PM_WRITE(PM_DSI1, PM_DSI1_CTRLEN); + PM_WRITE(PM_DSI1, 0); + return 0; + + case BCM2835_POWER_DOMAIN_CCP2TX: + PM_WRITE(PM_CCP2TX, PM_CCP2TX_CTRLEN); + PM_WRITE(PM_CCP2TX, 0); + return 0; + + case BCM2835_POWER_DOMAIN_HDMI: + PM_WRITE(PM_HDMI, PM_READ(PM_HDMI) | PM_HDMI_LDOPD); + PM_WRITE(PM_HDMI, PM_READ(PM_HDMI) & ~PM_HDMI_CTRLEN); + return 0; + + default: + dev_err(power->dev, "Invalid domain %d\n", pd->domain); + return -EINVAL; + } +} + +static void +bcm2835_init_power_domain(struct bcm2835_power *power, + int pd_xlate_index, const char *name) +{ + struct device *dev = power->dev; + struct bcm2835_power_domain *dom = &power->domains[pd_xlate_index]; + + dom->clk = devm_clk_get(dev->parent, name); + + dom->base.name = name; + dom->base.power_on = bcm2835_power_pd_power_on; + dom->base.power_off = bcm2835_power_pd_power_off; + + dom->domain = pd_xlate_index; + dom->power = power; + + /* XXX: on/off at boot? */ + pm_genpd_init(&dom->base, NULL, true); + + power->pd_xlate.domains[pd_xlate_index] = &dom->base; +} + +/** bcm2835_reset_reset - Resets a block that has a reset line in the + * PM block. + * + * The consumer of the reset controller must have the power domain up + * -- there's no reset ability with the power domain down. To reset + * the sub-block, we just disable its access to memory through the + * ASB, reset, and re-enable. + */ +static int bcm2835_reset_reset(struct reset_controller_dev *rcdev, + unsigned long id) +{ + struct bcm2835_power *power = container_of(rcdev, struct bcm2835_power, + reset); + struct bcm2835_power_domain *pd; + int ret; + + switch (id) { + case BCM2835_RESET_V3D: + pd = &power->domains[BCM2835_POWER_DOMAIN_GRAFX_V3D]; + break; + case BCM2835_RESET_H264: + pd = &power->domains[BCM2835_POWER_DOMAIN_IMAGE_H264]; + break; + case BCM2835_RESET_ISP: + pd = &power->domains[BCM2835_POWER_DOMAIN_IMAGE_ISP]; + break; + default: + dev_err(power->dev, "Bad reset id %ld\n", id); + return -EINVAL; + } + + ret = bcm2835_power_pd_power_off(&pd->base); + if (ret) + return ret; + + return bcm2835_power_pd_power_on(&pd->base); +} + +static int bcm2835_reset_status(struct reset_controller_dev *rcdev, + unsigned long id) +{ + struct bcm2835_power *power = container_of(rcdev, struct bcm2835_power, + reset); + + switch (id) { + case BCM2835_RESET_V3D: + return !PM_READ(PM_GRAFX & PM_V3DRSTN); + case BCM2835_RESET_H264: + return !PM_READ(PM_IMAGE & PM_H264RSTN); + case BCM2835_RESET_ISP: + return !PM_READ(PM_IMAGE & PM_ISPRSTN); + default: + return -EINVAL; + } +} + +const struct reset_control_ops bcm2835_reset_ops = { + .reset = bcm2835_reset_reset, + .status = bcm2835_reset_status, +}; + +static const char *const power_domain_names[] = { + [BCM2835_POWER_DOMAIN_GRAFX] = "grafx", + [BCM2835_POWER_DOMAIN_GRAFX_V3D] = "v3d", + + [BCM2835_POWER_DOMAIN_IMAGE] = "image", + [BCM2835_POWER_DOMAIN_IMAGE_PERI] = "peri_image", + [BCM2835_POWER_DOMAIN_IMAGE_H264] = "h264", + [BCM2835_POWER_DOMAIN_IMAGE_ISP] = "isp", + + [BCM2835_POWER_DOMAIN_USB] = "usb", + [BCM2835_POWER_DOMAIN_DSI0] = "dsi0", + [BCM2835_POWER_DOMAIN_DSI1] = "dsi1", + [BCM2835_POWER_DOMAIN_CAM0] = "cam0", + [BCM2835_POWER_DOMAIN_CAM1] = "cam1", + [BCM2835_POWER_DOMAIN_CCP2TX] = "ccp2tx", + [BCM2835_POWER_DOMAIN_HDMI] = "hdmi", +}; + +static int bcm2835_power_probe(struct platform_device *pdev) +{ + struct bcm2835_pm *pm = dev_get_drvdata(pdev->dev.parent); + struct device *dev = &pdev->dev; + struct bcm2835_power *power; + static const struct { + int parent, child; + } domain_deps[] = { + { BCM2835_POWER_DOMAIN_GRAFX, BCM2835_POWER_DOMAIN_GRAFX_V3D }, + { BCM2835_POWER_DOMAIN_IMAGE, BCM2835_POWER_DOMAIN_IMAGE_PERI }, + { BCM2835_POWER_DOMAIN_IMAGE, BCM2835_POWER_DOMAIN_IMAGE_H264 }, + { BCM2835_POWER_DOMAIN_IMAGE, BCM2835_POWER_DOMAIN_IMAGE_ISP }, + { BCM2835_POWER_DOMAIN_IMAGE_PERI, BCM2835_POWER_DOMAIN_USB }, + { BCM2835_POWER_DOMAIN_IMAGE_PERI, BCM2835_POWER_DOMAIN_CAM0 }, + { BCM2835_POWER_DOMAIN_IMAGE_PERI, BCM2835_POWER_DOMAIN_CAM1 }, + }; + int ret, i; + u32 id; + + power = devm_kzalloc(dev, sizeof(*power), GFP_KERNEL); + if (!power) + return -ENOMEM; + platform_set_drvdata(pdev, power); + + power->dev = dev; + power->base = pm->base; + power->asb = pm->asb; + + id = ASB_READ(ASB_AXI_BRDG_ID); + if (id != 0x62726467 /* "BRDG" */) { + dev_err(dev, "ASB register ID returned 0x%08x\n", id); + return -ENODEV; + } + + power->pd_xlate.domains = devm_kcalloc(dev, + ARRAY_SIZE(power_domain_names), + sizeof(*power->pd_xlate.domains), + GFP_KERNEL); + if (!power->pd_xlate.domains) + return -ENOMEM; + + power->pd_xlate.num_domains = ARRAY_SIZE(power_domain_names); + + for (i = 0; i < ARRAY_SIZE(power_domain_names); i++) + bcm2835_init_power_domain(power, i, power_domain_names[i]); + + for (i = 0; i < ARRAY_SIZE(domain_deps); i++) { + pm_genpd_add_subdomain(&power->domains[domain_deps[i].parent].base, + &power->domains[domain_deps[i].child].base); + } + + power->reset.owner = THIS_MODULE; + power->reset.nr_resets = BCM2835_RESET_COUNT; + power->reset.ops = &bcm2835_reset_ops; + power->reset.of_node = dev->parent->of_node; + + ret = devm_reset_controller_register(dev, &power->reset); + if (ret) + return ret; + + of_genpd_add_provider_onecell(dev->parent->of_node, &power->pd_xlate); + + dev_info(dev, "Broadcom BCM2835 power domains driver"); + return 0; +} + +static int bcm2835_power_remove(struct platform_device *pdev) +{ + return 0; +} + +static struct platform_driver bcm2835_power_driver = { + .probe = bcm2835_power_probe, + .remove = bcm2835_power_remove, + .driver = { + .name = "bcm2835-power", + }, +}; +module_platform_driver(bcm2835_power_driver); + +MODULE_AUTHOR("Eric Anholt "); +MODULE_DESCRIPTION("Driver for Broadcom BCM2835 PM power domains and reset"); +MODULE_LICENSE("GPL"); diff --git a/include/dt-bindings/soc/bcm2835-pm.h b/include/dt-bindings/soc/bcm2835-pm.h new file mode 100644 index 000000000000..153d75b8d99f --- /dev/null +++ b/include/dt-bindings/soc/bcm2835-pm.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: (GPL-2.0+ OR MIT) */ + +#ifndef _DT_BINDINGS_ARM_BCM2835_PM_H +#define _DT_BINDINGS_ARM_BCM2835_PM_H + +#define BCM2835_POWER_DOMAIN_GRAFX 0 +#define BCM2835_POWER_DOMAIN_GRAFX_V3D 1 +#define BCM2835_POWER_DOMAIN_IMAGE 2 +#define BCM2835_POWER_DOMAIN_IMAGE_PERI 3 +#define BCM2835_POWER_DOMAIN_IMAGE_ISP 4 +#define BCM2835_POWER_DOMAIN_IMAGE_H264 5 +#define BCM2835_POWER_DOMAIN_USB 6 +#define BCM2835_POWER_DOMAIN_DSI0 7 +#define BCM2835_POWER_DOMAIN_DSI1 8 +#define BCM2835_POWER_DOMAIN_CAM0 9 +#define BCM2835_POWER_DOMAIN_CAM1 10 +#define BCM2835_POWER_DOMAIN_CCP2TX 11 +#define BCM2835_POWER_DOMAIN_HDMI 12 + +#define BCM2835_POWER_DOMAIN_COUNT 13 + +#define BCM2835_RESET_V3D 0 +#define BCM2835_RESET_ISP 1 +#define BCM2835_RESET_H264 2 + +#define BCM2835_RESET_COUNT 3 + +#endif /* _DT_BINDINGS_ARM_BCM2835_PM_H */ diff --git a/include/linux/mfd/bcm2835-pm.h b/include/linux/mfd/bcm2835-pm.h index b7d0ee1feffa..ed37dc40e82a 100644 --- a/include/linux/mfd/bcm2835-pm.h +++ b/include/linux/mfd/bcm2835-pm.h @@ -8,6 +8,7 @@ struct bcm2835_pm { struct device *dev; void __iomem *base; + void __iomem *asb; }; #endif /* BCM2835_MFD_PM_H */ -- cgit From 19e99de9a53f9ece6baf8e9a15428aedd4b20c86 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 8 Jan 2019 10:15:36 +0100 Subject: ARM: davinci: remove dead code related to MAC address reading There are no more users of davinci_get_mac_addr(). Remove it. Signed-off-by: Bartosz Golaszewski Signed-off-by: Sekhar Nori --- arch/arm/mach-davinci/common.c | 15 --------------- include/linux/davinci_emac.h | 1 - 2 files changed, 16 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-davinci/common.c b/arch/arm/mach-davinci/common.c index e1d0f0d841ff..0c638fe15dcb 100644 --- a/arch/arm/mach-davinci/common.c +++ b/arch/arm/mach-davinci/common.c @@ -26,21 +26,6 @@ EXPORT_SYMBOL(davinci_soc_info); void __iomem *davinci_intc_base; int davinci_intc_type; -void davinci_get_mac_addr(struct nvmem_device *nvmem, void *context) -{ - char *mac_addr = davinci_soc_info.emac_pdata->mac_addr; - off_t offset = (off_t)context; - - if (!IS_BUILTIN(CONFIG_NVMEM)) { - pr_warn("Cannot read MAC addr from EEPROM without CONFIG_NVMEM\n"); - return; - } - - /* Read MAC addr from EEPROM */ - if (nvmem_device_read(nvmem, offset, ETH_ALEN, mac_addr) == ETH_ALEN) - pr_info("Read MAC addr from EEPROM: %pM\n", mac_addr); -} - static int __init davinci_init_id(struct davinci_soc_info *soc_info) { int i; diff --git a/include/linux/davinci_emac.h b/include/linux/davinci_emac.h index 05b97144d342..28e6cf1356da 100644 --- a/include/linux/davinci_emac.h +++ b/include/linux/davinci_emac.h @@ -46,5 +46,4 @@ enum { EMAC_VERSION_2, /* DM646x */ }; -void davinci_get_mac_addr(struct nvmem_device *nvmem, void *context); #endif -- cgit From cc2d22477779f189595db5c515bd5ef9c75a1f35 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Mon, 7 Jan 2019 20:49:39 +0100 Subject: pwm: Drop per-chip dbg_show callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This callback was introduced in commit 62099abf67a2 ("pwm: Add debugfs interface") in 2012 and up to now there is not a single user. So drop this unused code. Signed-off-by: Uwe Kleine-König [thierry.reding@gmail.com: remove kerneldoc for ->dbg_show()] Signed-off-by: Thierry Reding --- drivers/pwm/core.c | 5 +---- include/linux/pwm.h | 4 ---- 2 files changed, 1 insertion(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index 253a459fe0d8..3149204567f3 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -1036,10 +1036,7 @@ static int pwm_seq_show(struct seq_file *s, void *v) dev_name(chip->dev), chip->npwm, (chip->npwm != 1) ? "s" : ""); - if (chip->ops->dbg_show) - chip->ops->dbg_show(chip, s); - else - pwm_dbg_show(chip, s); + pwm_dbg_show(chip, s); return 0; } diff --git a/include/linux/pwm.h b/include/linux/pwm.h index d5199b507d79..6a544cb89de4 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -254,7 +254,6 @@ pwm_set_relative_duty_cycle(struct pwm_state *state, unsigned int duty_cycle, * @get_state: get the current PWM state. This function is only * called once per PWM device when the PWM chip is * registered. - * @dbg_show: optional routine to show contents in debugfs * @owner: helps prevent removal of modules exporting active PWMs */ struct pwm_ops { @@ -272,9 +271,6 @@ struct pwm_ops { struct pwm_state *state); void (*get_state)(struct pwm_chip *chip, struct pwm_device *pwm, struct pwm_state *state); -#ifdef CONFIG_DEBUG_FS - void (*dbg_show)(struct pwm_chip *chip, struct seq_file *s); -#endif struct module *owner; }; -- cgit From 5d0a4c11896e8b83f816f135c24b184d4ba57741 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Mon, 7 Jan 2019 20:49:41 +0100 Subject: pwm: Rearrange structures to group members by purpose MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In pwm_ops there are a few callbacks that are not supposed to be used by new drivers. Group them at the end of the structure and add a comment. Similarily for struct pwm_chip group the members that drivers shouldn't care about at the end and mark them as internal with another comment. Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- include/linux/pwm.h | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 6a544cb89de4..b628abfffacc 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -242,11 +242,7 @@ pwm_set_relative_duty_cycle(struct pwm_state *state, unsigned int duty_cycle, * struct pwm_ops - PWM controller operations * @request: optional hook for requesting a PWM * @free: optional hook for freeing a PWM - * @config: configure duty cycles and period length for this PWM - * @set_polarity: configure the polarity of this PWM * @capture: capture and report PWM signal - * @enable: enable PWM output toggling - * @disable: disable PWM output toggling * @apply: atomically apply a new PWM config. The state argument * should be adjusted with the real hardware config (if the * approximate the period or duty_cycle value, state should @@ -255,48 +251,55 @@ pwm_set_relative_duty_cycle(struct pwm_state *state, unsigned int duty_cycle, * called once per PWM device when the PWM chip is * registered. * @owner: helps prevent removal of modules exporting active PWMs + * @config: configure duty cycles and period length for this PWM + * @set_polarity: configure the polarity of this PWM + * @enable: enable PWM output toggling + * @disable: disable PWM output toggling */ struct pwm_ops { int (*request)(struct pwm_chip *chip, struct pwm_device *pwm); void (*free)(struct pwm_chip *chip, struct pwm_device *pwm); - int (*config)(struct pwm_chip *chip, struct pwm_device *pwm, - int duty_ns, int period_ns); - int (*set_polarity)(struct pwm_chip *chip, struct pwm_device *pwm, - enum pwm_polarity polarity); int (*capture)(struct pwm_chip *chip, struct pwm_device *pwm, struct pwm_capture *result, unsigned long timeout); - int (*enable)(struct pwm_chip *chip, struct pwm_device *pwm); - void (*disable)(struct pwm_chip *chip, struct pwm_device *pwm); int (*apply)(struct pwm_chip *chip, struct pwm_device *pwm, struct pwm_state *state); void (*get_state)(struct pwm_chip *chip, struct pwm_device *pwm, struct pwm_state *state); struct module *owner; + + /* Only used by legacy drivers */ + int (*config)(struct pwm_chip *chip, struct pwm_device *pwm, + int duty_ns, int period_ns); + int (*set_polarity)(struct pwm_chip *chip, struct pwm_device *pwm, + enum pwm_polarity polarity); + int (*enable)(struct pwm_chip *chip, struct pwm_device *pwm); + void (*disable)(struct pwm_chip *chip, struct pwm_device *pwm); }; /** * struct pwm_chip - abstract a PWM controller * @dev: device providing the PWMs - * @list: list node for internal use * @ops: callbacks for this PWM controller * @base: number of first PWM controlled by this chip * @npwm: number of PWMs controlled by this chip - * @pwms: array of PWM devices allocated by the framework * @of_xlate: request a PWM device given a device tree PWM specifier * @of_pwm_n_cells: number of cells expected in the device tree PWM specifier + * @list: list node for internal use + * @pwms: array of PWM devices allocated by the framework */ struct pwm_chip { struct device *dev; - struct list_head list; const struct pwm_ops *ops; int base; unsigned int npwm; - struct pwm_device *pwms; - struct pwm_device * (*of_xlate)(struct pwm_chip *pc, const struct of_phandle_args *args); unsigned int of_pwm_n_cells; + + /* only used internally by the PWM framework */ + struct list_head list; + struct pwm_device *pwms; }; /** -- cgit From c1a85a00ea66cb6f0bd0f14e47c28c2b0999799f Mon Sep 17 00:00:00 2001 From: Micah Morton Date: Mon, 7 Jan 2019 16:10:53 -0800 Subject: LSM: generalize flag passing to security_capable This patch provides a general mechanism for passing flags to the security_capable LSM hook. It replaces the specific 'audit' flag that is used to tell security_capable whether it should log an audit message for the given capability check. The reason for generalizing this flag passing is so we can add an additional flag that signifies whether security_capable is being called by a setid syscall (which is needed by the proposed SafeSetID LSM). Signed-off-by: Micah Morton Reviewed-by: Kees Cook Signed-off-by: James Morris --- include/linux/lsm_hooks.h | 8 +++++--- include/linux/security.h | 28 ++++++++++++++-------------- kernel/capability.c | 22 +++++++++++++--------- kernel/seccomp.c | 4 ++-- security/apparmor/capability.c | 14 +++++++------- security/apparmor/include/capability.h | 2 +- security/apparmor/ipc.c | 3 ++- security/apparmor/lsm.c | 4 ++-- security/apparmor/resource.c | 2 +- security/commoncap.c | 17 +++++++++-------- security/security.c | 14 +++++--------- security/selinux/hooks.c | 18 +++++++++--------- security/smack/smack_access.c | 2 +- 13 files changed, 71 insertions(+), 67 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 40511a8a5ae6..195707210975 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1270,7 +1270,7 @@ * @cred contains the credentials to use. * @ns contains the user namespace we want the capability in * @cap contains the capability . - * @audit contains whether to write an audit message or not + * @opts contains options for the capable check * Return 0 if the capability is granted for @tsk. * @syslog: * Check permission before accessing the kernel message ring or changing @@ -1446,8 +1446,10 @@ union security_list_options { const kernel_cap_t *effective, const kernel_cap_t *inheritable, const kernel_cap_t *permitted); - int (*capable)(const struct cred *cred, struct user_namespace *ns, - int cap, int audit); + int (*capable)(const struct cred *cred, + struct user_namespace *ns, + int cap, + unsigned int opts); int (*quotactl)(int cmds, int type, int id, struct super_block *sb); int (*quota_on)(struct dentry *dentry); int (*syslog)(int type); diff --git a/include/linux/security.h b/include/linux/security.h index b2c5333ed4b5..13537a49ae97 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -54,9 +54,12 @@ struct xattr; struct xfrm_sec_ctx; struct mm_struct; +/* Default (no) options for the capable function */ +#define CAP_OPT_NONE 0x0 /* If capable should audit the security request */ -#define SECURITY_CAP_NOAUDIT 0 -#define SECURITY_CAP_AUDIT 1 +#define CAP_OPT_NOAUDIT BIT(1) +/* If capable is being called by a setid function */ +#define CAP_OPT_INSETID BIT(2) /* LSM Agnostic defines for sb_set_mnt_opts */ #define SECURITY_LSM_NATIVE_LABELS 1 @@ -72,7 +75,7 @@ enum lsm_event { /* These functions are in security/commoncap.c */ extern int cap_capable(const struct cred *cred, struct user_namespace *ns, - int cap, int audit); + int cap, unsigned int opts); extern int cap_settime(const struct timespec64 *ts, const struct timezone *tz); extern int cap_ptrace_access_check(struct task_struct *child, unsigned int mode); extern int cap_ptrace_traceme(struct task_struct *parent); @@ -207,10 +210,10 @@ int security_capset(struct cred *new, const struct cred *old, const kernel_cap_t *effective, const kernel_cap_t *inheritable, const kernel_cap_t *permitted); -int security_capable(const struct cred *cred, struct user_namespace *ns, - int cap); -int security_capable_noaudit(const struct cred *cred, struct user_namespace *ns, - int cap); +int security_capable(const struct cred *cred, + struct user_namespace *ns, + int cap, + unsigned int opts); int security_quotactl(int cmds, int type, int id, struct super_block *sb); int security_quota_on(struct dentry *dentry); int security_syslog(int type); @@ -464,14 +467,11 @@ static inline int security_capset(struct cred *new, } static inline int security_capable(const struct cred *cred, - struct user_namespace *ns, int cap) + struct user_namespace *ns, + int cap, + unsigned int opts) { - return cap_capable(cred, ns, cap, SECURITY_CAP_AUDIT); -} - -static inline int security_capable_noaudit(const struct cred *cred, - struct user_namespace *ns, int cap) { - return cap_capable(cred, ns, cap, SECURITY_CAP_NOAUDIT); + return cap_capable(cred, ns, cap, opts); } static inline int security_quotactl(int cmds, int type, int id, diff --git a/kernel/capability.c b/kernel/capability.c index 1e1c0236f55b..7718d7dcadc7 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -299,7 +299,7 @@ bool has_ns_capability(struct task_struct *t, int ret; rcu_read_lock(); - ret = security_capable(__task_cred(t), ns, cap); + ret = security_capable(__task_cred(t), ns, cap, CAP_OPT_NONE); rcu_read_unlock(); return (ret == 0); @@ -340,7 +340,7 @@ bool has_ns_capability_noaudit(struct task_struct *t, int ret; rcu_read_lock(); - ret = security_capable_noaudit(__task_cred(t), ns, cap); + ret = security_capable(__task_cred(t), ns, cap, CAP_OPT_NOAUDIT); rcu_read_unlock(); return (ret == 0); @@ -363,7 +363,9 @@ bool has_capability_noaudit(struct task_struct *t, int cap) return has_ns_capability_noaudit(t, &init_user_ns, cap); } -static bool ns_capable_common(struct user_namespace *ns, int cap, bool audit) +static bool ns_capable_common(struct user_namespace *ns, + int cap, + unsigned int opts) { int capable; @@ -372,8 +374,7 @@ static bool ns_capable_common(struct user_namespace *ns, int cap, bool audit) BUG(); } - capable = audit ? security_capable(current_cred(), ns, cap) : - security_capable_noaudit(current_cred(), ns, cap); + capable = security_capable(current_cred(), ns, cap, opts); if (capable == 0) { current->flags |= PF_SUPERPRIV; return true; @@ -394,7 +395,7 @@ static bool ns_capable_common(struct user_namespace *ns, int cap, bool audit) */ bool ns_capable(struct user_namespace *ns, int cap) { - return ns_capable_common(ns, cap, true); + return ns_capable_common(ns, cap, CAP_OPT_NONE); } EXPORT_SYMBOL(ns_capable); @@ -412,7 +413,7 @@ EXPORT_SYMBOL(ns_capable); */ bool ns_capable_noaudit(struct user_namespace *ns, int cap) { - return ns_capable_common(ns, cap, false); + return ns_capable_common(ns, cap, CAP_OPT_NOAUDIT); } EXPORT_SYMBOL(ns_capable_noaudit); @@ -448,10 +449,11 @@ EXPORT_SYMBOL(capable); bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap) { + if (WARN_ON_ONCE(!cap_valid(cap))) return false; - if (security_capable(file->f_cred, ns, cap) == 0) + if (security_capable(file->f_cred, ns, cap, CAP_OPT_NONE) == 0) return true; return false; @@ -500,10 +502,12 @@ bool ptracer_capable(struct task_struct *tsk, struct user_namespace *ns) { int ret = 0; /* An absent tracer adds no restrictions */ const struct cred *cred; + rcu_read_lock(); cred = rcu_dereference(tsk->ptracer_cred); if (cred) - ret = security_capable_noaudit(cred, ns, CAP_SYS_PTRACE); + ret = security_capable(cred, ns, CAP_SYS_PTRACE, + CAP_OPT_NOAUDIT); rcu_read_unlock(); return (ret == 0); } diff --git a/kernel/seccomp.c b/kernel/seccomp.c index d7f538847b84..38a77800def6 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -443,8 +443,8 @@ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog) * behavior of privileged children. */ if (!task_no_new_privs(current) && - security_capable_noaudit(current_cred(), current_user_ns(), - CAP_SYS_ADMIN) != 0) + security_capable(current_cred(), current_user_ns(), + CAP_SYS_ADMIN, CAP_OPT_NOAUDIT) != 0) return ERR_PTR(-EACCES); /* Allocate a new seccomp_filter */ diff --git a/security/apparmor/capability.c b/security/apparmor/capability.c index 253ef6e9d445..752f73980e30 100644 --- a/security/apparmor/capability.c +++ b/security/apparmor/capability.c @@ -110,13 +110,13 @@ static int audit_caps(struct common_audit_data *sa, struct aa_profile *profile, * profile_capable - test if profile allows use of capability @cap * @profile: profile being enforced (NOT NULL, NOT unconfined) * @cap: capability to test if allowed - * @audit: whether an audit record should be generated + * @opts: CAP_OPT_NOAUDIT bit determines whether audit record is generated * @sa: audit data (MAY BE NULL indicating no auditing) * * Returns: 0 if allowed else -EPERM */ -static int profile_capable(struct aa_profile *profile, int cap, int audit, - struct common_audit_data *sa) +static int profile_capable(struct aa_profile *profile, int cap, + unsigned int opts, struct common_audit_data *sa) { int error; @@ -126,7 +126,7 @@ static int profile_capable(struct aa_profile *profile, int cap, int audit, else error = -EPERM; - if (audit == SECURITY_CAP_NOAUDIT) { + if (opts & CAP_OPT_NOAUDIT) { if (!COMPLAIN_MODE(profile)) return error; /* audit the cap request in complain mode but note that it @@ -142,13 +142,13 @@ static int profile_capable(struct aa_profile *profile, int cap, int audit, * aa_capable - test permission to use capability * @label: label being tested for capability (NOT NULL) * @cap: capability to be tested - * @audit: whether an audit record should be generated + * @opts: CAP_OPT_NOAUDIT bit determines whether audit record is generated * * Look up capability in profile capability set. * * Returns: 0 on success, or else an error code. */ -int aa_capable(struct aa_label *label, int cap, int audit) +int aa_capable(struct aa_label *label, int cap, unsigned int opts) { struct aa_profile *profile; int error = 0; @@ -156,7 +156,7 @@ int aa_capable(struct aa_label *label, int cap, int audit) sa.u.cap = cap; error = fn_for_each_confined(label, profile, - profile_capable(profile, cap, audit, &sa)); + profile_capable(profile, cap, opts, &sa)); return error; } diff --git a/security/apparmor/include/capability.h b/security/apparmor/include/capability.h index e0304e2aeb7f..1b3663b6ab12 100644 --- a/security/apparmor/include/capability.h +++ b/security/apparmor/include/capability.h @@ -40,7 +40,7 @@ struct aa_caps { extern struct aa_sfs_entry aa_sfs_entry_caps[]; -int aa_capable(struct aa_label *label, int cap, int audit); +int aa_capable(struct aa_label *label, int cap, unsigned int opts); static inline void aa_free_cap_rules(struct aa_caps *caps) { diff --git a/security/apparmor/ipc.c b/security/apparmor/ipc.c index 527ea1557120..aacd1e95cb59 100644 --- a/security/apparmor/ipc.c +++ b/security/apparmor/ipc.c @@ -107,7 +107,8 @@ static int profile_tracer_perm(struct aa_profile *tracer, aad(sa)->label = &tracer->label; aad(sa)->peer = tracee; aad(sa)->request = 0; - aad(sa)->error = aa_capable(&tracer->label, CAP_SYS_PTRACE, 1); + aad(sa)->error = aa_capable(&tracer->label, CAP_SYS_PTRACE, + CAP_OPT_NONE); return aa_audit(AUDIT_APPARMOR_AUTO, tracer, sa, audit_ptrace_cb); } diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 60ef71268ccf..b6c395e2acd0 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -172,14 +172,14 @@ static int apparmor_capget(struct task_struct *target, kernel_cap_t *effective, } static int apparmor_capable(const struct cred *cred, struct user_namespace *ns, - int cap, int audit) + int cap, unsigned int opts) { struct aa_label *label; int error = 0; label = aa_get_newest_cred_label(cred); if (!unconfined(label)) - error = aa_capable(label, cap, audit); + error = aa_capable(label, cap, opts); aa_put_label(label); return error; diff --git a/security/apparmor/resource.c b/security/apparmor/resource.c index 95fd26d09757..552ed09cb47e 100644 --- a/security/apparmor/resource.c +++ b/security/apparmor/resource.c @@ -124,7 +124,7 @@ int aa_task_setrlimit(struct aa_label *label, struct task_struct *task, */ if (label != peer && - aa_capable(label, CAP_SYS_RESOURCE, SECURITY_CAP_NOAUDIT) != 0) + aa_capable(label, CAP_SYS_RESOURCE, CAP_OPT_NOAUDIT) != 0) error = fn_for_each(label, profile, audit_resource(profile, resource, new_rlim->rlim_max, peer, diff --git a/security/commoncap.c b/security/commoncap.c index 52e04136bfa8..188eaf59f82f 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -68,7 +68,7 @@ static void warn_setuid_and_fcaps_mixed(const char *fname) * kernel's capable() and has_capability() returns 1 for this case. */ int cap_capable(const struct cred *cred, struct user_namespace *targ_ns, - int cap, int audit) + int cap, unsigned int opts) { struct user_namespace *ns = targ_ns; @@ -222,12 +222,11 @@ int cap_capget(struct task_struct *target, kernel_cap_t *effective, */ static inline int cap_inh_is_capped(void) { - /* they are so limited unless the current task has the CAP_SETPCAP * capability */ if (cap_capable(current_cred(), current_cred()->user_ns, - CAP_SETPCAP, SECURITY_CAP_AUDIT) == 0) + CAP_SETPCAP, CAP_OPT_NONE) == 0) return 0; return 1; } @@ -1208,8 +1207,9 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, || ((old->securebits & SECURE_ALL_LOCKS & ~arg2)) /*[2]*/ || (arg2 & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS)) /*[3]*/ || (cap_capable(current_cred(), - current_cred()->user_ns, CAP_SETPCAP, - SECURITY_CAP_AUDIT) != 0) /*[4]*/ + current_cred()->user_ns, + CAP_SETPCAP, + CAP_OPT_NONE) != 0) /*[4]*/ /* * [1] no changing of bits that are locked * [2] no unlocking of locks @@ -1304,9 +1304,10 @@ int cap_vm_enough_memory(struct mm_struct *mm, long pages) { int cap_sys_admin = 0; - if (cap_capable(current_cred(), &init_user_ns, CAP_SYS_ADMIN, - SECURITY_CAP_NOAUDIT) == 0) + if (cap_capable(current_cred(), &init_user_ns, + CAP_SYS_ADMIN, CAP_OPT_NOAUDIT) == 0) cap_sys_admin = 1; + return cap_sys_admin; } @@ -1325,7 +1326,7 @@ int cap_mmap_addr(unsigned long addr) if (addr < dac_mmap_min_addr) { ret = cap_capable(current_cred(), &init_user_ns, CAP_SYS_RAWIO, - SECURITY_CAP_AUDIT); + CAP_OPT_NONE); /* set PF_SUPERPRIV if it turns out we allow the low mmap */ if (ret == 0) current->flags |= PF_SUPERPRIV; diff --git a/security/security.c b/security/security.c index 953fc3ea18a9..a618e22df5c6 100644 --- a/security/security.c +++ b/security/security.c @@ -689,16 +689,12 @@ int security_capset(struct cred *new, const struct cred *old, effective, inheritable, permitted); } -int security_capable(const struct cred *cred, struct user_namespace *ns, - int cap) +int security_capable(const struct cred *cred, + struct user_namespace *ns, + int cap, + unsigned int opts) { - return call_int_hook(capable, 0, cred, ns, cap, SECURITY_CAP_AUDIT); -} - -int security_capable_noaudit(const struct cred *cred, struct user_namespace *ns, - int cap) -{ - return call_int_hook(capable, 0, cred, ns, cap, SECURITY_CAP_NOAUDIT); + return call_int_hook(capable, 0, cred, ns, cap, opts); } int security_quotactl(int cmds, int type, int id, struct super_block *sb) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index d98e1d8d18f6..b2ee49f938f1 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -1578,7 +1578,7 @@ static inline u32 signal_to_av(int sig) /* Check whether a task is allowed to use a capability. */ static int cred_has_capability(const struct cred *cred, - int cap, int audit, bool initns) + int cap, unsigned int opts, bool initns) { struct common_audit_data ad; struct av_decision avd; @@ -1605,7 +1605,7 @@ static int cred_has_capability(const struct cred *cred, rc = avc_has_perm_noaudit(&selinux_state, sid, sid, sclass, av, 0, &avd); - if (audit == SECURITY_CAP_AUDIT) { + if (!(opts & CAP_OPT_NOAUDIT)) { int rc2 = avc_audit(&selinux_state, sid, sid, sclass, av, &avd, rc, &ad, 0); if (rc2) @@ -2125,9 +2125,9 @@ static int selinux_capset(struct cred *new, const struct cred *old, */ static int selinux_capable(const struct cred *cred, struct user_namespace *ns, - int cap, int audit) + int cap, unsigned int opts) { - return cred_has_capability(cred, cap, audit, ns == &init_user_ns); + return cred_has_capability(cred, cap, opts, ns == &init_user_ns); } static int selinux_quotactl(int cmds, int type, int id, struct super_block *sb) @@ -2201,7 +2201,7 @@ static int selinux_vm_enough_memory(struct mm_struct *mm, long pages) int rc, cap_sys_admin = 0; rc = cred_has_capability(current_cred(), CAP_SYS_ADMIN, - SECURITY_CAP_NOAUDIT, true); + CAP_OPT_NOAUDIT, true); if (rc == 0) cap_sys_admin = 1; @@ -2988,11 +2988,11 @@ static int selinux_inode_getattr(const struct path *path) static bool has_cap_mac_admin(bool audit) { const struct cred *cred = current_cred(); - int cap_audit = audit ? SECURITY_CAP_AUDIT : SECURITY_CAP_NOAUDIT; + unsigned int opts = audit ? CAP_OPT_NONE : CAP_OPT_NOAUDIT; - if (cap_capable(cred, &init_user_ns, CAP_MAC_ADMIN, cap_audit)) + if (cap_capable(cred, &init_user_ns, CAP_MAC_ADMIN, opts)) return false; - if (cred_has_capability(cred, CAP_MAC_ADMIN, cap_audit, true)) + if (cred_has_capability(cred, CAP_MAC_ADMIN, opts, true)) return false; return true; } @@ -3387,7 +3387,7 @@ static int selinux_file_ioctl(struct file *file, unsigned int cmd, case KDSKBENT: case KDSKBSENT: error = cred_has_capability(cred, CAP_SYS_TTY_CONFIG, - SECURITY_CAP_AUDIT, true); + CAP_OPT_NONE, true); break; /* default case assumes that the command will go diff --git a/security/smack/smack_access.c b/security/smack/smack_access.c index 489d49a20b47..fe2ce3a65822 100644 --- a/security/smack/smack_access.c +++ b/security/smack/smack_access.c @@ -640,7 +640,7 @@ bool smack_privileged_cred(int cap, const struct cred *cred) struct smack_known_list_elem *sklep; int rc; - rc = cap_capable(cred, &init_user_ns, cap, SECURITY_CAP_AUDIT); + rc = cap_capable(cred, &init_user_ns, cap, CAP_OPT_NONE); if (rc) return false; -- cgit From 5b438f4ba315db4f8c1489d175656798d58c014f Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Fri, 11 Jan 2019 13:04:57 +0800 Subject: iommu/vt-d: Support page request in scalable mode VT-d Rev3.0 has made a few changes to the page request interface, 1. widened PRQ descriptor from 128 bits to 256 bits; 2. removed streaming response type; 3. introduced private data that requires page response even the request is not last request in group (LPIG). This is a supplement to commit 1c4f88b7f1f92 ("iommu/vt-d: Shared virtual address in scalable mode") and makes the svm code compliant with VT-d Rev3.0. Cc: Ashok Raj Cc: Liu Yi L Cc: Kevin Tian Signed-off-by: Jacob Pan Fixes: 1c4f88b7f1f92 ("iommu/vt-d: Shared virtual address in scalable mode") Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-svm.c | 77 ++++++++++++++++++++++++++------------------- include/linux/intel-iommu.h | 21 ++++++------- include/linux/intel-svm.h | 2 +- 3 files changed, 55 insertions(+), 45 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index a2a2aa4439aa..79add5716552 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -470,20 +470,31 @@ EXPORT_SYMBOL_GPL(intel_svm_is_pasid_valid); /* Page request queue descriptor */ struct page_req_dsc { - u64 srr:1; - u64 bof:1; - u64 pasid_present:1; - u64 lpig:1; - u64 pasid:20; - u64 bus:8; - u64 private:23; - u64 prg_index:9; - u64 rd_req:1; - u64 wr_req:1; - u64 exe_req:1; - u64 priv_req:1; - u64 devfn:8; - u64 addr:52; + union { + struct { + u64 type:8; + u64 pasid_present:1; + u64 priv_data_present:1; + u64 rsvd:6; + u64 rid:16; + u64 pasid:20; + u64 exe_req:1; + u64 pm_req:1; + u64 rsvd2:10; + }; + u64 qw_0; + }; + union { + struct { + u64 rd_req:1; + u64 wr_req:1; + u64 lpig:1; + u64 prg_index:9; + u64 addr:52; + }; + u64 qw_1; + }; + u64 priv_data[2]; }; #define PRQ_RING_MASK ((0x1000 << PRQ_ORDER) - 0x10) @@ -596,7 +607,7 @@ static irqreturn_t prq_event_thread(int irq, void *d) /* Accounting for major/minor faults? */ rcu_read_lock(); list_for_each_entry_rcu(sdev, &svm->devs, list) { - if (sdev->sid == PCI_DEVID(req->bus, req->devfn)) + if (sdev->sid == req->rid) break; } /* Other devices can go away, but the drivers are not permitted @@ -609,33 +620,35 @@ static irqreturn_t prq_event_thread(int irq, void *d) if (sdev && sdev->ops && sdev->ops->fault_cb) { int rwxp = (req->rd_req << 3) | (req->wr_req << 2) | - (req->exe_req << 1) | (req->priv_req); - sdev->ops->fault_cb(sdev->dev, req->pasid, req->addr, req->private, rwxp, result); + (req->exe_req << 1) | (req->pm_req); + sdev->ops->fault_cb(sdev->dev, req->pasid, req->addr, + req->priv_data, rwxp, result); } /* We get here in the error case where the PASID lookup failed, and these can be NULL. Do not use them below this point! */ sdev = NULL; svm = NULL; no_pasid: - if (req->lpig) { - /* Page Group Response */ + if (req->lpig || req->priv_data_present) { + /* + * Per VT-d spec. v3.0 ch7.7, system software must + * respond with page group response if private data + * is present (PDP) or last page in group (LPIG) bit + * is set. This is an additional VT-d feature beyond + * PCI ATS spec. + */ resp.qw0 = QI_PGRP_PASID(req->pasid) | - QI_PGRP_DID((req->bus << 8) | req->devfn) | + QI_PGRP_DID(req->rid) | QI_PGRP_PASID_P(req->pasid_present) | + QI_PGRP_PDP(req->pasid_present) | + QI_PGRP_RESP_CODE(result) | QI_PGRP_RESP_TYPE; resp.qw1 = QI_PGRP_IDX(req->prg_index) | - QI_PGRP_PRIV(req->private) | - QI_PGRP_RESP_CODE(result); - } else if (req->srr) { - /* Page Stream Response */ - resp.qw0 = QI_PSTRM_IDX(req->prg_index) | - QI_PSTRM_PRIV(req->private) | - QI_PSTRM_BUS(req->bus) | - QI_PSTRM_PASID(req->pasid) | - QI_PSTRM_RESP_TYPE; - resp.qw1 = QI_PSTRM_ADDR(address) | - QI_PSTRM_DEVFN(req->devfn) | - QI_PSTRM_RESP_CODE(result); + QI_PGRP_LPIG(req->lpig); + + if (req->priv_data_present) + memcpy(&resp.qw2, req->priv_data, + sizeof(req->priv_data)); } resp.qw2 = 0; resp.qw3 = 0; diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 0605f3bf6e79..fa364de9db18 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -374,20 +374,17 @@ enum { #define QI_DEV_EIOTLB_PFSID(pfsid) (((u64)(pfsid & 0xf) << 12) | ((u64)(pfsid & 0xfff) << 52)) #define QI_DEV_EIOTLB_MAX_INVS 32 -#define QI_PGRP_IDX(idx) (((u64)(idx)) << 55) -#define QI_PGRP_PRIV(priv) (((u64)(priv)) << 32) -#define QI_PGRP_RESP_CODE(res) ((u64)(res)) -#define QI_PGRP_PASID(pasid) (((u64)(pasid)) << 32) -#define QI_PGRP_DID(did) (((u64)(did)) << 16) +/* Page group response descriptor QW0 */ #define QI_PGRP_PASID_P(p) (((u64)(p)) << 4) +#define QI_PGRP_PDP(p) (((u64)(p)) << 5) +#define QI_PGRP_RESP_CODE(res) (((u64)(res)) << 12) +#define QI_PGRP_DID(rid) (((u64)(rid)) << 16) +#define QI_PGRP_PASID(pasid) (((u64)(pasid)) << 32) + +/* Page group response descriptor QW1 */ +#define QI_PGRP_LPIG(x) (((u64)(x)) << 2) +#define QI_PGRP_IDX(idx) (((u64)(idx)) << 3) -#define QI_PSTRM_ADDR(addr) (((u64)(addr)) & VTD_PAGE_MASK) -#define QI_PSTRM_DEVFN(devfn) (((u64)(devfn)) << 4) -#define QI_PSTRM_RESP_CODE(res) ((u64)(res)) -#define QI_PSTRM_IDX(idx) (((u64)(idx)) << 55) -#define QI_PSTRM_PRIV(priv) (((u64)(priv)) << 32) -#define QI_PSTRM_BUS(bus) (((u64)(bus)) << 24) -#define QI_PSTRM_PASID(pasid) (((u64)(pasid)) << 4) #define QI_RESP_SUCCESS 0x0 #define QI_RESP_INVALID 0x1 diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h index 99bc5b3ae26e..e3f76315ca4d 100644 --- a/include/linux/intel-svm.h +++ b/include/linux/intel-svm.h @@ -20,7 +20,7 @@ struct device; struct svm_dev_ops { void (*fault_cb)(struct device *dev, int pasid, u64 address, - u32 private, int rwxp, int response); + void *private, int rwxp, int response); }; /* Values for rxwp in fault_cb callback */ -- cgit From 19514910d021c93c7823ec32067e6b7dea224f0f Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 9 Jan 2019 13:43:19 +0100 Subject: livepatch: Change unsigned long old_addr -> void *old_func in struct klp_func The address of the to be patched function and new function is stored in struct klp_func as: void *new_func; unsigned long old_addr; The different naming scheme and type are derived from the way the addresses are set. @old_addr is assigned at runtime using kallsyms-based search. @new_func is statically initialized, for example: static struct klp_func funcs[] = { { .old_name = "cmdline_proc_show", .new_func = livepatch_cmdline_proc_show, }, { } }; This patch changes unsigned long old_addr -> void *old_func. It removes some confusion when these address are later used in the code. It is motivated by a followup patch that adds special NOP struct klp_func where we want to assign func->new_func = func->old_addr respectively func->new_func = func->old_func. This patch does not modify the existing behavior. Suggested-by: Josh Poimboeuf Signed-off-by: Petr Mladek Acked-by: Miroslav Benes Acked-by: Joe Lawrence Acked-by: Alice Ferrazzi Acked-by: Josh Poimboeuf Signed-off-by: Jiri Kosina --- include/linux/livepatch.h | 4 ++-- kernel/livepatch/core.c | 6 +++--- kernel/livepatch/patch.c | 18 ++++++++++-------- kernel/livepatch/patch.h | 4 ++-- kernel/livepatch/transition.c | 4 ++-- 5 files changed, 19 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h index aec44b1d9582..634e13876380 100644 --- a/include/linux/livepatch.h +++ b/include/linux/livepatch.h @@ -40,7 +40,7 @@ * @new_func: pointer to the patched function code * @old_sympos: a hint indicating which symbol position the old function * can be found (optional) - * @old_addr: the address of the function being patched + * @old_func: pointer to the function being patched * @kobj: kobject for sysfs resources * @stack_node: list node for klp_ops func_stack list * @old_size: size of the old function @@ -77,7 +77,7 @@ struct klp_func { unsigned long old_sympos; /* internal */ - unsigned long old_addr; + void *old_func; struct kobject kobj; struct list_head stack_node; unsigned long old_size, new_size; diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index 5b77a7314e01..cb59c7fb94cb 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -648,7 +648,7 @@ static void klp_free_object_loaded(struct klp_object *obj) obj->mod = NULL; klp_for_each_func(obj, func) - func->old_addr = 0; + func->old_func = NULL; } /* @@ -721,11 +721,11 @@ static int klp_init_object_loaded(struct klp_patch *patch, klp_for_each_func(obj, func) { ret = klp_find_object_symbol(obj->name, func->old_name, func->old_sympos, - &func->old_addr); + (unsigned long *)&func->old_func); if (ret) return ret; - ret = kallsyms_lookup_size_offset(func->old_addr, + ret = kallsyms_lookup_size_offset((unsigned long)func->old_func, &func->old_size, NULL); if (!ret) { pr_err("kallsyms size lookup failed for '%s'\n", diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c index 7702cb4064fc..825022d70912 100644 --- a/kernel/livepatch/patch.c +++ b/kernel/livepatch/patch.c @@ -34,7 +34,7 @@ static LIST_HEAD(klp_ops); -struct klp_ops *klp_find_ops(unsigned long old_addr) +struct klp_ops *klp_find_ops(void *old_func) { struct klp_ops *ops; struct klp_func *func; @@ -42,7 +42,7 @@ struct klp_ops *klp_find_ops(unsigned long old_addr) list_for_each_entry(ops, &klp_ops, node) { func = list_first_entry(&ops->func_stack, struct klp_func, stack_node); - if (func->old_addr == old_addr) + if (func->old_func == old_func) return ops; } @@ -142,17 +142,18 @@ static void klp_unpatch_func(struct klp_func *func) if (WARN_ON(!func->patched)) return; - if (WARN_ON(!func->old_addr)) + if (WARN_ON(!func->old_func)) return; - ops = klp_find_ops(func->old_addr); + ops = klp_find_ops(func->old_func); if (WARN_ON(!ops)) return; if (list_is_singular(&ops->func_stack)) { unsigned long ftrace_loc; - ftrace_loc = klp_get_ftrace_location(func->old_addr); + ftrace_loc = + klp_get_ftrace_location((unsigned long)func->old_func); if (WARN_ON(!ftrace_loc)) return; @@ -174,17 +175,18 @@ static int klp_patch_func(struct klp_func *func) struct klp_ops *ops; int ret; - if (WARN_ON(!func->old_addr)) + if (WARN_ON(!func->old_func)) return -EINVAL; if (WARN_ON(func->patched)) return -EINVAL; - ops = klp_find_ops(func->old_addr); + ops = klp_find_ops(func->old_func); if (!ops) { unsigned long ftrace_loc; - ftrace_loc = klp_get_ftrace_location(func->old_addr); + ftrace_loc = + klp_get_ftrace_location((unsigned long)func->old_func); if (!ftrace_loc) { pr_err("failed to find location for function '%s'\n", func->old_name); diff --git a/kernel/livepatch/patch.h b/kernel/livepatch/patch.h index e72d8250d04b..a9b16e513656 100644 --- a/kernel/livepatch/patch.h +++ b/kernel/livepatch/patch.h @@ -10,7 +10,7 @@ * struct klp_ops - structure for tracking registered ftrace ops structs * * A single ftrace_ops is shared between all enabled replacement functions - * (klp_func structs) which have the same old_addr. This allows the switch + * (klp_func structs) which have the same old_func. This allows the switch * between function versions to happen instantaneously by updating the klp_ops * struct's func_stack list. The winner is the klp_func at the top of the * func_stack (front of the list). @@ -25,7 +25,7 @@ struct klp_ops { struct ftrace_ops fops; }; -struct klp_ops *klp_find_ops(unsigned long old_addr); +struct klp_ops *klp_find_ops(void *old_func); int klp_patch_object(struct klp_object *obj); void klp_unpatch_object(struct klp_object *obj); diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c index 304d5eb8a98c..f27a378ad5e1 100644 --- a/kernel/livepatch/transition.c +++ b/kernel/livepatch/transition.c @@ -224,11 +224,11 @@ static int klp_check_stack_func(struct klp_func *func, * Check for the to-be-patched function * (the previous func). */ - ops = klp_find_ops(func->old_addr); + ops = klp_find_ops(func->old_func); if (list_is_singular(&ops->func_stack)) { /* original function */ - func_addr = func->old_addr; + func_addr = (unsigned long)func->old_func; func_size = func->old_size; } else { /* previously patched function */ -- cgit From 0430f78bf38f9972f0cf0522709cc63d49fa164c Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 9 Jan 2019 13:43:21 +0100 Subject: livepatch: Consolidate klp_free functions The code for freeing livepatch structures is a bit scattered and tricky: + direct calls to klp_free_*_limited() and kobject_put() are used to release partially initialized objects + klp_free_patch() removes the patch from the public list and releases all objects except for patch->kobj + object_put(&patch->kobj) and the related wait_for_completion() are called directly outside klp_mutex; this code is duplicated; Now, we are going to remove the registration stage to simplify the API and the code. This would require handling more situations in klp_enable_patch() error paths. More importantly, we are going to add a feature called atomic replace. It will need to dynamically create func and object structures. We will want to reuse the existing init() and free() functions. This would create even more error path scenarios. This patch implements more straightforward free functions: + checks kobj_added flag instead of @limit[*] + initializes patch->list early so that the check for empty list always works + The action(s) that has to be done outside klp_mutex are done in separate klp_free_patch_finish() function. It waits only when patch->kobj was really released via the _start() part. The patch does not change the existing behavior. [*] We need our own flag to track that the kobject was successfully added to the hierarchy. Note that kobj.state_initialized only indicates that kobject has been initialized, not whether is has been added (and needs to be removed on cleanup). Signed-off-by: Petr Mladek Cc: Josh Poimboeuf Cc: Miroslav Benes Cc: Jessica Yu Cc: Jiri Kosina Cc: Jason Baron Acked-by: Miroslav Benes Acked-by: Josh Poimboeuf Signed-off-by: Jiri Kosina --- include/linux/livepatch.h | 6 ++ kernel/livepatch/core.c | 137 +++++++++++++++++++++++++++++++--------------- 2 files changed, 98 insertions(+), 45 deletions(-) (limited to 'include/linux') diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h index 634e13876380..6978785bc059 100644 --- a/include/linux/livepatch.h +++ b/include/linux/livepatch.h @@ -45,6 +45,7 @@ * @stack_node: list node for klp_ops func_stack list * @old_size: size of the old function * @new_size: size of the new function + * @kobj_added: @kobj has been added and needs freeing * @patched: the func has been added to the klp_ops list * @transition: the func is currently being applied or reverted * @@ -81,6 +82,7 @@ struct klp_func { struct kobject kobj; struct list_head stack_node; unsigned long old_size, new_size; + bool kobj_added; bool patched; bool transition; }; @@ -117,6 +119,7 @@ struct klp_callbacks { * @kobj: kobject for sysfs resources * @mod: kernel module associated with the patched object * (NULL for vmlinux) + * @kobj_added: @kobj has been added and needs freeing * @patched: the object's funcs have been added to the klp_ops list */ struct klp_object { @@ -128,6 +131,7 @@ struct klp_object { /* internal */ struct kobject kobj; struct module *mod; + bool kobj_added; bool patched; }; @@ -137,6 +141,7 @@ struct klp_object { * @objs: object entries for kernel objects to be patched * @list: list node for global list of registered patches * @kobj: kobject for sysfs resources + * @kobj_added: @kobj has been added and needs freeing * @enabled: the patch is enabled (but operation may be incomplete) * @finish: for waiting till it is safe to remove the patch module */ @@ -148,6 +153,7 @@ struct klp_patch { /* internal */ struct list_head list; struct kobject kobj; + bool kobj_added; bool enabled; struct completion finish; }; diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index 20589da35194..6f0d9095f662 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -465,17 +465,15 @@ static struct kobj_type klp_ktype_func = { .sysfs_ops = &kobj_sysfs_ops, }; -/* - * Free all functions' kobjects in the array up to some limit. When limit is - * NULL, all kobjects are freed. - */ -static void klp_free_funcs_limited(struct klp_object *obj, - struct klp_func *limit) +static void klp_free_funcs(struct klp_object *obj) { struct klp_func *func; - for (func = obj->funcs; func->old_name && func != limit; func++) - kobject_put(&func->kobj); + klp_for_each_func(obj, func) { + /* Might be called from klp_init_patch() error path. */ + if (func->kobj_added) + kobject_put(&func->kobj); + } } /* Clean up when a patched object is unloaded */ @@ -489,30 +487,60 @@ static void klp_free_object_loaded(struct klp_object *obj) func->old_func = NULL; } -/* - * Free all objects' kobjects in the array up to some limit. When limit is - * NULL, all kobjects are freed. - */ -static void klp_free_objects_limited(struct klp_patch *patch, - struct klp_object *limit) +static void klp_free_objects(struct klp_patch *patch) { struct klp_object *obj; - for (obj = patch->objs; obj->funcs && obj != limit; obj++) { - klp_free_funcs_limited(obj, NULL); - kobject_put(&obj->kobj); + klp_for_each_object(patch, obj) { + klp_free_funcs(obj); + + /* Might be called from klp_init_patch() error path. */ + if (obj->kobj_added) + kobject_put(&obj->kobj); } } -static void klp_free_patch(struct klp_patch *patch) +/* + * This function implements the free operations that can be called safely + * under klp_mutex. + * + * The operation must be completed by calling klp_free_patch_finish() + * outside klp_mutex. + */ +static void klp_free_patch_start(struct klp_patch *patch) { - klp_free_objects_limited(patch, NULL); if (!list_empty(&patch->list)) list_del(&patch->list); + + klp_free_objects(patch); +} + +/* + * This function implements the free part that must be called outside + * klp_mutex. + * + * It must be called after klp_free_patch_start(). And it has to be + * the last function accessing the livepatch structures when the patch + * gets disabled. + */ +static void klp_free_patch_finish(struct klp_patch *patch) +{ + /* + * Avoid deadlock with enabled_store() sysfs callback by + * calling this outside klp_mutex. It is safe because + * this is called when the patch gets disabled and it + * cannot get enabled again. + */ + if (patch->kobj_added) { + kobject_put(&patch->kobj); + wait_for_completion(&patch->finish); + } } static int klp_init_func(struct klp_object *obj, struct klp_func *func) { + int ret; + if (!func->old_name || !func->new_func) return -EINVAL; @@ -528,9 +556,13 @@ static int klp_init_func(struct klp_object *obj, struct klp_func *func) * object. If the user selects 0 for old_sympos, then 1 will be used * since a unique symbol will be the first occurrence. */ - return kobject_init_and_add(&func->kobj, &klp_ktype_func, - &obj->kobj, "%s,%lu", func->old_name, - func->old_sympos ? func->old_sympos : 1); + ret = kobject_init_and_add(&func->kobj, &klp_ktype_func, + &obj->kobj, "%s,%lu", func->old_name, + func->old_sympos ? func->old_sympos : 1); + if (!ret) + func->kobj_added = true; + + return ret; } /* Arches may override this to finish any remaining arch-specific tasks */ @@ -589,9 +621,6 @@ static int klp_init_object(struct klp_patch *patch, struct klp_object *obj) int ret; const char *name; - if (!obj->funcs) - return -EINVAL; - if (klp_is_module(obj) && strlen(obj->name) >= MODULE_NAME_LEN) return -EINVAL; @@ -605,46 +634,66 @@ static int klp_init_object(struct klp_patch *patch, struct klp_object *obj) &patch->kobj, "%s", name); if (ret) return ret; + obj->kobj_added = true; klp_for_each_func(obj, func) { ret = klp_init_func(obj, func); if (ret) - goto free; + return ret; } - if (klp_is_object_loaded(obj)) { + if (klp_is_object_loaded(obj)) ret = klp_init_object_loaded(patch, obj); - if (ret) - goto free; - } - - return 0; -free: - klp_free_funcs_limited(obj, func); - kobject_put(&obj->kobj); return ret; } -static int klp_init_patch(struct klp_patch *patch) +static int klp_init_patch_early(struct klp_patch *patch) { struct klp_object *obj; - int ret; + struct klp_func *func; if (!patch->objs) return -EINVAL; - mutex_lock(&klp_mutex); - + INIT_LIST_HEAD(&patch->list); + patch->kobj_added = false; patch->enabled = false; init_completion(&patch->finish); + klp_for_each_object(patch, obj) { + if (!obj->funcs) + return -EINVAL; + + obj->kobj_added = false; + + klp_for_each_func(obj, func) + func->kobj_added = false; + } + + return 0; +} + +static int klp_init_patch(struct klp_patch *patch) +{ + struct klp_object *obj; + int ret; + + mutex_lock(&klp_mutex); + + ret = klp_init_patch_early(patch); + if (ret) { + mutex_unlock(&klp_mutex); + return ret; + } + ret = kobject_init_and_add(&patch->kobj, &klp_ktype_patch, klp_root_kobj, "%s", patch->mod->name); if (ret) { mutex_unlock(&klp_mutex); return ret; } + patch->kobj_added = true; klp_for_each_object(patch, obj) { ret = klp_init_object(patch, obj); @@ -659,12 +708,11 @@ static int klp_init_patch(struct klp_patch *patch) return 0; free: - klp_free_objects_limited(patch, obj); + klp_free_patch_start(patch); mutex_unlock(&klp_mutex); - kobject_put(&patch->kobj); - wait_for_completion(&patch->finish); + klp_free_patch_finish(patch); return ret; } @@ -693,12 +741,11 @@ int klp_unregister_patch(struct klp_patch *patch) goto err; } - klp_free_patch(patch); + klp_free_patch_start(patch); mutex_unlock(&klp_mutex); - kobject_put(&patch->kobj); - wait_for_completion(&patch->finish); + klp_free_patch_finish(patch); return 0; err: -- cgit From 68007289bf3cd937a5b8fc4987d2787167bd06ca Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 9 Jan 2019 13:43:22 +0100 Subject: livepatch: Don't block the removal of patches loaded after a forced transition module_put() is currently never called in klp_complete_transition() when klp_force is set. As a result, we might keep the reference count even when klp_enable_patch() fails and klp_cancel_transition() is called. This might give the impression that a module might get blocked in some strange init state. Fortunately, it is not the case. The reference count is ignored when mod->init fails and erroneous modules are always removed. Anyway, this might be confusing. Instead, this patch moves the global klp_forced flag into struct klp_patch. As a result, we block only modules that might still be in use after a forced transition. Newly loaded livepatches might be eventually completely removed later. It is not a big deal. But the code is at least consistent with the reality. Signed-off-by: Petr Mladek Acked-by: Joe Lawrence Acked-by: Miroslav Benes Acked-by: Josh Poimboeuf Signed-off-by: Jiri Kosina --- include/linux/livepatch.h | 2 ++ kernel/livepatch/core.c | 4 +++- kernel/livepatch/core.h | 1 + kernel/livepatch/transition.c | 10 +++++----- 4 files changed, 11 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h index 6978785bc059..6a9165d9b090 100644 --- a/include/linux/livepatch.h +++ b/include/linux/livepatch.h @@ -143,6 +143,7 @@ struct klp_object { * @kobj: kobject for sysfs resources * @kobj_added: @kobj has been added and needs freeing * @enabled: the patch is enabled (but operation may be incomplete) + * @forced: was involved in a forced transition * @finish: for waiting till it is safe to remove the patch module */ struct klp_patch { @@ -155,6 +156,7 @@ struct klp_patch { struct kobject kobj; bool kobj_added; bool enabled; + bool forced; struct completion finish; }; diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index 6f0d9095f662..e77c5017ae0c 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -45,7 +45,8 @@ */ DEFINE_MUTEX(klp_mutex); -static LIST_HEAD(klp_patches); +/* Registered patches */ +LIST_HEAD(klp_patches); static struct kobject *klp_root_kobj; @@ -659,6 +660,7 @@ static int klp_init_patch_early(struct klp_patch *patch) INIT_LIST_HEAD(&patch->list); patch->kobj_added = false; patch->enabled = false; + patch->forced = false; init_completion(&patch->finish); klp_for_each_object(patch, obj) { diff --git a/kernel/livepatch/core.h b/kernel/livepatch/core.h index 48a83d4364cf..d0cb5390e247 100644 --- a/kernel/livepatch/core.h +++ b/kernel/livepatch/core.h @@ -5,6 +5,7 @@ #include extern struct mutex klp_mutex; +extern struct list_head klp_patches; static inline bool klp_is_object_loaded(struct klp_object *obj) { diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c index f27a378ad5e1..a4c921364003 100644 --- a/kernel/livepatch/transition.c +++ b/kernel/livepatch/transition.c @@ -33,8 +33,6 @@ struct klp_patch *klp_transition_patch; static int klp_target_state = KLP_UNDEFINED; -static bool klp_forced = false; - /* * This work can be performed periodically to finish patching or unpatching any * "straggler" tasks which failed to transition in the first attempt. @@ -137,10 +135,10 @@ static void klp_complete_transition(void) klp_target_state == KLP_PATCHED ? "patching" : "unpatching"); /* - * klp_forced set implies unbounded increase of module's ref count if + * patch->forced set implies unbounded increase of module's ref count if * the module is disabled/enabled in a loop. */ - if (!klp_forced && klp_target_state == KLP_UNPATCHED) + if (!klp_transition_patch->forced && klp_target_state == KLP_UNPATCHED) module_put(klp_transition_patch->mod); klp_target_state = KLP_UNDEFINED; @@ -620,6 +618,7 @@ void klp_send_signals(void) */ void klp_force_transition(void) { + struct klp_patch *patch; struct task_struct *g, *task; unsigned int cpu; @@ -633,5 +632,6 @@ void klp_force_transition(void) for_each_possible_cpu(cpu) klp_update_patch_state(idle_task(cpu)); - klp_forced = true; + list_for_each_entry(patch, &klp_patches, list) + patch->forced = true; } -- cgit From 958ef1e39d24d6cb8bf2a7406130a98c9564230f Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 9 Jan 2019 13:43:23 +0100 Subject: livepatch: Simplify API by removing registration step The possibility to re-enable a registered patch was useful for immediate patches where the livepatch module had to stay until the system reboot. The improved consistency model allows to achieve the same result by unloading and loading the livepatch module again. Also we are going to add a feature called atomic replace. It will allow to create a patch that would replace all already registered patches. The aim is to handle dependent patches more securely. It will obsolete the stack of patches that helped to handle the dependencies so far. Then it might be unclear when a cumulative patch re-enabling is safe. It would be complicated to support the many modes. Instead we could actually make the API and code easier to understand. Therefore, remove the two step public API. All the checks and init calls are moved from klp_register_patch() to klp_enabled_patch(). Also the patch is automatically freed, including the sysfs interface when the transition to the disabled state is completed. As a result, there is never a disabled patch on the top of the stack. Therefore we do not need to check the stack in __klp_enable_patch(). And we could simplify the check in __klp_disable_patch(). Also the API and logic is much easier. It is enough to call klp_enable_patch() in module_init() call. The patch can be disabled by writing '0' into /sys/kernel/livepatch//enabled. Then the module can be removed once the transition finishes and sysfs interface is freed. The only problem is how to free the structures and kobjects safely. The operation is triggered from the sysfs interface. We could not put the related kobject from there because it would cause lock inversion between klp_mutex and kernfs locks, see kn->count lockdep map. Therefore, offload the free task to a workqueue. It is perfectly fine: + The patch can no longer be used in the livepatch operations. + The module could not be removed until the free operation finishes and module_put() is called. + The operation is asynchronous already when the first klp_try_complete_transition() fails and another call is queued with a delay. Suggested-by: Josh Poimboeuf Signed-off-by: Petr Mladek Acked-by: Miroslav Benes Acked-by: Josh Poimboeuf Signed-off-by: Jiri Kosina --- Documentation/livepatch/livepatch.txt | 135 +++++-------- include/linux/livepatch.h | 7 +- kernel/livepatch/core.c | 280 +++++++++------------------ kernel/livepatch/core.h | 2 + kernel/livepatch/transition.c | 19 +- samples/livepatch/livepatch-callbacks-demo.c | 13 +- samples/livepatch/livepatch-sample.c | 13 +- samples/livepatch/livepatch-shadow-fix1.c | 14 +- samples/livepatch/livepatch-shadow-fix2.c | 14 +- 9 files changed, 168 insertions(+), 329 deletions(-) (limited to 'include/linux') diff --git a/Documentation/livepatch/livepatch.txt b/Documentation/livepatch/livepatch.txt index 2d7ed09dbd59..8f56490a4bb6 100644 --- a/Documentation/livepatch/livepatch.txt +++ b/Documentation/livepatch/livepatch.txt @@ -12,12 +12,11 @@ Table of Contents: 4. Livepatch module 4.1. New functions 4.2. Metadata - 4.3. Livepatch module handling 5. Livepatch life-cycle - 5.1. Registration + 5.1. Loading 5.2. Enabling 5.3. Disabling - 5.4. Unregistration + 5.4. Removing 6. Sysfs 7. Limitations @@ -298,117 +297,89 @@ into three levels: see the "Consistency model" section. -4.3. Livepatch module handling ------------------------------- - -The usual behavior is that the new functions will get used when -the livepatch module is loaded. For this, the module init() function -has to register the patch (struct klp_patch) and enable it. See the -section "Livepatch life-cycle" below for more details about these -two operations. - -Module removal is only safe when there are no users of the underlying -functions. This is the reason why the force feature permanently disables -the removal. The forced tasks entered the functions but we cannot say -that they returned back. Therefore it cannot be decided when the -livepatch module can be safely removed. When the system is successfully -transitioned to a new patch state (patched/unpatched) without being -forced it is guaranteed that no task sleeps or runs in the old code. - - 5. Livepatch life-cycle ======================= -Livepatching defines four basic operations that define the life cycle of each -live patch: registration, enabling, disabling and unregistration. There are -several reasons why it is done this way. - -First, the patch is applied only when all patched symbols for already -loaded objects are found. The error handling is much easier if this -check is done before particular functions get redirected. +Livepatching can be described by four basic operations: +loading, enabling, disabling, removing. -Second, it might take some time until the entire system is migrated with -the hybrid consistency model being used. The patch revert might block -the livepatch module removal for too long. Therefore it is useful to -revert the patch using a separate operation that might be called -explicitly. But it does not make sense to remove all information until -the livepatch module is really removed. +5.1. Loading +------------ -5.1. Registration ------------------ +The only reasonable way is to enable the patch when the livepatch kernel +module is being loaded. For this, klp_enable_patch() has to be called +in the module_init() callback. There are two main reasons: -Each patch first has to be registered using klp_register_patch(). This makes -the patch known to the livepatch framework. Also it does some preliminary -computing and checks. +First, only the module has an easy access to the related struct klp_patch. -In particular, the patch is added into the list of known patches. The -addresses of the patched functions are found according to their names. -The special relocations, mentioned in the section "New functions", are -applied. The relevant entries are created under -/sys/kernel/livepatch/. The patch is rejected when any operation -fails. +Second, the error code might be used to refuse loading the module when +the patch cannot get enabled. 5.2. Enabling ------------- -Registered patches might be enabled either by calling klp_enable_patch() or -by writing '1' to /sys/kernel/livepatch//enabled. The system will -start using the new implementation of the patched functions at this stage. +The livepatch gets enabled by calling klp_enable_patch() from +the module_init() callback. The system will start using the new +implementation of the patched functions at this stage. -When a patch is enabled, livepatch enters into a transition state where -tasks are converging to the patched state. This is indicated by a value -of '1' in /sys/kernel/livepatch//transition. Once all tasks have -been patched, the 'transition' value changes to '0'. For more -information about this process, see the "Consistency model" section. +First, the addresses of the patched functions are found according to their +names. The special relocations, mentioned in the section "New functions", +are applied. The relevant entries are created under +/sys/kernel/livepatch/. The patch is rejected when any above +operation fails. -If an original function is patched for the first time, a function -specific struct klp_ops is created and an universal ftrace handler is -registered. +Second, livepatch enters into a transition state where tasks are converging +to the patched state. If an original function is patched for the first +time, a function specific struct klp_ops is created and an universal +ftrace handler is registered[*]. This stage is indicated by a value of '1' +in /sys/kernel/livepatch//transition. For more information about +this process, see the "Consistency model" section. -Functions might be patched multiple times. The ftrace handler is registered -only once for the given function. Further patches just add an entry to the -list (see field `func_stack`) of the struct klp_ops. The last added -entry is chosen by the ftrace handler and becomes the active function -replacement. +Finally, once all tasks have been patched, the 'transition' value changes +to '0'. -Note that the patches might be enabled in a different order than they were -registered. +[*] Note that functions might be patched multiple times. The ftrace handler + is registered only once for a given function. Further patches just add + an entry to the list (see field `func_stack`) of the struct klp_ops. + The right implementation is selected by the ftrace handler, see + the "Consistency model" section. 5.3. Disabling -------------- -Enabled patches might get disabled either by calling klp_disable_patch() or -by writing '0' to /sys/kernel/livepatch//enabled. At this stage -either the code from the previously enabled patch or even the original -code gets used. +Enabled patches might get disabled by writing '0' to +/sys/kernel/livepatch//enabled. -When a patch is disabled, livepatch enters into a transition state where -tasks are converging to the unpatched state. This is indicated by a -value of '1' in /sys/kernel/livepatch//transition. Once all tasks -have been unpatched, the 'transition' value changes to '0'. For more -information about this process, see the "Consistency model" section. +First, livepatch enters into a transition state where tasks are converging +to the unpatched state. The system starts using either the code from +the previously enabled patch or even the original one. This stage is +indicated by a value of '1' in /sys/kernel/livepatch//transition. +For more information about this process, see the "Consistency model" +section. -Here all the functions (struct klp_func) associated with the to-be-disabled +Second, once all tasks have been unpatched, the 'transition' value changes +to '0'. All the functions (struct klp_func) associated with the to-be-disabled patch are removed from the corresponding struct klp_ops. The ftrace handler is unregistered and the struct klp_ops is freed when the func_stack list becomes empty. -Patches must be disabled in exactly the reverse order in which they were -enabled. It makes the problem and the implementation much easier. +Third, the sysfs interface is destroyed. +Note that patches must be disabled in exactly the reverse order in which +they were enabled. It makes the problem and the implementation much easier. -5.4. Unregistration -------------------- -Disabled patches might be unregistered by calling klp_unregister_patch(). -This can be done only when the patch is disabled and the code is no longer -used. It must be called before the livepatch module gets unloaded. +5.4. Removing +------------- -At this stage, all the relevant sys-fs entries are removed and the patch -is removed from the list of known patches. +Module removal is only safe when there are no users of functions provided +by the module. This is the reason why the force feature permanently +disables the removal. Only when the system is successfully transitioned +to a new patch state (patched/unpatched) without being forced it is +guaranteed that no task sleeps or runs in the old code. 6. Sysfs diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h index 6a9165d9b090..8f9c19c69744 100644 --- a/include/linux/livepatch.h +++ b/include/linux/livepatch.h @@ -139,11 +139,12 @@ struct klp_object { * struct klp_patch - patch structure for live patching * @mod: reference to the live patch module * @objs: object entries for kernel objects to be patched - * @list: list node for global list of registered patches + * @list: list node for global list of actively used patches * @kobj: kobject for sysfs resources * @kobj_added: @kobj has been added and needs freeing * @enabled: the patch is enabled (but operation may be incomplete) * @forced: was involved in a forced transition + * @free_work: patch cleanup from workqueue-context * @finish: for waiting till it is safe to remove the patch module */ struct klp_patch { @@ -157,6 +158,7 @@ struct klp_patch { bool kobj_added; bool enabled; bool forced; + struct work_struct free_work; struct completion finish; }; @@ -168,10 +170,7 @@ struct klp_patch { func->old_name || func->new_func || func->old_sympos; \ func++) -int klp_register_patch(struct klp_patch *); -int klp_unregister_patch(struct klp_patch *); int klp_enable_patch(struct klp_patch *); -int klp_disable_patch(struct klp_patch *); void arch_klp_init_object_loaded(struct klp_patch *patch, struct klp_object *obj); diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index e77c5017ae0c..bd41b03a72d5 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -45,7 +45,11 @@ */ DEFINE_MUTEX(klp_mutex); -/* Registered patches */ +/* + * Actively used patches: enabled or in transition. Note that replaced + * or disabled patches are not listed even though the related kernel + * module still can be loaded. + */ LIST_HEAD(klp_patches); static struct kobject *klp_root_kobj; @@ -83,17 +87,6 @@ static void klp_find_object_module(struct klp_object *obj) mutex_unlock(&module_mutex); } -static bool klp_is_patch_registered(struct klp_patch *patch) -{ - struct klp_patch *mypatch; - - list_for_each_entry(mypatch, &klp_patches, list) - if (mypatch == patch) - return true; - - return false; -} - static bool klp_initialized(void) { return !!klp_root_kobj; @@ -292,7 +285,6 @@ static int klp_write_object_relocations(struct module *pmod, * /sys/kernel/livepatch/// */ static int __klp_disable_patch(struct klp_patch *patch); -static int __klp_enable_patch(struct klp_patch *patch); static ssize_t enabled_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) @@ -309,40 +301,32 @@ static ssize_t enabled_store(struct kobject *kobj, struct kobj_attribute *attr, mutex_lock(&klp_mutex); - if (!klp_is_patch_registered(patch)) { - /* - * Module with the patch could either disappear meanwhile or is - * not properly initialized yet. - */ - ret = -EINVAL; - goto err; - } - if (patch->enabled == enabled) { /* already in requested state */ ret = -EINVAL; - goto err; + goto out; } - if (patch == klp_transition_patch) { + /* + * Allow to reverse a pending transition in both ways. It might be + * necessary to complete the transition without forcing and breaking + * the system integrity. + * + * Do not allow to re-enable a disabled patch. + */ + if (patch == klp_transition_patch) klp_reverse_transition(); - } else if (enabled) { - ret = __klp_enable_patch(patch); - if (ret) - goto err; - } else { + else if (!enabled) ret = __klp_disable_patch(patch); - if (ret) - goto err; - } + else + ret = -EINVAL; +out: mutex_unlock(&klp_mutex); + if (ret) + return ret; return count; - -err: - mutex_unlock(&klp_mutex); - return ret; } static ssize_t enabled_show(struct kobject *kobj, @@ -508,7 +492,7 @@ static void klp_free_objects(struct klp_patch *patch) * The operation must be completed by calling klp_free_patch_finish() * outside klp_mutex. */ -static void klp_free_patch_start(struct klp_patch *patch) +void klp_free_patch_start(struct klp_patch *patch) { if (!list_empty(&patch->list)) list_del(&patch->list); @@ -536,6 +520,23 @@ static void klp_free_patch_finish(struct klp_patch *patch) kobject_put(&patch->kobj); wait_for_completion(&patch->finish); } + + /* Put the module after the last access to struct klp_patch. */ + if (!patch->forced) + module_put(patch->mod); +} + +/* + * The livepatch might be freed from sysfs interface created by the patch. + * This work allows to wait until the interface is destroyed in a separate + * context. + */ +static void klp_free_patch_work_fn(struct work_struct *work) +{ + struct klp_patch *patch = + container_of(work, struct klp_patch, free_work); + + klp_free_patch_finish(patch); } static int klp_init_func(struct klp_object *obj, struct klp_func *func) @@ -661,6 +662,7 @@ static int klp_init_patch_early(struct klp_patch *patch) patch->kobj_added = false; patch->enabled = false; patch->forced = false; + INIT_WORK(&patch->free_work, klp_free_patch_work_fn); init_completion(&patch->finish); klp_for_each_object(patch, obj) { @@ -673,6 +675,9 @@ static int klp_init_patch_early(struct klp_patch *patch) func->kobj_added = false; } + if (!try_module_get(patch->mod)) + return -ENODEV; + return 0; } @@ -681,115 +686,22 @@ static int klp_init_patch(struct klp_patch *patch) struct klp_object *obj; int ret; - mutex_lock(&klp_mutex); - - ret = klp_init_patch_early(patch); - if (ret) { - mutex_unlock(&klp_mutex); - return ret; - } - ret = kobject_init_and_add(&patch->kobj, &klp_ktype_patch, klp_root_kobj, "%s", patch->mod->name); - if (ret) { - mutex_unlock(&klp_mutex); + if (ret) return ret; - } patch->kobj_added = true; klp_for_each_object(patch, obj) { ret = klp_init_object(patch, obj); if (ret) - goto free; + return ret; } list_add_tail(&patch->list, &klp_patches); - mutex_unlock(&klp_mutex); - - return 0; - -free: - klp_free_patch_start(patch); - - mutex_unlock(&klp_mutex); - - klp_free_patch_finish(patch); - - return ret; -} - -/** - * klp_unregister_patch() - unregisters a patch - * @patch: Disabled patch to be unregistered - * - * Frees the data structures and removes the sysfs interface. - * - * Return: 0 on success, otherwise error - */ -int klp_unregister_patch(struct klp_patch *patch) -{ - int ret; - - mutex_lock(&klp_mutex); - - if (!klp_is_patch_registered(patch)) { - ret = -EINVAL; - goto err; - } - - if (patch->enabled) { - ret = -EBUSY; - goto err; - } - - klp_free_patch_start(patch); - - mutex_unlock(&klp_mutex); - - klp_free_patch_finish(patch); - return 0; -err: - mutex_unlock(&klp_mutex); - return ret; -} -EXPORT_SYMBOL_GPL(klp_unregister_patch); - -/** - * klp_register_patch() - registers a patch - * @patch: Patch to be registered - * - * Initializes the data structure associated with the patch and - * creates the sysfs interface. - * - * There is no need to take the reference on the patch module here. It is done - * later when the patch is enabled. - * - * Return: 0 on success, otherwise error - */ -int klp_register_patch(struct klp_patch *patch) -{ - if (!patch || !patch->mod) - return -EINVAL; - - if (!is_livepatch_module(patch->mod)) { - pr_err("module %s is not marked as a livepatch module\n", - patch->mod->name); - return -EINVAL; - } - - if (!klp_initialized()) - return -ENODEV; - - if (!klp_have_reliable_stack()) { - pr_err("This architecture doesn't have support for the livepatch consistency model.\n"); - return -ENOSYS; - } - - return klp_init_patch(patch); } -EXPORT_SYMBOL_GPL(klp_register_patch); static int __klp_disable_patch(struct klp_patch *patch) { @@ -802,8 +714,7 @@ static int __klp_disable_patch(struct klp_patch *patch) return -EBUSY; /* enforce stacking: only the last enabled patch can be disabled */ - if (!list_is_last(&patch->list, &klp_patches) && - list_next_entry(patch, list)->enabled) + if (!list_is_last(&patch->list, &klp_patches)) return -EBUSY; klp_init_transition(patch, KLP_UNPATCHED); @@ -822,44 +733,12 @@ static int __klp_disable_patch(struct klp_patch *patch) smp_wmb(); klp_start_transition(); - klp_try_complete_transition(); patch->enabled = false; + klp_try_complete_transition(); return 0; } -/** - * klp_disable_patch() - disables a registered patch - * @patch: The registered, enabled patch to be disabled - * - * Unregisters the patched functions from ftrace. - * - * Return: 0 on success, otherwise error - */ -int klp_disable_patch(struct klp_patch *patch) -{ - int ret; - - mutex_lock(&klp_mutex); - - if (!klp_is_patch_registered(patch)) { - ret = -EINVAL; - goto err; - } - - if (!patch->enabled) { - ret = -EINVAL; - goto err; - } - - ret = __klp_disable_patch(patch); - -err: - mutex_unlock(&klp_mutex); - return ret; -} -EXPORT_SYMBOL_GPL(klp_disable_patch); - static int __klp_enable_patch(struct klp_patch *patch) { struct klp_object *obj; @@ -871,17 +750,8 @@ static int __klp_enable_patch(struct klp_patch *patch) if (WARN_ON(patch->enabled)) return -EINVAL; - /* enforce stacking: only the first disabled patch can be enabled */ - if (patch->list.prev != &klp_patches && - !list_prev_entry(patch, list)->enabled) - return -EBUSY; - - /* - * A reference is taken on the patch module to prevent it from being - * unloaded. - */ - if (!try_module_get(patch->mod)) - return -ENODEV; + if (!patch->kobj_added) + return -EINVAL; pr_notice("enabling patch '%s'\n", patch->mod->name); @@ -916,8 +786,8 @@ static int __klp_enable_patch(struct klp_patch *patch) } klp_start_transition(); - klp_try_complete_transition(); patch->enabled = true; + klp_try_complete_transition(); return 0; err: @@ -928,11 +798,15 @@ err: } /** - * klp_enable_patch() - enables a registered patch - * @patch: The registered, disabled patch to be enabled + * klp_enable_patch() - enable the livepatch + * @patch: patch to be enabled * - * Performs the needed symbol lookups and code relocations, - * then registers the patched functions with ftrace. + * Initializes the data structure associated with the patch, creates the sysfs + * interface, performs the needed symbol lookups and code relocations, + * registers the patched functions with ftrace. + * + * This function is supposed to be called from the livepatch module_init() + * callback. * * Return: 0 on success, otherwise error */ @@ -940,17 +814,51 @@ int klp_enable_patch(struct klp_patch *patch) { int ret; + if (!patch || !patch->mod) + return -EINVAL; + + if (!is_livepatch_module(patch->mod)) { + pr_err("module %s is not marked as a livepatch module\n", + patch->mod->name); + return -EINVAL; + } + + if (!klp_initialized()) + return -ENODEV; + + if (!klp_have_reliable_stack()) { + pr_err("This architecture doesn't have support for the livepatch consistency model.\n"); + return -ENOSYS; + } + + mutex_lock(&klp_mutex); - if (!klp_is_patch_registered(patch)) { - ret = -EINVAL; - goto err; + ret = klp_init_patch_early(patch); + if (ret) { + mutex_unlock(&klp_mutex); + return ret; } + ret = klp_init_patch(patch); + if (ret) + goto err; + ret = __klp_enable_patch(patch); + if (ret) + goto err; + + mutex_unlock(&klp_mutex); + + return 0; err: + klp_free_patch_start(patch); + mutex_unlock(&klp_mutex); + + klp_free_patch_finish(patch); + return ret; } EXPORT_SYMBOL_GPL(klp_enable_patch); diff --git a/kernel/livepatch/core.h b/kernel/livepatch/core.h index d0cb5390e247..d4eefc520c08 100644 --- a/kernel/livepatch/core.h +++ b/kernel/livepatch/core.h @@ -7,6 +7,8 @@ extern struct mutex klp_mutex; extern struct list_head klp_patches; +void klp_free_patch_start(struct klp_patch *patch); + static inline bool klp_is_object_loaded(struct klp_object *obj) { return !obj->name || obj->mod; diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c index a4c921364003..c9917a24b3a4 100644 --- a/kernel/livepatch/transition.c +++ b/kernel/livepatch/transition.c @@ -134,13 +134,6 @@ static void klp_complete_transition(void) pr_notice("'%s': %s complete\n", klp_transition_patch->mod->name, klp_target_state == KLP_PATCHED ? "patching" : "unpatching"); - /* - * patch->forced set implies unbounded increase of module's ref count if - * the module is disabled/enabled in a loop. - */ - if (!klp_transition_patch->forced && klp_target_state == KLP_UNPATCHED) - module_put(klp_transition_patch->mod); - klp_target_state = KLP_UNDEFINED; klp_transition_patch = NULL; } @@ -357,6 +350,7 @@ void klp_try_complete_transition(void) { unsigned int cpu; struct task_struct *g, *task; + struct klp_patch *patch; bool complete = true; WARN_ON_ONCE(klp_target_state == KLP_UNDEFINED); @@ -405,7 +399,18 @@ void klp_try_complete_transition(void) } /* we're done, now cleanup the data structures */ + patch = klp_transition_patch; klp_complete_transition(); + + /* + * It would make more sense to free the patch in + * klp_complete_transition() but it is called also + * from klp_cancel_transition(). + */ + if (!patch->enabled) { + klp_free_patch_start(patch); + schedule_work(&patch->free_work); + } } /* diff --git a/samples/livepatch/livepatch-callbacks-demo.c b/samples/livepatch/livepatch-callbacks-demo.c index 72f9e6d1387b..62d97953ad02 100644 --- a/samples/livepatch/livepatch-callbacks-demo.c +++ b/samples/livepatch/livepatch-callbacks-demo.c @@ -195,22 +195,11 @@ static struct klp_patch patch = { static int livepatch_callbacks_demo_init(void) { - int ret; - - ret = klp_register_patch(&patch); - if (ret) - return ret; - ret = klp_enable_patch(&patch); - if (ret) { - WARN_ON(klp_unregister_patch(&patch)); - return ret; - } - return 0; + return klp_enable_patch(&patch); } static void livepatch_callbacks_demo_exit(void) { - WARN_ON(klp_unregister_patch(&patch)); } module_init(livepatch_callbacks_demo_init); diff --git a/samples/livepatch/livepatch-sample.c b/samples/livepatch/livepatch-sample.c index 2d554dd930e2..01c9cf003ca2 100644 --- a/samples/livepatch/livepatch-sample.c +++ b/samples/livepatch/livepatch-sample.c @@ -69,22 +69,11 @@ static struct klp_patch patch = { static int livepatch_init(void) { - int ret; - - ret = klp_register_patch(&patch); - if (ret) - return ret; - ret = klp_enable_patch(&patch); - if (ret) { - WARN_ON(klp_unregister_patch(&patch)); - return ret; - } - return 0; + return klp_enable_patch(&patch); } static void livepatch_exit(void) { - WARN_ON(klp_unregister_patch(&patch)); } module_init(livepatch_init); diff --git a/samples/livepatch/livepatch-shadow-fix1.c b/samples/livepatch/livepatch-shadow-fix1.c index e8f1bd6b29b1..a5a5cac21d4d 100644 --- a/samples/livepatch/livepatch-shadow-fix1.c +++ b/samples/livepatch/livepatch-shadow-fix1.c @@ -157,25 +157,13 @@ static struct klp_patch patch = { static int livepatch_shadow_fix1_init(void) { - int ret; - - ret = klp_register_patch(&patch); - if (ret) - return ret; - ret = klp_enable_patch(&patch); - if (ret) { - WARN_ON(klp_unregister_patch(&patch)); - return ret; - } - return 0; + return klp_enable_patch(&patch); } static void livepatch_shadow_fix1_exit(void) { /* Cleanup any existing SV_LEAK shadow variables */ klp_shadow_free_all(SV_LEAK, livepatch_fix1_dummy_leak_dtor); - - WARN_ON(klp_unregister_patch(&patch)); } module_init(livepatch_shadow_fix1_init); diff --git a/samples/livepatch/livepatch-shadow-fix2.c b/samples/livepatch/livepatch-shadow-fix2.c index b34c7bf83356..52de947b5526 100644 --- a/samples/livepatch/livepatch-shadow-fix2.c +++ b/samples/livepatch/livepatch-shadow-fix2.c @@ -129,25 +129,13 @@ static struct klp_patch patch = { static int livepatch_shadow_fix2_init(void) { - int ret; - - ret = klp_register_patch(&patch); - if (ret) - return ret; - ret = klp_enable_patch(&patch); - if (ret) { - WARN_ON(klp_unregister_patch(&patch)); - return ret; - } - return 0; + return klp_enable_patch(&patch); } static void livepatch_shadow_fix2_exit(void) { /* Cleanup any existing SV_COUNTER shadow variables */ klp_shadow_free_all(SV_COUNTER, NULL); - - WARN_ON(klp_unregister_patch(&patch)); } module_init(livepatch_shadow_fix2_init); -- cgit From 20e55025958e18e671d92c7adea00c301ac93c43 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Wed, 9 Jan 2019 13:43:24 +0100 Subject: livepatch: Use lists to manage patches, objects and functions Currently klp_patch contains a pointer to a statically allocated array of struct klp_object and struct klp_objects contains a pointer to a statically allocated array of klp_func. In order to allow for the dynamic allocation of objects and functions, link klp_patch, klp_object, and klp_func together via linked lists. This allows us to more easily allocate new objects and functions, while having the iterator be a simple linked list walk. The static structures are added to the lists early. It allows to add the dynamically allocated objects before klp_init_object() and klp_init_func() calls. Therefore it reduces the further changes to the code. This patch does not change the existing behavior. Signed-off-by: Jason Baron [pmladek@suse.com: Initialize lists before init calls] Signed-off-by: Petr Mladek Acked-by: Miroslav Benes Acked-by: Joe Lawrence Cc: Josh Poimboeuf Cc: Jiri Kosina Acked-by: Josh Poimboeuf Signed-off-by: Jiri Kosina --- include/linux/livepatch.h | 19 +++++++++++++++++-- kernel/livepatch/core.c | 9 +++++++-- 2 files changed, 24 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h index 8f9c19c69744..e117e20ff771 100644 --- a/include/linux/livepatch.h +++ b/include/linux/livepatch.h @@ -24,6 +24,7 @@ #include #include #include +#include #if IS_ENABLED(CONFIG_LIVEPATCH) @@ -42,6 +43,7 @@ * can be found (optional) * @old_func: pointer to the function being patched * @kobj: kobject for sysfs resources + * @node: list node for klp_object func_list * @stack_node: list node for klp_ops func_stack list * @old_size: size of the old function * @new_size: size of the new function @@ -80,6 +82,7 @@ struct klp_func { /* internal */ void *old_func; struct kobject kobj; + struct list_head node; struct list_head stack_node; unsigned long old_size, new_size; bool kobj_added; @@ -117,6 +120,8 @@ struct klp_callbacks { * @funcs: function entries for functions to be patched in the object * @callbacks: functions to be executed pre/post (un)patching * @kobj: kobject for sysfs resources + * @func_list: dynamic list of the function entries + * @node: list node for klp_patch obj_list * @mod: kernel module associated with the patched object * (NULL for vmlinux) * @kobj_added: @kobj has been added and needs freeing @@ -130,6 +135,8 @@ struct klp_object { /* internal */ struct kobject kobj; + struct list_head func_list; + struct list_head node; struct module *mod; bool kobj_added; bool patched; @@ -141,6 +148,7 @@ struct klp_object { * @objs: object entries for kernel objects to be patched * @list: list node for global list of actively used patches * @kobj: kobject for sysfs resources + * @obj_list: dynamic list of the object entries * @kobj_added: @kobj has been added and needs freeing * @enabled: the patch is enabled (but operation may be incomplete) * @forced: was involved in a forced transition @@ -155,6 +163,7 @@ struct klp_patch { /* internal */ struct list_head list; struct kobject kobj; + struct list_head obj_list; bool kobj_added; bool enabled; bool forced; @@ -162,14 +171,20 @@ struct klp_patch { struct completion finish; }; -#define klp_for_each_object(patch, obj) \ +#define klp_for_each_object_static(patch, obj) \ for (obj = patch->objs; obj->funcs || obj->name; obj++) -#define klp_for_each_func(obj, func) \ +#define klp_for_each_object(patch, obj) \ + list_for_each_entry(obj, &patch->obj_list, node) + +#define klp_for_each_func_static(obj, func) \ for (func = obj->funcs; \ func->old_name || func->new_func || func->old_sympos; \ func++) +#define klp_for_each_func(obj, func) \ + list_for_each_entry(func, &obj->func_list, node) + int klp_enable_patch(struct klp_patch *); void arch_klp_init_object_loaded(struct klp_patch *patch, diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index bd41b03a72d5..37d0d3645fa6 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -659,20 +659,25 @@ static int klp_init_patch_early(struct klp_patch *patch) return -EINVAL; INIT_LIST_HEAD(&patch->list); + INIT_LIST_HEAD(&patch->obj_list); patch->kobj_added = false; patch->enabled = false; patch->forced = false; INIT_WORK(&patch->free_work, klp_free_patch_work_fn); init_completion(&patch->finish); - klp_for_each_object(patch, obj) { + klp_for_each_object_static(patch, obj) { if (!obj->funcs) return -EINVAL; + INIT_LIST_HEAD(&obj->func_list); obj->kobj_added = false; + list_add_tail(&obj->node, &patch->obj_list); - klp_for_each_func(obj, func) + klp_for_each_func_static(obj, func) { func->kobj_added = false; + list_add_tail(&func->node, &obj->func_list); + } } if (!try_module_get(patch->mod)) -- cgit From e1452b607c48c642caf57299f4da83aa002f8533 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Wed, 9 Jan 2019 13:43:25 +0100 Subject: livepatch: Add atomic replace Sometimes we would like to revert a particular fix. Currently, this is not easy because we want to keep all other fixes active and we could revert only the last applied patch. One solution would be to apply new patch that implemented all the reverted functions like in the original code. It would work as expected but there will be unnecessary redirections. In addition, it would also require knowing which functions need to be reverted at build time. Another problem is when there are many patches that touch the same functions. There might be dependencies between patches that are not enforced on the kernel side. Also it might be pretty hard to actually prepare the patch and ensure compatibility with the other patches. Atomic replace && cumulative patches: A better solution would be to create cumulative patch and say that it replaces all older ones. This patch adds a new "replace" flag to struct klp_patch. When it is enabled, a set of 'nop' klp_func will be dynamically created for all functions that are already being patched but that will no longer be modified by the new patch. They are used as a new target during the patch transition. The idea is to handle Nops' structures like the static ones. When the dynamic structures are allocated, we initialize all values that are normally statically defined. The only exception is "new_func" in struct klp_func. It has to point to the original function and the address is known only when the object (module) is loaded. Note that we really need to set it. The address is used, for example, in klp_check_stack_func(). Nevertheless we still need to distinguish the dynamically allocated structures in some operations. For this, we add "nop" flag into struct klp_func and "dynamic" flag into struct klp_object. They need special handling in the following situations: + The structures are added into the lists of objects and functions immediately. In fact, the lists were created for this purpose. + The address of the original function is known only when the patched object (module) is loaded. Therefore it is copied later in klp_init_object_loaded(). + The ftrace handler must not set PC to func->new_func. It would cause infinite loop because the address points back to the beginning of the original function. + The various free() functions must free the structure itself. Note that other ways to detect the dynamic structures are not considered safe. For example, even the statically defined struct klp_object might include empty funcs array. It might be there just to run some callbacks. Also note that the safe iterator must be used in the free() functions. Otherwise already freed structures might get accessed. Special callbacks handling: The callbacks from the replaced patches are _not_ called by intention. It would be pretty hard to define a reasonable semantic and implement it. It might even be counter-productive. The new patch is cumulative. It is supposed to include most of the changes from older patches. In most cases, it will not want to call pre_unpatch() post_unpatch() callbacks from the replaced patches. It would disable/break things for no good reasons. Also it should be easier to handle various scenarios in a single script in the new patch than think about interactions caused by running many scripts from older patches. Not to say that the old scripts even would not expect to be called in this situation. Removing replaced patches: One nice effect of the cumulative patches is that the code from the older patches is no longer used. Therefore the replaced patches can be removed. It has several advantages: + Nops' structs will no longer be necessary and might be removed. This would save memory, restore performance (no ftrace handler), allow clear view on what is really patched. + Disabling the patch will cause using the original code everywhere. Therefore the livepatch callbacks could handle only one scenario. Note that the complication is already complex enough when the patch gets enabled. It is currently solved by calling callbacks only from the new cumulative patch. + The state is clean in both the sysfs interface and lsmod. The modules with the replaced livepatches might even get removed from the system. Some people actually expected this behavior from the beginning. After all a cumulative patch is supposed to "completely" replace an existing one. It is like when a new version of an application replaces an older one. This patch does the first step. It removes the replaced patches from the list of patches. It is safe. The consistency model ensures that they are no longer used. By other words, each process works only with the structures from klp_transition_patch. The removal is done by a special function. It combines actions done by __disable_patch() and klp_complete_transition(). But it is a fast track without all the transaction-related stuff. Signed-off-by: Jason Baron [pmladek@suse.com: Split, reuse existing code, simplified] Signed-off-by: Petr Mladek Cc: Josh Poimboeuf Cc: Jessica Yu Cc: Jiri Kosina Cc: Miroslav Benes Acked-by: Miroslav Benes Acked-by: Josh Poimboeuf Signed-off-by: Jiri Kosina --- Documentation/livepatch/livepatch.txt | 31 ++++- include/linux/livepatch.h | 12 ++ kernel/livepatch/core.c | 232 ++++++++++++++++++++++++++++++++-- kernel/livepatch/core.h | 1 + kernel/livepatch/patch.c | 8 ++ kernel/livepatch/transition.c | 3 + 6 files changed, 273 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/Documentation/livepatch/livepatch.txt b/Documentation/livepatch/livepatch.txt index 8f56490a4bb6..2a70f43166f6 100644 --- a/Documentation/livepatch/livepatch.txt +++ b/Documentation/livepatch/livepatch.txt @@ -15,8 +15,9 @@ Table of Contents: 5. Livepatch life-cycle 5.1. Loading 5.2. Enabling - 5.3. Disabling - 5.4. Removing + 5.3. Replacing + 5.4. Disabling + 5.5. Removing 6. Sysfs 7. Limitations @@ -300,8 +301,12 @@ into three levels: 5. Livepatch life-cycle ======================= -Livepatching can be described by four basic operations: -loading, enabling, disabling, removing. +Livepatching can be described by five basic operations: +loading, enabling, replacing, disabling, removing. + +Where the replacing and the disabling operations are mutually +exclusive. They have the same result for the given patch but +not for the system. 5.1. Loading @@ -347,7 +352,21 @@ to '0'. the "Consistency model" section. -5.3. Disabling +5.3. Replacing +-------------- + +All enabled patches might get replaced by a cumulative patch that +has the .replace flag set. + +Once the new patch is enabled and the 'transition' finishes then +all the functions (struct klp_func) associated with the replaced +patches are removed from the corresponding struct klp_ops. Also +the ftrace handler is unregistered and the struct klp_ops is +freed when the related function is not modified by the new patch +and func_stack list becomes empty. + + +5.4. Disabling -------------- Enabled patches might get disabled by writing '0' to @@ -372,7 +391,7 @@ Note that patches must be disabled in exactly the reverse order in which they were enabled. It makes the problem and the implementation much easier. -5.4. Removing +5.5. Removing ------------- Module removal is only safe when there are no users of functions provided diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h index e117e20ff771..53551f470722 100644 --- a/include/linux/livepatch.h +++ b/include/linux/livepatch.h @@ -48,6 +48,7 @@ * @old_size: size of the old function * @new_size: size of the new function * @kobj_added: @kobj has been added and needs freeing + * @nop: temporary patch to use the original code again; dyn. allocated * @patched: the func has been added to the klp_ops list * @transition: the func is currently being applied or reverted * @@ -86,6 +87,7 @@ struct klp_func { struct list_head stack_node; unsigned long old_size, new_size; bool kobj_added; + bool nop; bool patched; bool transition; }; @@ -125,6 +127,7 @@ struct klp_callbacks { * @mod: kernel module associated with the patched object * (NULL for vmlinux) * @kobj_added: @kobj has been added and needs freeing + * @dynamic: temporary object for nop functions; dynamically allocated * @patched: the object's funcs have been added to the klp_ops list */ struct klp_object { @@ -139,6 +142,7 @@ struct klp_object { struct list_head node; struct module *mod; bool kobj_added; + bool dynamic; bool patched; }; @@ -146,6 +150,7 @@ struct klp_object { * struct klp_patch - patch structure for live patching * @mod: reference to the live patch module * @objs: object entries for kernel objects to be patched + * @replace: replace all actively used patches * @list: list node for global list of actively used patches * @kobj: kobject for sysfs resources * @obj_list: dynamic list of the object entries @@ -159,6 +164,7 @@ struct klp_patch { /* external */ struct module *mod; struct klp_object *objs; + bool replace; /* internal */ struct list_head list; @@ -174,6 +180,9 @@ struct klp_patch { #define klp_for_each_object_static(patch, obj) \ for (obj = patch->objs; obj->funcs || obj->name; obj++) +#define klp_for_each_object_safe(patch, obj, tmp_obj) \ + list_for_each_entry_safe(obj, tmp_obj, &patch->obj_list, node) + #define klp_for_each_object(patch, obj) \ list_for_each_entry(obj, &patch->obj_list, node) @@ -182,6 +191,9 @@ struct klp_patch { func->old_name || func->new_func || func->old_sympos; \ func++) +#define klp_for_each_func_safe(obj, func, tmp_func) \ + list_for_each_entry_safe(func, tmp_func, &obj->func_list, node) + #define klp_for_each_func(obj, func) \ list_for_each_entry(func, &obj->func_list, node) diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index 37d0d3645fa6..ecb7660f1d8b 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -92,6 +92,40 @@ static bool klp_initialized(void) return !!klp_root_kobj; } +static struct klp_func *klp_find_func(struct klp_object *obj, + struct klp_func *old_func) +{ + struct klp_func *func; + + klp_for_each_func(obj, func) { + if ((strcmp(old_func->old_name, func->old_name) == 0) && + (old_func->old_sympos == func->old_sympos)) { + return func; + } + } + + return NULL; +} + +static struct klp_object *klp_find_object(struct klp_patch *patch, + struct klp_object *old_obj) +{ + struct klp_object *obj; + + klp_for_each_object(patch, obj) { + if (klp_is_module(old_obj)) { + if (klp_is_module(obj) && + strcmp(old_obj->name, obj->name) == 0) { + return obj; + } + } else if (!klp_is_module(obj)) { + return obj; + } + } + + return NULL; +} + struct klp_find_arg { const char *objname; const char *name; @@ -418,6 +452,121 @@ static struct attribute *klp_patch_attrs[] = { NULL }; +static void klp_free_object_dynamic(struct klp_object *obj) +{ + kfree(obj->name); + kfree(obj); +} + +static struct klp_object *klp_alloc_object_dynamic(const char *name) +{ + struct klp_object *obj; + + obj = kzalloc(sizeof(*obj), GFP_KERNEL); + if (!obj) + return NULL; + + if (name) { + obj->name = kstrdup(name, GFP_KERNEL); + if (!obj->name) { + kfree(obj); + return NULL; + } + } + + INIT_LIST_HEAD(&obj->func_list); + obj->dynamic = true; + + return obj; +} + +static void klp_free_func_nop(struct klp_func *func) +{ + kfree(func->old_name); + kfree(func); +} + +static struct klp_func *klp_alloc_func_nop(struct klp_func *old_func, + struct klp_object *obj) +{ + struct klp_func *func; + + func = kzalloc(sizeof(*func), GFP_KERNEL); + if (!func) + return NULL; + + if (old_func->old_name) { + func->old_name = kstrdup(old_func->old_name, GFP_KERNEL); + if (!func->old_name) { + kfree(func); + return NULL; + } + } + + /* + * func->new_func is same as func->old_func. These addresses are + * set when the object is loaded, see klp_init_object_loaded(). + */ + func->old_sympos = old_func->old_sympos; + func->nop = true; + + return func; +} + +static int klp_add_object_nops(struct klp_patch *patch, + struct klp_object *old_obj) +{ + struct klp_object *obj; + struct klp_func *func, *old_func; + + obj = klp_find_object(patch, old_obj); + + if (!obj) { + obj = klp_alloc_object_dynamic(old_obj->name); + if (!obj) + return -ENOMEM; + + list_add_tail(&obj->node, &patch->obj_list); + } + + klp_for_each_func(old_obj, old_func) { + func = klp_find_func(obj, old_func); + if (func) + continue; + + func = klp_alloc_func_nop(old_func, obj); + if (!func) + return -ENOMEM; + + list_add_tail(&func->node, &obj->func_list); + } + + return 0; +} + +/* + * Add 'nop' functions which simply return to the caller to run + * the original function. The 'nop' functions are added to a + * patch to facilitate a 'replace' mode. + */ +static int klp_add_nops(struct klp_patch *patch) +{ + struct klp_patch *old_patch; + struct klp_object *old_obj; + + list_for_each_entry(old_patch, &klp_patches, list) { + klp_for_each_object(old_patch, old_obj) { + int err; + + err = klp_add_object_nops(patch, old_obj); + if (err) + return err; + } + } + + return 0; +} + static void klp_kobj_release_patch(struct kobject *kobj) { struct klp_patch *patch; @@ -434,6 +583,12 @@ static struct kobj_type klp_ktype_patch = { static void klp_kobj_release_object(struct kobject *kobj) { + struct klp_object *obj; + + obj = container_of(kobj, struct klp_object, kobj); + + if (obj->dynamic) + klp_free_object_dynamic(obj); } static struct kobj_type klp_ktype_object = { @@ -443,6 +598,12 @@ static struct kobj_type klp_ktype_object = { static void klp_kobj_release_func(struct kobject *kobj) { + struct klp_func *func; + + func = container_of(kobj, struct klp_func, kobj); + + if (func->nop) + klp_free_func_nop(func); } static struct kobj_type klp_ktype_func = { @@ -452,12 +613,15 @@ static struct kobj_type klp_ktype_func = { static void klp_free_funcs(struct klp_object *obj) { - struct klp_func *func; + struct klp_func *func, *tmp_func; - klp_for_each_func(obj, func) { + klp_for_each_func_safe(obj, func, tmp_func) { /* Might be called from klp_init_patch() error path. */ - if (func->kobj_added) + if (func->kobj_added) { kobject_put(&func->kobj); + } else if (func->nop) { + klp_free_func_nop(func); + } } } @@ -468,20 +632,27 @@ static void klp_free_object_loaded(struct klp_object *obj) obj->mod = NULL; - klp_for_each_func(obj, func) + klp_for_each_func(obj, func) { func->old_func = NULL; + + if (func->nop) + func->new_func = NULL; + } } static void klp_free_objects(struct klp_patch *patch) { - struct klp_object *obj; + struct klp_object *obj, *tmp_obj; - klp_for_each_object(patch, obj) { + klp_for_each_object_safe(patch, obj, tmp_obj) { klp_free_funcs(obj); /* Might be called from klp_init_patch() error path. */ - if (obj->kobj_added) + if (obj->kobj_added) { kobject_put(&obj->kobj); + } else if (obj->dynamic) { + klp_free_object_dynamic(obj); + } } } @@ -543,7 +714,14 @@ static int klp_init_func(struct klp_object *obj, struct klp_func *func) { int ret; - if (!func->old_name || !func->new_func) + if (!func->old_name) + return -EINVAL; + + /* + * NOPs get the address later. The patched module must be loaded, + * see klp_init_object_loaded(). + */ + if (!func->new_func && !func->nop) return -EINVAL; if (strlen(func->old_name) >= KSYM_NAME_LEN) @@ -605,6 +783,9 @@ static int klp_init_object_loaded(struct klp_patch *patch, return -ENOENT; } + if (func->nop) + func->new_func = func->old_func; + ret = kallsyms_lookup_size_offset((unsigned long)func->new_func, &func->new_size, NULL); if (!ret) { @@ -697,6 +878,12 @@ static int klp_init_patch(struct klp_patch *patch) return ret; patch->kobj_added = true; + if (patch->replace) { + ret = klp_add_nops(patch); + if (ret) + return ret; + } + klp_for_each_object(patch, obj) { ret = klp_init_object(patch, obj); if (ret) @@ -868,6 +1055,35 @@ err: } EXPORT_SYMBOL_GPL(klp_enable_patch); +/* + * This function removes replaced patches. + * + * We could be pretty aggressive here. It is called in the situation where + * these structures are no longer accessible. All functions are redirected + * by the klp_transition_patch. They use either a new code or they are in + * the original code because of the special nop function patches. + * + * The only exception is when the transition was forced. In this case, + * klp_ftrace_handler() might still see the replaced patch on the stack. + * Fortunately, it is carefully designed to work with removed functions + * thanks to RCU. We only have to keep the patches on the system. Also + * this is handled transparently by patch->module_put. + */ +void klp_discard_replaced_patches(struct klp_patch *new_patch) +{ + struct klp_patch *old_patch, *tmp_patch; + + list_for_each_entry_safe(old_patch, tmp_patch, &klp_patches, list) { + if (old_patch == new_patch) + return; + + old_patch->enabled = false; + klp_unpatch_objects(old_patch); + klp_free_patch_start(old_patch); + schedule_work(&old_patch->free_work); + } +} + /* * Remove parts of patches that touch a given kernel module. The list of * patches processed might be limited. When limit is NULL, all patches diff --git a/kernel/livepatch/core.h b/kernel/livepatch/core.h index d4eefc520c08..f6a853adcc00 100644 --- a/kernel/livepatch/core.h +++ b/kernel/livepatch/core.h @@ -8,6 +8,7 @@ extern struct mutex klp_mutex; extern struct list_head klp_patches; void klp_free_patch_start(struct klp_patch *patch); +void klp_discard_replaced_patches(struct klp_patch *new_patch); static inline bool klp_is_object_loaded(struct klp_object *obj) { diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c index 825022d70912..0ff466ab4b5a 100644 --- a/kernel/livepatch/patch.c +++ b/kernel/livepatch/patch.c @@ -118,7 +118,15 @@ static void notrace klp_ftrace_handler(unsigned long ip, } } + /* + * NOPs are used to replace existing patches with original code. + * Do nothing! Setting pc would cause an infinite loop. + */ + if (func->nop) + goto unlock; + klp_arch_set_pc(regs, (unsigned long)func->new_func); + unlock: preempt_enable_notrace(); } diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c index c9917a24b3a4..f4c5908a9731 100644 --- a/kernel/livepatch/transition.c +++ b/kernel/livepatch/transition.c @@ -85,6 +85,9 @@ static void klp_complete_transition(void) klp_transition_patch->mod->name, klp_target_state == KLP_PATCHED ? "patching" : "unpatching"); + if (klp_transition_patch->replace && klp_target_state == KLP_PATCHED) + klp_discard_replaced_patches(klp_transition_patch); + if (klp_target_state == KLP_UNPATCHED) { /* * All tasks have transitioned to KLP_UNPATCHED so we can now -- cgit From 98a455d91e7116ca417bc37da6aa2dd633206a6f Mon Sep 17 00:00:00 2001 From: Shunyong Yang Date: Tue, 18 Dec 2018 14:02:45 +0800 Subject: ACPI / tables: table override from built-in initrd In some scenario, we need to build initrd with kernel in a single image. This can simplify system deployment process by downloading the whole system once, such as in IC verification. This patch adds support to override ACPI tables from built-in initrd. Signed-off-by: Shunyong Yang [ rjw: Minor cleanups ] Signed-off-by: Rafael J. Wysocki --- Documentation/acpi/initrd_table_override.txt | 4 ++++ drivers/acpi/Kconfig | 10 ++++++++++ drivers/acpi/tables.c | 12 ++++++++++-- include/linux/initrd.h | 3 +++ 4 files changed, 27 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/Documentation/acpi/initrd_table_override.txt b/Documentation/acpi/initrd_table_override.txt index eb651a6aa285..30437a6db373 100644 --- a/Documentation/acpi/initrd_table_override.txt +++ b/Documentation/acpi/initrd_table_override.txt @@ -14,6 +14,10 @@ upgrade the ACPI execution environment that is defined by the ACPI tables via upgrading the ACPI tables provided by the BIOS with an instrumented, modified, more recent version one, or installing brand new ACPI tables. +When building initrd with kernel in a single image, option +ACPI_TABLE_OVERRIDE_VIA_BUILTIN_INITRD should also be true for this +feature to work. + For a full list of ACPI tables that can be upgraded/installed, take a look at the char *table_sigs[MAX_ACPI_SIGNATURE]; definition in drivers/acpi/tables.c. diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 90ff0a47c12e..4e015c77e48e 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -357,6 +357,16 @@ config ACPI_TABLE_UPGRADE initrd, therefore it's safe to say Y. See Documentation/acpi/initrd_table_override.txt for details +config ACPI_TABLE_OVERRIDE_VIA_BUILTIN_INITRD + bool "Override ACPI tables from built-in initrd" + depends on ACPI_TABLE_UPGRADE + depends on INITRAMFS_SOURCE!="" && INITRAMFS_COMPRESSION="" + help + This option provides functionality to override arbitrary ACPI tables + from built-in uncompressed initrd. + + See Documentation/acpi/initrd_table_override.txt for details + config ACPI_DEBUG bool "Debug Statements" help diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c index 48eabb6c2d4f..8fccbe49612a 100644 --- a/drivers/acpi/tables.c +++ b/drivers/acpi/tables.c @@ -473,14 +473,22 @@ static DECLARE_BITMAP(acpi_initrd_installed, NR_ACPI_INITRD_TABLES); void __init acpi_table_upgrade(void) { - void *data = (void *)initrd_start; - size_t size = initrd_end - initrd_start; + void *data; + size_t size; int sig, no, table_nr = 0, total_offset = 0; long offset = 0; struct acpi_table_header *table; char cpio_path[32] = "kernel/firmware/acpi/"; struct cpio_data file; + if (IS_ENABLED(CONFIG_ACPI_TABLE_OVERRIDE_VIA_BUILTIN_INITRD)) { + data = __initramfs_start; + size = __initramfs_size; + } else { + data = (void *)initrd_start; + size = initrd_end - initrd_start; + } + if (data == NULL || size == 0) return; diff --git a/include/linux/initrd.h b/include/linux/initrd.h index 14beaff9b445..d77fe34fb00a 100644 --- a/include/linux/initrd.h +++ b/include/linux/initrd.h @@ -25,3 +25,6 @@ extern phys_addr_t phys_initrd_start; extern unsigned long phys_initrd_size; extern unsigned int real_root_dev; + +extern char __initramfs_start[]; +extern unsigned long __initramfs_size; -- cgit From 16118794ede91aac1a73abe15de22d3de9d2b775 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 11 Jan 2019 14:33:17 +0100 Subject: posix-cpu-timers: Remove private interval storage Posix CPU timers store the interval in private storage for historical reasons (it_interval used to be a non scalar representation on 32bit systems). This is gone and there is no reason for duplicated storage anymore. Use it_interval everywhere. Signed-off-by: Thomas Gleixner Cc: John Stultz Cc: Peter Zijlstra Cc: "H.J. Lu" Link: https://lkml.kernel.org/r/20190111133500.945255655@linutronix.de --- include/linux/posix-timers.h | 2 +- kernel/time/posix-cpu-timers.c | 13 ++++++------- 2 files changed, 7 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index e96581ca7c9d..b20798fc5191 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -12,7 +12,7 @@ struct siginfo; struct cpu_timer_list { struct list_head entry; - u64 expires, incr; + u64 expires; struct task_struct *task; int firing; }; diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 80f955210861..0a426f4e3125 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -67,13 +67,13 @@ static void bump_cpu_timer(struct k_itimer *timer, u64 now) int i; u64 delta, incr; - if (timer->it.cpu.incr == 0) + if (!timer->it_interval) return; if (now < timer->it.cpu.expires) return; - incr = timer->it.cpu.incr; + incr = timer->it_interval; delta = now + incr - timer->it.cpu.expires; /* Don't use (incr*2 < delta), incr*2 might overflow. */ @@ -520,7 +520,7 @@ static void cpu_timer_fire(struct k_itimer *timer) */ wake_up_process(timer->it_process); timer->it.cpu.expires = 0; - } else if (timer->it.cpu.incr == 0) { + } else if (!timer->it_interval) { /* * One-shot timer. Clear it as soon as it's fired. */ @@ -606,7 +606,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, */ ret = 0; - old_incr = timer->it.cpu.incr; + old_incr = timer->it_interval; old_expires = timer->it.cpu.expires; if (unlikely(timer->it.cpu.firing)) { timer->it.cpu.firing = -1; @@ -684,8 +684,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, * Install the new reload setting, and * set up the signal and overrun bookkeeping. */ - timer->it.cpu.incr = timespec64_to_ns(&new->it_interval); - timer->it_interval = ns_to_ktime(timer->it.cpu.incr); + timer->it_interval = timespec64_to_ktime(new->it_interval); /* * This acts as a modification timestamp for the timer, @@ -724,7 +723,7 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp /* * Easy part: convert the reload time. */ - itp->it_interval = ns_to_timespec64(timer->it.cpu.incr); + itp->it_interval = ktime_to_timespec64(timer->it_interval); if (!timer->it.cpu.expires) return; -- cgit From 8a62ffe2753a845272f4f2100b5fca0b6053ff6f Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Fri, 21 Dec 2018 11:33:54 +0100 Subject: PM-runtime: Add new interface to get accounted time Some drivers (like i915/drm) needs to get the accounted suspended time. pm_runtime_suspended_time() will return the suspended accounted time in ns unit. Reviewed-by: Ulf Hansson Signed-off-by: Vincent Guittot Signed-off-by: Rafael J. Wysocki --- drivers/base/power/runtime.c | 15 +++++++++++++++ include/linux/pm_runtime.h | 2 ++ 2 files changed, 17 insertions(+) (limited to 'include/linux') diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 457be03b744d..a453090c9449 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -88,6 +88,21 @@ static void __update_runtime_status(struct device *dev, enum rpm_status status) dev->power.runtime_status = status; } +u64 pm_runtime_suspended_time(struct device *dev) +{ + unsigned long flags, time; + + spin_lock_irqsave(&dev->power.lock, flags); + + update_pm_runtime_accounting(dev); + time = dev->power.suspended_jiffies; + + spin_unlock_irqrestore(&dev->power.lock, flags); + + return jiffies_to_nsecs(time); +} +EXPORT_SYMBOL_GPL(pm_runtime_suspended_time); + /** * pm_runtime_deactivate_timer - Deactivate given device's suspend timer. * @dev: Device to handle. diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 54af4eef169f..a370006921c0 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -113,6 +113,8 @@ static inline bool pm_runtime_is_irq_safe(struct device *dev) return dev->power.irq_safe; } +extern u64 pm_runtime_suspended_time(struct device *dev); + #else /* !CONFIG_PM */ static inline bool queue_pm_work(struct work_struct *work) { return false; } -- cgit From b33a02aadcc6330a61e511240b634dc11112e65e Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 9 Jan 2019 17:24:55 +0200 Subject: i2c: acpi: Move I2C bits from acpi.h to i2c.h As discussed previously the best location for certain bus related bits, e.g. I2C, is its own realm of the headers. In order to uncontaminate acpi.h move the I2C bits to i2c.h. There is no functional change intended. Link: https://lkml.org/lkml/2018/11/28/744 Signed-off-by: Andy Shevchenko Acked-by: Mika Westerberg Acked-by: Rafael J. Wysocki Signed-off-by: Wolfram Sang --- include/linux/acpi.h | 11 ----------- include/linux/i2c.h | 10 ++++++++++ 2 files changed, 10 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 87715f20b69a..13f5cb2c4763 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1061,17 +1061,6 @@ static inline int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index) } #endif -#if defined(CONFIG_ACPI) && IS_ENABLED(CONFIG_I2C) -bool i2c_acpi_get_i2c_resource(struct acpi_resource *ares, - struct acpi_resource_i2c_serialbus **i2c); -#else -static inline bool i2c_acpi_get_i2c_resource(struct acpi_resource *ares, - struct acpi_resource_i2c_serialbus **i2c) -{ - return false; -} -#endif - /* Device properties */ #ifdef CONFIG_ACPI diff --git a/include/linux/i2c.h b/include/linux/i2c.h index cba59d66c00d..1f45331924d6 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -967,11 +967,21 @@ static inline int of_i2c_get_board_info(struct device *dev, #endif /* CONFIG_OF */ +struct acpi_resource; +struct acpi_resource_i2c_serialbus; + #if IS_ENABLED(CONFIG_ACPI) +bool i2c_acpi_get_i2c_resource(struct acpi_resource *ares, + struct acpi_resource_i2c_serialbus **i2c); u32 i2c_acpi_find_bus_speed(struct device *dev); struct i2c_client *i2c_acpi_new_device(struct device *dev, int index, struct i2c_board_info *info); #else +static inline bool i2c_acpi_get_i2c_resource(struct acpi_resource *ares, + struct acpi_resource_i2c_serialbus **i2c) +{ + return false; +} static inline u32 i2c_acpi_find_bus_speed(struct device *dev) { return 0; -- cgit From 063755ab1d1c1127adc09703185967862584935b Mon Sep 17 00:00:00 2001 From: Philippe Schenker Date: Fri, 21 Dec 2018 14:46:31 +0100 Subject: mfd: stmpe: Move ADC related defines to MFD header Move defines that are ADC related to the header of the overlying MFD, so they can be used from multiple sub-devices. Signed-off-by: Philippe Schenker Acked-by: Dmitry Torokhov Signed-off-by: Lee Jones --- drivers/input/touchscreen/stmpe-ts.c | 34 +++++++++++++--------------------- include/linux/mfd/stmpe.h | 11 +++++++++++ 2 files changed, 24 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/input/touchscreen/stmpe-ts.c b/drivers/input/touchscreen/stmpe-ts.c index 2a78e27b4495..c5d9006588a2 100644 --- a/drivers/input/touchscreen/stmpe-ts.c +++ b/drivers/input/touchscreen/stmpe-ts.c @@ -49,17 +49,6 @@ #define STMPE_IRQ_TOUCH_DET 0 -#define SAMPLE_TIME(x) ((x & 0xf) << 4) -#define MOD_12B(x) ((x & 0x1) << 3) -#define REF_SEL(x) ((x & 0x1) << 1) -#define ADC_FREQ(x) (x & 0x3) -#define AVE_CTRL(x) ((x & 0x3) << 6) -#define DET_DELAY(x) ((x & 0x7) << 3) -#define SETTLING(x) (x & 0x7) -#define FRACTION_Z(x) (x & 0x7) -#define I_DRIVE(x) (x & 0x1) -#define OP_MODE(x) ((x & 0x7) << 1) - #define STMPE_TS_NAME "stmpe-ts" #define XY_MASK 0xfff @@ -213,9 +202,10 @@ static int stmpe_init_hw(struct stmpe_touch *ts) return ret; } - adc_ctrl1 = SAMPLE_TIME(ts->sample_time) | MOD_12B(ts->mod_12b) | - REF_SEL(ts->ref_sel); - adc_ctrl1_mask = SAMPLE_TIME(0xff) | MOD_12B(0xff) | REF_SEL(0xff); + adc_ctrl1 = STMPE_SAMPLE_TIME(ts->sample_time) | + STMPE_MOD_12B(ts->mod_12b) | STMPE_REF_SEL(ts->ref_sel); + adc_ctrl1_mask = STMPE_SAMPLE_TIME(0xff) | STMPE_MOD_12B(0xff) | + STMPE_REF_SEL(0xff); ret = stmpe_set_bits(stmpe, STMPE_REG_ADC_CTRL1, adc_ctrl1_mask, adc_ctrl1); @@ -225,15 +215,17 @@ static int stmpe_init_hw(struct stmpe_touch *ts) } ret = stmpe_set_bits(stmpe, STMPE_REG_ADC_CTRL2, - ADC_FREQ(0xff), ADC_FREQ(ts->adc_freq)); + STMPE_ADC_FREQ(0xff), STMPE_ADC_FREQ(ts->adc_freq)); if (ret) { dev_err(dev, "Could not setup ADC\n"); return ret; } - tsc_cfg = AVE_CTRL(ts->ave_ctrl) | DET_DELAY(ts->touch_det_delay) | - SETTLING(ts->settling); - tsc_cfg_mask = AVE_CTRL(0xff) | DET_DELAY(0xff) | SETTLING(0xff); + tsc_cfg = STMPE_AVE_CTRL(ts->ave_ctrl) | + STMPE_DET_DELAY(ts->touch_det_delay) | + STMPE_SETTLING(ts->settling); + tsc_cfg_mask = STMPE_AVE_CTRL(0xff) | STMPE_DET_DELAY(0xff) | + STMPE_SETTLING(0xff); ret = stmpe_set_bits(stmpe, STMPE_REG_TSC_CFG, tsc_cfg_mask, tsc_cfg); if (ret) { @@ -242,14 +234,14 @@ static int stmpe_init_hw(struct stmpe_touch *ts) } ret = stmpe_set_bits(stmpe, STMPE_REG_TSC_FRACTION_Z, - FRACTION_Z(0xff), FRACTION_Z(ts->fraction_z)); + STMPE_FRACTION_Z(0xff), STMPE_FRACTION_Z(ts->fraction_z)); if (ret) { dev_err(dev, "Could not config touch\n"); return ret; } ret = stmpe_set_bits(stmpe, STMPE_REG_TSC_I_DRIVE, - I_DRIVE(0xff), I_DRIVE(ts->i_drive)); + STMPE_I_DRIVE(0xff), STMPE_I_DRIVE(ts->i_drive)); if (ret) { dev_err(dev, "Could not config touch\n"); return ret; @@ -263,7 +255,7 @@ static int stmpe_init_hw(struct stmpe_touch *ts) } ret = stmpe_set_bits(stmpe, STMPE_REG_TSC_CTRL, - OP_MODE(0xff), OP_MODE(OP_MOD_XYZ)); + STMPE_OP_MODE(0xff), STMPE_OP_MODE(OP_MOD_XYZ)); if (ret) { dev_err(dev, "Could not set mode\n"); return ret; diff --git a/include/linux/mfd/stmpe.h b/include/linux/mfd/stmpe.h index 4a827af17e59..c0353f6431f9 100644 --- a/include/linux/mfd/stmpe.h +++ b/include/linux/mfd/stmpe.h @@ -10,6 +10,17 @@ #include +#define STMPE_SAMPLE_TIME(x) ((x & 0xf) << 4) +#define STMPE_MOD_12B(x) ((x & 0x1) << 3) +#define STMPE_REF_SEL(x) ((x & 0x1) << 1) +#define STMPE_ADC_FREQ(x) (x & 0x3) +#define STMPE_AVE_CTRL(x) ((x & 0x3) << 6) +#define STMPE_DET_DELAY(x) ((x & 0x7) << 3) +#define STMPE_SETTLING(x) (x & 0x7) +#define STMPE_FRACTION_Z(x) (x & 0x7) +#define STMPE_I_DRIVE(x) (x & 0x1) +#define STMPE_OP_MODE(x) ((x & 0x7) << 1) + struct device; struct regulator; -- cgit From 6377cfa3b857ced301f2079ac97de6c19057ab65 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Fri, 21 Dec 2018 14:46:32 +0100 Subject: mfd: stmpe: Preparations for STMPE ADC driver This prepares the MFD for the STMPE ADC driver. This commit introduces devicetree settings that are used by the ADC and adds an init function. Common ADC settings that are shared with the touchscreen driver can now reside in the overlying MFD. Signed-off-by: Stefan Agner Signed-off-by: Max Krummenacher Signed-off-by: Philippe Schenker Signed-off-by: Lee Jones --- drivers/mfd/Kconfig | 3 ++- drivers/mfd/stmpe.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/mfd/stmpe.h | 10 +++++++ 3 files changed, 80 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 8c5dfdce4326..bba159e8eaa4 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -1204,7 +1204,7 @@ config MFD_STMPE Currently supported devices are: - STMPE811: GPIO, Touchscreen + STMPE811: GPIO, Touchscreen, ADC STMPE1601: GPIO, Keypad STMPE1801: GPIO, Keypad STMPE2401: GPIO, Keypad @@ -1217,6 +1217,7 @@ config MFD_STMPE GPIO: stmpe-gpio Keypad: stmpe-keypad Touchscreen: stmpe-ts + ADC: stmpe-adc menu "STMicroelectronics STMPE Interface Drivers" depends on MFD_STMPE diff --git a/drivers/mfd/stmpe.c b/drivers/mfd/stmpe.c index 566caca4efd8..f582531a8f3e 100644 --- a/drivers/mfd/stmpe.c +++ b/drivers/mfd/stmpe.c @@ -463,6 +463,28 @@ static const struct mfd_cell stmpe_ts_cell = { .num_resources = ARRAY_SIZE(stmpe_ts_resources), }; +/* + * ADC (STMPE811) + */ + +static struct resource stmpe_adc_resources[] = { + { + .name = "STMPE_TEMP_SENS", + .flags = IORESOURCE_IRQ, + }, + { + .name = "STMPE_ADC", + .flags = IORESOURCE_IRQ, + }, +}; + +static const struct mfd_cell stmpe_adc_cell = { + .name = "stmpe-adc", + .of_compatible = "st,stmpe-adc", + .resources = stmpe_adc_resources, + .num_resources = ARRAY_SIZE(stmpe_adc_resources), +}; + /* * STMPE811 or STMPE610 */ @@ -497,6 +519,11 @@ static struct stmpe_variant_block stmpe811_blocks[] = { .irq = STMPE811_IRQ_TOUCH_DET, .block = STMPE_BLOCK_TOUCHSCREEN, }, + { + .cell = &stmpe_adc_cell, + .irq = STMPE811_IRQ_TEMP_SENS, + .block = STMPE_BLOCK_ADC, + }, }; static int stmpe811_enable(struct stmpe *stmpe, unsigned int blocks, @@ -517,6 +544,35 @@ static int stmpe811_enable(struct stmpe *stmpe, unsigned int blocks, enable ? 0 : mask); } +int stmpe811_adc_common_init(struct stmpe *stmpe) +{ + int ret; + u8 adc_ctrl1, adc_ctrl1_mask; + + adc_ctrl1 = STMPE_SAMPLE_TIME(stmpe->sample_time) | + STMPE_MOD_12B(stmpe->mod_12b) | + STMPE_REF_SEL(stmpe->ref_sel); + adc_ctrl1_mask = STMPE_SAMPLE_TIME(0xff) | STMPE_MOD_12B(0xff) | + STMPE_REF_SEL(0xff); + + ret = stmpe_set_bits(stmpe, STMPE811_REG_ADC_CTRL1, + adc_ctrl1_mask, adc_ctrl1); + if (ret) { + dev_err(stmpe->dev, "Could not setup ADC\n"); + return ret; + } + + ret = stmpe_set_bits(stmpe, STMPE811_REG_ADC_CTRL2, + STMPE_ADC_FREQ(0xff), STMPE_ADC_FREQ(stmpe->adc_freq)); + if (ret) { + dev_err(stmpe->dev, "Could not setup ADC\n"); + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(stmpe811_adc_common_init); + static int stmpe811_get_altfunc(struct stmpe *stmpe, enum stmpe_block block) { /* 0 for touchscreen, 1 for GPIO */ @@ -1325,6 +1381,7 @@ int stmpe_probe(struct stmpe_client_info *ci, enum stmpe_partnum partnum) struct device_node *np = ci->dev->of_node; struct stmpe *stmpe; int ret; + u32 val; pdata = devm_kzalloc(ci->dev, sizeof(*pdata), GFP_KERNEL); if (!pdata) @@ -1342,6 +1399,15 @@ int stmpe_probe(struct stmpe_client_info *ci, enum stmpe_partnum partnum) mutex_init(&stmpe->irq_lock); mutex_init(&stmpe->lock); + if (!of_property_read_u32(np, "st,sample-time", &val)) + stmpe->sample_time = val; + if (!of_property_read_u32(np, "st,mod-12b", &val)) + stmpe->mod_12b = val; + if (!of_property_read_u32(np, "st,ref-sel", &val)) + stmpe->ref_sel = val; + if (!of_property_read_u32(np, "st,adc-freq", &val)) + stmpe->adc_freq = val; + stmpe->dev = ci->dev; stmpe->client = ci->client; stmpe->pdata = pdata; @@ -1433,6 +1499,8 @@ int stmpe_remove(struct stmpe *stmpe) if (!IS_ERR(stmpe->vcc)) regulator_disable(stmpe->vcc); + __stmpe_disable(stmpe, STMPE_BLOCK_ADC); + mfd_remove_devices(stmpe->dev); return 0; diff --git a/include/linux/mfd/stmpe.h b/include/linux/mfd/stmpe.h index c0353f6431f9..07f55aac9390 100644 --- a/include/linux/mfd/stmpe.h +++ b/include/linux/mfd/stmpe.h @@ -21,6 +21,9 @@ #define STMPE_I_DRIVE(x) (x & 0x1) #define STMPE_OP_MODE(x) ((x & 0x7) << 1) +#define STMPE811_REG_ADC_CTRL1 0x20 +#define STMPE811_REG_ADC_CTRL2 0x21 + struct device; struct regulator; @@ -134,6 +137,12 @@ struct stmpe { u8 ier[2]; u8 oldier[2]; struct stmpe_platform_data *pdata; + + /* For devices that use an ADC */ + u8 sample_time; + u8 mod_12b; + u8 ref_sel; + u8 adc_freq; }; extern int stmpe_reg_write(struct stmpe *stmpe, u8 reg, u8 data); @@ -147,6 +156,7 @@ extern int stmpe_set_altfunc(struct stmpe *stmpe, u32 pins, enum stmpe_block block); extern int stmpe_enable(struct stmpe *stmpe, unsigned int blocks); extern int stmpe_disable(struct stmpe *stmpe, unsigned int blocks); +extern int stmpe811_adc_common_init(struct stmpe *stmpe); #define STMPE_GPIO_NOREQ_811_TOUCH (0xf0) -- cgit From 1d7ae53b152dbc5ba0a4f6a83ecc42ac66f52d11 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 12 Dec 2018 23:38:47 +0300 Subject: iommu: Introduce iotlb_sync_map callback Introduce iotlb_sync_map() callback that is invoked in the end of iommu_map(). This new callback allows IOMMU drivers to avoid syncing after mapping of each contiguous chunk and sync only when the whole mapping is completed, optimizing performance of the mapping operation. Signed-off-by: Dmitry Osipenko Reviewed-by: Robin Murphy Reviewed-by: Thierry Reding Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 8 ++++++-- include/linux/iommu.h | 1 + 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 3ed4db334341..ed0e63f2cd9b 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1585,13 +1585,14 @@ static size_t iommu_pgsize(struct iommu_domain *domain, int iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot) { + const struct iommu_ops *ops = domain->ops; unsigned long orig_iova = iova; unsigned int min_pagesz; size_t orig_size = size; phys_addr_t orig_paddr = paddr; int ret = 0; - if (unlikely(domain->ops->map == NULL || + if (unlikely(ops->map == NULL || domain->pgsize_bitmap == 0UL)) return -ENODEV; @@ -1620,7 +1621,7 @@ int iommu_map(struct iommu_domain *domain, unsigned long iova, pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx\n", iova, &paddr, pgsize); - ret = domain->ops->map(domain, iova, paddr, pgsize, prot); + ret = ops->map(domain, iova, paddr, pgsize, prot); if (ret) break; @@ -1629,6 +1630,9 @@ int iommu_map(struct iommu_domain *domain, unsigned long iova, size -= pgsize; } + if (ops->iotlb_sync_map) + ops->iotlb_sync_map(domain); + /* unroll mapping in case something went wrong */ if (ret) iommu_unmap(domain, orig_iova, orig_size - size); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index e90da6b6f3d1..477ef47c357c 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -201,6 +201,7 @@ struct iommu_ops { void (*flush_iotlb_all)(struct iommu_domain *domain); void (*iotlb_range_add)(struct iommu_domain *domain, unsigned long iova, size_t size); + void (*iotlb_sync_map)(struct iommu_domain *domain); void (*iotlb_sync)(struct iommu_domain *domain); phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, dma_addr_t iova); int (*add_device)(struct device *dev); -- cgit From 51908d2e9b7c7730608a19f24fc8718af745bb2f Mon Sep 17 00:00:00 2001 From: Pascal PAILLET-LME Date: Mon, 14 Jan 2019 10:05:16 +0000 Subject: mfd: stpmic1: Add STPMIC1 driver STPMIC1 is a PMIC from STMicroelectronics. The STPMIC1 integrates 10 regulators, 3 power switches, a watchdog and an input for a power on key. Signed-off-by: Pascal Paillet Signed-off-by: Lee Jones --- drivers/mfd/Kconfig | 16 ++++ drivers/mfd/Makefile | 1 + drivers/mfd/stpmic1.c | 213 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/mfd/stpmic1.h | 212 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 442 insertions(+) create mode 100644 drivers/mfd/stpmic1.c create mode 100644 include/linux/mfd/stpmic1.h (limited to 'include/linux') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 8c5dfdce4326..0761cb83d174 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -1871,6 +1871,22 @@ config MFD_STM32_TIMERS for PWM and IIO Timer. This driver allow to share the registers between the others drivers. +config MFD_STPMIC1 + tristate "Support for STPMIC1 PMIC" + depends on (I2C=y && OF) + select REGMAP_I2C + select REGMAP_IRQ + select MFD_CORE + help + Support for ST Microelectronics STPMIC1 PMIC. STPMIC1 has power on + key, watchdog and regulator functionalities which are supported via + the relevant subsystems. This driver provides core support for the + STPMIC1. In order to use the actual functionaltiy of the device other + drivers must be enabled. + + To compile this driver as a module, choose M here: the + module will be called stpmic1. + menu "Multimedia Capabilities Port drivers" depends on ARCH_SA1100 diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index 12980a4ad460..a62fb0112d9f 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -233,6 +233,7 @@ obj-$(CONFIG_INTEL_SOC_PMIC_CHTDC_TI) += intel_soc_pmic_chtdc_ti.o obj-$(CONFIG_MFD_MT6397) += mt6397-core.o obj-$(CONFIG_MFD_ALTERA_A10SR) += altera-a10sr.o +obj-$(CONFIG_MFD_STPMIC1) += stpmic1.o obj-$(CONFIG_MFD_SUN4I_GPADC) += sun4i-gpadc.o obj-$(CONFIG_MFD_STM32_LPTIMER) += stm32-lptimer.o diff --git a/drivers/mfd/stpmic1.c b/drivers/mfd/stpmic1.c new file mode 100644 index 000000000000..7dfbe8906cb8 --- /dev/null +++ b/drivers/mfd/stpmic1.c @@ -0,0 +1,213 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) STMicroelectronics 2018 +// Author: Pascal Paillet + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define STPMIC1_MAIN_IRQ 0 + +static const struct regmap_range stpmic1_readable_ranges[] = { + regmap_reg_range(TURN_ON_SR, VERSION_SR), + regmap_reg_range(SWOFF_PWRCTRL_CR, LDO6_STDBY_CR), + regmap_reg_range(BST_SW_CR, BST_SW_CR), + regmap_reg_range(INT_PENDING_R1, INT_PENDING_R4), + regmap_reg_range(INT_CLEAR_R1, INT_CLEAR_R4), + regmap_reg_range(INT_MASK_R1, INT_MASK_R4), + regmap_reg_range(INT_SET_MASK_R1, INT_SET_MASK_R4), + regmap_reg_range(INT_CLEAR_MASK_R1, INT_CLEAR_MASK_R4), + regmap_reg_range(INT_SRC_R1, INT_SRC_R1), +}; + +static const struct regmap_range stpmic1_writeable_ranges[] = { + regmap_reg_range(SWOFF_PWRCTRL_CR, LDO6_STDBY_CR), + regmap_reg_range(BST_SW_CR, BST_SW_CR), + regmap_reg_range(INT_CLEAR_R1, INT_CLEAR_R4), + regmap_reg_range(INT_SET_MASK_R1, INT_SET_MASK_R4), + regmap_reg_range(INT_CLEAR_MASK_R1, INT_CLEAR_MASK_R4), +}; + +static const struct regmap_range stpmic1_volatile_ranges[] = { + regmap_reg_range(TURN_ON_SR, VERSION_SR), + regmap_reg_range(WCHDG_CR, WCHDG_CR), + regmap_reg_range(INT_PENDING_R1, INT_PENDING_R4), + regmap_reg_range(INT_SRC_R1, INT_SRC_R4), +}; + +static const struct regmap_access_table stpmic1_readable_table = { + .yes_ranges = stpmic1_readable_ranges, + .n_yes_ranges = ARRAY_SIZE(stpmic1_readable_ranges), +}; + +static const struct regmap_access_table stpmic1_writeable_table = { + .yes_ranges = stpmic1_writeable_ranges, + .n_yes_ranges = ARRAY_SIZE(stpmic1_writeable_ranges), +}; + +static const struct regmap_access_table stpmic1_volatile_table = { + .yes_ranges = stpmic1_volatile_ranges, + .n_yes_ranges = ARRAY_SIZE(stpmic1_volatile_ranges), +}; + +const struct regmap_config stpmic1_regmap_config = { + .reg_bits = 8, + .val_bits = 8, + .cache_type = REGCACHE_RBTREE, + .max_register = PMIC_MAX_REGISTER_ADDRESS, + .rd_table = &stpmic1_readable_table, + .wr_table = &stpmic1_writeable_table, + .volatile_table = &stpmic1_volatile_table, +}; + +static const struct regmap_irq stpmic1_irqs[] = { + REGMAP_IRQ_REG(IT_PONKEY_F, 0, 0x01), + REGMAP_IRQ_REG(IT_PONKEY_R, 0, 0x02), + REGMAP_IRQ_REG(IT_WAKEUP_F, 0, 0x04), + REGMAP_IRQ_REG(IT_WAKEUP_R, 0, 0x08), + REGMAP_IRQ_REG(IT_VBUS_OTG_F, 0, 0x10), + REGMAP_IRQ_REG(IT_VBUS_OTG_R, 0, 0x20), + REGMAP_IRQ_REG(IT_SWOUT_F, 0, 0x40), + REGMAP_IRQ_REG(IT_SWOUT_R, 0, 0x80), + + REGMAP_IRQ_REG(IT_CURLIM_BUCK1, 1, 0x01), + REGMAP_IRQ_REG(IT_CURLIM_BUCK2, 1, 0x02), + REGMAP_IRQ_REG(IT_CURLIM_BUCK3, 1, 0x04), + REGMAP_IRQ_REG(IT_CURLIM_BUCK4, 1, 0x08), + REGMAP_IRQ_REG(IT_OCP_OTG, 1, 0x10), + REGMAP_IRQ_REG(IT_OCP_SWOUT, 1, 0x20), + REGMAP_IRQ_REG(IT_OCP_BOOST, 1, 0x40), + REGMAP_IRQ_REG(IT_OVP_BOOST, 1, 0x80), + + REGMAP_IRQ_REG(IT_CURLIM_LDO1, 2, 0x01), + REGMAP_IRQ_REG(IT_CURLIM_LDO2, 2, 0x02), + REGMAP_IRQ_REG(IT_CURLIM_LDO3, 2, 0x04), + REGMAP_IRQ_REG(IT_CURLIM_LDO4, 2, 0x08), + REGMAP_IRQ_REG(IT_CURLIM_LDO5, 2, 0x10), + REGMAP_IRQ_REG(IT_CURLIM_LDO6, 2, 0x20), + REGMAP_IRQ_REG(IT_SHORT_SWOTG, 2, 0x40), + REGMAP_IRQ_REG(IT_SHORT_SWOUT, 2, 0x80), + + REGMAP_IRQ_REG(IT_TWARN_F, 3, 0x01), + REGMAP_IRQ_REG(IT_TWARN_R, 3, 0x02), + REGMAP_IRQ_REG(IT_VINLOW_F, 3, 0x04), + REGMAP_IRQ_REG(IT_VINLOW_R, 3, 0x08), + REGMAP_IRQ_REG(IT_SWIN_F, 3, 0x40), + REGMAP_IRQ_REG(IT_SWIN_R, 3, 0x80), +}; + +static const struct regmap_irq_chip stpmic1_regmap_irq_chip = { + .name = "pmic_irq", + .status_base = INT_PENDING_R1, + .mask_base = INT_CLEAR_MASK_R1, + .unmask_base = INT_SET_MASK_R1, + .ack_base = INT_CLEAR_R1, + .num_regs = STPMIC1_PMIC_NUM_IRQ_REGS, + .irqs = stpmic1_irqs, + .num_irqs = ARRAY_SIZE(stpmic1_irqs), +}; + +static int stpmic1_probe(struct i2c_client *i2c, + const struct i2c_device_id *id) +{ + struct stpmic1 *ddata; + struct device *dev = &i2c->dev; + int ret; + struct device_node *np = dev->of_node; + u32 reg; + + ddata = devm_kzalloc(dev, sizeof(struct stpmic1), GFP_KERNEL); + if (!ddata) + return -ENOMEM; + + i2c_set_clientdata(i2c, ddata); + ddata->dev = dev; + + ddata->regmap = devm_regmap_init_i2c(i2c, &stpmic1_regmap_config); + if (IS_ERR(ddata->regmap)) + return PTR_ERR(ddata->regmap); + + ddata->irq = of_irq_get(np, STPMIC1_MAIN_IRQ); + if (ddata->irq < 0) { + dev_err(dev, "Failed to get main IRQ: %d\n", ddata->irq); + return ddata->irq; + } + + ret = regmap_read(ddata->regmap, VERSION_SR, ®); + if (ret) { + dev_err(dev, "Unable to read PMIC version\n"); + return ret; + } + dev_info(dev, "PMIC Chip Version: 0x%x\n", reg); + + /* Initialize PMIC IRQ Chip & associated IRQ domains */ + ret = devm_regmap_add_irq_chip(dev, ddata->regmap, ddata->irq, + IRQF_ONESHOT | IRQF_SHARED, + 0, &stpmic1_regmap_irq_chip, + &ddata->irq_data); + if (ret) { + dev_err(dev, "IRQ Chip registration failed: %d\n", ret); + return ret; + } + + return devm_of_platform_populate(dev); +} + +#ifdef CONFIG_PM_SLEEP +static int stpmic1_suspend(struct device *dev) +{ + struct i2c_client *i2c = to_i2c_client(dev); + struct stpmic1 *pmic_dev = i2c_get_clientdata(i2c); + + disable_irq(pmic_dev->irq); + + return 0; +} + +static int stpmic1_resume(struct device *dev) +{ + struct i2c_client *i2c = to_i2c_client(dev); + struct stpmic1 *pmic_dev = i2c_get_clientdata(i2c); + int ret; + + ret = regcache_sync(pmic_dev->regmap); + if (ret) + return ret; + + enable_irq(pmic_dev->irq); + + return 0; +} +#endif + +static SIMPLE_DEV_PM_OPS(stpmic1_pm, stpmic1_suspend, stpmic1_resume); + +static const struct of_device_id stpmic1_of_match[] = { + { .compatible = "st,stpmic1", }, + {}, +}; +MODULE_DEVICE_TABLE(of, stpmic1_of_match); + +static struct i2c_driver stpmic1_driver = { + .driver = { + .name = "stpmic1", + .of_match_table = of_match_ptr(stpmic1_of_match), + .pm = &stpmic1_pm, + }, + .probe = stpmic1_probe, +}; + +module_i2c_driver(stpmic1_driver); + +MODULE_DESCRIPTION("STPMIC1 PMIC Driver"); +MODULE_AUTHOR("Pascal Paillet "); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/mfd/stpmic1.h b/include/linux/mfd/stpmic1.h new file mode 100644 index 000000000000..fa3f99f7e9a1 --- /dev/null +++ b/include/linux/mfd/stpmic1.h @@ -0,0 +1,212 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) STMicroelectronics 2018 - All Rights Reserved + * Author: Philippe Peurichard , + * Pascal Paillet for STMicroelectronics. + */ + +#ifndef __LINUX_MFD_STPMIC1_H +#define __LINUX_MFD_STPMIC1_H + +#define TURN_ON_SR 0x1 +#define TURN_OFF_SR 0x2 +#define ICC_LDO_TURN_OFF_SR 0x3 +#define ICC_BUCK_TURN_OFF_SR 0x4 +#define RREQ_STATE_SR 0x5 +#define VERSION_SR 0x6 + +#define SWOFF_PWRCTRL_CR 0x10 +#define PADS_PULL_CR 0x11 +#define BUCKS_PD_CR 0x12 +#define LDO14_PD_CR 0x13 +#define LDO56_VREF_PD_CR 0x14 +#define VBUS_DET_VIN_CR 0x15 +#define PKEY_TURNOFF_CR 0x16 +#define BUCKS_MASK_RANK_CR 0x17 +#define BUCKS_MASK_RESET_CR 0x18 +#define LDOS_MASK_RANK_CR 0x19 +#define LDOS_MASK_RESET_CR 0x1A +#define WCHDG_CR 0x1B +#define WCHDG_TIMER_CR 0x1C +#define BUCKS_ICCTO_CR 0x1D +#define LDOS_ICCTO_CR 0x1E + +#define BUCK1_ACTIVE_CR 0x20 +#define BUCK2_ACTIVE_CR 0x21 +#define BUCK3_ACTIVE_CR 0x22 +#define BUCK4_ACTIVE_CR 0x23 +#define VREF_DDR_ACTIVE_CR 0x24 +#define LDO1_ACTIVE_CR 0x25 +#define LDO2_ACTIVE_CR 0x26 +#define LDO3_ACTIVE_CR 0x27 +#define LDO4_ACTIVE_CR 0x28 +#define LDO5_ACTIVE_CR 0x29 +#define LDO6_ACTIVE_CR 0x2A + +#define BUCK1_STDBY_CR 0x30 +#define BUCK2_STDBY_CR 0x31 +#define BUCK3_STDBY_CR 0x32 +#define BUCK4_STDBY_CR 0x33 +#define VREF_DDR_STDBY_CR 0x34 +#define LDO1_STDBY_CR 0x35 +#define LDO2_STDBY_CR 0x36 +#define LDO3_STDBY_CR 0x37 +#define LDO4_STDBY_CR 0x38 +#define LDO5_STDBY_CR 0x39 +#define LDO6_STDBY_CR 0x3A + +#define BST_SW_CR 0x40 + +#define INT_PENDING_R1 0x50 +#define INT_PENDING_R2 0x51 +#define INT_PENDING_R3 0x52 +#define INT_PENDING_R4 0x53 + +#define INT_DBG_LATCH_R1 0x60 +#define INT_DBG_LATCH_R2 0x61 +#define INT_DBG_LATCH_R3 0x62 +#define INT_DBG_LATCH_R4 0x63 + +#define INT_CLEAR_R1 0x70 +#define INT_CLEAR_R2 0x71 +#define INT_CLEAR_R3 0x72 +#define INT_CLEAR_R4 0x73 + +#define INT_MASK_R1 0x80 +#define INT_MASK_R2 0x81 +#define INT_MASK_R3 0x82 +#define INT_MASK_R4 0x83 + +#define INT_SET_MASK_R1 0x90 +#define INT_SET_MASK_R2 0x91 +#define INT_SET_MASK_R3 0x92 +#define INT_SET_MASK_R4 0x93 + +#define INT_CLEAR_MASK_R1 0xA0 +#define INT_CLEAR_MASK_R2 0xA1 +#define INT_CLEAR_MASK_R3 0xA2 +#define INT_CLEAR_MASK_R4 0xA3 + +#define INT_SRC_R1 0xB0 +#define INT_SRC_R2 0xB1 +#define INT_SRC_R3 0xB2 +#define INT_SRC_R4 0xB3 + +#define PMIC_MAX_REGISTER_ADDRESS INT_SRC_R4 + +#define STPMIC1_PMIC_NUM_IRQ_REGS 4 + +#define TURN_OFF_SR_ICC_EVENT 0x08 + +#define LDO_VOLTAGE_MASK GENMASK(6, 2) +#define BUCK_VOLTAGE_MASK GENMASK(7, 2) +#define LDO_BUCK_VOLTAGE_SHIFT 2 + +#define LDO_ENABLE_MASK BIT(0) +#define BUCK_ENABLE_MASK BIT(0) + +#define BUCK_HPLP_ENABLE_MASK BIT(1) +#define BUCK_HPLP_SHIFT 1 + +#define STDBY_ENABLE_MASK BIT(0) + +#define BUCKS_PD_CR_REG_MASK GENMASK(7, 0) +#define BUCK_MASK_RANK_REGISTER_MASK GENMASK(3, 0) +#define BUCK_MASK_RESET_REGISTER_MASK GENMASK(3, 0) +#define LDO1234_PULL_DOWN_REGISTER_MASK GENMASK(7, 0) +#define LDO56_VREF_PD_CR_REG_MASK GENMASK(5, 0) +#define LDO_MASK_RANK_REGISTER_MASK GENMASK(5, 0) +#define LDO_MASK_RESET_REGISTER_MASK GENMASK(5, 0) + +#define BUCK1_PULL_DOWN_REG BUCKS_PD_CR +#define BUCK1_PULL_DOWN_MASK BIT(0) +#define BUCK2_PULL_DOWN_REG BUCKS_PD_CR +#define BUCK2_PULL_DOWN_MASK BIT(2) +#define BUCK3_PULL_DOWN_REG BUCKS_PD_CR +#define BUCK3_PULL_DOWN_MASK BIT(4) +#define BUCK4_PULL_DOWN_REG BUCKS_PD_CR +#define BUCK4_PULL_DOWN_MASK BIT(6) + +#define LDO1_PULL_DOWN_REG LDO14_PD_CR +#define LDO1_PULL_DOWN_MASK BIT(0) +#define LDO2_PULL_DOWN_REG LDO14_PD_CR +#define LDO2_PULL_DOWN_MASK BIT(2) +#define LDO3_PULL_DOWN_REG LDO14_PD_CR +#define LDO3_PULL_DOWN_MASK BIT(4) +#define LDO4_PULL_DOWN_REG LDO14_PD_CR +#define LDO4_PULL_DOWN_MASK BIT(6) +#define LDO5_PULL_DOWN_REG LDO56_VREF_PD_CR +#define LDO5_PULL_DOWN_MASK BIT(0) +#define LDO6_PULL_DOWN_REG LDO56_VREF_PD_CR +#define LDO6_PULL_DOWN_MASK BIT(2) +#define VREF_DDR_PULL_DOWN_REG LDO56_VREF_PD_CR +#define VREF_DDR_PULL_DOWN_MASK BIT(4) + +#define BUCKS_ICCTO_CR_REG_MASK GENMASK(6, 0) +#define LDOS_ICCTO_CR_REG_MASK GENMASK(5, 0) + +#define LDO_BYPASS_MASK BIT(7) + +/* Main PMIC Control Register + * SWOFF_PWRCTRL_CR + * Address : 0x10 + */ +#define ICC_EVENT_ENABLED BIT(4) +#define PWRCTRL_POLARITY_HIGH BIT(3) +#define PWRCTRL_PIN_VALID BIT(2) +#define RESTART_REQUEST_ENABLED BIT(1) +#define SOFTWARE_SWITCH_OFF_ENABLED BIT(0) + +/* Main PMIC PADS Control Register + * PADS_PULL_CR + * Address : 0x11 + */ +#define WAKEUP_DETECTOR_DISABLED BIT(4) +#define PWRCTRL_PD_ACTIVE BIT(3) +#define PWRCTRL_PU_ACTIVE BIT(2) +#define WAKEUP_PD_ACTIVE BIT(1) +#define PONKEY_PU_INACTIVE BIT(0) + +/* Main PMIC VINLOW Control Register + * VBUS_DET_VIN_CRC DMSC + * Address : 0x15 + */ +#define SWIN_DETECTOR_ENABLED BIT(7) +#define SWOUT_DETECTOR_ENABLED BIT(6) +#define VINLOW_ENABLED BIT(0) +#define VINLOW_CTRL_REG_MASK GENMASK(7, 0) + +/* USB Control Register + * Address : 0x40 + */ +#define BOOST_OVP_DISABLED BIT(7) +#define VBUS_OTG_DETECTION_DISABLED BIT(6) +#define SW_OUT_DISCHARGE BIT(5) +#define VBUS_OTG_DISCHARGE BIT(4) +#define OCP_LIMIT_HIGH BIT(3) +#define SWIN_SWOUT_ENABLED BIT(2) +#define USBSW_OTG_SWITCH_ENABLED BIT(1) +#define BOOST_ENABLED BIT(0) + +/* PKEY_TURNOFF_CR + * Address : 0x16 + */ +#define PONKEY_PWR_OFF BIT(7) +#define PONKEY_CC_FLAG_CLEAR BIT(6) +#define PONKEY_TURNOFF_TIMER_MASK GENMASK(3, 0) +#define PONKEY_TURNOFF_MASK GENMASK(7, 0) + +/* + * struct stpmic1 - stpmic1 master device for sub-drivers + * @dev: master device of the chip (can be used to access platform data) + * @irq: main IRQ number + * @regmap_irq_chip_data: irq chip data + */ +struct stpmic1 { + struct device *dev; + struct regmap *regmap; + int irq; + struct regmap_irq_chip_data *irq_data; +}; + +#endif /* __LINUX_MFD_STPMIC1_H */ -- cgit From 3725cd0957615f26aef9557f72a327a75ca9a150 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Wed, 16 Jan 2019 15:34:36 -0700 Subject: dma-buf: Fix kerneldoc comment for struct dma_fence_array The kerneldoc comment for struct dma_fence_array lacks a description of the "work" member, leading to this docs-build warning: ./include/linux/dma-fence-array.h:54: warning: Function parameter or member 'work' not described in 'dma_fence_array' Add a description and make the warning go away. Signed-off-by: Jonathan Corbet Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20190116153436.3b244cda@lwn.net --- include/linux/dma-fence-array.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/dma-fence-array.h b/include/linux/dma-fence-array.h index bc8940ca280d..c0ff417b4770 100644 --- a/include/linux/dma-fence-array.h +++ b/include/linux/dma-fence-array.h @@ -40,6 +40,7 @@ struct dma_fence_array_cb { * @num_fences: number of fences in the array * @num_pending: fences in the array still pending * @fences: array of the fences + * @work: internal irq_work function */ struct dma_fence_array { struct dma_fence base; -- cgit From 6d7fbce7da0cd06ff3f3f30e009a15a6243f0bc0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 17 Jan 2019 12:02:57 -0500 Subject: kill kernfs_pin_sb() unused now and impossible to use safely anyway. Signed-off-by: Al Viro --- fs/kernfs/mount.c | 30 ------------------------------ include/linux/kernfs.h | 1 - 2 files changed, 31 deletions(-) (limited to 'include/linux') diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index d71c9405874a..4d303047a4f8 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c @@ -377,36 +377,6 @@ void kernfs_kill_sb(struct super_block *sb) kfree(info); } -/** - * kernfs_pin_sb: try to pin the superblock associated with a kernfs_root - * @kernfs_root: the kernfs_root in question - * @ns: the namespace tag - * - * Pin the superblock so the superblock won't be destroyed in subsequent - * operations. This can be used to block ->kill_sb() which may be useful - * for kernfs users which dynamically manage superblocks. - * - * Returns NULL if there's no superblock associated to this kernfs_root, or - * -EINVAL if the superblock is being freed. - */ -struct super_block *kernfs_pin_sb(struct kernfs_root *root, const void *ns) -{ - struct kernfs_super_info *info; - struct super_block *sb = NULL; - - mutex_lock(&kernfs_mutex); - list_for_each_entry(info, &root->supers, node) { - if (info->ns == ns) { - sb = info->sb; - if (!atomic_inc_not_zero(&info->sb->s_active)) - sb = ERR_PTR(-EINVAL); - break; - } - } - mutex_unlock(&kernfs_mutex); - return sb; -} - void __init kernfs_init(void) { diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 5b36b1287a5a..44acb4c3659c 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -357,7 +357,6 @@ struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags, struct kernfs_root *root, unsigned long magic, bool *new_sb_created, const void *ns); void kernfs_kill_sb(struct super_block *sb); -struct super_block *kernfs_pin_sb(struct kernfs_root *root, const void *ns); void kernfs_init(void); -- cgit From 44021606298870e4adc641ef3927e7bb47ca8236 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Tue, 15 Jan 2019 12:22:10 -0500 Subject: cpuidle: use BIT() for idle state flags and remove CPUIDLE_DRIVER_FLAGS_MASK Use BIT() macro to do a small tidy-up. CPUIDLE_DRIVER_FLAGS_MASK is not used, so remove it. Signed-off-by: Yangtao Li Signed-off-by: Rafael J. Wysocki --- include/linux/cpuidle.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 4dff74f48d4b..3b39472324a3 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -69,11 +69,9 @@ struct cpuidle_state { /* Idle State Flags */ #define CPUIDLE_FLAG_NONE (0x00) -#define CPUIDLE_FLAG_POLLING (0x01) /* polling state */ -#define CPUIDLE_FLAG_COUPLED (0x02) /* state applies to multiple cpus */ -#define CPUIDLE_FLAG_TIMER_STOP (0x04) /* timer is stopped on this state */ - -#define CPUIDLE_DRIVER_FLAGS_MASK (0xFFFF0000) +#define CPUIDLE_FLAG_POLLING BIT(0) /* polling state */ +#define CPUIDLE_FLAG_COUPLED BIT(1) /* state applies to multiple cpus */ +#define CPUIDLE_FLAG_TIMER_STOP BIT(2) /* timer is stopped on this state */ struct cpuidle_device_kobj; struct cpuidle_state_kobj; -- cgit From 5f620bb6439ea8f354cfe4c7d47887df9d3acaf0 Mon Sep 17 00:00:00 2001 From: Ran Wang Date: Thu, 17 Jan 2019 09:10:55 +0000 Subject: drivers: usb :fsl: Remove USB Errata checking code Remove USB errata checking code from driver. Applicability of erratum is retrieved by reading corresponding property in device tree. This property is written during device tree fixup. Besides, replace spaces with tabs to make code aligned. Signed-off-by: Ramneek Mehresh Signed-off-by: Nikhil Badola Signed-off-by: Yinbo Zhu Signed-off-by: Ran Wang Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-fsl.c | 7 +------ drivers/usb/host/fsl-mph-dr-of.c | 6 ++++++ include/linux/fsl_devices.h | 7 ++++--- 3 files changed, 11 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/host/ehci-fsl.c b/drivers/usb/host/ehci-fsl.c index 0a867d96c126..e3d0c1c25160 100644 --- a/drivers/usb/host/ehci-fsl.c +++ b/drivers/usb/host/ehci-fsl.c @@ -304,14 +304,9 @@ static int ehci_fsl_usb_setup(struct ehci_hcd *ehci) return -EINVAL; if (pdata->operating_mode == FSL_USB2_MPH_HOST) { - unsigned int chip, rev, svr; - - svr = mfspr(SPRN_SVR); - chip = svr >> 16; - rev = (svr >> 4) & 0xf; /* Deal with USB Erratum #14 on MPC834x Rev 1.0 & 1.1 chips */ - if ((rev == 1) && (chip >= 0x8050) && (chip <= 0x8055)) + if (pdata->has_fsl_erratum_14 == 1) ehci->has_fsl_port_bug = 1; if (pdata->port_enables & FSL_USB2_PORT0_ENABLED) diff --git a/drivers/usb/host/fsl-mph-dr-of.c b/drivers/usb/host/fsl-mph-dr-of.c index 677f9d592109..4f8b8a08c914 100644 --- a/drivers/usb/host/fsl-mph-dr-of.c +++ b/drivers/usb/host/fsl-mph-dr-of.c @@ -225,6 +225,12 @@ static int fsl_usb2_mph_dr_of_probe(struct platform_device *ofdev) pdata->has_fsl_erratum_a005697 = of_property_read_bool(np, "fsl,usb_erratum-a005697"); + if (of_get_property(np, "fsl,usb_erratum_14", NULL)) + pdata->has_fsl_erratum_14 = 1; + else + pdata->has_fsl_erratum_14 = 0; + + /* * Determine whether phy_clk_valid needs to be checked * by reading property in device tree diff --git a/include/linux/fsl_devices.h b/include/linux/fsl_devices.h index 60cef8227534..5da56a674f2f 100644 --- a/include/linux/fsl_devices.h +++ b/include/linux/fsl_devices.h @@ -98,10 +98,11 @@ struct fsl_usb2_platform_data { unsigned suspended:1; unsigned already_suspended:1; - unsigned has_fsl_erratum_a007792:1; - unsigned has_fsl_erratum_a005275:1; + unsigned has_fsl_erratum_a007792:1; + unsigned has_fsl_erratum_14:1; + unsigned has_fsl_erratum_a005275:1; unsigned has_fsl_erratum_a005697:1; - unsigned check_phy_clk_valid:1; + unsigned check_phy_clk_valid:1; /* register save area for suspend/resume */ u32 pm_command; -- cgit From 2ff5c5a1dc6e6c502e0a3e49db4e792804e43693 Mon Sep 17 00:00:00 2001 From: Martin Hostettler Date: Sat, 15 Dec 2018 15:34:20 +0100 Subject: vt: refactor vc_ques to allow of other private sequences. The vc_ques keeps track if a csi sequence is a private DEC control function beginning with '?'. Nowadays some private control functions begin with '>' and '='. Switch the code to instead use a new 3-bit vc_priv that allows for all private use parameter prefixes. Signed-off-by: Martin Hostettler Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt.c | 20 +++++++++++--------- include/linux/console_struct.h | 2 +- 2 files changed, 12 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index ec55336abf95..b59feeaaf02b 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -1341,6 +1341,8 @@ struct vc_data *vc_deallocate(unsigned int currcons) * VT102 emulator */ +enum { EPecma = 0, EPdec, EPeq, EPgt, EPlt}; + #define set_kbd(vc, x) vt_set_kbd_mode_bit((vc)->vc_num, (x)) #define clr_kbd(vc, x) vt_clr_kbd_mode_bit((vc)->vc_num, (x)) #define is_kbd(vc, x) vt_get_kbd_mode_bit((vc)->vc_num, (x)) @@ -1814,7 +1816,7 @@ static void set_mode(struct vc_data *vc, int on_off) int i; for (i = 0; i <= vc->vc_npar; i++) - if (vc->vc_ques) { + if (vc->vc_priv == EPdec) { switch(vc->vc_par[i]) { /* DEC private modes set/reset */ case 1: /* Cursor keys send ^[Ox/^[[x */ if (on_off) @@ -2030,7 +2032,7 @@ static void reset_terminal(struct vc_data *vc, int do_clear) vc->vc_top = 0; vc->vc_bottom = vc->vc_rows; vc->vc_state = ESnormal; - vc->vc_ques = 0; + vc->vc_priv = EPecma; vc->vc_translate = set_translate(LAT1_MAP, vc); vc->vc_G0_charset = LAT1_MAP; vc->vc_G1_charset = GRAF_MAP; @@ -2234,8 +2236,8 @@ static void do_con_trol(struct tty_struct *tty, struct vc_data *vc, int c) vc->vc_state=ESfunckey; return; } - vc->vc_ques = (c == '?'); - if (vc->vc_ques) + vc->vc_priv = (c == '?') ? EPdec : EPecma; + if (vc->vc_priv != EPecma) return; /* fall through */ case ESgetpars: @@ -2256,7 +2258,7 @@ static void do_con_trol(struct tty_struct *tty, struct vc_data *vc, int c) set_mode(vc, 0); return; case 'c': - if (vc->vc_ques) { + if (vc->vc_priv == EPdec) { if (vc->vc_par[0]) vc->vc_cursor_type = vc->vc_par[0] | (vc->vc_par[1] << 8) | (vc->vc_par[2] << 16); else @@ -2265,7 +2267,7 @@ static void do_con_trol(struct tty_struct *tty, struct vc_data *vc, int c) } break; case 'm': - if (vc->vc_ques) { + if (vc->vc_priv == EPdec) { clear_selection(); if (vc->vc_par[0]) vc->vc_complement_mask = vc->vc_par[0] << 8 | vc->vc_par[1]; @@ -2275,7 +2277,7 @@ static void do_con_trol(struct tty_struct *tty, struct vc_data *vc, int c) } break; case 'n': - if (!vc->vc_ques) { + if (vc->vc_priv == EPecma) { if (vc->vc_par[0] == 5) status_report(tty); else if (vc->vc_par[0] == 6) @@ -2283,8 +2285,8 @@ static void do_con_trol(struct tty_struct *tty, struct vc_data *vc, int c) } return; } - if (vc->vc_ques) { - vc->vc_ques = 0; + if (vc->vc_priv != EPecma) { + vc->vc_priv = EPecma; return; } switch(c) { diff --git a/include/linux/console_struct.h b/include/linux/console_struct.h index ab137f97ecbd..ed798e114663 100644 --- a/include/linux/console_struct.h +++ b/include/linux/console_struct.h @@ -119,7 +119,7 @@ struct vc_data { unsigned int vc_s_blink : 1; unsigned int vc_s_reverse : 1; /* misc */ - unsigned int vc_ques : 1; + unsigned int vc_priv : 3; unsigned int vc_need_wrap : 1; unsigned int vc_can_do_color : 1; unsigned int vc_report_mouse : 2; -- cgit From 570d0200123fb4f809aa2f6226e93a458d664d70 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Fri, 18 Jan 2019 10:34:59 +0800 Subject: driver core: move device->knode_class to device_private As the description of struct device_private says, it stores data which is private to driver core. And it already has similar fields like: knode_parent, knode_driver, knode_driver and knode_bus. This look it is more proper to put knode_class together with those fields to make it private to driver core. This patch move device->knode_class to device_private to make it comply with code convention. Signed-off-by: Wei Yang Reviewed-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/base/base.h | 4 ++++ drivers/base/class.c | 14 ++++++++++---- drivers/base/core.c | 4 ++-- include/linux/device.h | 1 - 4 files changed, 16 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/base.h b/drivers/base/base.h index 7a419a7a6235..37329a668935 100644 --- a/drivers/base/base.h +++ b/drivers/base/base.h @@ -60,6 +60,7 @@ struct driver_private { * @knode_parent - node in sibling list * @knode_driver - node in driver list * @knode_bus - node in bus list + * @knode_class - node in class list * @deferred_probe - entry in deferred_probe_list which is used to retry the * binding of drivers which were unable to get all the resources needed by * the device; typically because it depends on another driver getting @@ -74,6 +75,7 @@ struct device_private { struct klist_node knode_parent; struct klist_node knode_driver; struct klist_node knode_bus; + struct klist_node knode_class; struct list_head deferred_probe; struct device *device; }; @@ -83,6 +85,8 @@ struct device_private { container_of(obj, struct device_private, knode_driver) #define to_device_private_bus(obj) \ container_of(obj, struct device_private, knode_bus) +#define to_device_private_class(obj) \ + container_of(obj, struct device_private, knode_class) /* initialisation functions */ extern int devices_init(void); diff --git a/drivers/base/class.c b/drivers/base/class.c index 54def4e02f00..d8a6a5864c2e 100644 --- a/drivers/base/class.c +++ b/drivers/base/class.c @@ -117,16 +117,22 @@ static void class_put(struct class *cls) kset_put(&cls->p->subsys); } +static struct device *klist_class_to_dev(struct klist_node *n) +{ + struct device_private *p = to_device_private_class(n); + return p->device; +} + static void klist_class_dev_get(struct klist_node *n) { - struct device *dev = container_of(n, struct device, knode_class); + struct device *dev = klist_class_to_dev(n); get_device(dev); } static void klist_class_dev_put(struct klist_node *n) { - struct device *dev = container_of(n, struct device, knode_class); + struct device *dev = klist_class_to_dev(n); put_device(dev); } @@ -277,7 +283,7 @@ void class_dev_iter_init(struct class_dev_iter *iter, struct class *class, struct klist_node *start_knode = NULL; if (start) - start_knode = &start->knode_class; + start_knode = &start->p->knode_class; klist_iter_init_node(&class->p->klist_devices, &iter->ki, start_knode); iter->type = type; } @@ -304,7 +310,7 @@ struct device *class_dev_iter_next(struct class_dev_iter *iter) knode = klist_next(&iter->ki); if (!knode) return NULL; - dev = container_of(knode, struct device, knode_class); + dev = klist_class_to_dev(knode); if (!iter->type || iter->type == dev->type) return dev; } diff --git a/drivers/base/core.c b/drivers/base/core.c index 0073b09bb99f..4a4b6f8cbc4f 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -1966,7 +1966,7 @@ int device_add(struct device *dev) if (dev->class) { mutex_lock(&dev->class->p->mutex); /* tie the class to the device */ - klist_add_tail(&dev->knode_class, + klist_add_tail(&dev->p->knode_class, &dev->class->p->klist_devices); /* notify any interfaces that the device is here */ @@ -2105,7 +2105,7 @@ void device_del(struct device *dev) if (class_intf->remove_dev) class_intf->remove_dev(dev, class_intf); /* remove the device from the class list */ - klist_del(&dev->knode_class); + klist_del(&dev->p->knode_class); mutex_unlock(&dev->class->p->mutex); } device_remove_file(dev, &dev_attr_uevent); diff --git a/include/linux/device.h b/include/linux/device.h index 6cb4640b6160..d0e452fd0bff 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1035,7 +1035,6 @@ struct device { spinlock_t devres_lock; struct list_head devres_head; - struct klist_node knode_class; struct class *class; const struct attribute_group **groups; /* optional groups */ -- cgit From 1cfb2a512e74e577bb0ed7c8d76df90a41a83f6a Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 18 Jan 2019 19:15:59 +0900 Subject: LSM: Make lsm_early_cred() and lsm_early_task() local functions. Since current->cred == current->real_cred when ordered_lsm_init() is called, and lsm_early_cred()/lsm_early_task() need to be called between the amount of required bytes is determined and module specific initialization function is called, we can move these calls from individual modules to ordered_lsm_init(). Signed-off-by: Tetsuo Handa Acked-by: Casey Schaufler Signed-off-by: James Morris --- include/linux/lsm_hooks.h | 5 ----- security/apparmor/lsm.c | 2 -- security/security.c | 27 +++++++++++---------------- security/selinux/hooks.c | 1 - security/smack/smack_lsm.c | 2 -- security/tomoyo/tomoyo.c | 1 - 6 files changed, 11 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 195707210975..22fc786d723a 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -2112,9 +2112,4 @@ static inline void security_delete_hooks(struct security_hook_list *hooks, extern int lsm_inode_alloc(struct inode *inode); -#ifdef CONFIG_SECURITY -void __init lsm_early_cred(struct cred *cred); -void __init lsm_early_task(struct task_struct *task); -#endif - #endif /* ! __LINUX_LSM_HOOKS_H */ diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index b6c395e2acd0..bb5a02d2439f 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -1484,8 +1484,6 @@ static int __init set_init_ctx(void) { struct cred *cred = (struct cred *)current->real_cred; - lsm_early_cred(cred); - lsm_early_task(current); set_cred_label(cred, aa_get_label(ns_unconfined(root_ns))); return 0; diff --git a/security/security.c b/security/security.c index a618e22df5c6..992b612c819a 100644 --- a/security/security.c +++ b/security/security.c @@ -278,6 +278,9 @@ static void __init ordered_lsm_parse(const char *order, const char *origin) kfree(sep); } +static void __init lsm_early_cred(struct cred *cred); +static void __init lsm_early_task(struct task_struct *task); + static void __init ordered_lsm_init(void) { struct lsm_info **lsm; @@ -312,6 +315,8 @@ static void __init ordered_lsm_init(void) blob_sizes.lbs_inode, 0, SLAB_PANIC, NULL); + lsm_early_cred((struct cred *) current->cred); + lsm_early_task(current); for (lsm = ordered_lsms; *lsm; lsm++) initialize_lsm(*lsm); @@ -465,17 +470,12 @@ static int lsm_cred_alloc(struct cred *cred, gfp_t gfp) * lsm_early_cred - during initialization allocate a composite cred blob * @cred: the cred that needs a blob * - * Allocate the cred blob for all the modules if it's not already there + * Allocate the cred blob for all the modules */ -void __init lsm_early_cred(struct cred *cred) +static void __init lsm_early_cred(struct cred *cred) { - int rc; + int rc = lsm_cred_alloc(cred, GFP_KERNEL); - if (cred == NULL) - panic("%s: NULL cred.\n", __func__); - if (cred->security != NULL) - return; - rc = lsm_cred_alloc(cred, GFP_KERNEL); if (rc) panic("%s: Early cred alloc failed.\n", __func__); } @@ -589,17 +589,12 @@ int lsm_msg_msg_alloc(struct msg_msg *mp) * lsm_early_task - during initialization allocate a composite task blob * @task: the task that needs a blob * - * Allocate the task blob for all the modules if it's not already there + * Allocate the task blob for all the modules */ -void __init lsm_early_task(struct task_struct *task) +static void __init lsm_early_task(struct task_struct *task) { - int rc; + int rc = lsm_task_alloc(task); - if (task == NULL) - panic("%s: task cred.\n", __func__); - if (task->security != NULL) - return; - rc = lsm_task_alloc(task); if (rc) panic("%s: Early task alloc failed.\n", __func__); } diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index b2ee49f938f1..5d92167dbe05 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -207,7 +207,6 @@ static void cred_init_security(void) struct cred *cred = (struct cred *) current->real_cred; struct task_security_struct *tsec; - lsm_early_cred(cred); tsec = selinux_cred(cred); tsec->osid = tsec->sid = SECINITSID_KERNEL; } diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 0b848b1f6366..79d6d2a6a0bc 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -4671,8 +4671,6 @@ static __init int smack_init(void) if (!smack_inode_cache) return -ENOMEM; - lsm_early_cred(cred); - /* * Set the security state for the initial task. */ diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c index 066c0daf0efc..2b3eee06004b 100644 --- a/security/tomoyo/tomoyo.c +++ b/security/tomoyo/tomoyo.c @@ -566,7 +566,6 @@ static int __init tomoyo_init(void) /* register ourselves with the security framework */ security_add_hooks(tomoyo_hooks, ARRAY_SIZE(tomoyo_hooks), "tomoyo"); printk(KERN_INFO "TOMOYO Linux initialized\n"); - lsm_early_cred(cred); blob = tomoyo_cred(cred); *blob = &tomoyo_kernel_domain; tomoyo_mm_init(); -- cgit From 7527a7b157d1191b23562ed70154ae93bd65f845 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 17 Jan 2019 20:14:15 +0200 Subject: IB/core: Simplify rdma cgroup registration RDMA cgroup registration routine always returns success, so simplify function to be void and run clang formatter over whole CONFIG_CGROUP_RDMA art of core_priv.h. This reduces unwinding error path for regular registration and future net namespace change functionality for rdma device. Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Acked-by: Tejun Heo Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cgroup.c | 5 ++--- drivers/infiniband/core/core_priv.h | 17 +++++++++++------ drivers/infiniband/core/device.c | 8 +------- include/linux/cgroup_rdma.h | 2 +- kernel/cgroup/rdma.c | 5 +---- 5 files changed, 16 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/core/cgroup.c b/drivers/infiniband/core/cgroup.c index 126ac5f99db7..388fd04e5f63 100644 --- a/drivers/infiniband/core/cgroup.c +++ b/drivers/infiniband/core/cgroup.c @@ -21,12 +21,11 @@ * Register with the rdma cgroup. Should be called before * exposing rdma device to user space applications to avoid * resource accounting leak. - * Returns 0 on success or otherwise failure code. */ -int ib_device_register_rdmacg(struct ib_device *device) +void ib_device_register_rdmacg(struct ib_device *device) { device->cg_device.name = device->name; - return rdmacg_register_device(&device->cg_device); + rdmacg_register_device(&device->cg_device); } /** diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index aca75c74e451..42a49982f66e 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -115,7 +115,7 @@ void ib_cache_cleanup_one(struct ib_device *device); void ib_cache_release_one(struct ib_device *device); #ifdef CONFIG_CGROUP_RDMA -int ib_device_register_rdmacg(struct ib_device *device); +void ib_device_register_rdmacg(struct ib_device *device); void ib_device_unregister_rdmacg(struct ib_device *device); int ib_rdmacg_try_charge(struct ib_rdmacg_object *cg_obj, @@ -126,21 +126,26 @@ void ib_rdmacg_uncharge(struct ib_rdmacg_object *cg_obj, struct ib_device *device, enum rdmacg_resource_type resource_index); #else -static inline int ib_device_register_rdmacg(struct ib_device *device) -{ return 0; } +static inline void ib_device_register_rdmacg(struct ib_device *device) +{ +} static inline void ib_device_unregister_rdmacg(struct ib_device *device) -{ } +{ +} static inline int ib_rdmacg_try_charge(struct ib_rdmacg_object *cg_obj, struct ib_device *device, enum rdmacg_resource_type resource_index) -{ return 0; } +{ + return 0; +} static inline void ib_rdmacg_uncharge(struct ib_rdmacg_object *cg_obj, struct ib_device *device, enum rdmacg_resource_type resource_index) -{ } +{ +} #endif static inline bool rdma_is_upper_dev_rcu(struct net_device *dev, diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 4a9aa6d10c5e..200431c540f2 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -599,12 +599,7 @@ int ib_register_device(struct ib_device *device, const char *name) device->index = __dev_new_index(); - ret = ib_device_register_rdmacg(device); - if (ret) { - dev_warn(&device->dev, - "Couldn't register device with rdma cgroup\n"); - goto dev_cleanup; - } + ib_device_register_rdmacg(device); ret = ib_device_register_sysfs(device); if (ret) { @@ -627,7 +622,6 @@ int ib_register_device(struct ib_device *device, const char *name) cg_cleanup: ib_device_unregister_rdmacg(device); -dev_cleanup: cleanup_device(device); out: mutex_unlock(&device_mutex); diff --git a/include/linux/cgroup_rdma.h b/include/linux/cgroup_rdma.h index e94290b29e99..ef1bae2983f3 100644 --- a/include/linux/cgroup_rdma.h +++ b/include/linux/cgroup_rdma.h @@ -39,7 +39,7 @@ struct rdmacg_device { * APIs for RDMA/IB stack to publish when a device wants to * participate in resource accounting */ -int rdmacg_register_device(struct rdmacg_device *device); +void rdmacg_register_device(struct rdmacg_device *device); void rdmacg_unregister_device(struct rdmacg_device *device); /* APIs for RDMA/IB stack to charge/uncharge pool specific resources */ diff --git a/kernel/cgroup/rdma.c b/kernel/cgroup/rdma.c index d3bbb757ee49..1d75ae7f1cb7 100644 --- a/kernel/cgroup/rdma.c +++ b/kernel/cgroup/rdma.c @@ -313,10 +313,8 @@ EXPORT_SYMBOL(rdmacg_try_charge); * If IB stack wish a device to participate in rdma cgroup resource * tracking, it must invoke this API to register with rdma cgroup before * any user space application can start using the RDMA resources. - * Returns 0 on success or EINVAL when table length given is beyond - * supported size. */ -int rdmacg_register_device(struct rdmacg_device *device) +void rdmacg_register_device(struct rdmacg_device *device) { INIT_LIST_HEAD(&device->dev_node); INIT_LIST_HEAD(&device->rpools); @@ -324,7 +322,6 @@ int rdmacg_register_device(struct rdmacg_device *device) mutex_lock(&rdmacg_mutex); list_add_tail(&device->dev_node, &rdmacg_devices); mutex_unlock(&rdmacg_mutex); - return 0; } EXPORT_SYMBOL(rdmacg_register_device); -- cgit From f5d782d46aa5d4dd369e6560ce5227136b58926f Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Thu, 13 Dec 2018 02:38:58 +0100 Subject: power: supply: isp1704: switch to gpiod API This migrates isp1704 driver from old GPIO API to new descriptor based GPIO API and drops useless platform data as a side-effect. Migration is simple, since all mainline users are DT based and DT API does not change. Out of tree users of the platform data need to migrate to gpiod_lookup_table as described here: Documentation/driver-api/gpio/board.rst Reviewed-by: Linus Walleij Acked-by: Pavel Machek Signed-off-by: Sebastian Reichel --- drivers/power/supply/isp1704_charger.c | 60 +++++++--------------------------- include/linux/power/isp1704_charger.h | 30 ----------------- 2 files changed, 12 insertions(+), 78 deletions(-) delete mode 100644 include/linux/power/isp1704_charger.h (limited to 'include/linux') diff --git a/drivers/power/supply/isp1704_charger.c b/drivers/power/supply/isp1704_charger.c index 95af5f305838..a63cb5dcfa08 100644 --- a/drivers/power/supply/isp1704_charger.c +++ b/drivers/power/supply/isp1704_charger.c @@ -30,13 +30,12 @@ #include #include #include -#include +#include #include #include #include #include -#include /* Vendor specific Power Control register */ #define ISP1704_PWR_CTRL 0x3d @@ -60,6 +59,7 @@ struct isp1704_charger { struct device *dev; struct power_supply *psy; struct power_supply_desc psy_desc; + struct gpio_desc *enable_gpio; struct usb_phy *phy; struct notifier_block nb; struct work_struct work; @@ -81,18 +81,9 @@ static inline int isp1704_write(struct isp1704_charger *isp, u32 reg, u32 val) return usb_phy_io_write(isp->phy, val, reg); } -/* - * Disable/enable the power from the isp1704 if a function for it - * has been provided with platform data. - */ static void isp1704_charger_set_power(struct isp1704_charger *isp, bool on) { - struct isp1704_charger_data *board = isp->dev->platform_data; - - if (board && board->set_power) - board->set_power(on); - else if (board) - gpio_set_value(board->enable_gpio, on); + gpiod_set_value(isp->enable_gpio, on); } /* @@ -405,46 +396,19 @@ static int isp1704_charger_probe(struct platform_device *pdev) int ret = -ENODEV; struct power_supply_config psy_cfg = {}; - struct isp1704_charger_data *pdata = dev_get_platdata(&pdev->dev); - struct device_node *np = pdev->dev.of_node; - - if (np) { - int gpio = of_get_named_gpio(np, "nxp,enable-gpio", 0); - - if (gpio < 0) { - dev_err(&pdev->dev, "missing DT GPIO nxp,enable-gpio\n"); - return gpio; - } - - pdata = devm_kzalloc(&pdev->dev, - sizeof(struct isp1704_charger_data), GFP_KERNEL); - if (!pdata) { - ret = -ENOMEM; - goto fail0; - } - pdata->enable_gpio = gpio; - - dev_info(&pdev->dev, "init gpio %d\n", pdata->enable_gpio); - - ret = devm_gpio_request_one(&pdev->dev, pdata->enable_gpio, - GPIOF_OUT_INIT_HIGH, "isp1704_reset"); - if (ret) { - dev_err(&pdev->dev, "gpio request failed\n"); - goto fail0; - } - } - - if (!pdata) { - dev_err(&pdev->dev, "missing platform data!\n"); - return -ENODEV; - } - - isp = devm_kzalloc(&pdev->dev, sizeof(*isp), GFP_KERNEL); if (!isp) return -ENOMEM; - if (np) + isp->enable_gpio = devm_gpiod_get(&pdev->dev, "nxp,enable", + GPIOD_OUT_HIGH); + if (IS_ERR(isp->enable_gpio)) { + ret = PTR_ERR(isp->enable_gpio); + dev_err(&pdev->dev, "Could not get reset gpio: %d\n", ret); + return ret; + } + + if (pdev->dev.of_node) isp->phy = devm_usb_get_phy_by_phandle(&pdev->dev, "usb-phy", 0); else isp->phy = devm_usb_get_phy(&pdev->dev, USB_PHY_TYPE_USB2); diff --git a/include/linux/power/isp1704_charger.h b/include/linux/power/isp1704_charger.h deleted file mode 100644 index 0105d9e7af85..000000000000 --- a/include/linux/power/isp1704_charger.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * ISP1704 USB Charger Detection driver - * - * Copyright (C) 2011 Nokia Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - - -#ifndef __ISP1704_CHARGER_H -#define __ISP1704_CHARGER_H - -struct isp1704_charger_data { - void (*set_power)(bool on); - int enable_gpio; -}; - -#endif -- cgit From 486efe9f8e30bac1e236f867df164f4966f3e207 Mon Sep 17 00:00:00 2001 From: Andrew Murray Date: Thu, 10 Jan 2019 13:53:24 +0000 Subject: perf/core: Add function to test for event exclusion flags Add a function that tests if any of the perf event exclusion flags are set on a given event. Signed-off-by: Andrew Murray Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Ivan Kokshaysky Cc: Linus Torvalds Cc: Mark Rutland Cc: Matt Turner Cc: Michael Ellerman Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Richard Henderson Cc: Russell King Cc: Sascha Hauer Cc: Shawn Guo Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linuxppc-dev@lists.ozlabs.org Cc: robin.murphy@arm.com Cc: suzuki.poulose@arm.com Link: https://lkml.kernel.org/r/1547128414-50693-3-git-send-email-andrew.murray@arm.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 1d5c551a5add..54a78d22f0a6 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1004,6 +1004,15 @@ perf_event__output_id_sample(struct perf_event *event, extern void perf_log_lost_samples(struct perf_event *event, u64 lost); +static inline bool event_has_any_exclude_flag(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + + return attr->exclude_idle || attr->exclude_user || + attr->exclude_kernel || attr->exclude_hv || + attr->exclude_guest || attr->exclude_host; +} + static inline bool is_sampling_event(struct perf_event *event) { return event->attr.sample_period != 0; -- cgit From cc6795aeffea0a80d0baf9ad31ba926a6c42cef5 Mon Sep 17 00:00:00 2001 From: Andrew Murray Date: Thu, 10 Jan 2019 13:53:25 +0000 Subject: perf/core: Add PERF_PMU_CAP_NO_EXCLUDE for exclusion incapable PMUs Many PMU drivers do not have the capability to exclude counting events that occur in specific contexts such as idle, kernel, guest, etc. These drivers indicate this by returning an error in their event_init upon testing the events attribute flags. This approach is error prone and often inconsistent. Let's instead allow PMU drivers to advertise their inability to exclude based on context via a new capability: PERF_PMU_CAP_NO_EXCLUDE. This allows the perf core to reject requests for exclusion events where there is no support in the PMU. Signed-off-by: Andrew Murray Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Ivan Kokshaysky Cc: Linus Torvalds Cc: Mark Rutland Cc: Matt Turner Cc: Michael Ellerman Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Richard Henderson Cc: Russell King Cc: Sascha Hauer Cc: Shawn Guo Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linuxppc-dev@lists.ozlabs.org Cc: robin.murphy@arm.com Cc: suzuki.poulose@arm.com Link: https://lkml.kernel.org/r/1547128414-50693-4-git-send-email-andrew.murray@arm.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 1 + kernel/events/core.c | 9 +++++++++ 2 files changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 54a78d22f0a6..cec02dc63b51 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -244,6 +244,7 @@ struct perf_event; #define PERF_PMU_CAP_EXCLUSIVE 0x10 #define PERF_PMU_CAP_ITRACE 0x20 #define PERF_PMU_CAP_HETEROGENEOUS_CPUS 0x40 +#define PERF_PMU_CAP_NO_EXCLUDE 0x80 /** * struct pmu - generic performance monitoring unit diff --git a/kernel/events/core.c b/kernel/events/core.c index 3cd13a30f732..fbe59b793b36 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -9772,6 +9772,15 @@ static int perf_try_init_event(struct pmu *pmu, struct perf_event *event) if (ctx) perf_event_ctx_unlock(event->group_leader, ctx); + if (!ret) { + if (pmu->capabilities & PERF_PMU_CAP_NO_EXCLUDE && + event_has_any_exclude_flag(event)) { + if (event->destroy) + event->destroy(event); + ret = -EINVAL; + } + } + if (ret) module_put(pmu->module); -- cgit From 8321be6a9df5c5cfbf3fb5f716caf8698a5a7016 Mon Sep 17 00:00:00 2001 From: Amit Kucheria Date: Mon, 21 Jan 2019 14:17:37 +0530 Subject: cpufreq: Replace open-coded << with BIT() Minor clean-up to use BIT() and keep checkpatch happy. Clean up the comment formatting while we're at it to make it easier to read. Signed-off-by: Amit Kucheria Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index c86d6d8bdfed..bd7fbd6a4478 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -346,14 +346,15 @@ struct cpufreq_driver { }; /* flags */ -#define CPUFREQ_STICKY (1 << 0) /* driver isn't removed even if - all ->init() calls failed */ -#define CPUFREQ_CONST_LOOPS (1 << 1) /* loops_per_jiffy or other - kernel "constants" aren't - affected by frequency - transitions */ -#define CPUFREQ_PM_NO_WARN (1 << 2) /* don't warn on suspend/resume - speed mismatches */ + +/* driver isn't removed even if all ->init() calls failed */ +#define CPUFREQ_STICKY BIT(0) + +/* loops_per_jiffy or other kernel "constants" aren't affected by frequency transitions */ +#define CPUFREQ_CONST_LOOPS BIT(1) + +/* don't warn on suspend/resume speed mismatches */ +#define CPUFREQ_PM_NO_WARN BIT(2) /* * This should be set by platforms having multiple clock-domains, i.e. @@ -361,14 +362,14 @@ struct cpufreq_driver { * be created in cpu/cpu/cpufreq/ directory and so they can use the same * governor with different tunables for different clusters. */ -#define CPUFREQ_HAVE_GOVERNOR_PER_POLICY (1 << 3) +#define CPUFREQ_HAVE_GOVERNOR_PER_POLICY BIT(3) /* * Driver will do POSTCHANGE notifications from outside of their ->target() * routine and so must set cpufreq_driver->flags with this flag, so that core * can handle them specially. */ -#define CPUFREQ_ASYNC_NOTIFICATION (1 << 4) +#define CPUFREQ_ASYNC_NOTIFICATION BIT(4) /* * Set by drivers which want cpufreq core to check if CPU is running at a @@ -377,13 +378,13 @@ struct cpufreq_driver { * from the table. And if that fails, we will stop further boot process by * issuing a BUG_ON(). */ -#define CPUFREQ_NEED_INITIAL_FREQ_CHECK (1 << 5) +#define CPUFREQ_NEED_INITIAL_FREQ_CHECK BIT(5) /* * Set by drivers to disallow use of governors with "dynamic_switching" flag * set. */ -#define CPUFREQ_NO_AUTO_DYNAMIC_SWITCHING (1 << 6) +#define CPUFREQ_NO_AUTO_DYNAMIC_SWITCHING BIT(6) int cpufreq_register_driver(struct cpufreq_driver *driver_data); int cpufreq_unregister_driver(struct cpufreq_driver *driver_data); -- cgit From bbe7449e2599b58cf7b995461e2189998111f907 Mon Sep 17 00:00:00 2001 From: Phillip Potter Date: Mon, 21 Jan 2019 00:54:27 +0000 Subject: fs: common implementation of file type Many file systems use a copy&paste implementation of dirent to on-disk file type conversions. Create a common implementation to be used by file systems with some useful conversion helpers to reduce open coded file type conversions in file system code. Signed-off-by: Amir Goldstein Signed-off-by: Phillip Potter Signed-off-by: Jan Kara --- MAINTAINERS | 1 + fs/Makefile | 3 +- fs/fs_types.c | 105 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/fs.h | 17 +------- include/linux/fs_types.h | 75 +++++++++++++++++++++++++++++++++ 5 files changed, 184 insertions(+), 17 deletions(-) create mode 100644 fs/fs_types.c create mode 100644 include/linux/fs_types.h (limited to 'include/linux') diff --git a/MAINTAINERS b/MAINTAINERS index 51029a425dbe..0afaaf0aa6be 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5881,6 +5881,7 @@ L: linux-fsdevel@vger.kernel.org S: Maintained F: fs/* F: include/linux/fs.h +F: include/linux/fs_types.h F: include/uapi/linux/fs.h FINTEK F75375S HARDWARE MONITOR AND FAN CONTROLLER DRIVER diff --git a/fs/Makefile b/fs/Makefile index 293733f61594..23fcd8c164a3 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -12,7 +12,8 @@ obj-y := open.o read_write.o file_table.o super.o \ attr.o bad_inode.o file.o filesystems.o namespace.o \ seq_file.o xattr.o libfs.o fs-writeback.o \ pnode.o splice.o sync.o utimes.o d_path.o \ - stack.o fs_struct.o statfs.o fs_pin.o nsfs.o + stack.o fs_struct.o statfs.o fs_pin.o nsfs.o \ + fs_types.o ifeq ($(CONFIG_BLOCK),y) obj-y += buffer.o block_dev.o direct-io.o mpage.o diff --git a/fs/fs_types.c b/fs/fs_types.c new file mode 100644 index 000000000000..78365e5dc08c --- /dev/null +++ b/fs/fs_types.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include + +/* + * fs on-disk file type to dirent file type conversion + */ +static const unsigned char fs_dtype_by_ftype[FT_MAX] = { + [FT_UNKNOWN] = DT_UNKNOWN, + [FT_REG_FILE] = DT_REG, + [FT_DIR] = DT_DIR, + [FT_CHRDEV] = DT_CHR, + [FT_BLKDEV] = DT_BLK, + [FT_FIFO] = DT_FIFO, + [FT_SOCK] = DT_SOCK, + [FT_SYMLINK] = DT_LNK +}; + +/** + * fs_ftype_to_dtype() - fs on-disk file type to dirent type. + * @filetype: The on-disk file type to convert. + * + * This function converts the on-disk file type value (FT_*) to the directory + * entry type (DT_*). + * + * Context: Any context. + * Return: + * * DT_UNKNOWN - Unknown type + * * DT_FIFO - FIFO + * * DT_CHR - Character device + * * DT_DIR - Directory + * * DT_BLK - Block device + * * DT_REG - Regular file + * * DT_LNK - Symbolic link + * * DT_SOCK - Local-domain socket + */ +unsigned char fs_ftype_to_dtype(unsigned int filetype) +{ + if (filetype >= FT_MAX) + return DT_UNKNOWN; + + return fs_dtype_by_ftype[filetype]; +} +EXPORT_SYMBOL_GPL(fs_ftype_to_dtype); + +/* + * dirent file type to fs on-disk file type conversion + * Values not initialized explicitly are FT_UNKNOWN (0). + */ +static const unsigned char fs_ftype_by_dtype[DT_MAX] = { + [DT_REG] = FT_REG_FILE, + [DT_DIR] = FT_DIR, + [DT_LNK] = FT_SYMLINK, + [DT_CHR] = FT_CHRDEV, + [DT_BLK] = FT_BLKDEV, + [DT_FIFO] = FT_FIFO, + [DT_SOCK] = FT_SOCK, +}; + +/** + * fs_umode_to_ftype() - file mode to on-disk file type. + * @mode: The file mode to convert. + * + * This function converts the file mode value to the on-disk file type (FT_*). + * + * Context: Any context. + * Return: + * * FT_UNKNOWN - Unknown type + * * FT_REG_FILE - Regular file + * * FT_DIR - Directory + * * FT_CHRDEV - Character device + * * FT_BLKDEV - Block device + * * FT_FIFO - FIFO + * * FT_SOCK - Local-domain socket + * * FT_SYMLINK - Symbolic link + */ +unsigned char fs_umode_to_ftype(umode_t mode) +{ + return fs_ftype_by_dtype[S_DT(mode)]; +} +EXPORT_SYMBOL_GPL(fs_umode_to_ftype); + +/** + * fs_umode_to_dtype() - file mode to dirent file type. + * @mode: The file mode to convert. + * + * This function converts the file mode value to the directory + * entry type (DT_*). + * + * Context: Any context. + * Return: + * * DT_UNKNOWN - Unknown type + * * DT_FIFO - FIFO + * * DT_CHR - Character device + * * DT_DIR - Directory + * * DT_BLK - Block device + * * DT_REG - Regular file + * * DT_LNK - Symbolic link + * * DT_SOCK - Local-domain socket + */ +unsigned char fs_umode_to_dtype(umode_t mode) +{ + return fs_ftype_to_dtype(fs_umode_to_ftype(mode)); +} +EXPORT_SYMBOL_GPL(fs_umode_to_dtype); diff --git a/include/linux/fs.h b/include/linux/fs.h index 811c77743dad..92966678539d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -1699,22 +1700,6 @@ int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical, u64 phys, u64 len, u32 flags); int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags); -/* - * File types - * - * NOTE! These match bits 12..15 of stat.st_mode - * (ie "(i_mode >> 12) & 15"). - */ -#define DT_UNKNOWN 0 -#define DT_FIFO 1 -#define DT_CHR 2 -#define DT_DIR 4 -#define DT_BLK 6 -#define DT_REG 8 -#define DT_LNK 10 -#define DT_SOCK 12 -#define DT_WHT 14 - /* * This is the "filldir" function type, used by readdir() to let * the kernel specify what kind of dirent layout it wants to have. diff --git a/include/linux/fs_types.h b/include/linux/fs_types.h new file mode 100644 index 000000000000..54816791196f --- /dev/null +++ b/include/linux/fs_types.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_FS_TYPES_H +#define _LINUX_FS_TYPES_H + +/* + * This is a header for the common implementation of dirent + * to fs on-disk file type conversion. Although the fs on-disk + * bits are specific to every file system, in practice, many + * file systems use the exact same on-disk format to describe + * the lower 3 file type bits that represent the 7 POSIX file + * types. + * + * It is important to note that the definitions in this + * header MUST NOT change. This would break both the + * userspace ABI and the on-disk format of filesystems + * using this code. + * + * All those file systems can use this generic code for the + * conversions. + */ + +/* + * struct dirent file types + * exposed to user via getdents(2), readdir(3) + * + * These match bits 12..15 of stat.st_mode + * (ie "(i_mode >> 12) & 15"). + */ +#define S_DT_SHIFT 12 +#define S_DT(mode) (((mode) & S_IFMT) >> S_DT_SHIFT) +#define S_DT_MASK (S_IFMT >> S_DT_SHIFT) + +/* these are defined by POSIX and also present in glibc's dirent.h */ +#define DT_UNKNOWN 0 +#define DT_FIFO 1 +#define DT_CHR 2 +#define DT_DIR 4 +#define DT_BLK 6 +#define DT_REG 8 +#define DT_LNK 10 +#define DT_SOCK 12 +#define DT_WHT 14 + +#define DT_MAX (S_DT_MASK + 1) /* 16 */ + +/* + * fs on-disk file types. + * Only the low 3 bits are used for the POSIX file types. + * Other bits are reserved for fs private use. + * These definitions are shared and used by multiple filesystems, + * and MUST NOT change under any circumstances. + * + * Note that no fs currently stores the whiteout type on-disk, + * so whiteout dirents are exposed to user as DT_CHR. + */ +#define FT_UNKNOWN 0 +#define FT_REG_FILE 1 +#define FT_DIR 2 +#define FT_CHRDEV 3 +#define FT_BLKDEV 4 +#define FT_FIFO 5 +#define FT_SOCK 6 +#define FT_SYMLINK 7 + +#define FT_MAX 8 + +/* + * declarations for helper functions, accompanying implementation + * is in fs/fs_types.c + */ +extern unsigned char fs_ftype_to_dtype(unsigned int filetype); +extern unsigned char fs_umode_to_ftype(umode_t mode); +extern unsigned char fs_umode_to_dtype(umode_t mode); + +#endif -- cgit From dc60a4cfb77c891f67f31953025208067b05883c Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Thu, 17 Jan 2019 11:47:55 -0200 Subject: media: soc_camera_platform: remove obsolete soc_camera test driver This is a test stub driver for soc_camera. Since soc_camera is being deprecated (and in fact, nobody is using it anymore) there's no sense in keeping this test driver. Signed-off-by: Hans Verkuil Acked-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/soc_camera/Kconfig | 6 - drivers/media/platform/soc_camera/Makefile | 4 - .../platform/soc_camera/soc_camera_platform.c | 188 --------------------- .../platform_data/media/soc_camera_platform.h | 83 --------- 4 files changed, 281 deletions(-) delete mode 100644 drivers/media/platform/soc_camera/soc_camera_platform.c delete mode 100644 include/linux/platform_data/media/soc_camera_platform.h (limited to 'include/linux') diff --git a/drivers/media/platform/soc_camera/Kconfig b/drivers/media/platform/soc_camera/Kconfig index d471d34b884c..8f9b3bac5450 100644 --- a/drivers/media/platform/soc_camera/Kconfig +++ b/drivers/media/platform/soc_camera/Kconfig @@ -6,9 +6,3 @@ config SOC_CAMERA SoC Camera is a common API to several cameras, not connecting over a bus like PCI or USB. For example some i2c camera connected directly to the data bus of an SoC. - -config SOC_CAMERA_PLATFORM - tristate "platform camera support" - depends on SOC_CAMERA - help - This is a generic SoC camera platform driver, useful for testing diff --git a/drivers/media/platform/soc_camera/Makefile b/drivers/media/platform/soc_camera/Makefile index 2cb7022e073b..85d5e74f3b2b 100644 --- a/drivers/media/platform/soc_camera/Makefile +++ b/drivers/media/platform/soc_camera/Makefile @@ -1,5 +1 @@ obj-$(CONFIG_SOC_CAMERA) += soc_camera.o soc_mediabus.o - -# a platform subdevice driver stub, allowing to support cameras by adding a -# couple of callback functions to the board code -obj-$(CONFIG_SOC_CAMERA_PLATFORM) += soc_camera_platform.o diff --git a/drivers/media/platform/soc_camera/soc_camera_platform.c b/drivers/media/platform/soc_camera/soc_camera_platform.c deleted file mode 100644 index 79fbe1fea95f..000000000000 --- a/drivers/media/platform/soc_camera/soc_camera_platform.c +++ /dev/null @@ -1,188 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Generic Platform Camera Driver - * - * Copyright (C) 2008 Magnus Damm - * Based on mt9m001 driver, - * Copyright (C) 2008, Guennadi Liakhovetski - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -struct soc_camera_platform_priv { - struct v4l2_subdev subdev; -}; - -static struct soc_camera_platform_priv *get_priv(struct platform_device *pdev) -{ - struct v4l2_subdev *subdev = platform_get_drvdata(pdev); - return container_of(subdev, struct soc_camera_platform_priv, subdev); -} - -static int soc_camera_platform_s_stream(struct v4l2_subdev *sd, int enable) -{ - struct soc_camera_platform_info *p = v4l2_get_subdevdata(sd); - return p->set_capture(p, enable); -} - -static int soc_camera_platform_fill_fmt(struct v4l2_subdev *sd, - struct v4l2_subdev_pad_config *cfg, - struct v4l2_subdev_format *format) -{ - struct soc_camera_platform_info *p = v4l2_get_subdevdata(sd); - struct v4l2_mbus_framefmt *mf = &format->format; - - mf->width = p->format.width; - mf->height = p->format.height; - mf->code = p->format.code; - mf->colorspace = p->format.colorspace; - mf->field = p->format.field; - - return 0; -} - -static int soc_camera_platform_s_power(struct v4l2_subdev *sd, int on) -{ - struct soc_camera_platform_info *p = v4l2_get_subdevdata(sd); - - return soc_camera_set_power(p->icd->control, &p->icd->sdesc->subdev_desc, NULL, on); -} - -static const struct v4l2_subdev_core_ops platform_subdev_core_ops = { - .s_power = soc_camera_platform_s_power, -}; - -static int soc_camera_platform_enum_mbus_code(struct v4l2_subdev *sd, - struct v4l2_subdev_pad_config *cfg, - struct v4l2_subdev_mbus_code_enum *code) -{ - struct soc_camera_platform_info *p = v4l2_get_subdevdata(sd); - - if (code->pad || code->index) - return -EINVAL; - - code->code = p->format.code; - return 0; -} - -static int soc_camera_platform_get_selection(struct v4l2_subdev *sd, - struct v4l2_subdev_pad_config *cfg, - struct v4l2_subdev_selection *sel) -{ - struct soc_camera_platform_info *p = v4l2_get_subdevdata(sd); - - if (sel->which != V4L2_SUBDEV_FORMAT_ACTIVE) - return -EINVAL; - - switch (sel->target) { - case V4L2_SEL_TGT_CROP_BOUNDS: - case V4L2_SEL_TGT_CROP_DEFAULT: - case V4L2_SEL_TGT_CROP: - sel->r.left = 0; - sel->r.top = 0; - sel->r.width = p->format.width; - sel->r.height = p->format.height; - return 0; - default: - return -EINVAL; - } -} - -static int soc_camera_platform_g_mbus_config(struct v4l2_subdev *sd, - struct v4l2_mbus_config *cfg) -{ - struct soc_camera_platform_info *p = v4l2_get_subdevdata(sd); - - cfg->flags = p->mbus_param; - cfg->type = p->mbus_type; - - return 0; -} - -static const struct v4l2_subdev_video_ops platform_subdev_video_ops = { - .s_stream = soc_camera_platform_s_stream, - .g_mbus_config = soc_camera_platform_g_mbus_config, -}; - -static const struct v4l2_subdev_pad_ops platform_subdev_pad_ops = { - .enum_mbus_code = soc_camera_platform_enum_mbus_code, - .get_selection = soc_camera_platform_get_selection, - .get_fmt = soc_camera_platform_fill_fmt, - .set_fmt = soc_camera_platform_fill_fmt, -}; - -static const struct v4l2_subdev_ops platform_subdev_ops = { - .core = &platform_subdev_core_ops, - .video = &platform_subdev_video_ops, - .pad = &platform_subdev_pad_ops, -}; - -static int soc_camera_platform_probe(struct platform_device *pdev) -{ - struct soc_camera_host *ici; - struct soc_camera_platform_priv *priv; - struct soc_camera_platform_info *p = pdev->dev.platform_data; - struct soc_camera_device *icd; - - if (!p) - return -EINVAL; - - if (!p->icd) { - dev_err(&pdev->dev, - "Platform has not set soc_camera_device pointer!\n"); - return -EINVAL; - } - - priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); - if (!priv) - return -ENOMEM; - - icd = p->icd; - - /* soc-camera convention: control's drvdata points to the subdev */ - platform_set_drvdata(pdev, &priv->subdev); - /* Set the control device reference */ - icd->control = &pdev->dev; - - ici = to_soc_camera_host(icd->parent); - - v4l2_subdev_init(&priv->subdev, &platform_subdev_ops); - v4l2_set_subdevdata(&priv->subdev, p); - strscpy(priv->subdev.name, dev_name(&pdev->dev), - sizeof(priv->subdev.name)); - - return v4l2_device_register_subdev(&ici->v4l2_dev, &priv->subdev); -} - -static int soc_camera_platform_remove(struct platform_device *pdev) -{ - struct soc_camera_platform_priv *priv = get_priv(pdev); - struct soc_camera_platform_info *p = v4l2_get_subdevdata(&priv->subdev); - - p->icd->control = NULL; - v4l2_device_unregister_subdev(&priv->subdev); - return 0; -} - -static struct platform_driver soc_camera_platform_driver = { - .driver = { - .name = "soc_camera_platform", - }, - .probe = soc_camera_platform_probe, - .remove = soc_camera_platform_remove, -}; - -module_platform_driver(soc_camera_platform_driver); - -MODULE_DESCRIPTION("SoC Camera Platform driver"); -MODULE_AUTHOR("Magnus Damm"); -MODULE_LICENSE("GPL v2"); -MODULE_ALIAS("platform:soc_camera_platform"); diff --git a/include/linux/platform_data/media/soc_camera_platform.h b/include/linux/platform_data/media/soc_camera_platform.h deleted file mode 100644 index 1e5065dab430..000000000000 --- a/include/linux/platform_data/media/soc_camera_platform.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Generic Platform Camera Driver Header - * - * Copyright (C) 2008 Magnus Damm - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef __SOC_CAMERA_H__ -#define __SOC_CAMERA_H__ - -#include -#include -#include - -struct device; - -struct soc_camera_platform_info { - const char *format_name; - unsigned long format_depth; - struct v4l2_mbus_framefmt format; - unsigned long mbus_param; - enum v4l2_mbus_type mbus_type; - struct soc_camera_device *icd; - int (*set_capture)(struct soc_camera_platform_info *info, int enable); -}; - -static inline void soc_camera_platform_release(struct platform_device **pdev) -{ - *pdev = NULL; -} - -static inline int soc_camera_platform_add(struct soc_camera_device *icd, - struct platform_device **pdev, - struct soc_camera_link *plink, - void (*release)(struct device *dev), - int id) -{ - struct soc_camera_subdev_desc *ssdd = - (struct soc_camera_subdev_desc *)plink; - struct soc_camera_platform_info *info = ssdd->drv_priv; - int ret; - - if (&icd->sdesc->subdev_desc != ssdd) - return -ENODEV; - - if (*pdev) - return -EBUSY; - - *pdev = platform_device_alloc("soc_camera_platform", id); - if (!*pdev) - return -ENOMEM; - - info->icd = icd; - - (*pdev)->dev.platform_data = info; - (*pdev)->dev.release = release; - - ret = platform_device_add(*pdev); - if (ret < 0) { - platform_device_put(*pdev); - *pdev = NULL; - info->icd = NULL; - } - - return ret; -} - -static inline void soc_camera_platform_del(const struct soc_camera_device *icd, - struct platform_device *pdev, - const struct soc_camera_link *plink) -{ - const struct soc_camera_subdev_desc *ssdd = - (const struct soc_camera_subdev_desc *)plink; - if (&icd->sdesc->subdev_desc != ssdd || !pdev) - return; - - platform_device_unregister(pdev); -} - -#endif /* __SOC_CAMERA_H__ */ -- cgit From cf5c6c211b7e9eb4f4219f83671432c9ef257187 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 17 Jan 2019 15:25:04 +0800 Subject: perf: Remove duplicated workqueue.h include from perf_event.h It is already included a little bit higher up in that file. Signed-off-by: YueHaibing Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20190117072504.14428-1-yuehaibing@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/perf_event.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index cec02dc63b51..f8ec36197718 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -53,7 +53,6 @@ struct perf_guest_info_callbacks { #include #include #include -#include #include #include -- cgit From 5620196951192f7cd2da0a04e7c0113f40bfc14e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 11 Jan 2019 13:20:20 -0300 Subject: perf: Make perf_event_output() propagate the output() return For the original mode of operation it isn't needed, since we report back errors via PERF_RECORD_LOST records in the ring buffer, but for use in bpf_perf_event_output() it is convenient to return the errors, basically -ENOSPC. Currently bpf_perf_event_output() returns an error indication, the last thing it does, which is to push it to the ring buffer is that can fail and if so, this failure won't be reported back to its users, fix it. Reported-by: Jamal Hadi Salim Tested-by: Jamal Hadi Salim Acked-by: Peter Zijlstra (Intel) Cc: Adrian Hunter Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/r/20190118150938.GN5823@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/perf_event.h | 6 +++--- kernel/events/core.c | 11 +++++++---- kernel/trace/bpf_trace.c | 3 +-- tools/perf/examples/bpf/augmented_raw_syscalls.c | 4 ++-- tools/perf/examples/bpf/augmented_syscalls.c | 14 +++++++------- tools/perf/examples/bpf/etcsnoop.c | 10 +++++----- 6 files changed, 25 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index f8ec36197718..4eb88065a9b5 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -978,9 +978,9 @@ extern void perf_event_output_forward(struct perf_event *event, extern void perf_event_output_backward(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs); -extern void perf_event_output(struct perf_event *event, - struct perf_sample_data *data, - struct pt_regs *regs); +extern int perf_event_output(struct perf_event *event, + struct perf_sample_data *data, + struct pt_regs *regs); static inline bool is_default_overflow_handler(struct perf_event *event) diff --git a/kernel/events/core.c b/kernel/events/core.c index fbe59b793b36..bc525cd1615c 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6489,7 +6489,7 @@ void perf_prepare_sample(struct perf_event_header *header, data->phys_addr = perf_virt_to_phys(data->addr); } -static __always_inline void +static __always_inline int __perf_event_output(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs, @@ -6499,13 +6499,15 @@ __perf_event_output(struct perf_event *event, { struct perf_output_handle handle; struct perf_event_header header; + int err; /* protect the callchain buffers */ rcu_read_lock(); perf_prepare_sample(&header, data, event, regs); - if (output_begin(&handle, event, header.size)) + err = output_begin(&handle, event, header.size); + if (err) goto exit; perf_output_sample(&handle, &header, data, event); @@ -6514,6 +6516,7 @@ __perf_event_output(struct perf_event *event, exit: rcu_read_unlock(); + return err; } void @@ -6532,12 +6535,12 @@ perf_event_output_backward(struct perf_event *event, __perf_event_output(event, data, regs, perf_output_begin_backward); } -void +int perf_event_output(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs) { - __perf_event_output(event, data, regs, perf_output_begin); + return __perf_event_output(event, data, regs, perf_output_begin); } /* diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 8b068adb9da1..088c2032ceaf 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -431,8 +431,7 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map, if (unlikely(event->oncpu != cpu)) return -EOPNOTSUPP; - perf_event_output(event, sd, regs); - return 0; + return perf_event_output(event, sd, regs); } BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map, diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c index 53c233370fae..9e9d4c66e53c 100644 --- a/tools/perf/examples/bpf/augmented_raw_syscalls.c +++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c @@ -141,8 +141,8 @@ int sys_enter(struct syscall_enter_args *args) len = sizeof(augmented_args.args); } - perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, len); - return 0; + /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ + return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, len); } SEC("raw_syscalls:sys_exit") diff --git a/tools/perf/examples/bpf/augmented_syscalls.c b/tools/perf/examples/bpf/augmented_syscalls.c index 2ae44813ef2d..b7dba114e36c 100644 --- a/tools/perf/examples/bpf/augmented_syscalls.c +++ b/tools/perf/examples/bpf/augmented_syscalls.c @@ -55,9 +55,9 @@ int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \ len -= sizeof(augmented_args.filename.value) - augmented_args.filename.size; \ len &= sizeof(augmented_args.filename.value) - 1; \ } \ - perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ - &augmented_args, len); \ - return 0; \ + /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ \ + return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ + &augmented_args, len); \ } \ int syscall_exit(syscall)(struct syscall_exit_args *args) \ { \ @@ -125,10 +125,10 @@ int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \ /* addrlen = augmented_args.args.addrlen; */ \ /* */ \ probe_read(&augmented_args.addr, addrlen, args->addr_ptr); \ - perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ - &augmented_args, \ - sizeof(augmented_args) - sizeof(augmented_args.addr) + addrlen); \ - return 0; \ + /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ \ + return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ + &augmented_args, \ + sizeof(augmented_args) - sizeof(augmented_args.addr) + addrlen);\ } \ int syscall_exit(syscall)(struct syscall_exit_args *args) \ { \ diff --git a/tools/perf/examples/bpf/etcsnoop.c b/tools/perf/examples/bpf/etcsnoop.c index b59e8812ee8c..550e69c2e8d1 100644 --- a/tools/perf/examples/bpf/etcsnoop.c +++ b/tools/perf/examples/bpf/etcsnoop.c @@ -49,11 +49,11 @@ int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \ args->filename_ptr); \ if (__builtin_memcmp(augmented_args.filename.value, etc, 4) != 0) \ return 0; \ - perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ - &augmented_args, \ - (sizeof(augmented_args) - sizeof(augmented_args.filename.value) + \ - augmented_args.filename.size)); \ - return 0; \ + /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ \ + return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ + &augmented_args, \ + (sizeof(augmented_args) - sizeof(augmented_args.filename.value) + \ + augmented_args.filename.size)); \ } struct syscall_enter_openat_args { -- cgit From 76193a94522f1d4edf2447a536f3f796ce56343b Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 17 Jan 2019 08:15:13 -0800 Subject: perf, bpf: Introduce PERF_RECORD_KSYMBOL For better performance analysis of dynamically JITed and loaded kernel functions, such as BPF programs, this patch introduces PERF_RECORD_KSYMBOL, a new perf_event_type that exposes kernel symbol register/unregister information to user space. The following data structure is used for PERF_RECORD_KSYMBOL. /* * struct { * struct perf_event_header header; * u64 addr; * u32 len; * u16 ksym_type; * u16 flags; * char name[]; * struct sample_id sample_id; * }; */ Signed-off-by: Song Liu Reviewed-by: Arnaldo Carvalho de Melo Tested-by: Arnaldo Carvalho de Melo Acked-by: Peter Zijlstra Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Peter Zijlstra Cc: kernel-team@fb.com Cc: netdev@vger.kernel.org Link: http://lkml.kernel.org/r/20190117161521.1341602-2-songliubraving@fb.com Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/perf_event.h | 8 ++++ include/uapi/linux/perf_event.h | 26 ++++++++++- kernel/events/core.c | 98 ++++++++++++++++++++++++++++++++++++++++- 3 files changed, 130 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 4eb88065a9b5..136fe0495374 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1122,6 +1122,10 @@ static inline void perf_event_task_sched_out(struct task_struct *prev, } extern void perf_event_mmap(struct vm_area_struct *vma); + +extern void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len, + bool unregister, const char *sym); + extern struct perf_guest_info_callbacks *perf_guest_cbs; extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); @@ -1342,6 +1346,10 @@ static inline int perf_unregister_guest_info_callbacks (struct perf_guest_info_callbacks *callbacks) { return 0; } static inline void perf_event_mmap(struct vm_area_struct *vma) { } + +typedef int (perf_ksymbol_get_name_f)(char *name, int name_len, void *data); +static inline void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len, + bool unregister, const char *sym) { } static inline void perf_event_exec(void) { } static inline void perf_event_comm(struct task_struct *tsk, bool exec) { } static inline void perf_event_namespaces(struct task_struct *tsk) { } diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index ea19b5d491bf..1dee5c8f166b 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -372,7 +372,8 @@ struct perf_event_attr { context_switch : 1, /* context switch data */ write_backward : 1, /* Write ring buffer from end to beginning */ namespaces : 1, /* include namespaces data */ - __reserved_1 : 35; + ksymbol : 1, /* include ksymbol events */ + __reserved_1 : 34; union { __u32 wakeup_events; /* wakeup every n events */ @@ -963,9 +964,32 @@ enum perf_event_type { */ PERF_RECORD_NAMESPACES = 16, + /* + * Record ksymbol register/unregister events: + * + * struct { + * struct perf_event_header header; + * u64 addr; + * u32 len; + * u16 ksym_type; + * u16 flags; + * char name[]; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_KSYMBOL = 17, + PERF_RECORD_MAX, /* non-ABI */ }; +enum perf_record_ksymbol_type { + PERF_RECORD_KSYMBOL_TYPE_UNKNOWN = 0, + PERF_RECORD_KSYMBOL_TYPE_BPF = 1, + PERF_RECORD_KSYMBOL_TYPE_MAX /* non-ABI */ +}; + +#define PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER (1 << 0) + #define PERF_MAX_STACK_DEPTH 127 #define PERF_MAX_CONTEXTS_PER_STACK 8 diff --git a/kernel/events/core.c b/kernel/events/core.c index bc525cd1615c..e04ab5f325cf 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -385,6 +385,7 @@ static atomic_t nr_namespaces_events __read_mostly; static atomic_t nr_task_events __read_mostly; static atomic_t nr_freq_events __read_mostly; static atomic_t nr_switch_events __read_mostly; +static atomic_t nr_ksymbol_events __read_mostly; static LIST_HEAD(pmus); static DEFINE_MUTEX(pmus_lock); @@ -4235,7 +4236,7 @@ static bool is_sb_event(struct perf_event *event) if (attr->mmap || attr->mmap_data || attr->mmap2 || attr->comm || attr->comm_exec || - attr->task || + attr->task || attr->ksymbol || attr->context_switch) return true; return false; @@ -4305,6 +4306,8 @@ static void unaccount_event(struct perf_event *event) dec = true; if (has_branch_stack(event)) dec = true; + if (event->attr.ksymbol) + atomic_dec(&nr_ksymbol_events); if (dec) { if (!atomic_add_unless(&perf_sched_count, -1, 1)) @@ -7653,6 +7656,97 @@ static void perf_log_throttle(struct perf_event *event, int enable) perf_output_end(&handle); } +/* + * ksymbol register/unregister tracking + */ + +struct perf_ksymbol_event { + const char *name; + int name_len; + struct { + struct perf_event_header header; + u64 addr; + u32 len; + u16 ksym_type; + u16 flags; + } event_id; +}; + +static int perf_event_ksymbol_match(struct perf_event *event) +{ + return event->attr.ksymbol; +} + +static void perf_event_ksymbol_output(struct perf_event *event, void *data) +{ + struct perf_ksymbol_event *ksymbol_event = data; + struct perf_output_handle handle; + struct perf_sample_data sample; + int ret; + + if (!perf_event_ksymbol_match(event)) + return; + + perf_event_header__init_id(&ksymbol_event->event_id.header, + &sample, event); + ret = perf_output_begin(&handle, event, + ksymbol_event->event_id.header.size); + if (ret) + return; + + perf_output_put(&handle, ksymbol_event->event_id); + __output_copy(&handle, ksymbol_event->name, ksymbol_event->name_len); + perf_event__output_id_sample(event, &handle, &sample); + + perf_output_end(&handle); +} + +void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len, bool unregister, + const char *sym) +{ + struct perf_ksymbol_event ksymbol_event; + char name[KSYM_NAME_LEN]; + u16 flags = 0; + int name_len; + + if (!atomic_read(&nr_ksymbol_events)) + return; + + if (ksym_type >= PERF_RECORD_KSYMBOL_TYPE_MAX || + ksym_type == PERF_RECORD_KSYMBOL_TYPE_UNKNOWN) + goto err; + + strlcpy(name, sym, KSYM_NAME_LEN); + name_len = strlen(name) + 1; + while (!IS_ALIGNED(name_len, sizeof(u64))) + name[name_len++] = '\0'; + BUILD_BUG_ON(KSYM_NAME_LEN % sizeof(u64)); + + if (unregister) + flags |= PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER; + + ksymbol_event = (struct perf_ksymbol_event){ + .name = name, + .name_len = name_len, + .event_id = { + .header = { + .type = PERF_RECORD_KSYMBOL, + .size = sizeof(ksymbol_event.event_id) + + name_len, + }, + .addr = addr, + .len = len, + .ksym_type = ksym_type, + .flags = flags, + }, + }; + + perf_iterate_sb(perf_event_ksymbol_output, &ksymbol_event, NULL); + return; +err: + WARN_ONCE(1, "%s: Invalid KSYMBOL type 0x%x\n", __func__, ksym_type); +} + void perf_event_itrace_started(struct perf_event *event) { event->attach_state |= PERF_ATTACH_ITRACE; @@ -9912,6 +10006,8 @@ static void account_event(struct perf_event *event) inc = true; if (is_cgroup_event(event)) inc = true; + if (event->attr.ksymbol) + atomic_inc(&nr_ksymbol_events); if (inc) { /* -- cgit From 6ee52e2a3fe4ea35520720736e6791df1fb67106 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 17 Jan 2019 08:15:15 -0800 Subject: perf, bpf: Introduce PERF_RECORD_BPF_EVENT For better performance analysis of BPF programs, this patch introduces PERF_RECORD_BPF_EVENT, a new perf_event_type that exposes BPF program load/unload information to user space. Each BPF program may contain up to BPF_MAX_SUBPROGS (256) sub programs. The following example shows kernel symbols for a BPF program with 7 sub programs: ffffffffa0257cf9 t bpf_prog_b07ccb89267cf242_F ffffffffa02592e1 t bpf_prog_2dcecc18072623fc_F ffffffffa025b0e9 t bpf_prog_bb7a405ebaec5d5c_F ffffffffa025dd2c t bpf_prog_a7540d4a39ec1fc7_F ffffffffa025fcca t bpf_prog_05762d4ade0e3737_F ffffffffa026108f t bpf_prog_db4bd11e35df90d4_F ffffffffa0263f00 t bpf_prog_89d64e4abf0f0126_F ffffffffa0257cf9 t bpf_prog_ae31629322c4b018__dummy_tracepoi When a bpf program is loaded, PERF_RECORD_KSYMBOL is generated for each of these sub programs. Therefore, PERF_RECORD_BPF_EVENT is not needed for simple profiling. For annotation, user space need to listen to PERF_RECORD_BPF_EVENT and gather more information about these (sub) programs via sys_bpf. Signed-off-by: Song Liu Reviewed-by: Arnaldo Carvalho de Melo Acked-by: Alexei Starovoitov Acked-by: Peter Zijlstra (Intel) Tested-by: Arnaldo Carvalho de Melo Cc: Daniel Borkmann Cc: Peter Zijlstra Cc: kernel-team@fb.com Cc: netdev@vger.kernel.org Link: http://lkml.kernel.org/r/20190117161521.1341602-4-songliubraving@fb.com Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/filter.h | 7 +++ include/linux/perf_event.h | 6 +++ include/uapi/linux/perf_event.h | 29 +++++++++- kernel/bpf/core.c | 2 +- kernel/bpf/syscall.c | 2 + kernel/events/core.c | 115 ++++++++++++++++++++++++++++++++++++++++ 6 files changed, 159 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index ad106d845b22..d531d4250bff 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -951,6 +951,7 @@ bpf_address_lookup(unsigned long addr, unsigned long *size, void bpf_prog_kallsyms_add(struct bpf_prog *fp); void bpf_prog_kallsyms_del(struct bpf_prog *fp); +void bpf_get_prog_name(const struct bpf_prog *prog, char *sym); #else /* CONFIG_BPF_JIT */ @@ -1006,6 +1007,12 @@ static inline void bpf_prog_kallsyms_add(struct bpf_prog *fp) static inline void bpf_prog_kallsyms_del(struct bpf_prog *fp) { } + +static inline void bpf_get_prog_name(const struct bpf_prog *prog, char *sym) +{ + sym[0] = '\0'; +} + #endif /* CONFIG_BPF_JIT */ void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 136fe0495374..a79e59fc3b7d 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1125,6 +1125,9 @@ extern void perf_event_mmap(struct vm_area_struct *vma); extern void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len, bool unregister, const char *sym); +extern void perf_event_bpf_event(struct bpf_prog *prog, + enum perf_bpf_event_type type, + u16 flags); extern struct perf_guest_info_callbacks *perf_guest_cbs; extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); @@ -1350,6 +1353,9 @@ static inline void perf_event_mmap(struct vm_area_struct *vma) { } typedef int (perf_ksymbol_get_name_f)(char *name, int name_len, void *data); static inline void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len, bool unregister, const char *sym) { } +static inline void perf_event_bpf_event(struct bpf_prog *prog, + enum perf_bpf_event_type type, + u16 flags) { } static inline void perf_event_exec(void) { } static inline void perf_event_comm(struct task_struct *tsk, bool exec) { } static inline void perf_event_namespaces(struct task_struct *tsk) { } diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 1dee5c8f166b..7198ddd0c6b1 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -373,7 +373,8 @@ struct perf_event_attr { write_backward : 1, /* Write ring buffer from end to beginning */ namespaces : 1, /* include namespaces data */ ksymbol : 1, /* include ksymbol events */ - __reserved_1 : 34; + bpf_event : 1, /* include bpf events */ + __reserved_1 : 33; union { __u32 wakeup_events; /* wakeup every n events */ @@ -979,6 +980,25 @@ enum perf_event_type { */ PERF_RECORD_KSYMBOL = 17, + /* + * Record bpf events: + * enum perf_bpf_event_type { + * PERF_BPF_EVENT_UNKNOWN = 0, + * PERF_BPF_EVENT_PROG_LOAD = 1, + * PERF_BPF_EVENT_PROG_UNLOAD = 2, + * }; + * + * struct { + * struct perf_event_header header; + * u16 type; + * u16 flags; + * u32 id; + * u8 tag[BPF_TAG_SIZE]; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_BPF_EVENT = 18, + PERF_RECORD_MAX, /* non-ABI */ }; @@ -990,6 +1010,13 @@ enum perf_record_ksymbol_type { #define PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER (1 << 0) +enum perf_bpf_event_type { + PERF_BPF_EVENT_UNKNOWN = 0, + PERF_BPF_EVENT_PROG_LOAD = 1, + PERF_BPF_EVENT_PROG_UNLOAD = 2, + PERF_BPF_EVENT_MAX, /* non-ABI */ +}; + #define PERF_MAX_STACK_DEPTH 127 #define PERF_MAX_CONTEXTS_PER_STACK 8 diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index f908b9356025..19c49313c709 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -495,7 +495,7 @@ bpf_get_prog_addr_region(const struct bpf_prog *prog, *symbol_end = addr + hdr->pages * PAGE_SIZE; } -static void bpf_get_prog_name(const struct bpf_prog *prog, char *sym) +void bpf_get_prog_name(const struct bpf_prog *prog, char *sym) { const char *end = sym + KSYM_NAME_LEN; const struct btf_type *type; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index b155cd17c1bd..30ebd085790b 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1211,6 +1211,7 @@ static void __bpf_prog_put_rcu(struct rcu_head *rcu) static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock) { if (atomic_dec_and_test(&prog->aux->refcnt)) { + perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0); /* bpf_prog_free_id() must be called first */ bpf_prog_free_id(prog, do_idr_lock); bpf_prog_kallsyms_del_all(prog); @@ -1554,6 +1555,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr) } bpf_prog_kallsyms_add(prog); + perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_LOAD, 0); return err; free_used_maps: diff --git a/kernel/events/core.c b/kernel/events/core.c index e04ab5f325cf..236bb8ddb7bc 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -386,6 +386,7 @@ static atomic_t nr_task_events __read_mostly; static atomic_t nr_freq_events __read_mostly; static atomic_t nr_switch_events __read_mostly; static atomic_t nr_ksymbol_events __read_mostly; +static atomic_t nr_bpf_events __read_mostly; static LIST_HEAD(pmus); static DEFINE_MUTEX(pmus_lock); @@ -4308,6 +4309,8 @@ static void unaccount_event(struct perf_event *event) dec = true; if (event->attr.ksymbol) atomic_dec(&nr_ksymbol_events); + if (event->attr.bpf_event) + atomic_dec(&nr_bpf_events); if (dec) { if (!atomic_add_unless(&perf_sched_count, -1, 1)) @@ -7747,6 +7750,116 @@ err: WARN_ONCE(1, "%s: Invalid KSYMBOL type 0x%x\n", __func__, ksym_type); } +/* + * bpf program load/unload tracking + */ + +struct perf_bpf_event { + struct bpf_prog *prog; + struct { + struct perf_event_header header; + u16 type; + u16 flags; + u32 id; + u8 tag[BPF_TAG_SIZE]; + } event_id; +}; + +static int perf_event_bpf_match(struct perf_event *event) +{ + return event->attr.bpf_event; +} + +static void perf_event_bpf_output(struct perf_event *event, void *data) +{ + struct perf_bpf_event *bpf_event = data; + struct perf_output_handle handle; + struct perf_sample_data sample; + int ret; + + if (!perf_event_bpf_match(event)) + return; + + perf_event_header__init_id(&bpf_event->event_id.header, + &sample, event); + ret = perf_output_begin(&handle, event, + bpf_event->event_id.header.size); + if (ret) + return; + + perf_output_put(&handle, bpf_event->event_id); + perf_event__output_id_sample(event, &handle, &sample); + + perf_output_end(&handle); +} + +static void perf_event_bpf_emit_ksymbols(struct bpf_prog *prog, + enum perf_bpf_event_type type) +{ + bool unregister = type == PERF_BPF_EVENT_PROG_UNLOAD; + char sym[KSYM_NAME_LEN]; + int i; + + if (prog->aux->func_cnt == 0) { + bpf_get_prog_name(prog, sym); + perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, + (u64)(unsigned long)prog->bpf_func, + prog->jited_len, unregister, sym); + } else { + for (i = 0; i < prog->aux->func_cnt; i++) { + struct bpf_prog *subprog = prog->aux->func[i]; + + bpf_get_prog_name(subprog, sym); + perf_event_ksymbol( + PERF_RECORD_KSYMBOL_TYPE_BPF, + (u64)(unsigned long)subprog->bpf_func, + subprog->jited_len, unregister, sym); + } + } +} + +void perf_event_bpf_event(struct bpf_prog *prog, + enum perf_bpf_event_type type, + u16 flags) +{ + struct perf_bpf_event bpf_event; + + if (type <= PERF_BPF_EVENT_UNKNOWN || + type >= PERF_BPF_EVENT_MAX) + return; + + switch (type) { + case PERF_BPF_EVENT_PROG_LOAD: + case PERF_BPF_EVENT_PROG_UNLOAD: + if (atomic_read(&nr_ksymbol_events)) + perf_event_bpf_emit_ksymbols(prog, type); + break; + default: + break; + } + + if (!atomic_read(&nr_bpf_events)) + return; + + bpf_event = (struct perf_bpf_event){ + .prog = prog, + .event_id = { + .header = { + .type = PERF_RECORD_BPF_EVENT, + .size = sizeof(bpf_event.event_id), + }, + .type = type, + .flags = flags, + .id = prog->aux->id, + }, + }; + + BUILD_BUG_ON(BPF_TAG_SIZE % sizeof(u64)); + + memcpy(bpf_event.event_id.tag, prog->tag, BPF_TAG_SIZE); + perf_iterate_sb(perf_event_bpf_output, &bpf_event, NULL); +} + void perf_event_itrace_started(struct perf_event *event) { event->attach_state |= PERF_ATTACH_ITRACE; @@ -10008,6 +10121,8 @@ static void account_event(struct perf_event *event) inc = true; if (event->attr.ksymbol) atomic_inc(&nr_ksymbol_events); + if (event->attr.bpf_event) + atomic_inc(&nr_bpf_events); if (inc) { /* -- cgit From 534fd7aac56a7994d16032f32123def9923e339f Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 13 Jan 2019 16:01:17 +0200 Subject: IB/mlx5: Manage indirection mkey upon DEVX flow for ODP Manage indirection mkey upon DEVX flow to support ODP. To support a page fault event on the indirection mkey it needs to be part of the device mkey radix tree. Both the creation and the deletion flows for a DEVX object which is indirection mkey were adapted to handle that. Signed-off-by: Yishai Hadas Reviewed-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/devx.c | 89 +++++++++++++++++++++++++++++++++++- drivers/infiniband/hw/mlx5/main.c | 1 + drivers/infiniband/hw/mlx5/mlx5_ib.h | 6 +++ include/linux/mlx5/driver.h | 1 + 4 files changed, 96 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index b7ff2138ac2a..bbf9a26d8fa6 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -17,12 +17,18 @@ #define UVERBS_MODULE_NAME mlx5_ib #include +enum devx_obj_flags { + DEVX_OBJ_FLAGS_INDIRECT_MKEY = 1 << 0, +}; + #define MLX5_MAX_DESTROY_INBOX_SIZE_DW MLX5_ST_SZ_DW(delete_fte_in) struct devx_obj { struct mlx5_core_dev *mdev; u64 obj_id; u32 dinlen; /* destroy inbox length */ u32 dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW]; + u32 flags; + struct mlx5_ib_devx_mr devx_mr; }; struct devx_umem { @@ -1011,6 +1017,36 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din, } } +static int devx_handle_mkey_indirect(struct devx_obj *obj, + struct mlx5_ib_dev *dev, + void *in, void *out) +{ + struct mlx5_mkey_table *table = &dev->mdev->priv.mkey_table; + struct mlx5_ib_devx_mr *devx_mr = &obj->devx_mr; + unsigned long flags; + struct mlx5_core_mkey *mkey; + void *mkc; + u8 key; + int err; + + mkey = &devx_mr->mmkey; + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + key = MLX5_GET(mkc, mkc, mkey_7_0); + mkey->key = mlx5_idx_to_mkey( + MLX5_GET(create_mkey_out, out, mkey_index)) | key; + mkey->type = MLX5_MKEY_INDIRECT_DEVX; + mkey->iova = MLX5_GET64(mkc, mkc, start_addr); + mkey->size = MLX5_GET64(mkc, mkc, len); + mkey->pd = MLX5_GET(mkc, mkc, pd); + devx_mr->ndescs = MLX5_GET(mkc, mkc, translations_octword_size); + + write_lock_irqsave(&table->lock, flags); + err = radix_tree_insert(&table->tree, mlx5_base_mkey(mkey->key), + mkey); + write_unlock_irqrestore(&table->lock, flags); + return err; +} + static int devx_handle_mkey_create(struct mlx5_ib_dev *dev, struct devx_obj *obj, void *in, int in_len) @@ -1030,13 +1066,45 @@ static int devx_handle_mkey_create(struct mlx5_ib_dev *dev, access_mode |= MLX5_GET(mkc, mkc, access_mode_4_2) << 2; if (access_mode == MLX5_MKC_ACCESS_MODE_KLMS || - access_mode == MLX5_MKC_ACCESS_MODE_KSM) + access_mode == MLX5_MKC_ACCESS_MODE_KSM) { + if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) + obj->flags |= DEVX_OBJ_FLAGS_INDIRECT_MKEY; return 0; + } MLX5_SET(create_mkey_in, in, mkey_umem_valid, 1); return 0; } +static void devx_free_indirect_mkey(struct rcu_head *rcu) +{ + kfree(container_of(rcu, struct devx_obj, devx_mr.rcu)); +} + +/* This function to delete from the radix tree needs to be called before + * destroying the underlying mkey. Otherwise a race might occur in case that + * other thread will get the same mkey before this one will be deleted, + * in that case it will fail via inserting to the tree its own data. + * + * Note: + * An error in the destroy is not expected unless there is some other indirect + * mkey which points to this one. In a kernel cleanup flow it will be just + * destroyed in the iterative destruction call. In a user flow, in case + * the application didn't close in the expected order it's its own problem, + * the mkey won't be part of the tree, in both cases the kernel is safe. + */ +static void devx_cleanup_mkey(struct devx_obj *obj) +{ + struct mlx5_mkey_table *table = &obj->mdev->priv.mkey_table; + struct mlx5_core_mkey *del_mkey; + unsigned long flags; + + write_lock_irqsave(&table->lock, flags); + del_mkey = radix_tree_delete(&table->tree, + mlx5_base_mkey(obj->devx_mr.mmkey.key)); + write_unlock_irqrestore(&table->lock, flags); +} + static int devx_obj_cleanup(struct ib_uobject *uobject, enum rdma_remove_reason why) { @@ -1044,10 +1112,21 @@ static int devx_obj_cleanup(struct ib_uobject *uobject, struct devx_obj *obj = uobject->object; int ret; + if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) + devx_cleanup_mkey(obj); + ret = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out)); if (ib_is_destroy_retryable(ret, why, uobject)) return ret; + if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) { + struct mlx5_ib_dev *dev = to_mdev(uobject->context->device); + + call_srcu(&dev->mr_srcu, &obj->devx_mr.rcu, + devx_free_indirect_mkey); + return ret; + } + kfree(obj); return ret; } @@ -1108,6 +1187,12 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( &obj_id); WARN_ON(obj->dinlen > MLX5_MAX_DESTROY_INBOX_SIZE_DW * sizeof(u32)); + if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) { + err = devx_handle_mkey_indirect(obj, dev, cmd_in, cmd_out); + if (err) + goto obj_destroy; + } + err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, cmd_out, cmd_out_len); if (err) goto obj_destroy; @@ -1116,6 +1201,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( return 0; obj_destroy: + if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) + devx_cleanup_mkey(obj); mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out)); obj_free: kfree(obj); diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 61064b7171fc..ae00f994673b 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -5724,6 +5724,7 @@ void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) { mlx5_ib_cleanup_multiport_master(dev); if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { + srcu_barrier(&dev->mr_srcu); cleanup_srcu_struct(&dev->mr_srcu); drain_workqueue(dev->advise_mr_wq); destroy_workqueue(dev->advise_mr_wq); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index b0a37ca2a714..819207190343 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -602,6 +602,12 @@ struct mlx5_ib_mw { int ndescs; }; +struct mlx5_ib_devx_mr { + struct mlx5_core_mkey mmkey; + int ndescs; + struct rcu_head rcu; +}; + struct mlx5_ib_umr_context { struct ib_cqe cqe; enum ib_wc_status status; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index b6f5839f129a..619d6fee96a1 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -364,6 +364,7 @@ struct mlx5_core_sig_ctx { enum { MLX5_MKEY_MR = 1, MLX5_MKEY_MW, + MLX5_MKEY_INDIRECT_DEVX, }; struct mlx5_core_mkey { -- cgit From 1278cf66cf4b1c3d30e311200b50c45457c92baa Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Tue, 15 Jan 2019 15:18:56 +1100 Subject: nvram: Replace nvram_* function exports with static functions Replace nvram_* functions with static functions in nvram.h. These will become wrappers for struct nvram_ops method calls. This patch effectively disables existing NVRAM functionality so as to allow the rest of the series to be bisected without build failures. That functionality is gradually re-implemented in subsequent patches. Replace the sole validate-checksum-and-read-byte sequence with a call to nvram_read() which will gain the same semantics in subsequent patches. Remove unused exports. Acked-by: Geert Uytterhoeven Signed-off-by: Finn Thain Signed-off-by: Greg Kroah-Hartman --- arch/m68k/atari/nvram.c | 39 +++------------------------------------ drivers/char/nvram.c | 27 +++++---------------------- drivers/scsi/atari_scsi.c | 8 +++++--- include/linux/nvram.h | 32 +++++++++++++++++++++++++------- 4 files changed, 38 insertions(+), 68 deletions(-) (limited to 'include/linux') diff --git a/arch/m68k/atari/nvram.c b/arch/m68k/atari/nvram.c index a8c457e40b0b..1d767847ffa6 100644 --- a/arch/m68k/atari/nvram.c +++ b/arch/m68k/atari/nvram.c @@ -34,38 +34,17 @@ * periodic 11 min sync from kernel/time/ntp.c vs. this driver.) */ -unsigned char __nvram_read_byte(int i) +static unsigned char __nvram_read_byte(int i) { return CMOS_READ(NVRAM_FIRST_BYTE + i); } -unsigned char nvram_read_byte(int i) -{ - unsigned long flags; - unsigned char c; - - spin_lock_irqsave(&rtc_lock, flags); - c = __nvram_read_byte(i); - spin_unlock_irqrestore(&rtc_lock, flags); - return c; -} -EXPORT_SYMBOL(nvram_read_byte); - /* This races nicely with trying to read with checksum checking */ -void __nvram_write_byte(unsigned char c, int i) +static void __nvram_write_byte(unsigned char c, int i) { CMOS_WRITE(c, NVRAM_FIRST_BYTE + i); } -void nvram_write_byte(unsigned char c, int i) -{ - unsigned long flags; - - spin_lock_irqsave(&rtc_lock, flags); - __nvram_write_byte(c, i); - spin_unlock_irqrestore(&rtc_lock, flags); -} - /* On Ataris, the checksum is over all bytes except the checksum bytes * themselves; these are at the very end. */ @@ -73,7 +52,7 @@ void nvram_write_byte(unsigned char c, int i) #define ATARI_CKS_RANGE_END 47 #define ATARI_CKS_LOC 48 -int __nvram_check_checksum(void) +static int __nvram_check_checksum(void) { int i; unsigned char sum = 0; @@ -84,18 +63,6 @@ int __nvram_check_checksum(void) (__nvram_read_byte(ATARI_CKS_LOC + 1) == (sum & 0xff)); } -int nvram_check_checksum(void) -{ - unsigned long flags; - int rv; - - spin_lock_irqsave(&rtc_lock, flags); - rv = __nvram_check_checksum(); - spin_unlock_irqrestore(&rtc_lock, flags); - return rv; -} -EXPORT_SYMBOL(nvram_check_checksum); - static void __nvram_set_checksum(void) { int i; diff --git a/drivers/char/nvram.c b/drivers/char/nvram.c index c660cff9faf4..c98775bfd896 100644 --- a/drivers/char/nvram.c +++ b/drivers/char/nvram.c @@ -74,13 +74,12 @@ static int nvram_open_mode; /* special open modes */ * periodic 11 min sync from kernel/time/ntp.c vs. this driver.) */ -unsigned char __nvram_read_byte(int i) +static unsigned char __nvram_read_byte(int i) { return CMOS_READ(NVRAM_FIRST_BYTE + i); } -EXPORT_SYMBOL(__nvram_read_byte); -unsigned char nvram_read_byte(int i) +static unsigned char pc_nvram_read_byte(int i) { unsigned long flags; unsigned char c; @@ -90,16 +89,14 @@ unsigned char nvram_read_byte(int i) spin_unlock_irqrestore(&rtc_lock, flags); return c; } -EXPORT_SYMBOL(nvram_read_byte); /* This races nicely with trying to read with checksum checking (nvram_read) */ -void __nvram_write_byte(unsigned char c, int i) +static void __nvram_write_byte(unsigned char c, int i) { CMOS_WRITE(c, NVRAM_FIRST_BYTE + i); } -EXPORT_SYMBOL(__nvram_write_byte); -void nvram_write_byte(unsigned char c, int i) +static void pc_nvram_write_byte(unsigned char c, int i) { unsigned long flags; @@ -107,14 +104,13 @@ void nvram_write_byte(unsigned char c, int i) __nvram_write_byte(c, i); spin_unlock_irqrestore(&rtc_lock, flags); } -EXPORT_SYMBOL(nvram_write_byte); /* On PCs, the checksum is built only over bytes 2..31 */ #define PC_CKS_RANGE_START 2 #define PC_CKS_RANGE_END 31 #define PC_CKS_LOC 32 -int __nvram_check_checksum(void) +static int __nvram_check_checksum(void) { int i; unsigned short sum = 0; @@ -126,19 +122,6 @@ int __nvram_check_checksum(void) __nvram_read_byte(PC_CKS_LOC+1); return (sum & 0xffff) == expect; } -EXPORT_SYMBOL(__nvram_check_checksum); - -int nvram_check_checksum(void) -{ - unsigned long flags; - int rv; - - spin_lock_irqsave(&rtc_lock, flags); - rv = __nvram_check_checksum(); - spin_unlock_irqrestore(&rtc_lock, flags); - return rv; -} -EXPORT_SYMBOL(nvram_check_checksum); static void __nvram_set_checksum(void) { diff --git a/drivers/scsi/atari_scsi.c b/drivers/scsi/atari_scsi.c index 78b43200c99e..e809493d0d06 100644 --- a/drivers/scsi/atari_scsi.c +++ b/drivers/scsi/atari_scsi.c @@ -759,13 +759,15 @@ static int __init atari_scsi_probe(struct platform_device *pdev) atari_scsi_template.this_id = setup_hostid & 7; } else if (IS_REACHABLE(CONFIG_NVRAM)) { /* Test if a host id is set in the NVRam */ - if (ATARIHW_PRESENT(TT_CLK) && nvram_check_checksum()) { - unsigned char b = nvram_read_byte(16); + if (ATARIHW_PRESENT(TT_CLK)) { + unsigned char b; + loff_t offset = 16; + ssize_t count = nvram_read(&b, 1, &offset); /* Arbitration enabled? (for TOS) * If yes, use configured host ID */ - if (b & 0x80) + if ((count == 1) && (b & 0x80)) atari_scsi_template.this_id = b & 7; } } diff --git a/include/linux/nvram.h b/include/linux/nvram.h index 28bfb9ab94ca..eb5b52a9a747 100644 --- a/include/linux/nvram.h +++ b/include/linux/nvram.h @@ -2,13 +2,31 @@ #ifndef _LINUX_NVRAM_H #define _LINUX_NVRAM_H +#include #include -/* __foo is foo without grabbing the rtc_lock - get it yourself */ -extern unsigned char __nvram_read_byte(int i); -extern unsigned char nvram_read_byte(int i); -extern void __nvram_write_byte(unsigned char c, int i); -extern void nvram_write_byte(unsigned char c, int i); -extern int __nvram_check_checksum(void); -extern int nvram_check_checksum(void); +static inline ssize_t nvram_get_size(void) +{ + return -ENODEV; +} + +static inline unsigned char nvram_read_byte(int addr) +{ + return 0xFF; +} + +static inline void nvram_write_byte(unsigned char val, int addr) +{ +} + +static inline ssize_t nvram_read(char *buf, size_t count, loff_t *ppos) +{ + return -ENODEV; +} + +static inline ssize_t nvram_write(char *buf, size_t count, loff_t *ppos) +{ + return -ENODEV; +} + #endif /* _LINUX_NVRAM_H */ -- cgit From a084dbf6592c22468eb946014b2e731fb42da7a9 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Tue, 15 Jan 2019 15:18:56 +1100 Subject: m68k/atari: Implement arch_nvram_ops struct By implementing an arch_nvram_ops struct, a platform can re-use the drivers/char/nvram.c module without needing any arch-specific code in that module. Atari does so here. Acked-by: Geert Uytterhoeven Signed-off-by: Finn Thain Signed-off-by: Greg Kroah-Hartman --- arch/m68k/atari/nvram.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/nvram.h | 14 ++++++++++++++ 2 files changed, 63 insertions(+) (limited to 'include/linux') diff --git a/arch/m68k/atari/nvram.c b/arch/m68k/atari/nvram.c index 1d767847ffa6..e75adebe6e7d 100644 --- a/arch/m68k/atari/nvram.c +++ b/arch/m68k/atari/nvram.c @@ -74,6 +74,55 @@ static void __nvram_set_checksum(void) __nvram_write_byte(sum, ATARI_CKS_LOC + 1); } +static ssize_t atari_nvram_read(char *buf, size_t count, loff_t *ppos) +{ + char *p = buf; + loff_t i; + + spin_lock_irq(&rtc_lock); + if (!__nvram_check_checksum()) { + spin_unlock_irq(&rtc_lock); + return -EIO; + } + for (i = *ppos; count > 0 && i < NVRAM_BYTES; --count, ++i, ++p) + *p = __nvram_read_byte(i); + spin_unlock_irq(&rtc_lock); + + *ppos = i; + return p - buf; +} + +static ssize_t atari_nvram_write(char *buf, size_t count, loff_t *ppos) +{ + char *p = buf; + loff_t i; + + spin_lock_irq(&rtc_lock); + if (!__nvram_check_checksum()) { + spin_unlock_irq(&rtc_lock); + return -EIO; + } + for (i = *ppos; count > 0 && i < NVRAM_BYTES; --count, ++i, ++p) + __nvram_write_byte(*p, i); + __nvram_set_checksum(); + spin_unlock_irq(&rtc_lock); + + *ppos = i; + return p - buf; +} + +static ssize_t atari_nvram_get_size(void) +{ + return NVRAM_BYTES; +} + +const struct nvram_ops arch_nvram_ops = { + .read = atari_nvram_read, + .write = atari_nvram_write, + .get_size = atari_nvram_get_size, +}; +EXPORT_SYMBOL(arch_nvram_ops); + #ifdef CONFIG_PROC_FS static struct { unsigned char val; diff --git a/include/linux/nvram.h b/include/linux/nvram.h index eb5b52a9a747..a1e01dc89759 100644 --- a/include/linux/nvram.h +++ b/include/linux/nvram.h @@ -5,8 +5,18 @@ #include #include +struct nvram_ops { + ssize_t (*get_size)(void); + ssize_t (*read)(char *, size_t, loff_t *); + ssize_t (*write)(char *, size_t, loff_t *); +}; + +extern const struct nvram_ops arch_nvram_ops; + static inline ssize_t nvram_get_size(void) { + if (arch_nvram_ops.get_size) + return arch_nvram_ops.get_size(); return -ENODEV; } @@ -21,11 +31,15 @@ static inline void nvram_write_byte(unsigned char val, int addr) static inline ssize_t nvram_read(char *buf, size_t count, loff_t *ppos) { + if (arch_nvram_ops.read) + return arch_nvram_ops.read(buf, count, ppos); return -ENODEV; } static inline ssize_t nvram_write(char *buf, size_t count, loff_t *ppos) { + if (arch_nvram_ops.write) + return arch_nvram_ops.write(buf, count, ppos); return -ENODEV; } -- cgit From a156c7ba669c65b55c7afcc3994e1199cc0cad47 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Tue, 15 Jan 2019 15:18:56 +1100 Subject: powerpc: Replace nvram_* extern declarations with standard header Remove the nvram_read_byte() and nvram_write_byte() declarations in powerpc/include/asm/nvram.h and use the cross-platform static functions in linux/nvram.h instead. Tested-by: Stan Johnson Signed-off-by: Finn Thain Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/nvram.h | 6 ------ arch/powerpc/kernel/setup_32.c | 25 +------------------------ drivers/char/generic_nvram.c | 1 + drivers/video/fbdev/matrox/matroxfb_base.c | 2 +- include/linux/nvram.h | 3 +++ 5 files changed, 6 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/include/asm/nvram.h b/arch/powerpc/include/asm/nvram.h index 09a518bb7c03..56a388da9c4f 100644 --- a/arch/powerpc/include/asm/nvram.h +++ b/arch/powerpc/include/asm/nvram.h @@ -98,10 +98,4 @@ extern int nvram_write_os_partition(struct nvram_os_partition *part, unsigned int err_type, unsigned int error_log_cnt); -/* Determine NVRAM size */ -extern ssize_t nvram_get_size(void); - -/* Normal access to NVRAM */ -extern unsigned char nvram_read_byte(int i); -extern void nvram_write_byte(unsigned char c, int i); #endif /* _ASM_POWERPC_NVRAM_H */ diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 947f904688b0..f5107796e2d7 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -149,30 +150,6 @@ __setup("l3cr=", ppc_setup_l3cr); #ifdef CONFIG_GENERIC_NVRAM -/* Generic nvram hooks used by drivers/char/gen_nvram.c */ -unsigned char nvram_read_byte(int addr) -{ - if (ppc_md.nvram_read_val) - return ppc_md.nvram_read_val(addr); - return 0xff; -} -EXPORT_SYMBOL(nvram_read_byte); - -void nvram_write_byte(unsigned char val, int addr) -{ - if (ppc_md.nvram_write_val) - ppc_md.nvram_write_val(addr, val); -} -EXPORT_SYMBOL(nvram_write_byte); - -ssize_t nvram_get_size(void) -{ - if (ppc_md.nvram_size) - return ppc_md.nvram_size(); - return -1; -} -EXPORT_SYMBOL(nvram_get_size); - void nvram_sync(void) { if (ppc_md.nvram_sync) diff --git a/drivers/char/generic_nvram.c b/drivers/char/generic_nvram.c index ff5394f47587..0c22b9503e84 100644 --- a/drivers/char/generic_nvram.c +++ b/drivers/char/generic_nvram.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/video/fbdev/matrox/matroxfb_base.c b/drivers/video/fbdev/matrox/matroxfb_base.c index 838869c6490c..0a4e5bad33f4 100644 --- a/drivers/video/fbdev/matrox/matroxfb_base.c +++ b/drivers/video/fbdev/matrox/matroxfb_base.c @@ -111,12 +111,12 @@ #include "matroxfb_g450.h" #include #include +#include #include #include #ifdef CONFIG_PPC_PMAC #include -unsigned char nvram_read_byte(int); static int default_vmode = VMODE_NVRAM; static int default_cmode = CMODE_NVRAM; #endif diff --git a/include/linux/nvram.h b/include/linux/nvram.h index a1e01dc89759..79431dab87a1 100644 --- a/include/linux/nvram.h +++ b/include/linux/nvram.h @@ -15,8 +15,11 @@ extern const struct nvram_ops arch_nvram_ops; static inline ssize_t nvram_get_size(void) { +#ifdef CONFIG_PPC +#else if (arch_nvram_ops.get_size) return arch_nvram_ops.get_size(); +#endif return -ENODEV; } -- cgit From d5bbb5021ce8d9ff561c7469f5b4589ccb3bc4a6 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Tue, 15 Jan 2019 15:18:56 +1100 Subject: char/nvram: Adopt arch_nvram_ops NVRAMs on different platforms and architectures have different attributes and access methods. E.g. some platforms have byte-at-a-time accessor functions while others have byte-range accessor functions. Some have checksum functionality while others do not. By calling ops struct methods via the common wrapper functions, the nvram module and other drivers can make use of the available NVRAM functionality in a portable way. Signed-off-by: Finn Thain Signed-off-by: Greg Kroah-Hartman --- drivers/char/nvram.c | 30 ++++++++++++++++++++++++------ include/linux/nvram.h | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/nvram.c b/drivers/char/nvram.c index c98775bfd896..2df391f78986 100644 --- a/drivers/char/nvram.c +++ b/drivers/char/nvram.c @@ -52,9 +52,11 @@ static DEFINE_MUTEX(nvram_mutex); static DEFINE_SPINLOCK(nvram_state_lock); static int nvram_open_cnt; /* #times opened */ static int nvram_open_mode; /* special open modes */ +static ssize_t nvram_size; #define NVRAM_WRITE 1 /* opened for writing (exclusive) */ #define NVRAM_EXCL 2 /* opened with O_EXCL */ +#ifdef CONFIG_X86 /* * These functions are provided to be called internally or by other parts of * the kernel. It's up to the caller to ensure correct checksum before reading @@ -145,6 +147,19 @@ void nvram_set_checksum(void) } #endif /* 0 */ +static ssize_t pc_nvram_get_size(void) +{ + return NVRAM_BYTES; +} + +const struct nvram_ops arch_nvram_ops = { + .read_byte = pc_nvram_read_byte, + .write_byte = pc_nvram_write_byte, + .get_size = pc_nvram_get_size, +}; +EXPORT_SYMBOL(arch_nvram_ops); +#endif /* CONFIG_X86 */ + /* * The are the file operation function for user access to /dev/nvram */ @@ -152,7 +167,7 @@ void nvram_set_checksum(void) static loff_t nvram_misc_llseek(struct file *file, loff_t offset, int origin) { return generic_file_llseek_size(file, offset, origin, MAX_LFS_FILESIZE, - NVRAM_BYTES); + nvram_size); } static ssize_t nvram_misc_read(struct file *file, char __user *buf, @@ -303,8 +318,7 @@ static int nvram_misc_release(struct inode *inode, struct file *file) return 0; } -#ifdef CONFIG_PROC_FS - +#if defined(CONFIG_X86) && defined(CONFIG_PROC_FS) static const char * const floppy_types[] = { "none", "5.25'' 360k", "5.25'' 1.2M", "3.5'' 720k", "3.5'' 1.44M", "3.5'' 2.88M", "3.5'' 2.88M" @@ -394,7 +408,7 @@ static int nvram_proc_read(struct seq_file *seq, void *offset) return 0; } -#endif /* CONFIG_PROC_FS */ +#endif /* CONFIG_X86 && CONFIG_PROC_FS */ static const struct file_operations nvram_misc_fops = { .owner = THIS_MODULE, @@ -416,13 +430,17 @@ static int __init nvram_module_init(void) { int ret; + nvram_size = nvram_get_size(); + if (nvram_size < 0) + return nvram_size; + ret = misc_register(&nvram_misc); if (ret) { pr_err("nvram: can't misc_register on minor=%d\n", NVRAM_MINOR); return ret; } -#ifdef CONFIG_PROC_FS +#if defined(CONFIG_X86) && defined(CONFIG_PROC_FS) if (!proc_create_single("driver/nvram", 0, NULL, nvram_proc_read)) { pr_err("nvram: can't create /proc/driver/nvram\n"); misc_deregister(&nvram_misc); @@ -436,7 +454,7 @@ static int __init nvram_module_init(void) static void __exit nvram_module_exit(void) { -#ifdef CONFIG_PROC_FS +#if defined(CONFIG_X86) && defined(CONFIG_PROC_FS) remove_proc_entry("driver/nvram", NULL); #endif misc_deregister(&nvram_misc); diff --git a/include/linux/nvram.h b/include/linux/nvram.h index 79431dab87a1..bb4ea8cc6ea6 100644 --- a/include/linux/nvram.h +++ b/include/linux/nvram.h @@ -5,8 +5,30 @@ #include #include +/** + * struct nvram_ops - NVRAM functionality made available to drivers + * @read: validate checksum (if any) then load a range of bytes from NVRAM + * @write: store a range of bytes to NVRAM then update checksum (if any) + * @read_byte: load a single byte from NVRAM + * @write_byte: store a single byte to NVRAM + * @get_size: return the fixed number of bytes in the NVRAM + * + * Architectures which provide an nvram ops struct need not implement all + * of these methods. If the NVRAM hardware can be accessed only one byte + * at a time then it may be sufficient to provide .read_byte and .write_byte. + * If the NVRAM has a checksum (and it is to be checked) the .read and + * .write methods can be used to implement that efficiently. + * + * Portable drivers may use the wrapper functions defined here. + * The nvram_read() and nvram_write() functions call the .read and .write + * methods when available and fall back on the .read_byte and .write_byte + * methods otherwise. + */ + struct nvram_ops { ssize_t (*get_size)(void); + unsigned char (*read_byte)(int); + void (*write_byte)(unsigned char, int); ssize_t (*read)(char *, size_t, loff_t *); ssize_t (*write)(char *, size_t, loff_t *); }; @@ -25,11 +47,21 @@ static inline ssize_t nvram_get_size(void) static inline unsigned char nvram_read_byte(int addr) { +#ifdef CONFIG_PPC +#else + if (arch_nvram_ops.read_byte) + return arch_nvram_ops.read_byte(addr); +#endif return 0xFF; } static inline void nvram_write_byte(unsigned char val, int addr) { +#ifdef CONFIG_PPC +#else + if (arch_nvram_ops.write_byte) + arch_nvram_ops.write_byte(val, addr); +#endif } static inline ssize_t nvram_read(char *buf, size_t count, loff_t *ppos) -- cgit From 2d58636e0af724f38acad25246c1625efec36122 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Tue, 15 Jan 2019 15:18:56 +1100 Subject: char/nvram: Allow the set_checksum and initialize ioctls to be omitted The drivers/char/nvram.c module has previously supported only RTC "CMOS" NVRAM, for which it provides appropriate checksum ioctls. Make these ioctls optional so the module can be re-used with other kinds of NVRAM. The ops struct methods that implement the ioctls now return error codes so that a multi-platform kernel binary can do the right thing when running on hardware without a suitable NVRAM. Signed-off-by: Finn Thain Signed-off-by: Greg Kroah-Hartman --- drivers/char/nvram.c | 70 +++++++++++++++++++++++++++++---------------------- include/linux/nvram.h | 2 ++ 2 files changed, 42 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/nvram.c b/drivers/char/nvram.c index 2df391f78986..f88ef41d0598 100644 --- a/drivers/char/nvram.c +++ b/drivers/char/nvram.c @@ -136,16 +136,25 @@ static void __nvram_set_checksum(void) __nvram_write_byte(sum & 0xff, PC_CKS_LOC + 1); } -#if 0 -void nvram_set_checksum(void) +static long pc_nvram_set_checksum(void) { - unsigned long flags; + spin_lock_irq(&rtc_lock); + __nvram_set_checksum(); + spin_unlock_irq(&rtc_lock); + return 0; +} - spin_lock_irqsave(&rtc_lock, flags); +static long pc_nvram_initialize(void) +{ + ssize_t i; + + spin_lock_irq(&rtc_lock); + for (i = 0; i < NVRAM_BYTES; ++i) + __nvram_write_byte(0, i); __nvram_set_checksum(); - spin_unlock_irqrestore(&rtc_lock, flags); + spin_unlock_irq(&rtc_lock); + return 0; } -#endif /* 0 */ static ssize_t pc_nvram_get_size(void) { @@ -156,6 +165,8 @@ const struct nvram_ops arch_nvram_ops = { .read_byte = pc_nvram_read_byte, .write_byte = pc_nvram_write_byte, .get_size = pc_nvram_get_size, + .set_checksum = pc_nvram_set_checksum, + .initialize = pc_nvram_initialize, }; EXPORT_SYMBOL(arch_nvram_ops); #endif /* CONFIG_X86 */ @@ -241,51 +252,50 @@ checksum_err: static long nvram_misc_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - int i; + long ret = -ENOTTY; switch (cmd) { - case NVRAM_INIT: /* initialize NVRAM contents and checksum */ if (!capable(CAP_SYS_ADMIN)) return -EACCES; - mutex_lock(&nvram_mutex); - spin_lock_irq(&rtc_lock); - - for (i = 0; i < NVRAM_BYTES; ++i) - __nvram_write_byte(0, i); - __nvram_set_checksum(); - - spin_unlock_irq(&rtc_lock); - mutex_unlock(&nvram_mutex); - return 0; - + if (arch_nvram_ops.initialize != NULL) { + mutex_lock(&nvram_mutex); + ret = arch_nvram_ops.initialize(); + mutex_unlock(&nvram_mutex); + } + break; case NVRAM_SETCKS: /* just set checksum, contents unchanged (maybe useful after * checksum garbaged somehow...) */ if (!capable(CAP_SYS_ADMIN)) return -EACCES; - mutex_lock(&nvram_mutex); - spin_lock_irq(&rtc_lock); - __nvram_set_checksum(); - spin_unlock_irq(&rtc_lock); - mutex_unlock(&nvram_mutex); - return 0; - - default: - return -ENOTTY; + if (arch_nvram_ops.set_checksum != NULL) { + mutex_lock(&nvram_mutex); + ret = arch_nvram_ops.set_checksum(); + mutex_unlock(&nvram_mutex); + } + break; } + return ret; } static int nvram_misc_open(struct inode *inode, struct file *file) { spin_lock(&nvram_state_lock); + /* Prevent multiple readers/writers if desired. */ if ((nvram_open_cnt && (file->f_flags & O_EXCL)) || - (nvram_open_mode & NVRAM_EXCL) || - ((file->f_mode & FMODE_WRITE) && (nvram_open_mode & NVRAM_WRITE))) { + (nvram_open_mode & NVRAM_EXCL)) { + spin_unlock(&nvram_state_lock); + return -EBUSY; + } + + /* Prevent multiple writers if the set_checksum ioctl is implemented. */ + if ((arch_nvram_ops.set_checksum != NULL) && + (file->f_mode & FMODE_WRITE) && (nvram_open_mode & NVRAM_WRITE)) { spin_unlock(&nvram_state_lock); return -EBUSY; } diff --git a/include/linux/nvram.h b/include/linux/nvram.h index bb4ea8cc6ea6..31c763087746 100644 --- a/include/linux/nvram.h +++ b/include/linux/nvram.h @@ -31,6 +31,8 @@ struct nvram_ops { void (*write_byte)(unsigned char, int); ssize_t (*read)(char *, size_t, loff_t *); ssize_t (*write)(char *, size_t, loff_t *); + long (*initialize)(void); + long (*set_checksum)(void); }; extern const struct nvram_ops arch_nvram_ops; -- cgit From 109b3a89a7c48405d61a05d7a1720581a4f1574c Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Tue, 15 Jan 2019 15:18:56 +1100 Subject: char/nvram: Implement NVRAM read/write methods Refactor the RTC "CMOS" NVRAM functions so that they can be used as arch_nvram_ops methods. Checksumming logic is moved from the misc device operations to the nvram read/write operations. This makes the misc device implementation more generic. This preserves the locking mechanism such that "read if checksum valid" and "write and update checksum" remain atomic operations. Some platforms implement byte-range read/write methods which are similar to file_operations struct methods. Other platforms provide only byte-at-a-time methods. The former are more efficient but may be unavailable so fall back on the latter methods when necessary. Tested-by: Stan Johnson Signed-off-by: Finn Thain Signed-off-by: Greg Kroah-Hartman --- drivers/char/nvram.c | 120 +++++++++++++++++++++++++++++++------------------- include/linux/nvram.h | 32 +++++++++++++- 2 files changed, 104 insertions(+), 48 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/nvram.c b/drivers/char/nvram.c index f88ef41d0598..adcc213c331e 100644 --- a/drivers/char/nvram.c +++ b/drivers/char/nvram.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -161,7 +162,46 @@ static ssize_t pc_nvram_get_size(void) return NVRAM_BYTES; } +static ssize_t pc_nvram_read(char *buf, size_t count, loff_t *ppos) +{ + char *p = buf; + loff_t i; + + spin_lock_irq(&rtc_lock); + if (!__nvram_check_checksum()) { + spin_unlock_irq(&rtc_lock); + return -EIO; + } + for (i = *ppos; count > 0 && i < NVRAM_BYTES; --count, ++i, ++p) + *p = __nvram_read_byte(i); + spin_unlock_irq(&rtc_lock); + + *ppos = i; + return p - buf; +} + +static ssize_t pc_nvram_write(char *buf, size_t count, loff_t *ppos) +{ + char *p = buf; + loff_t i; + + spin_lock_irq(&rtc_lock); + if (!__nvram_check_checksum()) { + spin_unlock_irq(&rtc_lock); + return -EIO; + } + for (i = *ppos; count > 0 && i < NVRAM_BYTES; --count, ++i, ++p) + __nvram_write_byte(*p, i); + __nvram_set_checksum(); + spin_unlock_irq(&rtc_lock); + + *ppos = i; + return p - buf; +} + const struct nvram_ops arch_nvram_ops = { + .read = pc_nvram_read, + .write = pc_nvram_write, .read_byte = pc_nvram_read_byte, .write_byte = pc_nvram_write_byte, .get_size = pc_nvram_get_size, @@ -184,69 +224,57 @@ static loff_t nvram_misc_llseek(struct file *file, loff_t offset, int origin) static ssize_t nvram_misc_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - unsigned char contents[NVRAM_BYTES]; - unsigned i = *ppos; - unsigned char *tmp; - - spin_lock_irq(&rtc_lock); + char *tmp; + ssize_t ret; - if (!__nvram_check_checksum()) - goto checksum_err; - for (tmp = contents; count-- > 0 && i < NVRAM_BYTES; ++i, ++tmp) - *tmp = __nvram_read_byte(i); + if (!access_ok(buf, count)) + return -EFAULT; + if (*ppos >= nvram_size) + return 0; - spin_unlock_irq(&rtc_lock); + count = min_t(size_t, count, nvram_size - *ppos); + count = min_t(size_t, count, PAGE_SIZE); - if (copy_to_user(buf, contents, tmp - contents)) - return -EFAULT; + tmp = kmalloc(count, GFP_KERNEL); + if (!tmp) + return -ENOMEM; - *ppos = i; + ret = nvram_read(tmp, count, ppos); + if (ret <= 0) + goto out; - return tmp - contents; + if (copy_to_user(buf, tmp, ret)) { + *ppos -= ret; + ret = -EFAULT; + } -checksum_err: - spin_unlock_irq(&rtc_lock); - return -EIO; +out: + kfree(tmp); + return ret; } static ssize_t nvram_misc_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - unsigned char contents[NVRAM_BYTES]; - unsigned i = *ppos; - unsigned char *tmp; - - if (i >= NVRAM_BYTES) - return 0; /* Past EOF */ - - if (count > NVRAM_BYTES - i) - count = NVRAM_BYTES - i; - if (count > NVRAM_BYTES) - return -EFAULT; /* Can't happen, but prove it to gcc */ + char *tmp; + ssize_t ret; - if (copy_from_user(contents, buf, count)) + if (!access_ok(buf, count)) return -EFAULT; + if (*ppos >= nvram_size) + return 0; - spin_lock_irq(&rtc_lock); - - if (!__nvram_check_checksum()) - goto checksum_err; - - for (tmp = contents; count--; ++i, ++tmp) - __nvram_write_byte(*tmp, i); + count = min_t(size_t, count, nvram_size - *ppos); + count = min_t(size_t, count, PAGE_SIZE); - __nvram_set_checksum(); - - spin_unlock_irq(&rtc_lock); + tmp = memdup_user(buf, count); + if (IS_ERR(tmp)) + return PTR_ERR(tmp); - *ppos = i; - - return tmp - contents; - -checksum_err: - spin_unlock_irq(&rtc_lock); - return -EIO; + ret = nvram_write(tmp, count, ppos); + kfree(tmp); + return ret; } static long nvram_misc_ioctl(struct file *file, unsigned int cmd, diff --git a/include/linux/nvram.h b/include/linux/nvram.h index 31c763087746..9df85703735c 100644 --- a/include/linux/nvram.h +++ b/include/linux/nvram.h @@ -66,18 +66,46 @@ static inline void nvram_write_byte(unsigned char val, int addr) #endif } +static inline ssize_t nvram_read_bytes(char *buf, size_t count, loff_t *ppos) +{ + ssize_t nvram_size = nvram_get_size(); + loff_t i; + char *p = buf; + + if (nvram_size < 0) + return nvram_size; + for (i = *ppos; count > 0 && i < nvram_size; ++i, ++p, --count) + *p = nvram_read_byte(i); + *ppos = i; + return p - buf; +} + +static inline ssize_t nvram_write_bytes(char *buf, size_t count, loff_t *ppos) +{ + ssize_t nvram_size = nvram_get_size(); + loff_t i; + char *p = buf; + + if (nvram_size < 0) + return nvram_size; + for (i = *ppos; count > 0 && i < nvram_size; ++i, ++p, --count) + nvram_write_byte(*p, i); + *ppos = i; + return p - buf; +} + static inline ssize_t nvram_read(char *buf, size_t count, loff_t *ppos) { if (arch_nvram_ops.read) return arch_nvram_ops.read(buf, count, ppos); - return -ENODEV; + return nvram_read_bytes(buf, count, ppos); } static inline ssize_t nvram_write(char *buf, size_t count, loff_t *ppos) { if (arch_nvram_ops.write) return arch_nvram_ops.write(buf, count, ppos); - return -ENODEV; + return nvram_write_bytes(buf, count, ppos); } #endif /* _LINUX_NVRAM_H */ -- cgit From 95ac14b8a32817dcd1f13ae4787891484966d2d5 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Tue, 15 Jan 2019 15:18:56 +1100 Subject: powerpc: Implement nvram ioctls Add the powerpc-specific ioctls to the nvram module. This allows the nvram module to replace the generic_nvram module. Tested-by: Stan Johnson Signed-off-by: Finn Thain Signed-off-by: Greg Kroah-Hartman --- drivers/char/nvram.c | 38 ++++++++++++++++++++++++++++++++++++++ include/linux/nvram.h | 2 ++ 2 files changed, 40 insertions(+) (limited to 'include/linux') diff --git a/drivers/char/nvram.c b/drivers/char/nvram.c index c9e295d73dc5..944f05fddacd 100644 --- a/drivers/char/nvram.c +++ b/drivers/char/nvram.c @@ -48,6 +48,9 @@ #include #include +#ifdef CONFIG_PPC +#include +#endif static DEFINE_MUTEX(nvram_mutex); static DEFINE_SPINLOCK(nvram_state_lock); @@ -283,6 +286,38 @@ static long nvram_misc_ioctl(struct file *file, unsigned int cmd, long ret = -ENOTTY; switch (cmd) { +#ifdef CONFIG_PPC + case OBSOLETE_PMAC_NVRAM_GET_OFFSET: + pr_warn("nvram: Using obsolete PMAC_NVRAM_GET_OFFSET ioctl\n"); + /* fall through */ + case IOC_NVRAM_GET_OFFSET: + ret = -EINVAL; +#ifdef CONFIG_PPC_PMAC + if (machine_is(powermac)) { + int part, offset; + + if (copy_from_user(&part, (void __user *)arg, + sizeof(part)) != 0) + return -EFAULT; + if (part < pmac_nvram_OF || part > pmac_nvram_NR) + return -EINVAL; + offset = pmac_get_partition(part); + if (copy_to_user((void __user *)arg, + &offset, sizeof(offset)) != 0) + return -EFAULT; + ret = 0; + } +#endif + break; + case IOC_NVRAM_SYNC: + if (ppc_md.nvram_sync != NULL) { + mutex_lock(&nvram_mutex); + ppc_md.nvram_sync(); + mutex_unlock(&nvram_mutex); + } + ret = 0; + break; +#elif defined(CONFIG_X86) || defined(CONFIG_M68K) case NVRAM_INIT: /* initialize NVRAM contents and checksum */ if (!capable(CAP_SYS_ADMIN)) @@ -306,6 +341,7 @@ static long nvram_misc_ioctl(struct file *file, unsigned int cmd, mutex_unlock(&nvram_mutex); } break; +#endif /* CONFIG_X86 || CONFIG_M68K */ } return ret; } @@ -321,12 +357,14 @@ static int nvram_misc_open(struct inode *inode, struct file *file) return -EBUSY; } +#if defined(CONFIG_X86) || defined(CONFIG_M68K) /* Prevent multiple writers if the set_checksum ioctl is implemented. */ if ((arch_nvram_ops.set_checksum != NULL) && (file->f_mode & FMODE_WRITE) && (nvram_open_mode & NVRAM_WRITE)) { spin_unlock(&nvram_state_lock); return -EBUSY; } +#endif if (file->f_flags & O_EXCL) nvram_open_mode |= NVRAM_EXCL; diff --git a/include/linux/nvram.h b/include/linux/nvram.h index 9df85703735c..9e3a957c8f1f 100644 --- a/include/linux/nvram.h +++ b/include/linux/nvram.h @@ -31,8 +31,10 @@ struct nvram_ops { void (*write_byte)(unsigned char, int); ssize_t (*read)(char *, size_t, loff_t *); ssize_t (*write)(char *, size_t, loff_t *); +#if defined(CONFIG_X86) || defined(CONFIG_M68K) long (*initialize)(void); long (*set_checksum)(void); +#endif }; extern const struct nvram_ops arch_nvram_ops; -- cgit From f9c3a570f5fc584f2ca2dd222d1b8c8537fc55f6 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Tue, 15 Jan 2019 15:18:56 +1100 Subject: powerpc: Enable HAVE_ARCH_NVRAM_OPS and disable GENERIC_NVRAM Switch PPC32 kernels from the generic_nvram module to the nvram module. Also fix a theoretical bug where CHRP omits the chrp_nvram_init() call when CONFIG_NVRAM_MODULE=m. Tested-by: Stan Johnson Signed-off-by: Finn Thain Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/Kconfig | 6 +----- arch/powerpc/include/asm/nvram.h | 3 --- arch/powerpc/kernel/setup_32.c | 11 ----------- arch/powerpc/platforms/chrp/Makefile | 2 +- arch/powerpc/platforms/chrp/setup.c | 2 +- arch/powerpc/platforms/powermac/setup.c | 3 +-- drivers/char/Kconfig | 19 +++++++++---------- include/linux/nvram.h | 20 ++++++++++++++++++++ 8 files changed, 33 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 2890d36eb531..f62e6a3f9c4e 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -178,6 +178,7 @@ config PPC select HAVE_ARCH_KGDB select HAVE_ARCH_MMAP_RND_BITS select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT + select HAVE_ARCH_NVRAM_OPS if PPC32 select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_CBPF_JIT if !PPC64 @@ -274,11 +275,6 @@ config SYSVIPC_COMPAT depends on COMPAT && SYSVIPC default y -# All PPC32s use generic nvram driver through ppc_md -config GENERIC_NVRAM - bool - default y if PPC32 - config SCHED_OMIT_FRAME_POINTER bool default y diff --git a/arch/powerpc/include/asm/nvram.h b/arch/powerpc/include/asm/nvram.h index 56a388da9c4f..629a5cdcc865 100644 --- a/arch/powerpc/include/asm/nvram.h +++ b/arch/powerpc/include/asm/nvram.h @@ -78,9 +78,6 @@ extern int pmac_get_partition(int partition); extern u8 pmac_xpram_read(int xpaddr); extern void pmac_xpram_write(int xpaddr, u8 data); -/* Synchronize NVRAM */ -extern void nvram_sync(void); - /* Initialize NVRAM OS partition */ extern int __init nvram_init_os_partition(struct nvram_os_partition *part); diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index f5107796e2d7..c31082233a25 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -148,17 +148,6 @@ static int __init ppc_setup_l3cr(char *str) } __setup("l3cr=", ppc_setup_l3cr); -#ifdef CONFIG_GENERIC_NVRAM - -void nvram_sync(void) -{ - if (ppc_md.nvram_sync) - ppc_md.nvram_sync(); -} -EXPORT_SYMBOL(nvram_sync); - -#endif /* CONFIG_NVRAM */ - static int __init ppc_init(void) { /* clear the progress line */ diff --git a/arch/powerpc/platforms/chrp/Makefile b/arch/powerpc/platforms/chrp/Makefile index 4b3bfadc70fa..dc3465cc8bc6 100644 --- a/arch/powerpc/platforms/chrp/Makefile +++ b/arch/powerpc/platforms/chrp/Makefile @@ -1,3 +1,3 @@ obj-y += setup.o time.o pegasos_eth.o pci.o obj-$(CONFIG_SMP) += smp.o -obj-$(CONFIG_NVRAM) += nvram.o +obj-$(CONFIG_NVRAM:m=y) += nvram.o diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c index e66644e0fb40..e8e804289c8e 100644 --- a/arch/powerpc/platforms/chrp/setup.c +++ b/arch/powerpc/platforms/chrp/setup.c @@ -550,7 +550,7 @@ static void __init chrp_init_IRQ(void) static void __init chrp_init2(void) { -#ifdef CONFIG_NVRAM +#if IS_ENABLED(CONFIG_NVRAM) chrp_nvram_init(); #endif diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index 2e8221e20ee8..b47f49cf9c4d 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -316,8 +316,7 @@ static void __init pmac_setup_arch(void) find_via_pmu(); smu_init(); -#if defined(CONFIG_NVRAM) || defined(CONFIG_NVRAM_MODULE) || \ - defined(CONFIG_PPC64) +#if IS_ENABLED(CONFIG_NVRAM) || defined(CONFIG_PPC64) pmac_nvram_init(); #endif #ifdef CONFIG_PPC32 diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index ce9979529cf3..72866a004f07 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -244,25 +244,24 @@ source "drivers/char/hw_random/Kconfig" config NVRAM tristate "/dev/nvram support" - depends on X86 || GENERIC_NVRAM || HAVE_ARCH_NVRAM_OPS - default M68K + depends on X86 || HAVE_ARCH_NVRAM_OPS + default M68K || PPC ---help--- If you say Y here and create a character special file /dev/nvram with major number 10 and minor number 144 using mknod ("man mknod"), - you get read and write access to the extra bytes of non-volatile - memory in the real time clock (RTC), which is contained in every PC - and most Ataris. The actual number of bytes varies, depending on the - nvram in the system, but is usually 114 (128-14 for the RTC). - - This memory is conventionally called "CMOS RAM" on PCs and "NVRAM" - on Ataris. /dev/nvram may be used to view settings there, or to - change them (with some utility). It could also be used to frequently + you get read and write access to the non-volatile memory. + + /dev/nvram may be used to view settings in NVRAM or to change them + (with some utility). It could also be used to frequently save a few bits of very important data that may not be lost over power-off and for which writing to disk is too insecure. Note however that most NVRAM space in a PC belongs to the BIOS and you should NEVER idly tamper with it. See Ralf Brown's interrupt list for a guide to the use of CMOS bytes by your BIOS. + This memory is conventionally called "NVRAM" on PowerPC machines, + "CMOS RAM" on PCs, "NVRAM" on Ataris and "PRAM" on Macintoshes. + To compile this driver as a module, choose M here: the module will be called nvram. diff --git a/include/linux/nvram.h b/include/linux/nvram.h index 9e3a957c8f1f..d29d9c93a927 100644 --- a/include/linux/nvram.h +++ b/include/linux/nvram.h @@ -5,6 +5,10 @@ #include #include +#ifdef CONFIG_PPC +#include +#endif + /** * struct nvram_ops - NVRAM functionality made available to drivers * @read: validate checksum (if any) then load a range of bytes from NVRAM @@ -42,6 +46,8 @@ extern const struct nvram_ops arch_nvram_ops; static inline ssize_t nvram_get_size(void) { #ifdef CONFIG_PPC + if (ppc_md.nvram_size) + return ppc_md.nvram_size(); #else if (arch_nvram_ops.get_size) return arch_nvram_ops.get_size(); @@ -52,6 +58,8 @@ static inline ssize_t nvram_get_size(void) static inline unsigned char nvram_read_byte(int addr) { #ifdef CONFIG_PPC + if (ppc_md.nvram_read_val) + return ppc_md.nvram_read_val(addr); #else if (arch_nvram_ops.read_byte) return arch_nvram_ops.read_byte(addr); @@ -62,6 +70,8 @@ static inline unsigned char nvram_read_byte(int addr) static inline void nvram_write_byte(unsigned char val, int addr) { #ifdef CONFIG_PPC + if (ppc_md.nvram_write_val) + ppc_md.nvram_write_val(addr, val); #else if (arch_nvram_ops.write_byte) arch_nvram_ops.write_byte(val, addr); @@ -98,15 +108,25 @@ static inline ssize_t nvram_write_bytes(char *buf, size_t count, loff_t *ppos) static inline ssize_t nvram_read(char *buf, size_t count, loff_t *ppos) { +#ifdef CONFIG_PPC + if (ppc_md.nvram_read) + return ppc_md.nvram_read(buf, count, ppos); +#else if (arch_nvram_ops.read) return arch_nvram_ops.read(buf, count, ppos); +#endif return nvram_read_bytes(buf, count, ppos); } static inline ssize_t nvram_write(char *buf, size_t count, loff_t *ppos) { +#ifdef CONFIG_PPC + if (ppc_md.nvram_write) + return ppc_md.nvram_write(buf, count, ppos); +#else if (arch_nvram_ops.write) return arch_nvram_ops.write(buf, count, ppos); +#endif return nvram_write_bytes(buf, count, ppos); } -- cgit From 8092e79204e7884f4bee3584ecfe6cf4a124d129 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Thu, 20 Dec 2018 23:28:37 -0800 Subject: ihex: Share code between ihex_validate_fw() and ihex_next_binrec() Convert both ihex_validate_fw() and ihex_next_binrec() to use a helper function to calculate next record offest. This way we only have one place implementing next record offset calculation logic. No functional change intended. Cc: Chris Healy Cc: Kyle McMartin Cc: Andrew Morton Cc: Masahiro Yamada Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: linux-kernel Signed-off-by: Andrey Smirnov Signed-off-by: Greg Kroah-Hartman --- include/linux/ihex.h | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ihex.h b/include/linux/ihex.h index 75c194391869..9c701521176b 100644 --- a/include/linux/ihex.h +++ b/include/linux/ihex.h @@ -23,29 +23,34 @@ struct ihex_binrec { /* Find the next record, taking into account the 4-byte alignment */ static inline const struct ihex_binrec * -ihex_next_binrec(const struct ihex_binrec *rec) +__ihex_next_binrec(const struct ihex_binrec *rec) { int next = ((be16_to_cpu(rec->len) + 5) & ~3) - 2; rec = (void *)&rec->data[next]; + return rec; +} + +static inline const struct ihex_binrec * +ihex_next_binrec(const struct ihex_binrec *rec) +{ + rec = __ihex_next_binrec(rec); + return be16_to_cpu(rec->len) ? rec : NULL; } /* Check that ihex_next_binrec() won't take us off the end of the image... */ static inline int ihex_validate_fw(const struct firmware *fw) { - const struct ihex_binrec *rec; - size_t ofs = 0; + const struct ihex_binrec *end, *rec; - while (ofs <= fw->size - sizeof(*rec)) { - rec = (void *)&fw->data[ofs]; + rec = (const void *)fw->data; + end = (const void *)&fw->data[fw->size - sizeof(*end)]; + for (; rec <= end; rec = __ihex_next_binrec(rec)) { /* Zero length marks end of records */ if (!be16_to_cpu(rec->len)) return 0; - - /* Point to next record... */ - ofs += (sizeof(*rec) + be16_to_cpu(rec->len) + 3) & ~3; } return -EINVAL; } -- cgit From 5158c36ec9d0b3343f58987cec7ebfd866331fd0 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Thu, 20 Dec 2018 23:28:38 -0800 Subject: ihex: Check if zero-length record is at the end of the blob When verifying the validity of IHEX file we need to make sure that zero-length record we found is located at the end of the file. Not doing that could result in an invalid file with a bogus zero-length in the middle short-circuiting the check and being reported as valid. Cc: Chris Healy Cc: Kyle McMartin Cc: Andrew Morton Cc: Masahiro Yamada Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: linux-kernel Signed-off-by: Andrey Smirnov Signed-off-by: Greg Kroah-Hartman --- include/linux/ihex.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ihex.h b/include/linux/ihex.h index 9c701521176b..9130f307a420 100644 --- a/include/linux/ihex.h +++ b/include/linux/ihex.h @@ -49,7 +49,7 @@ static inline int ihex_validate_fw(const struct firmware *fw) for (; rec <= end; rec = __ihex_next_binrec(rec)) { /* Zero length marks end of records */ - if (!be16_to_cpu(rec->len)) + if (rec == end && !be16_to_cpu(rec->len)) return 0; } return -EINVAL; -- cgit From 9fb4ab4d3dd665a62da9c176a89e7c7feaf5d9e4 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Thu, 20 Dec 2018 23:28:39 -0800 Subject: ihex: Simplify next record offset calculation Next record calucaltion can be reduced to a much more tivial ALIGN operation as follows: 1. Splitting 5 into 2 + 3 we get next = ((be16_to_cpu(rec->len) + 2 + 3) & ~3) - 2 (1) 2. Using ALIGN macro we reduce (1) to: ALIGN(be16_to_cpu(rec->len) + 2, 4) - 2 (2) 3. Subsituting 'next' in original next record calucation we get: (void *)&rec->data[ALIGN(be16_to_cpu(rec->len) + 2, 4) - 2] (3) 4. Converting array index to pointer arithmetic we convert (3) into: (void *)rec + sizeof(*rec) + ALIGN(be16_to_cpu(rec->len) + 2, 4) - 2 (4) 5. Subsituting sizeof(*rec) with its value, 6, and substracting 2, in (4) we get: (void *)rec + ALIGN(be16_to_cpu(rec->len) + 2, 4) + 4 (5) 6. Since ALIGN(X, 4) + 4 == ALIGN(X + 4, 4), (5) can be converted to: (void *)rec + ALIGN(be16_to_cpu(rec->len) + 6, 4) (6) 5. Subsituting 6 in (6) to sizeof(*rec) we get: (void *)rec + ALIGN(be16_to_cpu(rec->len) + sizeof(*rec), 4) (7) Using expression (7) should make it more clear that next record is located by adding full size of the current record (payload + auxiliary data) aligned to 4 bytes, to the location of the current one. No functional change intended. Cc: Chris Healy Cc: Kyle McMartin Cc: Andrew Morton Cc: Masahiro Yamada Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: linux-kernel Signed-off-by: Andrey Smirnov Signed-off-by: Greg Kroah-Hartman --- include/linux/ihex.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ihex.h b/include/linux/ihex.h index 9130f307a420..98cb5ce0b0a0 100644 --- a/include/linux/ihex.h +++ b/include/linux/ihex.h @@ -21,14 +21,18 @@ struct ihex_binrec { uint8_t data[0]; } __attribute__((packed)); +static inline uint16_t ihex_binrec_size(const struct ihex_binrec *p) +{ + return be16_to_cpu(p->len) + sizeof(*p); +} + /* Find the next record, taking into account the 4-byte alignment */ static inline const struct ihex_binrec * __ihex_next_binrec(const struct ihex_binrec *rec) { - int next = ((be16_to_cpu(rec->len) + 5) & ~3) - 2; - rec = (void *)&rec->data[next]; + const void *p = rec; - return rec; + return p + ALIGN(ihex_binrec_size(rec), 4); } static inline const struct ihex_binrec * -- cgit From 11f1ceca7031deefc1a34236ab7b94360016b71d Mon Sep 17 00:00:00 2001 From: Georgi Djakov Date: Wed, 16 Jan 2019 18:10:56 +0200 Subject: interconnect: Add generic on-chip interconnect API This patch introduces a new API to get requirements and configure the interconnect buses across the entire chipset to fit with the current demand. The API is using a consumer/provider-based model, where the providers are the interconnect buses and the consumers could be various drivers. The consumers request interconnect resources (path) between endpoints and set the desired constraints on this data flow path. The providers receive requests from consumers and aggregate these requests for all master-slave pairs on that path. Then the providers configure each node along the path to support a bandwidth that satisfies all bandwidth requests that cross through that node. The topology could be complicated and multi-tiered and is SoC specific. Reviewed-by: Evan Green Signed-off-by: Georgi Djakov Signed-off-by: Greg Kroah-Hartman --- Documentation/interconnect/interconnect.rst | 94 +++++ drivers/Kconfig | 2 + drivers/Makefile | 1 + drivers/interconnect/Kconfig | 10 + drivers/interconnect/Makefile | 5 + drivers/interconnect/core.c | 567 ++++++++++++++++++++++++++++ include/linux/interconnect-provider.h | 125 ++++++ include/linux/interconnect.h | 52 +++ 8 files changed, 856 insertions(+) create mode 100644 Documentation/interconnect/interconnect.rst create mode 100644 drivers/interconnect/Kconfig create mode 100644 drivers/interconnect/Makefile create mode 100644 drivers/interconnect/core.c create mode 100644 include/linux/interconnect-provider.h create mode 100644 include/linux/interconnect.h (limited to 'include/linux') diff --git a/Documentation/interconnect/interconnect.rst b/Documentation/interconnect/interconnect.rst new file mode 100644 index 000000000000..b8107dcc4cd3 --- /dev/null +++ b/Documentation/interconnect/interconnect.rst @@ -0,0 +1,94 @@ +.. SPDX-License-Identifier: GPL-2.0 + +===================================== +GENERIC SYSTEM INTERCONNECT SUBSYSTEM +===================================== + +Introduction +------------ + +This framework is designed to provide a standard kernel interface to control +the settings of the interconnects on an SoC. These settings can be throughput, +latency and priority between multiple interconnected devices or functional +blocks. This can be controlled dynamically in order to save power or provide +maximum performance. + +The interconnect bus is hardware with configurable parameters, which can be +set on a data path according to the requests received from various drivers. +An example of interconnect buses are the interconnects between various +components or functional blocks in chipsets. There can be multiple interconnects +on an SoC that can be multi-tiered. + +Below is a simplified diagram of a real-world SoC interconnect bus topology. + +:: + + +----------------+ +----------------+ + | HW Accelerator |--->| M NoC |<---------------+ + +----------------+ +----------------+ | + | | +------------+ + +-----+ +-------------+ V +------+ | | + | DDR | | +--------+ | PCIe | | | + +-----+ | | Slaves | +------+ | | + ^ ^ | +--------+ | | C NoC | + | | V V | | + +------------------+ +------------------------+ | | +-----+ + | |-->| |-->| |-->| CPU | + | |-->| |<--| | +-----+ + | Mem NoC | | S NoC | +------------+ + | |<--| |---------+ | + | |<--| |<------+ | | +--------+ + +------------------+ +------------------------+ | | +-->| Slaves | + ^ ^ ^ ^ ^ | | +--------+ + | | | | | | V + +------+ | +-----+ +-----+ +---------+ +----------------+ +--------+ + | CPUs | | | GPU | | DSP | | Masters |-->| P NoC |-->| Slaves | + +------+ | +-----+ +-----+ +---------+ +----------------+ +--------+ + | + +-------+ + | Modem | + +-------+ + +Terminology +----------- + +Interconnect provider is the software definition of the interconnect hardware. +The interconnect providers on the above diagram are M NoC, S NoC, C NoC, P NoC +and Mem NoC. + +Interconnect node is the software definition of the interconnect hardware +port. Each interconnect provider consists of multiple interconnect nodes, +which are connected to other SoC components including other interconnect +providers. The point on the diagram where the CPUs connect to the memory is +called an interconnect node, which belongs to the Mem NoC interconnect provider. + +Interconnect endpoints are the first or the last element of the path. Every +endpoint is a node, but not every node is an endpoint. + +Interconnect path is everything between two endpoints including all the nodes +that have to be traversed to reach from a source to destination node. It may +include multiple master-slave pairs across several interconnect providers. + +Interconnect consumers are the entities which make use of the data paths exposed +by the providers. The consumers send requests to providers requesting various +throughput, latency and priority. Usually the consumers are device drivers, that +send request based on their needs. An example for a consumer is a video decoder +that supports various formats and image sizes. + +Interconnect providers +---------------------- + +Interconnect provider is an entity that implements methods to initialize and +configure interconnect bus hardware. The interconnect provider drivers should +be registered with the interconnect provider core. + +.. kernel-doc:: include/linux/interconnect-provider.h + +Interconnect consumers +---------------------- + +Interconnect consumers are the clients which use the interconnect APIs to +get paths between endpoints and set their bandwidth/latency/QoS requirements +for these interconnect paths. + +.. kernel-doc:: include/linux/interconnect.h diff --git a/drivers/Kconfig b/drivers/Kconfig index 4f9f99057ff8..45f9decb9848 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -228,4 +228,6 @@ source "drivers/siox/Kconfig" source "drivers/slimbus/Kconfig" +source "drivers/interconnect/Kconfig" + endmenu diff --git a/drivers/Makefile b/drivers/Makefile index e1ce029d28fd..bb15b9d0e793 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -186,3 +186,4 @@ obj-$(CONFIG_MULTIPLEXER) += mux/ obj-$(CONFIG_UNISYS_VISORBUS) += visorbus/ obj-$(CONFIG_SIOX) += siox/ obj-$(CONFIG_GNSS) += gnss/ +obj-$(CONFIG_INTERCONNECT) += interconnect/ diff --git a/drivers/interconnect/Kconfig b/drivers/interconnect/Kconfig new file mode 100644 index 000000000000..a261c7d41deb --- /dev/null +++ b/drivers/interconnect/Kconfig @@ -0,0 +1,10 @@ +menuconfig INTERCONNECT + tristate "On-Chip Interconnect management support" + help + Support for management of the on-chip interconnects. + + This framework is designed to provide a generic interface for + managing the interconnects in a SoC. + + If unsure, say no. + diff --git a/drivers/interconnect/Makefile b/drivers/interconnect/Makefile new file mode 100644 index 000000000000..7a01f33b5593 --- /dev/null +++ b/drivers/interconnect/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0 + +icc-core-objs := core.o + +obj-$(CONFIG_INTERCONNECT) += icc-core.o diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c new file mode 100644 index 000000000000..2b937b4f43c4 --- /dev/null +++ b/drivers/interconnect/core.c @@ -0,0 +1,567 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Interconnect framework core driver + * + * Copyright (c) 2017-2019, Linaro Ltd. + * Author: Georgi Djakov + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static DEFINE_IDR(icc_idr); +static LIST_HEAD(icc_providers); +static DEFINE_MUTEX(icc_lock); + +/** + * struct icc_req - constraints that are attached to each node + * @req_node: entry in list of requests for the particular @node + * @node: the interconnect node to which this constraint applies + * @dev: reference to the device that sets the constraints + * @avg_bw: an integer describing the average bandwidth in kBps + * @peak_bw: an integer describing the peak bandwidth in kBps + */ +struct icc_req { + struct hlist_node req_node; + struct icc_node *node; + struct device *dev; + u32 avg_bw; + u32 peak_bw; +}; + +/** + * struct icc_path - interconnect path structure + * @num_nodes: number of hops (nodes) + * @reqs: array of the requests applicable to this path of nodes + */ +struct icc_path { + size_t num_nodes; + struct icc_req reqs[]; +}; + +static struct icc_node *node_find(const int id) +{ + return idr_find(&icc_idr, id); +} + +static struct icc_path *path_init(struct device *dev, struct icc_node *dst, + ssize_t num_nodes) +{ + struct icc_node *node = dst; + struct icc_path *path; + int i; + + path = kzalloc(struct_size(path, reqs, num_nodes), GFP_KERNEL); + if (!path) + return ERR_PTR(-ENOMEM); + + path->num_nodes = num_nodes; + + for (i = num_nodes - 1; i >= 0; i--) { + node->provider->users++; + hlist_add_head(&path->reqs[i].req_node, &node->req_list); + path->reqs[i].node = node; + path->reqs[i].dev = dev; + /* reference to previous node was saved during path traversal */ + node = node->reverse; + } + + return path; +} + +static struct icc_path *path_find(struct device *dev, struct icc_node *src, + struct icc_node *dst) +{ + struct icc_path *path = ERR_PTR(-EPROBE_DEFER); + struct icc_node *n, *node = NULL; + struct list_head traverse_list; + struct list_head edge_list; + struct list_head visited_list; + size_t i, depth = 1; + bool found = false; + + INIT_LIST_HEAD(&traverse_list); + INIT_LIST_HEAD(&edge_list); + INIT_LIST_HEAD(&visited_list); + + list_add(&src->search_list, &traverse_list); + src->reverse = NULL; + + do { + list_for_each_entry_safe(node, n, &traverse_list, search_list) { + if (node == dst) { + found = true; + list_splice_init(&edge_list, &visited_list); + list_splice_init(&traverse_list, &visited_list); + break; + } + for (i = 0; i < node->num_links; i++) { + struct icc_node *tmp = node->links[i]; + + if (!tmp) { + path = ERR_PTR(-ENOENT); + goto out; + } + + if (tmp->is_traversed) + continue; + + tmp->is_traversed = true; + tmp->reverse = node; + list_add_tail(&tmp->search_list, &edge_list); + } + } + + if (found) + break; + + list_splice_init(&traverse_list, &visited_list); + list_splice_init(&edge_list, &traverse_list); + + /* count the hops including the source */ + depth++; + + } while (!list_empty(&traverse_list)); + +out: + + /* reset the traversed state */ + list_for_each_entry_reverse(n, &visited_list, search_list) + n->is_traversed = false; + + if (found) + path = path_init(dev, dst, depth); + + return path; +} + +/* + * We want the path to honor all bandwidth requests, so the average and peak + * bandwidth requirements from each consumer are aggregated at each node. + * The aggregation is platform specific, so each platform can customize it by + * implementing its own aggregate() function. + */ + +static int aggregate_requests(struct icc_node *node) +{ + struct icc_provider *p = node->provider; + struct icc_req *r; + + node->avg_bw = 0; + node->peak_bw = 0; + + hlist_for_each_entry(r, &node->req_list, req_node) + p->aggregate(node, r->avg_bw, r->peak_bw, + &node->avg_bw, &node->peak_bw); + + return 0; +} + +static int apply_constraints(struct icc_path *path) +{ + struct icc_node *next, *prev = NULL; + int ret = -EINVAL; + int i; + + for (i = 0; i < path->num_nodes; i++) { + next = path->reqs[i].node; + + /* + * Both endpoints should be valid master-slave pairs of the + * same interconnect provider that will be configured. + */ + if (!prev || next->provider != prev->provider) { + prev = next; + continue; + } + + /* set the constraints */ + ret = next->provider->set(prev, next); + if (ret) + goto out; + + prev = next; + } +out: + return ret; +} + +/** + * icc_set_bw() - set bandwidth constraints on an interconnect path + * @path: reference to the path returned by icc_get() + * @avg_bw: average bandwidth in kilobytes per second + * @peak_bw: peak bandwidth in kilobytes per second + * + * This function is used by an interconnect consumer to express its own needs + * in terms of bandwidth for a previously requested path between two endpoints. + * The requests are aggregated and each node is updated accordingly. The entire + * path is locked by a mutex to ensure that the set() is completed. + * The @path can be NULL when the "interconnects" DT properties is missing, + * which will mean that no constraints will be set. + * + * Returns 0 on success, or an appropriate error code otherwise. + */ +int icc_set_bw(struct icc_path *path, u32 avg_bw, u32 peak_bw) +{ + struct icc_node *node; + size_t i; + int ret; + + if (!path) + return 0; + + mutex_lock(&icc_lock); + + for (i = 0; i < path->num_nodes; i++) { + node = path->reqs[i].node; + + /* update the consumer request for this path */ + path->reqs[i].avg_bw = avg_bw; + path->reqs[i].peak_bw = peak_bw; + + /* aggregate requests for this node */ + aggregate_requests(node); + } + + ret = apply_constraints(path); + if (ret) + pr_debug("interconnect: error applying constraints (%d)\n", + ret); + + mutex_unlock(&icc_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(icc_set_bw); + +/** + * icc_get() - return a handle for path between two endpoints + * @dev: the device requesting the path + * @src_id: source device port id + * @dst_id: destination device port id + * + * This function will search for a path between two endpoints and return an + * icc_path handle on success. Use icc_put() to release + * constraints when they are not needed anymore. + * If the interconnect API is disabled, NULL is returned and the consumer + * drivers will still build. Drivers are free to handle this specifically, + * but they don't have to. + * + * Return: icc_path pointer on success, ERR_PTR() on error or NULL if the + * interconnect API is disabled. + */ +struct icc_path *icc_get(struct device *dev, const int src_id, const int dst_id) +{ + struct icc_node *src, *dst; + struct icc_path *path = ERR_PTR(-EPROBE_DEFER); + + mutex_lock(&icc_lock); + + src = node_find(src_id); + if (!src) + goto out; + + dst = node_find(dst_id); + if (!dst) + goto out; + + path = path_find(dev, src, dst); + if (IS_ERR(path)) + dev_err(dev, "%s: invalid path=%ld\n", __func__, PTR_ERR(path)); + +out: + mutex_unlock(&icc_lock); + return path; +} +EXPORT_SYMBOL_GPL(icc_get); + +/** + * icc_put() - release the reference to the icc_path + * @path: interconnect path + * + * Use this function to release the constraints on a path when the path is + * no longer needed. The constraints will be re-aggregated. + */ +void icc_put(struct icc_path *path) +{ + struct icc_node *node; + size_t i; + int ret; + + if (!path || WARN_ON(IS_ERR(path))) + return; + + ret = icc_set_bw(path, 0, 0); + if (ret) + pr_err("%s: error (%d)\n", __func__, ret); + + mutex_lock(&icc_lock); + for (i = 0; i < path->num_nodes; i++) { + node = path->reqs[i].node; + hlist_del(&path->reqs[i].req_node); + if (!WARN_ON(!node->provider->users)) + node->provider->users--; + } + mutex_unlock(&icc_lock); + + kfree(path); +} +EXPORT_SYMBOL_GPL(icc_put); + +static struct icc_node *icc_node_create_nolock(int id) +{ + struct icc_node *node; + + /* check if node already exists */ + node = node_find(id); + if (node) + return node; + + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return ERR_PTR(-ENOMEM); + + id = idr_alloc(&icc_idr, node, id, id + 1, GFP_KERNEL); + if (id < 0) { + WARN(1, "%s: couldn't get idr\n", __func__); + kfree(node); + return ERR_PTR(id); + } + + node->id = id; + + return node; +} + +/** + * icc_node_create() - create a node + * @id: node id + * + * Return: icc_node pointer on success, or ERR_PTR() on error + */ +struct icc_node *icc_node_create(int id) +{ + struct icc_node *node; + + mutex_lock(&icc_lock); + + node = icc_node_create_nolock(id); + + mutex_unlock(&icc_lock); + + return node; +} +EXPORT_SYMBOL_GPL(icc_node_create); + +/** + * icc_node_destroy() - destroy a node + * @id: node id + */ +void icc_node_destroy(int id) +{ + struct icc_node *node; + + mutex_lock(&icc_lock); + + node = node_find(id); + if (node) { + idr_remove(&icc_idr, node->id); + WARN_ON(!hlist_empty(&node->req_list)); + } + + mutex_unlock(&icc_lock); + + kfree(node); +} +EXPORT_SYMBOL_GPL(icc_node_destroy); + +/** + * icc_link_create() - create a link between two nodes + * @node: source node id + * @dst_id: destination node id + * + * Create a link between two nodes. The nodes might belong to different + * interconnect providers and the @dst_id node might not exist (if the + * provider driver has not probed yet). So just create the @dst_id node + * and when the actual provider driver is probed, the rest of the node + * data is filled. + * + * Return: 0 on success, or an error code otherwise + */ +int icc_link_create(struct icc_node *node, const int dst_id) +{ + struct icc_node *dst; + struct icc_node **new; + int ret = 0; + + if (!node->provider) + return -EINVAL; + + mutex_lock(&icc_lock); + + dst = node_find(dst_id); + if (!dst) { + dst = icc_node_create_nolock(dst_id); + + if (IS_ERR(dst)) { + ret = PTR_ERR(dst); + goto out; + } + } + + new = krealloc(node->links, + (node->num_links + 1) * sizeof(*node->links), + GFP_KERNEL); + if (!new) { + ret = -ENOMEM; + goto out; + } + + node->links = new; + node->links[node->num_links++] = dst; + +out: + mutex_unlock(&icc_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(icc_link_create); + +/** + * icc_link_destroy() - destroy a link between two nodes + * @src: pointer to source node + * @dst: pointer to destination node + * + * Return: 0 on success, or an error code otherwise + */ +int icc_link_destroy(struct icc_node *src, struct icc_node *dst) +{ + struct icc_node **new; + size_t slot; + int ret = 0; + + if (IS_ERR_OR_NULL(src)) + return -EINVAL; + + if (IS_ERR_OR_NULL(dst)) + return -EINVAL; + + mutex_lock(&icc_lock); + + for (slot = 0; slot < src->num_links; slot++) + if (src->links[slot] == dst) + break; + + if (WARN_ON(slot == src->num_links)) { + ret = -ENXIO; + goto out; + } + + src->links[slot] = src->links[--src->num_links]; + + new = krealloc(src->links, src->num_links * sizeof(*src->links), + GFP_KERNEL); + if (new) + src->links = new; + +out: + mutex_unlock(&icc_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(icc_link_destroy); + +/** + * icc_node_add() - add interconnect node to interconnect provider + * @node: pointer to the interconnect node + * @provider: pointer to the interconnect provider + */ +void icc_node_add(struct icc_node *node, struct icc_provider *provider) +{ + mutex_lock(&icc_lock); + + node->provider = provider; + list_add_tail(&node->node_list, &provider->nodes); + + mutex_unlock(&icc_lock); +} +EXPORT_SYMBOL_GPL(icc_node_add); + +/** + * icc_node_del() - delete interconnect node from interconnect provider + * @node: pointer to the interconnect node + */ +void icc_node_del(struct icc_node *node) +{ + mutex_lock(&icc_lock); + + list_del(&node->node_list); + + mutex_unlock(&icc_lock); +} +EXPORT_SYMBOL_GPL(icc_node_del); + +/** + * icc_provider_add() - add a new interconnect provider + * @provider: the interconnect provider that will be added into topology + * + * Return: 0 on success, or an error code otherwise + */ +int icc_provider_add(struct icc_provider *provider) +{ + if (WARN_ON(!provider->set)) + return -EINVAL; + + mutex_lock(&icc_lock); + + INIT_LIST_HEAD(&provider->nodes); + list_add_tail(&provider->provider_list, &icc_providers); + + mutex_unlock(&icc_lock); + + dev_dbg(provider->dev, "interconnect provider added to topology\n"); + + return 0; +} +EXPORT_SYMBOL_GPL(icc_provider_add); + +/** + * icc_provider_del() - delete previously added interconnect provider + * @provider: the interconnect provider that will be removed from topology + * + * Return: 0 on success, or an error code otherwise + */ +int icc_provider_del(struct icc_provider *provider) +{ + mutex_lock(&icc_lock); + if (provider->users) { + pr_warn("interconnect provider still has %d users\n", + provider->users); + mutex_unlock(&icc_lock); + return -EBUSY; + } + + if (!list_empty(&provider->nodes)) { + pr_warn("interconnect provider still has nodes\n"); + mutex_unlock(&icc_lock); + return -EBUSY; + } + + list_del(&provider->provider_list); + mutex_unlock(&icc_lock); + + return 0; +} +EXPORT_SYMBOL_GPL(icc_provider_del); + +MODULE_AUTHOR("Georgi Djakov "); +MODULE_DESCRIPTION("Interconnect Driver Core"); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/interconnect-provider.h b/include/linux/interconnect-provider.h new file mode 100644 index 000000000000..78208a754181 --- /dev/null +++ b/include/linux/interconnect-provider.h @@ -0,0 +1,125 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2018, Linaro Ltd. + * Author: Georgi Djakov + */ + +#ifndef __LINUX_INTERCONNECT_PROVIDER_H +#define __LINUX_INTERCONNECT_PROVIDER_H + +#include + +#define icc_units_to_bps(bw) ((bw) * 1000ULL) + +struct icc_node; + +/** + * struct icc_provider - interconnect provider (controller) entity that might + * provide multiple interconnect controls + * + * @provider_list: list of the registered interconnect providers + * @nodes: internal list of the interconnect provider nodes + * @set: pointer to device specific set operation function + * @aggregate: pointer to device specific aggregate operation function + * @dev: the device this interconnect provider belongs to + * @users: count of active users + * @data: pointer to private data + */ +struct icc_provider { + struct list_head provider_list; + struct list_head nodes; + int (*set)(struct icc_node *src, struct icc_node *dst); + int (*aggregate)(struct icc_node *node, u32 avg_bw, u32 peak_bw, + u32 *agg_avg, u32 *agg_peak); + struct device *dev; + int users; + void *data; +}; + +/** + * struct icc_node - entity that is part of the interconnect topology + * + * @id: platform specific node id + * @name: node name used in debugfs + * @links: a list of targets pointing to where we can go next when traversing + * @num_links: number of links to other interconnect nodes + * @provider: points to the interconnect provider of this node + * @node_list: the list entry in the parent provider's "nodes" list + * @search_list: list used when walking the nodes graph + * @reverse: pointer to previous node when walking the nodes graph + * @is_traversed: flag that is used when walking the nodes graph + * @req_list: a list of QoS constraint requests associated with this node + * @avg_bw: aggregated value of average bandwidth requests from all consumers + * @peak_bw: aggregated value of peak bandwidth requests from all consumers + * @data: pointer to private data + */ +struct icc_node { + int id; + const char *name; + struct icc_node **links; + size_t num_links; + + struct icc_provider *provider; + struct list_head node_list; + struct list_head search_list; + struct icc_node *reverse; + u8 is_traversed:1; + struct hlist_head req_list; + u32 avg_bw; + u32 peak_bw; + void *data; +}; + +#if IS_ENABLED(CONFIG_INTERCONNECT) + +struct icc_node *icc_node_create(int id); +void icc_node_destroy(int id); +int icc_link_create(struct icc_node *node, const int dst_id); +int icc_link_destroy(struct icc_node *src, struct icc_node *dst); +void icc_node_add(struct icc_node *node, struct icc_provider *provider); +void icc_node_del(struct icc_node *node); +int icc_provider_add(struct icc_provider *provider); +int icc_provider_del(struct icc_provider *provider); + +#else + +static inline struct icc_node *icc_node_create(int id) +{ + return ERR_PTR(-ENOTSUPP); +} + +void icc_node_destroy(int id) +{ +} + +static inline int icc_link_create(struct icc_node *node, const int dst_id) +{ + return -ENOTSUPP; +} + +int icc_link_destroy(struct icc_node *src, struct icc_node *dst) +{ + return -ENOTSUPP; +} + +void icc_node_add(struct icc_node *node, struct icc_provider *provider) +{ +} + +void icc_node_del(struct icc_node *node) +{ +} + +static inline int icc_provider_add(struct icc_provider *provider) +{ + return -ENOTSUPP; +} + +static inline int icc_provider_del(struct icc_provider *provider) +{ + return -ENOTSUPP; +} + +#endif /* CONFIG_INTERCONNECT */ + +#endif /* __LINUX_INTERCONNECT_PROVIDER_H */ diff --git a/include/linux/interconnect.h b/include/linux/interconnect.h new file mode 100644 index 000000000000..c331afb3a2c8 --- /dev/null +++ b/include/linux/interconnect.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2018-2019, Linaro Ltd. + * Author: Georgi Djakov + */ + +#ifndef __LINUX_INTERCONNECT_H +#define __LINUX_INTERCONNECT_H + +#include +#include + +/* macros for converting to icc units */ +#define Bps_to_icc(x) ((x) / 1000) +#define kBps_to_icc(x) (x) +#define MBps_to_icc(x) ((x) * 1000) +#define GBps_to_icc(x) ((x) * 1000 * 1000) +#define bps_to_icc(x) (1) +#define kbps_to_icc(x) ((x) / 8 + ((x) % 8 ? 1 : 0)) +#define Mbps_to_icc(x) ((x) * 1000 / 8) +#define Gbps_to_icc(x) ((x) * 1000 * 1000 / 8) + +struct icc_path; +struct device; + +#if IS_ENABLED(CONFIG_INTERCONNECT) + +struct icc_path *icc_get(struct device *dev, const int src_id, + const int dst_id); +void icc_put(struct icc_path *path); +int icc_set_bw(struct icc_path *path, u32 avg_bw, u32 peak_bw); + +#else + +static inline struct icc_path *icc_get(struct device *dev, const int src_id, + const int dst_id) +{ + return NULL; +} + +static inline void icc_put(struct icc_path *path) +{ +} + +static inline int icc_set_bw(struct icc_path *path, u32 avg_bw, u32 peak_bw) +{ + return 0; +} + +#endif /* CONFIG_INTERCONNECT */ + +#endif /* __LINUX_INTERCONNECT_H */ -- cgit From 87e3031b6fbd83ea83adf1bf9602bcce313ee787 Mon Sep 17 00:00:00 2001 From: Georgi Djakov Date: Wed, 16 Jan 2019 18:10:58 +0200 Subject: interconnect: Allow endpoints translation via DT Currently we support only platform data for specifying the interconnect endpoints. As now the endpoints are hard-coded into the consumer driver this may lead to complications when a single driver is used by multiple SoCs, which may have different interconnect topology. To avoid cluttering the consumer drivers, introduce a translation function to help us get the board specific interconnect data from device-tree. Reviewed-by: Evan Green Signed-off-by: Georgi Djakov Signed-off-by: Greg Kroah-Hartman --- drivers/interconnect/core.c | 149 ++++++++++++++++++++++++++++++++++ include/linux/interconnect-provider.h | 17 ++++ include/linux/interconnect.h | 7 ++ 3 files changed, 173 insertions(+) (limited to 'include/linux') diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c index 2b937b4f43c4..a8c2bd35197f 100644 --- a/drivers/interconnect/core.c +++ b/drivers/interconnect/core.c @@ -15,6 +15,7 @@ #include #include #include +#include #include static DEFINE_IDR(icc_idr); @@ -194,6 +195,152 @@ out: return ret; } +/* of_icc_xlate_onecell() - Translate function using a single index. + * @spec: OF phandle args to map into an interconnect node. + * @data: private data (pointer to struct icc_onecell_data) + * + * This is a generic translate function that can be used to model simple + * interconnect providers that have one device tree node and provide + * multiple interconnect nodes. A single cell is used as an index into + * an array of icc nodes specified in the icc_onecell_data struct when + * registering the provider. + */ +struct icc_node *of_icc_xlate_onecell(struct of_phandle_args *spec, + void *data) +{ + struct icc_onecell_data *icc_data = data; + unsigned int idx = spec->args[0]; + + if (idx >= icc_data->num_nodes) { + pr_err("%s: invalid index %u\n", __func__, idx); + return ERR_PTR(-EINVAL); + } + + return icc_data->nodes[idx]; +} +EXPORT_SYMBOL_GPL(of_icc_xlate_onecell); + +/** + * of_icc_get_from_provider() - Look-up interconnect node + * @spec: OF phandle args to use for look-up + * + * Looks for interconnect provider under the node specified by @spec and if + * found, uses xlate function of the provider to map phandle args to node. + * + * Returns a valid pointer to struct icc_node on success or ERR_PTR() + * on failure. + */ +static struct icc_node *of_icc_get_from_provider(struct of_phandle_args *spec) +{ + struct icc_node *node = ERR_PTR(-EPROBE_DEFER); + struct icc_provider *provider; + + if (!spec || spec->args_count != 1) + return ERR_PTR(-EINVAL); + + mutex_lock(&icc_lock); + list_for_each_entry(provider, &icc_providers, provider_list) { + if (provider->dev->of_node == spec->np) + node = provider->xlate(spec, provider->data); + if (!IS_ERR(node)) + break; + } + mutex_unlock(&icc_lock); + + return node; +} + +/** + * of_icc_get() - get a path handle from a DT node based on name + * @dev: device pointer for the consumer device + * @name: interconnect path name + * + * This function will search for a path between two endpoints and return an + * icc_path handle on success. Use icc_put() to release constraints when they + * are not needed anymore. + * If the interconnect API is disabled, NULL is returned and the consumer + * drivers will still build. Drivers are free to handle this specifically, + * but they don't have to. + * + * Return: icc_path pointer on success or ERR_PTR() on error. NULL is returned + * when the API is disabled or the "interconnects" DT property is missing. + */ +struct icc_path *of_icc_get(struct device *dev, const char *name) +{ + struct icc_path *path = ERR_PTR(-EPROBE_DEFER); + struct icc_node *src_node, *dst_node; + struct device_node *np = NULL; + struct of_phandle_args src_args, dst_args; + int idx = 0; + int ret; + + if (!dev || !dev->of_node) + return ERR_PTR(-ENODEV); + + np = dev->of_node; + + /* + * When the consumer DT node do not have "interconnects" property + * return a NULL path to skip setting constraints. + */ + if (!of_find_property(np, "interconnects", NULL)) + return NULL; + + /* + * We use a combination of phandle and specifier for endpoint. For now + * lets support only global ids and extend this in the future if needed + * without breaking DT compatibility. + */ + if (name) { + idx = of_property_match_string(np, "interconnect-names", name); + if (idx < 0) + return ERR_PTR(idx); + } + + ret = of_parse_phandle_with_args(np, "interconnects", + "#interconnect-cells", idx * 2, + &src_args); + if (ret) + return ERR_PTR(ret); + + of_node_put(src_args.np); + + ret = of_parse_phandle_with_args(np, "interconnects", + "#interconnect-cells", idx * 2 + 1, + &dst_args); + if (ret) + return ERR_PTR(ret); + + of_node_put(dst_args.np); + + src_node = of_icc_get_from_provider(&src_args); + + if (IS_ERR(src_node)) { + if (PTR_ERR(src_node) != -EPROBE_DEFER) + dev_err(dev, "error finding src node: %ld\n", + PTR_ERR(src_node)); + return ERR_CAST(src_node); + } + + dst_node = of_icc_get_from_provider(&dst_args); + + if (IS_ERR(dst_node)) { + if (PTR_ERR(dst_node) != -EPROBE_DEFER) + dev_err(dev, "error finding dst node: %ld\n", + PTR_ERR(dst_node)); + return ERR_CAST(dst_node); + } + + mutex_lock(&icc_lock); + path = path_find(dev, src_node, dst_node); + if (IS_ERR(path)) + dev_err(dev, "%s: invalid path=%ld\n", __func__, PTR_ERR(path)); + mutex_unlock(&icc_lock); + + return path; +} +EXPORT_SYMBOL_GPL(of_icc_get); + /** * icc_set_bw() - set bandwidth constraints on an interconnect path * @path: reference to the path returned by icc_get() @@ -519,6 +666,8 @@ int icc_provider_add(struct icc_provider *provider) { if (WARN_ON(!provider->set)) return -EINVAL; + if (WARN_ON(!provider->xlate)) + return -EINVAL; mutex_lock(&icc_lock); diff --git a/include/linux/interconnect-provider.h b/include/linux/interconnect-provider.h index 78208a754181..63caccadc2db 100644 --- a/include/linux/interconnect-provider.h +++ b/include/linux/interconnect-provider.h @@ -12,6 +12,21 @@ #define icc_units_to_bps(bw) ((bw) * 1000ULL) struct icc_node; +struct of_phandle_args; + +/** + * struct icc_onecell_data - driver data for onecell interconnect providers + * + * @num_nodes: number of nodes in this device + * @nodes: array of pointers to the nodes in this device + */ +struct icc_onecell_data { + unsigned int num_nodes; + struct icc_node *nodes[]; +}; + +struct icc_node *of_icc_xlate_onecell(struct of_phandle_args *spec, + void *data); /** * struct icc_provider - interconnect provider (controller) entity that might @@ -21,6 +36,7 @@ struct icc_node; * @nodes: internal list of the interconnect provider nodes * @set: pointer to device specific set operation function * @aggregate: pointer to device specific aggregate operation function + * @xlate: provider-specific callback for mapping nodes from phandle arguments * @dev: the device this interconnect provider belongs to * @users: count of active users * @data: pointer to private data @@ -31,6 +47,7 @@ struct icc_provider { int (*set)(struct icc_node *src, struct icc_node *dst); int (*aggregate)(struct icc_node *node, u32 avg_bw, u32 peak_bw, u32 *agg_avg, u32 *agg_peak); + struct icc_node* (*xlate)(struct of_phandle_args *spec, void *data); struct device *dev; int users; void *data; diff --git a/include/linux/interconnect.h b/include/linux/interconnect.h index c331afb3a2c8..dc25864755ba 100644 --- a/include/linux/interconnect.h +++ b/include/linux/interconnect.h @@ -27,6 +27,7 @@ struct device; struct icc_path *icc_get(struct device *dev, const int src_id, const int dst_id); +struct icc_path *of_icc_get(struct device *dev, const char *name); void icc_put(struct icc_path *path); int icc_set_bw(struct icc_path *path, u32 avg_bw, u32 peak_bw); @@ -38,6 +39,12 @@ static inline struct icc_path *icc_get(struct device *dev, const int src_id, return NULL; } +static inline struct icc_path *of_icc_get(struct device *dev, + const char *name) +{ + return NULL; +} + static inline void icc_put(struct icc_path *path) { } -- cgit From c81d64d3dc1f2decf8f3a9354416b7496b5c389b Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Wed, 16 Jan 2019 11:25:21 -0700 Subject: io-64-nonatomic: add io{read|write}64[be]{_lo_hi|_hi_lo} macros This patch adds generic io{read|write}64[be]{_lo_hi|_hi_lo} macros if they are not already defined by the architecture. (As they are provided by the generic iomap library). The patch also points io{read|write}64[be] to the variant specified by the header name. This is because new drivers are encouraged to use ioreadXX, et al instead of readX[1], et al -- and mixing ioreadXX with readq is pretty ugly. [1] LDD3: section 9.4.2 Signed-off-by: Logan Gunthorpe Reviewed-by: Andy Shevchenko Cc: Christoph Hellwig Cc: Arnd Bergmann Cc: Alan Cox Cc: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- include/linux/io-64-nonatomic-hi-lo.h | 64 +++++++++++++++++++++++++++++++++++ include/linux/io-64-nonatomic-lo-hi.h | 64 +++++++++++++++++++++++++++++++++++ 2 files changed, 128 insertions(+) (limited to 'include/linux') diff --git a/include/linux/io-64-nonatomic-hi-lo.h b/include/linux/io-64-nonatomic-hi-lo.h index 862d786a904f..ae21b72cce85 100644 --- a/include/linux/io-64-nonatomic-hi-lo.h +++ b/include/linux/io-64-nonatomic-hi-lo.h @@ -55,4 +55,68 @@ static inline void hi_lo_writeq_relaxed(__u64 val, volatile void __iomem *addr) #define writeq_relaxed hi_lo_writeq_relaxed #endif +#ifndef ioread64_hi_lo +#define ioread64_hi_lo ioread64_hi_lo +static inline u64 ioread64_hi_lo(void __iomem *addr) +{ + u32 low, high; + + high = ioread32(addr + sizeof(u32)); + low = ioread32(addr); + + return low + ((u64)high << 32); +} +#endif + +#ifndef iowrite64_hi_lo +#define iowrite64_hi_lo iowrite64_hi_lo +static inline void iowrite64_hi_lo(u64 val, void __iomem *addr) +{ + iowrite32(val >> 32, addr + sizeof(u32)); + iowrite32(val, addr); +} +#endif + +#ifndef ioread64be_hi_lo +#define ioread64be_hi_lo ioread64be_hi_lo +static inline u64 ioread64be_hi_lo(void __iomem *addr) +{ + u32 low, high; + + high = ioread32be(addr); + low = ioread32be(addr + sizeof(u32)); + + return low + ((u64)high << 32); +} +#endif + +#ifndef iowrite64be_hi_lo +#define iowrite64be_hi_lo iowrite64be_hi_lo +static inline void iowrite64be_hi_lo(u64 val, void __iomem *addr) +{ + iowrite32be(val >> 32, addr); + iowrite32be(val, addr + sizeof(u32)); +} +#endif + +#ifndef ioread64 +#define ioread64_is_nonatomic +#define ioread64 ioread64_hi_lo +#endif + +#ifndef iowrite64 +#define iowrite64_is_nonatomic +#define iowrite64 iowrite64_hi_lo +#endif + +#ifndef ioread64be +#define ioread64be_is_nonatomic +#define ioread64be ioread64be_hi_lo +#endif + +#ifndef iowrite64be +#define iowrite64be_is_nonatomic +#define iowrite64be iowrite64be_hi_lo +#endif + #endif /* _LINUX_IO_64_NONATOMIC_HI_LO_H_ */ diff --git a/include/linux/io-64-nonatomic-lo-hi.h b/include/linux/io-64-nonatomic-lo-hi.h index d042e7bb5adb..faaa842dbdb9 100644 --- a/include/linux/io-64-nonatomic-lo-hi.h +++ b/include/linux/io-64-nonatomic-lo-hi.h @@ -55,4 +55,68 @@ static inline void lo_hi_writeq_relaxed(__u64 val, volatile void __iomem *addr) #define writeq_relaxed lo_hi_writeq_relaxed #endif +#ifndef ioread64_lo_hi +#define ioread64_lo_hi ioread64_lo_hi +static inline u64 ioread64_lo_hi(void __iomem *addr) +{ + u32 low, high; + + low = ioread32(addr); + high = ioread32(addr + sizeof(u32)); + + return low + ((u64)high << 32); +} +#endif + +#ifndef iowrite64_lo_hi +#define iowrite64_lo_hi iowrite64_lo_hi +static inline void iowrite64_lo_hi(u64 val, void __iomem *addr) +{ + iowrite32(val, addr); + iowrite32(val >> 32, addr + sizeof(u32)); +} +#endif + +#ifndef ioread64be_lo_hi +#define ioread64be_lo_hi ioread64be_lo_hi +static inline u64 ioread64be_lo_hi(void __iomem *addr) +{ + u32 low, high; + + low = ioread32be(addr + sizeof(u32)); + high = ioread32be(addr); + + return low + ((u64)high << 32); +} +#endif + +#ifndef iowrite64be_lo_hi +#define iowrite64be_lo_hi iowrite64be_lo_hi +static inline void iowrite64be_lo_hi(u64 val, void __iomem *addr) +{ + iowrite32be(val, addr + sizeof(u32)); + iowrite32be(val >> 32, addr); +} +#endif + +#ifndef ioread64 +#define ioread64_is_nonatomic +#define ioread64 ioread64_lo_hi +#endif + +#ifndef iowrite64 +#define iowrite64_is_nonatomic +#define iowrite64 iowrite64_lo_hi +#endif + +#ifndef ioread64be +#define ioread64be_is_nonatomic +#define ioread64be ioread64be_lo_hi +#endif + +#ifndef iowrite64be +#define iowrite64be_is_nonatomic +#define iowrite64be iowrite64be_lo_hi +#endif + #endif /* _LINUX_IO_64_NONATOMIC_LO_HI_H_ */ -- cgit From 51c48b310183ab6ba5419edfc6a8de889cc04521 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Sat, 19 Jan 2019 11:35:04 -0600 Subject: PCI: Probe bridge window attributes once at enumeration-time pci_bridge_check_ranges() determines whether a bridge supports the optional I/O and prefetchable memory windows and sets the flag bits in the bridge resources. This *could* be done once during enumeration except that the resource allocation code completely clears the flag bits, e.g., in the pci_assign_unassigned_bridge_resources() path. The problem with pci_bridge_check_ranges() in the resource allocation path is that we may allocate resources after devices have been claimed by drivers, and pci_bridge_check_ranges() *changes* the window registers to determine whether they're writable. This may break concurrent accesses to devices behind the bridge. Add a new pci_read_bridge_windows() to determine whether a bridge supports the optional windows, call it once during enumeration, remember the results, and change pci_bridge_check_ranges() so it doesn't touch the bridge windows but sets the flag bits based on those remembered results. Link: https://lore.kernel.org/linux-pci/1506151482-113560-1-git-send-email-wangzhou1@hisilicon.com Link: https://lists.gnu.org/archive/html/qemu-devel/2018-12/msg02082.html Reported-by: Yandong Xu Tested-by: Yandong Xu Signed-off-by: Bjorn Helgaas Cc: Michael S. Tsirkin Cc: Sagi Grimberg Cc: Ofer Hayut Cc: Roy Shterman Cc: Keith Busch Cc: Zhou Wang --- drivers/pci/probe.c | 52 +++++++++++++++++++++++++++++++++++++++++++++++++ drivers/pci/setup-bus.c | 45 ++++-------------------------------------- include/linux/pci.h | 3 +++ 3 files changed, 59 insertions(+), 41 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 257b9f6f2ebb..2ef8b954c65a 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -348,6 +348,57 @@ static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom) } } +static void pci_read_bridge_windows(struct pci_dev *bridge) +{ + u16 io; + u32 pmem, tmp; + + pci_read_config_word(bridge, PCI_IO_BASE, &io); + if (!io) { + pci_write_config_word(bridge, PCI_IO_BASE, 0xe0f0); + pci_read_config_word(bridge, PCI_IO_BASE, &io); + pci_write_config_word(bridge, PCI_IO_BASE, 0x0); + } + if (io) + bridge->io_window = 1; + + /* + * DECchip 21050 pass 2 errata: the bridge may miss an address + * disconnect boundary by one PCI data phase. Workaround: do not + * use prefetching on this device. + */ + if (bridge->vendor == PCI_VENDOR_ID_DEC && bridge->device == 0x0001) + return; + + pci_read_config_dword(bridge, PCI_PREF_MEMORY_BASE, &pmem); + if (!pmem) { + pci_write_config_dword(bridge, PCI_PREF_MEMORY_BASE, + 0xffe0fff0); + pci_read_config_dword(bridge, PCI_PREF_MEMORY_BASE, &pmem); + pci_write_config_dword(bridge, PCI_PREF_MEMORY_BASE, 0x0); + } + if (!pmem) + return; + + bridge->pref_window = 1; + + if ((pmem & PCI_PREF_RANGE_TYPE_MASK) == PCI_PREF_RANGE_TYPE_64) { + + /* + * Bridge claims to have a 64-bit prefetchable memory + * window; verify that the upper bits are actually + * writable. + */ + pci_read_config_dword(bridge, PCI_PREF_BASE_UPPER32, &pmem); + pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32, + 0xffffffff); + pci_read_config_dword(bridge, PCI_PREF_BASE_UPPER32, &tmp); + pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32, pmem); + if (tmp) + bridge->pref_64_window = 1; + } +} + static void pci_read_bridge_io(struct pci_bus *child) { struct pci_dev *dev = child->self; @@ -1739,6 +1790,7 @@ int pci_setup_device(struct pci_dev *dev) pci_read_irq(dev); dev->transparent = ((dev->class & 0xff) == 1); pci_read_bases(dev, 2, PCI_ROM_ADDRESS1); + pci_read_bridge_windows(dev); set_pcie_hotplug_bridge(dev); pos = pci_find_capability(dev, PCI_CAP_ID_SSVID); if (pos) { diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index ed960436df5e..1941bb0a6c13 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -735,58 +735,21 @@ int pci_claim_bridge_resource(struct pci_dev *bridge, int i) base/limit registers must be read-only and read as 0. */ static void pci_bridge_check_ranges(struct pci_bus *bus) { - u16 io; - u32 pmem; struct pci_dev *bridge = bus->self; - struct resource *b_res; + struct resource *b_res = &bridge->resource[PCI_BRIDGE_RESOURCES]; - b_res = &bridge->resource[PCI_BRIDGE_RESOURCES]; b_res[1].flags |= IORESOURCE_MEM; - pci_read_config_word(bridge, PCI_IO_BASE, &io); - if (!io) { - pci_write_config_word(bridge, PCI_IO_BASE, 0xe0f0); - pci_read_config_word(bridge, PCI_IO_BASE, &io); - pci_write_config_word(bridge, PCI_IO_BASE, 0x0); - } - if (io) + if (bridge->io_window) b_res[0].flags |= IORESOURCE_IO; - /* DECchip 21050 pass 2 errata: the bridge may miss an address - disconnect boundary by one PCI data phase. - Workaround: do not use prefetching on this device. */ - if (bridge->vendor == PCI_VENDOR_ID_DEC && bridge->device == 0x0001) - return; - - pci_read_config_dword(bridge, PCI_PREF_MEMORY_BASE, &pmem); - if (!pmem) { - pci_write_config_dword(bridge, PCI_PREF_MEMORY_BASE, - 0xffe0fff0); - pci_read_config_dword(bridge, PCI_PREF_MEMORY_BASE, &pmem); - pci_write_config_dword(bridge, PCI_PREF_MEMORY_BASE, 0x0); - } - if (pmem) { + if (bridge->pref_window) { b_res[2].flags |= IORESOURCE_MEM | IORESOURCE_PREFETCH; - if ((pmem & PCI_PREF_RANGE_TYPE_MASK) == - PCI_PREF_RANGE_TYPE_64) { + if (bridge->pref_64_window) { b_res[2].flags |= IORESOURCE_MEM_64; b_res[2].flags |= PCI_PREF_RANGE_TYPE_64; } } - - /* double check if bridge does support 64 bit pref */ - if (b_res[2].flags & IORESOURCE_MEM_64) { - u32 mem_base_hi, tmp; - pci_read_config_dword(bridge, PCI_PREF_BASE_UPPER32, - &mem_base_hi); - pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32, - 0xffffffff); - pci_read_config_dword(bridge, PCI_PREF_BASE_UPPER32, &tmp); - if (!tmp) - b_res[2].flags &= ~IORESOURCE_MEM_64; - pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32, - mem_base_hi); - } } /* Helper function for sizing routines: find first available diff --git a/include/linux/pci.h b/include/linux/pci.h index 65f1d8c2f082..40b327b814aa 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -373,6 +373,9 @@ struct pci_dev { bool match_driver; /* Skip attaching driver */ unsigned int transparent:1; /* Subtractive decode bridge */ + unsigned int io_window:1; /* Bridge has I/O window */ + unsigned int pref_window:1; /* Bridge has pref mem window */ + unsigned int pref_64_window:1; /* Pref mem window is 64-bit */ unsigned int multifunction:1; /* Multi-function device */ unsigned int is_busmaster:1; /* Is busmaster */ -- cgit From 5b93ac542301026eff8954589cf59f801d03db3e Mon Sep 17 00:00:00 2001 From: Rajendra Nayak Date: Thu, 10 Jan 2019 09:32:02 +0530 Subject: OPP: Add support for parsing the 'opp-level' property Now that the OPP bindings are updated to include an optional 'opp-level' property, add support to parse it from device tree and store it as part of dev_pm_opp structure. Also add and export an helper 'dev_pm_opp_get_level()' that can be used to get the level value read from device tree when present. Reviewed-by: Stephen Boyd Acked-by: Viresh Kumar Signed-off-by: Rajendra Nayak Signed-off-by: Bjorn Andersson Signed-off-by: Andy Gross --- drivers/opp/core.c | 18 ++++++++++++++++++ drivers/opp/of.c | 2 ++ drivers/opp/opp.h | 2 ++ include/linux/pm_opp.h | 7 +++++++ 4 files changed, 29 insertions(+) (limited to 'include/linux') diff --git a/drivers/opp/core.c b/drivers/opp/core.c index e5507add8f04..90b78a122be9 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -130,6 +130,24 @@ unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp) } EXPORT_SYMBOL_GPL(dev_pm_opp_get_freq); +/** + * dev_pm_opp_get_level() - Gets the level corresponding to an available opp + * @opp: opp for which level value has to be returned for + * + * Return: level read from device tree corresponding to the opp, else + * return 0. + */ +unsigned int dev_pm_opp_get_level(struct dev_pm_opp *opp) +{ + if (IS_ERR_OR_NULL(opp) || !opp->available) { + pr_err("%s: Invalid parameters\n", __func__); + return 0; + } + + return opp->level; +} +EXPORT_SYMBOL_GPL(dev_pm_opp_get_level); + /** * dev_pm_opp_is_turbo() - Returns if opp is turbo OPP or not * @opp: opp for which turbo mode is being verified diff --git a/drivers/opp/of.c b/drivers/opp/of.c index 06f0f632ec47..1779f2c93291 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -594,6 +594,8 @@ static struct dev_pm_opp *_opp_add_static_v2(struct opp_table *opp_table, new_opp->rate = (unsigned long)rate; } + of_property_read_u32(np, "opp-level", &new_opp->level); + /* Check if the OPP supports hardware's hierarchy of versions or not */ if (!_opp_is_supported(dev, opp_table, np)) { dev_dbg(dev, "OPP not supported by hardware: %llu\n", rate); diff --git a/drivers/opp/opp.h b/drivers/opp/opp.h index e24d81497375..4458175aa661 100644 --- a/drivers/opp/opp.h +++ b/drivers/opp/opp.h @@ -60,6 +60,7 @@ extern struct list_head opp_tables; * @suspend: true if suspend OPP * @pstate: Device's power domain's performance state. * @rate: Frequency in hertz + * @level: Performance level * @supplies: Power supplies voltage/current values * @clock_latency_ns: Latency (in nanoseconds) of switching to this OPP's * frequency from any other OPP's frequency. @@ -80,6 +81,7 @@ struct dev_pm_opp { bool suspend; unsigned int pstate; unsigned long rate; + unsigned int level; struct dev_pm_opp_supply *supplies; diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 0a2a88e5a383..473d2c7516f0 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -86,6 +86,8 @@ unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp); unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp); +unsigned int dev_pm_opp_get_level(struct dev_pm_opp *opp); + bool dev_pm_opp_is_turbo(struct dev_pm_opp *opp); int dev_pm_opp_get_opp_count(struct device *dev); @@ -157,6 +159,11 @@ static inline unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp) return 0; } +static inline unsigned int dev_pm_opp_get_level(struct dev_pm_opp *opp) +{ + return 0; +} + static inline bool dev_pm_opp_is_turbo(struct dev_pm_opp *opp) { return false; -- cgit From 643fa9612bf1a29153eee46fd398117632f93cbe Mon Sep 17 00:00:00 2001 From: Chandan Rajendra Date: Wed, 12 Dec 2018 15:20:12 +0530 Subject: fscrypt: remove filesystem specific build config option In order to have a common code base for fscrypt "post read" processing for all filesystems which support encryption, this commit removes filesystem specific build config option (e.g. CONFIG_EXT4_FS_ENCRYPTION) and replaces it with a build option (i.e. CONFIG_FS_ENCRYPTION) whose value affects all the filesystems making use of fscrypt. Reviewed-by: Eric Biggers Signed-off-by: Chandan Rajendra Signed-off-by: Eric Biggers --- Documentation/filesystems/fscrypt.rst | 4 +- arch/mips/configs/generic_defconfig | 2 +- arch/nds32/configs/defconfig | 2 +- arch/s390/configs/debug_defconfig | 2 +- arch/s390/configs/performance_defconfig | 2 +- fs/crypto/Kconfig | 5 +- fs/crypto/fscrypt_private.h | 1 - fs/ext4/Kconfig | 15 -- fs/ext4/dir.c | 2 - fs/ext4/ext4.h | 7 +- fs/ext4/inode.c | 8 +- fs/ext4/ioctl.c | 4 +- fs/ext4/namei.c | 10 +- fs/ext4/page-io.c | 6 +- fs/ext4/readpage.c | 2 +- fs/ext4/super.c | 6 +- fs/ext4/sysfs.c | 4 +- fs/f2fs/Kconfig | 12 +- fs/f2fs/f2fs.h | 7 +- fs/f2fs/super.c | 8 +- fs/f2fs/sysfs.c | 4 +- fs/ubifs/Kconfig | 12 +- fs/ubifs/Makefile | 2 +- fs/ubifs/ioctl.c | 4 +- fs/ubifs/sb.c | 2 +- fs/ubifs/super.c | 2 +- fs/ubifs/ubifs.h | 5 +- include/linux/fs.h | 4 +- include/linux/fscrypt.h | 416 +++++++++++++++++++++++++++++++- include/linux/fscrypt_notsupp.h | 231 ------------------ include/linux/fscrypt_supp.h | 204 ---------------- 31 files changed, 460 insertions(+), 535 deletions(-) delete mode 100644 include/linux/fscrypt_notsupp.h delete mode 100644 include/linux/fscrypt_supp.h (limited to 'include/linux') diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst index 3a7b60521b94..43dd989e2a3f 100644 --- a/Documentation/filesystems/fscrypt.rst +++ b/Documentation/filesystems/fscrypt.rst @@ -343,9 +343,9 @@ FS_IOC_SET_ENCRYPTION_POLICY can fail with the following errors: - ``ENOTEMPTY``: the file is unencrypted and is a nonempty directory - ``ENOTTY``: this type of filesystem does not implement encryption - ``EOPNOTSUPP``: the kernel was not configured with encryption - support for this filesystem, or the filesystem superblock has not + support for filesystems, or the filesystem superblock has not had encryption enabled on it. (For example, to use encryption on an - ext4 filesystem, CONFIG_EXT4_ENCRYPTION must be enabled in the + ext4 filesystem, CONFIG_FS_ENCRYPTION must be enabled in the kernel config, and the superblock must have had the "encrypt" feature flag enabled using ``tune2fs -O encrypt`` or ``mkfs.ext4 -O encrypt``.) diff --git a/arch/mips/configs/generic_defconfig b/arch/mips/configs/generic_defconfig index 7c138dab87df..5d80521e5d5a 100644 --- a/arch/mips/configs/generic_defconfig +++ b/arch/mips/configs/generic_defconfig @@ -59,7 +59,7 @@ CONFIG_HID_MONTEREY=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y -CONFIG_EXT4_ENCRYPTION=y +CONFIG_FS_ENCRYPTION=y CONFIG_FANOTIFY=y CONFIG_FUSE_FS=y CONFIG_CUSE=y diff --git a/arch/nds32/configs/defconfig b/arch/nds32/configs/defconfig index 2546d8770785..65ce9259081b 100644 --- a/arch/nds32/configs/defconfig +++ b/arch/nds32/configs/defconfig @@ -74,7 +74,7 @@ CONFIG_GENERIC_PHY=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y -CONFIG_EXT4_ENCRYPTION=y +CONFIG_FS_ENCRYPTION=y CONFIG_FUSE_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index c69cb04b7a59..9824c7bad9d4 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -500,7 +500,6 @@ CONFIG_S390_AP_IOMMU=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y -CONFIG_EXT4_ENCRYPTION=y CONFIG_JBD2_DEBUG=y CONFIG_JFS_FS=m CONFIG_JFS_POSIX_ACL=y @@ -520,6 +519,7 @@ CONFIG_BTRFS_DEBUG=y CONFIG_NILFS2_FS=m CONFIG_FS_DAX=y CONFIG_EXPORTFS_BLOCK_OPS=y +CONFIG_FS_ENCRYPTION=y CONFIG_FANOTIFY=y CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y CONFIG_QUOTA_NETLINK_INTERFACE=y diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig index 32f539dc9c19..4fcbe5792744 100644 --- a/arch/s390/configs/performance_defconfig +++ b/arch/s390/configs/performance_defconfig @@ -497,7 +497,6 @@ CONFIG_S390_AP_IOMMU=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y -CONFIG_EXT4_ENCRYPTION=y CONFIG_JBD2_DEBUG=y CONFIG_JFS_FS=m CONFIG_JFS_POSIX_ACL=y @@ -515,6 +514,7 @@ CONFIG_BTRFS_FS_POSIX_ACL=y CONFIG_NILFS2_FS=m CONFIG_FS_DAX=y CONFIG_EXPORTFS_BLOCK_OPS=y +CONFIG_FS_ENCRYPTION=y CONFIG_FANOTIFY=y CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y CONFIG_QUOTA_NETLINK_INTERFACE=y diff --git a/fs/crypto/Kconfig b/fs/crypto/Kconfig index 284b589b4774..f0de238000c0 100644 --- a/fs/crypto/Kconfig +++ b/fs/crypto/Kconfig @@ -1,5 +1,5 @@ config FS_ENCRYPTION - tristate "FS Encryption (Per-file encryption)" + bool "FS Encryption (Per-file encryption)" select CRYPTO select CRYPTO_AES select CRYPTO_CBC @@ -12,4 +12,5 @@ config FS_ENCRYPTION Enable encryption of files and directories. This feature is similar to ecryptfs, but it is more memory efficient since it avoids caching the encrypted and - decrypted pages in the page cache. + decrypted pages in the page cache. Currently Ext4, + F2FS and UBIFS make use of this feature. diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index 7424f851eb5c..7da276159593 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -12,7 +12,6 @@ #ifndef _FSCRYPT_PRIVATE_H #define _FSCRYPT_PRIVATE_H -#define __FS_HAS_ENCRYPTION 1 #include #include diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig index a453cc87082b..031e5a82d556 100644 --- a/fs/ext4/Kconfig +++ b/fs/ext4/Kconfig @@ -96,21 +96,6 @@ config EXT4_FS_SECURITY If you are not using a security module that requires using extended attributes for file security labels, say N. -config EXT4_ENCRYPTION - bool "Ext4 Encryption" - depends on EXT4_FS - select FS_ENCRYPTION - help - Enable encryption of ext4 files and directories. This - feature is similar to ecryptfs, but it is more memory - efficient since it avoids caching the encrypted and - decrypted pages in the page cache. - -config EXT4_FS_ENCRYPTION - bool - default y - depends on EXT4_ENCRYPTION - config EXT4_DEBUG bool "EXT4 debugging support" depends on EXT4_FS diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index fb7a64ea5679..0ccd51f72048 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -283,9 +283,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) done: err = 0; errout: -#ifdef CONFIG_EXT4_FS_ENCRYPTION fscrypt_fname_free_buffer(&fstr); -#endif brelse(bh); return err; } diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index afdb9ad8be0e..5012ddb6daf9 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -40,7 +40,6 @@ #include #endif -#define __FS_HAS_ENCRYPTION IS_ENABLED(CONFIG_EXT4_FS_ENCRYPTION) #include #include @@ -1326,7 +1325,7 @@ struct ext4_super_block { #define EXT4_MF_FS_ABORTED 0x0002 /* Fatal error detected */ #define EXT4_MF_TEST_DUMMY_ENCRYPTION 0x0004 -#ifdef CONFIG_EXT4_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION #define DUMMY_ENCRYPTION_ENABLED(sbi) (unlikely((sbi)->s_mount_flags & \ EXT4_MF_TEST_DUMMY_ENCRYPTION)) #else @@ -2051,7 +2050,7 @@ struct ext4_filename { const struct qstr *usr_fname; struct fscrypt_str disk_name; struct dx_hash_info hinfo; -#ifdef CONFIG_EXT4_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION struct fscrypt_str crypto_buf; #endif }; @@ -2279,7 +2278,7 @@ extern unsigned ext4_free_clusters_after_init(struct super_block *sb, struct ext4_group_desc *gdp); ext4_fsblk_t ext4_inode_to_goal_block(struct inode *); -#ifdef CONFIG_EXT4_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION static inline int ext4_fname_setup_filename(struct inode *dir, const struct qstr *iname, int lookup, struct ext4_filename *fname) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 71bd2d28f58d..4356ef6d728e 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1150,7 +1150,7 @@ int do_journal_get_write_access(handle_t *handle, return ret; } -#ifdef CONFIG_EXT4_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len, get_block_t *get_block) { @@ -1302,7 +1302,7 @@ retry_journal: /* In case writeback began while the page was unlocked */ wait_for_stable_page(page); -#ifdef CONFIG_EXT4_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION if (ext4_should_dioread_nolock(inode)) ret = ext4_block_write_begin(page, pos, len, ext4_get_block_unwritten); @@ -3104,7 +3104,7 @@ retry_journal: /* In case writeback began while the page was unlocked */ wait_for_stable_page(page); -#ifdef CONFIG_EXT4_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION ret = ext4_block_write_begin(page, pos, len, ext4_da_get_block_prep); #else @@ -3879,7 +3879,7 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter) loff_t offset = iocb->ki_pos; ssize_t ret; -#ifdef CONFIG_EXT4_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode)) return 0; #endif diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index d37dafa1d133..d26bcac291bb 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -210,7 +210,7 @@ journal_err_out: return err; } -#ifdef CONFIG_EXT4_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION static int uuid_is_zero(__u8 u[16]) { int i; @@ -978,7 +978,7 @@ resizefs_out: return fscrypt_ioctl_set_policy(filp, (const void __user *)arg); case EXT4_IOC_GET_ENCRYPTION_PWSALT: { -#ifdef CONFIG_EXT4_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION int err, err2; struct ext4_sb_info *sbi = EXT4_SB(sb); handle_t *handle; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index be6cb69beb12..980166a8122a 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -612,7 +612,7 @@ static struct stats dx_show_leaf(struct inode *dir, { if (show_names) { -#ifdef CONFIG_EXT4_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION int len; char *name; struct fscrypt_str fname_crypto_str = @@ -984,7 +984,7 @@ static int htree_dirblock_to_tree(struct file *dir_file, top = (struct ext4_dir_entry_2 *) ((char *) de + dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0)); -#ifdef CONFIG_EXT4_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION /* Check if the directory is encrypted */ if (IS_ENCRYPTED(dir)) { err = fscrypt_get_encryption_info(dir); @@ -1047,7 +1047,7 @@ static int htree_dirblock_to_tree(struct file *dir_file, } errout: brelse(bh); -#ifdef CONFIG_EXT4_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION fscrypt_fname_free_buffer(&fname_crypto_str); #endif return count; @@ -1267,7 +1267,7 @@ static inline bool ext4_match(const struct ext4_filename *fname, f.usr_fname = fname->usr_fname; f.disk_name = fname->disk_name; -#ifdef CONFIG_EXT4_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION f.crypto_buf = fname->crypto_buf; #endif return fscrypt_match_name(&f, de->name, de->name_len); @@ -1498,7 +1498,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, ext4_lblk_t block; int retval; -#ifdef CONFIG_EXT4_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION *res_dir = NULL; #endif frame = dx_probe(fname, dir, NULL, frames); diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index c398b55da854..b9d6cabe2ea8 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -66,7 +66,7 @@ static void ext4_finish_bio(struct bio *bio) bio_for_each_segment_all(bvec, bio, i) { struct page *page = bvec->bv_page; -#ifdef CONFIG_EXT4_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION struct page *data_page = NULL; #endif struct buffer_head *bh, *head; @@ -78,7 +78,7 @@ static void ext4_finish_bio(struct bio *bio) if (!page) continue; -#ifdef CONFIG_EXT4_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION if (!page->mapping) { /* The bounce data pages are unmapped. */ data_page = page; @@ -111,7 +111,7 @@ static void ext4_finish_bio(struct bio *bio) bit_spin_unlock(BH_Uptodate_Lock, &head->b_state); local_irq_restore(flags); if (!under_io) { -#ifdef CONFIG_EXT4_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION if (data_page) fscrypt_restore_control_page(data_page); #endif diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c index 52d3ff5a9db1..b18881eb8da6 100644 --- a/fs/ext4/readpage.c +++ b/fs/ext4/readpage.c @@ -49,7 +49,7 @@ static inline bool ext4_bio_encrypted(struct bio *bio) { -#ifdef CONFIG_EXT4_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION return unlikely(bio->bi_private != NULL); #else return false; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index fb12d3c17c1b..60da0a6e4d86 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1232,7 +1232,7 @@ static int bdev_try_to_free_page(struct super_block *sb, struct page *page, return try_to_free_buffers(page); } -#ifdef CONFIG_EXT4_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION static int ext4_get_context(struct inode *inode, void *ctx, size_t len) { return ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION, @@ -1922,7 +1922,7 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, *journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg); } else if (token == Opt_test_dummy_encryption) { -#ifdef CONFIG_EXT4_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION sbi->s_mount_flags |= EXT4_MF_TEST_DUMMY_ENCRYPTION; ext4_msg(sb, KERN_WARNING, "Test dummy encryption mode enabled"); @@ -4167,7 +4167,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sb->s_op = &ext4_sops; sb->s_export_op = &ext4_export_ops; sb->s_xattr = ext4_xattr_handlers; -#ifdef CONFIG_EXT4_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION sb->s_cop = &ext4_cryptops; #endif #ifdef CONFIG_QUOTA diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index 9212a026a1f1..5e4e78fc0b3a 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -224,7 +224,7 @@ static struct attribute *ext4_attrs[] = { EXT4_ATTR_FEATURE(lazy_itable_init); EXT4_ATTR_FEATURE(batched_discard); EXT4_ATTR_FEATURE(meta_bg_resize); -#ifdef CONFIG_EXT4_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION EXT4_ATTR_FEATURE(encryption); #endif EXT4_ATTR_FEATURE(metadata_csum_seed); @@ -233,7 +233,7 @@ static struct attribute *ext4_feat_attrs[] = { ATTR_LIST(lazy_itable_init), ATTR_LIST(batched_discard), ATTR_LIST(meta_bg_resize), -#ifdef CONFIG_EXT4_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION ATTR_LIST(encryption), #endif ATTR_LIST(metadata_csum_seed), diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig index 9a20ef42fadd..e57cc754d543 100644 --- a/fs/f2fs/Kconfig +++ b/fs/f2fs/Kconfig @@ -3,6 +3,7 @@ config F2FS_FS depends on BLOCK select CRYPTO select CRYPTO_CRC32 + select F2FS_FS_XATTR if FS_ENCRYPTION help F2FS is based on Log-structured File System (LFS), which supports versatile "flash-friendly" features. The design has been focused on @@ -70,17 +71,6 @@ config F2FS_CHECK_FS If you want to improve the performance, say N. -config F2FS_FS_ENCRYPTION - bool "F2FS Encryption" - depends on F2FS_FS - depends on F2FS_FS_XATTR - select FS_ENCRYPTION - help - Enable encryption of f2fs files and directories. This - feature is similar to ecryptfs, but it is more memory - efficient since it avoids caching the encrypted and - decrypted pages in the page cache. - config F2FS_IO_TRACE bool "F2FS IO tracer" depends on F2FS_FS diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 9ef6f38e51cc..95cc885ccb2f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -24,7 +24,6 @@ #include #include -#define __FS_HAS_ENCRYPTION IS_ENABLED(CONFIG_F2FS_FS_ENCRYPTION) #include #ifdef CONFIG_F2FS_CHECK_FS @@ -1137,7 +1136,7 @@ enum fsync_mode { FSYNC_MODE_NOBARRIER, /* fsync behaves nobarrier based on posix */ }; -#ifdef CONFIG_F2FS_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION #define DUMMY_ENCRYPTION_ENABLED(sbi) \ (unlikely(F2FS_OPTION(sbi).test_dummy_encryption)) #else @@ -3470,7 +3469,7 @@ static inline bool f2fs_encrypted_file(struct inode *inode) static inline void f2fs_set_encrypted_inode(struct inode *inode) { -#ifdef CONFIG_F2FS_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION file_set_encrypt(inode); f2fs_set_inode_flags(inode); #endif @@ -3549,7 +3548,7 @@ static inline void set_opt_mode(struct f2fs_sb_info *sbi, unsigned int mt) static inline bool f2fs_may_encrypt(struct inode *inode) { -#ifdef CONFIG_F2FS_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION umode_t mode = inode->i_mode; return (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index c46a1d4318d4..0f3db3a8e5cb 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -757,7 +757,7 @@ static int parse_options(struct super_block *sb, char *options) kvfree(name); break; case Opt_test_dummy_encryption: -#ifdef CONFIG_F2FS_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION if (!f2fs_sb_has_encrypt(sbi)) { f2fs_msg(sb, KERN_ERR, "Encrypt feature is off"); return -EINVAL; @@ -1390,7 +1390,7 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_printf(seq, ",whint_mode=%s", "user-based"); else if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_FS) seq_printf(seq, ",whint_mode=%s", "fs-based"); -#ifdef CONFIG_F2FS_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION if (F2FS_OPTION(sbi).test_dummy_encryption) seq_puts(seq, ",test_dummy_encryption"); #endif @@ -2154,7 +2154,7 @@ static const struct super_operations f2fs_sops = { .remount_fs = f2fs_remount, }; -#ifdef CONFIG_F2FS_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION static int f2fs_get_context(struct inode *inode, void *ctx, size_t len) { return f2fs_getxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION, @@ -3116,7 +3116,7 @@ try_onemore: #endif sb->s_op = &f2fs_sops; -#ifdef CONFIG_F2FS_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION sb->s_cop = &f2fs_cryptops; #endif sb->s_xattr = f2fs_xattr_handlers; diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 0575edbe3ed6..70da6801c86f 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -431,7 +431,7 @@ F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes); F2FS_GENERAL_RO_ATTR(features); F2FS_GENERAL_RO_ATTR(current_reserved_blocks); -#ifdef CONFIG_F2FS_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION F2FS_FEATURE_RO_ATTR(encryption, FEAT_CRYPTO); #endif #ifdef CONFIG_BLK_DEV_ZONED @@ -492,7 +492,7 @@ static struct attribute *f2fs_attrs[] = { }; static struct attribute *f2fs_feat_attrs[] = { -#ifdef CONFIG_F2FS_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION ATTR_LIST(encryption), #endif #ifdef CONFIG_BLK_DEV_ZONED diff --git a/fs/ubifs/Kconfig b/fs/ubifs/Kconfig index bc1e082d921d..9da2f135121b 100644 --- a/fs/ubifs/Kconfig +++ b/fs/ubifs/Kconfig @@ -8,6 +8,7 @@ config UBIFS_FS select CRYPTO_LZO if UBIFS_FS_LZO select CRYPTO_DEFLATE if UBIFS_FS_ZLIB select CRYPTO_HASH_INFO + select UBIFS_FS_XATTR if FS_ENCRYPTION depends on MTD_UBI help UBIFS is a file system for flash devices which works on top of UBI. @@ -60,17 +61,6 @@ config UBIFS_FS_XATTR If unsure, say Y. -config UBIFS_FS_ENCRYPTION - bool "UBIFS Encryption" - depends on UBIFS_FS_XATTR && BLOCK - select FS_ENCRYPTION - default n - help - Enable encryption of UBIFS files and directories. This - feature is similar to ecryptfs, but it is more memory - efficient since it avoids caching the encrypted and - decrypted pages in the page cache. - config UBIFS_FS_SECURITY bool "UBIFS Security Labels" depends on UBIFS_FS_XATTR diff --git a/fs/ubifs/Makefile b/fs/ubifs/Makefile index 5f838319c8d5..5c4b845754a7 100644 --- a/fs/ubifs/Makefile +++ b/fs/ubifs/Makefile @@ -6,6 +6,6 @@ ubifs-y += tnc.o master.o scan.o replay.o log.o commit.o gc.o orphan.o ubifs-y += budget.o find.o tnc_commit.o compress.o lpt.o lprops.o ubifs-y += recovery.o ioctl.o lpt_commit.o tnc_misc.o debug.o ubifs-y += misc.o -ubifs-$(CONFIG_UBIFS_FS_ENCRYPTION) += crypto.o +ubifs-$(CONFIG_FS_ENCRYPTION) += crypto.o ubifs-$(CONFIG_UBIFS_FS_XATTR) += xattr.o ubifs-$(CONFIG_UBIFS_FS_AUTHENTICATION) += auth.o diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c index 0164bcc827f8..0f9c362a3402 100644 --- a/fs/ubifs/ioctl.c +++ b/fs/ubifs/ioctl.c @@ -185,7 +185,7 @@ long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return err; } case FS_IOC_SET_ENCRYPTION_POLICY: { -#ifdef CONFIG_UBIFS_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION struct ubifs_info *c = inode->i_sb->s_fs_info; err = ubifs_enable_encryption(c); @@ -198,7 +198,7 @@ long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) #endif } case FS_IOC_GET_ENCRYPTION_POLICY: { -#ifdef CONFIG_UBIFS_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION return fscrypt_ioctl_get_policy(file, (void __user *)arg); #else return -EOPNOTSUPP; diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c index 3da90c951c23..67fac1e8adfb 100644 --- a/fs/ubifs/sb.c +++ b/fs/ubifs/sb.c @@ -748,7 +748,7 @@ int ubifs_read_superblock(struct ubifs_info *c) goto out; } -#ifndef CONFIG_UBIFS_FS_ENCRYPTION +#ifndef CONFIG_FS_ENCRYPTION if (c->encrypted) { ubifs_err(c, "file system contains encrypted files but UBIFS" " was built without crypto support."); diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 1fac1133dadd..8dc2818fdd84 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -2146,7 +2146,7 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) #ifdef CONFIG_UBIFS_FS_XATTR sb->s_xattr = ubifs_xattr_handlers; #endif -#ifdef CONFIG_UBIFS_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION sb->s_cop = &ubifs_crypt_operations; #endif diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 38401adaa00d..1ae12900e01d 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -43,7 +43,6 @@ #include #include -#define __FS_HAS_ENCRYPTION IS_ENABLED(CONFIG_UBIFS_FS_ENCRYPTION) #include #include "ubifs-media.h" @@ -142,7 +141,7 @@ */ #define WORST_COMPR_FACTOR 2 -#ifdef CONFIG_UBIFS_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION #define UBIFS_CIPHER_BLOCK_SIZE FS_CRYPTO_BLOCK_SIZE #else #define UBIFS_CIPHER_BLOCK_SIZE 0 @@ -2072,7 +2071,7 @@ int ubifs_decompress(const struct ubifs_info *c, const void *buf, int len, #include "misc.h" #include "key.h" -#ifndef CONFIG_UBIFS_FS_ENCRYPTION +#ifndef CONFIG_FS_ENCRYPTION static inline int ubifs_encrypt(const struct inode *inode, struct ubifs_data_node *dn, unsigned int in_len, unsigned int *out_len, diff --git a/include/linux/fs.h b/include/linux/fs.h index 811c77743dad..ba7889bb9ef6 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -698,7 +698,7 @@ struct inode { struct fsnotify_mark_connector __rcu *i_fsnotify_marks; #endif -#if IS_ENABLED(CONFIG_FS_ENCRYPTION) +#ifdef CONFIG_FS_ENCRYPTION struct fscrypt_info *i_crypt_info; #endif @@ -1403,7 +1403,7 @@ struct super_block { void *s_security; #endif const struct xattr_handler **s_xattr; -#if IS_ENABLED(CONFIG_FS_ENCRYPTION) +#ifdef CONFIG_FS_ENCRYPTION const struct fscrypt_operations *s_cop; #endif struct hlist_bl_head s_roots; /* alternate root dentries for NFS */ diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 952ab97af325..eec604840568 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -2,9 +2,8 @@ /* * fscrypt.h: declarations for per-file encryption * - * Filesystems that implement per-file encryption include this header - * file with the __FS_HAS_ENCRYPTION set according to whether that filesystem - * is being built with encryption support or not. + * Filesystems that implement per-file encryption must include this header + * file. * * Copyright (C) 2015, Google, Inc. * @@ -15,6 +14,8 @@ #define _LINUX_FSCRYPT_H #include +#include +#include #define FS_CRYPTO_BLOCK_SIZE 16 @@ -42,11 +43,410 @@ struct fscrypt_name { /* Maximum value for the third parameter of fscrypt_operations.set_context(). */ #define FSCRYPT_SET_CONTEXT_MAX_SIZE 28 -#if __FS_HAS_ENCRYPTION -#include -#else -#include -#endif +#ifdef CONFIG_FS_ENCRYPTION +/* + * fscrypt superblock flags + */ +#define FS_CFLG_OWN_PAGES (1U << 1) + +/* + * crypto operations for filesystems + */ +struct fscrypt_operations { + unsigned int flags; + const char *key_prefix; + int (*get_context)(struct inode *, void *, size_t); + int (*set_context)(struct inode *, const void *, size_t, void *); + bool (*dummy_context)(struct inode *); + bool (*empty_dir)(struct inode *); + unsigned int max_namelen; +}; + +struct fscrypt_ctx { + union { + struct { + struct page *bounce_page; /* Ciphertext page */ + struct page *control_page; /* Original page */ + } w; + struct { + struct bio *bio; + struct work_struct work; + } r; + struct list_head free_list; /* Free list */ + }; + u8 flags; /* Flags */ +}; + +static inline bool fscrypt_has_encryption_key(const struct inode *inode) +{ + return (inode->i_crypt_info != NULL); +} + +static inline bool fscrypt_dummy_context_enabled(struct inode *inode) +{ + return inode->i_sb->s_cop->dummy_context && + inode->i_sb->s_cop->dummy_context(inode); +} + +/* crypto.c */ +extern void fscrypt_enqueue_decrypt_work(struct work_struct *); +extern struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *, gfp_t); +extern void fscrypt_release_ctx(struct fscrypt_ctx *); +extern struct page *fscrypt_encrypt_page(const struct inode *, struct page *, + unsigned int, unsigned int, + u64, gfp_t); +extern int fscrypt_decrypt_page(const struct inode *, struct page *, unsigned int, + unsigned int, u64); + +static inline struct page *fscrypt_control_page(struct page *page) +{ + return ((struct fscrypt_ctx *)page_private(page))->w.control_page; +} + +extern void fscrypt_restore_control_page(struct page *); + +/* policy.c */ +extern int fscrypt_ioctl_set_policy(struct file *, const void __user *); +extern int fscrypt_ioctl_get_policy(struct file *, void __user *); +extern int fscrypt_has_permitted_context(struct inode *, struct inode *); +extern int fscrypt_inherit_context(struct inode *, struct inode *, + void *, bool); +/* keyinfo.c */ +extern int fscrypt_get_encryption_info(struct inode *); +extern void fscrypt_put_encryption_info(struct inode *); + +/* fname.c */ +extern int fscrypt_setup_filename(struct inode *, const struct qstr *, + int lookup, struct fscrypt_name *); + +static inline void fscrypt_free_filename(struct fscrypt_name *fname) +{ + kfree(fname->crypto_buf.name); +} + +extern int fscrypt_fname_alloc_buffer(const struct inode *, u32, + struct fscrypt_str *); +extern void fscrypt_fname_free_buffer(struct fscrypt_str *); +extern int fscrypt_fname_disk_to_usr(struct inode *, u32, u32, + const struct fscrypt_str *, struct fscrypt_str *); + +#define FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE 32 + +/* Extracts the second-to-last ciphertext block; see explanation below */ +#define FSCRYPT_FNAME_DIGEST(name, len) \ + ((name) + round_down((len) - FS_CRYPTO_BLOCK_SIZE - 1, \ + FS_CRYPTO_BLOCK_SIZE)) + +#define FSCRYPT_FNAME_DIGEST_SIZE FS_CRYPTO_BLOCK_SIZE + +/** + * fscrypt_digested_name - alternate identifier for an on-disk filename + * + * When userspace lists an encrypted directory without access to the key, + * filenames whose ciphertext is longer than FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE + * bytes are shown in this abbreviated form (base64-encoded) rather than as the + * full ciphertext (base64-encoded). This is necessary to allow supporting + * filenames up to NAME_MAX bytes, since base64 encoding expands the length. + * + * To make it possible for filesystems to still find the correct directory entry + * despite not knowing the full on-disk name, we encode any filesystem-specific + * 'hash' and/or 'minor_hash' which the filesystem may need for its lookups, + * followed by the second-to-last ciphertext block of the filename. Due to the + * use of the CBC-CTS encryption mode, the second-to-last ciphertext block + * depends on the full plaintext. (Note that ciphertext stealing causes the + * last two blocks to appear "flipped".) This makes accidental collisions very + * unlikely: just a 1 in 2^128 chance for two filenames to collide even if they + * share the same filesystem-specific hashes. + * + * However, this scheme isn't immune to intentional collisions, which can be + * created by anyone able to create arbitrary plaintext filenames and view them + * without the key. Making the "digest" be a real cryptographic hash like + * SHA-256 over the full ciphertext would prevent this, although it would be + * less efficient and harder to implement, especially since the filesystem would + * need to calculate it for each directory entry examined during a search. + */ +struct fscrypt_digested_name { + u32 hash; + u32 minor_hash; + u8 digest[FSCRYPT_FNAME_DIGEST_SIZE]; +}; + +/** + * fscrypt_match_name() - test whether the given name matches a directory entry + * @fname: the name being searched for + * @de_name: the name from the directory entry + * @de_name_len: the length of @de_name in bytes + * + * Normally @fname->disk_name will be set, and in that case we simply compare + * that to the name stored in the directory entry. The only exception is that + * if we don't have the key for an encrypted directory and a filename in it is + * very long, then we won't have the full disk_name and we'll instead need to + * match against the fscrypt_digested_name. + * + * Return: %true if the name matches, otherwise %false. + */ +static inline bool fscrypt_match_name(const struct fscrypt_name *fname, + const u8 *de_name, u32 de_name_len) +{ + if (unlikely(!fname->disk_name.name)) { + const struct fscrypt_digested_name *n = + (const void *)fname->crypto_buf.name; + if (WARN_ON_ONCE(fname->usr_fname->name[0] != '_')) + return false; + if (de_name_len <= FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE) + return false; + return !memcmp(FSCRYPT_FNAME_DIGEST(de_name, de_name_len), + n->digest, FSCRYPT_FNAME_DIGEST_SIZE); + } + + if (de_name_len != fname->disk_name.len) + return false; + return !memcmp(de_name, fname->disk_name.name, fname->disk_name.len); +} + +/* bio.c */ +extern void fscrypt_decrypt_bio(struct bio *); +extern void fscrypt_enqueue_decrypt_bio(struct fscrypt_ctx *ctx, + struct bio *bio); +extern void fscrypt_pullback_bio_page(struct page **, bool); +extern int fscrypt_zeroout_range(const struct inode *, pgoff_t, sector_t, + unsigned int); + +/* hooks.c */ +extern int fscrypt_file_open(struct inode *inode, struct file *filp); +extern int __fscrypt_prepare_link(struct inode *inode, struct inode *dir); +extern int __fscrypt_prepare_rename(struct inode *old_dir, + struct dentry *old_dentry, + struct inode *new_dir, + struct dentry *new_dentry, + unsigned int flags); +extern int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry); +extern int __fscrypt_prepare_symlink(struct inode *dir, unsigned int len, + unsigned int max_len, + struct fscrypt_str *disk_link); +extern int __fscrypt_encrypt_symlink(struct inode *inode, const char *target, + unsigned int len, + struct fscrypt_str *disk_link); +extern const char *fscrypt_get_symlink(struct inode *inode, const void *caddr, + unsigned int max_size, + struct delayed_call *done); +#else /* !CONFIG_FS_ENCRYPTION */ + +static inline bool fscrypt_has_encryption_key(const struct inode *inode) +{ + return false; +} + +static inline bool fscrypt_dummy_context_enabled(struct inode *inode) +{ + return false; +} + +/* crypto.c */ +static inline void fscrypt_enqueue_decrypt_work(struct work_struct *work) +{ +} + +static inline struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *inode, + gfp_t gfp_flags) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline void fscrypt_release_ctx(struct fscrypt_ctx *ctx) +{ + return; +} + +static inline struct page *fscrypt_encrypt_page(const struct inode *inode, + struct page *page, + unsigned int len, + unsigned int offs, + u64 lblk_num, gfp_t gfp_flags) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline int fscrypt_decrypt_page(const struct inode *inode, + struct page *page, + unsigned int len, unsigned int offs, + u64 lblk_num) +{ + return -EOPNOTSUPP; +} + +static inline struct page *fscrypt_control_page(struct page *page) +{ + WARN_ON_ONCE(1); + return ERR_PTR(-EINVAL); +} + +static inline void fscrypt_restore_control_page(struct page *page) +{ + return; +} + +/* policy.c */ +static inline int fscrypt_ioctl_set_policy(struct file *filp, + const void __user *arg) +{ + return -EOPNOTSUPP; +} + +static inline int fscrypt_ioctl_get_policy(struct file *filp, void __user *arg) +{ + return -EOPNOTSUPP; +} + +static inline int fscrypt_has_permitted_context(struct inode *parent, + struct inode *child) +{ + return 0; +} + +static inline int fscrypt_inherit_context(struct inode *parent, + struct inode *child, + void *fs_data, bool preload) +{ + return -EOPNOTSUPP; +} + +/* keyinfo.c */ +static inline int fscrypt_get_encryption_info(struct inode *inode) +{ + return -EOPNOTSUPP; +} + +static inline void fscrypt_put_encryption_info(struct inode *inode) +{ + return; +} + + /* fname.c */ +static inline int fscrypt_setup_filename(struct inode *dir, + const struct qstr *iname, + int lookup, struct fscrypt_name *fname) +{ + if (IS_ENCRYPTED(dir)) + return -EOPNOTSUPP; + + memset(fname, 0, sizeof(struct fscrypt_name)); + fname->usr_fname = iname; + fname->disk_name.name = (unsigned char *)iname->name; + fname->disk_name.len = iname->len; + return 0; +} + +static inline void fscrypt_free_filename(struct fscrypt_name *fname) +{ + return; +} + +static inline int fscrypt_fname_alloc_buffer(const struct inode *inode, + u32 max_encrypted_len, + struct fscrypt_str *crypto_str) +{ + return -EOPNOTSUPP; +} + +static inline void fscrypt_fname_free_buffer(struct fscrypt_str *crypto_str) +{ + return; +} + +static inline int fscrypt_fname_disk_to_usr(struct inode *inode, + u32 hash, u32 minor_hash, + const struct fscrypt_str *iname, + struct fscrypt_str *oname) +{ + return -EOPNOTSUPP; +} + +static inline bool fscrypt_match_name(const struct fscrypt_name *fname, + const u8 *de_name, u32 de_name_len) +{ + /* Encryption support disabled; use standard comparison */ + if (de_name_len != fname->disk_name.len) + return false; + return !memcmp(de_name, fname->disk_name.name, fname->disk_name.len); +} + +/* bio.c */ +static inline void fscrypt_decrypt_bio(struct bio *bio) +{ +} + +static inline void fscrypt_enqueue_decrypt_bio(struct fscrypt_ctx *ctx, + struct bio *bio) +{ +} + +static inline void fscrypt_pullback_bio_page(struct page **page, bool restore) +{ + return; +} + +static inline int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, + sector_t pblk, unsigned int len) +{ + return -EOPNOTSUPP; +} + +/* hooks.c */ + +static inline int fscrypt_file_open(struct inode *inode, struct file *filp) +{ + if (IS_ENCRYPTED(inode)) + return -EOPNOTSUPP; + return 0; +} + +static inline int __fscrypt_prepare_link(struct inode *inode, + struct inode *dir) +{ + return -EOPNOTSUPP; +} + +static inline int __fscrypt_prepare_rename(struct inode *old_dir, + struct dentry *old_dentry, + struct inode *new_dir, + struct dentry *new_dentry, + unsigned int flags) +{ + return -EOPNOTSUPP; +} + +static inline int __fscrypt_prepare_lookup(struct inode *dir, + struct dentry *dentry) +{ + return -EOPNOTSUPP; +} + +static inline int __fscrypt_prepare_symlink(struct inode *dir, + unsigned int len, + unsigned int max_len, + struct fscrypt_str *disk_link) +{ + return -EOPNOTSUPP; +} + + +static inline int __fscrypt_encrypt_symlink(struct inode *inode, + const char *target, + unsigned int len, + struct fscrypt_str *disk_link) +{ + return -EOPNOTSUPP; +} + +static inline const char *fscrypt_get_symlink(struct inode *inode, + const void *caddr, + unsigned int max_size, + struct delayed_call *done) +{ + return ERR_PTR(-EOPNOTSUPP); +} +#endif /* !CONFIG_FS_ENCRYPTION */ /** * fscrypt_require_key - require an inode's encryption key diff --git a/include/linux/fscrypt_notsupp.h b/include/linux/fscrypt_notsupp.h deleted file mode 100644 index ee8b43e4c15a..000000000000 --- a/include/linux/fscrypt_notsupp.h +++ /dev/null @@ -1,231 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * fscrypt_notsupp.h - * - * This stubs out the fscrypt functions for filesystems configured without - * encryption support. - * - * Do not include this file directly. Use fscrypt.h instead! - */ -#ifndef _LINUX_FSCRYPT_H -#error "Incorrect include of linux/fscrypt_notsupp.h!" -#endif - -#ifndef _LINUX_FSCRYPT_NOTSUPP_H -#define _LINUX_FSCRYPT_NOTSUPP_H - -static inline bool fscrypt_has_encryption_key(const struct inode *inode) -{ - return false; -} - -static inline bool fscrypt_dummy_context_enabled(struct inode *inode) -{ - return false; -} - -/* crypto.c */ -static inline void fscrypt_enqueue_decrypt_work(struct work_struct *work) -{ -} - -static inline struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *inode, - gfp_t gfp_flags) -{ - return ERR_PTR(-EOPNOTSUPP); -} - -static inline void fscrypt_release_ctx(struct fscrypt_ctx *ctx) -{ - return; -} - -static inline struct page *fscrypt_encrypt_page(const struct inode *inode, - struct page *page, - unsigned int len, - unsigned int offs, - u64 lblk_num, gfp_t gfp_flags) -{ - return ERR_PTR(-EOPNOTSUPP); -} - -static inline int fscrypt_decrypt_page(const struct inode *inode, - struct page *page, - unsigned int len, unsigned int offs, - u64 lblk_num) -{ - return -EOPNOTSUPP; -} - -static inline struct page *fscrypt_control_page(struct page *page) -{ - WARN_ON_ONCE(1); - return ERR_PTR(-EINVAL); -} - -static inline void fscrypt_restore_control_page(struct page *page) -{ - return; -} - -/* policy.c */ -static inline int fscrypt_ioctl_set_policy(struct file *filp, - const void __user *arg) -{ - return -EOPNOTSUPP; -} - -static inline int fscrypt_ioctl_get_policy(struct file *filp, void __user *arg) -{ - return -EOPNOTSUPP; -} - -static inline int fscrypt_has_permitted_context(struct inode *parent, - struct inode *child) -{ - return 0; -} - -static inline int fscrypt_inherit_context(struct inode *parent, - struct inode *child, - void *fs_data, bool preload) -{ - return -EOPNOTSUPP; -} - -/* keyinfo.c */ -static inline int fscrypt_get_encryption_info(struct inode *inode) -{ - return -EOPNOTSUPP; -} - -static inline void fscrypt_put_encryption_info(struct inode *inode) -{ - return; -} - - /* fname.c */ -static inline int fscrypt_setup_filename(struct inode *dir, - const struct qstr *iname, - int lookup, struct fscrypt_name *fname) -{ - if (IS_ENCRYPTED(dir)) - return -EOPNOTSUPP; - - memset(fname, 0, sizeof(struct fscrypt_name)); - fname->usr_fname = iname; - fname->disk_name.name = (unsigned char *)iname->name; - fname->disk_name.len = iname->len; - return 0; -} - -static inline void fscrypt_free_filename(struct fscrypt_name *fname) -{ - return; -} - -static inline int fscrypt_fname_alloc_buffer(const struct inode *inode, - u32 max_encrypted_len, - struct fscrypt_str *crypto_str) -{ - return -EOPNOTSUPP; -} - -static inline void fscrypt_fname_free_buffer(struct fscrypt_str *crypto_str) -{ - return; -} - -static inline int fscrypt_fname_disk_to_usr(struct inode *inode, - u32 hash, u32 minor_hash, - const struct fscrypt_str *iname, - struct fscrypt_str *oname) -{ - return -EOPNOTSUPP; -} - -static inline bool fscrypt_match_name(const struct fscrypt_name *fname, - const u8 *de_name, u32 de_name_len) -{ - /* Encryption support disabled; use standard comparison */ - if (de_name_len != fname->disk_name.len) - return false; - return !memcmp(de_name, fname->disk_name.name, fname->disk_name.len); -} - -/* bio.c */ -static inline void fscrypt_decrypt_bio(struct bio *bio) -{ -} - -static inline void fscrypt_enqueue_decrypt_bio(struct fscrypt_ctx *ctx, - struct bio *bio) -{ -} - -static inline void fscrypt_pullback_bio_page(struct page **page, bool restore) -{ - return; -} - -static inline int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, - sector_t pblk, unsigned int len) -{ - return -EOPNOTSUPP; -} - -/* hooks.c */ - -static inline int fscrypt_file_open(struct inode *inode, struct file *filp) -{ - if (IS_ENCRYPTED(inode)) - return -EOPNOTSUPP; - return 0; -} - -static inline int __fscrypt_prepare_link(struct inode *inode, - struct inode *dir) -{ - return -EOPNOTSUPP; -} - -static inline int __fscrypt_prepare_rename(struct inode *old_dir, - struct dentry *old_dentry, - struct inode *new_dir, - struct dentry *new_dentry, - unsigned int flags) -{ - return -EOPNOTSUPP; -} - -static inline int __fscrypt_prepare_lookup(struct inode *dir, - struct dentry *dentry) -{ - return -EOPNOTSUPP; -} - -static inline int __fscrypt_prepare_symlink(struct inode *dir, - unsigned int len, - unsigned int max_len, - struct fscrypt_str *disk_link) -{ - return -EOPNOTSUPP; -} - -static inline int __fscrypt_encrypt_symlink(struct inode *inode, - const char *target, - unsigned int len, - struct fscrypt_str *disk_link) -{ - return -EOPNOTSUPP; -} - -static inline const char *fscrypt_get_symlink(struct inode *inode, - const void *caddr, - unsigned int max_size, - struct delayed_call *done) -{ - return ERR_PTR(-EOPNOTSUPP); -} - -#endif /* _LINUX_FSCRYPT_NOTSUPP_H */ diff --git a/include/linux/fscrypt_supp.h b/include/linux/fscrypt_supp.h deleted file mode 100644 index 6456c6b2005f..000000000000 --- a/include/linux/fscrypt_supp.h +++ /dev/null @@ -1,204 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * fscrypt_supp.h - * - * Do not include this file directly. Use fscrypt.h instead! - */ -#ifndef _LINUX_FSCRYPT_H -#error "Incorrect include of linux/fscrypt_supp.h!" -#endif - -#ifndef _LINUX_FSCRYPT_SUPP_H -#define _LINUX_FSCRYPT_SUPP_H - -#include -#include - -/* - * fscrypt superblock flags - */ -#define FS_CFLG_OWN_PAGES (1U << 1) - -/* - * crypto operations for filesystems - */ -struct fscrypt_operations { - unsigned int flags; - const char *key_prefix; - int (*get_context)(struct inode *, void *, size_t); - int (*set_context)(struct inode *, const void *, size_t, void *); - bool (*dummy_context)(struct inode *); - bool (*empty_dir)(struct inode *); - unsigned int max_namelen; -}; - -struct fscrypt_ctx { - union { - struct { - struct page *bounce_page; /* Ciphertext page */ - struct page *control_page; /* Original page */ - } w; - struct { - struct bio *bio; - struct work_struct work; - } r; - struct list_head free_list; /* Free list */ - }; - u8 flags; /* Flags */ -}; - -static inline bool fscrypt_has_encryption_key(const struct inode *inode) -{ - return (inode->i_crypt_info != NULL); -} - -static inline bool fscrypt_dummy_context_enabled(struct inode *inode) -{ - return inode->i_sb->s_cop->dummy_context && - inode->i_sb->s_cop->dummy_context(inode); -} - -/* crypto.c */ -extern void fscrypt_enqueue_decrypt_work(struct work_struct *); -extern struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *, gfp_t); -extern void fscrypt_release_ctx(struct fscrypt_ctx *); -extern struct page *fscrypt_encrypt_page(const struct inode *, struct page *, - unsigned int, unsigned int, - u64, gfp_t); -extern int fscrypt_decrypt_page(const struct inode *, struct page *, unsigned int, - unsigned int, u64); - -static inline struct page *fscrypt_control_page(struct page *page) -{ - return ((struct fscrypt_ctx *)page_private(page))->w.control_page; -} - -extern void fscrypt_restore_control_page(struct page *); - -/* policy.c */ -extern int fscrypt_ioctl_set_policy(struct file *, const void __user *); -extern int fscrypt_ioctl_get_policy(struct file *, void __user *); -extern int fscrypt_has_permitted_context(struct inode *, struct inode *); -extern int fscrypt_inherit_context(struct inode *, struct inode *, - void *, bool); -/* keyinfo.c */ -extern int fscrypt_get_encryption_info(struct inode *); -extern void fscrypt_put_encryption_info(struct inode *); - -/* fname.c */ -extern int fscrypt_setup_filename(struct inode *, const struct qstr *, - int lookup, struct fscrypt_name *); - -static inline void fscrypt_free_filename(struct fscrypt_name *fname) -{ - kfree(fname->crypto_buf.name); -} - -extern int fscrypt_fname_alloc_buffer(const struct inode *, u32, - struct fscrypt_str *); -extern void fscrypt_fname_free_buffer(struct fscrypt_str *); -extern int fscrypt_fname_disk_to_usr(struct inode *, u32, u32, - const struct fscrypt_str *, struct fscrypt_str *); - -#define FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE 32 - -/* Extracts the second-to-last ciphertext block; see explanation below */ -#define FSCRYPT_FNAME_DIGEST(name, len) \ - ((name) + round_down((len) - FS_CRYPTO_BLOCK_SIZE - 1, \ - FS_CRYPTO_BLOCK_SIZE)) - -#define FSCRYPT_FNAME_DIGEST_SIZE FS_CRYPTO_BLOCK_SIZE - -/** - * fscrypt_digested_name - alternate identifier for an on-disk filename - * - * When userspace lists an encrypted directory without access to the key, - * filenames whose ciphertext is longer than FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE - * bytes are shown in this abbreviated form (base64-encoded) rather than as the - * full ciphertext (base64-encoded). This is necessary to allow supporting - * filenames up to NAME_MAX bytes, since base64 encoding expands the length. - * - * To make it possible for filesystems to still find the correct directory entry - * despite not knowing the full on-disk name, we encode any filesystem-specific - * 'hash' and/or 'minor_hash' which the filesystem may need for its lookups, - * followed by the second-to-last ciphertext block of the filename. Due to the - * use of the CBC-CTS encryption mode, the second-to-last ciphertext block - * depends on the full plaintext. (Note that ciphertext stealing causes the - * last two blocks to appear "flipped".) This makes accidental collisions very - * unlikely: just a 1 in 2^128 chance for two filenames to collide even if they - * share the same filesystem-specific hashes. - * - * However, this scheme isn't immune to intentional collisions, which can be - * created by anyone able to create arbitrary plaintext filenames and view them - * without the key. Making the "digest" be a real cryptographic hash like - * SHA-256 over the full ciphertext would prevent this, although it would be - * less efficient and harder to implement, especially since the filesystem would - * need to calculate it for each directory entry examined during a search. - */ -struct fscrypt_digested_name { - u32 hash; - u32 minor_hash; - u8 digest[FSCRYPT_FNAME_DIGEST_SIZE]; -}; - -/** - * fscrypt_match_name() - test whether the given name matches a directory entry - * @fname: the name being searched for - * @de_name: the name from the directory entry - * @de_name_len: the length of @de_name in bytes - * - * Normally @fname->disk_name will be set, and in that case we simply compare - * that to the name stored in the directory entry. The only exception is that - * if we don't have the key for an encrypted directory and a filename in it is - * very long, then we won't have the full disk_name and we'll instead need to - * match against the fscrypt_digested_name. - * - * Return: %true if the name matches, otherwise %false. - */ -static inline bool fscrypt_match_name(const struct fscrypt_name *fname, - const u8 *de_name, u32 de_name_len) -{ - if (unlikely(!fname->disk_name.name)) { - const struct fscrypt_digested_name *n = - (const void *)fname->crypto_buf.name; - if (WARN_ON_ONCE(fname->usr_fname->name[0] != '_')) - return false; - if (de_name_len <= FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE) - return false; - return !memcmp(FSCRYPT_FNAME_DIGEST(de_name, de_name_len), - n->digest, FSCRYPT_FNAME_DIGEST_SIZE); - } - - if (de_name_len != fname->disk_name.len) - return false; - return !memcmp(de_name, fname->disk_name.name, fname->disk_name.len); -} - -/* bio.c */ -extern void fscrypt_decrypt_bio(struct bio *); -extern void fscrypt_enqueue_decrypt_bio(struct fscrypt_ctx *ctx, - struct bio *bio); -extern void fscrypt_pullback_bio_page(struct page **, bool); -extern int fscrypt_zeroout_range(const struct inode *, pgoff_t, sector_t, - unsigned int); - -/* hooks.c */ -extern int fscrypt_file_open(struct inode *inode, struct file *filp); -extern int __fscrypt_prepare_link(struct inode *inode, struct inode *dir); -extern int __fscrypt_prepare_rename(struct inode *old_dir, - struct dentry *old_dentry, - struct inode *new_dir, - struct dentry *new_dentry, - unsigned int flags); -extern int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry); -extern int __fscrypt_prepare_symlink(struct inode *dir, unsigned int len, - unsigned int max_len, - struct fscrypt_str *disk_link); -extern int __fscrypt_encrypt_symlink(struct inode *inode, const char *target, - unsigned int len, - struct fscrypt_str *disk_link); -extern const char *fscrypt_get_symlink(struct inode *inode, const void *caddr, - unsigned int max_size, - struct delayed_call *done); - -#endif /* _LINUX_FSCRYPT_SUPP_H */ -- cgit From f5e55e777cc93eae1416f0fa4908e8846b6d7825 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 22 Jan 2019 16:20:21 -0800 Subject: fscrypt: return -EXDEV for incompatible rename or link into encrypted dir Currently, trying to rename or link a regular file, directory, or symlink into an encrypted directory fails with EPERM when the source file is unencrypted or is encrypted with a different encryption policy, and is on the same mountpoint. It is correct for the operation to fail, but the choice of EPERM breaks tools like 'mv' that know to copy rather than rename if they see EXDEV, but don't know what to do with EPERM. Our original motivation for EPERM was to encourage users to securely handle their data. Encrypting files by "moving" them into an encrypted directory can be insecure because the unencrypted data may remain in free space on disk, where it can later be recovered by an attacker. It's much better to encrypt the data from the start, or at least try to securely delete the source data e.g. using the 'shred' program. However, the current behavior hasn't been effective at achieving its goal because users tend to be confused, hack around it, and complain; see e.g. https://github.com/google/fscrypt/issues/76. And in some cases it's actually inconsistent or unnecessary. For example, 'mv'-ing files between differently encrypted directories doesn't work even in cases where it can be secure, such as when in userspace the same passphrase protects both directories. Yet, you *can* already 'mv' unencrypted files into an encrypted directory if the source files are on a different mountpoint, even though doing so is often insecure. There are probably better ways to teach users to securely handle their files. For example, the 'fscrypt' userspace tool could provide a command that migrates unencrypted files into an encrypted directory, acting like 'shred' on the source files and providing appropriate warnings depending on the type of the source filesystem and disk. Receiving errors on unimportant files might also force some users to disable encryption, thus making the behavior counterproductive. It's desirable to make encryption as unobtrusive as possible. Therefore, change the error code from EPERM to EXDEV so that tools looking for EXDEV will fall back to a copy. This, of course, doesn't prevent users from still doing the right things to securely manage their files. Note that this also matches the behavior when a file is renamed between two project quota hierarchies; so there's precedent for using EXDEV for things other than mountpoints. xfstests generic/398 will require an update with this change. [Rewritten from an earlier patch series by Michael Halcrow.] Cc: Michael Halcrow Cc: Joe Richey Signed-off-by: Eric Biggers --- Documentation/filesystems/fscrypt.rst | 12 ++++++++++-- fs/crypto/hooks.c | 6 +++--- fs/crypto/policy.c | 3 +-- include/linux/fscrypt.h | 4 ++-- 4 files changed, 16 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst index 43dd989e2a3f..08c23b60e016 100644 --- a/Documentation/filesystems/fscrypt.rst +++ b/Documentation/filesystems/fscrypt.rst @@ -451,10 +451,18 @@ astute users may notice some differences in behavior: - Unencrypted files, or files encrypted with a different encryption policy (i.e. different key, modes, or flags), cannot be renamed or linked into an encrypted directory; see `Encryption policy - enforcement`_. Attempts to do so will fail with EPERM. However, + enforcement`_. Attempts to do so will fail with EXDEV. However, encrypted files can be renamed within an encrypted directory, or into an unencrypted directory. + Note: "moving" an unencrypted file into an encrypted directory, e.g. + with the `mv` program, is implemented in userspace by a copy + followed by a delete. Be aware that the original unencrypted data + may remain recoverable from free space on the disk; prefer to keep + all files encrypted from the very beginning. The `shred` program + may be used to overwrite the source files but isn't guaranteed to be + effective on all filesystems and storage devices. + - Direct I/O is not supported on encrypted files. Attempts to use direct I/O on such files will fall back to buffered I/O. @@ -541,7 +549,7 @@ not be encrypted. Except for those special files, it is forbidden to have unencrypted files, or files encrypted with a different encryption policy, in an encrypted directory tree. Attempts to link or rename such a file into -an encrypted directory will fail with EPERM. This is also enforced +an encrypted directory will fail with EXDEV. This is also enforced during ->lookup() to provide limited protection against offline attacks that try to disable or downgrade encryption in known locations where applications may later write sensitive data. It is recommended diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c index 926e5df20ec3..56debb1fcf5e 100644 --- a/fs/crypto/hooks.c +++ b/fs/crypto/hooks.c @@ -58,7 +58,7 @@ int __fscrypt_prepare_link(struct inode *inode, struct inode *dir) return err; if (!fscrypt_has_permitted_context(dir, inode)) - return -EPERM; + return -EXDEV; return 0; } @@ -82,13 +82,13 @@ int __fscrypt_prepare_rename(struct inode *old_dir, struct dentry *old_dentry, if (IS_ENCRYPTED(new_dir) && !fscrypt_has_permitted_context(new_dir, d_inode(old_dentry))) - return -EPERM; + return -EXDEV; if ((flags & RENAME_EXCHANGE) && IS_ENCRYPTED(old_dir) && !fscrypt_has_permitted_context(old_dir, d_inode(new_dentry))) - return -EPERM; + return -EXDEV; } return 0; } diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c index f490de921ce8..bd7eaf9b3f00 100644 --- a/fs/crypto/policy.c +++ b/fs/crypto/policy.c @@ -151,8 +151,7 @@ EXPORT_SYMBOL(fscrypt_ioctl_get_policy); * malicious offline violations of this constraint, while the link and rename * checks are needed to prevent online violations of this constraint. * - * Return: 1 if permitted, 0 if forbidden. If forbidden, the caller must fail - * the filesystem operation with EPERM. + * Return: 1 if permitted, 0 if forbidden. */ int fscrypt_has_permitted_context(struct inode *parent, struct inode *child) { diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index eec604840568..e5194fc3983e 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -489,7 +489,7 @@ static inline int fscrypt_require_key(struct inode *inode) * in an encrypted directory tree use the same encryption policy. * * Return: 0 on success, -ENOKEY if the directory's encryption key is missing, - * -EPERM if the link would result in an inconsistent encryption policy, or + * -EXDEV if the link would result in an inconsistent encryption policy, or * another -errno code. */ static inline int fscrypt_prepare_link(struct dentry *old_dentry, @@ -519,7 +519,7 @@ static inline int fscrypt_prepare_link(struct dentry *old_dentry, * We also verify that the rename will not violate the constraint that all files * in an encrypted directory tree use the same encryption policy. * - * Return: 0 on success, -ENOKEY if an encryption key is missing, -EPERM if the + * Return: 0 on success, -ENOKEY if an encryption key is missing, -EXDEV if the * rename would cause inconsistent encryption policies, or another -errno code. */ static inline int fscrypt_prepare_rename(struct inode *old_dir, -- cgit From ef74f70e5a10cc2a78cc5529e564170cabcda9af Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Sat, 19 Jan 2019 15:42:42 -0500 Subject: gpio: add irq domain activate/deactivate functions This adds the two new functions gpiochip_irq_domain_activate and gpiochip_irq_domain_deactivate that can be used as the activate and deactivate functions in the struct irq_domain_ops. This is for situations where only gpiochip_{lock,unlock}_as_irq needs to be called. SPMI and SSBI GPIO are two users that will initially use these functions. Signed-off-by: Brian Masney Suggested-by: Stephen Boyd Reviewed-by: Stephen Boyd Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib.c | 37 +++++++++++++++++++++++++++++++++++++ include/linux/gpio/driver.h | 5 +++++ 2 files changed, 42 insertions(+) (limited to 'include/linux') diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 1651d7f0a303..361a09c8138a 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1775,6 +1775,43 @@ static const struct irq_domain_ops gpiochip_domain_ops = { .xlate = irq_domain_xlate_twocell, }; +/** + * gpiochip_irq_domain_activate() - Lock a GPIO to be used as an IRQ + * @domain: The IRQ domain used by this IRQ chip + * @data: Outermost irq_data associated with the IRQ + * @reserve: If set, only reserve an interrupt vector instead of assigning one + * + * This function is a wrapper that calls gpiochip_lock_as_irq() and is to be + * used as the activate function for the &struct irq_domain_ops. The host_data + * for the IRQ domain must be the &struct gpio_chip. + */ +int gpiochip_irq_domain_activate(struct irq_domain *domain, + struct irq_data *data, bool reserve) +{ + struct gpio_chip *chip = domain->host_data; + + return gpiochip_lock_as_irq(chip, data->hwirq); +} +EXPORT_SYMBOL_GPL(gpiochip_irq_domain_activate); + +/** + * gpiochip_irq_domain_deactivate() - Unlock a GPIO used as an IRQ + * @domain: The IRQ domain used by this IRQ chip + * @data: Outermost irq_data associated with the IRQ + * + * This function is a wrapper that will call gpiochip_unlock_as_irq() and is to + * be used as the deactivate function for the &struct irq_domain_ops. The + * host_data for the IRQ domain must be the &struct gpio_chip. + */ +void gpiochip_irq_domain_deactivate(struct irq_domain *domain, + struct irq_data *data) +{ + struct gpio_chip *chip = domain->host_data; + + return gpiochip_unlock_as_irq(chip, data->hwirq); +} +EXPORT_SYMBOL_GPL(gpiochip_irq_domain_deactivate); + static int gpiochip_to_irq(struct gpio_chip *chip, unsigned offset) { if (!gpiochip_irqchip_irq_valid(chip, offset)) diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 07cddbf45186..01497910f023 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -472,6 +472,11 @@ int gpiochip_irq_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hwirq); void gpiochip_irq_unmap(struct irq_domain *d, unsigned int irq); +int gpiochip_irq_domain_activate(struct irq_domain *domain, + struct irq_data *data, bool reserve); +void gpiochip_irq_domain_deactivate(struct irq_domain *domain, + struct irq_data *data); + void gpiochip_set_chained_irqchip(struct gpio_chip *gpiochip, struct irq_chip *irqchip, unsigned int parent_irq, -- cgit From 275f22148e8720e84b180d9e0cdf8abfd69bac5b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 31 Dec 2018 22:22:40 +0100 Subject: ipc: rename old-style shmctl/semctl/msgctl syscalls The behavior of these system calls is slightly different between architectures, as determined by the CONFIG_ARCH_WANT_IPC_PARSE_VERSION symbol. Most architectures that implement the split IPC syscalls don't set that symbol and only get the modern version, but alpha, arm, microblaze, mips-n32, mips-n64 and xtensa expect the caller to pass the IPC_64 flag. For the architectures that so far only implement sys_ipc(), i.e. m68k, mips-o32, powerpc, s390, sh, sparc, and x86-32, we want the new behavior when adding the split syscalls, so we need to distinguish between the two groups of architectures. The method I picked for this distinction is to have a separate system call entry point: sys_old_*ctl() now uses ipc_parse_version, while sys_*ctl() does not. The system call tables of the five architectures are changed accordingly. As an additional benefit, we no longer need the configuration specific definition for ipc_parse_version(), it always does the same thing now, but simply won't get called on architectures with the modern interface. A small downside is that on architectures that do set ARCH_WANT_IPC_PARSE_VERSION, we now have an extra set of entry points that are never called. They only add a few bytes of bloat, so it seems better to keep them compared to adding yet another Kconfig symbol. I considered adding new syscall numbers for the IPC_64 variants for consistency, but decided against that for now. Signed-off-by: Arnd Bergmann --- arch/alpha/kernel/syscalls/syscall.tbl | 6 ++--- arch/arm/tools/syscall.tbl | 6 ++--- arch/arm64/include/asm/unistd32.h | 6 ++--- arch/microblaze/kernel/syscalls/syscall.tbl | 6 ++--- arch/mips/kernel/syscalls/syscall_n32.tbl | 6 ++--- arch/mips/kernel/syscalls/syscall_n64.tbl | 6 ++--- arch/xtensa/kernel/syscalls/syscall.tbl | 6 ++--- include/linux/syscalls.h | 3 +++ ipc/msg.c | 39 +++++++++++++++++++++++----- ipc/sem.c | 39 +++++++++++++++++++++++----- ipc/shm.c | 40 ++++++++++++++++++++++++----- ipc/syscall.c | 12 ++++----- ipc/util.h | 21 +++++---------- kernel/sys_ni.c | 3 +++ 14 files changed, 137 insertions(+), 62 deletions(-) (limited to 'include/linux') diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index f920b65e8c49..b0e247287908 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -174,17 +174,17 @@ 187 common osf_alt_sigpending sys_ni_syscall 188 common osf_alt_setsid sys_ni_syscall 199 common osf_swapon sys_swapon -200 common msgctl sys_msgctl +200 common msgctl sys_old_msgctl 201 common msgget sys_msgget 202 common msgrcv sys_msgrcv 203 common msgsnd sys_msgsnd -204 common semctl sys_semctl +204 common semctl sys_old_semctl 205 common semget sys_semget 206 common semop sys_semop 207 common osf_utsname sys_osf_utsname 208 common lchown sys_lchown 209 common shmat sys_shmat -210 common shmctl sys_shmctl +210 common shmctl sys_old_shmctl 211 common shmdt sys_shmdt 212 common shmget sys_shmget 213 common osf_mvalid sys_ni_syscall diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index 20ed7e026723..b54b7f2bc24a 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -314,15 +314,15 @@ 297 common recvmsg sys_recvmsg 298 common semop sys_semop sys_oabi_semop 299 common semget sys_semget -300 common semctl sys_semctl +300 common semctl sys_old_semctl 301 common msgsnd sys_msgsnd 302 common msgrcv sys_msgrcv 303 common msgget sys_msgget -304 common msgctl sys_msgctl +304 common msgctl sys_old_msgctl 305 common shmat sys_shmat 306 common shmdt sys_shmdt 307 common shmget sys_shmget -308 common shmctl sys_shmctl +308 common shmctl sys_old_shmctl 309 common add_key sys_add_key 310 common request_key sys_request_key 311 common keyctl sys_keyctl diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 8ca1d4c304f4..d10cce69a4b0 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -622,7 +622,7 @@ __SYSCALL(__NR_semop, sys_semop) #define __NR_semget 299 __SYSCALL(__NR_semget, sys_semget) #define __NR_semctl 300 -__SYSCALL(__NR_semctl, compat_sys_semctl) +__SYSCALL(__NR_semctl, compat_sys_old_semctl) #define __NR_msgsnd 301 __SYSCALL(__NR_msgsnd, compat_sys_msgsnd) #define __NR_msgrcv 302 @@ -630,7 +630,7 @@ __SYSCALL(__NR_msgrcv, compat_sys_msgrcv) #define __NR_msgget 303 __SYSCALL(__NR_msgget, sys_msgget) #define __NR_msgctl 304 -__SYSCALL(__NR_msgctl, compat_sys_msgctl) +__SYSCALL(__NR_msgctl, compat_sys_old_msgctl) #define __NR_shmat 305 __SYSCALL(__NR_shmat, compat_sys_shmat) #define __NR_shmdt 306 @@ -638,7 +638,7 @@ __SYSCALL(__NR_shmdt, sys_shmdt) #define __NR_shmget 307 __SYSCALL(__NR_shmget, sys_shmget) #define __NR_shmctl 308 -__SYSCALL(__NR_shmctl, compat_sys_shmctl) +__SYSCALL(__NR_shmctl, compat_sys_old_shmctl) #define __NR_add_key 309 __SYSCALL(__NR_add_key, sys_add_key) #define __NR_request_key 310 diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl index a24d09e937dd..7cc0f9554da3 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -335,15 +335,15 @@ 325 common semtimedop sys_semtimedop 326 common timerfd_settime sys_timerfd_settime 327 common timerfd_gettime sys_timerfd_gettime -328 common semctl sys_semctl +328 common semctl sys_old_semctl 329 common semget sys_semget 330 common semop sys_semop -331 common msgctl sys_msgctl +331 common msgctl sys_old_msgctl 332 common msgget sys_msgget 333 common msgrcv sys_msgrcv 334 common msgsnd sys_msgsnd 335 common shmat sys_shmat -336 common shmctl sys_shmctl +336 common shmctl sys_old_shmctl 337 common shmdt sys_shmdt 338 common shmget sys_shmget 339 common signalfd4 sys_signalfd4 diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index 53d5862649ae..cc134b1211aa 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -37,7 +37,7 @@ 27 n32 madvise sys_madvise 28 n32 shmget sys_shmget 29 n32 shmat sys_shmat -30 n32 shmctl compat_sys_shmctl +30 n32 shmctl compat_sys_old_shmctl 31 n32 dup sys_dup 32 n32 dup2 sys_dup2 33 n32 pause sys_pause @@ -71,12 +71,12 @@ 61 n32 uname sys_newuname 62 n32 semget sys_semget 63 n32 semop sys_semop -64 n32 semctl compat_sys_semctl +64 n32 semctl compat_sys_old_semctl 65 n32 shmdt sys_shmdt 66 n32 msgget sys_msgget 67 n32 msgsnd compat_sys_msgsnd 68 n32 msgrcv compat_sys_msgrcv -69 n32 msgctl compat_sys_msgctl +69 n32 msgctl compat_sys_old_msgctl 70 n32 fcntl compat_sys_fcntl 71 n32 flock sys_flock 72 n32 fsync sys_fsync diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index a8286ccbb66c..af0da757a7b2 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -37,7 +37,7 @@ 27 n64 madvise sys_madvise 28 n64 shmget sys_shmget 29 n64 shmat sys_shmat -30 n64 shmctl sys_shmctl +30 n64 shmctl sys_old_shmctl 31 n64 dup sys_dup 32 n64 dup2 sys_dup2 33 n64 pause sys_pause @@ -71,12 +71,12 @@ 61 n64 uname sys_newuname 62 n64 semget sys_semget 63 n64 semop sys_semop -64 n64 semctl sys_semctl +64 n64 semctl sys_old_semctl 65 n64 shmdt sys_shmdt 66 n64 msgget sys_msgget 67 n64 msgsnd sys_msgsnd 68 n64 msgrcv sys_msgrcv -69 n64 msgctl sys_msgctl +69 n64 msgctl sys_old_msgctl 70 n64 fcntl sys_fcntl 71 n64 flock sys_flock 72 n64 fsync sys_fsync diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl index 69cf91b03b26..f8befa11b0c4 100644 --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl @@ -103,7 +103,7 @@ 91 common madvise sys_madvise 92 common shmget sys_shmget 93 common shmat xtensa_shmat -94 common shmctl sys_shmctl +94 common shmctl sys_old_shmctl 95 common shmdt sys_shmdt # Socket Operations 96 common socket sys_socket @@ -177,12 +177,12 @@ 161 common semtimedop sys_semtimedop 162 common semget sys_semget 163 common semop sys_semop -164 common semctl sys_semctl +164 common semctl sys_old_semctl 165 common available165 sys_ni_syscall 166 common msgget sys_msgget 167 common msgsnd sys_msgsnd 168 common msgrcv sys_msgrcv -169 common msgctl sys_msgctl +169 common msgctl sys_old_msgctl 170 common available170 sys_ni_syscall # File System 171 common umount2 sys_umount diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index fb63045a0fb6..938d8908b9e0 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -717,6 +717,7 @@ asmlinkage long sys_mq_getsetattr(mqd_t mqdes, const struct mq_attr __user *mqst /* ipc/msg.c */ asmlinkage long sys_msgget(key_t key, int msgflg); +asmlinkage long sys_old_msgctl(int msqid, int cmd, struct msqid_ds __user *buf); asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf); asmlinkage long sys_msgrcv(int msqid, struct msgbuf __user *msgp, size_t msgsz, long msgtyp, int msgflg); @@ -726,6 +727,7 @@ asmlinkage long sys_msgsnd(int msqid, struct msgbuf __user *msgp, /* ipc/sem.c */ asmlinkage long sys_semget(key_t key, int nsems, int semflg); asmlinkage long sys_semctl(int semid, int semnum, int cmd, unsigned long arg); +asmlinkage long sys_old_semctl(int semid, int semnum, int cmd, unsigned long arg); asmlinkage long sys_semtimedop(int semid, struct sembuf __user *sops, unsigned nsops, const struct __kernel_timespec __user *timeout); @@ -734,6 +736,7 @@ asmlinkage long sys_semop(int semid, struct sembuf __user *sops, /* ipc/shm.c */ asmlinkage long sys_shmget(key_t key, size_t size, int flag); +asmlinkage long sys_old_shmctl(int shmid, int cmd, struct shmid_ds __user *buf); asmlinkage long sys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf); asmlinkage long sys_shmat(int shmid, char __user *shmaddr, int shmflg); asmlinkage long sys_shmdt(char __user *shmaddr); diff --git a/ipc/msg.c b/ipc/msg.c index 0833c6405915..8dec945fa030 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -567,9 +567,8 @@ out_unlock: return err; } -long ksys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf) +static long ksys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf, int version) { - int version; struct ipc_namespace *ns; struct msqid64_ds msqid64; int err; @@ -577,7 +576,6 @@ long ksys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf) if (msqid < 0 || cmd < 0) return -EINVAL; - version = ipc_parse_version(&cmd); ns = current->nsproxy->ipc_ns; switch (cmd) { @@ -613,9 +611,23 @@ long ksys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf) SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) { - return ksys_msgctl(msqid, cmd, buf); + return ksys_msgctl(msqid, cmd, buf, IPC_64); } +#ifdef CONFIG_ARCH_WANT_IPC_PARSE_VERSION +long ksys_old_msgctl(int msqid, int cmd, struct msqid_ds __user *buf) +{ + int version = ipc_parse_version(&cmd); + + return ksys_msgctl(msqid, cmd, buf, version); +} + +SYSCALL_DEFINE3(old_msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) +{ + return ksys_old_msgctl(msqid, cmd, buf); +} +#endif + #ifdef CONFIG_COMPAT struct compat_msqid_ds { @@ -689,12 +701,11 @@ static int copy_compat_msqid_to_user(void __user *buf, struct msqid64_ds *in, } } -long compat_ksys_msgctl(int msqid, int cmd, void __user *uptr) +static long compat_ksys_msgctl(int msqid, int cmd, void __user *uptr, int version) { struct ipc_namespace *ns; int err; struct msqid64_ds msqid64; - int version = compat_ipc_parse_version(&cmd); ns = current->nsproxy->ipc_ns; @@ -734,8 +745,22 @@ long compat_ksys_msgctl(int msqid, int cmd, void __user *uptr) COMPAT_SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, void __user *, uptr) { - return compat_ksys_msgctl(msqid, cmd, uptr); + return compat_ksys_msgctl(msqid, cmd, uptr, IPC_64); } + +#ifdef CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION +long compat_ksys_old_msgctl(int msqid, int cmd, void __user *uptr) +{ + int version = compat_ipc_parse_version(&cmd); + + return compat_ksys_msgctl(msqid, cmd, uptr, version); +} + +COMPAT_SYSCALL_DEFINE3(old_msgctl, int, msqid, int, cmd, void __user *, uptr) +{ + return compat_ksys_old_msgctl(msqid, cmd, uptr); +} +#endif #endif static int testmsg(struct msg_msg *msg, long type, int mode) diff --git a/ipc/sem.c b/ipc/sem.c index 745dc6187e84..d1efff3a81bb 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -1634,9 +1634,8 @@ out_up: return err; } -long ksys_semctl(int semid, int semnum, int cmd, unsigned long arg) +static long ksys_semctl(int semid, int semnum, int cmd, unsigned long arg, int version) { - int version; struct ipc_namespace *ns; void __user *p = (void __user *)arg; struct semid64_ds semid64; @@ -1645,7 +1644,6 @@ long ksys_semctl(int semid, int semnum, int cmd, unsigned long arg) if (semid < 0) return -EINVAL; - version = ipc_parse_version(&cmd); ns = current->nsproxy->ipc_ns; switch (cmd) { @@ -1691,9 +1689,23 @@ long ksys_semctl(int semid, int semnum, int cmd, unsigned long arg) SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, unsigned long, arg) { - return ksys_semctl(semid, semnum, cmd, arg); + return ksys_semctl(semid, semnum, cmd, arg, IPC_64); } +#ifdef CONFIG_ARCH_WANT_IPC_PARSE_VERSION +long ksys_old_semctl(int semid, int semnum, int cmd, unsigned long arg) +{ + int version = ipc_parse_version(&cmd); + + return ksys_semctl(semid, semnum, cmd, arg, version); +} + +SYSCALL_DEFINE4(old_semctl, int, semid, int, semnum, int, cmd, unsigned long, arg) +{ + return ksys_old_semctl(semid, semnum, cmd, arg); +} +#endif + #ifdef CONFIG_COMPAT struct compat_semid_ds { @@ -1744,12 +1756,11 @@ static int copy_compat_semid_to_user(void __user *buf, struct semid64_ds *in, } } -long compat_ksys_semctl(int semid, int semnum, int cmd, int arg) +static long compat_ksys_semctl(int semid, int semnum, int cmd, int arg, int version) { void __user *p = compat_ptr(arg); struct ipc_namespace *ns; struct semid64_ds semid64; - int version = compat_ipc_parse_version(&cmd); int err; ns = current->nsproxy->ipc_ns; @@ -1792,8 +1803,22 @@ long compat_ksys_semctl(int semid, int semnum, int cmd, int arg) COMPAT_SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, int, arg) { - return compat_ksys_semctl(semid, semnum, cmd, arg); + return compat_ksys_semctl(semid, semnum, cmd, arg, IPC_64); } + +#ifdef CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION +long compat_ksys_old_semctl(int semid, int semnum, int cmd, int arg) +{ + int version = compat_ipc_parse_version(&cmd); + + return compat_ksys_semctl(semid, semnum, cmd, arg, version); +} + +COMPAT_SYSCALL_DEFINE4(old_semctl, int, semid, int, semnum, int, cmd, int, arg) +{ + return compat_ksys_old_semctl(semid, semnum, cmd, arg); +} +#endif #endif /* If the task doesn't already have a undo_list, then allocate one diff --git a/ipc/shm.c b/ipc/shm.c index 0842411cb0e9..ce1ca9f7c6e9 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -1137,16 +1137,15 @@ out_unlock1: return err; } -long ksys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf) +static long ksys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf, int version) { - int err, version; + int err; struct ipc_namespace *ns; struct shmid64_ds sem64; if (cmd < 0 || shmid < 0) return -EINVAL; - version = ipc_parse_version(&cmd); ns = current->nsproxy->ipc_ns; switch (cmd) { @@ -1194,8 +1193,22 @@ long ksys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf) SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) { - return ksys_shmctl(shmid, cmd, buf); + return ksys_shmctl(shmid, cmd, buf, IPC_64); +} + +#ifdef CONFIG_ARCH_WANT_IPC_PARSE_VERSION +long ksys_old_shmctl(int shmid, int cmd, struct shmid_ds __user *buf) +{ + int version = ipc_parse_version(&cmd); + + return ksys_shmctl(shmid, cmd, buf, version); +} + +SYSCALL_DEFINE3(old_shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) +{ + return ksys_old_shmctl(shmid, cmd, buf); } +#endif #ifdef CONFIG_COMPAT @@ -1319,11 +1332,10 @@ static int copy_compat_shmid_from_user(struct shmid64_ds *out, void __user *buf, } } -long compat_ksys_shmctl(int shmid, int cmd, void __user *uptr) +long compat_ksys_shmctl(int shmid, int cmd, void __user *uptr, int version) { struct ipc_namespace *ns; struct shmid64_ds sem64; - int version = compat_ipc_parse_version(&cmd); int err; ns = current->nsproxy->ipc_ns; @@ -1378,8 +1390,22 @@ long compat_ksys_shmctl(int shmid, int cmd, void __user *uptr) COMPAT_SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, void __user *, uptr) { - return compat_ksys_shmctl(shmid, cmd, uptr); + return compat_ksys_shmctl(shmid, cmd, uptr, IPC_64); } + +#ifdef CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION +long compat_ksys_old_shmctl(int shmid, int cmd, void __user *uptr) +{ + int version = compat_ipc_parse_version(&cmd); + + return compat_ksys_shmctl(shmid, cmd, uptr, version); +} + +COMPAT_SYSCALL_DEFINE3(old_shmctl, int, shmid, int, cmd, void __user *, uptr) +{ + return compat_ksys_old_shmctl(shmid, cmd, uptr); +} +#endif #endif /* diff --git a/ipc/syscall.c b/ipc/syscall.c index 3cf8ad703a4d..581bdff4e7c5 100644 --- a/ipc/syscall.c +++ b/ipc/syscall.c @@ -47,7 +47,7 @@ int ksys_ipc(unsigned int call, int first, unsigned long second, return -EINVAL; if (get_user(arg, (unsigned long __user *) ptr)) return -EFAULT; - return ksys_semctl(first, second, third, arg); + return ksys_old_semctl(first, second, third, arg); } case MSGSND: @@ -75,7 +75,7 @@ int ksys_ipc(unsigned int call, int first, unsigned long second, case MSGGET: return ksys_msgget((key_t) first, second); case MSGCTL: - return ksys_msgctl(first, second, + return ksys_old_msgctl(first, second, (struct msqid_ds __user *)ptr); case SHMAT: @@ -100,7 +100,7 @@ int ksys_ipc(unsigned int call, int first, unsigned long second, case SHMGET: return ksys_shmget(first, second, third); case SHMCTL: - return ksys_shmctl(first, second, + return ksys_old_shmctl(first, second, (struct shmid_ds __user *) ptr); default: return -ENOSYS; @@ -152,7 +152,7 @@ int compat_ksys_ipc(u32 call, int first, int second, return -EINVAL; if (get_user(pad, (u32 __user *) compat_ptr(ptr))) return -EFAULT; - return compat_ksys_semctl(first, second, third, pad); + return compat_ksys_old_semctl(first, second, third, pad); case MSGSND: return compat_ksys_msgsnd(first, ptr, second, third); @@ -177,7 +177,7 @@ int compat_ksys_ipc(u32 call, int first, int second, case MSGGET: return ksys_msgget(first, second); case MSGCTL: - return compat_ksys_msgctl(first, second, compat_ptr(ptr)); + return compat_ksys_old_msgctl(first, second, compat_ptr(ptr)); case SHMAT: { int err; @@ -196,7 +196,7 @@ int compat_ksys_ipc(u32 call, int first, int second, case SHMGET: return ksys_shmget(first, (unsigned int)second, third); case SHMCTL: - return compat_ksys_shmctl(first, second, compat_ptr(ptr)); + return compat_ksys_old_shmctl(first, second, compat_ptr(ptr)); } return -ENOSYS; diff --git a/ipc/util.h b/ipc/util.h index d768fdbed515..e272be622ae7 100644 --- a/ipc/util.h +++ b/ipc/util.h @@ -160,10 +160,7 @@ static inline void ipc_update_pid(struct pid **pos, struct pid *pid) } } -#ifndef CONFIG_ARCH_WANT_IPC_PARSE_VERSION -/* On IA-64, we always use the "64-bit version" of the IPC structures. */ -# define ipc_parse_version(cmd) IPC_64 -#else +#ifdef CONFIG_ARCH_WANT_IPC_PARSE_VERSION int ipc_parse_version(int *cmd); #endif @@ -246,13 +243,9 @@ int get_compat_ipc64_perm(struct ipc64_perm *, static inline int compat_ipc_parse_version(int *cmd) { -#ifdef CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION int version = *cmd & IPC_64; *cmd &= ~IPC_64; return version; -#else - return IPC_64; -#endif } #endif @@ -261,29 +254,29 @@ long ksys_semtimedop(int semid, struct sembuf __user *tsops, unsigned int nsops, const struct __kernel_timespec __user *timeout); long ksys_semget(key_t key, int nsems, int semflg); -long ksys_semctl(int semid, int semnum, int cmd, unsigned long arg); +long ksys_old_semctl(int semid, int semnum, int cmd, unsigned long arg); long ksys_msgget(key_t key, int msgflg); -long ksys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf); +long ksys_old_msgctl(int msqid, int cmd, struct msqid_ds __user *buf); long ksys_msgrcv(int msqid, struct msgbuf __user *msgp, size_t msgsz, long msgtyp, int msgflg); long ksys_msgsnd(int msqid, struct msgbuf __user *msgp, size_t msgsz, int msgflg); long ksys_shmget(key_t key, size_t size, int shmflg); long ksys_shmdt(char __user *shmaddr); -long ksys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf); +long ksys_old_shmctl(int shmid, int cmd, struct shmid_ds __user *buf); /* for CONFIG_ARCH_WANT_OLD_COMPAT_IPC */ long compat_ksys_semtimedop(int semid, struct sembuf __user *tsems, unsigned int nsops, const struct old_timespec32 __user *timeout); #ifdef CONFIG_COMPAT -long compat_ksys_semctl(int semid, int semnum, int cmd, int arg); -long compat_ksys_msgctl(int msqid, int cmd, void __user *uptr); +long compat_ksys_old_semctl(int semid, int semnum, int cmd, int arg); +long compat_ksys_old_msgctl(int msqid, int cmd, void __user *uptr); long compat_ksys_msgrcv(int msqid, compat_uptr_t msgp, compat_ssize_t msgsz, compat_long_t msgtyp, int msgflg); long compat_ksys_msgsnd(int msqid, compat_uptr_t msgp, compat_ssize_t msgsz, int msgflg); -long compat_ksys_shmctl(int shmid, int cmd, void __user *uptr); +long compat_ksys_old_shmctl(int shmid, int cmd, void __user *uptr); #endif /* CONFIG_COMPAT */ #endif diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index bc934f31ab10..ce04431a40d1 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -197,6 +197,7 @@ COND_SYSCALL_COMPAT(mq_getsetattr); /* ipc/msg.c */ COND_SYSCALL(msgget); +COND_SYSCALL(old_msgctl); COND_SYSCALL(msgctl); COND_SYSCALL_COMPAT(msgctl); COND_SYSCALL(msgrcv); @@ -206,6 +207,7 @@ COND_SYSCALL_COMPAT(msgsnd); /* ipc/sem.c */ COND_SYSCALL(semget); +COND_SYSCALL(old_semctl); COND_SYSCALL(semctl); COND_SYSCALL_COMPAT(semctl); COND_SYSCALL(semtimedop); @@ -214,6 +216,7 @@ COND_SYSCALL(semop); /* ipc/shm.c */ COND_SYSCALL(shmget); +COND_SYSCALL(old_shmctl); COND_SYSCALL(shmctl); COND_SYSCALL_COMPAT(shmctl); COND_SYSCALL(shmat); -- cgit From f1a2a540c86441016ce3dff6590b7a09080871de Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 10 Jan 2019 23:14:36 +0200 Subject: video/hdmi: Add an enum for HDMI packet types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We'll be wanting to send more than just infoframes over HDMI. So add an enum for other packet types. TODO: Maybe just include the infoframe types in the packet type enum and get rid of the infoframe type enum? v2: s/AUDIO_CP/ACP/ (Shashank) Cc: Thierry Reding Cc: Hans Verkuil Cc: linux-media@vger.kernel.org Reviewed-by: Shashank Sharma Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190110211445.24177-2-ville.syrjala@linux.intel.com --- include/linux/hdmi.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hdmi.h b/include/linux/hdmi.h index d2bacf502429..927ad6451105 100644 --- a/include/linux/hdmi.h +++ b/include/linux/hdmi.h @@ -27,6 +27,21 @@ #include #include +enum hdmi_packet_type { + HDMI_PACKET_TYPE_NULL = 0x00, + HDMI_PACKET_TYPE_AUDIO_CLOCK_REGEN = 0x01, + HDMI_PACKET_TYPE_AUDIO_SAMPLE = 0x02, + HDMI_PACKET_TYPE_GENERAL_CONTROL = 0x03, + HDMI_PACKET_TYPE_ACP = 0x04, + HDMI_PACKET_TYPE_ISRC1 = 0x05, + HDMI_PACKET_TYPE_ISRC2 = 0x06, + HDMI_PACKET_TYPE_ONE_BIT_AUDIO_SAMPLE = 0x07, + HDMI_PACKET_TYPE_DST_AUDIO = 0x08, + HDMI_PACKET_TYPE_HBR_AUDIO_STREAM = 0x09, + HDMI_PACKET_TYPE_GAMUT_METADATA = 0x0a, + /* + enum hdmi_infoframe_type */ +}; + enum hdmi_infoframe_type { HDMI_INFOFRAME_TYPE_VENDOR = 0x81, HDMI_INFOFRAME_TYPE_AVI = 0x82, -- cgit From 4b7d248b3a1de483ffe9d05c1debbf32a544164d Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Tue, 22 Jan 2019 17:06:39 -0500 Subject: audit: move loginuid and sessionid from CONFIG_AUDITSYSCALL to CONFIG_AUDIT loginuid and sessionid (and audit_log_session_info) should be part of CONFIG_AUDIT scope and not CONFIG_AUDITSYSCALL since it is used in CONFIG_CHANGE, ANOM_LINK, FEATURE_CHANGE (and INTEGRITY_RULE), none of which are otherwise dependent on AUDITSYSCALL. Please see github issue https://github.com/linux-audit/audit-kernel/issues/104 Signed-off-by: Richard Guy Briggs [PM: tweaked subject line for better grep'ing] Signed-off-by: Paul Moore --- fs/proc/base.c | 6 ++-- include/linux/audit.h | 42 +++++++++++++------------ include/linux/sched.h | 2 +- init/init_task.c | 2 +- kernel/audit.c | 85 +++++++++++++++++++++++++++++++++++++++++++++++++++ kernel/auditsc.c | 84 -------------------------------------------------- 6 files changed, 113 insertions(+), 108 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/base.c b/fs/proc/base.c index 633a63462573..a23651ce6960 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1210,7 +1210,7 @@ static const struct file_operations proc_oom_score_adj_operations = { .llseek = default_llseek, }; -#ifdef CONFIG_AUDITSYSCALL +#ifdef CONFIG_AUDIT #define TMPBUFLEN 11 static ssize_t proc_loginuid_read(struct file * file, char __user * buf, size_t count, loff_t *ppos) @@ -3002,7 +3002,7 @@ static const struct pid_entry tgid_base_stuff[] = { ONE("oom_score", S_IRUGO, proc_oom_score), REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), -#ifdef CONFIG_AUDITSYSCALL +#ifdef CONFIG_AUDIT REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), REG("sessionid", S_IRUGO, proc_sessionid_operations), #endif @@ -3390,7 +3390,7 @@ static const struct pid_entry tid_base_stuff[] = { ONE("oom_score", S_IRUGO, proc_oom_score), REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), -#ifdef CONFIG_AUDITSYSCALL +#ifdef CONFIG_AUDIT REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), REG("sessionid", S_IRUGO, proc_sessionid_operations), #endif diff --git a/include/linux/audit.h b/include/linux/audit.h index a625c29a2ea2..ecb5d317d6a2 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -159,6 +159,18 @@ extern int audit_update_lsm_rules(void); extern int audit_rule_change(int type, int seq, void *data, size_t datasz); extern int audit_list_rules_send(struct sk_buff *request_skb, int seq); +extern int audit_set_loginuid(kuid_t loginuid); + +static inline kuid_t audit_get_loginuid(struct task_struct *tsk) +{ + return tsk->loginuid; +} + +static inline unsigned int audit_get_sessionid(struct task_struct *tsk) +{ + return tsk->sessionid; +} + extern u32 audit_enabled; #else /* CONFIG_AUDIT */ static inline __printf(4, 5) @@ -201,6 +213,17 @@ static inline int audit_log_task_context(struct audit_buffer *ab) } static inline void audit_log_task_info(struct audit_buffer *ab) { } + +static inline kuid_t audit_get_loginuid(struct task_struct *tsk) +{ + return INVALID_UID; +} + +static inline unsigned int audit_get_sessionid(struct task_struct *tsk) +{ + return AUDIT_SID_UNSET; +} + #define audit_enabled AUDIT_OFF #endif /* CONFIG_AUDIT */ @@ -323,17 +346,6 @@ static inline void audit_ptrace(struct task_struct *t) extern unsigned int audit_serial(void); extern int auditsc_get_stamp(struct audit_context *ctx, struct timespec64 *t, unsigned int *serial); -extern int audit_set_loginuid(kuid_t loginuid); - -static inline kuid_t audit_get_loginuid(struct task_struct *tsk) -{ - return tsk->loginuid; -} - -static inline unsigned int audit_get_sessionid(struct task_struct *tsk) -{ - return tsk->sessionid; -} extern void __audit_ipc_obj(struct kern_ipc_perm *ipcp); extern void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mode); @@ -519,14 +531,6 @@ static inline int auditsc_get_stamp(struct audit_context *ctx, { return 0; } -static inline kuid_t audit_get_loginuid(struct task_struct *tsk) -{ - return INVALID_UID; -} -static inline unsigned int audit_get_sessionid(struct task_struct *tsk) -{ - return AUDIT_SID_UNSET; -} static inline void audit_ipc_obj(struct kern_ipc_perm *ipcp) { } static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t uid, diff --git a/include/linux/sched.h b/include/linux/sched.h index 89541d248893..f9788bb122c5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -886,7 +886,7 @@ struct task_struct { struct callback_head *task_works; struct audit_context *audit_context; -#ifdef CONFIG_AUDITSYSCALL +#ifdef CONFIG_AUDIT kuid_t loginuid; unsigned int sessionid; #endif diff --git a/init/init_task.c b/init/init_task.c index 5aebe3be4d7c..39c3109acc1a 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -121,7 +121,7 @@ struct task_struct init_task .thread_pid = &init_struct_pid, .thread_group = LIST_HEAD_INIT(init_task.thread_group), .thread_node = LIST_HEAD_INIT(init_signals.thread_head), -#ifdef CONFIG_AUDITSYSCALL +#ifdef CONFIG_AUDIT .loginuid = INVALID_UID, .sessionid = AUDIT_SID_UNSET, #endif diff --git a/kernel/audit.c b/kernel/audit.c index c2a7662cc254..2a32f304223d 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -2335,6 +2335,91 @@ void audit_log_link_denied(const char *operation) audit_log_end(ab); } +/* global counter which is incremented every time something logs in */ +static atomic_t session_id = ATOMIC_INIT(0); + +static int audit_set_loginuid_perm(kuid_t loginuid) +{ + /* if we are unset, we don't need privs */ + if (!audit_loginuid_set(current)) + return 0; + /* if AUDIT_FEATURE_LOGINUID_IMMUTABLE means never ever allow a change*/ + if (is_audit_feature_set(AUDIT_FEATURE_LOGINUID_IMMUTABLE)) + return -EPERM; + /* it is set, you need permission */ + if (!capable(CAP_AUDIT_CONTROL)) + return -EPERM; + /* reject if this is not an unset and we don't allow that */ + if (is_audit_feature_set(AUDIT_FEATURE_ONLY_UNSET_LOGINUID) + && uid_valid(loginuid)) + return -EPERM; + return 0; +} + +static void audit_log_set_loginuid(kuid_t koldloginuid, kuid_t kloginuid, + unsigned int oldsessionid, + unsigned int sessionid, int rc) +{ + struct audit_buffer *ab; + uid_t uid, oldloginuid, loginuid; + struct tty_struct *tty; + + if (!audit_enabled) + return; + + ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_LOGIN); + if (!ab) + return; + + uid = from_kuid(&init_user_ns, task_uid(current)); + oldloginuid = from_kuid(&init_user_ns, koldloginuid); + loginuid = from_kuid(&init_user_ns, kloginuid), + tty = audit_get_tty(); + + audit_log_format(ab, "pid=%d uid=%u", task_tgid_nr(current), uid); + audit_log_task_context(ab); + audit_log_format(ab, " old-auid=%u auid=%u tty=%s old-ses=%u ses=%u res=%d", + oldloginuid, loginuid, tty ? tty_name(tty) : "(none)", + oldsessionid, sessionid, !rc); + audit_put_tty(tty); + audit_log_end(ab); +} + +/** + * audit_set_loginuid - set current task's loginuid + * @loginuid: loginuid value + * + * Returns 0. + * + * Called (set) from fs/proc/base.c::proc_loginuid_write(). + */ +int audit_set_loginuid(kuid_t loginuid) +{ + unsigned int oldsessionid, sessionid = AUDIT_SID_UNSET; + kuid_t oldloginuid; + int rc; + + oldloginuid = audit_get_loginuid(current); + oldsessionid = audit_get_sessionid(current); + + rc = audit_set_loginuid_perm(loginuid); + if (rc) + goto out; + + /* are we setting or clearing? */ + if (uid_valid(loginuid)) { + sessionid = (unsigned int)atomic_inc_return(&session_id); + if (unlikely(sessionid == AUDIT_SID_UNSET)) + sessionid = (unsigned int)atomic_inc_return(&session_id); + } + + current->sessionid = sessionid; + current->loginuid = loginuid; +out: + audit_log_set_loginuid(oldloginuid, loginuid, oldsessionid, sessionid, rc); + return rc; +} + /** * audit_log_end - end one audit record * @ab: the audit_buffer diff --git a/kernel/auditsc.c b/kernel/auditsc.c index b585ceb2f7a2..572d247957fb 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1983,90 +1983,6 @@ int auditsc_get_stamp(struct audit_context *ctx, return 1; } -/* global counter which is incremented every time something logs in */ -static atomic_t session_id = ATOMIC_INIT(0); - -static int audit_set_loginuid_perm(kuid_t loginuid) -{ - /* if we are unset, we don't need privs */ - if (!audit_loginuid_set(current)) - return 0; - /* if AUDIT_FEATURE_LOGINUID_IMMUTABLE means never ever allow a change*/ - if (is_audit_feature_set(AUDIT_FEATURE_LOGINUID_IMMUTABLE)) - return -EPERM; - /* it is set, you need permission */ - if (!capable(CAP_AUDIT_CONTROL)) - return -EPERM; - /* reject if this is not an unset and we don't allow that */ - if (is_audit_feature_set(AUDIT_FEATURE_ONLY_UNSET_LOGINUID) && uid_valid(loginuid)) - return -EPERM; - return 0; -} - -static void audit_log_set_loginuid(kuid_t koldloginuid, kuid_t kloginuid, - unsigned int oldsessionid, unsigned int sessionid, - int rc) -{ - struct audit_buffer *ab; - uid_t uid, oldloginuid, loginuid; - struct tty_struct *tty; - - if (!audit_enabled) - return; - - ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_LOGIN); - if (!ab) - return; - - uid = from_kuid(&init_user_ns, task_uid(current)); - oldloginuid = from_kuid(&init_user_ns, koldloginuid); - loginuid = from_kuid(&init_user_ns, kloginuid), - tty = audit_get_tty(); - - audit_log_format(ab, "pid=%d uid=%u", task_tgid_nr(current), uid); - audit_log_task_context(ab); - audit_log_format(ab, " old-auid=%u auid=%u tty=%s old-ses=%u ses=%u res=%d", - oldloginuid, loginuid, tty ? tty_name(tty) : "(none)", - oldsessionid, sessionid, !rc); - audit_put_tty(tty); - audit_log_end(ab); -} - -/** - * audit_set_loginuid - set current task's audit_context loginuid - * @loginuid: loginuid value - * - * Returns 0. - * - * Called (set) from fs/proc/base.c::proc_loginuid_write(). - */ -int audit_set_loginuid(kuid_t loginuid) -{ - unsigned int oldsessionid, sessionid = AUDIT_SID_UNSET; - kuid_t oldloginuid; - int rc; - - oldloginuid = audit_get_loginuid(current); - oldsessionid = audit_get_sessionid(current); - - rc = audit_set_loginuid_perm(loginuid); - if (rc) - goto out; - - /* are we setting or clearing? */ - if (uid_valid(loginuid)) { - sessionid = (unsigned int)atomic_inc_return(&session_id); - if (unlikely(sessionid == AUDIT_SID_UNSET)) - sessionid = (unsigned int)atomic_inc_return(&session_id); - } - - current->sessionid = sessionid; - current->loginuid = loginuid; -out: - audit_log_set_loginuid(oldloginuid, loginuid, oldsessionid, sessionid, rc); - return rc; -} - /** * __audit_mq_open - record audit data for a POSIX MQ open * @oflag: open flag -- cgit From 2fec30e245a3b46fef89c4cb1f74eefc5fbb29a6 Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Wed, 23 Jan 2019 21:36:25 -0500 Subject: audit: add support for fcaps v3 V3 namespaced file capabilities were introduced in commit 8db6c34f1dbc ("Introduce v3 namespaced file capabilities") Add support for these by adding the "frootid" field to the existing fcaps fields in the NAME and BPRM_FCAPS records. Please see github issue https://github.com/linux-audit/audit-kernel/issues/103 Signed-off-by: Richard Guy Briggs Acked-by: Serge Hallyn [PM: comment tweak to fit an 80 char line width] Signed-off-by: Paul Moore --- include/linux/capability.h | 5 +++-- kernel/audit.c | 6 ++++-- kernel/audit.h | 1 + kernel/auditsc.c | 4 ++++ security/commoncap.c | 2 ++ 5 files changed, 14 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/capability.h b/include/linux/capability.h index f640dcbc880c..b769330e9380 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -14,7 +14,7 @@ #define _LINUX_CAPABILITY_H #include - +#include #define _KERNEL_CAPABILITY_VERSION _LINUX_CAPABILITY_VERSION_3 #define _KERNEL_CAPABILITY_U32S _LINUX_CAPABILITY_U32S_3 @@ -25,11 +25,12 @@ typedef struct kernel_cap_struct { __u32 cap[_KERNEL_CAPABILITY_U32S]; } kernel_cap_t; -/* exact same as vfs_cap_data but in cpu endian and always filled completely */ +/* same as vfs_ns_cap_data but in cpu endian and always filled completely */ struct cpu_vfs_cap_data { __u32 magic_etc; kernel_cap_t permitted; kernel_cap_t inheritable; + kuid_t rootid; }; #define _USER_CAP_HEADER_SIZE (sizeof(struct __user_cap_header_struct)) diff --git a/kernel/audit.c b/kernel/audit.c index 2a32f304223d..3f3f1888cac7 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -2084,8 +2084,9 @@ static void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name) { audit_log_cap(ab, "cap_fp", &name->fcap.permitted); audit_log_cap(ab, "cap_fi", &name->fcap.inheritable); - audit_log_format(ab, " cap_fe=%d cap_fver=%x", - name->fcap.fE, name->fcap_ver); + audit_log_format(ab, " cap_fe=%d cap_fver=%x cap_frootid=%d", + name->fcap.fE, name->fcap_ver, + from_kuid(&init_user_ns, name->fcap.rootid)); } static inline int audit_copy_fcaps(struct audit_names *name, @@ -2104,6 +2105,7 @@ static inline int audit_copy_fcaps(struct audit_names *name, name->fcap.permitted = caps.permitted; name->fcap.inheritable = caps.inheritable; name->fcap.fE = !!(caps.magic_etc & VFS_CAP_FLAGS_EFFECTIVE); + name->fcap.rootid = caps.rootid; name->fcap_ver = (caps.magic_etc & VFS_CAP_REVISION_MASK) >> VFS_CAP_REVISION_SHIFT; diff --git a/kernel/audit.h b/kernel/audit.h index 6ffb70575082..deefdbe61a47 100644 --- a/kernel/audit.h +++ b/kernel/audit.h @@ -69,6 +69,7 @@ struct audit_cap_data { kernel_cap_t effective; /* effective set of process */ }; kernel_cap_t ambient; + kuid_t rootid; }; /* When fs/namei.c:getname() is called, we store the pointer in name and bump diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 572d247957fb..c16beb25fd0a 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1358,6 +1358,9 @@ static void audit_log_exit(void) audit_log_cap(ab, "pi", &axs->new_pcap.inheritable); audit_log_cap(ab, "pe", &axs->new_pcap.effective); audit_log_cap(ab, "pa", &axs->new_pcap.ambient); + audit_log_format(ab, " frootid=%d", + from_kuid(&init_user_ns, + axs->fcap.rootid)); break; } } @@ -2271,6 +2274,7 @@ int __audit_log_bprm_fcaps(struct linux_binprm *bprm, ax->fcap.permitted = vcaps.permitted; ax->fcap.inheritable = vcaps.inheritable; ax->fcap.fE = !!(vcaps.magic_etc & VFS_CAP_FLAGS_EFFECTIVE); + ax->fcap.rootid = vcaps.rootid; ax->fcap_ver = (vcaps.magic_etc & VFS_CAP_REVISION_MASK) >> VFS_CAP_REVISION_SHIFT; ax->old_pcap.permitted = old->cap_permitted; diff --git a/security/commoncap.c b/security/commoncap.c index 232db019f051..c097f3568001 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -643,6 +643,8 @@ int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data cpu_caps->permitted.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK; cpu_caps->inheritable.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK; + cpu_caps->rootid = rootkuid; + return 0; } -- cgit From 40852275a94afb3e836be9248399e036982d1a79 Mon Sep 17 00:00:00 2001 From: Micah Morton Date: Tue, 22 Jan 2019 14:42:09 -0800 Subject: LSM: add SafeSetID module that gates setid calls This change ensures that the set*uid family of syscalls in kernel/sys.c (setreuid, setuid, setresuid, setfsuid) all call ns_capable_common with the CAP_OPT_INSETID flag, so capability checks in the security_capable hook can know whether they are being called from within a set*uid syscall. This change is a no-op by itself, but is needed for the proposed SafeSetID LSM. Signed-off-by: Micah Morton Acked-by: Kees Cook Signed-off-by: James Morris --- include/linux/capability.h | 5 +++++ kernel/capability.c | 19 +++++++++++++++++++ kernel/sys.c | 10 +++++----- 3 files changed, 29 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/capability.h b/include/linux/capability.h index f640dcbc880c..c3f9a4d558a0 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -209,6 +209,7 @@ extern bool has_ns_capability_noaudit(struct task_struct *t, extern bool capable(int cap); extern bool ns_capable(struct user_namespace *ns, int cap); extern bool ns_capable_noaudit(struct user_namespace *ns, int cap); +extern bool ns_capable_setid(struct user_namespace *ns, int cap); #else static inline bool has_capability(struct task_struct *t, int cap) { @@ -240,6 +241,10 @@ static inline bool ns_capable_noaudit(struct user_namespace *ns, int cap) { return true; } +static inline bool ns_capable_setid(struct user_namespace *ns, int cap) +{ + return true; +} #endif /* CONFIG_MULTIUSER */ extern bool privileged_wrt_inode_uidgid(struct user_namespace *ns, const struct inode *inode); extern bool capable_wrt_inode_uidgid(const struct inode *inode, int cap); diff --git a/kernel/capability.c b/kernel/capability.c index cfbbcb68e11e..1444f3954d75 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -415,6 +415,25 @@ bool ns_capable_noaudit(struct user_namespace *ns, int cap) } EXPORT_SYMBOL(ns_capable_noaudit); +/** + * ns_capable_setid - Determine if the current task has a superior capability + * in effect, while signalling that this check is being done from within a + * setid syscall. + * @ns: The usernamespace we want the capability in + * @cap: The capability to be tested for + * + * Return true if the current task has the given superior capability currently + * available for use, false if not. + * + * This sets PF_SUPERPRIV on the task if the capability is available on the + * assumption that it's about to be used. + */ +bool ns_capable_setid(struct user_namespace *ns, int cap) +{ + return ns_capable_common(ns, cap, CAP_OPT_INSETID); +} +EXPORT_SYMBOL(ns_capable_setid); + /** * capable - Determine if the current task has a superior capability in effect * @cap: The capability to be tested for diff --git a/kernel/sys.c b/kernel/sys.c index f7eb62eceb24..c5f875048aef 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -516,7 +516,7 @@ long __sys_setreuid(uid_t ruid, uid_t euid) new->uid = kruid; if (!uid_eq(old->uid, kruid) && !uid_eq(old->euid, kruid) && - !ns_capable(old->user_ns, CAP_SETUID)) + !ns_capable_setid(old->user_ns, CAP_SETUID)) goto error; } @@ -525,7 +525,7 @@ long __sys_setreuid(uid_t ruid, uid_t euid) if (!uid_eq(old->uid, keuid) && !uid_eq(old->euid, keuid) && !uid_eq(old->suid, keuid) && - !ns_capable(old->user_ns, CAP_SETUID)) + !ns_capable_setid(old->user_ns, CAP_SETUID)) goto error; } @@ -584,7 +584,7 @@ long __sys_setuid(uid_t uid) old = current_cred(); retval = -EPERM; - if (ns_capable(old->user_ns, CAP_SETUID)) { + if (ns_capable_setid(old->user_ns, CAP_SETUID)) { new->suid = new->uid = kuid; if (!uid_eq(kuid, old->uid)) { retval = set_user(new); @@ -646,7 +646,7 @@ long __sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) old = current_cred(); retval = -EPERM; - if (!ns_capable(old->user_ns, CAP_SETUID)) { + if (!ns_capable_setid(old->user_ns, CAP_SETUID)) { if (ruid != (uid_t) -1 && !uid_eq(kruid, old->uid) && !uid_eq(kruid, old->euid) && !uid_eq(kruid, old->suid)) goto error; @@ -814,7 +814,7 @@ long __sys_setfsuid(uid_t uid) if (uid_eq(kuid, old->uid) || uid_eq(kuid, old->euid) || uid_eq(kuid, old->suid) || uid_eq(kuid, old->fsuid) || - ns_capable(old->user_ns, CAP_SETUID)) { + ns_capable_setid(old->user_ns, CAP_SETUID)) { if (!uid_eq(kuid, old->fsuid)) { new->fsuid = kuid; if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0) -- cgit From 6ba7d681aca22e53385bdb35b1d7662e61905760 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 9 Jan 2019 15:22:03 -0800 Subject: rcu: Remove wrapper definitions for obsolete RCU update functions None of synchronize_rcu_bh, synchronize_rcu_bh_expedited, call_rcu_bh, rcu_barrier_bh, synchronize_sched, synchronize_sched_expedited, call_rcu_sched, rcu_barrier_sched, get_state_synchronize_sched, and cond_synchronize_sched are actually used. This commit therefore removes their trivial wrapper-function definitions. Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 53 ------------------------------------------------ 1 file changed, 53 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 4db8bcacc51a..0e39e0d2629e 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -896,57 +896,4 @@ rcu_head_after_call_rcu(struct rcu_head *rhp, rcu_callback_t f) return false; } - -/* Transitional pre-consolidation compatibility definitions. */ - -static inline void synchronize_rcu_bh(void) -{ - synchronize_rcu(); -} - -static inline void synchronize_rcu_bh_expedited(void) -{ - synchronize_rcu_expedited(); -} - -static inline void call_rcu_bh(struct rcu_head *head, rcu_callback_t func) -{ - call_rcu(head, func); -} - -static inline void rcu_barrier_bh(void) -{ - rcu_barrier(); -} - -static inline void synchronize_sched(void) -{ - synchronize_rcu(); -} - -static inline void synchronize_sched_expedited(void) -{ - synchronize_rcu_expedited(); -} - -static inline void call_rcu_sched(struct rcu_head *head, rcu_callback_t func) -{ - call_rcu(head, func); -} - -static inline void rcu_barrier_sched(void) -{ - rcu_barrier(); -} - -static inline unsigned long get_state_synchronize_sched(void) -{ - return get_state_synchronize_rcu(); -} - -static inline void cond_synchronize_sched(unsigned long oldstate) -{ - cond_synchronize_rcu(oldstate); -} - #endif /* __LINUX_RCUPDATE_H */ -- cgit From 2aa5503026ceaa8860697b93c9e5bbbcd025ba89 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 20 Nov 2018 08:29:35 -0800 Subject: rcu: Docbook for rcu_head_init() and rcu_head_after_call_rcu() This commit adds the missing asterisks required to make Sphinx pick up the current header comments for these two functions. Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 0e39e0d2629e..632113946757 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -859,7 +859,7 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) /* Has the specified rcu_head structure been handed to call_rcu()? */ -/* +/** * rcu_head_init - Initialize rcu_head for rcu_head_after_call_rcu() * @rhp: The rcu_head structure to initialize. * @@ -874,10 +874,10 @@ static inline void rcu_head_init(struct rcu_head *rhp) rhp->func = (rcu_callback_t)~0L; } -/* +/** * rcu_head_after_call_rcu - Has this rcu_head been passed to call_rcu()? * @rhp: The rcu_head structure to test. - * @func: The function passed to call_rcu() along with @rhp. + * @f: The function passed to call_rcu() along with @rhp. * * Returns @true if the @rhp has been passed to call_rcu() with @func, * and @false otherwise. Emits a warning in any other case, including -- cgit From c98cac603f1ce7d00e2a802b5640bced3bc3c1f2 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 21 Nov 2018 11:35:03 -0800 Subject: rcu: Rename rcu_check_callbacks() to rcu_sched_clock_irq() The name rcu_check_callbacks() arguably made sense back in the early 2000s when RCU was quite a bit simpler than it is today, but it has become quite misleading, especially with the advent of dyntick-idle and NO_HZ_FULL. The rcu_check_callbacks() function is RCU's hook into the scheduling-clock interrupt, and is now but one of many ways that callbacks get promoted to invocable state. This commit therefore changes the name to rcu_sched_clock_irq(), which is the same number of characters and clearly indicates this function's relation to the rest of the Linux kernel. In addition, for the sake of consistency, rcu_flavor_check_callbacks() is also renamed to rcu_flavor_sched_clock_irq(). While in the area, the header comments for both functions are reworked. Signed-off-by: Paul E. McKenney --- .../Memory-Ordering/Tree-RCU-Memory-Ordering.html | 4 ++-- .../TreeRCU-callback-invocation.svg | 2 +- .../RCU/Design/Memory-Ordering/TreeRCU-gp.svg | 4 ++-- .../RCU/Design/Memory-Ordering/TreeRCU-qs.svg | 2 +- include/linux/rcupdate.h | 2 +- kernel/rcu/tiny.c | 2 +- kernel/rcu/tree.c | 18 ++++++++-------- kernel/rcu/tree.h | 2 +- kernel/rcu/tree_plugin.h | 24 +++++++++------------- kernel/time/timer.c | 2 +- 10 files changed, 29 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.html b/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.html index e4d94fba6c89..a3acfd49255f 100644 --- a/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.html +++ b/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.html @@ -485,7 +485,7 @@ section that the grace period must wait on. noted by rcu_node_context_switch() on the left. On the other hand, if the CPU takes a scheduler-clock interrupt while executing in usermode, a quiescent state will be noted by -rcu_check_callbacks() on the right. +rcu_sched_clock_irq() on the right. Either way, the passage through a quiescent state will be noted in a per-CPU variable. @@ -651,7 +651,7 @@ to end. These callbacks are identified by rcu_advance_cbs(), which is usually invoked by __note_gp_changes(). As shown in the diagram below, this invocation can be triggered by -the scheduling-clock interrupt (rcu_check_callbacks() on +the scheduling-clock interrupt (rcu_sched_clock_irq() on the left) or by idle entry (rcu_cleanup_after_idle() on the right, but only for kernels build with CONFIG_RCU_FAST_NO_HZ=y). diff --git a/Documentation/RCU/Design/Memory-Ordering/TreeRCU-callback-invocation.svg b/Documentation/RCU/Design/Memory-Ordering/TreeRCU-callback-invocation.svg index 832408313d93..3fcf0c17cef2 100644 --- a/Documentation/RCU/Design/Memory-Ordering/TreeRCU-callback-invocation.svg +++ b/Documentation/RCU/Design/Memory-Ordering/TreeRCU-callback-invocation.svg @@ -349,7 +349,7 @@ font-weight="bold" font-size="192" id="text202-7-5" - style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_check_callbacks() + style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_sched_clock_irq() rcu_check_callbacks() + xml:space="preserve">rcu_sched_clock_irq() rcu_check_callbacks() + style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_sched_clock_irq() rcu_check_callbacks() + xml:space="preserve">rcu_sched_clock_irq() rcu_read_unlock_special.b.need_qs = false; } } @@ -778,13 +778,13 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) } /* - * Check for a quiescent state from the current CPU. When a task blocks, - * the task is recorded in the corresponding CPU's rcu_node structure, - * which is checked elsewhere. - * - * Caller must disable hard irqs. + * Check for a quiescent state from the current CPU, including voluntary + * context switches for Tasks RCU. When a task blocks, the task is + * recorded in the corresponding CPU's rcu_node structure, which is checked + * elsewhere, hence this function need only check for quiescent states + * related to the current CPU, not to those related to tasks. */ -static void rcu_flavor_check_callbacks(int user) +static void rcu_flavor_sched_clock_irq(int user) { struct task_struct *t = current; @@ -1030,14 +1030,10 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) } /* - * Check to see if this CPU is in a non-context-switch quiescent state - * (user mode or idle loop for rcu, non-softirq execution for rcu_bh). - * Also schedule RCU core processing. - * - * This function must be called from hardirq context. It is normally - * invoked from the scheduling-clock interrupt. + * Check to see if this CPU is in a non-context-switch quiescent state, + * namely user mode and idle loop. */ -static void rcu_flavor_check_callbacks(int user) +static void rcu_flavor_sched_clock_irq(int user) { if (user || rcu_is_cpu_rrupt_from_idle()) { diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 444156debfa0..6eb7cc4b6d52 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1632,7 +1632,7 @@ void update_process_times(int user_tick) /* Note: this timer irq context must be accounted for as well. */ account_process_tick(p, user_tick); run_local_timers(); - rcu_check_callbacks(user_tick); + rcu_sched_clock_irq(user_tick); #ifdef CONFIG_IRQ_WORK if (in_irq()) irq_work_tick(); -- cgit From 423a86a610cad121742ebe698ef98a3b4c87b5dd Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Wed, 12 Dec 2018 14:37:10 -0800 Subject: rcu: Add sparse check to rcu_assign_pointer() The rcu_assign_pointer() function currently doesn't do any sparse checking on the assigned-to pointer. So its possible that a pointer that is not __rcu annotated is assigned with rcu_assign_pointer without sparse complaints. Because rcu_dereference() already does such checking, this commit makes rcu_assign_pointer() to do the same. The extra error could be helpful in cases where an RCU pointer is assigned with rcu_assign_pointer() but not annotated with __rcu. This doesn't generate any code in the normal case because __CHECKER__ is defined only in the context of sparse. This commit also renames rcu_dereference_sparse() to rcu_check_parse() since the checking now happens not only during derereferencing but also during assignment. Test: Introduced an rcu_assign_pointer in code and checked the output of sparse with and without this change. The change correctly causes sparse to throw an error. Signed-off-by: Joel Fernandes (Google) Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 6f8f047c4068..4a2cce4d4bd9 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -309,16 +309,16 @@ static inline void rcu_preempt_sleep_check(void) { } */ #ifdef __CHECKER__ -#define rcu_dereference_sparse(p, space) \ +#define rcu_check_sparse(p, space) \ ((void)(((typeof(*p) space *)p) == p)) #else /* #ifdef __CHECKER__ */ -#define rcu_dereference_sparse(p, space) +#define rcu_check_sparse(p, space) #endif /* #else #ifdef __CHECKER__ */ #define __rcu_access_pointer(p, space) \ ({ \ typeof(*p) *_________p1 = (typeof(*p) *__force)READ_ONCE(p); \ - rcu_dereference_sparse(p, space); \ + rcu_check_sparse(p, space); \ ((typeof(*p) __force __kernel *)(_________p1)); \ }) #define __rcu_dereference_check(p, c, space) \ @@ -326,13 +326,13 @@ static inline void rcu_preempt_sleep_check(void) { } /* Dependency order vs. p above. */ \ typeof(*p) *________p1 = (typeof(*p) *__force)READ_ONCE(p); \ RCU_LOCKDEP_WARN(!(c), "suspicious rcu_dereference_check() usage"); \ - rcu_dereference_sparse(p, space); \ + rcu_check_sparse(p, space); \ ((typeof(*p) __force __kernel *)(________p1)); \ }) #define __rcu_dereference_protected(p, c, space) \ ({ \ RCU_LOCKDEP_WARN(!(c), "suspicious rcu_dereference_protected() usage"); \ - rcu_dereference_sparse(p, space); \ + rcu_check_sparse(p, space); \ ((typeof(*p) __force __kernel *)(p)); \ }) #define rcu_dereference_raw(p) \ @@ -382,6 +382,7 @@ static inline void rcu_preempt_sleep_check(void) { } #define rcu_assign_pointer(p, v) \ ({ \ uintptr_t _r_a_p__v = (uintptr_t)(v); \ + rcu_check_sparse(p, __rcu); \ \ if (__builtin_constant_p(v) && (_r_a_p__v) == (uintptr_t)NULL) \ WRITE_ONCE((p), (typeof(p))(_r_a_p__v)); \ @@ -785,7 +786,7 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) */ #define RCU_INIT_POINTER(p, v) \ do { \ - rcu_dereference_sparse(p, __rcu); \ + rcu_check_sparse(p, __rcu); \ WRITE_ONCE(p, RCU_INITIALIZER(v)); \ } while (0) -- cgit From c8ca1aa774b20f182733d1661f3b6aa3105338e7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 30 Nov 2018 10:06:46 -0800 Subject: srcu: Check for invalid idx argument in srcu_read_unlock() The current SRCU implementation has an idx argument of zero or one, and never anything else. This commit therefore adds a WARN_ON_ONCE() to complain if this restriction is violated. Signed-off-by: Paul E. McKenney --- include/linux/srcu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index c614375cd264..33cf83b9bda8 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -223,6 +223,7 @@ srcu_read_lock_notrace(struct srcu_struct *ssp) __acquires(ssp) static inline void srcu_read_unlock(struct srcu_struct *ssp, int idx) __releases(ssp) { + WARN_ON_ONCE(idx & ~0x1); rcu_lock_release(&(ssp)->dep_map); __srcu_read_unlock(ssp, idx); } -- cgit From e81baf4cb19a9b428ba477fd0423f81672a58817 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 11 Dec 2018 12:12:38 +0100 Subject: srcu: Remove srcu_queue_delayed_work_on() srcu_queue_delayed_work_on() disables preemption (and therefore CPU hotplug in RCU's case) and then checks based on its own accounting if a CPU is online. If the CPU is online it uses queue_delayed_work_on() otherwise it fallbacks to queue_delayed_work(). The problem here is that queue_work() on -RT does not work with disabled preemption. queue_work_on() works also on an offlined CPU. queue_delayed_work_on() has the problem that it is possible to program a timer on an offlined CPU. This timer will fire once the CPU is online again. But until then, the timer remains programmed and nothing will happen. Add a local timer which will fire (as requested per delay) on the local CPU and then enqueue the work on the specific CPU. RCUtorture testing with SRCU-P for 24h showed no problems. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Paul E. McKenney --- include/linux/srcutree.h | 3 ++- kernel/rcu/srcutree.c | 55 +++++++++++++++++++++--------------------------- kernel/rcu/tree.c | 4 ---- kernel/rcu/tree.h | 8 ------- 4 files changed, 26 insertions(+), 44 deletions(-) (limited to 'include/linux') diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 6f292bd3e7db..0faa978c9880 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -45,7 +45,8 @@ struct srcu_data { unsigned long srcu_gp_seq_needed; /* Furthest future GP needed. */ unsigned long srcu_gp_seq_needed_exp; /* Furthest future exp GP. */ bool srcu_cblist_invoking; /* Invoking these CBs? */ - struct delayed_work work; /* Context for CB invoking. */ + struct timer_list delay_work; /* Delay for CB invoking */ + struct work_struct work; /* Context for CB invoking. */ struct rcu_head srcu_barrier_head; /* For srcu_barrier() use. */ struct srcu_node *mynode; /* Leaf srcu_node. */ unsigned long grpmask; /* Mask for leaf srcu_node */ diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 3600d88d8956..7f041f2435df 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -58,6 +58,7 @@ static bool __read_mostly srcu_init_done; static void srcu_invoke_callbacks(struct work_struct *work); static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay); static void process_srcu(struct work_struct *work); +static void srcu_delay_timer(struct timer_list *t); /* Wrappers for lock acquisition and release, see raw_spin_lock_rcu_node(). */ #define spin_lock_rcu_node(p) \ @@ -156,7 +157,8 @@ static void init_srcu_struct_nodes(struct srcu_struct *ssp, bool is_static) snp->grphi = cpu; } sdp->cpu = cpu; - INIT_DELAYED_WORK(&sdp->work, srcu_invoke_callbacks); + INIT_WORK(&sdp->work, srcu_invoke_callbacks); + timer_setup(&sdp->delay_work, srcu_delay_timer, 0); sdp->ssp = ssp; sdp->grpmask = 1 << (cpu - sdp->mynode->grplo); if (is_static) @@ -386,13 +388,19 @@ void _cleanup_srcu_struct(struct srcu_struct *ssp, bool quiesced) } else { flush_delayed_work(&ssp->work); } - for_each_possible_cpu(cpu) + for_each_possible_cpu(cpu) { + struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu); + if (quiesced) { - if (WARN_ON(delayed_work_pending(&per_cpu_ptr(ssp->sda, cpu)->work))) + if (WARN_ON(timer_pending(&sdp->delay_work))) + return; /* Just leak it! */ + if (WARN_ON(work_pending(&sdp->work))) return; /* Just leak it! */ } else { - flush_delayed_work(&per_cpu_ptr(ssp->sda, cpu)->work); + del_timer_sync(&sdp->delay_work); + flush_work(&sdp->work); } + } if (WARN_ON(rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)) != SRCU_STATE_IDLE) || WARN_ON(srcu_readers_active(ssp))) { pr_info("%s: Active srcu_struct %p state: %d\n", @@ -463,39 +471,23 @@ static void srcu_gp_start(struct srcu_struct *ssp) WARN_ON_ONCE(state != SRCU_STATE_SCAN1); } -/* - * Track online CPUs to guide callback workqueue placement. - */ -DEFINE_PER_CPU(bool, srcu_online); -void srcu_online_cpu(unsigned int cpu) +static void srcu_delay_timer(struct timer_list *t) { - WRITE_ONCE(per_cpu(srcu_online, cpu), true); -} + struct srcu_data *sdp = container_of(t, struct srcu_data, delay_work); -void srcu_offline_cpu(unsigned int cpu) -{ - WRITE_ONCE(per_cpu(srcu_online, cpu), false); + queue_work_on(sdp->cpu, rcu_gp_wq, &sdp->work); } -/* - * Place the workqueue handler on the specified CPU if online, otherwise - * just run it whereever. This is useful for placing workqueue handlers - * that are to invoke the specified CPU's callbacks. - */ -static bool srcu_queue_delayed_work_on(int cpu, struct workqueue_struct *wq, - struct delayed_work *dwork, +static void srcu_queue_delayed_work_on(struct srcu_data *sdp, unsigned long delay) { - bool ret; + if (!delay) { + queue_work_on(sdp->cpu, rcu_gp_wq, &sdp->work); + return; + } - preempt_disable(); - if (READ_ONCE(per_cpu(srcu_online, cpu))) - ret = queue_delayed_work_on(cpu, wq, dwork, delay); - else - ret = queue_delayed_work(wq, dwork, delay); - preempt_enable(); - return ret; + timer_reduce(&sdp->delay_work, jiffies + delay); } /* @@ -504,7 +496,7 @@ static bool srcu_queue_delayed_work_on(int cpu, struct workqueue_struct *wq, */ static void srcu_schedule_cbs_sdp(struct srcu_data *sdp, unsigned long delay) { - srcu_queue_delayed_work_on(sdp->cpu, rcu_gp_wq, &sdp->work, delay); + srcu_queue_delayed_work_on(sdp, delay); } /* @@ -1186,7 +1178,8 @@ static void srcu_invoke_callbacks(struct work_struct *work) struct srcu_data *sdp; struct srcu_struct *ssp; - sdp = container_of(work, struct srcu_data, work.work); + sdp = container_of(work, struct srcu_data, work); + ssp = sdp->ssp; rcu_cblist_init(&ready_cbs); spin_lock_irq_rcu_node(sdp); diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 1c4add096078..127255795859 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3408,8 +3408,6 @@ int rcutree_online_cpu(unsigned int cpu) raw_spin_lock_irqsave_rcu_node(rnp, flags); rnp->ffmask |= rdp->grpmask; raw_spin_unlock_irqrestore_rcu_node(rnp, flags); - if (IS_ENABLED(CONFIG_TREE_SRCU)) - srcu_online_cpu(cpu); if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE) return 0; /* Too early in boot for scheduler work. */ sync_sched_exp_online_cleanup(cpu); @@ -3434,8 +3432,6 @@ int rcutree_offline_cpu(unsigned int cpu) raw_spin_unlock_irqrestore_rcu_node(rnp, flags); rcutree_affinity_setting(cpu, cpu); - if (IS_ENABLED(CONFIG_TREE_SRCU)) - srcu_offline_cpu(cpu); return 0; } diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 149557b7c39c..4bba017c703c 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -458,11 +458,3 @@ static void rcu_bind_gp_kthread(void); static bool rcu_nohz_full_cpu(void); static void rcu_dynticks_task_enter(void); static void rcu_dynticks_task_exit(void); - -#ifdef CONFIG_SRCU -void srcu_online_cpu(unsigned int cpu); -void srcu_offline_cpu(unsigned int cpu); -#else /* #ifdef CONFIG_SRCU */ -void srcu_online_cpu(unsigned int cpu) { } -void srcu_offline_cpu(unsigned int cpu) { } -#endif /* #else #ifdef CONFIG_SRCU */ -- cgit From 3a6cb58f159e64241b2af9374acad41a70939349 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 10 Dec 2018 09:44:52 -0800 Subject: rcutorture: Add grace period after CPU offline Beyond a certain point in the CPU-hotplug offline process, timers get stranded on the outgoing CPU, and won't fire until that CPU comes back online, which might well be never. This commit therefore adds a hook in torture_onoff_init() that is invoked from torture_offline(), which rcutorture uses to occasionally wait for a grace period. This should result in failures for RCU implementations that rely on stranded timers eventually firing in the absence of the CPU coming back online. Reported-by: Sebastian Andrzej Siewior Signed-off-by: Paul E. McKenney --- include/linux/torture.h | 3 ++- kernel/locking/locktorture.c | 2 +- kernel/rcu/rcutorture.c | 11 ++++++++++- kernel/torture.c | 6 +++++- 4 files changed, 18 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/torture.h b/include/linux/torture.h index 48fad21109fc..f2d3bcbf4337 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -50,11 +50,12 @@ do { if (verbose) pr_alert("%s" TORTURE_FLAG "!!! %s\n", torture_type, s); } while (0) /* Definitions for online/offline exerciser. */ +typedef void torture_ofl_func(void); bool torture_offline(int cpu, long *n_onl_attempts, long *n_onl_successes, unsigned long *sum_offl, int *min_onl, int *max_onl); bool torture_online(int cpu, long *n_onl_attempts, long *n_onl_successes, unsigned long *sum_onl, int *min_onl, int *max_onl); -int torture_onoff_init(long ooholdoff, long oointerval); +int torture_onoff_init(long ooholdoff, long oointerval, torture_ofl_func *f); void torture_onoff_stats(void); bool torture_onoff_failures(void); diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index 7d0b0ed74404..c8b348097bb5 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -970,7 +970,7 @@ static int __init lock_torture_init(void) /* Prepare torture context. */ if (onoff_interval > 0) { firsterr = torture_onoff_init(onoff_holdoff * HZ, - onoff_interval * HZ); + onoff_interval * HZ, NULL); if (firsterr) goto unwind; } diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 0955f3a20952..9eb9235c1ec9 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -2243,6 +2243,14 @@ static void rcu_test_debug_objects(void) #endif /* #else #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */ } +static void rcutorture_sync(void) +{ + static unsigned long n; + + if (cur_ops->sync && !(++n & 0xfff)) + cur_ops->sync(); +} + static int __init rcu_torture_init(void) { @@ -2404,7 +2412,8 @@ rcu_torture_init(void) firsterr = torture_shutdown_init(shutdown_secs, rcu_torture_cleanup); if (firsterr) goto unwind; - firsterr = torture_onoff_init(onoff_holdoff * HZ, onoff_interval); + firsterr = torture_onoff_init(onoff_holdoff * HZ, onoff_interval, + rcutorture_sync); if (firsterr) goto unwind; firsterr = rcu_torture_stall_init(); diff --git a/kernel/torture.c b/kernel/torture.c index bbf6d473e50c..a03ff722352b 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -75,6 +75,7 @@ static DEFINE_MUTEX(fullstop_mutex); static struct task_struct *onoff_task; static long onoff_holdoff; static long onoff_interval; +static torture_ofl_func *onoff_f; static long n_offline_attempts; static long n_offline_successes; static unsigned long sum_offline; @@ -118,6 +119,8 @@ bool torture_offline(int cpu, long *n_offl_attempts, long *n_offl_successes, pr_alert("%s" TORTURE_FLAG "torture_onoff task: offlined %d\n", torture_type, cpu); + if (onoff_f) + onoff_f(); (*n_offl_successes)++; delta = jiffies - starttime; *sum_offl += delta; @@ -243,11 +246,12 @@ stop: /* * Initiate online-offline handling. */ -int torture_onoff_init(long ooholdoff, long oointerval) +int torture_onoff_init(long ooholdoff, long oointerval, torture_ofl_func *f) { #ifdef CONFIG_HOTPLUG_CPU onoff_holdoff = ooholdoff; onoff_interval = oointerval; + onoff_f = f; if (onoff_interval <= 0) return 0; return torture_create_kthread(torture_onoff, NULL, onoff_task); -- cgit From 9b28aa1d0eae1be1016c8f4ba504545caff01da3 Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Wed, 12 Dec 2018 23:59:13 +0000 Subject: platform_data/mlxreg: Document fixes for core platform data Remove "led" from the description, since the structure "mlxreg_core_platform_data" is used not only for led data. Signed-off-by: Vadim Pasternak Signed-off-by: Darren Hart (VMware) --- include/linux/platform_data/mlxreg.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/mlxreg.h b/include/linux/platform_data/mlxreg.h index 19f5cb618c55..d823713f94ec 100644 --- a/include/linux/platform_data/mlxreg.h +++ b/include/linux/platform_data/mlxreg.h @@ -107,9 +107,9 @@ struct mlxreg_core_item { /** * struct mlxreg_core_platform_data - platform data: * - * @led_data: led private data; + * @data: instance private data; * @regmap: register map of parent device; - * @counter: number of led instances; + * @counter: number of instances; */ struct mlxreg_core_platform_data { struct mlxreg_core_data *data; -- cgit From 946e4e02b11889cb161b15ff4712a8ba21a50eb6 Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Wed, 12 Dec 2018 23:59:14 +0000 Subject: platform_data/mlxreg: Add capability field to core platform data Add capability field to "mlxreg_core_platform_data" structure. The purpose of this register is to provide additional info to platform driver through the atribute related capability register. Signed-off-by: Vadim Pasternak Signed-off-by: Darren Hart (VMware) --- include/linux/platform_data/mlxreg.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/mlxreg.h b/include/linux/platform_data/mlxreg.h index d823713f94ec..1b2f86f96743 100644 --- a/include/linux/platform_data/mlxreg.h +++ b/include/linux/platform_data/mlxreg.h @@ -61,6 +61,7 @@ struct mlxreg_hotplug_device { * @reg: attribute register; * @mask: attribute access mask; * @bit: attribute effective bit; + * @capability: attribute capability register; * @mode: access mode; * @np - pointer to node platform associated with attribute; * @hpdev - hotplug device data; @@ -72,6 +73,7 @@ struct mlxreg_core_data { u32 reg; u32 mask; u32 bit; + u32 capability; umode_t mode; struct device_node *np; struct mlxreg_hotplug_device hpdev; -- cgit From 8d5d0cfb63cbcb4005e19a332b31d687b1d01e58 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 3 Dec 2018 09:56:23 +0000 Subject: sched/topology: Introduce a sysctl for Energy Aware Scheduling In its current state, Energy Aware Scheduling (EAS) starts automatically on asymmetric platforms having an Energy Model (EM). However, there are users who want to have an EM (for thermal management for example), but don't want EAS with it. In order to let users disable EAS explicitly, introduce a new sysctl called 'sched_energy_aware'. It is enabled by default so that EAS can start automatically on platforms where it makes sense. Flipping it to 0 rebuilds the scheduling domains and disables EAS. Signed-off-by: Quentin Perret Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: adharmap@codeaurora.org Cc: chris.redpath@arm.com Cc: currojerez@riseup.net Cc: dietmar.eggemann@arm.com Cc: edubezval@gmail.com Cc: gregkh@linuxfoundation.org Cc: javi.merino@kernel.org Cc: joel@joelfernandes.org Cc: juri.lelli@redhat.com Cc: morten.rasmussen@arm.com Cc: patrick.bellasi@arm.com Cc: pkondeti@codeaurora.org Cc: rjw@rjwysocki.net Cc: skannan@codeaurora.org Cc: smuckle@google.com Cc: srinivas.pandruvada@linux.intel.com Cc: thara.gopinath@linaro.org Cc: tkjos@google.com Cc: valentin.schneider@arm.com Cc: vincent.guittot@linaro.org Cc: viresh.kumar@linaro.org Link: https://lkml.kernel.org/r/20181203095628.11858-11-quentin.perret@arm.com Signed-off-by: Ingo Molnar --- Documentation/sysctl/kernel.txt | 12 ++++++++++++ include/linux/sched/sysctl.h | 7 +++++++ kernel/sched/topology.c | 29 +++++++++++++++++++++++++++++ kernel/sysctl.c | 11 +++++++++++ 4 files changed, 59 insertions(+) (limited to 'include/linux') diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index c0527d8a468a..379063e58326 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -79,6 +79,7 @@ show up in /proc/sys/kernel: - reboot-cmd [ SPARC only ] - rtsig-max - rtsig-nr +- sched_energy_aware - seccomp/ ==> Documentation/userspace-api/seccomp_filter.rst - sem - sem_next_id [ sysv ipc ] @@ -890,6 +891,17 @@ rtsig-nr shows the number of RT signals currently queued. ============================================================== +sched_energy_aware: + +Enables/disables Energy Aware Scheduling (EAS). EAS starts +automatically on platforms where it can run (that is, +platforms with asymmetric CPU topologies and having an Energy +Model available). If your platform happens to meet the +requirements for EAS but you do not want to use it, change +this value to 0. + +============================================================== + sched_schedstats: Enables/disables scheduler statistics. Enabling this feature diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index a9c32daeb9d8..99ce6d728df7 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -83,4 +83,11 @@ extern int sysctl_schedstats(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); +#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) +extern unsigned int sysctl_sched_energy_aware; +extern int sched_energy_aware_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos); +#endif + #endif /* _LINUX_SCHED_SYSCTL_H */ diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index 3f35ba1d8fde..50c3fc316c54 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -203,9 +203,35 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent) DEFINE_STATIC_KEY_FALSE(sched_energy_present); #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) +unsigned int sysctl_sched_energy_aware = 1; DEFINE_MUTEX(sched_energy_mutex); bool sched_energy_update; +#ifdef CONFIG_PROC_SYSCTL +int sched_energy_aware_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int ret, state; + + if (write && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + if (!ret && write) { + state = static_branch_unlikely(&sched_energy_present); + if (state != sysctl_sched_energy_aware) { + mutex_lock(&sched_energy_mutex); + sched_energy_update = 1; + rebuild_sched_domains(); + sched_energy_update = 0; + mutex_unlock(&sched_energy_mutex); + } + } + + return ret; +} +#endif + static void free_pd(struct perf_domain *pd) { struct perf_domain *tmp; @@ -322,6 +348,9 @@ static bool build_perf_domains(const struct cpumask *cpu_map) struct cpufreq_policy *policy; struct cpufreq_governor *gov; + if (!sysctl_sched_energy_aware) + goto free; + /* EAS is enabled for asymmetric CPU capacity topologies. */ if (!per_cpu(sd_asym_cpucapacity, cpu)) { if (sched_debug()) { diff --git a/kernel/sysctl.c b/kernel/sysctl.c index ba4d9e85feb8..987ae08147bf 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -467,6 +467,17 @@ static struct ctl_table kern_table[] = { .extra1 = &one, }, #endif +#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) + { + .procname = "sched_energy_aware", + .data = &sysctl_sched_energy_aware, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = sched_energy_aware_handler, + .extra1 = &zero, + .extra2 = &one, + }, +#endif #ifdef CONFIG_PROVE_LOCKING { .procname = "prove_locking", -- cgit From fdce60787f6215607dc7ac910cbaf4416684b589 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Thu, 13 Dec 2018 12:22:32 +0100 Subject: reset: sunxi: declare sun6i_reset_init in a header file Avoid declaring extern functions in c files. To make sure function definition and usage don't get out of sync, declare sun6i_reset_init in a common header. Suggested-by: Stephen Rothwell Signed-off-by: Philipp Zabel --- arch/arm/mach-sunxi/sunxi.c | 2 +- drivers/reset/reset-sunxi.c | 1 + include/linux/reset/sunxi.h | 7 +++++++ 3 files changed, 9 insertions(+), 1 deletion(-) create mode 100644 include/linux/reset/sunxi.h (limited to 'include/linux') diff --git a/arch/arm/mach-sunxi/sunxi.c b/arch/arm/mach-sunxi/sunxi.c index 8a7f301839c2..933b6930f024 100644 --- a/arch/arm/mach-sunxi/sunxi.c +++ b/arch/arm/mach-sunxi/sunxi.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -37,7 +38,6 @@ static const char * const sun6i_board_dt_compat[] = { NULL, }; -extern void __init sun6i_reset_init(void); static void __init sun6i_timer_init(void) { of_clk_init(NULL); diff --git a/drivers/reset/reset-sunxi.c b/drivers/reset/reset-sunxi.c index db9a1a75523f..b06d724d8f21 100644 --- a/drivers/reset/reset-sunxi.c +++ b/drivers/reset/reset-sunxi.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/reset/sunxi.h b/include/linux/reset/sunxi.h new file mode 100644 index 000000000000..1ad7fffb413e --- /dev/null +++ b/include/linux/reset/sunxi.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_RESET_SUNXI_H__ +#define __LINUX_RESET_SUNXI_H__ + +void __init sun6i_reset_init(void); + +#endif /* __LINUX_RESET_SUNXI_H__ */ -- cgit From cdbeb315ed8dcc142a68054899cedd6e4f1fea3f Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Thu, 13 Dec 2018 12:24:36 +0100 Subject: reset: socfpga: declare socfpga_reset_init in a header file Avoid declaring extern functions in c files. To make sure function definition and usage don't get out of sync, declare socfpga_reset_init in a common header. Suggested-by: Stephen Rothwell Signed-off-by: Philipp Zabel Acked-by: Dinh Nguyen --- arch/arm/mach-socfpga/socfpga.c | 3 +-- drivers/reset/reset-socfpga.c | 2 +- include/linux/reset/socfpga.h | 7 +++++++ 3 files changed, 9 insertions(+), 3 deletions(-) create mode 100644 include/linux/reset/socfpga.h (limited to 'include/linux') diff --git a/arch/arm/mach-socfpga/socfpga.c b/arch/arm/mach-socfpga/socfpga.c index afd98971d903..816da0eb6616 100644 --- a/arch/arm/mach-socfpga/socfpga.c +++ b/arch/arm/mach-socfpga/socfpga.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -32,8 +33,6 @@ void __iomem *rst_manager_base_addr; void __iomem *sdr_ctl_base_addr; unsigned long socfpga_cpu1start_addr; -extern void __init socfpga_reset_init(void); - static void __init socfpga_sysmgr_init(void) { struct device_node *np; diff --git a/drivers/reset/reset-socfpga.c b/drivers/reset/reset-socfpga.c index 318cfc51c441..96953992c2bb 100644 --- a/drivers/reset/reset-socfpga.c +++ b/drivers/reset/reset-socfpga.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -18,7 +19,6 @@ #include "reset-simple.h" #define SOCFPGA_NR_BANKS 8 -void __init socfpga_reset_init(void); static int a10_reset_init(struct device_node *np) { diff --git a/include/linux/reset/socfpga.h b/include/linux/reset/socfpga.h new file mode 100644 index 000000000000..b11a2047c342 --- /dev/null +++ b/include/linux/reset/socfpga.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_RESET_SOCFPGA_H__ +#define __LINUX_RESET_SOCFPGA_H__ + +void __init socfpga_reset_init(void); + +#endif /* __LINUX_RESET_SOCFPGA_H__ */ -- cgit From 87eff9af7efb154cc4a940ed12efc803a0bf3fba Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Tue, 22 Jan 2019 23:18:21 +0200 Subject: pinctrl: remove pinctrl/machine.h inclusion from pinctrl/pinconf.h The change adds explicit inclusion of linux/pinctrl/machine.h header to the only needed pinctrl-madera-core.c file, and therefore inclusion of pinctrl/machine.h header from pinctrl/pinconf.h can be removed. The change is preparatory to a follow-up reversal of commit f07512e615dd ("pinctrl/pinconfig: add debug interface"). Signed-off-by: Vladimir Zapolskiy Cc: Charles Keepax Reviewed-by Richard Fitzgerald Signed-off-by: Linus Walleij --- drivers/pinctrl/cirrus/pinctrl-madera-core.c | 1 + include/linux/pinctrl/pinconf.h | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/pinctrl/cirrus/pinctrl-madera-core.c b/drivers/pinctrl/cirrus/pinctrl-madera-core.c index a5dda832024a..7c9694593f79 100644 --- a/drivers/pinctrl/cirrus/pinctrl-madera-core.c +++ b/drivers/pinctrl/cirrus/pinctrl-madera-core.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/pinctrl/pinconf.h b/include/linux/pinctrl/pinconf.h index 8dd85d302b90..109468d9d849 100644 --- a/include/linux/pinctrl/pinconf.h +++ b/include/linux/pinctrl/pinconf.h @@ -14,8 +14,6 @@ #ifdef CONFIG_PINCONF -#include - struct pinctrl_dev; struct seq_file; -- cgit From e73339037f6b6d65e84f5fd42e56dd3cdf0d9e9c Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Tue, 22 Jan 2019 23:18:22 +0200 Subject: pinctrl: remove unused 'pinconf-config' debugfs interface The main goal of the change is to remove .pin_config_dbg_parse_modify callback before a driver with its support appears. So far the in-kernel interface did not attract any users since its introduction 5 years ago. Originally .pin_config_dbg_parse_modify callback and the associated 'pinconf-config' debugfs file were introduced in commit f07512e615dd ("pinctrl/pinconfig: add debug interface"), a short description of 'pinconf-config' usage for debugging can be expressed this way: Write to 'pinconf-config' (see pinconf_dbg_config_write() function): % echo -n modify $map_type $device_name $state_name $pin_name $config > \ /sys/kernel/debug/pinctrl/$pinctrl/pinconf-config It supposes to update a global (therefore single!) 'pinconf_dbg_conf' variable with an alternative setting, the arguments should match an existing pinconf device and some registered pinctrl mapping 'map': * $map_type is either 'config_pin' or 'config_group', it should match 'map->type' value of PIN_MAP_TYPE_CONFIGS_PIN or PIN_MAP_TYPE_CONFIGS_GROUP accordingly, * $device_name should match 'map->dev_name' string value, * $state_name should match 'map->name' string value, * $pin_name should match 'map->data.configs.group_or_pin' string value, If all above has matched, then $config is a new value to be set by calling pinconfops->pin_config_dbg_parse_modify(pctldev, config, matched_config). After a successful write into 'pinconf-config' a user can read the file to get information about that single modified pin configuration. The fact is .pin_config_dbg_parse_modify callback has never been defined in 'struct pinconf_ops' of any pinconf driver, thus an actual modification of a pin or group state on any present pinconf controller does not happen, and it declares that all related code is no more than dead code. I discovered the issue while attempting to add .pin_config_dbg_parse_modify support in some drivers and found that too short 'MAX_NAME_LEN' set by drivers/pinctrl/pinconf.c:372:#define MAX_NAME_LEN 15 is practically insufficient to store a regular pinctrl device name, which are like 'e6060000.pin-controller-sh-pfc' or pin names like 'MX6QDL_PAD_ENET_REF_CLK', thus it is another indicator that the code is barely usable, insufficiently tested and unprepossessing. Of course it might be possible to increase MAX_NAME_LEN, and then add .pin_config_dbg_parse_modify callbacks to the drivers, but the whole idea of such a limited debug option looks inviable. A more flexible way to functionally substitute the original approach is to implicitly or explicitly use pinctrl_select_state() function whenever needed. Signed-off-by: Vladimir Zapolskiy Cc: Laurent Meunier Cc: Masahiro Yamada Cc: Russell King Signed-off-by: Linus Walleij --- drivers/pinctrl/pinconf.c | 222 ---------------------------------------- include/linux/pinctrl/pinconf.h | 4 - 2 files changed, 226 deletions(-) (limited to 'include/linux') diff --git a/drivers/pinctrl/pinconf.c b/drivers/pinctrl/pinconf.c index 2c7229380f08..2678603df14b 100644 --- a/drivers/pinctrl/pinconf.c +++ b/drivers/pinctrl/pinconf.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -369,225 +368,6 @@ static int pinconf_groups_show(struct seq_file *s, void *what) DEFINE_SHOW_ATTRIBUTE(pinconf_pins); DEFINE_SHOW_ATTRIBUTE(pinconf_groups); -#define MAX_NAME_LEN 15 - -struct dbg_cfg { - enum pinctrl_map_type map_type; - char dev_name[MAX_NAME_LEN + 1]; - char state_name[MAX_NAME_LEN + 1]; - char pin_name[MAX_NAME_LEN + 1]; -}; - -/* - * Goal is to keep this structure as global in order to simply read the - * pinconf-config file after a write to check config is as expected - */ -static struct dbg_cfg pinconf_dbg_conf; - -/** - * pinconf_dbg_config_print() - display the pinctrl config from the pinctrl - * map, of the dev/pin/state that was last written to pinconf-config file. - * @s: string filled in with config description - * @d: not used - */ -static int pinconf_dbg_config_print(struct seq_file *s, void *d) -{ - struct pinctrl_maps *maps_node; - const struct pinctrl_map *map; - const struct pinctrl_map *found = NULL; - struct pinctrl_dev *pctldev; - struct dbg_cfg *dbg = &pinconf_dbg_conf; - int i; - - mutex_lock(&pinctrl_maps_mutex); - - /* Parse the pinctrl map and look for the elected pin/state */ - for_each_maps(maps_node, i, map) { - if (map->type != dbg->map_type) - continue; - if (strcmp(map->dev_name, dbg->dev_name)) - continue; - if (strcmp(map->name, dbg->state_name)) - continue; - - if (!strcmp(map->data.configs.group_or_pin, dbg->pin_name)) { - /* We found the right pin */ - found = map; - break; - } - } - - if (!found) { - seq_printf(s, "No config found for dev/state/pin, expected:\n"); - seq_printf(s, "Searched dev:%s\n", dbg->dev_name); - seq_printf(s, "Searched state:%s\n", dbg->state_name); - seq_printf(s, "Searched pin:%s\n", dbg->pin_name); - seq_printf(s, "Use: modify config_pin "\ - " \n"); - goto exit; - } - - pctldev = get_pinctrl_dev_from_devname(found->ctrl_dev_name); - seq_printf(s, "Dev %s has config of %s in state %s:\n", - dbg->dev_name, dbg->pin_name, dbg->state_name); - pinconf_show_config(s, pctldev, found->data.configs.configs, - found->data.configs.num_configs); - -exit: - mutex_unlock(&pinctrl_maps_mutex); - - return 0; -} - -/** - * pinconf_dbg_config_write() - modify the pinctrl config in the pinctrl - * map, of a dev/pin/state entry based on user entries to pinconf-config - * @user_buf: contains the modification request with expected format: - * modify - * modify is literal string, alternatives like add/delete not supported yet - * is the configuration to be changed. Supported configs are - * "config_pin" or "config_group", alternatives like config_mux are not - * supported yet. - * are values that should match the pinctrl-maps - * reflects the new config and is driver dependent - */ -static ssize_t pinconf_dbg_config_write(struct file *file, - const char __user *user_buf, size_t count, loff_t *ppos) -{ - struct pinctrl_maps *maps_node; - const struct pinctrl_map *map; - const struct pinctrl_map *found = NULL; - struct pinctrl_dev *pctldev; - const struct pinconf_ops *confops = NULL; - struct dbg_cfg *dbg = &pinconf_dbg_conf; - const struct pinctrl_map_configs *configs; - char config[MAX_NAME_LEN + 1]; - char buf[128]; - char *b = &buf[0]; - int buf_size; - char *token; - int i; - - /* Get userspace string and assure termination */ - buf_size = min(count, sizeof(buf) - 1); - if (copy_from_user(buf, user_buf, buf_size)) - return -EFAULT; - buf[buf_size] = 0; - - /* - * need to parse entry and extract parameters: - * modify configs_pin devicename state pinname newvalue - */ - - /* Get arg: 'modify' */ - token = strsep(&b, " "); - if (!token) - return -EINVAL; - if (strcmp(token, "modify")) - return -EINVAL; - - /* - * Get arg type: "config_pin" and "config_group" - * types are supported so far - */ - token = strsep(&b, " "); - if (!token) - return -EINVAL; - if (!strcmp(token, "config_pin")) - dbg->map_type = PIN_MAP_TYPE_CONFIGS_PIN; - else if (!strcmp(token, "config_group")) - dbg->map_type = PIN_MAP_TYPE_CONFIGS_GROUP; - else - return -EINVAL; - - /* get arg 'device_name' */ - token = strsep(&b, " "); - if (!token) - return -EINVAL; - if (strlen(token) >= MAX_NAME_LEN) - return -EINVAL; - strncpy(dbg->dev_name, token, MAX_NAME_LEN); - - /* get arg 'state_name' */ - token = strsep(&b, " "); - if (!token) - return -EINVAL; - if (strlen(token) >= MAX_NAME_LEN) - return -EINVAL; - strncpy(dbg->state_name, token, MAX_NAME_LEN); - - /* get arg 'pin_name' */ - token = strsep(&b, " "); - if (!token) - return -EINVAL; - if (strlen(token) >= MAX_NAME_LEN) - return -EINVAL; - strncpy(dbg->pin_name, token, MAX_NAME_LEN); - - /* get new_value of config' */ - token = strsep(&b, " "); - if (!token) - return -EINVAL; - if (strlen(token) >= MAX_NAME_LEN) - return -EINVAL; - strncpy(config, token, MAX_NAME_LEN); - - mutex_lock(&pinctrl_maps_mutex); - - /* Parse the pinctrl map and look for the selected dev/state/pin */ - for_each_maps(maps_node, i, map) { - if (strcmp(map->dev_name, dbg->dev_name)) - continue; - if (map->type != dbg->map_type) - continue; - if (strcmp(map->name, dbg->state_name)) - continue; - - /* we found the right pin / state, so overwrite config */ - if (!strcmp(map->data.configs.group_or_pin, dbg->pin_name)) { - found = map; - break; - } - } - - if (!found) { - count = -EINVAL; - goto exit; - } - - pctldev = get_pinctrl_dev_from_devname(found->ctrl_dev_name); - if (pctldev) - confops = pctldev->desc->confops; - - if (confops && confops->pin_config_dbg_parse_modify) { - configs = &found->data.configs; - for (i = 0; i < configs->num_configs; i++) { - confops->pin_config_dbg_parse_modify(pctldev, - config, - &configs->configs[i]); - } - } - -exit: - mutex_unlock(&pinctrl_maps_mutex); - - return count; -} - -static int pinconf_dbg_config_open(struct inode *inode, struct file *file) -{ - return single_open(file, pinconf_dbg_config_print, inode->i_private); -} - -static const struct file_operations pinconf_dbg_pinconfig_fops = { - .open = pinconf_dbg_config_open, - .write = pinconf_dbg_config_write, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .owner = THIS_MODULE, -}; - void pinconf_init_device_debugfs(struct dentry *devroot, struct pinctrl_dev *pctldev) { @@ -595,8 +375,6 @@ void pinconf_init_device_debugfs(struct dentry *devroot, devroot, pctldev, &pinconf_pins_fops); debugfs_create_file("pinconf-groups", S_IFREG | S_IRUGO, devroot, pctldev, &pinconf_groups_fops); - debugfs_create_file("pinconf-config", (S_IRUGO | S_IWUSR | S_IWGRP), - devroot, pctldev, &pinconf_dbg_pinconfig_fops); } #endif diff --git a/include/linux/pinctrl/pinconf.h b/include/linux/pinctrl/pinconf.h index 109468d9d849..93c9dd133e9d 100644 --- a/include/linux/pinctrl/pinconf.h +++ b/include/linux/pinctrl/pinconf.h @@ -29,7 +29,6 @@ struct seq_file; * @pin_config_group_get: get configurations for an entire pin group; should * return -ENOTSUPP and -EINVAL using the same rules as pin_config_get. * @pin_config_group_set: configure all pins in a group - * @pin_config_dbg_parse_modify: optional debugfs to modify a pin configuration * @pin_config_dbg_show: optional debugfs display hook that will provide * per-device info for a certain pin in debugfs * @pin_config_group_dbg_show: optional debugfs display hook that will provide @@ -55,9 +54,6 @@ struct pinconf_ops { unsigned selector, unsigned long *configs, unsigned num_configs); - int (*pin_config_dbg_parse_modify) (struct pinctrl_dev *pctldev, - const char *arg, - unsigned long *config); void (*pin_config_dbg_show) (struct pinctrl_dev *pctldev, struct seq_file *s, unsigned offset); -- cgit From 2b6e492467c78183bb629bb0a100ea3509b615a5 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Wed, 23 Jan 2019 17:44:16 +0300 Subject: device property: Fix the length used in PROPERTY_ENTRY_STRING() With string type property entries we need to use sizeof(const char *) instead of the number of characters as the length of the entry. If the string was shorter then sizeof(const char *), attempts to read it would have failed with -EOVERFLOW. The problem has been hidden because all build-in string properties have had a string longer then 8 characters until now. Fixes: a85f42047533 ("device property: helper macros for property entry creation") Cc: 4.5+ # 4.5+ Signed-off-by: Heikki Krogerus Reviewed-by: Andy Shevchenko Signed-off-by: Rafael J. Wysocki --- include/linux/property.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/property.h b/include/linux/property.h index 3789ec755fb6..65d3420dd5d1 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -258,7 +258,7 @@ struct property_entry { #define PROPERTY_ENTRY_STRING(_name_, _val_) \ (struct property_entry) { \ .name = _name_, \ - .length = sizeof(_val_), \ + .length = sizeof(const char *), \ .type = DEV_PROP_STRING, \ { .value = { .str = _val_ } }, \ } -- cgit From 625c85a62cb7d3c79f6e16de3cfa972033658250 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 25 Jan 2019 12:53:07 +0530 Subject: cpufreq: Use struct kobj_attribute instead of struct global_attr The cpufreq_global_kobject is created using kobject_create_and_add() helper, which assigns the kobj_type as dynamic_kobj_ktype and show/store routines are set to kobj_attr_show() and kobj_attr_store(). These routines pass struct kobj_attribute as an argument to the show/store callbacks. But all the cpufreq files created using the cpufreq_global_kobject expect the argument to be of type struct attribute. Things work fine currently as no one accesses the "attr" argument. We may not see issues even if the argument is used, as struct kobj_attribute has struct attribute as its first element and so they will both get same address. But this is logically incorrect and we should rather use struct kobj_attribute instead of struct global_attr in the cpufreq core and drivers and the show/store callbacks should take struct kobj_attribute as argument instead. This bug is caught using CFI CLANG builds in android kernel which catches mismatch in function prototypes for such callbacks. Reported-by: Donghee Han Reported-by: Sangkyu Kim Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 6 +++--- drivers/cpufreq/intel_pstate.c | 23 ++++++++++++----------- include/linux/cpufreq.h | 12 ++---------- 3 files changed, 17 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index a8fa684f5f90..3eff158d9750 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -545,13 +545,13 @@ EXPORT_SYMBOL_GPL(cpufreq_policy_transition_delay_us); * SYSFS INTERFACE * *********************************************************************/ static ssize_t show_boost(struct kobject *kobj, - struct attribute *attr, char *buf) + struct kobj_attribute *attr, char *buf) { return sprintf(buf, "%d\n", cpufreq_driver->boost_enabled); } -static ssize_t store_boost(struct kobject *kobj, struct attribute *attr, - const char *buf, size_t count) +static ssize_t store_boost(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) { int ret, enable; diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index dd66decf2087..5ab6a4fe93aa 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -895,7 +895,7 @@ static void intel_pstate_update_policies(void) /************************** sysfs begin ************************/ #define show_one(file_name, object) \ static ssize_t show_##file_name \ - (struct kobject *kobj, struct attribute *attr, char *buf) \ + (struct kobject *kobj, struct kobj_attribute *attr, char *buf) \ { \ return sprintf(buf, "%u\n", global.object); \ } @@ -904,7 +904,7 @@ static ssize_t intel_pstate_show_status(char *buf); static int intel_pstate_update_status(const char *buf, size_t size); static ssize_t show_status(struct kobject *kobj, - struct attribute *attr, char *buf) + struct kobj_attribute *attr, char *buf) { ssize_t ret; @@ -915,7 +915,7 @@ static ssize_t show_status(struct kobject *kobj, return ret; } -static ssize_t store_status(struct kobject *a, struct attribute *b, +static ssize_t store_status(struct kobject *a, struct kobj_attribute *b, const char *buf, size_t count) { char *p = memchr(buf, '\n', count); @@ -929,7 +929,7 @@ static ssize_t store_status(struct kobject *a, struct attribute *b, } static ssize_t show_turbo_pct(struct kobject *kobj, - struct attribute *attr, char *buf) + struct kobj_attribute *attr, char *buf) { struct cpudata *cpu; int total, no_turbo, turbo_pct; @@ -955,7 +955,7 @@ static ssize_t show_turbo_pct(struct kobject *kobj, } static ssize_t show_num_pstates(struct kobject *kobj, - struct attribute *attr, char *buf) + struct kobj_attribute *attr, char *buf) { struct cpudata *cpu; int total; @@ -976,7 +976,7 @@ static ssize_t show_num_pstates(struct kobject *kobj, } static ssize_t show_no_turbo(struct kobject *kobj, - struct attribute *attr, char *buf) + struct kobj_attribute *attr, char *buf) { ssize_t ret; @@ -998,7 +998,7 @@ static ssize_t show_no_turbo(struct kobject *kobj, return ret; } -static ssize_t store_no_turbo(struct kobject *a, struct attribute *b, +static ssize_t store_no_turbo(struct kobject *a, struct kobj_attribute *b, const char *buf, size_t count) { unsigned int input; @@ -1045,7 +1045,7 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b, return count; } -static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b, +static ssize_t store_max_perf_pct(struct kobject *a, struct kobj_attribute *b, const char *buf, size_t count) { unsigned int input; @@ -1075,7 +1075,7 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b, return count; } -static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b, +static ssize_t store_min_perf_pct(struct kobject *a, struct kobj_attribute *b, const char *buf, size_t count) { unsigned int input; @@ -1107,12 +1107,13 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b, } static ssize_t show_hwp_dynamic_boost(struct kobject *kobj, - struct attribute *attr, char *buf) + struct kobj_attribute *attr, char *buf) { return sprintf(buf, "%u\n", hwp_boost); } -static ssize_t store_hwp_dynamic_boost(struct kobject *a, struct attribute *b, +static ssize_t store_hwp_dynamic_boost(struct kobject *a, + struct kobj_attribute *b, const char *buf, size_t count) { unsigned int input; diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index bd7fbd6a4478..c19142911554 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -254,20 +254,12 @@ __ATTR(_name, 0644, show_##_name, store_##_name) static struct freq_attr _name = \ __ATTR(_name, 0200, NULL, store_##_name) -struct global_attr { - struct attribute attr; - ssize_t (*show)(struct kobject *kobj, - struct attribute *attr, char *buf); - ssize_t (*store)(struct kobject *a, struct attribute *b, - const char *c, size_t count); -}; - #define define_one_global_ro(_name) \ -static struct global_attr _name = \ +static struct kobj_attribute _name = \ __ATTR(_name, 0444, show_##_name, NULL) #define define_one_global_rw(_name) \ -static struct global_attr _name = \ +static struct kobj_attribute _name = \ __ATTR(_name, 0644, show_##_name, store_##_name) -- cgit From bc3843d4d357061d92e7800c7da342e2d068772c Mon Sep 17 00:00:00 2001 From: Nava kishore Manne Date: Fri, 25 Jan 2019 13:16:52 +0530 Subject: firmware: xilinx: Add reset API's This Patch Adds reset API's to support release, assert and status functionalities by using firmware interface. Signed-off-by: Nava kishore Manne Signed-off-by: Michal Simek --- drivers/firmware/xilinx/zynqmp.c | 40 +++++++++++ include/linux/firmware/xlnx-zynqmp.h | 136 +++++++++++++++++++++++++++++++++++ 2 files changed, 176 insertions(+) (limited to 'include/linux') diff --git a/drivers/firmware/xilinx/zynqmp.c b/drivers/firmware/xilinx/zynqmp.c index 9a1c72a9280f..70b50377ae5f 100644 --- a/drivers/firmware/xilinx/zynqmp.c +++ b/drivers/firmware/xilinx/zynqmp.c @@ -469,6 +469,44 @@ static int zynqmp_pm_ioctl(u32 node_id, u32 ioctl_id, u32 arg1, u32 arg2, arg1, arg2, out); } +/** + * zynqmp_pm_reset_assert - Request setting of reset (1 - assert, 0 - release) + * @reset: Reset to be configured + * @assert_flag: Flag stating should reset be asserted (1) or + * released (0) + * + * Return: Returns status, either success or error+reason + */ +static int zynqmp_pm_reset_assert(const enum zynqmp_pm_reset reset, + const enum zynqmp_pm_reset_action assert_flag) +{ + return zynqmp_pm_invoke_fn(PM_RESET_ASSERT, reset, assert_flag, + 0, 0, NULL); +} + +/** + * zynqmp_pm_reset_get_status - Get status of the reset + * @reset: Reset whose status should be returned + * @status: Returned status + * + * Return: Returns status, either success or error+reason + */ +static int zynqmp_pm_reset_get_status(const enum zynqmp_pm_reset reset, + u32 *status) +{ + u32 ret_payload[PAYLOAD_ARG_CNT]; + int ret; + + if (!status) + return -EINVAL; + + ret = zynqmp_pm_invoke_fn(PM_RESET_GET_STATUS, reset, 0, + 0, 0, ret_payload); + *status = ret_payload[1]; + + return ret; +} + static const struct zynqmp_eemi_ops eemi_ops = { .get_api_version = zynqmp_pm_get_api_version, .query_data = zynqmp_pm_query_data, @@ -482,6 +520,8 @@ static const struct zynqmp_eemi_ops eemi_ops = { .clock_setparent = zynqmp_pm_clock_setparent, .clock_getparent = zynqmp_pm_clock_getparent, .ioctl = zynqmp_pm_ioctl, + .reset_assert = zynqmp_pm_reset_assert, + .reset_get_status = zynqmp_pm_reset_get_status, }; /** diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index 3c3c28eff56a..07c587a0b06e 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -34,6 +34,8 @@ enum pm_api_id { PM_GET_API_VERSION = 1, + PM_RESET_ASSERT = 17, + PM_RESET_GET_STATUS, PM_IOCTL = 34, PM_QUERY_DATA, PM_CLOCK_ENABLE, @@ -75,6 +77,137 @@ enum pm_query_id { PM_QID_CLOCK_GET_NUM_CLOCKS = 12, }; +enum zynqmp_pm_reset_action { + PM_RESET_ACTION_RELEASE, + PM_RESET_ACTION_ASSERT, + PM_RESET_ACTION_PULSE, +}; + +enum zynqmp_pm_reset { + ZYNQMP_PM_RESET_START = 1000, + ZYNQMP_PM_RESET_PCIE_CFG = ZYNQMP_PM_RESET_START, + ZYNQMP_PM_RESET_PCIE_BRIDGE, + ZYNQMP_PM_RESET_PCIE_CTRL, + ZYNQMP_PM_RESET_DP, + ZYNQMP_PM_RESET_SWDT_CRF, + ZYNQMP_PM_RESET_AFI_FM5, + ZYNQMP_PM_RESET_AFI_FM4, + ZYNQMP_PM_RESET_AFI_FM3, + ZYNQMP_PM_RESET_AFI_FM2, + ZYNQMP_PM_RESET_AFI_FM1, + ZYNQMP_PM_RESET_AFI_FM0, + ZYNQMP_PM_RESET_GDMA, + ZYNQMP_PM_RESET_GPU_PP1, + ZYNQMP_PM_RESET_GPU_PP0, + ZYNQMP_PM_RESET_GPU, + ZYNQMP_PM_RESET_GT, + ZYNQMP_PM_RESET_SATA, + ZYNQMP_PM_RESET_ACPU3_PWRON, + ZYNQMP_PM_RESET_ACPU2_PWRON, + ZYNQMP_PM_RESET_ACPU1_PWRON, + ZYNQMP_PM_RESET_ACPU0_PWRON, + ZYNQMP_PM_RESET_APU_L2, + ZYNQMP_PM_RESET_ACPU3, + ZYNQMP_PM_RESET_ACPU2, + ZYNQMP_PM_RESET_ACPU1, + ZYNQMP_PM_RESET_ACPU0, + ZYNQMP_PM_RESET_DDR, + ZYNQMP_PM_RESET_APM_FPD, + ZYNQMP_PM_RESET_SOFT, + ZYNQMP_PM_RESET_GEM0, + ZYNQMP_PM_RESET_GEM1, + ZYNQMP_PM_RESET_GEM2, + ZYNQMP_PM_RESET_GEM3, + ZYNQMP_PM_RESET_QSPI, + ZYNQMP_PM_RESET_UART0, + ZYNQMP_PM_RESET_UART1, + ZYNQMP_PM_RESET_SPI0, + ZYNQMP_PM_RESET_SPI1, + ZYNQMP_PM_RESET_SDIO0, + ZYNQMP_PM_RESET_SDIO1, + ZYNQMP_PM_RESET_CAN0, + ZYNQMP_PM_RESET_CAN1, + ZYNQMP_PM_RESET_I2C0, + ZYNQMP_PM_RESET_I2C1, + ZYNQMP_PM_RESET_TTC0, + ZYNQMP_PM_RESET_TTC1, + ZYNQMP_PM_RESET_TTC2, + ZYNQMP_PM_RESET_TTC3, + ZYNQMP_PM_RESET_SWDT_CRL, + ZYNQMP_PM_RESET_NAND, + ZYNQMP_PM_RESET_ADMA, + ZYNQMP_PM_RESET_GPIO, + ZYNQMP_PM_RESET_IOU_CC, + ZYNQMP_PM_RESET_TIMESTAMP, + ZYNQMP_PM_RESET_RPU_R50, + ZYNQMP_PM_RESET_RPU_R51, + ZYNQMP_PM_RESET_RPU_AMBA, + ZYNQMP_PM_RESET_OCM, + ZYNQMP_PM_RESET_RPU_PGE, + ZYNQMP_PM_RESET_USB0_CORERESET, + ZYNQMP_PM_RESET_USB1_CORERESET, + ZYNQMP_PM_RESET_USB0_HIBERRESET, + ZYNQMP_PM_RESET_USB1_HIBERRESET, + ZYNQMP_PM_RESET_USB0_APB, + ZYNQMP_PM_RESET_USB1_APB, + ZYNQMP_PM_RESET_IPI, + ZYNQMP_PM_RESET_APM_LPD, + ZYNQMP_PM_RESET_RTC, + ZYNQMP_PM_RESET_SYSMON, + ZYNQMP_PM_RESET_AFI_FM6, + ZYNQMP_PM_RESET_LPD_SWDT, + ZYNQMP_PM_RESET_FPD, + ZYNQMP_PM_RESET_RPU_DBG1, + ZYNQMP_PM_RESET_RPU_DBG0, + ZYNQMP_PM_RESET_DBG_LPD, + ZYNQMP_PM_RESET_DBG_FPD, + ZYNQMP_PM_RESET_APLL, + ZYNQMP_PM_RESET_DPLL, + ZYNQMP_PM_RESET_VPLL, + ZYNQMP_PM_RESET_IOPLL, + ZYNQMP_PM_RESET_RPLL, + ZYNQMP_PM_RESET_GPO3_PL_0, + ZYNQMP_PM_RESET_GPO3_PL_1, + ZYNQMP_PM_RESET_GPO3_PL_2, + ZYNQMP_PM_RESET_GPO3_PL_3, + ZYNQMP_PM_RESET_GPO3_PL_4, + ZYNQMP_PM_RESET_GPO3_PL_5, + ZYNQMP_PM_RESET_GPO3_PL_6, + ZYNQMP_PM_RESET_GPO3_PL_7, + ZYNQMP_PM_RESET_GPO3_PL_8, + ZYNQMP_PM_RESET_GPO3_PL_9, + ZYNQMP_PM_RESET_GPO3_PL_10, + ZYNQMP_PM_RESET_GPO3_PL_11, + ZYNQMP_PM_RESET_GPO3_PL_12, + ZYNQMP_PM_RESET_GPO3_PL_13, + ZYNQMP_PM_RESET_GPO3_PL_14, + ZYNQMP_PM_RESET_GPO3_PL_15, + ZYNQMP_PM_RESET_GPO3_PL_16, + ZYNQMP_PM_RESET_GPO3_PL_17, + ZYNQMP_PM_RESET_GPO3_PL_18, + ZYNQMP_PM_RESET_GPO3_PL_19, + ZYNQMP_PM_RESET_GPO3_PL_20, + ZYNQMP_PM_RESET_GPO3_PL_21, + ZYNQMP_PM_RESET_GPO3_PL_22, + ZYNQMP_PM_RESET_GPO3_PL_23, + ZYNQMP_PM_RESET_GPO3_PL_24, + ZYNQMP_PM_RESET_GPO3_PL_25, + ZYNQMP_PM_RESET_GPO3_PL_26, + ZYNQMP_PM_RESET_GPO3_PL_27, + ZYNQMP_PM_RESET_GPO3_PL_28, + ZYNQMP_PM_RESET_GPO3_PL_29, + ZYNQMP_PM_RESET_GPO3_PL_30, + ZYNQMP_PM_RESET_GPO3_PL_31, + ZYNQMP_PM_RESET_RPU_LS, + ZYNQMP_PM_RESET_PS_ONLY, + ZYNQMP_PM_RESET_PL, + ZYNQMP_PM_RESET_PS_PL0, + ZYNQMP_PM_RESET_PS_PL1, + ZYNQMP_PM_RESET_PS_PL2, + ZYNQMP_PM_RESET_PS_PL3, + ZYNQMP_PM_RESET_END = ZYNQMP_PM_RESET_PS_PL3 +}; + /** * struct zynqmp_pm_query_data - PM query data * @qid: query ID @@ -102,6 +235,9 @@ struct zynqmp_eemi_ops { int (*clock_setparent)(u32 clock_id, u32 parent_id); int (*clock_getparent)(u32 clock_id, u32 *parent_id); int (*ioctl)(u32 node_id, u32 ioctl_id, u32 arg1, u32 arg2, u32 *out); + int (*reset_assert)(const enum zynqmp_pm_reset reset, + const enum zynqmp_pm_reset_action assert_flag); + int (*reset_get_status)(const enum zynqmp_pm_reset reset, u32 *status); }; #if IS_REACHABLE(CONFIG_ARCH_ZYNQMP) -- cgit From 15917dc02841862840efcbfe1da0830f88078b5c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 19 Dec 2018 13:04:41 +0100 Subject: sched: Remove stale PF_MUTEX_TESTER bit The RTMUTEX tester was removed long ago but the PF bit stayed around. Remove it and free up the space. Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index d2f90fa92468..e2bba022827d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1409,7 +1409,6 @@ extern struct pid *cad_pid; #define PF_UMH 0x02000000 /* I'm an Usermodehelper process */ #define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */ #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ -#define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */ #define PF_SUSPEND_TASK 0x80000000 /* This thread called freeze_processes() and should not be frozen */ -- cgit From 71368af9027f18fe5d1c6f372cfdff7e4bde8b48 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Wed, 16 Jan 2019 17:01:36 -0500 Subject: x86/speculation: Add PR_SPEC_DISABLE_NOEXEC With the default SPEC_STORE_BYPASS_SECCOMP/SPEC_STORE_BYPASS_PRCTL mode, the TIF_SSBD bit will be inherited when a new task is fork'ed or cloned. It will also remain when a new program is execve'ed. Only certain class of applications (like Java) that can run on behalf of multiple users on a single thread will require disabling speculative store bypass for security purposes. Those applications will call prctl(2) at startup time to disable SSB. They won't rely on the fact the SSB might have been disabled. Other applications that don't need SSBD will just move on without checking if SSBD has been turned on or not. The fact that the TIF_SSBD is inherited across execve(2) boundary will cause performance of applications that don't need SSBD but their predecessors have SSBD on to be unwittingly impacted especially if they write to memory a lot. To remedy this problem, a new PR_SPEC_DISABLE_NOEXEC argument for the PR_SET_SPECULATION_CTRL option of prctl(2) is added to allow applications to specify that the SSBD feature bit on the task structure should be cleared whenever a new program is being execve'ed. Suggested-by: Thomas Gleixner Signed-off-by: Waiman Long Signed-off-by: Thomas Gleixner Cc: Borislav Petkov Cc: Jonathan Corbet Cc: linux-doc@vger.kernel.org Cc: "H. Peter Anvin" Cc: Andi Kleen Cc: David Woodhouse Cc: Jiri Kosina Cc: Josh Poimboeuf Cc: Tim Chen Cc: KarimAllah Ahmed Cc: Peter Zijlstra Cc: Konrad Rzeszutek Wilk Link: https://lkml.kernel.org/r/1547676096-3281-1-git-send-email-longman@redhat.com --- Documentation/userspace-api/spec_ctrl.rst | 27 +++++++++++++++------------ arch/x86/kernel/cpu/bugs.c | 12 ++++++++++++ arch/x86/kernel/process.c | 12 ++++++++++++ include/linux/sched.h | 5 +++++ include/uapi/linux/prctl.h | 1 + tools/include/uapi/linux/prctl.h | 1 + 6 files changed, 46 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/Documentation/userspace-api/spec_ctrl.rst b/Documentation/userspace-api/spec_ctrl.rst index c4dbe6f7cdae..1129c7550a48 100644 --- a/Documentation/userspace-api/spec_ctrl.rst +++ b/Documentation/userspace-api/spec_ctrl.rst @@ -28,18 +28,20 @@ PR_GET_SPECULATION_CTRL returns the state of the speculation misfeature which is selected with arg2 of prctl(2). The return value uses bits 0-3 with the following meaning: -==== ===================== =================================================== -Bit Define Description -==== ===================== =================================================== -0 PR_SPEC_PRCTL Mitigation can be controlled per task by - PR_SET_SPECULATION_CTRL. -1 PR_SPEC_ENABLE The speculation feature is enabled, mitigation is - disabled. -2 PR_SPEC_DISABLE The speculation feature is disabled, mitigation is - enabled. -3 PR_SPEC_FORCE_DISABLE Same as PR_SPEC_DISABLE, but cannot be undone. A - subsequent prctl(..., PR_SPEC_ENABLE) will fail. -==== ===================== =================================================== +==== ====================== ================================================== +Bit Define Description +==== ====================== ================================================== +0 PR_SPEC_PRCTL Mitigation can be controlled per task by + PR_SET_SPECULATION_CTRL. +1 PR_SPEC_ENABLE The speculation feature is enabled, mitigation is + disabled. +2 PR_SPEC_DISABLE The speculation feature is disabled, mitigation is + enabled. +3 PR_SPEC_FORCE_DISABLE Same as PR_SPEC_DISABLE, but cannot be undone. A + subsequent prctl(..., PR_SPEC_ENABLE) will fail. +4 PR_SPEC_DISABLE_NOEXEC Same as PR_SPEC_DISABLE, but the state will be + cleared on :manpage:`execve(2)`. +==== ====================== ================================================== If all bits are 0 the CPU is not affected by the speculation misfeature. @@ -92,6 +94,7 @@ Speculation misfeature controls * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_ENABLE, 0, 0); * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_DISABLE, 0, 0); * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_FORCE_DISABLE, 0, 0); + * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_DISABLE_NOEXEC, 0, 0); - PR_SPEC_INDIR_BRANCH: Indirect Branch Speculation in User Processes (Mitigate Spectre V2 style attacks against user processes) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 1de0f4170178..2faeaf46347a 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -798,15 +798,25 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) if (task_spec_ssb_force_disable(task)) return -EPERM; task_clear_spec_ssb_disable(task); + task_clear_spec_ssb_noexec(task); task_update_spec_tif(task); break; case PR_SPEC_DISABLE: task_set_spec_ssb_disable(task); + task_clear_spec_ssb_noexec(task); task_update_spec_tif(task); break; case PR_SPEC_FORCE_DISABLE: task_set_spec_ssb_disable(task); task_set_spec_ssb_force_disable(task); + task_clear_spec_ssb_noexec(task); + task_update_spec_tif(task); + break; + case PR_SPEC_DISABLE_NOEXEC: + if (task_spec_ssb_force_disable(task)) + return -EPERM; + task_set_spec_ssb_disable(task); + task_set_spec_ssb_noexec(task); task_update_spec_tif(task); break; default: @@ -885,6 +895,8 @@ static int ssb_prctl_get(struct task_struct *task) case SPEC_STORE_BYPASS_PRCTL: if (task_spec_ssb_force_disable(task)) return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE; + if (task_spec_ssb_noexec(task)) + return PR_SPEC_PRCTL | PR_SPEC_DISABLE_NOEXEC; if (task_spec_ssb_disable(task)) return PR_SPEC_PRCTL | PR_SPEC_DISABLE; return PR_SPEC_PRCTL | PR_SPEC_ENABLE; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 90ae0ca51083..58ac7be52c7a 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -255,6 +255,18 @@ void arch_setup_new_exec(void) /* If cpuid was previously disabled for this task, re-enable it. */ if (test_thread_flag(TIF_NOCPUID)) enable_cpuid(); + + /* + * Don't inherit TIF_SSBD across exec boundary when + * PR_SPEC_DISABLE_NOEXEC is used. + */ + if (test_thread_flag(TIF_SSBD) && + task_spec_ssb_noexec(current)) { + clear_thread_flag(TIF_SSBD); + task_clear_spec_ssb_disable(current); + task_clear_spec_ssb_noexec(current); + speculation_ctrl_update(task_thread_info(current)->flags); + } } static inline void switch_to_bitmap(struct thread_struct *prev, diff --git a/include/linux/sched.h b/include/linux/sched.h index d2f90fa92468..fc836dc71bba 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1459,6 +1459,7 @@ static inline bool is_percpu_thread(void) #define PFA_SPEC_SSB_FORCE_DISABLE 4 /* Speculative Store Bypass force disabled*/ #define PFA_SPEC_IB_DISABLE 5 /* Indirect branch speculation restricted */ #define PFA_SPEC_IB_FORCE_DISABLE 6 /* Indirect branch speculation permanently restricted */ +#define PFA_SPEC_SSB_NOEXEC 7 /* Speculative Store Bypass clear on execve() */ #define TASK_PFA_TEST(name, func) \ static inline bool task_##func(struct task_struct *p) \ @@ -1487,6 +1488,10 @@ TASK_PFA_TEST(SPEC_SSB_DISABLE, spec_ssb_disable) TASK_PFA_SET(SPEC_SSB_DISABLE, spec_ssb_disable) TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable) +TASK_PFA_TEST(SPEC_SSB_NOEXEC, spec_ssb_noexec) +TASK_PFA_SET(SPEC_SSB_NOEXEC, spec_ssb_noexec) +TASK_PFA_CLEAR(SPEC_SSB_NOEXEC, spec_ssb_noexec) + TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable) TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable) diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index b4875a93363a..094bb03b9cc2 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -219,6 +219,7 @@ struct prctl_mm_map { # define PR_SPEC_ENABLE (1UL << 1) # define PR_SPEC_DISABLE (1UL << 2) # define PR_SPEC_FORCE_DISABLE (1UL << 3) +# define PR_SPEC_DISABLE_NOEXEC (1UL << 4) /* Reset arm64 pointer authentication keys */ #define PR_PAC_RESET_KEYS 54 diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h index b4875a93363a..094bb03b9cc2 100644 --- a/tools/include/uapi/linux/prctl.h +++ b/tools/include/uapi/linux/prctl.h @@ -219,6 +219,7 @@ struct prctl_mm_map { # define PR_SPEC_ENABLE (1UL << 1) # define PR_SPEC_DISABLE (1UL << 2) # define PR_SPEC_FORCE_DISABLE (1UL << 3) +# define PR_SPEC_DISABLE_NOEXEC (1UL << 4) /* Reset arm64 pointer authentication keys */ #define PR_PAC_RESET_KEYS 54 -- cgit From fab940755d1d78377901450b6ee7c77356e06821 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 27 Jan 2019 14:03:57 +0100 Subject: x86/hw_breakpoints, kprobes: Remove kprobes ifdeffery Remove the ifdeffery in the breakpoint parsing arch_build_bp_info() by adding a within_kprobe_blacklist() stub for the !CONFIG_KPROBES case. It is returning true when kprobes are not enabled to mean that any address is within the kprobes blacklist on such kernels and thus not allow kernel breakpoints on non-kprobes kernels. Signed-off-by: Borislav Petkov Acked-by: Masami Hiramatsu Cc: Anil S Keshavamurthy Cc: "David S. Miller" Cc: Frederic Weisbecker Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: "Naveen N. Rao" Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20190127131237.4557-1-bp@alien8.de --- arch/x86/kernel/hw_breakpoint.c | 4 ---- include/linux/kprobes.h | 5 +++++ 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 7d6f91f2869a..ff9bfd40429e 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -261,12 +261,8 @@ static int arch_build_bp_info(struct perf_event *bp, * allow kernel breakpoints at all. */ if (attr->bp_addr >= TASK_SIZE_MAX) { -#ifdef CONFIG_KPROBES if (within_kprobe_blacklist(attr->bp_addr)) return -EINVAL; -#else - return -EINVAL; -#endif } hw->type = X86_BREAKPOINT_EXECUTE; diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index e07e91daaacc..201f0f2683f2 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -442,6 +442,11 @@ static inline int enable_kprobe(struct kprobe *kp) { return -ENOSYS; } + +static inline bool within_kprobe_blacklist(unsigned long addr) +{ + return true; +} #endif /* CONFIG_KPROBES */ static inline int disable_kretprobe(struct kretprobe *rp) { -- cgit From 5c238a8b599f1ae25eaeb08ad0e9e13e2b9eb023 Mon Sep 17 00:00:00 2001 From: Amit Kucheria Date: Wed, 30 Jan 2019 10:52:01 +0530 Subject: cpufreq: Auto-register the driver as a thermal cooling device if asked All cpufreq drivers do similar things to register as a cooling device. Provide a cpufreq driver flag so drivers can just ask the cpufreq core to register the cooling device on their behalf. This allows us to get rid of duplicated code in the drivers. In order to allow this, we add a struct thermal_cooling_device pointer to struct cpufreq_policy so that drivers don't need to store it in a private data structure. Suggested-by: Stephen Boyd Suggested-by: Viresh Kumar Signed-off-by: Amit Kucheria Reviewed-by: Matthias Kaehlcke Tested-by: Matthias Kaehlcke Acked-by: Viresh Kumar Reviewed-by: Daniel Lezcano Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 11 +++++++++++ include/linux/cpufreq.h | 9 +++++++++ 2 files changed, 20 insertions(+) (limited to 'include/linux') diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 3eff158d9750..96a69c67a545 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -1316,6 +1317,10 @@ static int cpufreq_online(unsigned int cpu) if (cpufreq_driver->ready) cpufreq_driver->ready(policy); + if (IS_ENABLED(CONFIG_CPU_THERMAL) && + cpufreq_driver->flags & CPUFREQ_IS_COOLING_DEV) + policy->cdev = of_cpufreq_cooling_register(policy); + pr_debug("initialization complete\n"); return 0; @@ -1403,6 +1408,12 @@ static int cpufreq_offline(unsigned int cpu) goto unlock; } + if (IS_ENABLED(CONFIG_CPU_THERMAL) && + cpufreq_driver->flags & CPUFREQ_IS_COOLING_DEV) { + cpufreq_cooling_unregister(policy->cdev); + policy->cdev = NULL; + } + if (cpufreq_driver->stop_cpu) cpufreq_driver->stop_cpu(policy); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index c19142911554..9db074ecbbd7 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -151,6 +151,9 @@ struct cpufreq_policy { /* For cpufreq driver's internal use */ void *driver_data; + + /* Pointer to the cooling device if used for thermal mitigation */ + struct thermal_cooling_device *cdev; }; /* Only for ACPI */ @@ -378,6 +381,12 @@ struct cpufreq_driver { */ #define CPUFREQ_NO_AUTO_DYNAMIC_SWITCHING BIT(6) +/* + * Set by drivers that want the core to automatically register the cpufreq + * driver as a thermal cooling device. + */ +#define CPUFREQ_IS_COOLING_DEV BIT(7) + int cpufreq_register_driver(struct cpufreq_driver *driver_data); int cpufreq_unregister_driver(struct cpufreq_driver *driver_data); -- cgit From 9bc61ab18b1d41f26dc06b9e6d3c203e65f83fe6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sun, 4 Nov 2018 03:19:03 -0500 Subject: vfs: Introduce fs_context, switch vfs_kern_mount() to it. Introduce a filesystem context concept to be used during superblock creation for mount and superblock reconfiguration for remount. This is allocated at the beginning of the mount procedure and into it is placed: (1) Filesystem type. (2) Namespaces. (3) Source/Device names (there may be multiple). (4) Superblock flags (SB_*). (5) Security details. (6) Filesystem-specific data, as set by the mount options. Accessor functions are then provided to set up a context, parameterise it from monolithic mount data (the data page passed to mount(2)) and tear it down again. A legacy wrapper is provided that implements what will be the basic operations, wrapping access to filesystems that aren't yet aware of the fs_context. Finally, vfs_kern_mount() is changed to make use of the fs_context and mount_fs() is replaced by vfs_get_tree(), called from vfs_kern_mount(). [AV -- add missing kstrdup()] [AV -- put_cred() can be unconditional - fc->cred can't be NULL] [AV -- take legacy_validate() contents into legacy_parse_monolithic()] [AV -- merge KERNEL_MOUNT and USER_MOUNT] [AV -- don't unlock superblock on success return from vfs_get_tree()] [AV -- kill 'reference' argument of init_fs_context()] Signed-off-by: David Howells Co-developed-by: Al Viro Signed-off-by: Al Viro --- fs/Makefile | 3 +- fs/fs_context.c | 182 +++++++++++++++++++++++++++++++++++++++++++++ fs/internal.h | 9 ++- fs/namespace.c | 46 ++++++++---- fs/super.c | 50 ++++++------- include/linux/fs_context.h | 64 ++++++++++++++++ 6 files changed, 310 insertions(+), 44 deletions(-) create mode 100644 fs/fs_context.c create mode 100644 include/linux/fs_context.h (limited to 'include/linux') diff --git a/fs/Makefile b/fs/Makefile index 293733f61594..5563cf34f7c2 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -12,7 +12,8 @@ obj-y := open.o read_write.o file_table.o super.o \ attr.o bad_inode.o file.o filesystems.o namespace.o \ seq_file.o xattr.o libfs.o fs-writeback.o \ pnode.o splice.o sync.o utimes.o d_path.o \ - stack.o fs_struct.o statfs.o fs_pin.o nsfs.o + stack.o fs_struct.o statfs.o fs_pin.o nsfs.o \ + fs_context.o ifeq ($(CONFIG_BLOCK),y) obj-y += buffer.o block_dev.o direct-io.o mpage.o diff --git a/fs/fs_context.c b/fs/fs_context.c new file mode 100644 index 000000000000..4294091b689d --- /dev/null +++ b/fs/fs_context.c @@ -0,0 +1,182 @@ +/* Provide a way to create a superblock configuration context within the kernel + * that allows a superblock to be set up prior to mounting. + * + * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "mount.h" +#include "internal.h" + +struct legacy_fs_context { + char *legacy_data; /* Data page for legacy filesystems */ + size_t data_size; +}; + +static int legacy_init_fs_context(struct fs_context *fc); + +/** + * alloc_fs_context - Create a filesystem context. + * @fs_type: The filesystem type. + * @reference: The dentry from which this one derives (or NULL) + * @sb_flags: Filesystem/superblock flags (SB_*) + * @sb_flags_mask: Applicable members of @sb_flags + * @purpose: The purpose that this configuration shall be used for. + * + * Open a filesystem and create a mount context. The mount context is + * initialised with the supplied flags and, if a submount/automount from + * another superblock (referred to by @reference) is supplied, may have + * parameters such as namespaces copied across from that superblock. + */ +static struct fs_context *alloc_fs_context(struct file_system_type *fs_type, + struct dentry *reference, + unsigned int sb_flags, + unsigned int sb_flags_mask, + enum fs_context_purpose purpose) +{ + struct fs_context *fc; + int ret = -ENOMEM; + + fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL); + if (!fc) + return ERR_PTR(-ENOMEM); + + fc->purpose = purpose; + fc->sb_flags = sb_flags; + fc->sb_flags_mask = sb_flags_mask; + fc->fs_type = get_filesystem(fs_type); + fc->cred = get_current_cred(); + fc->net_ns = get_net(current->nsproxy->net_ns); + + switch (purpose) { + case FS_CONTEXT_FOR_MOUNT: + fc->user_ns = get_user_ns(fc->cred->user_ns); + break; + } + + ret = legacy_init_fs_context(fc); + if (ret < 0) + goto err_fc; + fc->need_free = true; + return fc; + +err_fc: + put_fs_context(fc); + return ERR_PTR(ret); +} + +struct fs_context *fs_context_for_mount(struct file_system_type *fs_type, + unsigned int sb_flags) +{ + return alloc_fs_context(fs_type, NULL, sb_flags, 0, + FS_CONTEXT_FOR_MOUNT); +} +EXPORT_SYMBOL(fs_context_for_mount); + +static void legacy_fs_context_free(struct fs_context *fc); +/** + * put_fs_context - Dispose of a superblock configuration context. + * @fc: The context to dispose of. + */ +void put_fs_context(struct fs_context *fc) +{ + struct super_block *sb; + + if (fc->root) { + sb = fc->root->d_sb; + dput(fc->root); + fc->root = NULL; + deactivate_super(sb); + } + + if (fc->need_free) + legacy_fs_context_free(fc); + + security_free_mnt_opts(&fc->security); + if (fc->net_ns) + put_net(fc->net_ns); + put_user_ns(fc->user_ns); + put_cred(fc->cred); + kfree(fc->subtype); + put_filesystem(fc->fs_type); + kfree(fc->source); + kfree(fc); +} +EXPORT_SYMBOL(put_fs_context); + +/* + * Free the config for a filesystem that doesn't support fs_context. + */ +static void legacy_fs_context_free(struct fs_context *fc) +{ + kfree(fc->fs_private); +} + +/* + * Add monolithic mount data. + */ +static int legacy_parse_monolithic(struct fs_context *fc, void *data) +{ + struct legacy_fs_context *ctx = fc->fs_private; + ctx->legacy_data = data; + if (!ctx->legacy_data) + return 0; + if (fc->fs_type->fs_flags & FS_BINARY_MOUNTDATA) + return 0; + return security_sb_eat_lsm_opts(ctx->legacy_data, &fc->security); +} + +/* + * Get a mountable root with the legacy mount command. + */ +int legacy_get_tree(struct fs_context *fc) +{ + struct legacy_fs_context *ctx = fc->fs_private; + struct super_block *sb; + struct dentry *root; + + root = fc->fs_type->mount(fc->fs_type, fc->sb_flags, + fc->source, ctx->legacy_data); + if (IS_ERR(root)) + return PTR_ERR(root); + + sb = root->d_sb; + BUG_ON(!sb); + + fc->root = root; + return 0; +} + +/* + * Initialise a legacy context for a filesystem that doesn't support + * fs_context. + */ +static int legacy_init_fs_context(struct fs_context *fc) +{ + fc->fs_private = kzalloc(sizeof(struct legacy_fs_context), GFP_KERNEL); + if (!fc->fs_private) + return -ENOMEM; + return 0; +} + +int parse_monolithic_mount_data(struct fs_context *fc, void *data) +{ + return legacy_parse_monolithic(fc, data); +} diff --git a/fs/internal.h b/fs/internal.h index d410186bc369..f85c3212d25d 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -17,6 +17,7 @@ struct linux_binprm; struct path; struct mount; struct shrink_control; +struct fs_context; /* * block_dev.c @@ -51,6 +52,12 @@ int __generic_write_end(struct inode *inode, loff_t pos, unsigned copied, */ extern void __init chrdev_init(void); +/* + * fs_context.c + */ +extern int legacy_get_tree(struct fs_context *fc); +extern int parse_monolithic_mount_data(struct fs_context *, void *); + /* * namei.c */ @@ -101,8 +108,6 @@ extern struct file *alloc_empty_file_noaccount(int, const struct cred *); */ extern int do_remount_sb(struct super_block *, int, void *, int); extern bool trylock_super(struct super_block *sb); -extern struct dentry *mount_fs(struct file_system_type *, - int, const char *, void *); extern struct super_block *user_get_super(dev_t); /* diff --git a/fs/namespace.c b/fs/namespace.c index f0b8a8ca08df..3f2fd7a34733 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -27,6 +27,7 @@ #include #include #include +#include #include "pnode.h" #include "internal.h" @@ -940,36 +941,53 @@ static struct mount *skip_mnt_tree(struct mount *p) return p; } -struct vfsmount * -vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data) +struct vfsmount *vfs_kern_mount(struct file_system_type *type, + int flags, const char *name, + void *data) { + struct fs_context *fc; struct mount *mnt; - struct dentry *root; + int ret = 0; if (!type) return ERR_PTR(-ENODEV); + fc = fs_context_for_mount(type, flags); + if (IS_ERR(fc)) + return ERR_CAST(fc); + + if (name) { + fc->source = kstrdup(name, GFP_KERNEL); + if (!fc->source) + ret = -ENOMEM; + } + if (!ret) + ret = parse_monolithic_mount_data(fc, data); + if (!ret) + ret = vfs_get_tree(fc); + if (ret) { + put_fs_context(fc); + return ERR_PTR(ret); + } + up_write(&fc->root->d_sb->s_umount); mnt = alloc_vfsmnt(name); - if (!mnt) + if (!mnt) { + put_fs_context(fc); return ERR_PTR(-ENOMEM); + } if (flags & SB_KERNMOUNT) mnt->mnt.mnt_flags = MNT_INTERNAL; - root = mount_fs(type, flags, name, data); - if (IS_ERR(root)) { - mnt_free_id(mnt); - free_vfsmnt(mnt); - return ERR_CAST(root); - } - - mnt->mnt.mnt_root = root; - mnt->mnt.mnt_sb = root->d_sb; + atomic_inc(&fc->root->d_sb->s_active); + mnt->mnt.mnt_root = dget(fc->root); + mnt->mnt.mnt_sb = fc->root->d_sb; mnt->mnt_mountpoint = mnt->mnt.mnt_root; mnt->mnt_parent = mnt; lock_mount_hash(); - list_add_tail(&mnt->mnt_instance, &root->d_sb->s_mounts); + list_add_tail(&mnt->mnt_instance, &fc->root->d_sb->s_mounts); unlock_mount_hash(); + put_fs_context(fc); return &mnt->mnt; } EXPORT_SYMBOL_GPL(vfs_kern_mount); diff --git a/fs/super.c b/fs/super.c index 48e25eba8465..fc3887277ad1 100644 --- a/fs/super.c +++ b/fs/super.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include "internal.h" @@ -1241,27 +1242,24 @@ struct dentry *mount_single(struct file_system_type *fs_type, } EXPORT_SYMBOL(mount_single); -struct dentry * -mount_fs(struct file_system_type *type, int flags, const char *name, void *data) +/** + * vfs_get_tree - Get the mountable root + * @fc: The superblock configuration context. + * + * The filesystem is invoked to get or create a superblock which can then later + * be used for mounting. The filesystem places a pointer to the root to be + * used for mounting in @fc->root. + */ +int vfs_get_tree(struct fs_context *fc) { - struct dentry *root; struct super_block *sb; - int error = -ENOMEM; - void *sec_opts = NULL; + int error; - if (data && !(type->fs_flags & FS_BINARY_MOUNTDATA)) { - error = security_sb_eat_lsm_opts(data, &sec_opts); - if (error) - return ERR_PTR(error); - } + error = legacy_get_tree(fc); + if (error < 0) + return error; - root = type->mount(type, flags, name, data); - if (IS_ERR(root)) { - error = PTR_ERR(root); - goto out_free_secdata; - } - sb = root->d_sb; - BUG_ON(!sb); + sb = fc->root->d_sb; WARN_ON(!sb->s_bdi); /* @@ -1273,11 +1271,11 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data) smp_wmb(); sb->s_flags |= SB_BORN; - error = security_sb_set_mnt_opts(sb, sec_opts, 0, NULL); + error = security_sb_set_mnt_opts(sb, fc->security, 0, NULL); if (error) goto out_sb; - if (!(flags & (MS_KERNMOUNT|MS_SUBMOUNT))) { + if (!(fc->sb_flags & (MS_KERNMOUNT|MS_SUBMOUNT))) { error = security_sb_kern_mount(sb); if (error) goto out_sb; @@ -1290,18 +1288,16 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data) * violate this rule. */ WARN((sb->s_maxbytes < 0), "%s set sb->s_maxbytes to " - "negative value (%lld)\n", type->name, sb->s_maxbytes); + "negative value (%lld)\n", fc->fs_type->name, sb->s_maxbytes); - up_write(&sb->s_umount); - security_free_mnt_opts(&sec_opts); - return root; + return 0; out_sb: - dput(root); + dput(fc->root); + fc->root = NULL; deactivate_locked_super(sb); -out_free_secdata: - security_free_mnt_opts(&sec_opts); - return ERR_PTR(error); + return error; } +EXPORT_SYMBOL(vfs_get_tree); /* * Setup private BDI for given superblock. It gets automatically cleaned up diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h new file mode 100644 index 000000000000..9805514444c9 --- /dev/null +++ b/include/linux/fs_context.h @@ -0,0 +1,64 @@ +/* Filesystem superblock creation and reconfiguration context. + * + * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#ifndef _LINUX_FS_CONTEXT_H +#define _LINUX_FS_CONTEXT_H + +#include +#include +#include + +struct cred; +struct dentry; +struct file_operations; +struct file_system_type; +struct net; +struct user_namespace; + +enum fs_context_purpose { + FS_CONTEXT_FOR_MOUNT, /* New superblock for explicit mount */ +}; + +/* + * Filesystem context for holding the parameters used in the creation or + * reconfiguration of a superblock. + * + * Superblock creation fills in ->root whereas reconfiguration begins with this + * already set. + * + * See Documentation/filesystems/mounting.txt + */ +struct fs_context { + struct file_system_type *fs_type; + void *fs_private; /* The filesystem's context */ + struct dentry *root; /* The root and superblock */ + struct user_namespace *user_ns; /* The user namespace for this mount */ + struct net *net_ns; /* The network namespace for this mount */ + const struct cred *cred; /* The mounter's credentials */ + const char *source; /* The source name (eg. dev path) */ + const char *subtype; /* The subtype to set on the superblock */ + void *security; /* Linux S&M options */ + unsigned int sb_flags; /* Proposed superblock flags (SB_*) */ + unsigned int sb_flags_mask; /* Superblock flags that were changed */ + enum fs_context_purpose purpose:8; + bool need_free:1; /* Need to call ops->free() */ +}; + +/* + * fs_context manipulation functions. + */ +extern struct fs_context *fs_context_for_mount(struct file_system_type *fs_type, + unsigned int sb_flags); + +extern int vfs_get_tree(struct fs_context *fc); +extern void put_fs_context(struct fs_context *fc); + +#endif /* _LINUX_FS_CONTEXT_H */ -- cgit From 8f2918898eb5fe25845dde7f4a77bda0e2966e05 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 4 Nov 2018 06:48:34 -0500 Subject: new helpers: vfs_create_mount(), fc_mount() Create a new helper, vfs_create_mount(), that creates a detached vfsmount object from an fs_context that has a superblock attached to it. Almost all uses will be paired with immediately preceding vfs_get_tree(); add a helper for such combination. Switch vfs_kern_mount() to use this. NOTE: mild behaviour change; passing NULL as 'device name' to something like procfs will change /proc/*/mountstats - "device none" instead on "no device". That is consistent with /proc/mounts et.al. [do'h - EXPORT_SYMBOL_GPL slipped in by mistake; removed] [AV -- remove confused comment from vfs_create_mount()] [AV -- removed the second argument] Reviewed-by: David Howells Signed-off-by: Al Viro --- fs/namespace.c | 76 +++++++++++++++++++++++++++++++++++---------------- include/linux/mount.h | 3 ++ 2 files changed, 55 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/fs/namespace.c b/fs/namespace.c index 3f2fd7a34733..156771f5745a 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -941,12 +941,59 @@ static struct mount *skip_mnt_tree(struct mount *p) return p; } +/** + * vfs_create_mount - Create a mount for a configured superblock + * @fc: The configuration context with the superblock attached + * + * Create a mount to an already configured superblock. If necessary, the + * caller should invoke vfs_get_tree() before calling this. + * + * Note that this does not attach the mount to anything. + */ +struct vfsmount *vfs_create_mount(struct fs_context *fc) +{ + struct mount *mnt; + + if (!fc->root) + return ERR_PTR(-EINVAL); + + mnt = alloc_vfsmnt(fc->source ?: "none"); + if (!mnt) + return ERR_PTR(-ENOMEM); + + if (fc->sb_flags & SB_KERNMOUNT) + mnt->mnt.mnt_flags = MNT_INTERNAL; + + atomic_inc(&fc->root->d_sb->s_active); + mnt->mnt.mnt_sb = fc->root->d_sb; + mnt->mnt.mnt_root = dget(fc->root); + mnt->mnt_mountpoint = mnt->mnt.mnt_root; + mnt->mnt_parent = mnt; + + lock_mount_hash(); + list_add_tail(&mnt->mnt_instance, &mnt->mnt.mnt_sb->s_mounts); + unlock_mount_hash(); + return &mnt->mnt; +} +EXPORT_SYMBOL(vfs_create_mount); + +struct vfsmount *fc_mount(struct fs_context *fc) +{ + int err = vfs_get_tree(fc); + if (!err) { + up_write(&fc->root->d_sb->s_umount); + return vfs_create_mount(fc); + } + return ERR_PTR(err); +} +EXPORT_SYMBOL(fc_mount); + struct vfsmount *vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data) { struct fs_context *fc; - struct mount *mnt; + struct vfsmount *mnt; int ret = 0; if (!type) @@ -964,31 +1011,12 @@ struct vfsmount *vfs_kern_mount(struct file_system_type *type, if (!ret) ret = parse_monolithic_mount_data(fc, data); if (!ret) - ret = vfs_get_tree(fc); - if (ret) { - put_fs_context(fc); - return ERR_PTR(ret); - } - up_write(&fc->root->d_sb->s_umount); - mnt = alloc_vfsmnt(name); - if (!mnt) { - put_fs_context(fc); - return ERR_PTR(-ENOMEM); - } - - if (flags & SB_KERNMOUNT) - mnt->mnt.mnt_flags = MNT_INTERNAL; + mnt = fc_mount(fc); + else + mnt = ERR_PTR(ret); - atomic_inc(&fc->root->d_sb->s_active); - mnt->mnt.mnt_root = dget(fc->root); - mnt->mnt.mnt_sb = fc->root->d_sb; - mnt->mnt_mountpoint = mnt->mnt.mnt_root; - mnt->mnt_parent = mnt; - lock_mount_hash(); - list_add_tail(&mnt->mnt_instance, &fc->root->d_sb->s_mounts); - unlock_mount_hash(); put_fs_context(fc); - return &mnt->mnt; + return mnt; } EXPORT_SYMBOL_GPL(vfs_kern_mount); diff --git a/include/linux/mount.h b/include/linux/mount.h index 037eed52164b..9197ddbf35fb 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -21,6 +21,7 @@ struct super_block; struct vfsmount; struct dentry; struct mnt_namespace; +struct fs_context; #define MNT_NOSUID 0x01 #define MNT_NODEV 0x02 @@ -88,6 +89,8 @@ struct path; extern struct vfsmount *clone_private_mount(const struct path *path); struct file_system_type; +extern struct vfsmount *fc_mount(struct fs_context *fc); +extern struct vfsmount *vfs_create_mount(struct fs_context *fc); extern struct vfsmount *vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data); -- cgit From a0c9a8b8fd9fd572b0d60276beb2142c8f59f9b8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 4 Nov 2018 07:18:51 -0500 Subject: teach vfs_get_tree() to handle subtype, switch do_new_mount() to it Roll the handling of subtypes into do_new_mount() and vfs_get_tree(). The former determines any subtype string and hangs it off the fs_context; the latter applies it. Make do_new_mount() create, parameterise and commit an fs_context and create a mount for itself rather than calling vfs_kern_mount(). [AV -- missing kstrdup()] [AV -- ... and no kstrdup() if we get to setting ->s_submount - we simply transfer it from fc, leaving NULL behind] [AV -- constify ->s_submount, while we are at it] Reviewed-by: David Howells Signed-off-by: Al Viro --- fs/namespace.c | 77 ++++++++++++++++++++++++++++++++---------------------- fs/super.c | 5 ++++ include/linux/fs.h | 2 +- 3 files changed, 52 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/fs/namespace.c b/fs/namespace.c index 156771f5745a..0354cb6ac2d3 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2479,29 +2479,6 @@ out: return err; } -static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype) -{ - int err; - const char *subtype = strchr(fstype, '.'); - if (subtype) { - subtype++; - err = -EINVAL; - if (!subtype[0]) - goto err; - } else - subtype = ""; - - mnt->mnt_sb->s_subtype = kstrdup(subtype, GFP_KERNEL); - err = -ENOMEM; - if (!mnt->mnt_sb->s_subtype) - goto err; - return mnt; - - err: - mntput(mnt); - return ERR_PTR(err); -} - /* * add a mount into a namespace's mount tree */ @@ -2557,7 +2534,9 @@ static int do_new_mount(struct path *path, const char *fstype, int sb_flags, { struct file_system_type *type; struct vfsmount *mnt; - int err; + struct fs_context *fc; + const char *subtype = NULL; + int err = 0; if (!fstype) return -EINVAL; @@ -2566,23 +2545,59 @@ static int do_new_mount(struct path *path, const char *fstype, int sb_flags, if (!type) return -ENODEV; - mnt = vfs_kern_mount(type, sb_flags, name, data); - if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) && - !mnt->mnt_sb->s_subtype) - mnt = fs_set_subtype(mnt, fstype); + if (type->fs_flags & FS_HAS_SUBTYPE) { + subtype = strchr(fstype, '.'); + if (subtype) { + subtype++; + if (!*subtype) { + put_filesystem(type); + return -EINVAL; + } + } else { + subtype = ""; + } + } + fc = fs_context_for_mount(type, sb_flags); put_filesystem(type); - if (IS_ERR(mnt)) - return PTR_ERR(mnt); + if (IS_ERR(fc)) + return PTR_ERR(fc); + + if (subtype) { + fc->subtype = kstrdup(subtype, GFP_KERNEL); + if (!fc->subtype) + err = -ENOMEM; + } + if (!err && name) { + fc->source = kstrdup(name, GFP_KERNEL); + if (!fc->source) + err = -ENOMEM; + } + if (!err) + err = parse_monolithic_mount_data(fc, data); + if (!err) + err = vfs_get_tree(fc); + if (err) + goto out; + + up_write(&fc->root->d_sb->s_umount); + mnt = vfs_create_mount(fc); + if (IS_ERR(mnt)) { + err = PTR_ERR(mnt); + goto out; + } if (mount_too_revealing(mnt, &mnt_flags)) { mntput(mnt); - return -EPERM; + err = -EPERM; + goto out; } err = do_add_mount(real_mount(mnt), path, mnt_flags); if (err) mntput(mnt); +out: + put_fs_context(fc); return err; } diff --git a/fs/super.c b/fs/super.c index fc3887277ad1..b91b6df05b67 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1262,6 +1262,11 @@ int vfs_get_tree(struct fs_context *fc) sb = fc->root->d_sb; WARN_ON(!sb->s_bdi); + if (fc->subtype && !sb->s_subtype) { + sb->s_subtype = fc->subtype; + fc->subtype = NULL; + } + /* * Write barrier is for super_cache_count(). We place it before setting * SB_BORN as the data dependency between the two functions is the diff --git a/include/linux/fs.h b/include/linux/fs.h index 811c77743dad..36fff12ab890 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1447,7 +1447,7 @@ struct super_block { * Filesystem subtype. If non-empty the filesystem type field * in /proc/mounts will be "type.subtype" */ - char *s_subtype; + const char *s_subtype; const struct dentry_operations *s_d_op; /* default d_op for dentries */ -- cgit From 8d0347f6c3a9d4953ddd636a31c6584da082e084 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sun, 4 Nov 2018 09:28:36 -0500 Subject: convert do_remount_sb() to fs_context Replace do_remount_sb() with a function, reconfigure_super(), that's fs_context aware. The fs_context is expected to be parameterised already and have ->root pointing to the superblock to be reconfigured. A legacy wrapper is provided that is intended to be called from the fs_context ops when those appear, but for now is called directly from reconfigure_super(). This wrapper invokes the ->remount_fs() superblock op for the moment. It is intended that the remount_fs() op will be phased out. The fs_context->purpose is set to FS_CONTEXT_FOR_RECONFIGURE to indicate that the context is being used for reconfiguration. do_umount_root() is provided to consolidate remount-to-R/O for umount and emergency remount by creating a context and invoking reconfiguration. do_remount(), do_umount() and do_emergency_remount_callback() are switched to use the new process. [AV -- fold UMOUNT and EMERGENCY_REMOUNT in; fixes the umount / bug, gets rid of pointless complexity] [AV -- set ->net_ns in all cases; nfs remount will need that] [AV -- shift security_sb_remount() call into reconfigure_super(); the callers that didn't do security_sb_remount() have NULL fc->security anyway, so it's a no-op for them] Signed-off-by: David Howells Co-developed-by: Al Viro Signed-off-by: Al Viro --- fs/fs_context.c | 35 ++++++++++++++- fs/internal.h | 3 +- fs/namespace.c | 61 ++++++++++++++++---------- fs/super.c | 107 +++++++++++++++++++++++++++++++-------------- include/linux/fs.h | 1 + include/linux/fs_context.h | 4 ++ 6 files changed, 152 insertions(+), 59 deletions(-) (limited to 'include/linux') diff --git a/fs/fs_context.c b/fs/fs_context.c index 857cd46a687b..5e2c3aba1dd8 100644 --- a/fs/fs_context.c +++ b/fs/fs_context.c @@ -69,6 +69,13 @@ static struct fs_context *alloc_fs_context(struct file_system_type *fs_type, case FS_CONTEXT_FOR_MOUNT: fc->user_ns = get_user_ns(fc->cred->user_ns); break; + case FS_CONTEXT_FOR_RECONFIGURE: + /* We don't pin any namespaces as the superblock's + * subscriptions cannot be changed at this point. + */ + atomic_inc(&reference->d_sb->s_active); + fc->root = dget(reference); + break; } ret = legacy_init_fs_context(fc); @@ -90,6 +97,15 @@ struct fs_context *fs_context_for_mount(struct file_system_type *fs_type, } EXPORT_SYMBOL(fs_context_for_mount); +struct fs_context *fs_context_for_reconfigure(struct dentry *dentry, + unsigned int sb_flags, + unsigned int sb_flags_mask) +{ + return alloc_fs_context(dentry->d_sb->s_type, dentry, sb_flags, + sb_flags_mask, FS_CONTEXT_FOR_RECONFIGURE); +} +EXPORT_SYMBOL(fs_context_for_reconfigure); + void fc_drop_locked(struct fs_context *fc) { struct super_block *sb = fc->root->d_sb; @@ -99,6 +115,7 @@ void fc_drop_locked(struct fs_context *fc) } static void legacy_fs_context_free(struct fs_context *fc); + /** * put_fs_context - Dispose of a superblock configuration context. * @fc: The context to dispose of. @@ -118,8 +135,7 @@ void put_fs_context(struct fs_context *fc) legacy_fs_context_free(fc); security_free_mnt_opts(&fc->security); - if (fc->net_ns) - put_net(fc->net_ns); + put_net(fc->net_ns); put_user_ns(fc->user_ns); put_cred(fc->cred); kfree(fc->subtype); @@ -172,6 +188,21 @@ int legacy_get_tree(struct fs_context *fc) return 0; } +/* + * Handle remount. + */ +int legacy_reconfigure(struct fs_context *fc) +{ + struct legacy_fs_context *ctx = fc->fs_private; + struct super_block *sb = fc->root->d_sb; + + if (!sb->s_op->remount_fs) + return 0; + + return sb->s_op->remount_fs(sb, &fc->sb_flags, + ctx ? ctx->legacy_data : NULL); +} + /* * Initialise a legacy context for a filesystem that doesn't support * fs_context. diff --git a/fs/internal.h b/fs/internal.h index 6af26d897034..016a5b8dd305 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -56,6 +56,7 @@ extern void __init chrdev_init(void); * fs_context.c */ extern int legacy_get_tree(struct fs_context *fc); +extern int legacy_reconfigure(struct fs_context *fc); extern int parse_monolithic_mount_data(struct fs_context *, void *); extern void fc_drop_locked(struct fs_context *); @@ -107,7 +108,7 @@ extern struct file *alloc_empty_file_noaccount(int, const struct cred *); /* * super.c */ -extern int do_remount_sb(struct super_block *, int, void *, int); +extern int reconfigure_super(struct fs_context *); extern bool trylock_super(struct super_block *sb); extern struct super_block *user_get_super(dev_t); diff --git a/fs/namespace.c b/fs/namespace.c index 750500c6c33d..931228d8518a 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1489,6 +1489,29 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how) static void shrink_submounts(struct mount *mnt); +static int do_umount_root(struct super_block *sb) +{ + int ret = 0; + + down_write(&sb->s_umount); + if (!sb_rdonly(sb)) { + struct fs_context *fc; + + fc = fs_context_for_reconfigure(sb->s_root, SB_RDONLY, + SB_RDONLY); + if (IS_ERR(fc)) { + ret = PTR_ERR(fc); + } else { + ret = parse_monolithic_mount_data(fc, NULL); + if (!ret) + ret = reconfigure_super(fc); + put_fs_context(fc); + } + } + up_write(&sb->s_umount); + return ret; +} + static int do_umount(struct mount *mnt, int flags) { struct super_block *sb = mnt->mnt.mnt_sb; @@ -1554,11 +1577,7 @@ static int do_umount(struct mount *mnt, int flags) */ if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) return -EPERM; - down_write(&sb->s_umount); - if (!sb_rdonly(sb)) - retval = do_remount_sb(sb, SB_RDONLY, NULL, 0); - up_write(&sb->s_umount); - return retval; + return do_umount_root(sb); } namespace_lock(); @@ -2367,7 +2386,7 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags, int err; struct super_block *sb = path->mnt->mnt_sb; struct mount *mnt = real_mount(path->mnt); - void *sec_opts = NULL; + struct fs_context *fc; if (!check_mnt(mnt)) return -EINVAL; @@ -2378,24 +2397,22 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags, if (!can_change_locked_flags(mnt, mnt_flags)) return -EPERM; - if (data && !(sb->s_type->fs_flags & FS_BINARY_MOUNTDATA)) { - err = security_sb_eat_lsm_opts(data, &sec_opts); - if (err) - return err; - } - err = security_sb_remount(sb, sec_opts); - security_free_mnt_opts(&sec_opts); - if (err) - return err; + fc = fs_context_for_reconfigure(path->dentry, sb_flags, MS_RMT_MASK); + if (IS_ERR(fc)) + return PTR_ERR(fc); - down_write(&sb->s_umount); - err = -EPERM; - if (ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) { - err = do_remount_sb(sb, sb_flags, data, 0); - if (!err) - set_mount_attributes(mnt, mnt_flags); + err = parse_monolithic_mount_data(fc, data); + if (!err) { + down_write(&sb->s_umount); + err = -EPERM; + if (ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) { + err = reconfigure_super(fc); + if (!err) + set_mount_attributes(mnt, mnt_flags); + } + up_write(&sb->s_umount); } - up_write(&sb->s_umount); + put_fs_context(fc); return err; } diff --git a/fs/super.c b/fs/super.c index 11e2a6cb3baf..50553233dd15 100644 --- a/fs/super.c +++ b/fs/super.c @@ -836,28 +836,35 @@ rescan: } /** - * do_remount_sb - asks filesystem to change mount options. - * @sb: superblock in question - * @sb_flags: revised superblock flags - * @data: the rest of options - * @force: whether or not to force the change + * reconfigure_super - asks filesystem to change superblock parameters + * @fc: The superblock and configuration * - * Alters the mount options of a mounted file system. + * Alters the configuration parameters of a live superblock. */ -int do_remount_sb(struct super_block *sb, int sb_flags, void *data, int force) +int reconfigure_super(struct fs_context *fc) { + struct super_block *sb = fc->root->d_sb; int retval; - int remount_ro; + bool remount_ro = false; + bool force = fc->sb_flags & SB_FORCE; + if (fc->sb_flags_mask & ~MS_RMT_MASK) + return -EINVAL; if (sb->s_writers.frozen != SB_UNFROZEN) return -EBUSY; + retval = security_sb_remount(sb, fc->security); + if (retval) + return retval; + + if (fc->sb_flags_mask & SB_RDONLY) { #ifdef CONFIG_BLOCK - if (!(sb_flags & SB_RDONLY) && bdev_read_only(sb->s_bdev)) - return -EACCES; + if (!(fc->sb_flags & SB_RDONLY) && bdev_read_only(sb->s_bdev)) + return -EACCES; #endif - remount_ro = (sb_flags & SB_RDONLY) && !sb_rdonly(sb); + remount_ro = (fc->sb_flags & SB_RDONLY) && !sb_rdonly(sb); + } if (remount_ro) { if (!hlist_empty(&sb->s_pins)) { @@ -868,13 +875,14 @@ int do_remount_sb(struct super_block *sb, int sb_flags, void *data, int force) return 0; if (sb->s_writers.frozen != SB_UNFROZEN) return -EBUSY; - remount_ro = (sb_flags & SB_RDONLY) && !sb_rdonly(sb); + remount_ro = !sb_rdonly(sb); } } shrink_dcache_sb(sb); - /* If we are remounting RDONLY and current sb is read/write, - make sure there are no rw files opened */ + /* If we are reconfiguring to RDONLY and current sb is read/write, + * make sure there are no files open for writing. + */ if (remount_ro) { if (force) { sb->s_readonly_remount = 1; @@ -886,17 +894,17 @@ int do_remount_sb(struct super_block *sb, int sb_flags, void *data, int force) } } - if (sb->s_op->remount_fs) { - retval = sb->s_op->remount_fs(sb, &sb_flags, data); - if (retval) { - if (!force) - goto cancel_readonly; - /* If forced remount, go ahead despite any errors */ - WARN(1, "forced remount of a %s fs returned %i\n", - sb->s_type->name, retval); - } + retval = legacy_reconfigure(fc); + if (retval) { + if (!force) + goto cancel_readonly; + /* If forced remount, go ahead despite any errors */ + WARN(1, "forced remount of a %s fs returned %i\n", + sb->s_type->name, retval); } - sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (sb_flags & MS_RMT_MASK); + + WRITE_ONCE(sb->s_flags, ((sb->s_flags & ~fc->sb_flags_mask) | + (fc->sb_flags & fc->sb_flags_mask))); /* Needs to be ordered wrt mnt_is_readonly() */ smp_wmb(); sb->s_readonly_remount = 0; @@ -923,10 +931,15 @@ static void do_emergency_remount_callback(struct super_block *sb) down_write(&sb->s_umount); if (sb->s_root && sb->s_bdev && (sb->s_flags & SB_BORN) && !sb_rdonly(sb)) { - /* - * What lock protects sb->s_flags?? - */ - do_remount_sb(sb, SB_RDONLY, NULL, 1); + struct fs_context *fc; + + fc = fs_context_for_reconfigure(sb->s_root, + SB_RDONLY | SB_FORCE, SB_RDONLY); + if (!IS_ERR(fc)) { + if (parse_monolithic_mount_data(fc, NULL) == 0) + (void)reconfigure_super(fc); + put_fs_context(fc); + } } up_write(&sb->s_umount); } @@ -1213,6 +1226,31 @@ struct dentry *mount_nodev(struct file_system_type *fs_type, } EXPORT_SYMBOL(mount_nodev); +static int reconfigure_single(struct super_block *s, + int flags, void *data) +{ + struct fs_context *fc; + int ret; + + /* The caller really need to be passing fc down into mount_single(), + * then a chunk of this can be removed. [Bollocks -- AV] + * Better yet, reconfiguration shouldn't happen, but rather the second + * mount should be rejected if the parameters are not compatible. + */ + fc = fs_context_for_reconfigure(s->s_root, flags, MS_RMT_MASK); + if (IS_ERR(fc)) + return PTR_ERR(fc); + + ret = parse_monolithic_mount_data(fc, data); + if (ret < 0) + goto out; + + ret = reconfigure_super(fc); +out: + put_fs_context(fc); + return ret; +} + static int compare_single(struct super_block *s, void *p) { return 1; @@ -1230,13 +1268,14 @@ struct dentry *mount_single(struct file_system_type *fs_type, return ERR_CAST(s); if (!s->s_root) { error = fill_super(s, data, flags & SB_SILENT ? 1 : 0); - if (error) { - deactivate_locked_super(s); - return ERR_PTR(error); - } - s->s_flags |= SB_ACTIVE; + if (!error) + s->s_flags |= SB_ACTIVE; } else { - do_remount_sb(s, flags, data, 0); + error = reconfigure_single(s, flags, data); + } + if (unlikely(error)) { + deactivate_locked_super(s); + return ERR_PTR(error); } return dget(s->s_root); } diff --git a/include/linux/fs.h b/include/linux/fs.h index 36fff12ab890..c65d02c5c512 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1337,6 +1337,7 @@ extern int send_sigurg(struct fown_struct *fown); /* These sb flags are internal to the kernel */ #define SB_SUBMOUNT (1<<26) +#define SB_FORCE (1<<27) #define SB_NOSEC (1<<28) #define SB_BORN (1<<29) #define SB_ACTIVE (1<<30) diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h index 9805514444c9..98772f882a3e 100644 --- a/include/linux/fs_context.h +++ b/include/linux/fs_context.h @@ -25,6 +25,7 @@ struct user_namespace; enum fs_context_purpose { FS_CONTEXT_FOR_MOUNT, /* New superblock for explicit mount */ + FS_CONTEXT_FOR_RECONFIGURE, /* Superblock reconfiguration (remount) */ }; /* @@ -57,6 +58,9 @@ struct fs_context { */ extern struct fs_context *fs_context_for_mount(struct file_system_type *fs_type, unsigned int sb_flags); +extern struct fs_context *fs_context_for_reconfigure(struct dentry *dentry, + unsigned int sb_flags, + unsigned int sb_flags_mask); extern int vfs_get_tree(struct fs_context *fc); extern void put_fs_context(struct fs_context *fc); -- cgit From e1a91586d5da6f879b6dd385a2e7227bf1653570 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 23 Dec 2018 16:25:31 -0500 Subject: fs_context flavour for submounts This is an eventual replacement for vfs_submount() uses. Unlike the "mount" and "remount" cases, the users of that thing are not in VFS - they are buried in various ->d_automount() instances and rather than converting them all at once we introduce the (thankfully small and simple) infrastructure here and deal with the prospective users in afs, nfs, etc. parts of the series. Here we just introduce a new constructor (fs_context_for_submount()) along with the corresponding enum constant to be put into fc->purpose for those. Signed-off-by: Al Viro --- fs/fs_context.c | 10 ++++++++++ include/linux/fs_context.h | 3 +++ 2 files changed, 13 insertions(+) (limited to 'include/linux') diff --git a/fs/fs_context.c b/fs/fs_context.c index 5e2c3aba1dd8..2bd652b6e848 100644 --- a/fs/fs_context.c +++ b/fs/fs_context.c @@ -69,6 +69,9 @@ static struct fs_context *alloc_fs_context(struct file_system_type *fs_type, case FS_CONTEXT_FOR_MOUNT: fc->user_ns = get_user_ns(fc->cred->user_ns); break; + case FS_CONTEXT_FOR_SUBMOUNT: + fc->user_ns = get_user_ns(reference->d_sb->s_user_ns); + break; case FS_CONTEXT_FOR_RECONFIGURE: /* We don't pin any namespaces as the superblock's * subscriptions cannot be changed at this point. @@ -106,6 +109,13 @@ struct fs_context *fs_context_for_reconfigure(struct dentry *dentry, } EXPORT_SYMBOL(fs_context_for_reconfigure); +struct fs_context *fs_context_for_submount(struct file_system_type *type, + struct dentry *reference) +{ + return alloc_fs_context(type, reference, 0, 0, FS_CONTEXT_FOR_SUBMOUNT); +} +EXPORT_SYMBOL(fs_context_for_submount); + void fc_drop_locked(struct fs_context *fc) { struct super_block *sb = fc->root->d_sb; diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h index 98772f882a3e..7feb018c7a9e 100644 --- a/include/linux/fs_context.h +++ b/include/linux/fs_context.h @@ -25,6 +25,7 @@ struct user_namespace; enum fs_context_purpose { FS_CONTEXT_FOR_MOUNT, /* New superblock for explicit mount */ + FS_CONTEXT_FOR_SUBMOUNT, /* New superblock for automatic submount */ FS_CONTEXT_FOR_RECONFIGURE, /* Superblock reconfiguration (remount) */ }; @@ -61,6 +62,8 @@ extern struct fs_context *fs_context_for_mount(struct file_system_type *fs_type, extern struct fs_context *fs_context_for_reconfigure(struct dentry *dentry, unsigned int sb_flags, unsigned int sb_flags_mask); +extern struct fs_context *fs_context_for_submount(struct file_system_type *fs_type, + struct dentry *reference); extern int vfs_get_tree(struct fs_context *fc); extern void put_fs_context(struct fs_context *fc); -- cgit From f3a09c92018a91ad0981146a4ac59414f814d801 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 23 Dec 2018 18:55:56 -0500 Subject: introduce fs_context methods Signed-off-by: Al Viro --- fs/fs_context.c | 28 ++++++++++++++++++++++------ fs/internal.h | 2 -- fs/super.c | 36 ++++++++++++++++++++++++++++-------- include/linux/fs.h | 2 ++ include/linux/fs_context.h | 13 +++++++++++++ 5 files changed, 65 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/fs/fs_context.c b/fs/fs_context.c index 2bd652b6e848..825d1b2c8807 100644 --- a/fs/fs_context.c +++ b/fs/fs_context.c @@ -51,6 +51,7 @@ static struct fs_context *alloc_fs_context(struct file_system_type *fs_type, unsigned int sb_flags_mask, enum fs_context_purpose purpose) { + int (*init_fs_context)(struct fs_context *); struct fs_context *fc; int ret = -ENOMEM; @@ -81,7 +82,12 @@ static struct fs_context *alloc_fs_context(struct file_system_type *fs_type, break; } - ret = legacy_init_fs_context(fc); + /* TODO: Make all filesystems support this unconditionally */ + init_fs_context = fc->fs_type->init_fs_context; + if (!init_fs_context) + init_fs_context = legacy_init_fs_context; + + ret = init_fs_context(fc); if (ret < 0) goto err_fc; fc->need_free = true; @@ -141,8 +147,8 @@ void put_fs_context(struct fs_context *fc) deactivate_super(sb); } - if (fc->need_free) - legacy_fs_context_free(fc); + if (fc->need_free && fc->ops && fc->ops->free) + fc->ops->free(fc); security_free_mnt_opts(&fc->security); put_net(fc->net_ns); @@ -180,7 +186,7 @@ static int legacy_parse_monolithic(struct fs_context *fc, void *data) /* * Get a mountable root with the legacy mount command. */ -int legacy_get_tree(struct fs_context *fc) +static int legacy_get_tree(struct fs_context *fc) { struct legacy_fs_context *ctx = fc->fs_private; struct super_block *sb; @@ -201,7 +207,7 @@ int legacy_get_tree(struct fs_context *fc) /* * Handle remount. */ -int legacy_reconfigure(struct fs_context *fc) +static int legacy_reconfigure(struct fs_context *fc) { struct legacy_fs_context *ctx = fc->fs_private; struct super_block *sb = fc->root->d_sb; @@ -213,6 +219,13 @@ int legacy_reconfigure(struct fs_context *fc) ctx ? ctx->legacy_data : NULL); } +const struct fs_context_operations legacy_fs_context_ops = { + .free = legacy_fs_context_free, + .parse_monolithic = legacy_parse_monolithic, + .get_tree = legacy_get_tree, + .reconfigure = legacy_reconfigure, +}; + /* * Initialise a legacy context for a filesystem that doesn't support * fs_context. @@ -222,10 +235,13 @@ static int legacy_init_fs_context(struct fs_context *fc) fc->fs_private = kzalloc(sizeof(struct legacy_fs_context), GFP_KERNEL); if (!fc->fs_private) return -ENOMEM; + fc->ops = &legacy_fs_context_ops; return 0; } int parse_monolithic_mount_data(struct fs_context *fc, void *data) { - return legacy_parse_monolithic(fc, data); + int (*monolithic_mount_data)(struct fs_context *, void *); + monolithic_mount_data = fc->ops->parse_monolithic; + return monolithic_mount_data(fc, data); } diff --git a/fs/internal.h b/fs/internal.h index 016a5b8dd305..8f8d07cc433f 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -55,8 +55,6 @@ extern void __init chrdev_init(void); /* * fs_context.c */ -extern int legacy_get_tree(struct fs_context *fc); -extern int legacy_reconfigure(struct fs_context *fc); extern int parse_monolithic_mount_data(struct fs_context *, void *); extern void fc_drop_locked(struct fs_context *); diff --git a/fs/super.c b/fs/super.c index 50553233dd15..76b3181c782d 100644 --- a/fs/super.c +++ b/fs/super.c @@ -894,13 +894,15 @@ int reconfigure_super(struct fs_context *fc) } } - retval = legacy_reconfigure(fc); - if (retval) { - if (!force) - goto cancel_readonly; - /* If forced remount, go ahead despite any errors */ - WARN(1, "forced remount of a %s fs returned %i\n", - sb->s_type->name, retval); + if (fc->ops->reconfigure) { + retval = fc->ops->reconfigure(fc); + if (retval) { + if (!force) + goto cancel_readonly; + /* If forced remount, go ahead despite any errors */ + WARN(1, "forced remount of a %s fs returned %i\n", + sb->s_type->name, retval); + } } WRITE_ONCE(sb->s_flags, ((sb->s_flags & ~fc->sb_flags_mask) | @@ -1294,10 +1296,28 @@ int vfs_get_tree(struct fs_context *fc) struct super_block *sb; int error; - error = legacy_get_tree(fc); + if (fc->fs_type->fs_flags & FS_REQUIRES_DEV && !fc->source) + return -ENOENT; + + if (fc->root) + return -EBUSY; + + /* Get the mountable root in fc->root, with a ref on the root and a ref + * on the superblock. + */ + error = fc->ops->get_tree(fc); if (error < 0) return error; + if (!fc->root) { + pr_err("Filesystem %s get_tree() didn't set fc->root\n", + fc->fs_type->name); + /* We don't know what the locking state of the superblock is - + * if there is a superblock. + */ + BUG(); + } + sb = fc->root->d_sb; WARN_ON(!sb->s_bdi); diff --git a/include/linux/fs.h b/include/linux/fs.h index c65d02c5c512..8d578a9e1e8c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -61,6 +61,7 @@ struct workqueue_struct; struct iov_iter; struct fscrypt_info; struct fscrypt_operations; +struct fs_context; extern void __init inode_init(void); extern void __init inode_init_early(void); @@ -2173,6 +2174,7 @@ struct file_system_type { #define FS_HAS_SUBTYPE 4 #define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */ #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */ + int (*init_fs_context)(struct fs_context *); struct dentry *(*mount) (struct file_system_type *, int, const char *, void *); void (*kill_sb) (struct super_block *); diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h index 7feb018c7a9e..087c12954360 100644 --- a/include/linux/fs_context.h +++ b/include/linux/fs_context.h @@ -20,8 +20,13 @@ struct cred; struct dentry; struct file_operations; struct file_system_type; +struct mnt_namespace; struct net; +struct pid_namespace; +struct super_block; struct user_namespace; +struct vfsmount; +struct path; enum fs_context_purpose { FS_CONTEXT_FOR_MOUNT, /* New superblock for explicit mount */ @@ -39,6 +44,7 @@ enum fs_context_purpose { * See Documentation/filesystems/mounting.txt */ struct fs_context { + const struct fs_context_operations *ops; struct file_system_type *fs_type; void *fs_private; /* The filesystem's context */ struct dentry *root; /* The root and superblock */ @@ -54,6 +60,13 @@ struct fs_context { bool need_free:1; /* Need to call ops->free() */ }; +struct fs_context_operations { + void (*free)(struct fs_context *fc); + int (*parse_monolithic)(struct fs_context *fc, void *data); + int (*get_tree)(struct fs_context *fc); + int (*reconfigure)(struct fs_context *fc); +}; + /* * fs_context manipulation functions. */ -- cgit From c6b82263f9c6e745eb4c5dfc2578d147c4cd7604 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 1 Nov 2018 23:07:23 +0000 Subject: vfs: Introduce logging functions Introduce a set of logging functions through which informational messages, warnings and error messages incurred by the mount procedure can be logged and, in a future patch, passed to userspace instead by way of the filesystem configuration context file descriptor. There are four functions: (1) infof(const char *fmt, ...); Logs an informational message. (2) warnf(const char *fmt, ...); Logs a warning message. (3) errorf(const char *fmt, ...); Logs an error message. (4) invalf(const char *fmt, ...); As errof(), but returns -EINVAL so can be used on a return statement. Signed-off-by: David Howells Signed-off-by: Al Viro --- include/linux/fs_context.h | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h index 087c12954360..d208cc40b868 100644 --- a/include/linux/fs_context.h +++ b/include/linux/fs_context.h @@ -81,4 +81,46 @@ extern struct fs_context *fs_context_for_submount(struct file_system_type *fs_ty extern int vfs_get_tree(struct fs_context *fc); extern void put_fs_context(struct fs_context *fc); +#define logfc(FC, FMT, ...) pr_notice(FMT, ## __VA_ARGS__) + +/** + * infof - Store supplementary informational message + * @fc: The context in which to log the informational message + * @fmt: The format string + * + * Store the supplementary informational message for the process if the process + * has enabled the facility. + */ +#define infof(fc, fmt, ...) ({ logfc(fc, fmt, ## __VA_ARGS__); }) + +/** + * warnf - Store supplementary warning message + * @fc: The context in which to log the error message + * @fmt: The format string + * + * Store the supplementary warning message for the process if the process has + * enabled the facility. + */ +#define warnf(fc, fmt, ...) ({ logfc(fc, fmt, ## __VA_ARGS__); }) + +/** + * errorf - Store supplementary error message + * @fc: The context in which to log the error message + * @fmt: The format string + * + * Store the supplementary error message for the process if the process has + * enabled the facility. + */ +#define errorf(fc, fmt, ...) ({ logfc(fc, fmt, ## __VA_ARGS__); }) + +/** + * invalf - Store supplementary invalid argument error message + * @fc: The context in which to log the error message + * @fmt: The format string + * + * Store the supplementary error message for the process if the process has + * enabled the facility and return -EINVAL. + */ +#define invalf(fc, fmt, ...) ({ errorf(fc, fmt, ## __VA_ARGS__); -EINVAL; }) + #endif /* _LINUX_FS_CONTEXT_H */ -- cgit From 57d4657716aca81ef4d7ec23e8123d26e3d28954 Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Wed, 23 Jan 2019 13:35:00 -0500 Subject: audit: ignore fcaps on umount Don't fetch fcaps when umount2 is called to avoid a process hang while it waits for the missing resource to (possibly never) re-appear. Note the comment above user_path_mountpoint_at(): * A umount is a special case for path walking. We're not actually interested * in the inode in this situation, and ESTALE errors can be a problem. We * simply want track down the dentry and vfsmount attached at the mountpoint * and avoid revalidating the last component. This can happen on ceph, cifs, 9p, lustre, fuse (gluster) or NFS. Please see the github issue tracker https://github.com/linux-audit/audit-kernel/issues/100 Signed-off-by: Richard Guy Briggs [PM: merge fuzz in audit_log_fcaps()] Signed-off-by: Paul Moore --- fs/namei.c | 2 +- fs/namespace.c | 2 ++ include/linux/audit.h | 15 ++++++++++----- include/linux/namei.h | 3 +++ kernel/audit.c | 10 +++++++++- kernel/audit.h | 2 +- kernel/auditsc.c | 6 +++--- 7 files changed, 29 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/fs/namei.c b/fs/namei.c index 914178cdbe94..87d7710a2e1d 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2720,7 +2720,7 @@ filename_mountpoint(int dfd, struct filename *name, struct path *path, if (unlikely(error == -ESTALE)) error = path_mountpoint(&nd, flags | LOOKUP_REVAL, path); if (likely(!error)) - audit_inode(name, path->dentry, 0); + audit_inode(name, path->dentry, flags & LOOKUP_NO_EVAL); restore_nameidata(); putname(name); return error; diff --git a/fs/namespace.c b/fs/namespace.c index a677b59efd74..e5de0e372df2 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1640,6 +1640,8 @@ int ksys_umount(char __user *name, int flags) if (!(flags & UMOUNT_NOFOLLOW)) lookup_flags |= LOOKUP_FOLLOW; + lookup_flags |= LOOKUP_NO_EVAL; + retval = user_path_mountpoint_at(AT_FDCWD, name, lookup_flags, &path); if (retval) goto out; diff --git a/include/linux/audit.h b/include/linux/audit.h index ecb5d317d6a2..29251b18331a 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -25,6 +25,7 @@ #include #include +#include /* LOOKUP_* */ #include #define AUDIT_INO_UNSET ((unsigned long)-1) @@ -248,6 +249,7 @@ extern void __audit_getname(struct filename *name); #define AUDIT_INODE_PARENT 1 /* dentry represents the parent */ #define AUDIT_INODE_HIDDEN 2 /* audit record should be hidden */ +#define AUDIT_INODE_NOEVAL 4 /* audit record incomplete */ extern void __audit_inode(struct filename *name, const struct dentry *dentry, unsigned int flags); extern void __audit_file(const struct file *); @@ -308,12 +310,15 @@ static inline void audit_getname(struct filename *name) } static inline void audit_inode(struct filename *name, const struct dentry *dentry, - unsigned int parent) { + unsigned int flags) { if (unlikely(!audit_dummy_context())) { - unsigned int flags = 0; - if (parent) - flags |= AUDIT_INODE_PARENT; - __audit_inode(name, dentry, flags); + unsigned int aflags = 0; + + if (flags & LOOKUP_PARENT) + aflags |= AUDIT_INODE_PARENT; + if (flags & LOOKUP_NO_EVAL) + aflags |= AUDIT_INODE_NOEVAL; + __audit_inode(name, dentry, aflags); } } static inline void audit_file(struct file *file) diff --git a/include/linux/namei.h b/include/linux/namei.h index a78606e8e3df..9138b4471dbf 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -24,6 +24,8 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND}; * - internal "there are more path components" flag * - dentry cache is untrusted; force a real lookup * - suppress terminal automount + * - skip revalidation + * - don't fetch xattrs on audit_inode */ #define LOOKUP_FOLLOW 0x0001 #define LOOKUP_DIRECTORY 0x0002 @@ -33,6 +35,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND}; #define LOOKUP_REVAL 0x0020 #define LOOKUP_RCU 0x0040 #define LOOKUP_NO_REVAL 0x0080 +#define LOOKUP_NO_EVAL 0x0100 /* * Intent data diff --git a/kernel/audit.c b/kernel/audit.c index 3f3f1888cac7..b7177a8def2e 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -2082,6 +2082,10 @@ void audit_log_cap(struct audit_buffer *ab, char *prefix, kernel_cap_t *cap) static void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name) { + if (name->fcap_ver == -1) { + audit_log_format(ab, " cap_fe=? cap_fver=? cap_fp=? cap_fi=?"); + return; + } audit_log_cap(ab, "cap_fp", &name->fcap.permitted); audit_log_cap(ab, "cap_fi", &name->fcap.inheritable); audit_log_format(ab, " cap_fe=%d cap_fver=%x cap_frootid=%d", @@ -2114,7 +2118,7 @@ static inline int audit_copy_fcaps(struct audit_names *name, /* Copy inode data into an audit_names. */ void audit_copy_inode(struct audit_names *name, const struct dentry *dentry, - struct inode *inode) + struct inode *inode, unsigned int flags) { name->ino = inode->i_ino; name->dev = inode->i_sb->s_dev; @@ -2123,6 +2127,10 @@ void audit_copy_inode(struct audit_names *name, const struct dentry *dentry, name->gid = inode->i_gid; name->rdev = inode->i_rdev; security_inode_getsecid(inode, &name->osid); + if (flags & AUDIT_INODE_NOEVAL) { + name->fcap_ver = -1; + return; + } audit_copy_fcaps(name, dentry); } diff --git a/kernel/audit.h b/kernel/audit.h index 9acb8691ed87..002f0f7ba732 100644 --- a/kernel/audit.h +++ b/kernel/audit.h @@ -215,7 +215,7 @@ extern void audit_log_session_info(struct audit_buffer *ab); extern void audit_copy_inode(struct audit_names *name, const struct dentry *dentry, - struct inode *inode); + struct inode *inode, unsigned int flags); extern void audit_log_cap(struct audit_buffer *ab, char *prefix, kernel_cap_t *cap); extern void audit_log_name(struct audit_context *context, diff --git a/kernel/auditsc.c b/kernel/auditsc.c index a2696ce790f9..68da71001096 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1856,7 +1856,7 @@ out: n->type = AUDIT_TYPE_NORMAL; } handle_path(dentry); - audit_copy_inode(n, dentry, inode); + audit_copy_inode(n, dentry, inode, flags & AUDIT_INODE_NOEVAL); } void __audit_file(const struct file *file) @@ -1955,7 +1955,7 @@ void __audit_inode_child(struct inode *parent, n = audit_alloc_name(context, AUDIT_TYPE_PARENT); if (!n) return; - audit_copy_inode(n, NULL, parent); + audit_copy_inode(n, NULL, parent, 0); } if (!found_child) { @@ -1974,7 +1974,7 @@ void __audit_inode_child(struct inode *parent, } if (inode) - audit_copy_inode(found_child, dentry, inode); + audit_copy_inode(found_child, dentry, inode, 0); else found_child->ino = AUDIT_INO_UNSET; } -- cgit From a08c2a5a31941131c41feaa0429e4c8854cf48f2 Mon Sep 17 00:00:00 2001 From: Thara Gopinath Date: Wed, 23 Jan 2019 08:50:14 +0100 Subject: PM-runtime: Replace jiffies-based accounting with ktime-based accounting Replace jiffies-based accounting for runtime_active_time and runtime_suspended_time with ktime-based accounting. This makes the runtime debug counters inline with genpd and other PM subsytems which use ktime-based accounting. Timekeeping is initialized before driver_init(). It's only at that time that PM-runtime can be enabled. Signed-off-by: Thara Gopinath [switch from ktime to raw nsec] Signed-off-by: Vincent Guittot Reviewed-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- drivers/base/power/runtime.c | 17 +++++++++-------- drivers/base/power/sysfs.c | 11 ++++++++--- include/linux/pm.h | 6 +++--- 3 files changed, 20 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index f23b7ecfce3b..eb1a3b878e1e 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -66,8 +66,8 @@ static int rpm_suspend(struct device *dev, int rpmflags); */ void update_pm_runtime_accounting(struct device *dev) { - unsigned long now = jiffies; - unsigned long delta; + u64 now = ktime_to_ns(ktime_get()); + u64 delta; delta = now - dev->power.accounting_timestamp; @@ -77,9 +77,9 @@ void update_pm_runtime_accounting(struct device *dev) return; if (dev->power.runtime_status == RPM_SUSPENDED) - dev->power.suspended_jiffies += delta; + dev->power.suspended_time += delta; else - dev->power.active_jiffies += delta; + dev->power.active_time += delta; } static void __update_runtime_status(struct device *dev, enum rpm_status status) @@ -90,16 +90,17 @@ static void __update_runtime_status(struct device *dev, enum rpm_status status) u64 pm_runtime_suspended_time(struct device *dev) { - unsigned long flags, time; + u64 time; + unsigned long flags; spin_lock_irqsave(&dev->power.lock, flags); update_pm_runtime_accounting(dev); - time = dev->power.suspended_jiffies; + time = dev->power.suspended_time; spin_unlock_irqrestore(&dev->power.lock, flags); - return jiffies_to_nsecs(time); + return time; } EXPORT_SYMBOL_GPL(pm_runtime_suspended_time); @@ -1314,7 +1315,7 @@ void pm_runtime_enable(struct device *dev) /* About to enable runtime pm, set accounting_timestamp to now */ if (!dev->power.disable_depth) - dev->power.accounting_timestamp = jiffies; + dev->power.accounting_timestamp = ktime_to_ns(ktime_get()); } else { dev_warn(dev, "Unbalanced %s!\n", __func__); } diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c index d713738ce796..96c8a227610a 100644 --- a/drivers/base/power/sysfs.c +++ b/drivers/base/power/sysfs.c @@ -125,9 +125,12 @@ static ssize_t runtime_active_time_show(struct device *dev, struct device_attribute *attr, char *buf) { int ret; + u64 tmp; spin_lock_irq(&dev->power.lock); update_pm_runtime_accounting(dev); - ret = sprintf(buf, "%i\n", jiffies_to_msecs(dev->power.active_jiffies)); + tmp = dev->power.active_time; + do_div(tmp, NSEC_PER_MSEC); + ret = sprintf(buf, "%llu\n", tmp); spin_unlock_irq(&dev->power.lock); return ret; } @@ -138,10 +141,12 @@ static ssize_t runtime_suspended_time_show(struct device *dev, struct device_attribute *attr, char *buf) { int ret; + u64 tmp; spin_lock_irq(&dev->power.lock); update_pm_runtime_accounting(dev); - ret = sprintf(buf, "%i\n", - jiffies_to_msecs(dev->power.suspended_jiffies)); + tmp = dev->power.suspended_time; + do_div(tmp, NSEC_PER_MSEC); + ret = sprintf(buf, "%llu\n", tmp); spin_unlock_irq(&dev->power.lock); return ret; } diff --git a/include/linux/pm.h b/include/linux/pm.h index 0bd9de116826..3d2cbf947768 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -633,9 +633,9 @@ struct dev_pm_info { int runtime_error; int autosuspend_delay; u64 last_busy; - unsigned long active_jiffies; - unsigned long suspended_jiffies; - unsigned long accounting_timestamp; + u64 active_time; + u64 suspended_time; + u64 accounting_timestamp; #endif struct pm_subsys_data *subsys_data; /* Owned by the subsystem. */ void (*set_latency_tolerance)(struct device *, s32); -- cgit From 8204e0c1113d6b7f599bcd7ebfbfde72e76c102f Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 22 Jan 2019 10:39:26 -0800 Subject: workqueue: Provide queue_work_node to queue work near a given NUMA node Provide a new function, queue_work_node, which is meant to schedule work on a "random" CPU of the requested NUMA node. The main motivation for this is to help assist asynchronous init to better improve boot times for devices that are local to a specific node. For now we just default to the first CPU that is in the intersection of the cpumask of the node and the online cpumask. The only exception is if the CPU is local to the node we will just use the current CPU. This should work for our purposes as we are currently only using this for unbound work so the CPU will be translated to a node anyway instead of being directly used. As we are only using the first CPU to represent the NUMA node for now I am limiting the scope of the function so that it can only be used with unbound workqueues. Acked-by: Tejun Heo Reviewed-by: Bart Van Assche Acked-by: Dan Williams Signed-off-by: Alexander Duyck Signed-off-by: Greg Kroah-Hartman --- include/linux/workqueue.h | 2 ++ kernel/workqueue.c | 84 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 60d673e15632..1f50c1e586e7 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -463,6 +463,8 @@ int workqueue_set_unbound_cpumask(cpumask_var_t cpumask); extern bool queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work); +extern bool queue_work_node(int node, struct workqueue_struct *wq, + struct work_struct *work); extern bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq, struct delayed_work *work, unsigned long delay); extern bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq, diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 392be4b252f6..d5a26e456f7a 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1492,6 +1492,90 @@ bool queue_work_on(int cpu, struct workqueue_struct *wq, } EXPORT_SYMBOL(queue_work_on); +/** + * workqueue_select_cpu_near - Select a CPU based on NUMA node + * @node: NUMA node ID that we want to select a CPU from + * + * This function will attempt to find a "random" cpu available on a given + * node. If there are no CPUs available on the given node it will return + * WORK_CPU_UNBOUND indicating that we should just schedule to any + * available CPU if we need to schedule this work. + */ +static int workqueue_select_cpu_near(int node) +{ + int cpu; + + /* No point in doing this if NUMA isn't enabled for workqueues */ + if (!wq_numa_enabled) + return WORK_CPU_UNBOUND; + + /* Delay binding to CPU if node is not valid or online */ + if (node < 0 || node >= MAX_NUMNODES || !node_online(node)) + return WORK_CPU_UNBOUND; + + /* Use local node/cpu if we are already there */ + cpu = raw_smp_processor_id(); + if (node == cpu_to_node(cpu)) + return cpu; + + /* Use "random" otherwise know as "first" online CPU of node */ + cpu = cpumask_any_and(cpumask_of_node(node), cpu_online_mask); + + /* If CPU is valid return that, otherwise just defer */ + return cpu < nr_cpu_ids ? cpu : WORK_CPU_UNBOUND; +} + +/** + * queue_work_node - queue work on a "random" cpu for a given NUMA node + * @node: NUMA node that we are targeting the work for + * @wq: workqueue to use + * @work: work to queue + * + * We queue the work to a "random" CPU within a given NUMA node. The basic + * idea here is to provide a way to somehow associate work with a given + * NUMA node. + * + * This function will only make a best effort attempt at getting this onto + * the right NUMA node. If no node is requested or the requested node is + * offline then we just fall back to standard queue_work behavior. + * + * Currently the "random" CPU ends up being the first available CPU in the + * intersection of cpu_online_mask and the cpumask of the node, unless we + * are running on the node. In that case we just use the current CPU. + * + * Return: %false if @work was already on a queue, %true otherwise. + */ +bool queue_work_node(int node, struct workqueue_struct *wq, + struct work_struct *work) +{ + unsigned long flags; + bool ret = false; + + /* + * This current implementation is specific to unbound workqueues. + * Specifically we only return the first available CPU for a given + * node instead of cycling through individual CPUs within the node. + * + * If this is used with a per-cpu workqueue then the logic in + * workqueue_select_cpu_near would need to be updated to allow for + * some round robin type logic. + */ + WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND)); + + local_irq_save(flags); + + if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) { + int cpu = workqueue_select_cpu_near(node); + + __queue_work(cpu, wq, work); + ret = true; + } + + local_irq_restore(flags); + return ret; +} +EXPORT_SYMBOL_GPL(queue_work_node); + void delayed_work_timer_fn(struct timer_list *t) { struct delayed_work *dwork = from_timer(dwork, t, timer); -- cgit From 6be9238e5cb64741ff95c3ae440b112753ad93de Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 22 Jan 2019 10:39:31 -0800 Subject: async: Add support for queueing on specific NUMA node Introduce four new variants of the async_schedule_ functions that allow scheduling on a specific NUMA node. The first two functions are async_schedule_near and async_schedule_near_domain end up mapping to async_schedule and async_schedule_domain, but provide NUMA node specific functionality. They replace the original functions which were moved to inline function definitions that call the new functions while passing NUMA_NO_NODE. The second two functions are async_schedule_dev and async_schedule_dev_domain which provide NUMA specific functionality when passing a device as the data member and that device has a NUMA node other than NUMA_NO_NODE. The main motivation behind this is to address the need to be able to schedule device specific init work on specific NUMA nodes in order to improve performance of memory initialization. I have seen a significant improvement in initialziation time for persistent memory as a result of this approach. In the case of 3TB of memory on a single node the initialization time in the worst case went from 36s down to about 26s for a 10s improvement. As such the data shows a general benefit for affinitizing the async work to the node local to the device. Reviewed-by: Bart Van Assche Reviewed-by: Dan Williams Signed-off-by: Alexander Duyck Signed-off-by: Greg Kroah-Hartman --- include/linux/async.h | 82 +++++++++++++++++++++++++++++++++++++++++++++++++-- kernel/async.c | 53 ++++++++++++++++++--------------- 2 files changed, 108 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/async.h b/include/linux/async.h index 6b0226bdaadc..f81d6dbffe68 100644 --- a/include/linux/async.h +++ b/include/linux/async.h @@ -14,6 +14,8 @@ #include #include +#include +#include typedef u64 async_cookie_t; typedef void (*async_func_t) (void *data, async_cookie_t cookie); @@ -37,9 +39,83 @@ struct async_domain { struct async_domain _name = { .pending = LIST_HEAD_INIT(_name.pending), \ .registered = 0 } -extern async_cookie_t async_schedule(async_func_t func, void *data); -extern async_cookie_t async_schedule_domain(async_func_t func, void *data, - struct async_domain *domain); +async_cookie_t async_schedule_node(async_func_t func, void *data, + int node); +async_cookie_t async_schedule_node_domain(async_func_t func, void *data, + int node, + struct async_domain *domain); + +/** + * async_schedule - schedule a function for asynchronous execution + * @func: function to execute asynchronously + * @data: data pointer to pass to the function + * + * Returns an async_cookie_t that may be used for checkpointing later. + * Note: This function may be called from atomic or non-atomic contexts. + */ +static inline async_cookie_t async_schedule(async_func_t func, void *data) +{ + return async_schedule_node(func, data, NUMA_NO_NODE); +} + +/** + * async_schedule_domain - schedule a function for asynchronous execution within a certain domain + * @func: function to execute asynchronously + * @data: data pointer to pass to the function + * @domain: the domain + * + * Returns an async_cookie_t that may be used for checkpointing later. + * @domain may be used in the async_synchronize_*_domain() functions to + * wait within a certain synchronization domain rather than globally. + * Note: This function may be called from atomic or non-atomic contexts. + */ +static inline async_cookie_t +async_schedule_domain(async_func_t func, void *data, + struct async_domain *domain) +{ + return async_schedule_node_domain(func, data, NUMA_NO_NODE, domain); +} + +/** + * async_schedule_dev - A device specific version of async_schedule + * @func: function to execute asynchronously + * @dev: device argument to be passed to function + * + * Returns an async_cookie_t that may be used for checkpointing later. + * @dev is used as both the argument for the function and to provide NUMA + * context for where to run the function. By doing this we can try to + * provide for the best possible outcome by operating on the device on the + * CPUs closest to the device. + * Note: This function may be called from atomic or non-atomic contexts. + */ +static inline async_cookie_t +async_schedule_dev(async_func_t func, struct device *dev) +{ + return async_schedule_node(func, dev, dev_to_node(dev)); +} + +/** + * async_schedule_dev_domain - A device specific version of async_schedule_domain + * @func: function to execute asynchronously + * @dev: device argument to be passed to function + * @domain: the domain + * + * Returns an async_cookie_t that may be used for checkpointing later. + * @dev is used as both the argument for the function and to provide NUMA + * context for where to run the function. By doing this we can try to + * provide for the best possible outcome by operating on the device on the + * CPUs closest to the device. + * @domain may be used in the async_synchronize_*_domain() functions to + * wait within a certain synchronization domain rather than globally. + * Note: This function may be called from atomic or non-atomic contexts. + */ +static inline async_cookie_t +async_schedule_dev_domain(async_func_t func, struct device *dev, + struct async_domain *domain) +{ + return async_schedule_node_domain(func, dev, dev_to_node(dev), domain); +} + void async_unregister_domain(struct async_domain *domain); extern void async_synchronize_full(void); extern void async_synchronize_full_domain(struct async_domain *domain); diff --git a/kernel/async.c b/kernel/async.c index a893d6170944..f6bd0d9885e1 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -149,7 +149,25 @@ static void async_run_entry_fn(struct work_struct *work) wake_up(&async_done); } -static async_cookie_t __async_schedule(async_func_t func, void *data, struct async_domain *domain) +/** + * async_schedule_node_domain - NUMA specific version of async_schedule_domain + * @func: function to execute asynchronously + * @data: data pointer to pass to the function + * @node: NUMA node that we want to schedule this on or close to + * @domain: the domain + * + * Returns an async_cookie_t that may be used for checkpointing later. + * @domain may be used in the async_synchronize_*_domain() functions to + * wait within a certain synchronization domain rather than globally. + * + * Note: This function may be called from atomic or non-atomic contexts. + * + * The node requested will be honored on a best effort basis. If the node + * has no CPUs associated with it then the work is distributed among all + * available CPUs. + */ +async_cookie_t async_schedule_node_domain(async_func_t func, void *data, + int node, struct async_domain *domain) { struct async_entry *entry; unsigned long flags; @@ -195,43 +213,30 @@ static async_cookie_t __async_schedule(async_func_t func, void *data, struct asy current->flags |= PF_USED_ASYNC; /* schedule for execution */ - queue_work(system_unbound_wq, &entry->work); + queue_work_node(node, system_unbound_wq, &entry->work); return newcookie; } +EXPORT_SYMBOL_GPL(async_schedule_node_domain); /** - * async_schedule - schedule a function for asynchronous execution + * async_schedule_node - NUMA specific version of async_schedule * @func: function to execute asynchronously * @data: data pointer to pass to the function + * @node: NUMA node that we want to schedule this on or close to * * Returns an async_cookie_t that may be used for checkpointing later. * Note: This function may be called from atomic or non-atomic contexts. - */ -async_cookie_t async_schedule(async_func_t func, void *data) -{ - return __async_schedule(func, data, &async_dfl_domain); -} -EXPORT_SYMBOL_GPL(async_schedule); - -/** - * async_schedule_domain - schedule a function for asynchronous execution within a certain domain - * @func: function to execute asynchronously - * @data: data pointer to pass to the function - * @domain: the domain * - * Returns an async_cookie_t that may be used for checkpointing later. - * @domain may be used in the async_synchronize_*_domain() functions to - * wait within a certain synchronization domain rather than globally. A - * synchronization domain is specified via @domain. Note: This function - * may be called from atomic or non-atomic contexts. + * The node requested will be honored on a best effort basis. If the node + * has no CPUs associated with it then the work is distributed among all + * available CPUs. */ -async_cookie_t async_schedule_domain(async_func_t func, void *data, - struct async_domain *domain) +async_cookie_t async_schedule_node(async_func_t func, void *data, int node) { - return __async_schedule(func, data, domain); + return async_schedule_node_domain(func, data, node, &async_dfl_domain); } -EXPORT_SYMBOL_GPL(async_schedule_domain); +EXPORT_SYMBOL_GPL(async_schedule_node); /** * async_synchronize_full - synchronize all asynchronous function calls -- cgit From 51bee5abeab2058ea5813c5615d6197a23dbf041 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 28 Jan 2019 17:00:13 +0100 Subject: cgroup/pids: turn cgroup_subsys->free() into cgroup_subsys->release() to fix the accounting The only user of cgroup_subsys->free() callback is pids_cgrp_subsys which needs pids_free() to uncharge the pid. However, ->free() is called from __put_task_struct()->cgroup_free() and this is too late. Even the trivial program which does for (;;) { int pid = fork(); assert(pid >= 0); if (pid) wait(NULL); else exit(0); } can run out of limits because release_task()->call_rcu(delayed_put_task_struct) implies an RCU gp after the task/pid goes away and before the final put(). Test-case: mkdir -p /tmp/CG mount -t cgroup2 none /tmp/CG echo '+pids' > /tmp/CG/cgroup.subtree_control mkdir /tmp/CG/PID echo 2 > /tmp/CG/PID/pids.max perl -e 'while ($p = fork) { wait; } $p // die "fork failed: $!\n"' & echo $! > /tmp/CG/PID/cgroup.procs Without this patch the forking process fails soon after migration. Rename cgroup_subsys->free() to cgroup_subsys->release() and move the callsite into the new helper, cgroup_release(), called by release_task() which actually frees the pid(s). Reported-by: Herton R. Krzesinski Reported-by: Jan Stancek Signed-off-by: Oleg Nesterov Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 2 +- include/linux/cgroup.h | 2 ++ kernel/cgroup/cgroup.c | 15 +++++++++------ kernel/cgroup/pids.c | 4 ++-- kernel/exit.c | 1 + 5 files changed, 15 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 8fcbae1b8db0..120d1d40704b 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -602,7 +602,7 @@ struct cgroup_subsys { void (*cancel_fork)(struct task_struct *task); void (*fork)(struct task_struct *task); void (*exit)(struct task_struct *task); - void (*free)(struct task_struct *task); + void (*release)(struct task_struct *task); void (*bind)(struct cgroup_subsys_state *root_css); bool early_init:1; diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 9968332cceed..81f58b4a5418 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -121,6 +121,7 @@ extern int cgroup_can_fork(struct task_struct *p); extern void cgroup_cancel_fork(struct task_struct *p); extern void cgroup_post_fork(struct task_struct *p); void cgroup_exit(struct task_struct *p); +void cgroup_release(struct task_struct *p); void cgroup_free(struct task_struct *p); int cgroup_init_early(void); @@ -697,6 +698,7 @@ static inline int cgroup_can_fork(struct task_struct *p) { return 0; } static inline void cgroup_cancel_fork(struct task_struct *p) {} static inline void cgroup_post_fork(struct task_struct *p) {} static inline void cgroup_exit(struct task_struct *p) {} +static inline void cgroup_release(struct task_struct *p) {} static inline void cgroup_free(struct task_struct *p) {} static inline int cgroup_init_early(void) { return 0; } diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index f31bd61c9466..f4418371c83b 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -197,7 +197,7 @@ static u64 css_serial_nr_next = 1; */ static u16 have_fork_callback __read_mostly; static u16 have_exit_callback __read_mostly; -static u16 have_free_callback __read_mostly; +static u16 have_release_callback __read_mostly; static u16 have_canfork_callback __read_mostly; /* cgroup namespace for init task */ @@ -5313,7 +5313,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early) have_fork_callback |= (bool)ss->fork << ss->id; have_exit_callback |= (bool)ss->exit << ss->id; - have_free_callback |= (bool)ss->free << ss->id; + have_release_callback |= (bool)ss->release << ss->id; have_canfork_callback |= (bool)ss->can_fork << ss->id; /* At system boot, before all subsystems have been @@ -5749,16 +5749,19 @@ void cgroup_exit(struct task_struct *tsk) } while_each_subsys_mask(); } -void cgroup_free(struct task_struct *task) +void cgroup_release(struct task_struct *task) { - struct css_set *cset = task_css_set(task); struct cgroup_subsys *ss; int ssid; - do_each_subsys_mask(ss, ssid, have_free_callback) { - ss->free(task); + do_each_subsys_mask(ss, ssid, have_release_callback) { + ss->release(task); } while_each_subsys_mask(); +} +void cgroup_free(struct task_struct *task) +{ + struct css_set *cset = task_css_set(task); put_css_set(cset); } diff --git a/kernel/cgroup/pids.c b/kernel/cgroup/pids.c index 9829c67ebc0a..c9960baaa14f 100644 --- a/kernel/cgroup/pids.c +++ b/kernel/cgroup/pids.c @@ -247,7 +247,7 @@ static void pids_cancel_fork(struct task_struct *task) pids_uncharge(pids, 1); } -static void pids_free(struct task_struct *task) +static void pids_release(struct task_struct *task) { struct pids_cgroup *pids = css_pids(task_css(task, pids_cgrp_id)); @@ -342,7 +342,7 @@ struct cgroup_subsys pids_cgrp_subsys = { .cancel_attach = pids_cancel_attach, .can_fork = pids_can_fork, .cancel_fork = pids_cancel_fork, - .free = pids_free, + .release = pids_release, .legacy_cftypes = pids_files, .dfl_cftypes = pids_files, .threaded = true, diff --git a/kernel/exit.c b/kernel/exit.c index 3fb7be001964..c2b8443f30b4 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -219,6 +219,7 @@ repeat: } write_unlock_irq(&tasklist_lock); + cgroup_release(p); release_thread(p); call_rcu(&p->rcu, delayed_put_task_struct); -- cgit From 90462a5bd30c6ed91c6758e59537d047d7878ff9 Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Thu, 31 Jan 2019 11:52:11 -0500 Subject: audit: remove unused actx param from audit_rule_match The audit_rule_match() struct audit_context *actx parameter is not used by any in-tree consumers (selinux, apparmour, integrity, smack). The audit context is an internal audit structure that should only be accessed by audit accessor functions. It was part of commit 03d37d25e0f9 ("LSM/Audit: Introduce generic Audit LSM hooks") but appears to have never been used. Remove it. Please see the github issue https://github.com/linux-audit/audit-kernel/issues/107 Signed-off-by: Richard Guy Briggs [PM: fixed the referenced commit title] Signed-off-by: Paul Moore --- include/linux/lsm_hooks.h | 4 +--- include/linux/security.h | 5 ++--- kernel/auditfilter.c | 2 +- kernel/auditsc.c | 21 ++++++++++++--------- security/apparmor/audit.c | 3 +-- security/apparmor/include/audit.h | 3 +-- security/integrity/ima/ima.h | 3 +-- security/integrity/ima/ima_policy.c | 6 ++---- security/security.c | 6 ++---- security/selinux/include/audit.h | 4 +--- security/selinux/ss/services.c | 3 +-- security/smack/smack_lsm.c | 4 +--- 12 files changed, 26 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 9a0bdf91e646..d0b5c7a05832 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1344,7 +1344,6 @@ * @field contains the field which relates to current LSM. * @op contains the operator that will be used for matching. * @rule points to the audit rule that will be checked against. - * @actx points to the audit context associated with the check. * Return 1 if secid matches the rule, 0 if it does not, -ERRNO on failure. * * @audit_rule_free: @@ -1764,8 +1763,7 @@ union security_list_options { int (*audit_rule_init)(u32 field, u32 op, char *rulestr, void **lsmrule); int (*audit_rule_known)(struct audit_krule *krule); - int (*audit_rule_match)(u32 secid, u32 field, u32 op, void *lsmrule, - struct audit_context *actx); + int (*audit_rule_match)(u32 secid, u32 field, u32 op, void *lsmrule); void (*audit_rule_free)(void *lsmrule); #endif /* CONFIG_AUDIT */ diff --git a/include/linux/security.h b/include/linux/security.h index dbfb5a66babb..e8febec62ffb 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1674,8 +1674,7 @@ static inline int security_key_getsecurity(struct key *key, char **_buffer) #ifdef CONFIG_SECURITY int security_audit_rule_init(u32 field, u32 op, char *rulestr, void **lsmrule); int security_audit_rule_known(struct audit_krule *krule); -int security_audit_rule_match(u32 secid, u32 field, u32 op, void *lsmrule, - struct audit_context *actx); +int security_audit_rule_match(u32 secid, u32 field, u32 op, void *lsmrule); void security_audit_rule_free(void *lsmrule); #else @@ -1692,7 +1691,7 @@ static inline int security_audit_rule_known(struct audit_krule *krule) } static inline int security_audit_rule_match(u32 secid, u32 field, u32 op, - void *lsmrule, struct audit_context *actx) + void *lsmrule) { return 0; } diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 26a80a9d43a9..add360b46b38 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -1355,7 +1355,7 @@ int audit_filter(int msgtype, unsigned int listtype) if (f->lsm_rule) { security_task_getsecid(current, &sid); result = security_audit_rule_match(sid, - f->type, f->op, f->lsm_rule, NULL); + f->type, f->op, f->lsm_rule); } break; case AUDIT_EXE: diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 68da71001096..7d37cb1e4aef 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -631,9 +631,8 @@ static int audit_filter_rules(struct task_struct *tsk, need_sid = 0; } result = security_audit_rule_match(sid, f->type, - f->op, - f->lsm_rule, - ctx); + f->op, + f->lsm_rule); } break; case AUDIT_OBJ_USER: @@ -647,13 +646,17 @@ static int audit_filter_rules(struct task_struct *tsk, /* Find files that match */ if (name) { result = security_audit_rule_match( - name->osid, f->type, f->op, - f->lsm_rule, ctx); + name->osid, + f->type, + f->op, + f->lsm_rule); } else if (ctx) { list_for_each_entry(n, &ctx->names_list, list) { - if (security_audit_rule_match(n->osid, f->type, - f->op, f->lsm_rule, - ctx)) { + if (security_audit_rule_match( + n->osid, + f->type, + f->op, + f->lsm_rule)) { ++result; break; } @@ -664,7 +667,7 @@ static int audit_filter_rules(struct task_struct *tsk, break; if (security_audit_rule_match(ctx->ipc.osid, f->type, f->op, - f->lsm_rule, ctx)) + f->lsm_rule)) ++result; } break; diff --git a/security/apparmor/audit.c b/security/apparmor/audit.c index eeaddfe0c0fb..5a8b9cded4f2 100644 --- a/security/apparmor/audit.c +++ b/security/apparmor/audit.c @@ -225,8 +225,7 @@ int aa_audit_rule_known(struct audit_krule *rule) return 0; } -int aa_audit_rule_match(u32 sid, u32 field, u32 op, void *vrule, - struct audit_context *actx) +int aa_audit_rule_match(u32 sid, u32 field, u32 op, void *vrule) { struct aa_audit_rule *rule = vrule; struct aa_label *label; diff --git a/security/apparmor/include/audit.h b/security/apparmor/include/audit.h index b8c8b1066b0a..ee559bc2acb8 100644 --- a/security/apparmor/include/audit.h +++ b/security/apparmor/include/audit.h @@ -192,7 +192,6 @@ static inline int complain_error(int error) void aa_audit_rule_free(void *vrule); int aa_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule); int aa_audit_rule_known(struct audit_krule *rule); -int aa_audit_rule_match(u32 sid, u32 field, u32 op, void *vrule, - struct audit_context *actx); +int aa_audit_rule_match(u32 sid, u32 field, u32 op, void *vrule); #endif /* __AA_AUDIT_H */ diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h index cc12f3449a72..026163f37ba1 100644 --- a/security/integrity/ima/ima.h +++ b/security/integrity/ima/ima.h @@ -307,8 +307,7 @@ static inline int security_filter_rule_init(u32 field, u32 op, char *rulestr, } static inline int security_filter_rule_match(u32 secid, u32 field, u32 op, - void *lsmrule, - struct audit_context *actx) + void *lsmrule) { return -EINVAL; } diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c index 8bc8a1c8cb3f..26fa9d9723f6 100644 --- a/security/integrity/ima/ima_policy.c +++ b/security/integrity/ima/ima_policy.c @@ -340,8 +340,7 @@ retry: rc = security_filter_rule_match(osid, rule->lsm[i].type, Audit_equal, - rule->lsm[i].rule, - NULL); + rule->lsm[i].rule); break; case LSM_SUBJ_USER: case LSM_SUBJ_ROLE: @@ -349,8 +348,7 @@ retry: rc = security_filter_rule_match(secid, rule->lsm[i].type, Audit_equal, - rule->lsm[i].rule, - NULL); + rule->lsm[i].rule); default: break; } diff --git a/security/security.c b/security/security.c index f1b8d2587639..5f954b179a8e 100644 --- a/security/security.c +++ b/security/security.c @@ -1783,11 +1783,9 @@ void security_audit_rule_free(void *lsmrule) call_void_hook(audit_rule_free, lsmrule); } -int security_audit_rule_match(u32 secid, u32 field, u32 op, void *lsmrule, - struct audit_context *actx) +int security_audit_rule_match(u32 secid, u32 field, u32 op, void *lsmrule) { - return call_int_hook(audit_rule_match, 0, secid, field, op, lsmrule, - actx); + return call_int_hook(audit_rule_match, 0, secid, field, op, lsmrule); } #endif /* CONFIG_AUDIT */ diff --git a/security/selinux/include/audit.h b/security/selinux/include/audit.h index 1bdf973433cc..e51a81ffb8c9 100644 --- a/security/selinux/include/audit.h +++ b/security/selinux/include/audit.h @@ -46,13 +46,11 @@ void selinux_audit_rule_free(void *rule); * @field: the field this rule refers to * @op: the operater the rule uses * @rule: pointer to the audit rule to check against - * @actx: the audit context (can be NULL) associated with the check * * Returns 1 if the context id matches the rule, 0 if it does not, and * -errno on failure. */ -int selinux_audit_rule_match(u32 sid, u32 field, u32 op, void *rule, - struct audit_context *actx); +int selinux_audit_rule_match(u32 sid, u32 field, u32 op, void *rule); /** * selinux_audit_rule_known - check to see if rule contains selinux fields. diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index dd44126c8d14..0b7e33f6aa59 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -3376,8 +3376,7 @@ int selinux_audit_rule_known(struct audit_krule *rule) return 0; } -int selinux_audit_rule_match(u32 sid, u32 field, u32 op, void *vrule, - struct audit_context *actx) +int selinux_audit_rule_match(u32 sid, u32 field, u32 op, void *vrule) { struct selinux_state *state = &selinux_state; struct context *ctxt; diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 430d4f35e55c..403513df42fc 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -4393,13 +4393,11 @@ static int smack_audit_rule_known(struct audit_krule *krule) * @field: audit rule flags given from user-space * @op: required testing operator * @vrule: smack internal rule presentation - * @actx: audit context associated with the check * * The core Audit hook. It's used to take the decision of * whether to audit or not to audit a given object. */ -static int smack_audit_rule_match(u32 secid, u32 field, u32 op, void *vrule, - struct audit_context *actx) +static int smack_audit_rule_match(u32 secid, u32 field, u32 op, void *vrule) { struct smack_known *skp; char *rule = vrule; -- cgit From a0ce2f0aa6ad97c3d4927bf2ca54bcebdf062d55 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Wed, 23 Jan 2019 15:19:17 +0100 Subject: splice: don't merge into linked buffers Before this patch, it was possible for two pipes to affect each other after data had been transferred between them with tee(): ============ $ cat tee_test.c int main(void) { int pipe_a[2]; if (pipe(pipe_a)) err(1, "pipe"); int pipe_b[2]; if (pipe(pipe_b)) err(1, "pipe"); if (write(pipe_a[1], "abcd", 4) != 4) err(1, "write"); if (tee(pipe_a[0], pipe_b[1], 2, 0) != 2) err(1, "tee"); if (write(pipe_b[1], "xx", 2) != 2) err(1, "write"); char buf[5]; if (read(pipe_a[0], buf, 4) != 4) err(1, "read"); buf[4] = 0; printf("got back: '%s'\n", buf); } $ gcc -o tee_test tee_test.c $ ./tee_test got back: 'abxx' $ ============ As suggested by Al Viro, fix it by creating a separate type for non-mergeable pipe buffers, then changing the types of buffers in splice_pipe_to_pipe() and link_pipe(). Cc: Fixes: 7c77f0b3f920 ("splice: implement pipe to pipe splicing") Fixes: 70524490ee2e ("[PATCH] splice: add support for sys_tee()") Suggested-by: Al Viro Signed-off-by: Jann Horn Signed-off-by: Al Viro --- fs/pipe.c | 14 ++++++++++++++ fs/splice.c | 4 ++++ include/linux/pipe_fs_i.h | 1 + 3 files changed, 19 insertions(+) (limited to 'include/linux') diff --git a/fs/pipe.c b/fs/pipe.c index bdc5d3c0977d..c51750ed4011 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -234,6 +234,14 @@ static const struct pipe_buf_operations anon_pipe_buf_ops = { .get = generic_pipe_buf_get, }; +static const struct pipe_buf_operations anon_pipe_buf_nomerge_ops = { + .can_merge = 0, + .confirm = generic_pipe_buf_confirm, + .release = anon_pipe_buf_release, + .steal = anon_pipe_buf_steal, + .get = generic_pipe_buf_get, +}; + static const struct pipe_buf_operations packet_pipe_buf_ops = { .can_merge = 0, .confirm = generic_pipe_buf_confirm, @@ -242,6 +250,12 @@ static const struct pipe_buf_operations packet_pipe_buf_ops = { .get = generic_pipe_buf_get, }; +void pipe_buf_mark_unmergeable(struct pipe_buffer *buf) +{ + if (buf->ops == &anon_pipe_buf_ops) + buf->ops = &anon_pipe_buf_nomerge_ops; +} + static ssize_t pipe_read(struct kiocb *iocb, struct iov_iter *to) { diff --git a/fs/splice.c b/fs/splice.c index de2ede048473..90c29675d573 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1597,6 +1597,8 @@ retry: */ obuf->flags &= ~PIPE_BUF_FLAG_GIFT; + pipe_buf_mark_unmergeable(obuf); + obuf->len = len; opipe->nrbufs++; ibuf->offset += obuf->len; @@ -1671,6 +1673,8 @@ static int link_pipe(struct pipe_inode_info *ipipe, */ obuf->flags &= ~PIPE_BUF_FLAG_GIFT; + pipe_buf_mark_unmergeable(obuf); + if (obuf->len > len) obuf->len = len; diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 5a3bb3b7c9ad..3ecd7ea212ae 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -182,6 +182,7 @@ void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *); int generic_pipe_buf_confirm(struct pipe_inode_info *, struct pipe_buffer *); int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *); void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *); +void pipe_buf_mark_unmergeable(struct pipe_buffer *buf); extern const struct pipe_buf_operations nosteal_pipe_buf_ops; -- cgit From 01e7187b41191376cee8bea8de9f907b001e87b4 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Wed, 23 Jan 2019 15:19:18 +0100 Subject: pipe: stop using ->can_merge Al Viro pointed out that since there is only one pipe buffer type to which new data can be appended, it isn't necessary to have a ->can_merge field in struct pipe_buf_operations, we can just check for a magic type. Suggested-by: Al Viro Signed-off-by: Jann Horn Signed-off-by: Al Viro --- fs/pipe.c | 20 ++++++++++++++++---- fs/splice.c | 4 ---- include/linux/pipe_fs_i.h | 7 ------- kernel/relay.c | 1 - kernel/trace/trace.c | 2 -- net/smc/smc_rx.c | 1 - 6 files changed, 16 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/fs/pipe.c b/fs/pipe.c index c51750ed4011..0ff09b490ddf 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -226,8 +226,8 @@ void generic_pipe_buf_release(struct pipe_inode_info *pipe, } EXPORT_SYMBOL(generic_pipe_buf_release); +/* New data written to a pipe may be appended to a buffer with this type. */ static const struct pipe_buf_operations anon_pipe_buf_ops = { - .can_merge = 1, .confirm = generic_pipe_buf_confirm, .release = anon_pipe_buf_release, .steal = anon_pipe_buf_steal, @@ -235,7 +235,6 @@ static const struct pipe_buf_operations anon_pipe_buf_ops = { }; static const struct pipe_buf_operations anon_pipe_buf_nomerge_ops = { - .can_merge = 0, .confirm = generic_pipe_buf_confirm, .release = anon_pipe_buf_release, .steal = anon_pipe_buf_steal, @@ -243,19 +242,32 @@ static const struct pipe_buf_operations anon_pipe_buf_nomerge_ops = { }; static const struct pipe_buf_operations packet_pipe_buf_ops = { - .can_merge = 0, .confirm = generic_pipe_buf_confirm, .release = anon_pipe_buf_release, .steal = anon_pipe_buf_steal, .get = generic_pipe_buf_get, }; +/** + * pipe_buf_mark_unmergeable - mark a &struct pipe_buffer as unmergeable + * @buf: the buffer to mark + * + * Description: + * This function ensures that no future writes will be merged into the + * given &struct pipe_buffer. This is necessary when multiple pipe buffers + * share the same backing page. + */ void pipe_buf_mark_unmergeable(struct pipe_buffer *buf) { if (buf->ops == &anon_pipe_buf_ops) buf->ops = &anon_pipe_buf_nomerge_ops; } +static bool pipe_buf_can_merge(struct pipe_buffer *buf) +{ + return buf->ops == &anon_pipe_buf_ops; +} + static ssize_t pipe_read(struct kiocb *iocb, struct iov_iter *to) { @@ -393,7 +405,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) struct pipe_buffer *buf = pipe->bufs + lastbuf; int offset = buf->offset + buf->len; - if (buf->ops->can_merge && offset + chars <= PAGE_SIZE) { + if (pipe_buf_can_merge(buf) && offset + chars <= PAGE_SIZE) { ret = pipe_buf_confirm(pipe, buf); if (ret) goto out; diff --git a/fs/splice.c b/fs/splice.c index 90c29675d573..fc71e9733f7a 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -138,7 +138,6 @@ error: } const struct pipe_buf_operations page_cache_pipe_buf_ops = { - .can_merge = 0, .confirm = page_cache_pipe_buf_confirm, .release = page_cache_pipe_buf_release, .steal = page_cache_pipe_buf_steal, @@ -156,7 +155,6 @@ static int user_page_pipe_buf_steal(struct pipe_inode_info *pipe, } static const struct pipe_buf_operations user_page_pipe_buf_ops = { - .can_merge = 0, .confirm = generic_pipe_buf_confirm, .release = page_cache_pipe_buf_release, .steal = user_page_pipe_buf_steal, @@ -326,7 +324,6 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos, EXPORT_SYMBOL(generic_file_splice_read); const struct pipe_buf_operations default_pipe_buf_ops = { - .can_merge = 0, .confirm = generic_pipe_buf_confirm, .release = generic_pipe_buf_release, .steal = generic_pipe_buf_steal, @@ -341,7 +338,6 @@ static int generic_pipe_buf_nosteal(struct pipe_inode_info *pipe, /* Pipe buffer operations for a socket and similar. */ const struct pipe_buf_operations nosteal_pipe_buf_ops = { - .can_merge = 0, .confirm = generic_pipe_buf_confirm, .release = generic_pipe_buf_release, .steal = generic_pipe_buf_nosteal, diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 3ecd7ea212ae..787d224ff43e 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -73,13 +73,6 @@ struct pipe_inode_info { * in fs/pipe.c for the pipe and generic variants of these hooks. */ struct pipe_buf_operations { - /* - * This is set to 1, if the generic pipe read/write may coalesce - * data into an existing buffer. If this is set to 0, a new pipe - * page segment is always used for new data. - */ - int can_merge; - /* * ->confirm() verifies that the data in the pipe buffer is there * and that the contents are good. If the pages in the pipe belong diff --git a/kernel/relay.c b/kernel/relay.c index 04f248644e06..db3e419c25a6 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -1175,7 +1175,6 @@ static void relay_pipe_buf_release(struct pipe_inode_info *pipe, } static const struct pipe_buf_operations relay_pipe_buf_ops = { - .can_merge = 0, .confirm = generic_pipe_buf_confirm, .release = relay_pipe_buf_release, .steal = generic_pipe_buf_steal, diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c521b7347482..f380139e972c 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -5823,7 +5823,6 @@ static void tracing_spd_release_pipe(struct splice_pipe_desc *spd, } static const struct pipe_buf_operations tracing_pipe_buf_ops = { - .can_merge = 0, .confirm = generic_pipe_buf_confirm, .release = generic_pipe_buf_release, .steal = generic_pipe_buf_steal, @@ -6843,7 +6842,6 @@ static void buffer_pipe_buf_get(struct pipe_inode_info *pipe, /* Pipe buffer operations for a buffer. */ static const struct pipe_buf_operations buffer_pipe_buf_ops = { - .can_merge = 0, .confirm = generic_pipe_buf_confirm, .release = buffer_pipe_buf_release, .steal = generic_pipe_buf_steal, diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c index bbcf0fe4ae10..413a6abf227e 100644 --- a/net/smc/smc_rx.c +++ b/net/smc/smc_rx.c @@ -136,7 +136,6 @@ static int smc_rx_pipe_buf_nosteal(struct pipe_inode_info *pipe, } static const struct pipe_buf_operations smc_pipe_ops = { - .can_merge = 0, .confirm = generic_pipe_buf_confirm, .release = smc_rx_pipe_buf_release, .steal = smc_rx_pipe_buf_nosteal, -- cgit From 4bc59c2f7e306775f3d2e1bbafaa854dd1e09335 Mon Sep 17 00:00:00 2001 From: Enric Balletbo i Serra Date: Wed, 12 Dec 2018 18:33:56 +0100 Subject: mfd / platform: cros_ec: Use devm_mfd_add_devices Use devm_mfd_add_devices() for adding cros-ec core MFD child devices. This reduces the need of remove callback from platform/chrome for removing the MFD child devices. Signed-off-by: Enric Balletbo i Serra Reviewed-by: Guenter Roeck Signed-off-by: Lee Jones --- drivers/mfd/cros_ec.c | 14 +++----------- drivers/platform/chrome/cros_ec_i2c.c | 10 ---------- drivers/platform/chrome/cros_ec_lpc.c | 4 ---- drivers/platform/chrome/cros_ec_spi.c | 11 ----------- include/linux/mfd/cros_ec.h | 10 ---------- 5 files changed, 3 insertions(+), 46 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/cros_ec.c b/drivers/mfd/cros_ec.c index fe6f83766144..6acfe036d522 100644 --- a/drivers/mfd/cros_ec.c +++ b/drivers/mfd/cros_ec.c @@ -129,8 +129,8 @@ int cros_ec_register(struct cros_ec_device *ec_dev) } } - err = mfd_add_devices(ec_dev->dev, PLATFORM_DEVID_AUTO, &ec_cell, 1, - NULL, ec_dev->irq, NULL); + err = devm_mfd_add_devices(ec_dev->dev, PLATFORM_DEVID_AUTO, &ec_cell, + 1, NULL, ec_dev->irq, NULL); if (err) { dev_err(dev, "Failed to register Embedded Controller subdevice %d\n", @@ -147,7 +147,7 @@ int cros_ec_register(struct cros_ec_device *ec_dev) * - the EC is responsive at init time (it is not true for a * sensor hub. */ - err = mfd_add_devices(ec_dev->dev, PLATFORM_DEVID_AUTO, + err = devm_mfd_add_devices(ec_dev->dev, PLATFORM_DEVID_AUTO, &ec_pd_cell, 1, NULL, ec_dev->irq, NULL); if (err) { dev_err(dev, @@ -181,14 +181,6 @@ int cros_ec_register(struct cros_ec_device *ec_dev) } EXPORT_SYMBOL(cros_ec_register); -int cros_ec_remove(struct cros_ec_device *ec_dev) -{ - mfd_remove_devices(ec_dev->dev); - - return 0; -} -EXPORT_SYMBOL(cros_ec_remove); - #ifdef CONFIG_PM_SLEEP int cros_ec_suspend(struct cros_ec_device *ec_dev) { diff --git a/drivers/platform/chrome/cros_ec_i2c.c b/drivers/platform/chrome/cros_ec_i2c.c index ef9b4763356f..9a009eaa4ada 100644 --- a/drivers/platform/chrome/cros_ec_i2c.c +++ b/drivers/platform/chrome/cros_ec_i2c.c @@ -317,15 +317,6 @@ static int cros_ec_i2c_probe(struct i2c_client *client, return 0; } -static int cros_ec_i2c_remove(struct i2c_client *client) -{ - struct cros_ec_device *ec_dev = i2c_get_clientdata(client); - - cros_ec_remove(ec_dev); - - return 0; -} - #ifdef CONFIG_PM_SLEEP static int cros_ec_i2c_suspend(struct device *dev) { @@ -376,7 +367,6 @@ static struct i2c_driver cros_ec_driver = { .pm = &cros_ec_i2c_pm_ops, }, .probe = cros_ec_i2c_probe, - .remove = cros_ec_i2c_remove, .id_table = cros_ec_i2c_id, }; diff --git a/drivers/platform/chrome/cros_ec_lpc.c b/drivers/platform/chrome/cros_ec_lpc.c index e1b75775cd4a..14684a56e40f 100644 --- a/drivers/platform/chrome/cros_ec_lpc.c +++ b/drivers/platform/chrome/cros_ec_lpc.c @@ -327,7 +327,6 @@ static int cros_ec_lpc_probe(struct platform_device *pdev) static int cros_ec_lpc_remove(struct platform_device *pdev) { - struct cros_ec_device *ec_dev; struct acpi_device *adev; adev = ACPI_COMPANION(&pdev->dev); @@ -335,9 +334,6 @@ static int cros_ec_lpc_remove(struct platform_device *pdev) acpi_remove_notify_handler(adev->handle, ACPI_ALL_NOTIFY, cros_ec_lpc_acpi_notify); - ec_dev = platform_get_drvdata(pdev); - cros_ec_remove(ec_dev); - return 0; } diff --git a/drivers/platform/chrome/cros_ec_spi.c b/drivers/platform/chrome/cros_ec_spi.c index 2060d1483043..6cfbc2835beb 100644 --- a/drivers/platform/chrome/cros_ec_spi.c +++ b/drivers/platform/chrome/cros_ec_spi.c @@ -685,16 +685,6 @@ static int cros_ec_spi_probe(struct spi_device *spi) return 0; } -static int cros_ec_spi_remove(struct spi_device *spi) -{ - struct cros_ec_device *ec_dev; - - ec_dev = spi_get_drvdata(spi); - cros_ec_remove(ec_dev); - - return 0; -} - #ifdef CONFIG_PM_SLEEP static int cros_ec_spi_suspend(struct device *dev) { @@ -733,7 +723,6 @@ static struct spi_driver cros_ec_driver_spi = { .pm = &cros_ec_spi_pm_ops, }, .probe = cros_ec_spi_probe, - .remove = cros_ec_spi_remove, .id_table = cros_ec_spi_id, }; diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index de8b588c8776..977ebaa78e99 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -281,16 +281,6 @@ int cros_ec_cmd_xfer(struct cros_ec_device *ec_dev, int cros_ec_cmd_xfer_status(struct cros_ec_device *ec_dev, struct cros_ec_command *msg); -/** - * cros_ec_remove() - Remove a ChromeOS EC. - * @ec_dev: Device to register. - * - * Call this to deregister a ChromeOS EC, then clean up any private data. - * - * Return: 0 on success or negative error code. - */ -int cros_ec_remove(struct cros_ec_device *ec_dev); - /** * cros_ec_register() - Register a new ChromeOS EC, using the provided info. * @ec_dev: Device to register. -- cgit From ecf8a6cd949ef236ce435ae488ceb6b3354e677e Mon Sep 17 00:00:00 2001 From: Enric Balletbo i Serra Date: Wed, 12 Dec 2018 18:33:57 +0100 Subject: mfd / platform: cros_ec: Move lightbar attributes to its own driver The entire way how cros sysfs attibutes are created is broken. cros_ec_lightbar should be its own driver and its attributes should be associated with a lightbar driver not the mfd driver. In order to retain the path, the lightbar attributes are attached to the cros_class. The patch also adds the sysfs documentation. Signed-off-by: Enric Balletbo i Serra Reviewed-by: Guenter Roeck Signed-off-by: Lee Jones --- .../sysfs-class-chromeos-driver-cros-ec-lightbar | 74 +++++++++++++++++ drivers/mfd/cros_ec_dev.c | 24 +++--- drivers/mfd/cros_ec_dev.h | 6 -- drivers/platform/chrome/Kconfig | 11 +++ drivers/platform/chrome/Makefile | 3 +- drivers/platform/chrome/cros_ec_lightbar.c | 95 +++++++++++++++++----- include/linux/mfd/cros_ec.h | 1 - 7 files changed, 173 insertions(+), 41 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-class-chromeos-driver-cros-ec-lightbar (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-class-chromeos-driver-cros-ec-lightbar b/Documentation/ABI/testing/sysfs-class-chromeos-driver-cros-ec-lightbar new file mode 100644 index 000000000000..57a037791403 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-class-chromeos-driver-cros-ec-lightbar @@ -0,0 +1,74 @@ +What: /sys/class/chromeos//lightbar/brightness +Date: August 2015 +KernelVersion: 4.2 +Description: + Writing to this file adjusts the overall brightness of + the lightbar, separate from any color intensity. The + valid range is 0 (off) to 255 (maximum brightness). + +What: /sys/class/chromeos//lightbar/interval_msec +Date: August 2015 +KernelVersion: 4.2 +Description: + The lightbar is controlled by an embedded controller (EC), + which also manages the keyboard, battery charging, fans, + and other system hardware. To prevent unprivileged users + from interfering with the other EC functions, the rate at + which the lightbar control files can be read or written is + limited. + + Reading this file will return the number of milliseconds + that must elapse between accessing any of the lightbar + functions through this interface. Going faster will simply + block until the necessary interval has lapsed. The interval + applies uniformly to all accesses of any kind by any user. + +What: /sys/class/chromeos//lightbar/led_rgb +Date: August 2015 +KernelVersion: 4.2 +Description: + This allows you to control each LED segment. If the + lightbar is already running one of the automatic + sequences, you probably won’t see anything change because + your color setting will be almost immediately replaced. + To get useful results, you should stop the lightbar + sequence first. + + The values written to this file are sets of four integers, + indicating LED, RED, GREEN, BLUE. The LED number is 0 to 3 + to select a single segment, or 4 to set all four segments + to the same value at once. The RED, GREEN, and BLUE + numbers should be in the range 0 (off) to 255 (maximum). + You can update more than one segment at a time by writing + more than one set of four integers. + +What: /sys/class/chromeos//lightbar/program +Date: August 2015 +KernelVersion: 4.2 +Description: + This allows you to upload and run custom lightbar sequences. + +What: /sys/class/chromeos//lightbar/sequence +Date: August 2015 +KernelVersion: 4.2 +Description: + The Pixel lightbar has a number of built-in sequences + that it displays under various conditions, such as at + power on, shut down, or while running. Reading from this + file displays the current sequence that the lightbar is + displaying. Writing to this file allows you to change the + sequence. + +What: /sys/class/chromeos//lightbar/userspace_control +Date: August 2015 +KernelVersion: 4.2 +Description: + This allows you to take the control of the lightbar. This + prevents the kernel from going through its normal + sequences. + +What: /sys/class/chromeos//lightbar/version +Date: August 2015 +KernelVersion: 4.2 +Description: + Show the information about the lightbar version. diff --git a/drivers/mfd/cros_ec_dev.c b/drivers/mfd/cros_ec_dev.c index 2d0fee488c5a..b227718e0ec2 100644 --- a/drivers/mfd/cros_ec_dev.c +++ b/drivers/mfd/cros_ec_dev.c @@ -36,7 +36,6 @@ static int ec_major; static const struct attribute_group *cros_ec_groups[] = { &cros_ec_attr_group, - &cros_ec_lightbar_attr_group, &cros_ec_vbc_attr_group, NULL, }; @@ -395,6 +394,10 @@ static const struct mfd_cell cros_usbpd_charger_cells[] = { { .name = "cros-usbpd-charger" } }; +static const struct mfd_cell cros_ec_platform_cells[] = { + { .name = "cros-ec-lightbar" }, +}; + static int ec_device_probe(struct platform_device *pdev) { int retval = -ENOMEM; @@ -470,9 +473,6 @@ static int ec_device_probe(struct platform_device *pdev) retval); } - /* Take control of the lightbar from the EC. */ - lb_manual_suspend_ctrl(ec, 1); - /* We can now add the sysfs class, we know which parameter to show */ retval = cdev_device_add(&ec->cdev, &ec->class_dev); if (retval) { @@ -480,6 +480,15 @@ static int ec_device_probe(struct platform_device *pdev) goto failed; } + retval = mfd_add_devices(ec->dev, PLATFORM_DEVID_AUTO, + cros_ec_platform_cells, + ARRAY_SIZE(cros_ec_platform_cells), + NULL, 0, NULL); + if (retval) + dev_warn(ec->dev, + "failed to add cros-ec platform devices: %d\n", + retval); + if (cros_ec_debugfs_init(ec)) dev_warn(dev, "failed to create debugfs directory\n"); @@ -494,9 +503,6 @@ static int ec_device_remove(struct platform_device *pdev) { struct cros_ec_dev *ec = dev_get_drvdata(&pdev->dev); - /* Let the EC take over the lightbar again. */ - lb_manual_suspend_ctrl(ec, 0); - cros_ec_debugfs_remove(ec); mfd_remove_devices(ec->dev); @@ -525,8 +531,6 @@ static __maybe_unused int ec_device_suspend(struct device *dev) cros_ec_debugfs_suspend(ec); - lb_suspend(ec); - return 0; } @@ -536,8 +540,6 @@ static __maybe_unused int ec_device_resume(struct device *dev) cros_ec_debugfs_resume(ec); - lb_resume(ec); - return 0; } diff --git a/drivers/mfd/cros_ec_dev.h b/drivers/mfd/cros_ec_dev.h index 978d836a0248..ec750433455a 100644 --- a/drivers/mfd/cros_ec_dev.h +++ b/drivers/mfd/cros_ec_dev.h @@ -44,10 +44,4 @@ struct cros_ec_readmem { #define CROS_EC_DEV_IOCXCMD _IOWR(CROS_EC_DEV_IOC, 0, struct cros_ec_command) #define CROS_EC_DEV_IOCRDMEM _IOWR(CROS_EC_DEV_IOC, 1, struct cros_ec_readmem) -/* Lightbar utilities */ -extern bool ec_has_lightbar(struct cros_ec_dev *ec); -extern int lb_manual_suspend_ctrl(struct cros_ec_dev *ec, uint8_t enable); -extern int lb_suspend(struct cros_ec_dev *ec); -extern int lb_resume(struct cros_ec_dev *ec); - #endif /* _CROS_EC_DEV_H_ */ diff --git a/drivers/platform/chrome/Kconfig b/drivers/platform/chrome/Kconfig index 16b1615958aa..6c05752a3334 100644 --- a/drivers/platform/chrome/Kconfig +++ b/drivers/platform/chrome/Kconfig @@ -111,4 +111,15 @@ config CROS_KBD_LED_BACKLIGHT To compile this driver as a module, choose M here: the module will be called cros_kbd_led_backlight. +config CROS_EC_LIGHTBAR + tristate "Chromebook Pixel's lightbar support" + depends on MFD_CROS_EC_CHARDEV + default MFD_CROS_EC_CHARDEV + help + This option exposes the Chromebook Pixel's lightbar to + userspace. + + To compile this driver as a module, choose M here: the + module will be called cros_ec_lightbar. + endif # CHROMEOS_PLATFORMS diff --git a/drivers/platform/chrome/Makefile b/drivers/platform/chrome/Makefile index cd591bf872bb..3c29a4b405d5 100644 --- a/drivers/platform/chrome/Makefile +++ b/drivers/platform/chrome/Makefile @@ -3,7 +3,7 @@ obj-$(CONFIG_CHROMEOS_LAPTOP) += chromeos_laptop.o obj-$(CONFIG_CHROMEOS_PSTORE) += chromeos_pstore.o obj-$(CONFIG_CHROMEOS_TBMC) += chromeos_tbmc.o -cros_ec_ctl-objs := cros_ec_sysfs.o cros_ec_lightbar.o \ +cros_ec_ctl-objs := cros_ec_sysfs.o \ cros_ec_vbc.o cros_ec_debugfs.o obj-$(CONFIG_CROS_EC_CTL) += cros_ec_ctl.o obj-$(CONFIG_CROS_EC_I2C) += cros_ec_i2c.o @@ -13,3 +13,4 @@ cros_ec_lpcs-$(CONFIG_CROS_EC_LPC_MEC) += cros_ec_lpc_mec.o obj-$(CONFIG_CROS_EC_LPC) += cros_ec_lpcs.o obj-$(CONFIG_CROS_EC_PROTO) += cros_ec_proto.o obj-$(CONFIG_CROS_KBD_LED_BACKLIGHT) += cros_kbd_led_backlight.o +obj-$(CONFIG_CROS_EC_LIGHTBAR) += cros_ec_lightbar.o diff --git a/drivers/platform/chrome/cros_ec_lightbar.c b/drivers/platform/chrome/cros_ec_lightbar.c index 68193bb53383..80eed6317570 100644 --- a/drivers/platform/chrome/cros_ec_lightbar.c +++ b/drivers/platform/chrome/cros_ec_lightbar.c @@ -33,6 +33,8 @@ #include #include +#define DRV_NAME "cros-ec-lightbar" + /* Rate-limit the lightbar interface to prevent DoS. */ static unsigned long lb_interval_jiffies = 50 * HZ / 1000; @@ -373,7 +375,7 @@ error: return ret; } -int lb_manual_suspend_ctrl(struct cros_ec_dev *ec, uint8_t enable) +static int lb_manual_suspend_ctrl(struct cros_ec_dev *ec, uint8_t enable) { struct ec_params_lightbar *param; struct cros_ec_command *msg; @@ -408,25 +410,6 @@ error: return ret; } -EXPORT_SYMBOL(lb_manual_suspend_ctrl); - -int lb_suspend(struct cros_ec_dev *ec) -{ - if (userspace_control || ec != ec_with_lightbar) - return 0; - - return lb_send_empty_cmd(ec, LIGHTBAR_CMD_SUSPEND); -} -EXPORT_SYMBOL(lb_suspend); - -int lb_resume(struct cros_ec_dev *ec) -{ - if (userspace_control || ec != ec_with_lightbar) - return 0; - - return lb_send_empty_cmd(ec, LIGHTBAR_CMD_RESUME); -} -EXPORT_SYMBOL(lb_resume); static ssize_t sequence_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) @@ -584,7 +567,7 @@ static struct attribute *__lb_cmds_attrs[] = { NULL, }; -bool ec_has_lightbar(struct cros_ec_dev *ec) +static bool ec_has_lightbar(struct cros_ec_dev *ec) { return !!get_lightbar_version(ec, NULL, NULL); } @@ -616,4 +599,72 @@ struct attribute_group cros_ec_lightbar_attr_group = { .attrs = __lb_cmds_attrs, .is_visible = cros_ec_lightbar_attrs_are_visible, }; -EXPORT_SYMBOL(cros_ec_lightbar_attr_group); + +static int cros_ec_lightbar_probe(struct platform_device *pd) +{ + struct cros_ec_dev *ec_dev = dev_get_drvdata(pd->dev.parent); + struct device *dev = &pd->dev; + int ret; + + /* Take control of the lightbar from the EC. */ + lb_manual_suspend_ctrl(ec_dev, 1); + + ret = sysfs_create_group(&ec_dev->class_dev.kobj, + &cros_ec_lightbar_attr_group); + if (ret < 0) + dev_err(dev, "failed to create %s attributes. err=%d\n", + cros_ec_lightbar_attr_group.name, ret); + + return ret; +} + +static int cros_ec_lightbar_remove(struct platform_device *pd) +{ + struct cros_ec_dev *ec_dev = dev_get_drvdata(pd->dev.parent); + + sysfs_remove_group(&ec_dev->class_dev.kobj, + &cros_ec_lightbar_attr_group); + + /* Let the EC take over the lightbar again. */ + lb_manual_suspend_ctrl(ec_dev, 0); + + return 0; +} + +static int __maybe_unused cros_ec_lightbar_resume(struct device *dev) +{ + struct cros_ec_dev *ec_dev = dev_get_drvdata(dev); + + if (userspace_control || ec_dev != ec_with_lightbar) + return 0; + + return lb_send_empty_cmd(ec_dev, LIGHTBAR_CMD_RESUME); +} + +static int __maybe_unused cros_ec_lightbar_suspend(struct device *dev) +{ + struct cros_ec_dev *ec_dev = dev_get_drvdata(dev); + + if (userspace_control || ec_dev != ec_with_lightbar) + return 0; + + return lb_send_empty_cmd(ec_dev, LIGHTBAR_CMD_SUSPEND); +} + +static SIMPLE_DEV_PM_OPS(cros_ec_lightbar_pm_ops, + cros_ec_lightbar_suspend, cros_ec_lightbar_resume); + +static struct platform_driver cros_ec_lightbar_driver = { + .driver = { + .name = DRV_NAME, + .pm = &cros_ec_lightbar_pm_ops, + }, + .probe = cros_ec_lightbar_probe, + .remove = cros_ec_lightbar_remove, +}; + +module_platform_driver(cros_ec_lightbar_driver); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Expose the Chromebook Pixel's lightbar to userspace"); +MODULE_ALIAS("platform:" DRV_NAME); diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index 977ebaa78e99..1e9b569564ea 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -327,7 +327,6 @@ u32 cros_ec_get_host_event(struct cros_ec_device *ec_dev); /* sysfs stuff */ extern struct attribute_group cros_ec_attr_group; -extern struct attribute_group cros_ec_lightbar_attr_group; extern struct attribute_group cros_ec_vbc_attr_group; /* debugfs stuff */ -- cgit From acb9900f9e8074858738f48bee9a705138961258 Mon Sep 17 00:00:00 2001 From: Enric Balletbo i Serra Date: Wed, 12 Dec 2018 18:33:58 +0100 Subject: mfd / platform: cros_ec: Move vbc attributes to its own driver The entire way how cros sysfs attibutes are created is broken. cros_ec_vbc should be its own driver and its attributes should be associated with a vbc driver not the mfd driver. In order to retain the path, the vbc attributes are attached to the cros_class. The patch also adds the sysfs documentation. Signed-off-by: Enric Balletbo i Serra Reviewed-by: Guenter Roeck Signed-off-by: Lee Jones --- .../sysfs-class-chromeos-driver-cros-ec-vbc | 6 +++ drivers/mfd/cros_ec_dev.c | 2 +- drivers/platform/chrome/Kconfig | 11 ++++++ drivers/platform/chrome/Makefile | 3 +- drivers/platform/chrome/cros_ec_vbc.c | 43 +++++++++++++++++++++- include/linux/mfd/cros_ec.h | 1 - 6 files changed, 62 insertions(+), 4 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-class-chromeos-driver-cros-ec-vbc (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-class-chromeos-driver-cros-ec-vbc b/Documentation/ABI/testing/sysfs-class-chromeos-driver-cros-ec-vbc new file mode 100644 index 000000000000..38c5aaaaa89a --- /dev/null +++ b/Documentation/ABI/testing/sysfs-class-chromeos-driver-cros-ec-vbc @@ -0,0 +1,6 @@ +What: /sys/class/chromeos//vbc/vboot_context +Date: October 2015 +KernelVersion: 4.4 +Description: + Read/write the verified boot context data included on a + small nvram space on some EC implementations. diff --git a/drivers/mfd/cros_ec_dev.c b/drivers/mfd/cros_ec_dev.c index b227718e0ec2..40c98808fa1c 100644 --- a/drivers/mfd/cros_ec_dev.c +++ b/drivers/mfd/cros_ec_dev.c @@ -36,7 +36,6 @@ static int ec_major; static const struct attribute_group *cros_ec_groups[] = { &cros_ec_attr_group, - &cros_ec_vbc_attr_group, NULL, }; @@ -396,6 +395,7 @@ static const struct mfd_cell cros_usbpd_charger_cells[] = { static const struct mfd_cell cros_ec_platform_cells[] = { { .name = "cros-ec-lightbar" }, + { .name = "cros-ec-vbc" }, }; static int ec_device_probe(struct platform_device *pdev) diff --git a/drivers/platform/chrome/Kconfig b/drivers/platform/chrome/Kconfig index 6c05752a3334..1e9dc9626e84 100644 --- a/drivers/platform/chrome/Kconfig +++ b/drivers/platform/chrome/Kconfig @@ -122,4 +122,15 @@ config CROS_EC_LIGHTBAR To compile this driver as a module, choose M here: the module will be called cros_ec_lightbar. +config CROS_EC_VBC + tristate "ChromeOS EC vboot context support" + depends on MFD_CROS_EC_CHARDEV && OF + default MFD_CROS_EC_CHARDEV + help + This option exposes the ChromeOS EC vboot context nvram to + userspace. + + To compile this driver as a module, choose M here: the + module will be called cros_ec_vbc. + endif # CHROMEOS_PLATFORMS diff --git a/drivers/platform/chrome/Makefile b/drivers/platform/chrome/Makefile index 3c29a4b405d5..4081b7179df7 100644 --- a/drivers/platform/chrome/Makefile +++ b/drivers/platform/chrome/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_CHROMEOS_LAPTOP) += chromeos_laptop.o obj-$(CONFIG_CHROMEOS_PSTORE) += chromeos_pstore.o obj-$(CONFIG_CHROMEOS_TBMC) += chromeos_tbmc.o cros_ec_ctl-objs := cros_ec_sysfs.o \ - cros_ec_vbc.o cros_ec_debugfs.o + cros_ec_debugfs.o obj-$(CONFIG_CROS_EC_CTL) += cros_ec_ctl.o obj-$(CONFIG_CROS_EC_I2C) += cros_ec_i2c.o obj-$(CONFIG_CROS_EC_SPI) += cros_ec_spi.o @@ -14,3 +14,4 @@ obj-$(CONFIG_CROS_EC_LPC) += cros_ec_lpcs.o obj-$(CONFIG_CROS_EC_PROTO) += cros_ec_proto.o obj-$(CONFIG_CROS_KBD_LED_BACKLIGHT) += cros_kbd_led_backlight.o obj-$(CONFIG_CROS_EC_LIGHTBAR) += cros_ec_lightbar.o +obj-$(CONFIG_CROS_EC_VBC) += cros_ec_vbc.o diff --git a/drivers/platform/chrome/cros_ec_vbc.c b/drivers/platform/chrome/cros_ec_vbc.c index 5356f26bc022..da3bbf05e86f 100644 --- a/drivers/platform/chrome/cros_ec_vbc.c +++ b/drivers/platform/chrome/cros_ec_vbc.c @@ -22,8 +22,11 @@ #include #include #include +#include #include +#define DRV_NAME "cros-ec-vbc" + static ssize_t vboot_context_read(struct file *filp, struct kobject *kobj, struct bin_attribute *att, char *buf, loff_t pos, size_t count) @@ -132,4 +135,42 @@ struct attribute_group cros_ec_vbc_attr_group = { .bin_attrs = cros_ec_vbc_bin_attrs, .is_bin_visible = cros_ec_vbc_is_visible, }; -EXPORT_SYMBOL(cros_ec_vbc_attr_group); + +static int cros_ec_vbc_probe(struct platform_device *pd) +{ + struct cros_ec_dev *ec_dev = dev_get_drvdata(pd->dev.parent); + struct device *dev = &pd->dev; + int ret; + + ret = sysfs_create_group(&ec_dev->class_dev.kobj, + &cros_ec_vbc_attr_group); + if (ret < 0) + dev_err(dev, "failed to create %s attributes. err=%d\n", + cros_ec_vbc_attr_group.name, ret); + + return ret; +} + +static int cros_ec_vbc_remove(struct platform_device *pd) +{ + struct cros_ec_dev *ec_dev = dev_get_drvdata(pd->dev.parent); + + sysfs_remove_group(&ec_dev->class_dev.kobj, + &cros_ec_vbc_attr_group); + + return 0; +} + +static struct platform_driver cros_ec_vbc_driver = { + .driver = { + .name = DRV_NAME, + }, + .probe = cros_ec_vbc_probe, + .remove = cros_ec_vbc_remove, +}; + +module_platform_driver(cros_ec_vbc_driver); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Expose the vboot context nvram to userspace"); +MODULE_ALIAS("platform:" DRV_NAME); diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index 1e9b569564ea..fdc3152cca1d 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -327,7 +327,6 @@ u32 cros_ec_get_host_event(struct cros_ec_device *ec_dev); /* sysfs stuff */ extern struct attribute_group cros_ec_attr_group; -extern struct attribute_group cros_ec_vbc_attr_group; /* debugfs stuff */ int cros_ec_debugfs_init(struct cros_ec_dev *ec); -- cgit From 6fce0a2cf5a050e8a3326556d7d293e69be303be Mon Sep 17 00:00:00 2001 From: Enric Balletbo i Serra Date: Wed, 12 Dec 2018 18:33:59 +0100 Subject: mfd / platform: cros_ec: Move debugfs attributes to its own driver The entire way how cros debugfs attibutes are created is broken. cros_ec_debugfs should be its own driver and its attributes should be associated with a debugfs driver not the mfd driver. Signed-off-by: Enric Balletbo i Serra Reviewed-by: Guenter Roeck Signed-off-by: Lee Jones --- drivers/mfd/cros_ec_dev.c | 41 +------------------- drivers/platform/chrome/Kconfig | 11 ++++++ drivers/platform/chrome/Makefile | 4 +- drivers/platform/chrome/cros_ec_debugfs.c | 62 +++++++++++++++++++++---------- include/linux/mfd/cros_ec.h | 6 --- 5 files changed, 56 insertions(+), 68 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/cros_ec_dev.c b/drivers/mfd/cros_ec_dev.c index 40c98808fa1c..9955937b821d 100644 --- a/drivers/mfd/cros_ec_dev.c +++ b/drivers/mfd/cros_ec_dev.c @@ -394,6 +394,7 @@ static const struct mfd_cell cros_usbpd_charger_cells[] = { }; static const struct mfd_cell cros_ec_platform_cells[] = { + { .name = "cros-ec-debugfs" }, { .name = "cros-ec-lightbar" }, { .name = "cros-ec-vbc" }, }; @@ -489,9 +490,6 @@ static int ec_device_probe(struct platform_device *pdev) "failed to add cros-ec platform devices: %d\n", retval); - if (cros_ec_debugfs_init(ec)) - dev_warn(dev, "failed to create debugfs directory\n"); - return 0; failed: @@ -503,62 +501,25 @@ static int ec_device_remove(struct platform_device *pdev) { struct cros_ec_dev *ec = dev_get_drvdata(&pdev->dev); - cros_ec_debugfs_remove(ec); - mfd_remove_devices(ec->dev); cdev_del(&ec->cdev); device_unregister(&ec->class_dev); return 0; } -static void ec_device_shutdown(struct platform_device *pdev) -{ - struct cros_ec_dev *ec = dev_get_drvdata(&pdev->dev); - - /* Be sure to clear up debugfs delayed works */ - cros_ec_debugfs_remove(ec); -} - static const struct platform_device_id cros_ec_id[] = { { DRV_NAME, 0 }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(platform, cros_ec_id); -static __maybe_unused int ec_device_suspend(struct device *dev) -{ - struct cros_ec_dev *ec = dev_get_drvdata(dev); - - cros_ec_debugfs_suspend(ec); - - return 0; -} - -static __maybe_unused int ec_device_resume(struct device *dev) -{ - struct cros_ec_dev *ec = dev_get_drvdata(dev); - - cros_ec_debugfs_resume(ec); - - return 0; -} - -static const struct dev_pm_ops cros_ec_dev_pm_ops = { -#ifdef CONFIG_PM_SLEEP - .suspend = ec_device_suspend, - .resume = ec_device_resume, -#endif -}; - static struct platform_driver cros_ec_dev_driver = { .driver = { .name = DRV_NAME, - .pm = &cros_ec_dev_pm_ops, }, .id_table = cros_ec_id, .probe = ec_device_probe, .remove = ec_device_remove, - .shutdown = ec_device_shutdown, }; static int __init cros_ec_dev_init(void) diff --git a/drivers/platform/chrome/Kconfig b/drivers/platform/chrome/Kconfig index 1e9dc9626e84..6cbf5b69d156 100644 --- a/drivers/platform/chrome/Kconfig +++ b/drivers/platform/chrome/Kconfig @@ -133,4 +133,15 @@ config CROS_EC_VBC To compile this driver as a module, choose M here: the module will be called cros_ec_vbc. +config CROS_EC_DEBUGFS + tristate "Export ChromeOS EC internals in DebugFS" + depends on MFD_CROS_EC_CHARDEV && DEBUG_FS + default MFD_CROS_EC_CHARDEV + help + This option exposes the ChromeOS EC device internals to + userspace. + + To compile this driver as a module, choose M here: the + module will be called cros_ec_debugfs. + endif # CHROMEOS_PLATFORMS diff --git a/drivers/platform/chrome/Makefile b/drivers/platform/chrome/Makefile index 4081b7179df7..12a5c4d18c17 100644 --- a/drivers/platform/chrome/Makefile +++ b/drivers/platform/chrome/Makefile @@ -3,8 +3,7 @@ obj-$(CONFIG_CHROMEOS_LAPTOP) += chromeos_laptop.o obj-$(CONFIG_CHROMEOS_PSTORE) += chromeos_pstore.o obj-$(CONFIG_CHROMEOS_TBMC) += chromeos_tbmc.o -cros_ec_ctl-objs := cros_ec_sysfs.o \ - cros_ec_debugfs.o +cros_ec_ctl-objs := cros_ec_sysfs.o obj-$(CONFIG_CROS_EC_CTL) += cros_ec_ctl.o obj-$(CONFIG_CROS_EC_I2C) += cros_ec_i2c.o obj-$(CONFIG_CROS_EC_SPI) += cros_ec_spi.o @@ -15,3 +14,4 @@ obj-$(CONFIG_CROS_EC_PROTO) += cros_ec_proto.o obj-$(CONFIG_CROS_KBD_LED_BACKLIGHT) += cros_kbd_led_backlight.o obj-$(CONFIG_CROS_EC_LIGHTBAR) += cros_ec_lightbar.o obj-$(CONFIG_CROS_EC_VBC) += cros_ec_vbc.o +obj-$(CONFIG_CROS_EC_DEBUGFS) += cros_ec_debugfs.o diff --git a/drivers/platform/chrome/cros_ec_debugfs.c b/drivers/platform/chrome/cros_ec_debugfs.c index c62ee8e610a0..6cacac53dfce 100644 --- a/drivers/platform/chrome/cros_ec_debugfs.c +++ b/drivers/platform/chrome/cros_ec_debugfs.c @@ -23,12 +23,16 @@ #include #include #include +#include #include +#include #include #include #include #include +#define DRV_NAME "cros-ec-debugfs" + #define LOG_SHIFT 14 #define LOG_SIZE (1 << LOG_SHIFT) #define LOG_POLL_SEC 10 @@ -423,8 +427,9 @@ static int cros_ec_create_pdinfo(struct cros_ec_debugfs *debug_info) return 0; } -int cros_ec_debugfs_init(struct cros_ec_dev *ec) +static int cros_ec_debugfs_probe(struct platform_device *pd) { + struct cros_ec_dev *ec = dev_get_drvdata(pd->dev.parent); struct cros_ec_platform *ec_platform = dev_get_platdata(ec->dev); const char *name = ec_platform->ec_name; struct cros_ec_debugfs *debug_info; @@ -453,40 +458,57 @@ int cros_ec_debugfs_init(struct cros_ec_dev *ec) ec->debug_info = debug_info; + dev_set_drvdata(&pd->dev, ec); + return 0; remove_debugfs: debugfs_remove_recursive(debug_info->dir); return ret; } -EXPORT_SYMBOL(cros_ec_debugfs_init); -void cros_ec_debugfs_remove(struct cros_ec_dev *ec) +static int cros_ec_debugfs_remove(struct platform_device *pd) { - if (!ec->debug_info) - return; + struct cros_ec_dev *ec = dev_get_drvdata(pd->dev.parent); debugfs_remove_recursive(ec->debug_info->dir); cros_ec_cleanup_console_log(ec->debug_info); + + return 0; } -EXPORT_SYMBOL(cros_ec_debugfs_remove); -void cros_ec_debugfs_suspend(struct cros_ec_dev *ec) +static int __maybe_unused cros_ec_debugfs_suspend(struct device *dev) { - /* - * cros_ec_debugfs_init() failures are non-fatal; it's also possible - * that we initted things but decided that console log wasn't supported. - * We'll use the same set of checks that cros_ec_debugfs_remove() + - * cros_ec_cleanup_console_log() end up using to handle those cases. - */ - if (ec->debug_info && ec->debug_info->log_buffer.buf) - cancel_delayed_work_sync(&ec->debug_info->log_poll_work); + struct cros_ec_dev *ec = dev_get_drvdata(dev); + + cancel_delayed_work_sync(&ec->debug_info->log_poll_work); + + return 0; } -EXPORT_SYMBOL(cros_ec_debugfs_suspend); -void cros_ec_debugfs_resume(struct cros_ec_dev *ec) +static int __maybe_unused cros_ec_debugfs_resume(struct device *dev) { - if (ec->debug_info && ec->debug_info->log_buffer.buf) - schedule_delayed_work(&ec->debug_info->log_poll_work, 0); + struct cros_ec_dev *ec = dev_get_drvdata(dev); + + schedule_delayed_work(&ec->debug_info->log_poll_work, 0); + + return 0; } -EXPORT_SYMBOL(cros_ec_debugfs_resume); + +static SIMPLE_DEV_PM_OPS(cros_ec_debugfs_pm_ops, + cros_ec_debugfs_suspend, cros_ec_debugfs_resume); + +static struct platform_driver cros_ec_debugfs_driver = { + .driver = { + .name = DRV_NAME, + .pm = &cros_ec_debugfs_pm_ops, + }, + .probe = cros_ec_debugfs_probe, + .remove = cros_ec_debugfs_remove, +}; + +module_platform_driver(cros_ec_debugfs_driver); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Debug logs for ChromeOS EC"); +MODULE_ALIAS("platform:" DRV_NAME); diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index fdc3152cca1d..e50860d190db 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -328,10 +328,4 @@ u32 cros_ec_get_host_event(struct cros_ec_device *ec_dev); /* sysfs stuff */ extern struct attribute_group cros_ec_attr_group; -/* debugfs stuff */ -int cros_ec_debugfs_init(struct cros_ec_dev *ec); -void cros_ec_debugfs_remove(struct cros_ec_dev *ec); -void cros_ec_debugfs_suspend(struct cros_ec_dev *ec); -void cros_ec_debugfs_resume(struct cros_ec_dev *ec); - #endif /* __LINUX_MFD_CROS_EC_H */ -- cgit From 6fd7f2bbd4422e7635bc771cd1ec440378158cb1 Mon Sep 17 00:00:00 2001 From: Enric Balletbo i Serra Date: Wed, 12 Dec 2018 18:34:00 +0100 Subject: mfd / platform: cros_ec: Move device sysfs attributes to its own driver The entire way how cros debugfs attibutes are created is broken. cros_ec_sysfs should be its own driver and its attributes should be associated with the sysfs driver not the mfd driver. The patch also adds the sysfs documentation. Signed-off-by: Enric Balletbo i Serra Reviewed-by: Guenter Roeck Signed-off-by: Lee Jones --- Documentation/ABI/testing/sysfs-class-chromeos | 32 +++++++++++++++++++++++ drivers/mfd/Kconfig | 1 - drivers/mfd/cros_ec_dev.c | 7 +---- drivers/platform/chrome/Kconfig | 14 +++++++--- drivers/platform/chrome/Makefile | 3 +-- drivers/platform/chrome/cros_ec_lightbar.c | 2 +- drivers/platform/chrome/cros_ec_sysfs.c | 36 +++++++++++++++++++++++++- include/linux/mfd/cros_ec.h | 3 --- 8 files changed, 81 insertions(+), 17 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-class-chromeos (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-class-chromeos b/Documentation/ABI/testing/sysfs-class-chromeos new file mode 100644 index 000000000000..5819699d66ec --- /dev/null +++ b/Documentation/ABI/testing/sysfs-class-chromeos @@ -0,0 +1,32 @@ +What: /sys/class/chromeos//flashinfo +Date: August 2015 +KernelVersion: 4.2 +Description: + Show the EC flash information. + +What: /sys/class/chromeos//kb_wake_angle +Date: March 2018 +KernelVersion: 4.17 +Description: + Control the keyboard wake lid angle. Values are between + 0 and 360. This file will also show the keyboard wake lid + angle by querying the hardware. + +What: /sys/class/chromeos//reboot +Date: August 2015 +KernelVersion: 4.2 +Description: + Tell the EC to reboot in various ways. Options are: + "cancel": Cancel a pending reboot. + "ro": Jump to RO without rebooting. + "rw": Jump to RW without rebooting. + "cold": Cold reboot. + "disable-jump": Disable jump until next reboot. + "hibernate": Hibernate the EC. + "at-shutdown": Reboot after an AP shutdown. + +What: /sys/class/chromeos//version +Date: August 2015 +KernelVersion: 4.2 +Description: + Show the information about the EC software and hardware. diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index f461460a2aeb..2acc105e43cc 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -215,7 +215,6 @@ config MFD_CROS_EC config MFD_CROS_EC_CHARDEV tristate "Chrome OS Embedded Controller userspace device interface" depends on MFD_CROS_EC - select CROS_EC_CTL ---help--- This driver adds support to talk with the ChromeOS EC from userspace. diff --git a/drivers/mfd/cros_ec_dev.c b/drivers/mfd/cros_ec_dev.c index 9955937b821d..b9ec2a798dbb 100644 --- a/drivers/mfd/cros_ec_dev.c +++ b/drivers/mfd/cros_ec_dev.c @@ -34,15 +34,9 @@ #define CROS_MAX_DEV 128 static int ec_major; -static const struct attribute_group *cros_ec_groups[] = { - &cros_ec_attr_group, - NULL, -}; - static struct class cros_class = { .owner = THIS_MODULE, .name = "chromeos", - .dev_groups = cros_ec_groups, }; /* Basic communication */ @@ -396,6 +390,7 @@ static const struct mfd_cell cros_usbpd_charger_cells[] = { static const struct mfd_cell cros_ec_platform_cells[] = { { .name = "cros-ec-debugfs" }, { .name = "cros-ec-lightbar" }, + { .name = "cros-ec-sysfs" }, { .name = "cros-ec-vbc" }, }; diff --git a/drivers/platform/chrome/Kconfig b/drivers/platform/chrome/Kconfig index 6cbf5b69d156..5e2fde5ff63d 100644 --- a/drivers/platform/chrome/Kconfig +++ b/drivers/platform/chrome/Kconfig @@ -49,9 +49,6 @@ config CHROMEOS_TBMC To compile this driver as a module, choose M here: the module will be called chromeos_tbmc. -config CROS_EC_CTL - tristate - config CROS_EC_I2C tristate "ChromeOS Embedded Controller (I2C)" depends on MFD_CROS_EC && I2C @@ -144,4 +141,15 @@ config CROS_EC_DEBUGFS To compile this driver as a module, choose M here: the module will be called cros_ec_debugfs. +config CROS_EC_SYSFS + tristate "ChromeOS EC control and information through sysfs" + depends on MFD_CROS_EC_CHARDEV && SYSFS + default MFD_CROS_EC_CHARDEV + help + This option exposes some sysfs attributes to control and get + information from ChromeOS EC. + + To compile this driver as a module, choose M here: the + module will be called cros_ec_sysfs. + endif # CHROMEOS_PLATFORMS diff --git a/drivers/platform/chrome/Makefile b/drivers/platform/chrome/Makefile index 12a5c4d18c17..fdbee501931b 100644 --- a/drivers/platform/chrome/Makefile +++ b/drivers/platform/chrome/Makefile @@ -3,8 +3,6 @@ obj-$(CONFIG_CHROMEOS_LAPTOP) += chromeos_laptop.o obj-$(CONFIG_CHROMEOS_PSTORE) += chromeos_pstore.o obj-$(CONFIG_CHROMEOS_TBMC) += chromeos_tbmc.o -cros_ec_ctl-objs := cros_ec_sysfs.o -obj-$(CONFIG_CROS_EC_CTL) += cros_ec_ctl.o obj-$(CONFIG_CROS_EC_I2C) += cros_ec_i2c.o obj-$(CONFIG_CROS_EC_SPI) += cros_ec_spi.o cros_ec_lpcs-objs := cros_ec_lpc.o cros_ec_lpc_reg.o @@ -15,3 +13,4 @@ obj-$(CONFIG_CROS_KBD_LED_BACKLIGHT) += cros_kbd_led_backlight.o obj-$(CONFIG_CROS_EC_LIGHTBAR) += cros_ec_lightbar.o obj-$(CONFIG_CROS_EC_VBC) += cros_ec_vbc.o obj-$(CONFIG_CROS_EC_DEBUGFS) += cros_ec_debugfs.o +obj-$(CONFIG_CROS_EC_SYSFS) += cros_ec_sysfs.o diff --git a/drivers/platform/chrome/cros_ec_lightbar.c b/drivers/platform/chrome/cros_ec_lightbar.c index 80eed6317570..c22318ba93aa 100644 --- a/drivers/platform/chrome/cros_ec_lightbar.c +++ b/drivers/platform/chrome/cros_ec_lightbar.c @@ -594,7 +594,7 @@ static umode_t cros_ec_lightbar_attrs_are_visible(struct kobject *kobj, return 0; } -struct attribute_group cros_ec_lightbar_attr_group = { +static struct attribute_group cros_ec_lightbar_attr_group = { .name = "lightbar", .attrs = __lb_cmds_attrs, .is_visible = cros_ec_lightbar_attrs_are_visible, diff --git a/drivers/platform/chrome/cros_ec_sysfs.c b/drivers/platform/chrome/cros_ec_sysfs.c index f34a50121064..0ff5aa30c070 100644 --- a/drivers/platform/chrome/cros_ec_sysfs.c +++ b/drivers/platform/chrome/cros_ec_sysfs.c @@ -34,6 +34,8 @@ #include #include +#define DRV_NAME "cros-ec-sysfs" + /* Accessor functions */ static ssize_t reboot_show(struct device *dev, @@ -353,7 +355,39 @@ struct attribute_group cros_ec_attr_group = { .attrs = __ec_attrs, .is_visible = cros_ec_ctrl_visible, }; -EXPORT_SYMBOL(cros_ec_attr_group); + +static int cros_ec_sysfs_probe(struct platform_device *pd) +{ + struct cros_ec_dev *ec_dev = dev_get_drvdata(pd->dev.parent); + struct device *dev = &pd->dev; + int ret; + + ret = sysfs_create_group(&ec_dev->class_dev.kobj, &cros_ec_attr_group); + if (ret < 0) + dev_err(dev, "failed to create attributes. err=%d\n", ret); + + return ret; +} + +static int cros_ec_sysfs_remove(struct platform_device *pd) +{ + struct cros_ec_dev *ec_dev = dev_get_drvdata(pd->dev.parent); + + sysfs_remove_group(&ec_dev->class_dev.kobj, &cros_ec_attr_group); + + return 0; +} + +static struct platform_driver cros_ec_sysfs_driver = { + .driver = { + .name = DRV_NAME, + }, + .probe = cros_ec_sysfs_probe, + .remove = cros_ec_sysfs_remove, +}; + +module_platform_driver(cros_ec_sysfs_driver); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("ChromeOS EC control driver"); +MODULE_ALIAS("platform:" DRV_NAME); diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index e50860d190db..8f2a8918bfa3 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -325,7 +325,4 @@ int cros_ec_get_next_event(struct cros_ec_device *ec_dev, bool *wake_event); */ u32 cros_ec_get_host_event(struct cros_ec_device *ec_dev); -/* sysfs stuff */ -extern struct attribute_group cros_ec_attr_group; - #endif /* __LINUX_MFD_CROS_EC_H */ -- cgit From efb5a790dfc33b36bc64dd5a41ffc3ae5a709770 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Sun, 13 Jan 2019 13:36:46 -0500 Subject: mfd: wm831x-core: Drop unused module infrastructure from non-modular code The Kconfig currently controlling compilation of this code is: drivers/mfd/Kconfig:config MFD_WM831X drivers/mfd/Kconfig: bool ...meaning that it currently is not being built as a module by anyone. Lets remove the couple traces of modular infrastructure use, so that when reading the driver there is no doubt it is builtin-only. We delete the MODULE_LICENSE tag etc. since all that information is already contained at the top of the file in the comments. Previous demodularizaion work has made wm831x_device_exit() no longer used, so it is also removed from the 831x core code. Signed-off-by: Paul Gortmaker Acked-by: Charles Keepax Signed-off-by: Lee Jones --- drivers/mfd/wm831x-core.c | 15 ++------------- include/linux/mfd/wm831x/core.h | 1 - 2 files changed, 2 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/wm831x-core.c b/drivers/mfd/wm831x-core.c index e70d35ef5c6d..25fbbaf39cb9 100644 --- a/drivers/mfd/wm831x-core.c +++ b/drivers/mfd/wm831x-core.c @@ -13,7 +13,8 @@ */ #include -#include +#include +#include #include #include #include @@ -1892,14 +1893,6 @@ err: return ret; } -void wm831x_device_exit(struct wm831x *wm831x) -{ - wm831x_otp_exit(wm831x); - mfd_remove_devices(wm831x->dev); - free_irq(wm831x_irq(wm831x, WM831X_IRQ_AUXADC_DATA), wm831x); - wm831x_irq_exit(wm831x); -} - int wm831x_device_suspend(struct wm831x *wm831x) { int reg, mask; @@ -1944,7 +1937,3 @@ void wm831x_device_shutdown(struct wm831x *wm831x) } } EXPORT_SYMBOL_GPL(wm831x_device_shutdown); - -MODULE_DESCRIPTION("Core support for the WM831X AudioPlus PMIC"); -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Mark Brown"); diff --git a/include/linux/mfd/wm831x/core.h b/include/linux/mfd/wm831x/core.h index b49fa67612f1..6fcb8eb00282 100644 --- a/include/linux/mfd/wm831x/core.h +++ b/include/linux/mfd/wm831x/core.h @@ -418,7 +418,6 @@ int wm831x_bulk_read(struct wm831x *wm831x, unsigned short reg, int count, u16 *buf); int wm831x_device_init(struct wm831x *wm831x, int irq); -void wm831x_device_exit(struct wm831x *wm831x); int wm831x_device_suspend(struct wm831x *wm831x); void wm831x_device_shutdown(struct wm831x *wm831x); int wm831x_irq_init(struct wm831x *wm831x, int irq); -- cgit From 0db88688e1bb0180d6348742bdba8927cd0e5670 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Sun, 13 Jan 2019 13:36:48 -0500 Subject: mfd: wm8350-core: Drop unused module infrastructure from non-modular code The Kconfig currently controlling compilation of this code is: drivers/mfd/Kconfig:config MFD_WM8350 drivers/mfd/Kconfig: bool ...meaning that it currently is not being built as a module by anyone. Lets remove the couple traces of modular infrastructure use, so that when reading the driver there is no doubt it is builtin-only. We delete the MODULE_LICENSE tag etc. since all that information is already contained at the top of the file in the comments. We replace module.h with init.h and export.h ; the latter since the file does export some symbols. Previous demodularizaion work has made wm8350_device_exit() no longer used, so it is also removed from the 8350 core code. Signed-off-by: Paul Gortmaker Acked-by: Charles Keepax Signed-off-by: Lee Jones --- drivers/mfd/wm8350-core.c | 30 ++---------------------------- include/linux/mfd/wm8350/core.h | 1 - 2 files changed, 2 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/wm8350-core.c b/drivers/mfd/wm8350-core.c index 8a07c5634aee..9e1070f26b11 100644 --- a/drivers/mfd/wm8350-core.c +++ b/drivers/mfd/wm8350-core.c @@ -13,7 +13,8 @@ */ #include -#include +#include +#include #include #include #include @@ -442,30 +443,3 @@ err: return ret; } EXPORT_SYMBOL_GPL(wm8350_device_init); - -void wm8350_device_exit(struct wm8350 *wm8350) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(wm8350->pmic.led); i++) - platform_device_unregister(wm8350->pmic.led[i].pdev); - - for (i = 0; i < ARRAY_SIZE(wm8350->pmic.pdev); i++) - platform_device_unregister(wm8350->pmic.pdev[i]); - - platform_device_unregister(wm8350->wdt.pdev); - platform_device_unregister(wm8350->rtc.pdev); - platform_device_unregister(wm8350->power.pdev); - platform_device_unregister(wm8350->hwmon.pdev); - platform_device_unregister(wm8350->gpio.pdev); - platform_device_unregister(wm8350->codec.pdev); - - if (wm8350->irq_base) - free_irq(wm8350->irq_base + WM8350_IRQ_AUXADC_DATARDY, wm8350); - - wm8350_irq_exit(wm8350); -} -EXPORT_SYMBOL_GPL(wm8350_device_exit); - -MODULE_DESCRIPTION("WM8350 AudioPlus PMIC core driver"); -MODULE_LICENSE("GPL"); diff --git a/include/linux/mfd/wm8350/core.h b/include/linux/mfd/wm8350/core.h index 509481d9cf19..202d9bde2c7c 100644 --- a/include/linux/mfd/wm8350/core.h +++ b/include/linux/mfd/wm8350/core.h @@ -643,7 +643,6 @@ struct wm8350_platform_data { */ int wm8350_device_init(struct wm8350 *wm8350, int irq, struct wm8350_platform_data *pdata); -void wm8350_device_exit(struct wm8350 *wm8350); /* * WM8350 device IO -- cgit From d57f72875eed3f26afaca176c0f425f209bc99d7 Mon Sep 17 00:00:00 2001 From: Christian Hohnstaedt Date: Mon, 14 Jan 2019 09:16:34 +0100 Subject: mfd: tps65218.c: Add input voltage options These options apply to all regulators in this chip. ti,strict-supply-voltage-supervision: Set STRICT flag in CONFIG1 ti,under-voltage-limit-microvolt: Select 2.75, 2.95, 3.25 or 3.35 V UVLO in CONFIG1 ti,under-voltage-hyst-microvolt: Select 200mV or 400mV UVLOHYS in CONFIG2 Signed-off-by: Christian Hohnstaedt Tested-by: Keerthy Reviewed-by: Keerthy Signed-off-by: Lee Jones --- drivers/mfd/tps65218.c | 89 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/mfd/tps65218.h | 4 ++ 2 files changed, 93 insertions(+) (limited to 'include/linux') diff --git a/drivers/mfd/tps65218.c b/drivers/mfd/tps65218.c index 8bcdecf494d0..a62ea4cb8be7 100644 --- a/drivers/mfd/tps65218.c +++ b/drivers/mfd/tps65218.c @@ -211,6 +211,83 @@ static const struct of_device_id of_tps65218_match_table[] = { }; MODULE_DEVICE_TABLE(of, of_tps65218_match_table); +static int tps65218_voltage_set_strict(struct tps65218 *tps) +{ + u32 strict; + + if (of_property_read_u32(tps->dev->of_node, + "ti,strict-supply-voltage-supervision", + &strict)) + return 0; + + if (strict != 0 && strict != 1) { + dev_err(tps->dev, + "Invalid ti,strict-supply-voltage-supervision value\n"); + return -EINVAL; + } + + tps65218_update_bits(tps, TPS65218_REG_CONFIG1, + TPS65218_CONFIG1_STRICT, + strict ? TPS65218_CONFIG1_STRICT : 0, + TPS65218_PROTECT_L1); + return 0; +} + +static int tps65218_voltage_set_uv_hyst(struct tps65218 *tps) +{ + u32 hyst; + + if (of_property_read_u32(tps->dev->of_node, + "ti,under-voltage-hyst-microvolt", &hyst)) + return 0; + + if (hyst != 400000 && hyst != 200000) { + dev_err(tps->dev, + "Invalid ti,under-voltage-hyst-microvolt value\n"); + return -EINVAL; + } + + tps65218_update_bits(tps, TPS65218_REG_CONFIG2, + TPS65218_CONFIG2_UVLOHYS, + hyst == 400000 ? TPS65218_CONFIG2_UVLOHYS : 0, + TPS65218_PROTECT_L1); + return 0; +} + +static int tps65218_voltage_set_uvlo(struct tps65218 *tps) +{ + u32 uvlo; + int uvloval; + + if (of_property_read_u32(tps->dev->of_node, + "ti,under-voltage-limit-microvolt", &uvlo)) + return 0; + + switch (uvlo) { + case 2750000: + uvloval = TPS65218_CONFIG1_UVLO_2750000; + break; + case 2950000: + uvloval = TPS65218_CONFIG1_UVLO_2950000; + break; + case 3250000: + uvloval = TPS65218_CONFIG1_UVLO_3250000; + break; + case 3350000: + uvloval = TPS65218_CONFIG1_UVLO_3350000; + break; + default: + dev_err(tps->dev, + "Invalid ti,under-voltage-limit-microvolt value\n"); + return -EINVAL; + } + + tps65218_update_bits(tps, TPS65218_REG_CONFIG1, + TPS65218_CONFIG1_UVLO_MASK, uvloval, + TPS65218_PROTECT_L1); + return 0; +} + static int tps65218_probe(struct i2c_client *client, const struct i2c_device_id *ids) { @@ -249,6 +326,18 @@ static int tps65218_probe(struct i2c_client *client, tps->rev = chipid & TPS65218_CHIPID_REV_MASK; + ret = tps65218_voltage_set_strict(tps); + if (ret) + return ret; + + ret = tps65218_voltage_set_uvlo(tps); + if (ret) + return ret; + + ret = tps65218_voltage_set_uv_hyst(tps); + if (ret) + return ret; + ret = mfd_add_devices(tps->dev, PLATFORM_DEVID_AUTO, tps65218_cells, ARRAY_SIZE(tps65218_cells), NULL, 0, regmap_irq_get_domain(tps->irq_data)); diff --git a/include/linux/mfd/tps65218.h b/include/linux/mfd/tps65218.h index c204d9a79436..3cbe103495ab 100644 --- a/include/linux/mfd/tps65218.h +++ b/include/linux/mfd/tps65218.h @@ -137,6 +137,10 @@ #define TPS65218_CONFIG1_PGDLY_MASK 0x18 #define TPS65218_CONFIG1_STRICT BIT(2) #define TPS65218_CONFIG1_UVLO_MASK 0x3 +#define TPS65218_CONFIG1_UVLO_2750000 0x0 +#define TPS65218_CONFIG1_UVLO_2950000 0x1 +#define TPS65218_CONFIG1_UVLO_3250000 0x2 +#define TPS65218_CONFIG1_UVLO_3350000 0x3 #define TPS65218_CONFIG2_DC12_RST BIT(7) #define TPS65218_CONFIG2_UVLOHYS BIT(6) -- cgit From cfced786969c2a3e1bca45d7055a00311d93ae6c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 4 Jan 2019 18:20:05 +0100 Subject: dma-mapping: remove the default map_resource implementation Instead provide a proper implementation in the direct mapping code, and also wire it up for arm and powerpc, leaving an error return for all the IOMMU or virtual mapping instances for which we'd have to wire up an actual implementation Signed-off-by: Christoph Hellwig Tested-by: Marek Szyprowski --- arch/arm/mm/dma-mapping.c | 2 ++ arch/powerpc/kernel/dma-swiotlb.c | 1 + arch/powerpc/kernel/dma.c | 1 + include/linux/dma-mapping.h | 12 +++++++----- kernel/dma/direct.c | 14 ++++++++++++++ 5 files changed, 25 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index f1e2922e447c..3c8534904209 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -188,6 +188,7 @@ const struct dma_map_ops arm_dma_ops = { .unmap_page = arm_dma_unmap_page, .map_sg = arm_dma_map_sg, .unmap_sg = arm_dma_unmap_sg, + .map_resource = dma_direct_map_resource, .sync_single_for_cpu = arm_dma_sync_single_for_cpu, .sync_single_for_device = arm_dma_sync_single_for_device, .sync_sg_for_cpu = arm_dma_sync_sg_for_cpu, @@ -211,6 +212,7 @@ const struct dma_map_ops arm_coherent_dma_ops = { .get_sgtable = arm_dma_get_sgtable, .map_page = arm_coherent_dma_map_page, .map_sg = arm_dma_map_sg, + .map_resource = dma_direct_map_resource, .dma_supported = arm_dma_supported, }; EXPORT_SYMBOL(arm_coherent_dma_ops); diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c index 7d5fc9751622..fbb2506a414e 100644 --- a/arch/powerpc/kernel/dma-swiotlb.c +++ b/arch/powerpc/kernel/dma-swiotlb.c @@ -55,6 +55,7 @@ const struct dma_map_ops powerpc_swiotlb_dma_ops = { .dma_supported = swiotlb_dma_supported, .map_page = dma_direct_map_page, .unmap_page = dma_direct_unmap_page, + .map_resource = dma_direct_map_resource, .sync_single_for_cpu = dma_direct_sync_single_for_cpu, .sync_single_for_device = dma_direct_sync_single_for_device, .sync_sg_for_cpu = dma_direct_sync_sg_for_cpu, diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index b1903ebb2e9c..258b9e8ebb99 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -273,6 +273,7 @@ const struct dma_map_ops dma_nommu_ops = { .dma_supported = dma_nommu_dma_supported, .map_page = dma_nommu_map_page, .unmap_page = dma_nommu_unmap_page, + .map_resource = dma_direct_map_resource, .get_required_mask = dma_nommu_get_required_mask, #ifdef CONFIG_NOT_COHERENT_CACHE .sync_single_for_cpu = dma_nommu_sync_single, diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index f6ded992c183..9842085e6774 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -208,6 +208,8 @@ dma_addr_t dma_direct_map_page(struct device *dev, struct page *page, unsigned long attrs); int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir, unsigned long attrs); +dma_addr_t dma_direct_map_resource(struct device *dev, phys_addr_t paddr, + size_t size, enum dma_data_direction dir, unsigned long attrs); #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ defined(CONFIG_SWIOTLB) @@ -346,19 +348,19 @@ static inline dma_addr_t dma_map_resource(struct device *dev, unsigned long attrs) { const struct dma_map_ops *ops = get_dma_ops(dev); - dma_addr_t addr; + dma_addr_t addr = DMA_MAPPING_ERROR; BUG_ON(!valid_dma_direction(dir)); /* Don't allow RAM to be mapped */ BUG_ON(pfn_valid(PHYS_PFN(phys_addr))); - addr = phys_addr; - if (ops && ops->map_resource) + if (dma_is_direct(ops)) + addr = dma_direct_map_resource(dev, phys_addr, size, dir, attrs); + else if (ops->map_resource) addr = ops->map_resource(dev, phys_addr, size, dir, attrs); debug_dma_map_resource(dev, phys_addr, size, dir, addr); - return addr; } @@ -369,7 +371,7 @@ static inline void dma_unmap_resource(struct device *dev, dma_addr_t addr, const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); - if (ops && ops->unmap_resource) + if (!dma_is_direct(ops) && ops->unmap_resource) ops->unmap_resource(dev, addr, size, dir, attrs); debug_dma_unmap_resource(dev, addr, size, dir); } diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 355d16acee6d..25bd19974223 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -356,6 +356,20 @@ out_unmap: } EXPORT_SYMBOL(dma_direct_map_sg); +dma_addr_t dma_direct_map_resource(struct device *dev, phys_addr_t paddr, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ + dma_addr_t dma_addr = paddr; + + if (unlikely(!dma_direct_possible(dev, dma_addr, size))) { + report_addr(dev, dma_addr, size); + return DMA_MAPPING_ERROR; + } + + return dma_addr; +} +EXPORT_SYMBOL(dma_direct_map_resource); + /* * Because 32-bit DMA masks are so common we expect every architecture to be * able to satisfy them - either by not supporting more physical memory, or by -- cgit From 645386dfe6307dbb28f10a4513792a59beda0efa Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 4 Jan 2019 17:17:53 +0100 Subject: dma-mapping: don't BUG when calling dma_map_resource on RAM Use WARN_ON_ONCE to print a stack trace and return a proper error code instead. Signed-off-by: Christoph Hellwig Reviewed-by: Robin Murphy Tested-by: Marek Szyprowski --- include/linux/dma-mapping.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 9842085e6774..b904d55247ab 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -353,7 +353,8 @@ static inline dma_addr_t dma_map_resource(struct device *dev, BUG_ON(!valid_dma_direction(dir)); /* Don't allow RAM to be mapped */ - BUG_ON(pfn_valid(PHYS_PFN(phys_addr))); + if (WARN_ON_ONCE(pfn_valid(PHYS_PFN(phys_addr)))) + return DMA_MAPPING_ERROR; if (dma_is_direct(ops)) addr = dma_direct_map_resource(dev, phys_addr, size, dir, attrs); -- cgit From e2f3cd831a280fc226118d9369bf3f77aab58c56 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 1 Feb 2019 01:49:14 +0100 Subject: driver core: Fix handling of runtime PM flags in device_link_add() After commit ead18c23c263 ("driver core: Introduce device links reference counting"), if there is a link between the given supplier and the given consumer already, device_link_add() will refcount it and return it unconditionally without updating its flags. It is possible, however, that the second (or any subsequent) caller of device_link_add() for the same consumer-supplier pair will pass DL_FLAG_PM_RUNTIME, possibly along with DL_FLAG_RPM_ACTIVE, in flags to it and the existing link may not behave as expected then. First, if DL_FLAG_PM_RUNTIME is not set in the existing link's flags at all, it needs to be set like during the original initialization of the link. Second, if DL_FLAG_RPM_ACTIVE is passed to device_link_add() in flags (in addition to DL_FLAG_PM_RUNTIME), the existing link should to be updated to reflect the "active" runtime PM configuration of the consumer-supplier pair and extra care must be taken here to avoid possible destructive races with runtime PM of the consumer. To that end, redefine the rpm_active field in struct device_link as a refcount, initialize it to 1 and make rpm_resume() (for the consumer) and device_link_add() increment it whenever they acquire a runtime PM reference on the supplier device. Accordingly, make rpm_suspend() (for the consumer) and pm_runtime_clean_up_links() decrement it and drop runtime PM references to the supplier device in a loop until rpm_active becones 1 again. Fixes: ead18c23c263 ("driver core: Introduce device links reference counting") Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 45 +++++++++++++++++++++++++++++--------------- drivers/base/power/runtime.c | 26 +++++++++++-------------- include/linux/device.h | 2 +- 3 files changed, 42 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/core.c b/drivers/base/core.c index 50610cd87e71..8611385e44b5 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -165,6 +165,19 @@ void device_pm_move_to_tail(struct device *dev) device_links_read_unlock(idx); } +static void device_link_rpm_prepare(struct device *consumer, + struct device *supplier) +{ + pm_runtime_new_link(consumer); + /* + * If the link is being added by the consumer driver at probe time, + * balance the decrementation of the supplier's runtime PM usage counter + * after consumer probe in driver_probe_device(). + */ + if (consumer->links.status == DL_DEV_PROBING) + pm_runtime_get_noresume(supplier); +} + /** * device_link_add - Create a link between two devices. * @consumer: Consumer end of the link. @@ -201,7 +214,6 @@ struct device_link *device_link_add(struct device *consumer, struct device *supplier, u32 flags) { struct device_link *link; - bool rpm_put_supplier = false; if (!consumer || !supplier || (flags & DL_FLAG_STATELESS && @@ -213,7 +225,6 @@ struct device_link *device_link_add(struct device *consumer, pm_runtime_put_noidle(supplier); return NULL; } - rpm_put_supplier = true; } device_links_write_lock(); @@ -249,6 +260,15 @@ struct device_link *device_link_add(struct device *consumer, if (flags & DL_FLAG_AUTOREMOVE_SUPPLIER) link->flags |= DL_FLAG_AUTOREMOVE_SUPPLIER; + if (flags & DL_FLAG_PM_RUNTIME) { + if (!(link->flags & DL_FLAG_PM_RUNTIME)) { + device_link_rpm_prepare(consumer, supplier); + link->flags |= DL_FLAG_PM_RUNTIME; + } + if (flags & DL_FLAG_RPM_ACTIVE) + refcount_inc(&link->rpm_active); + } + kref_get(&link->kref); goto out; } @@ -257,20 +277,15 @@ struct device_link *device_link_add(struct device *consumer, if (!link) goto out; + refcount_set(&link->rpm_active, 1); + if (flags & DL_FLAG_PM_RUNTIME) { - if (flags & DL_FLAG_RPM_ACTIVE) { - link->rpm_active = true; - rpm_put_supplier = false; - } - pm_runtime_new_link(consumer); - /* - * If the link is being added by the consumer driver at probe - * time, balance the decrementation of the supplier's runtime PM - * usage counter after consumer probe in driver_probe_device(). - */ - if (consumer->links.status == DL_DEV_PROBING) - pm_runtime_get_noresume(supplier); + if (flags & DL_FLAG_RPM_ACTIVE) + refcount_inc(&link->rpm_active); + + device_link_rpm_prepare(consumer, supplier); } + get_device(supplier); link->supplier = supplier; INIT_LIST_HEAD(&link->s_node); @@ -333,7 +348,7 @@ struct device_link *device_link_add(struct device *consumer, device_pm_unlock(); device_links_write_unlock(); - if (rpm_put_supplier) + if ((flags & DL_FLAG_PM_RUNTIME && flags & DL_FLAG_RPM_ACTIVE) && !link) pm_runtime_put(supplier); return link; diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 457be03b744d..8bc9a432de70 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -259,11 +259,8 @@ static int rpm_get_suppliers(struct device *dev) list_for_each_entry_rcu(link, &dev->links.suppliers, c_node) { int retval; - if (!(link->flags & DL_FLAG_PM_RUNTIME)) - continue; - - if (READ_ONCE(link->status) == DL_STATE_SUPPLIER_UNBIND || - link->rpm_active) + if (!(link->flags & DL_FLAG_PM_RUNTIME) || + READ_ONCE(link->status) == DL_STATE_SUPPLIER_UNBIND) continue; retval = pm_runtime_get_sync(link->supplier); @@ -272,7 +269,7 @@ static int rpm_get_suppliers(struct device *dev) pm_runtime_put_noidle(link->supplier); return retval; } - link->rpm_active = true; + refcount_inc(&link->rpm_active); } return 0; } @@ -281,12 +278,13 @@ static void rpm_put_suppliers(struct device *dev) { struct device_link *link; - list_for_each_entry_rcu(link, &dev->links.suppliers, c_node) - if (link->rpm_active && - READ_ONCE(link->status) != DL_STATE_SUPPLIER_UNBIND) { + list_for_each_entry_rcu(link, &dev->links.suppliers, c_node) { + if (READ_ONCE(link->status) == DL_STATE_SUPPLIER_UNBIND) + continue; + + while (refcount_dec_not_one(&link->rpm_active)) pm_runtime_put(link->supplier); - link->rpm_active = false; - } + } } /** @@ -1539,7 +1537,7 @@ void pm_runtime_remove(struct device *dev) * * Check links from this device to any consumers and if any of them have active * runtime PM references to the device, drop the usage counter of the device - * (once per link). + * (as many times as needed). * * Links with the DL_FLAG_STATELESS flag set are ignored. * @@ -1561,10 +1559,8 @@ void pm_runtime_clean_up_links(struct device *dev) if (link->flags & DL_FLAG_STATELESS) continue; - if (link->rpm_active) { + while (refcount_dec_not_one(&link->rpm_active)) pm_runtime_put_noidle(dev); - link->rpm_active = false; - } } device_links_read_unlock(idx); diff --git a/include/linux/device.h b/include/linux/device.h index d0e452fd0bff..5f49d2eff6ed 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -853,7 +853,7 @@ struct device_link { struct list_head c_node; enum device_link_state status; u32 flags; - bool rpm_active; + refcount_t rpm_active; struct kref kref; #ifdef CONFIG_SRCU struct rcu_head rcu_head; -- cgit From e7dd40105aac9ba051e44ad711123bc53a5e4c71 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 1 Feb 2019 01:59:42 +0100 Subject: driver core: Add device link flag DL_FLAG_AUTOPROBE_CONSUMER Add a new device link flag, DL_FLAG_AUTOPROBE_CONSUMER, to request the driver core to probe for a consumer driver automatically after binding a driver to the supplier device on a persistent managed device link. As unbinding the supplier driver on a managed device link causes the consumer driver to be detached from its device automatically, this flag provides a complementary mechanism which is needed to address some "composite device" use cases. Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- Documentation/driver-api/device_link.rst | 9 +++++++++ drivers/base/core.c | 16 +++++++++++++++- drivers/base/dd.c | 2 +- include/linux/device.h | 3 +++ 4 files changed, 28 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/Documentation/driver-api/device_link.rst b/Documentation/driver-api/device_link.rst index e249e074a8d2..c764755121c7 100644 --- a/Documentation/driver-api/device_link.rst +++ b/Documentation/driver-api/device_link.rst @@ -94,6 +94,15 @@ Similarly, when the device link is added from supplier's ``->probe`` callback, ``DL_FLAG_AUTOREMOVE_SUPPLIER`` causes the device link to be automatically purged when the supplier fails to probe or later unbinds. +If neither ``DL_FLAG_AUTOREMOVE_CONSUMER`` nor ``DL_FLAG_AUTOREMOVE_SUPPLIER`` +is set, ``DL_FLAG_AUTOPROBE_CONSUMER`` can be used to request the driver core +to probe for a driver for the consumer driver on the link automatically after +a driver has been bound to the supplier device. + +Note, however, that any combinations of ``DL_FLAG_AUTOREMOVE_CONSUMER``, +``DL_FLAG_AUTOREMOVE_SUPPLIER`` or ``DL_FLAG_AUTOPROBE_CONSUMER`` with +``DL_FLAG_STATELESS`` are invalid and cannot be used. + Limitations =========== diff --git a/drivers/base/core.c b/drivers/base/core.c index 9d49b461b1d9..abfce4f613f8 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -208,6 +208,12 @@ static void device_link_rpm_prepare(struct device *consumer, * the link will be maintained until one of the devices pointed to by it (either * the consumer or the supplier) is unregistered. * + * Also, if DL_FLAG_STATELESS, DL_FLAG_AUTOREMOVE_CONSUMER and + * DL_FLAG_AUTOREMOVE_SUPPLIER are not set in @flags (that is, a persistent + * managed device link is being added), the DL_FLAG_AUTOPROBE_CONSUMER flag can + * be used to request the driver core to automaticall probe for a consmer + * driver after successfully binding a driver to the supplier device. + * * The combination of DL_FLAG_STATELESS and either DL_FLAG_AUTOREMOVE_CONSUMER * or DL_FLAG_AUTOREMOVE_SUPPLIER set in @flags at the same time is invalid and * will cause NULL to be returned upfront. @@ -228,7 +234,12 @@ struct device_link *device_link_add(struct device *consumer, if (!consumer || !supplier || (flags & DL_FLAG_STATELESS && - flags & (DL_FLAG_AUTOREMOVE_CONSUMER | DL_FLAG_AUTOREMOVE_SUPPLIER))) + flags & (DL_FLAG_AUTOREMOVE_CONSUMER | + DL_FLAG_AUTOREMOVE_SUPPLIER | + DL_FLAG_AUTOPROBE_CONSUMER)) || + (flags & DL_FLAG_AUTOPROBE_CONSUMER && + flags & (DL_FLAG_AUTOREMOVE_CONSUMER | + DL_FLAG_AUTOREMOVE_SUPPLIER))) return NULL; if (flags & DL_FLAG_PM_RUNTIME && flags & DL_FLAG_RPM_ACTIVE) { @@ -589,6 +600,9 @@ void device_links_driver_bound(struct device *dev) WARN_ON(link->status != DL_STATE_DORMANT); WRITE_ONCE(link->status, DL_STATE_AVAILABLE); + + if (link->flags & DL_FLAG_AUTOPROBE_CONSUMER) + driver_deferred_probe_add(link->consumer); } list_for_each_entry(link, &dev->links.suppliers, c_node) { diff --git a/drivers/base/dd.c b/drivers/base/dd.c index aa6a9c613595..2e898cbba79b 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -116,7 +116,7 @@ static void deferred_probe_work_func(struct work_struct *work) } static DECLARE_WORK(deferred_probe_work, deferred_probe_work_func); -static void driver_deferred_probe_add(struct device *dev) +void driver_deferred_probe_add(struct device *dev) { mutex_lock(&deferred_probe_mutex); if (list_empty(&dev->p->deferred_probe)) { diff --git a/include/linux/device.h b/include/linux/device.h index 5f49d2eff6ed..0ab0a3a80ec3 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -341,6 +341,7 @@ struct device *driver_find_device(struct device_driver *drv, struct device *start, void *data, int (*match)(struct device *dev, void *data)); +void driver_deferred_probe_add(struct device *dev); int driver_deferred_probe_check_state(struct device *dev); /** @@ -827,12 +828,14 @@ enum device_link_state { * PM_RUNTIME: If set, the runtime PM framework will use this link. * RPM_ACTIVE: Run pm_runtime_get_sync() on the supplier during link creation. * AUTOREMOVE_SUPPLIER: Remove the link automatically on supplier driver unbind. + * AUTOPROBE_CONSUMER: Probe consumer driver automatically after supplier binds. */ #define DL_FLAG_STATELESS BIT(0) #define DL_FLAG_AUTOREMOVE_CONSUMER BIT(1) #define DL_FLAG_PM_RUNTIME BIT(2) #define DL_FLAG_RPM_ACTIVE BIT(3) #define DL_FLAG_AUTOREMOVE_SUPPLIER BIT(4) +#define DL_FLAG_AUTOPROBE_CONSUMER BIT(5) /** * struct device_link - Device link representation. -- cgit From 42bf4152d8a79f89f5456dee63a1f364fbce2dd6 Mon Sep 17 00:00:00 2001 From: Sumit Garg Date: Tue, 29 Jan 2019 11:19:36 +0530 Subject: tee: add supp_nowait flag in tee_context struct This flag indicates that requests in this context should not wait for tee-supplicant daemon to be started if not present and just return with an error code. It is needed for requests which should be non-blocking in nature like ones arising from TEE based kernel drivers or any in kernel api that uses TEE internal client interface. Signed-off-by: Sumit Garg Reviewed-by: Daniel Thompson Signed-off-by: Jens Wiklander --- drivers/tee/optee/supp.c | 10 +++++++++- drivers/tee/tee_core.c | 13 +++++++++++++ include/linux/tee_drv.h | 6 ++++++ 3 files changed, 28 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/tee/optee/supp.c b/drivers/tee/optee/supp.c index 43626e15703a..92f56b8645e3 100644 --- a/drivers/tee/optee/supp.c +++ b/drivers/tee/optee/supp.c @@ -88,10 +88,18 @@ u32 optee_supp_thrd_req(struct tee_context *ctx, u32 func, size_t num_params, { struct optee *optee = tee_get_drvdata(ctx->teedev); struct optee_supp *supp = &optee->supp; - struct optee_supp_req *req = kzalloc(sizeof(*req), GFP_KERNEL); + struct optee_supp_req *req; bool interruptable; u32 ret; + /* + * Return in case there is no supplicant available and + * non-blocking request. + */ + if (!supp->ctx && ctx->supp_nowait) + return TEEC_ERROR_COMMUNICATION; + + req = kzalloc(sizeof(*req), GFP_KERNEL); if (!req) return TEEC_ERROR_OUT_OF_MEMORY; diff --git a/drivers/tee/tee_core.c b/drivers/tee/tee_core.c index 7b2bb4c50058..adf2588282fc 100644 --- a/drivers/tee/tee_core.c +++ b/drivers/tee/tee_core.c @@ -106,6 +106,11 @@ static int tee_open(struct inode *inode, struct file *filp) if (IS_ERR(ctx)) return PTR_ERR(ctx); + /* + * Default user-space behaviour is to wait for tee-supplicant + * if not present for any requests in this context. + */ + ctx->supp_nowait = false; filp->private_data = ctx; return 0; } @@ -982,6 +987,14 @@ tee_client_open_context(struct tee_context *start, } while (IS_ERR(ctx) && PTR_ERR(ctx) != -ENOMEM); put_device(put_dev); + /* + * Default behaviour for in kernel client is to not wait for + * tee-supplicant if not present for any requests in this context. + * Also this flag could be configured again before call to + * tee_client_open_session() if any in kernel client requires + * different behaviour. + */ + ctx->supp_nowait = true; return ctx; } EXPORT_SYMBOL_GPL(tee_client_open_context); diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h index 6cfe05893a76..5076502c07d7 100644 --- a/include/linux/tee_drv.h +++ b/include/linux/tee_drv.h @@ -47,6 +47,11 @@ struct tee_shm_pool; * @releasing: flag that indicates if context is being released right now. * It is needed to break circular dependency on context during * shared memory release. + * @supp_nowait: flag that indicates that requests in this context should not + * wait for tee-supplicant daemon to be started if not present + * and just return with an error code. It is needed for requests + * that arises from TEE based kernel drivers that should be + * non-blocking in nature. */ struct tee_context { struct tee_device *teedev; @@ -54,6 +59,7 @@ struct tee_context { void *data; struct kref refcount; bool releasing; + bool supp_nowait; }; struct tee_param_memref { -- cgit From 0fc1db9d105915021260eb241661b8e96f5c0f1a Mon Sep 17 00:00:00 2001 From: Sumit Garg Date: Tue, 29 Jan 2019 11:19:35 +0530 Subject: tee: add bus driver framework for TEE based devices Introduce a generic TEE bus driver concept for TEE based kernel drivers which would like to communicate with TEE based devices/services. Also add support in module device table for these new TEE based devices. In this TEE bus concept, devices/services are identified via Universally Unique Identifier (UUID) and drivers register a table of device UUIDs which they can support. So this TEE bus framework registers following apis: - match(): Iterates over the driver UUID table to find a corresponding match for device UUID. If a match is found, then this particular device is probed via corresponding probe api registered by the driver. This process happens whenever a device or a driver is registered with TEE bus. - uevent(): Notifies user-space (udev) whenever a new device is registered on this bus for auto-loading of modularized drivers. Also this framework allows for device enumeration to be specific to corresponding TEE implementation like OP-TEE etc. Signed-off-by: Sumit Garg Reviewed-by: Daniel Thompson Reviewed-by: Bhupesh Sharma Signed-off-by: Jens Wiklander --- drivers/tee/tee_core.c | 54 ++++++++++++++++++++++++++++++++++++--- include/linux/mod_devicetable.h | 9 +++++++ include/linux/tee_drv.h | 32 ++++++++++++++++++++++- scripts/mod/devicetable-offsets.c | 3 +++ scripts/mod/file2alias.c | 19 ++++++++++++++ 5 files changed, 112 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/tee/tee_core.c b/drivers/tee/tee_core.c index adf2588282fc..25f3b9cc8908 100644 --- a/drivers/tee/tee_core.c +++ b/drivers/tee/tee_core.c @@ -15,7 +15,6 @@ #define pr_fmt(fmt) "%s: " fmt, __func__ #include -#include #include #include #include @@ -1040,6 +1039,39 @@ int tee_client_invoke_func(struct tee_context *ctx, } EXPORT_SYMBOL_GPL(tee_client_invoke_func); +static int tee_client_device_match(struct device *dev, + struct device_driver *drv) +{ + const struct tee_client_device_id *id_table; + struct tee_client_device *tee_device; + + id_table = to_tee_client_driver(drv)->id_table; + tee_device = to_tee_client_device(dev); + + while (!uuid_is_null(&id_table->uuid)) { + if (uuid_equal(&tee_device->id.uuid, &id_table->uuid)) + return 1; + id_table++; + } + + return 0; +} + +static int tee_client_device_uevent(struct device *dev, + struct kobj_uevent_env *env) +{ + uuid_t *dev_id = &to_tee_client_device(dev)->id.uuid; + + return add_uevent_var(env, "MODALIAS=tee:%pUb", dev_id); +} + +struct bus_type tee_bus_type = { + .name = "tee", + .match = tee_client_device_match, + .uevent = tee_client_device_uevent, +}; +EXPORT_SYMBOL_GPL(tee_bus_type); + static int __init tee_init(void) { int rc; @@ -1053,18 +1085,32 @@ static int __init tee_init(void) rc = alloc_chrdev_region(&tee_devt, 0, TEE_NUM_DEVICES, "tee"); if (rc) { pr_err("failed to allocate char dev region\n"); - class_destroy(tee_class); - tee_class = NULL; + goto out_unreg_class; + } + + rc = bus_register(&tee_bus_type); + if (rc) { + pr_err("failed to register tee bus\n"); + goto out_unreg_chrdev; } + return 0; + +out_unreg_chrdev: + unregister_chrdev_region(tee_devt, TEE_NUM_DEVICES); +out_unreg_class: + class_destroy(tee_class); + tee_class = NULL; + return rc; } static void __exit tee_exit(void) { + bus_unregister(&tee_bus_type); + unregister_chrdev_region(tee_devt, TEE_NUM_DEVICES); class_destroy(tee_class); tee_class = NULL; - unregister_chrdev_region(tee_devt, TEE_NUM_DEVICES); } subsys_initcall(tee_init); diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index f9bd2f34b99f..14eaeeb46f41 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -779,4 +779,13 @@ struct typec_device_id { kernel_ulong_t driver_data; }; +/** + * struct tee_client_device_id - tee based device identifier + * @uuid: For TEE based client devices we use the device uuid as + * the identifier. + */ +struct tee_client_device_id { + uuid_t uuid; +}; + #endif /* LINUX_MOD_DEVICETABLE_H */ diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h index 5076502c07d7..56d7f1b4516d 100644 --- a/include/linux/tee_drv.h +++ b/include/linux/tee_drv.h @@ -15,11 +15,14 @@ #ifndef __TEE_DRV_H #define __TEE_DRV_H -#include +#include #include #include #include +#include #include +#include +#include /* * The file describes the API provided by the generic TEE driver to the @@ -544,4 +547,31 @@ static inline bool tee_param_is_memref(struct tee_param *param) } } +extern struct bus_type tee_bus_type; + +/** + * struct tee_client_device - tee based device + * @id: device identifier + * @dev: device structure + */ +struct tee_client_device { + struct tee_client_device_id id; + struct device dev; +}; + +#define to_tee_client_device(d) container_of(d, struct tee_client_device, dev) + +/** + * struct tee_client_driver - tee client driver + * @id_table: device id table supported by this driver + * @driver: driver structure + */ +struct tee_client_driver { + const struct tee_client_device_id *id_table; + struct device_driver driver; +}; + +#define to_tee_client_driver(d) \ + container_of(d, struct tee_client_driver, driver) + #endif /*__TEE_DRV_H*/ diff --git a/scripts/mod/devicetable-offsets.c b/scripts/mod/devicetable-offsets.c index 293004499b4d..160718383a71 100644 --- a/scripts/mod/devicetable-offsets.c +++ b/scripts/mod/devicetable-offsets.c @@ -225,5 +225,8 @@ int main(void) DEVID_FIELD(typec_device_id, svid); DEVID_FIELD(typec_device_id, mode); + DEVID(tee_client_device_id); + DEVID_FIELD(tee_client_device_id, uuid); + return 0; } diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index a37af7d71973..d0e41723627f 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -37,6 +37,9 @@ typedef unsigned char __u8; typedef struct { __u8 b[16]; } uuid_le; +typedef struct { + __u8 b[16]; +} uuid_t; /* Big exception to the "don't include kernel headers into userspace, which * even potentially has different endianness and word sizes, since @@ -1287,6 +1290,21 @@ static int do_typec_entry(const char *filename, void *symval, char *alias) return 1; } +/* Looks like: tee:uuid */ +static int do_tee_entry(const char *filename, void *symval, char *alias) +{ + DEF_FIELD(symval, tee_client_device_id, uuid); + + sprintf(alias, "tee:%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x", + uuid.b[0], uuid.b[1], uuid.b[2], uuid.b[3], uuid.b[4], + uuid.b[5], uuid.b[6], uuid.b[7], uuid.b[8], uuid.b[9], + uuid.b[10], uuid.b[11], uuid.b[12], uuid.b[13], uuid.b[14], + uuid.b[15]); + + add_wildcard(alias); + return 1; +} + /* Does namelen bytes of name exactly match the symbol? */ static bool sym_is(const char *name, unsigned namelen, const char *symbol) { @@ -1357,6 +1375,7 @@ static const struct devtable devtable[] = { {"fslmc", SIZE_fsl_mc_device_id, do_fsl_mc_entry}, {"tbsvc", SIZE_tb_service_id, do_tbsvc_entry}, {"typec", SIZE_typec_device_id, do_typec_entry}, + {"tee", SIZE_tee_client_device_id, do_tee_entry}, }; /* Create MODULE_ALIAS() statements. -- cgit From b8580e9de48bf32b884910d22330ef2fa027cf01 Mon Sep 17 00:00:00 2001 From: Shunyong Yang Date: Fri, 1 Feb 2019 17:11:14 -0600 Subject: PCI: Add HXT vendor ID Add the HXT vendor ID to pci_ids.h. Signed-off-by: Shunyong Yang [bhelgaas: split to separate patch] Signed-off-by: Bjorn Helgaas Reviewed-by: Sinan Kaya CC: Joey Zheng --- include/linux/pci_ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 5eaf39dbc388..26420e619dd7 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2573,6 +2573,8 @@ #define PCI_VENDOR_ID_HYGON 0x1d94 +#define PCI_VENDOR_ID_HXT 0x1dbf + #define PCI_VENDOR_ID_TEKRAM 0x1de1 #define PCI_DEVICE_ID_TEKRAM_DC290 0xdc29 -- cgit From 0ce26a1c31ca928df4dfc7504c8898b71ff9f5d5 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 1 Feb 2019 17:24:52 -0600 Subject: PCI: Move Rohm Vendor ID to generic list Move the Rohm Vendor ID to pci_ids.h instead of defining it in several drivers. Signed-off-by: Andy Shevchenko Signed-off-by: Bjorn Helgaas Acked-by: Mark Brown Acked-by: Linus Walleij --- drivers/dma/pch_dma.c | 1 - drivers/gpio/gpio-ml-ioh.c | 2 -- drivers/gpio/gpio-pch.c | 1 - drivers/i2c/busses/i2c-eg20t.c | 1 - drivers/misc/pch_phub.c | 1 - drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c | 7 ++----- drivers/spi/spi-topcliff-pch.c | 1 - drivers/tty/serial/pch_uart.c | 2 -- drivers/usb/gadget/udc/pch_udc.c | 1 - include/linux/pci_ids.h | 2 ++ 10 files changed, 4 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma/pch_dma.c b/drivers/dma/pch_dma.c index afd8f27bda96..538b6e0e17bb 100644 --- a/drivers/dma/pch_dma.c +++ b/drivers/dma/pch_dma.c @@ -972,7 +972,6 @@ static void pch_dma_remove(struct pci_dev *pdev) } /* PCI Device ID of DMA device */ -#define PCI_VENDOR_ID_ROHM 0x10DB #define PCI_DEVICE_ID_EG20T_PCH_DMA_8CH 0x8810 #define PCI_DEVICE_ID_EG20T_PCH_DMA_4CH 0x8815 #define PCI_DEVICE_ID_ML7213_DMA1_8CH 0x8026 diff --git a/drivers/gpio/gpio-ml-ioh.c b/drivers/gpio/gpio-ml-ioh.c index 51c7d1b84c2e..0c076dce9e17 100644 --- a/drivers/gpio/gpio-ml-ioh.c +++ b/drivers/gpio/gpio-ml-ioh.c @@ -31,8 +31,6 @@ #define IOH_IRQ_BASE 0 -#define PCI_VENDOR_ID_ROHM 0x10DB - struct ioh_reg_comn { u32 ien; u32 istatus; diff --git a/drivers/gpio/gpio-pch.c b/drivers/gpio/gpio-pch.c index ee79e5f88b5a..1d99293096f2 100644 --- a/drivers/gpio/gpio-pch.c +++ b/drivers/gpio/gpio-pch.c @@ -437,7 +437,6 @@ static int __maybe_unused pch_gpio_resume(struct device *dev) static SIMPLE_DEV_PM_OPS(pch_gpio_pm_ops, pch_gpio_suspend, pch_gpio_resume); -#define PCI_VENDOR_ID_ROHM 0x10DB static const struct pci_device_id pch_gpio_pcidev_id[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x8803) }, { PCI_DEVICE(PCI_VENDOR_ID_ROHM, 0x8014) }, diff --git a/drivers/i2c/busses/i2c-eg20t.c b/drivers/i2c/busses/i2c-eg20t.c index 835d54ac2971..231675b10376 100644 --- a/drivers/i2c/busses/i2c-eg20t.c +++ b/drivers/i2c/busses/i2c-eg20t.c @@ -177,7 +177,6 @@ static wait_queue_head_t pch_event; static DEFINE_MUTEX(pch_mutex); /* Definition for ML7213 by LAPIS Semiconductor */ -#define PCI_VENDOR_ID_ROHM 0x10DB #define PCI_DEVICE_ID_ML7213_I2C 0x802D #define PCI_DEVICE_ID_ML7223_I2C 0x8010 #define PCI_DEVICE_ID_ML7831_I2C 0x8817 diff --git a/drivers/misc/pch_phub.c b/drivers/misc/pch_phub.c index 540845651b8c..309703e9c42e 100644 --- a/drivers/misc/pch_phub.c +++ b/drivers/misc/pch_phub.c @@ -64,7 +64,6 @@ #define CLKCFG_UARTCLKSEL (1 << 18) /* Macros for ML7213 */ -#define PCI_VENDOR_ID_ROHM 0x10db #define PCI_DEVICE_ID_ROHM_ML7213_PHUB 0x801A /* Macros for ML7223 */ diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c index 43c0c10dfeb7..3a4225837049 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c @@ -27,7 +27,6 @@ #define DRV_VERSION "1.01" const char pch_driver_version[] = DRV_VERSION; -#define PCI_DEVICE_ID_INTEL_IOH1_GBE 0x8802 /* Pci device ID */ #define PCH_GBE_MAR_ENTRIES 16 #define PCH_GBE_SHORT_PKT 64 #define DSC_INIT16 0xC000 @@ -37,11 +36,9 @@ const char pch_driver_version[] = DRV_VERSION; #define PCH_GBE_PCI_BAR 1 #define PCH_GBE_RESERVE_MEMORY 0x200000 /* 2MB */ -/* Macros for ML7223 */ -#define PCI_VENDOR_ID_ROHM 0x10db -#define PCI_DEVICE_ID_ROHM_ML7223_GBE 0x8013 +#define PCI_DEVICE_ID_INTEL_IOH1_GBE 0x8802 -/* Macros for ML7831 */ +#define PCI_DEVICE_ID_ROHM_ML7223_GBE 0x8013 #define PCI_DEVICE_ID_ROHM_ML7831_GBE 0x8802 #define PCH_GBE_TX_WEIGHT 64 diff --git a/drivers/spi/spi-topcliff-pch.c b/drivers/spi/spi-topcliff-pch.c index 97d137591b18..d794180e83dc 100644 --- a/drivers/spi/spi-topcliff-pch.c +++ b/drivers/spi/spi-topcliff-pch.c @@ -92,7 +92,6 @@ #define PCH_MAX_SPBR 1023 /* Definition for ML7213/ML7223/ML7831 by LAPIS Semiconductor */ -#define PCI_VENDOR_ID_ROHM 0x10DB #define PCI_DEVICE_ID_ML7213_SPI 0x802c #define PCI_DEVICE_ID_ML7223_SPI 0x800F #define PCI_DEVICE_ID_ML7831_SPI 0x8816 diff --git a/drivers/tty/serial/pch_uart.c b/drivers/tty/serial/pch_uart.c index 9ed121f08a54..6157213a8359 100644 --- a/drivers/tty/serial/pch_uart.c +++ b/drivers/tty/serial/pch_uart.c @@ -192,8 +192,6 @@ enum { #define PCH_UART_HAL_LOOP (PCH_UART_MCR_LOOP) #define PCH_UART_HAL_AFE (PCH_UART_MCR_AFE) -#define PCI_VENDOR_ID_ROHM 0x10DB - #define BOTH_EMPTY (UART_LSR_TEMT | UART_LSR_THRE) #define DEFAULT_UARTCLK 1843200 /* 1.8432 MHz */ diff --git a/drivers/usb/gadget/udc/pch_udc.c b/drivers/usb/gadget/udc/pch_udc.c index 55c8c8abeacd..cded51f36fc1 100644 --- a/drivers/usb/gadget/udc/pch_udc.c +++ b/drivers/usb/gadget/udc/pch_udc.c @@ -368,7 +368,6 @@ struct pch_udc_dev { #define PCI_DEVICE_ID_INTEL_QUARK_X1000_UDC 0x0939 #define PCI_DEVICE_ID_INTEL_EG20T_UDC 0x8808 -#define PCI_VENDOR_ID_ROHM 0x10DB #define PCI_DEVICE_ID_ML7213_IOH_UDC 0x801D #define PCI_DEVICE_ID_ML7831_IOH_UDC 0x8808 diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 26420e619dd7..70e86148cb1e 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1140,6 +1140,8 @@ #define PCI_VENDOR_ID_TCONRAD 0x10da #define PCI_DEVICE_ID_TCONRAD_TOKENRING 0x0508 +#define PCI_VENDOR_ID_ROHM 0x10db + #define PCI_VENDOR_ID_NVIDIA 0x10de #define PCI_DEVICE_ID_NVIDIA_TNT 0x0020 #define PCI_DEVICE_ID_NVIDIA_TNT2 0x0028 -- cgit From f38ab20b749da84e3df1f8c9240ddc791b0d5983 Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Thu, 20 Dec 2018 14:59:33 +0800 Subject: iio: st_accel: use ACPI orientation data Platform-specific ST accelerometer mount matrix information can be provided by returning a package of 6 integers from the ACPI _ONT method. This has been seen on Acer products such as Veriton Z4860G, Z6860G and A890, which include a ST SMO8840 sensor. We have also confirmed experimentally that the Windows driver uses such information. The _ONT data format was explained by a ST vendor contact. However, strangely enough, the _ONT transformations must be applied after first applying another mount matrix which we determined experimentally. ST have not commented on why this is the case, but we imagine that perhaps earlier devices (before _ONT was introduced) required this translation and hence it became 'standard.' Interpret the _ONT data and export the equivalent mount matrix to userspace. If no _ONT data is present, no mount matrix is exported. Signed-off-by: Daniel Drake Signed-off-by: Jonathan Cameron --- drivers/iio/accel/st_accel_core.c | 171 +++++++++++++++++++++++++++++++++- include/linux/iio/common/st_sensors.h | 1 + 2 files changed, 171 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/iio/accel/st_accel_core.c b/drivers/iio/accel/st_accel_core.c index f7b471121508..a3c0916479fa 100644 --- a/drivers/iio/accel/st_accel_core.c +++ b/drivers/iio/accel/st_accel_core.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -918,12 +919,167 @@ static const struct iio_trigger_ops st_accel_trigger_ops = { #define ST_ACCEL_TRIGGER_OPS NULL #endif +static const struct iio_mount_matrix * +get_mount_matrix(const struct iio_dev *indio_dev, + const struct iio_chan_spec *chan) +{ + struct st_sensor_data *adata = iio_priv(indio_dev); + + return adata->mount_matrix; +} + +static const struct iio_chan_spec_ext_info mount_matrix_ext_info[] = { + IIO_MOUNT_MATRIX(IIO_SHARED_BY_ALL, get_mount_matrix), + { }, +}; + +/* Read ST-specific _ONT orientation data from ACPI and generate an + * appropriate mount matrix. + */ +static int apply_acpi_orientation(struct iio_dev *indio_dev, + struct iio_chan_spec *channels) +{ +#ifdef CONFIG_ACPI + struct st_sensor_data *adata = iio_priv(indio_dev); + struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL}; + struct acpi_device *adev; + union acpi_object *ont; + union acpi_object *elements; + acpi_status status; + int ret = -EINVAL; + unsigned int val; + int i, j; + int final_ont[3][3] = { { 0 }, }; + + /* For some reason, ST's _ONT translation does not apply directly + * to the data read from the sensor. Another translation must be + * performed first, as described by the matrix below. Perhaps + * ST required this specific translation for the first product + * where the device was mounted? + */ + const int default_ont[3][3] = { + { 0, 1, 0 }, + { -1, 0, 0 }, + { 0, 0, -1 }, + }; + + + adev = ACPI_COMPANION(adata->dev); + if (!adev) + return 0; + + /* Read _ONT data, which should be a package of 6 integers. */ + status = acpi_evaluate_object(adev->handle, "_ONT", NULL, &buffer); + if (status == AE_NOT_FOUND) { + return 0; + } else if (ACPI_FAILURE(status)) { + dev_warn(&indio_dev->dev, "failed to execute _ONT: %d\n", + status); + return status; + } + + ont = buffer.pointer; + if (ont->type != ACPI_TYPE_PACKAGE || ont->package.count != 6) + goto out; + + /* The first 3 integers provide axis order information. + * e.g. 0 1 2 would indicate normal X,Y,Z ordering. + * e.g. 1 0 2 indicates that data arrives in order Y,X,Z. + */ + elements = ont->package.elements; + for (i = 0; i < 3; i++) { + if (elements[i].type != ACPI_TYPE_INTEGER) + goto out; + + val = elements[i].integer.value; + if (val < 0 || val > 2) + goto out; + + /* Avoiding full matrix multiplication, we simply reorder the + * columns in the default_ont matrix according to the + * ordering provided by _ONT. + */ + final_ont[0][i] = default_ont[0][val]; + final_ont[1][i] = default_ont[1][val]; + final_ont[2][i] = default_ont[2][val]; + } + + /* The final 3 integers provide sign flip information. + * 0 means no change, 1 means flip. + * e.g. 0 0 1 means that Z data should be sign-flipped. + * This is applied after the axis reordering from above. + */ + elements += 3; + for (i = 0; i < 3; i++) { + if (elements[i].type != ACPI_TYPE_INTEGER) + goto out; + + val = elements[i].integer.value; + if (val != 0 && val != 1) + goto out; + if (!val) + continue; + + /* Flip the values in the indicated column */ + final_ont[0][i] *= -1; + final_ont[1][i] *= -1; + final_ont[2][i] *= -1; + } + + /* Convert our integer matrix to a string-based iio_mount_matrix */ + adata->mount_matrix = devm_kmalloc(&indio_dev->dev, + sizeof(*adata->mount_matrix), + GFP_KERNEL); + if (!adata->mount_matrix) { + ret = -ENOMEM; + goto out; + } + + for (i = 0; i < 3; i++) { + for (j = 0; j < 3; j++) { + int matrix_val = final_ont[i][j]; + char *str_value; + + switch (matrix_val) { + case -1: + str_value = "-1"; + break; + case 0: + str_value = "0"; + break; + case 1: + str_value = "1"; + break; + default: + goto out; + } + adata->mount_matrix->rotation[i * 3 + j] = str_value; + } + } + + /* Expose the mount matrix via ext_info */ + for (i = 0; i < indio_dev->num_channels; i++) + channels[i].ext_info = mount_matrix_ext_info; + + ret = 0; + dev_info(&indio_dev->dev, "computed mount matrix from ACPI\n"); + +out: + kfree(buffer.pointer); + return ret; +#else /* !CONFIG_ACPI */ + return 0; +#endif +} + int st_accel_common_probe(struct iio_dev *indio_dev) { struct st_sensor_data *adata = iio_priv(indio_dev); struct st_sensors_platform_data *pdata = (struct st_sensors_platform_data *)adata->dev->platform_data; int irq = adata->get_irq_data_ready(indio_dev); + struct iio_chan_spec *channels; + size_t channels_size; int err; indio_dev->modes = INDIO_DIRECT_MODE; @@ -942,9 +1098,22 @@ int st_accel_common_probe(struct iio_dev *indio_dev) adata->num_data_channels = ST_ACCEL_NUMBER_DATA_CHANNELS; adata->multiread_bit = adata->sensor_settings->multi_read_bit; - indio_dev->channels = adata->sensor_settings->ch; indio_dev->num_channels = ST_SENSORS_NUMBER_ALL_CHANNELS; + channels_size = indio_dev->num_channels * sizeof(struct iio_chan_spec); + channels = devm_kmemdup(&indio_dev->dev, + adata->sensor_settings->ch, + channels_size, GFP_KERNEL); + if (!channels) { + err = -ENOMEM; + goto st_accel_power_off; + } + + if (apply_acpi_orientation(indio_dev, channels)) + dev_warn(&indio_dev->dev, + "failed to apply ACPI orientation data: %d\n", err); + + indio_dev->channels = channels; adata->current_fullscale = (struct st_sensor_fullscale_avl *) &adata->sensor_settings->fs.fs_avl[0]; adata->odr = adata->sensor_settings->odr.odr_avl[0].hz; diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h index 8092b8e7f37e..45e9667f0a8c 100644 --- a/include/linux/iio/common/st_sensors.h +++ b/include/linux/iio/common/st_sensors.h @@ -260,6 +260,7 @@ struct st_sensor_settings { struct st_sensor_data { struct device *dev; struct iio_trigger *trig; + struct iio_mount_matrix *mount_matrix; struct st_sensor_settings *sensor_settings; struct st_sensor_fullscale_avl *current_fullscale; struct regulator *vdd; -- cgit From d5d30d5a5c60628de5e77e3f292a8f9012d51350 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 2 Feb 2019 16:35:26 -0800 Subject: libnvdimm/dimm: Add a no-BLK quirk based on NVDIMM family As Dexuan reports the NVDIMM_FAMILY_HYPERV platform is incompatible with the existing Linux namespace implementation because it uses NSLABEL_FLAG_LOCAL for x1-width PMEM interleave sets. Quirk it as an platform / DIMM that does not provide BLK-aperture access. Allow the libnvdimm core to assume no potential for aliasing. In case other implementations make the same mistake, provide a "noblk" module parameter to force-enable the quirk. Link: https://lkml.kernel.org/r/PU1P153MB0169977604493B82B662A01CBF920@PU1P153MB0169.APCP153.PROD.OUTLOOK.COM Reported-by: Dexuan Cui Tested-by: Dexuan Cui Signed-off-by: Dan Williams --- drivers/acpi/nfit/core.c | 4 ++++ drivers/nvdimm/dimm_devs.c | 7 +++++++ drivers/nvdimm/label.c | 3 +++ drivers/nvdimm/namespace_devs.c | 6 ++++++ drivers/nvdimm/region_devs.c | 7 +++++++ include/linux/libnvdimm.h | 2 ++ 6 files changed, 29 insertions(+) (limited to 'include/linux') diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 4a7e8b1fa43b..811c399a3a76 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -2016,6 +2016,10 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc) cmd_mask |= nfit_mem->dsm_mask & NVDIMM_STANDARD_CMDMASK; } + /* Quirk to ignore LOCAL for labels on HYPERV DIMMs */ + if (nfit_mem->family == NVDIMM_FAMILY_HYPERV) + set_bit(NDD_NOBLK, &flags); + if (test_bit(NFIT_MEM_LSR, &nfit_mem->flags)) { set_bit(ND_CMD_GET_CONFIG_SIZE, &cmd_mask); set_bit(ND_CMD_GET_CONFIG_DATA, &cmd_mask); diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index 4890310df874..553aa78abeee 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -11,6 +11,7 @@ * General Public License for more details. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include #include #include @@ -25,6 +26,10 @@ static DEFINE_IDA(dimm_ida); +static bool noblk; +module_param(noblk, bool, 0444); +MODULE_PARM_DESC(noblk, "force disable BLK / local alias support"); + /* * Retrieve bus and dimm handle and return if this bus supports * get_config_data commands @@ -551,6 +556,8 @@ struct nvdimm *__nvdimm_create(struct nvdimm_bus *nvdimm_bus, nvdimm->dimm_id = dimm_id; nvdimm->provider_data = provider_data; + if (noblk) + flags |= 1 << NDD_NOBLK; nvdimm->flags = flags; nvdimm->cmd_mask = cmd_mask; nvdimm->num_flush = num_flush; diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c index 6d6e9a12150b..f3d753d3169c 100644 --- a/drivers/nvdimm/label.c +++ b/drivers/nvdimm/label.c @@ -392,6 +392,7 @@ int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd) return 0; /* no label, nothing to reserve */ for_each_clear_bit_le(slot, free, nslot) { + struct nvdimm *nvdimm = to_nvdimm(ndd->dev); struct nd_namespace_label *nd_label; struct nd_region *nd_region = NULL; u8 label_uuid[NSLABEL_UUID_LEN]; @@ -406,6 +407,8 @@ int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd) memcpy(label_uuid, nd_label->uuid, NSLABEL_UUID_LEN); flags = __le32_to_cpu(nd_label->flags); + if (test_bit(NDD_NOBLK, &nvdimm->flags)) + flags &= ~NSLABEL_FLAG_LOCAL; nd_label_gen_id(&label_id, label_uuid, flags); res = nvdimm_allocate_dpa(ndd, &label_id, __le64_to_cpu(nd_label->dpa), diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index 4b077555ac70..3677b0c4a33d 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -2492,6 +2492,12 @@ static int init_active_labels(struct nd_region *nd_region) if (!label_ent) break; label = nd_label_active(ndd, j); + if (test_bit(NDD_NOBLK, &nvdimm->flags)) { + u32 flags = __le32_to_cpu(label->flags); + + flags &= ~NSLABEL_FLAG_LOCAL; + label->flags = __cpu_to_le32(flags); + } label_ent->label = label; mutex_lock(&nd_mapping->lock); diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index e2818f94f292..3b58baa44b5c 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -1003,6 +1003,13 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus, if (test_bit(NDD_UNARMED, &nvdimm->flags)) ro = 1; + + if (test_bit(NDD_NOBLK, &nvdimm->flags) + && dev_type == &nd_blk_device_type) { + dev_err(&nvdimm_bus->dev, "%s: %s mapping%d is not BLK capable\n", + caller, dev_name(&nvdimm->dev), i); + return NULL; + } } if (dev_type == &nd_blk_device_type) { diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 5440f11b0907..7da406ae3a2b 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -42,6 +42,8 @@ enum { NDD_SECURITY_OVERWRITE = 3, /* tracking whether or not there is a pending device reference */ NDD_WORK_PENDING = 4, + /* ignore / filter NSLABEL_FLAG_LOCAL for this DIMM, i.e. no aliasing */ + NDD_NOBLK = 5, /* need to set a limit somewhere, but yes, this is likely overkill */ ND_IOCTL_MAX_BUFLEN = SZ_4M, -- cgit From 79a4e91d1bb2a411a4ce2baa93680fa707567003 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sat, 2 Feb 2019 19:50:17 -0800 Subject: device.h: Add __cold to dev_ logging functions Add __cold to the dev_ logging functions similar to the use of __cold in the generic printk function. Using __cold moves all the dev_ logging functions out-of-line possibly improving code locality and runtime performance. Signed-off-by: Joe Perches Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 0ab0a3a80ec3..a36830e2d0e5 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1384,28 +1384,28 @@ void device_link_remove(void *consumer, struct device *supplier); #ifdef CONFIG_PRINTK -__printf(3, 0) +__printf(3, 0) __cold int dev_vprintk_emit(int level, const struct device *dev, const char *fmt, va_list args); -__printf(3, 4) +__printf(3, 4) __cold int dev_printk_emit(int level, const struct device *dev, const char *fmt, ...); -__printf(3, 4) +__printf(3, 4) __cold void dev_printk(const char *level, const struct device *dev, const char *fmt, ...); -__printf(2, 3) +__printf(2, 3) __cold void _dev_emerg(const struct device *dev, const char *fmt, ...); -__printf(2, 3) +__printf(2, 3) __cold void _dev_alert(const struct device *dev, const char *fmt, ...); -__printf(2, 3) +__printf(2, 3) __cold void _dev_crit(const struct device *dev, const char *fmt, ...); -__printf(2, 3) +__printf(2, 3) __cold void _dev_err(const struct device *dev, const char *fmt, ...); -__printf(2, 3) +__printf(2, 3) __cold void _dev_warn(const struct device *dev, const char *fmt, ...); -__printf(2, 3) +__printf(2, 3) __cold void _dev_notice(const struct device *dev, const char *fmt, ...); -__printf(2, 3) +__printf(2, 3) __cold void _dev_info(const struct device *dev, const char *fmt, ...); #else -- cgit From 5f3d544f1671d214cd26e45bda326f921455256e Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Fri, 1 Feb 2019 22:45:17 -0500 Subject: audit: remove audit_context when CONFIG_ AUDIT and not AUDITSYSCALL Remove audit_context from struct task_struct and struct audit_buffer when CONFIG_AUDIT is enabled but CONFIG_AUDITSYSCALL is not. Also, audit_log_name() (and supporting inode and fcaps functions) should have been put back in auditsc.c when soft and hard link logging was normalized since it is only used by syscall auditing. See github issue https://github.com/linux-audit/audit-kernel/issues/105 Signed-off-by: Richard Guy Briggs Signed-off-by: Paul Moore --- include/linux/sched.h | 4 +- kernel/audit.c | 157 ------------------------------------------------- kernel/audit.h | 9 --- kernel/auditsc.c | 158 ++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 161 insertions(+), 167 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index f9788bb122c5..765119df759a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -885,8 +885,10 @@ struct task_struct { struct callback_head *task_works; - struct audit_context *audit_context; #ifdef CONFIG_AUDIT +#ifdef CONFIG_AUDITSYSCALL + struct audit_context *audit_context; +#endif kuid_t loginuid; unsigned int sessionid; #endif diff --git a/kernel/audit.c b/kernel/audit.c index b7177a8def2e..c89ea48c70a6 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -2067,163 +2067,6 @@ void audit_log_key(struct audit_buffer *ab, char *key) audit_log_format(ab, "(null)"); } -void audit_log_cap(struct audit_buffer *ab, char *prefix, kernel_cap_t *cap) -{ - int i; - - if (cap_isclear(*cap)) { - audit_log_format(ab, " %s=0", prefix); - return; - } - audit_log_format(ab, " %s=", prefix); - CAP_FOR_EACH_U32(i) - audit_log_format(ab, "%08x", cap->cap[CAP_LAST_U32 - i]); -} - -static void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name) -{ - if (name->fcap_ver == -1) { - audit_log_format(ab, " cap_fe=? cap_fver=? cap_fp=? cap_fi=?"); - return; - } - audit_log_cap(ab, "cap_fp", &name->fcap.permitted); - audit_log_cap(ab, "cap_fi", &name->fcap.inheritable); - audit_log_format(ab, " cap_fe=%d cap_fver=%x cap_frootid=%d", - name->fcap.fE, name->fcap_ver, - from_kuid(&init_user_ns, name->fcap.rootid)); -} - -static inline int audit_copy_fcaps(struct audit_names *name, - const struct dentry *dentry) -{ - struct cpu_vfs_cap_data caps; - int rc; - - if (!dentry) - return 0; - - rc = get_vfs_caps_from_disk(dentry, &caps); - if (rc) - return rc; - - name->fcap.permitted = caps.permitted; - name->fcap.inheritable = caps.inheritable; - name->fcap.fE = !!(caps.magic_etc & VFS_CAP_FLAGS_EFFECTIVE); - name->fcap.rootid = caps.rootid; - name->fcap_ver = (caps.magic_etc & VFS_CAP_REVISION_MASK) >> - VFS_CAP_REVISION_SHIFT; - - return 0; -} - -/* Copy inode data into an audit_names. */ -void audit_copy_inode(struct audit_names *name, const struct dentry *dentry, - struct inode *inode, unsigned int flags) -{ - name->ino = inode->i_ino; - name->dev = inode->i_sb->s_dev; - name->mode = inode->i_mode; - name->uid = inode->i_uid; - name->gid = inode->i_gid; - name->rdev = inode->i_rdev; - security_inode_getsecid(inode, &name->osid); - if (flags & AUDIT_INODE_NOEVAL) { - name->fcap_ver = -1; - return; - } - audit_copy_fcaps(name, dentry); -} - -/** - * audit_log_name - produce AUDIT_PATH record from struct audit_names - * @context: audit_context for the task - * @n: audit_names structure with reportable details - * @path: optional path to report instead of audit_names->name - * @record_num: record number to report when handling a list of names - * @call_panic: optional pointer to int that will be updated if secid fails - */ -void audit_log_name(struct audit_context *context, struct audit_names *n, - const struct path *path, int record_num, int *call_panic) -{ - struct audit_buffer *ab; - ab = audit_log_start(context, GFP_KERNEL, AUDIT_PATH); - if (!ab) - return; - - audit_log_format(ab, "item=%d", record_num); - - if (path) - audit_log_d_path(ab, " name=", path); - else if (n->name) { - switch (n->name_len) { - case AUDIT_NAME_FULL: - /* log the full path */ - audit_log_format(ab, " name="); - audit_log_untrustedstring(ab, n->name->name); - break; - case 0: - /* name was specified as a relative path and the - * directory component is the cwd */ - audit_log_d_path(ab, " name=", &context->pwd); - break; - default: - /* log the name's directory component */ - audit_log_format(ab, " name="); - audit_log_n_untrustedstring(ab, n->name->name, - n->name_len); - } - } else - audit_log_format(ab, " name=(null)"); - - if (n->ino != AUDIT_INO_UNSET) - audit_log_format(ab, " inode=%lu" - " dev=%02x:%02x mode=%#ho" - " ouid=%u ogid=%u rdev=%02x:%02x", - n->ino, - MAJOR(n->dev), - MINOR(n->dev), - n->mode, - from_kuid(&init_user_ns, n->uid), - from_kgid(&init_user_ns, n->gid), - MAJOR(n->rdev), - MINOR(n->rdev)); - if (n->osid != 0) { - char *ctx = NULL; - u32 len; - if (security_secid_to_secctx( - n->osid, &ctx, &len)) { - audit_log_format(ab, " osid=%u", n->osid); - if (call_panic) - *call_panic = 2; - } else { - audit_log_format(ab, " obj=%s", ctx); - security_release_secctx(ctx, len); - } - } - - /* log the audit_names record type */ - switch(n->type) { - case AUDIT_TYPE_NORMAL: - audit_log_format(ab, " nametype=NORMAL"); - break; - case AUDIT_TYPE_PARENT: - audit_log_format(ab, " nametype=PARENT"); - break; - case AUDIT_TYPE_CHILD_DELETE: - audit_log_format(ab, " nametype=DELETE"); - break; - case AUDIT_TYPE_CHILD_CREATE: - audit_log_format(ab, " nametype=CREATE"); - break; - default: - audit_log_format(ab, " nametype=UNKNOWN"); - break; - } - - audit_log_fcaps(ab, n); - audit_log_end(ab); -} - int audit_log_task_context(struct audit_buffer *ab) { char *ctx = NULL; diff --git a/kernel/audit.h b/kernel/audit.h index 002f0f7ba732..82734f438ddd 100644 --- a/kernel/audit.h +++ b/kernel/audit.h @@ -213,15 +213,6 @@ extern bool audit_ever_enabled; extern void audit_log_session_info(struct audit_buffer *ab); -extern void audit_copy_inode(struct audit_names *name, - const struct dentry *dentry, - struct inode *inode, unsigned int flags); -extern void audit_log_cap(struct audit_buffer *ab, char *prefix, - kernel_cap_t *cap); -extern void audit_log_name(struct audit_context *context, - struct audit_names *n, const struct path *path, - int record_num, int *call_panic); - extern int auditd_test_task(struct task_struct *task); #define AUDIT_INODE_BUCKETS 32 diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 7d37cb1e4aef..d1eab1d4a930 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1139,6 +1139,32 @@ out: kfree(buf_head); } +void audit_log_cap(struct audit_buffer *ab, char *prefix, kernel_cap_t *cap) +{ + int i; + + if (cap_isclear(*cap)) { + audit_log_format(ab, " %s=0", prefix); + return; + } + audit_log_format(ab, " %s=", prefix); + CAP_FOR_EACH_U32(i) + audit_log_format(ab, "%08x", cap->cap[CAP_LAST_U32 - i]); +} + +static void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name) +{ + if (name->fcap_ver == -1) { + audit_log_format(ab, " cap_fe=? cap_fver=? cap_fp=? cap_fi=?"); + return; + } + audit_log_cap(ab, "cap_fp", &name->fcap.permitted); + audit_log_cap(ab, "cap_fi", &name->fcap.inheritable); + audit_log_format(ab, " cap_fe=%d cap_fver=%x cap_frootid=%d", + name->fcap.fE, name->fcap_ver, + from_kuid(&init_user_ns, name->fcap.rootid)); +} + static void show_special(struct audit_context *context, int *call_panic) { struct audit_buffer *ab; @@ -1261,6 +1287,97 @@ static inline int audit_proctitle_rtrim(char *proctitle, int len) return len; } +/* + * audit_log_name - produce AUDIT_PATH record from struct audit_names + * @context: audit_context for the task + * @n: audit_names structure with reportable details + * @path: optional path to report instead of audit_names->name + * @record_num: record number to report when handling a list of names + * @call_panic: optional pointer to int that will be updated if secid fails + */ +static void audit_log_name(struct audit_context *context, struct audit_names *n, + const struct path *path, int record_num, int *call_panic) +{ + struct audit_buffer *ab; + + ab = audit_log_start(context, GFP_KERNEL, AUDIT_PATH); + if (!ab) + return; + + audit_log_format(ab, "item=%d", record_num); + + if (path) + audit_log_d_path(ab, " name=", path); + else if (n->name) { + switch (n->name_len) { + case AUDIT_NAME_FULL: + /* log the full path */ + audit_log_format(ab, " name="); + audit_log_untrustedstring(ab, n->name->name); + break; + case 0: + /* name was specified as a relative path and the + * directory component is the cwd + */ + audit_log_d_path(ab, " name=", &context->pwd); + break; + default: + /* log the name's directory component */ + audit_log_format(ab, " name="); + audit_log_n_untrustedstring(ab, n->name->name, + n->name_len); + } + } else + audit_log_format(ab, " name=(null)"); + + if (n->ino != AUDIT_INO_UNSET) + audit_log_format(ab, " inode=%lu dev=%02x:%02x mode=%#ho ouid=%u ogid=%u rdev=%02x:%02x", + n->ino, + MAJOR(n->dev), + MINOR(n->dev), + n->mode, + from_kuid(&init_user_ns, n->uid), + from_kgid(&init_user_ns, n->gid), + MAJOR(n->rdev), + MINOR(n->rdev)); + if (n->osid != 0) { + char *ctx = NULL; + u32 len; + + if (security_secid_to_secctx( + n->osid, &ctx, &len)) { + audit_log_format(ab, " osid=%u", n->osid); + if (call_panic) + *call_panic = 2; + } else { + audit_log_format(ab, " obj=%s", ctx); + security_release_secctx(ctx, len); + } + } + + /* log the audit_names record type */ + switch (n->type) { + case AUDIT_TYPE_NORMAL: + audit_log_format(ab, " nametype=NORMAL"); + break; + case AUDIT_TYPE_PARENT: + audit_log_format(ab, " nametype=PARENT"); + break; + case AUDIT_TYPE_CHILD_DELETE: + audit_log_format(ab, " nametype=DELETE"); + break; + case AUDIT_TYPE_CHILD_CREATE: + audit_log_format(ab, " nametype=CREATE"); + break; + default: + audit_log_format(ab, " nametype=UNKNOWN"); + break; + } + + audit_log_fcaps(ab, n); + audit_log_end(ab); +} + static void audit_log_proctitle(void) { int res; @@ -1756,6 +1873,47 @@ void __audit_getname(struct filename *name) get_fs_pwd(current->fs, &context->pwd); } +static inline int audit_copy_fcaps(struct audit_names *name, + const struct dentry *dentry) +{ + struct cpu_vfs_cap_data caps; + int rc; + + if (!dentry) + return 0; + + rc = get_vfs_caps_from_disk(dentry, &caps); + if (rc) + return rc; + + name->fcap.permitted = caps.permitted; + name->fcap.inheritable = caps.inheritable; + name->fcap.fE = !!(caps.magic_etc & VFS_CAP_FLAGS_EFFECTIVE); + name->fcap.rootid = caps.rootid; + name->fcap_ver = (caps.magic_etc & VFS_CAP_REVISION_MASK) >> + VFS_CAP_REVISION_SHIFT; + + return 0; +} + +/* Copy inode data into an audit_names. */ +void audit_copy_inode(struct audit_names *name, const struct dentry *dentry, + struct inode *inode, unsigned int flags) +{ + name->ino = inode->i_ino; + name->dev = inode->i_sb->s_dev; + name->mode = inode->i_mode; + name->uid = inode->i_uid; + name->gid = inode->i_gid; + name->rdev = inode->i_rdev; + security_inode_getsecid(inode, &name->osid); + if (flags & AUDIT_INODE_NOEVAL) { + name->fcap_ver = -1; + return; + } + audit_copy_fcaps(name, dentry); +} + /** * __audit_inode - store the inode and device from a lookup * @name: name being audited -- cgit From 494c704f9af0a0cddf593b381ea44320888733e6 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 2 Feb 2019 10:41:13 +0100 Subject: efi: Use 32-bit alignment for efi_guid_t The UEFI spec and EDK2 reference implementation both define EFI_GUID as struct { u32 a; u16; b; u16 c; u8 d[8]; }; and so the implied alignment is 32 bits not 8 bits like our guid_t. In some cases (i.e., on 32-bit ARM), this means that firmware services invoked by the kernel may assume that efi_guid_t* arguments are 32-bit aligned, and use memory accessors that do not tolerate misalignment. So let's set the minimum alignment to 32 bits. Note that the UEFI spec as well as some comments in the EDK2 code base suggest that EFI_GUID should be 64-bit aligned, but this appears to be a mistake, given that no code seems to exist that actually enforces that or relies on it. Reported-by: Heinrich Schuchardt Signed-off-by: Ard Biesheuvel Reviewed-by: Leif Lindholm Cc: AKASHI Takahiro Cc: Alexander Graf Cc: Bjorn Andersson Cc: Borislav Petkov Cc: Jeffrey Hugo Cc: Lee Jones Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Jones Cc: Peter Zijlstra Cc: Sai Praneeth Prakhya Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20190202094119.13230-5-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- include/linux/efi.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 45ff763fba76..be08518c2553 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -48,7 +48,20 @@ typedef u16 efi_char16_t; /* UNICODE character */ typedef u64 efi_physical_addr_t; typedef void *efi_handle_t; -typedef guid_t efi_guid_t; +/* + * The UEFI spec and EDK2 reference implementation both define EFI_GUID as + * struct { u32 a; u16; b; u16 c; u8 d[8]; }; and so the implied alignment + * is 32 bits not 8 bits like our guid_t. In some cases (i.e., on 32-bit ARM), + * this means that firmware services invoked by the kernel may assume that + * efi_guid_t* arguments are 32-bit aligned, and use memory accessors that + * do not tolerate misalignment. So let's set the minimum alignment to 32 bits. + * + * Note that the UEFI spec as well as some comments in the EDK2 code base + * suggest that EFI_GUID should be 64-bit aligned, but this appears to be + * a mistake, given that no code seems to exist that actually enforces that + * or relies on it. + */ +typedef guid_t efi_guid_t __aligned(__alignof__(u32)); #define EFI_GUID(a,b,c,d0,d1,d2,d3,d4,d5,d6,d7) \ GUID_INIT(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) -- cgit From 8c94abbbe1ba24961278055434504b7dc3595415 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Mon, 28 Jan 2019 14:27:26 +0200 Subject: perf: Convert perf_event_context.refcount to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable perf_event_context.refcount is used as pure reference counter. Convert it to refcount_t and fix up the operations. ** Important note for maintainers: Some functions from refcount_t API defined in lib/refcount.c have different memory ordering guarantees than their atomic counterparts. Please check Documentation/core-api/refcount-vs-atomic.rst for more information. Normally the differences should not matter since refcount_t provides enough guarantees to satisfy the refcounting use cases, but in some rare cases it might matter. Please double check that you don't have some undocumented memory guarantees for this variable usage. For the perf_event_context.refcount it might make a difference in following places: - get_ctx(), perf_event_ctx_lock_nested(), perf_lock_task_context() and __perf_event_ctx_lock_double(): increment in refcount_inc_not_zero() only guarantees control dependency on success vs. fully ordered atomic counterpart - put_ctx(): decrement in refcount_dec_and_test() provides RELEASE ordering and ACQUIRE ordering + control dependency on success vs. fully ordered atomic counterpart Suggested-by: Kees Cook Signed-off-by: Elena Reshetova Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: acme@kernel.org Cc: namhyung@kernel.org Link: https://lkml.kernel.org/r/1548678448-24458-2-git-send-email-elena.reshetova@intel.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 3 ++- kernel/events/core.c | 12 ++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index a79e59fc3b7d..6cb5d483ab34 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -54,6 +54,7 @@ struct perf_guest_info_callbacks { #include #include #include +#include #include struct perf_callchain_entry { @@ -737,7 +738,7 @@ struct perf_event_context { int nr_stat; int nr_freq; int rotate_disable; - atomic_t refcount; + refcount_t refcount; struct task_struct *task; /* diff --git a/kernel/events/core.c b/kernel/events/core.c index 5b89de7918d0..677164d54547 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1172,7 +1172,7 @@ static void perf_event_ctx_deactivate(struct perf_event_context *ctx) static void get_ctx(struct perf_event_context *ctx) { - WARN_ON(!atomic_inc_not_zero(&ctx->refcount)); + refcount_inc(&ctx->refcount); } static void free_ctx(struct rcu_head *head) @@ -1186,7 +1186,7 @@ static void free_ctx(struct rcu_head *head) static void put_ctx(struct perf_event_context *ctx) { - if (atomic_dec_and_test(&ctx->refcount)) { + if (refcount_dec_and_test(&ctx->refcount)) { if (ctx->parent_ctx) put_ctx(ctx->parent_ctx); if (ctx->task && ctx->task != TASK_TOMBSTONE) @@ -1268,7 +1268,7 @@ perf_event_ctx_lock_nested(struct perf_event *event, int nesting) again: rcu_read_lock(); ctx = READ_ONCE(event->ctx); - if (!atomic_inc_not_zero(&ctx->refcount)) { + if (!refcount_inc_not_zero(&ctx->refcount)) { rcu_read_unlock(); goto again; } @@ -1401,7 +1401,7 @@ retry: } if (ctx->task == TASK_TOMBSTONE || - !atomic_inc_not_zero(&ctx->refcount)) { + !refcount_inc_not_zero(&ctx->refcount)) { raw_spin_unlock(&ctx->lock); ctx = NULL; } else { @@ -4057,7 +4057,7 @@ static void __perf_event_init_context(struct perf_event_context *ctx) INIT_LIST_HEAD(&ctx->event_list); INIT_LIST_HEAD(&ctx->pinned_active); INIT_LIST_HEAD(&ctx->flexible_active); - atomic_set(&ctx->refcount, 1); + refcount_set(&ctx->refcount, 1); } static struct perf_event_context * @@ -10613,7 +10613,7 @@ __perf_event_ctx_lock_double(struct perf_event *group_leader, again: rcu_read_lock(); gctx = READ_ONCE(group_leader->ctx); - if (!atomic_inc_not_zero(&gctx->refcount)) { + if (!refcount_inc_not_zero(&gctx->refcount)) { rcu_read_unlock(); goto again; } -- cgit From d036bda7d0e7269c2982eb979acfef855f5d7977 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 18 Jan 2019 14:27:26 +0200 Subject: sched/core: Convert sighand_struct.count to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable sighand_struct.count is used as pure reference counter. Convert it to refcount_t and fix up the operations. ** Important note for maintainers: Some functions from refcount_t API defined in lib/refcount.c have different memory ordering guarantees than their atomic counterparts. The full comparison can be seen in https://lkml.org/lkml/2017/11/15/57 and it is hopefully soon in state to be merged to the documentation tree. Normally the differences should not matter since refcount_t provides enough guarantees to satisfy the refcounting use cases, but in some rare cases it might matter. Please double check that you don't have some undocumented memory guarantees for this variable usage. For the sighand_struct.count it might make a difference in following places: - __cleanup_sighand: decrement in refcount_dec_and_test() only provides RELEASE ordering and control dependency on success vs. fully ordered atomic counterpart Suggested-by: Kees Cook Signed-off-by: Elena Reshetova Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Reviewed-by: Andrea Parri Reviewed-by: Oleg Nesterov Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: akpm@linux-foundation.org Cc: viro@zeniv.linux.org.uk Link: https://lkml.kernel.org/r/1547814450-18902-2-git-send-email-elena.reshetova@intel.com Signed-off-by: Ingo Molnar --- fs/exec.c | 4 ++-- fs/proc/task_nommu.c | 2 +- include/linux/sched/signal.h | 3 ++- kernel/fork.c | 8 ++++---- 4 files changed, 9 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/fs/exec.c b/fs/exec.c index fb72d36f7823..966cd98a2ce2 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1189,7 +1189,7 @@ no_thread_group: flush_itimer_signals(); #endif - if (atomic_read(&oldsighand->count) != 1) { + if (refcount_read(&oldsighand->count) != 1) { struct sighand_struct *newsighand; /* * This ->sighand is shared with the CLONE_SIGHAND @@ -1199,7 +1199,7 @@ no_thread_group: if (!newsighand) return -ENOMEM; - atomic_set(&newsighand->count, 1); + refcount_set(&newsighand->count, 1); memcpy(newsighand->action, oldsighand->action, sizeof(newsighand->action)); diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 0b63d68dedb2..f912872fbf91 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -64,7 +64,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) else bytes += kobjsize(current->files); - if (current->sighand && atomic_read(¤t->sighand->count) > 1) + if (current->sighand && refcount_read(¤t->sighand->count) > 1) sbytes += kobjsize(current->sighand); else bytes += kobjsize(current->sighand); diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 13789d10a50e..37eeb1a28eba 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -8,13 +8,14 @@ #include #include #include +#include /* * Types defining task->signal and task->sighand and APIs using them: */ struct sighand_struct { - atomic_t count; + refcount_t count; struct k_sigaction action[_NSIG]; spinlock_t siglock; wait_queue_head_t signalfd_wqh; diff --git a/kernel/fork.c b/kernel/fork.c index b69248e6f0e0..370856d4c0b3 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1463,7 +1463,7 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk) struct sighand_struct *sig; if (clone_flags & CLONE_SIGHAND) { - atomic_inc(¤t->sighand->count); + refcount_inc(¤t->sighand->count); return 0; } sig = kmem_cache_alloc(sighand_cachep, GFP_KERNEL); @@ -1471,7 +1471,7 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk) if (!sig) return -ENOMEM; - atomic_set(&sig->count, 1); + refcount_set(&sig->count, 1); spin_lock_irq(¤t->sighand->siglock); memcpy(sig->action, current->sighand->action, sizeof(sig->action)); spin_unlock_irq(¤t->sighand->siglock); @@ -1480,7 +1480,7 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk) void __cleanup_sighand(struct sighand_struct *sighand) { - if (atomic_dec_and_test(&sighand->count)) { + if (refcount_dec_and_test(&sighand->count)) { signalfd_cleanup(sighand); /* * sighand_cachep is SLAB_TYPESAFE_BY_RCU so we can free it @@ -2439,7 +2439,7 @@ static int check_unshare_flags(unsigned long unshare_flags) return -EINVAL; } if (unshare_flags & (CLONE_SIGHAND | CLONE_VM)) { - if (atomic_read(¤t->sighand->count) > 1) + if (refcount_read(¤t->sighand->count) > 1) return -EINVAL; } if (unshare_flags & CLONE_VM) { -- cgit From 60d4de3ff7f775509deba94b3db3c1abe55bf7a5 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 18 Jan 2019 14:27:27 +0200 Subject: sched/core: Convert signal_struct.sigcnt to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable signal_struct.sigcnt is used as pure reference counter. Convert it to refcount_t and fix up the operations. ** Important note for maintainers: Some functions from refcount_t API defined in lib/refcount.c have different memory ordering guarantees than their atomic counterparts. The full comparison can be seen in https://lkml.org/lkml/2017/11/15/57 and it is hopefully soon in state to be merged to the documentation tree. Normally the differences should not matter since refcount_t provides enough guarantees to satisfy the refcounting use cases, but in some rare cases it might matter. Please double check that you don't have some undocumented memory guarantees for this variable usage. For the signal_struct.sigcnt it might make a difference in following places: - put_signal_struct(): decrement in refcount_dec_and_test() only provides RELEASE ordering and control dependency on success vs. fully ordered atomic counterpart Suggested-by: Kees Cook Signed-off-by: Elena Reshetova Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Reviewed-by: Andrea Parri Reviewed-by: Oleg Nesterov Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: akpm@linux-foundation.org Cc: viro@zeniv.linux.org.uk Link: https://lkml.kernel.org/r/1547814450-18902-3-git-send-email-elena.reshetova@intel.com Signed-off-by: Ingo Molnar --- include/linux/sched/signal.h | 2 +- init/init_task.c | 2 +- kernel/fork.c | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 37eeb1a28eba..ae5655197698 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -83,7 +83,7 @@ struct multiprocess_signals { * the locking of signal_struct. */ struct signal_struct { - atomic_t sigcnt; + refcount_t sigcnt; atomic_t live; int nr_threads; struct list_head thread_head; diff --git a/init/init_task.c b/init/init_task.c index 5aebe3be4d7c..9aa3ebc74970 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -44,7 +44,7 @@ static struct signal_struct init_signals = { }; static struct sighand_struct init_sighand = { - .count = ATOMIC_INIT(1), + .count = REFCOUNT_INIT(1), .action = { { { .sa_handler = SIG_DFL, } }, }, .siglock = __SPIN_LOCK_UNLOCKED(init_sighand.siglock), .signalfd_wqh = __WAIT_QUEUE_HEAD_INITIALIZER(init_sighand.signalfd_wqh), diff --git a/kernel/fork.c b/kernel/fork.c index 370856d4c0b3..935a42d5f8ff 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -710,7 +710,7 @@ static inline void free_signal_struct(struct signal_struct *sig) static inline void put_signal_struct(struct signal_struct *sig) { - if (atomic_dec_and_test(&sig->sigcnt)) + if (refcount_dec_and_test(&sig->sigcnt)) free_signal_struct(sig); } @@ -1527,7 +1527,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) sig->nr_threads = 1; atomic_set(&sig->live, 1); - atomic_set(&sig->sigcnt, 1); + refcount_set(&sig->sigcnt, 1); /* list_add(thread_node, thread_head) without INIT_LIST_HEAD() */ sig->thread_head = (struct list_head)LIST_HEAD_INIT(tsk->thread_node); @@ -2082,7 +2082,7 @@ static __latent_entropy struct task_struct *copy_process( } else { current->signal->nr_threads++; atomic_inc(¤t->signal->live); - atomic_inc(¤t->signal->sigcnt); + refcount_inc(¤t->signal->sigcnt); task_join_group_stop(p); list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group); -- cgit From ec1d281923cf81cc660343d0cb8ffc837ffb991d Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 18 Jan 2019 14:27:29 +0200 Subject: sched/core: Convert task_struct.usage to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable task_struct.usage is used as pure reference counter. Convert it to refcount_t and fix up the operations. ** Important note for maintainers: Some functions from refcount_t API defined in lib/refcount.c have different memory ordering guarantees than their atomic counterparts. The full comparison can be seen in https://lkml.org/lkml/2017/11/15/57 and it is hopefully soon in state to be merged to the documentation tree. Normally the differences should not matter since refcount_t provides enough guarantees to satisfy the refcounting use cases, but in some rare cases it might matter. Please double check that you don't have some undocumented memory guarantees for this variable usage. For the task_struct.usage it might make a difference in following places: - put_task_struct(): decrement in refcount_dec_and_test() only provides RELEASE ordering and control dependency on success vs. fully ordered atomic counterpart Suggested-by: Kees Cook Signed-off-by: Elena Reshetova Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Reviewed-by: Andrea Parri Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: akpm@linux-foundation.org Cc: viro@zeniv.linux.org.uk Link: https://lkml.kernel.org/r/1547814450-18902-5-git-send-email-elena.reshetova@intel.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 3 ++- include/linux/sched/task.h | 4 ++-- init/init_task.c | 2 +- kernel/fork.c | 4 ++-- 4 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index e2bba022827d..9d14d6864ca6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -607,7 +608,7 @@ struct task_struct { randomized_struct_fields_start void *stack; - atomic_t usage; + refcount_t usage; /* Per task flags (PF_*), defined further below: */ unsigned int flags; unsigned int ptrace; diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 44c6f15800ff..2e97a2227045 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -88,13 +88,13 @@ extern void sched_exec(void); #define sched_exec() {} #endif -#define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0) +#define get_task_struct(tsk) do { refcount_inc(&(tsk)->usage); } while(0) extern void __put_task_struct(struct task_struct *t); static inline void put_task_struct(struct task_struct *t) { - if (atomic_dec_and_test(&t->usage)) + if (refcount_dec_and_test(&t->usage)) __put_task_struct(t); } diff --git a/init/init_task.c b/init/init_task.c index 9aa3ebc74970..aca34c89529f 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -65,7 +65,7 @@ struct task_struct init_task #endif .state = 0, .stack = init_stack, - .usage = ATOMIC_INIT(2), + .usage = REFCOUNT_INIT(2), .flags = PF_KTHREAD, .prio = MAX_PRIO - 20, .static_prio = MAX_PRIO - 20, diff --git a/kernel/fork.c b/kernel/fork.c index 935a42d5f8ff..3f7e192e29f2 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -717,7 +717,7 @@ static inline void put_signal_struct(struct signal_struct *sig) void __put_task_struct(struct task_struct *tsk) { WARN_ON(!tsk->exit_state); - WARN_ON(atomic_read(&tsk->usage)); + WARN_ON(refcount_read(&tsk->usage)); WARN_ON(tsk == current); cgroup_free(tsk); @@ -896,7 +896,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) * One for us, one for whoever does the "release_task()" (usually * parent) */ - atomic_set(&tsk->usage, 2); + refcount_set(&tsk->usage, 2); #ifdef CONFIG_BLK_DEV_IO_TRACE tsk->btrace_seq = 0; #endif -- cgit From f0b89d3958d73cd0785ec381f0ddf8efb6f183d8 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 18 Jan 2019 14:27:30 +0200 Subject: sched/core: Convert task_struct.stack_refcount to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable task_struct.stack_refcount is used as pure reference counter. Convert it to refcount_t and fix up the operations. ** Important note for maintainers: Some functions from refcount_t API defined in lib/refcount.c have different memory ordering guarantees than their atomic counterparts. The full comparison can be seen in https://lkml.org/lkml/2017/11/15/57 and it is hopefully soon in state to be merged to the documentation tree. Normally the differences should not matter since refcount_t provides enough guarantees to satisfy the refcounting use cases, but in some rare cases it might matter. Please double check that you don't have some undocumented memory guarantees for this variable usage. For the task_struct.stack_refcount it might make a difference in following places: - try_get_task_stack(): increment in refcount_inc_not_zero() only guarantees control dependency on success vs. fully ordered atomic counterpart - put_task_stack(): decrement in refcount_dec_and_test() only provides RELEASE ordering and control dependency on success vs. fully ordered atomic counterpart Suggested-by: Kees Cook Signed-off-by: Elena Reshetova Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Reviewed-by: Andrea Parri Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: akpm@linux-foundation.org Cc: viro@zeniv.linux.org.uk Link: https://lkml.kernel.org/r/1547814450-18902-6-git-send-email-elena.reshetova@intel.com Signed-off-by: Ingo Molnar --- include/linux/init_task.h | 1 + include/linux/sched.h | 2 +- include/linux/sched/task_stack.h | 2 +- init/init_task.c | 2 +- kernel/fork.c | 6 +++--- 5 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index a7083a45a26c..6049baa5b8bc 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/sched.h b/include/linux/sched.h index 9d14d6864ca6..628bf13cb5a5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1194,7 +1194,7 @@ struct task_struct { #endif #ifdef CONFIG_THREAD_INFO_IN_TASK /* A live task holds one reference: */ - atomic_t stack_refcount; + refcount_t stack_refcount; #endif #ifdef CONFIG_LIVEPATCH int patch_state; diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h index 6a841929073f..2413427e439c 100644 --- a/include/linux/sched/task_stack.h +++ b/include/linux/sched/task_stack.h @@ -61,7 +61,7 @@ static inline unsigned long *end_of_stack(struct task_struct *p) #ifdef CONFIG_THREAD_INFO_IN_TASK static inline void *try_get_task_stack(struct task_struct *tsk) { - return atomic_inc_not_zero(&tsk->stack_refcount) ? + return refcount_inc_not_zero(&tsk->stack_refcount) ? task_stack_page(tsk) : NULL; } diff --git a/init/init_task.c b/init/init_task.c index aca34c89529f..46dbf546264d 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -61,7 +61,7 @@ struct task_struct init_task = { #ifdef CONFIG_THREAD_INFO_IN_TASK .thread_info = INIT_THREAD_INFO(init_task), - .stack_refcount = ATOMIC_INIT(1), + .stack_refcount = REFCOUNT_INIT(1), #endif .state = 0, .stack = init_stack, diff --git a/kernel/fork.c b/kernel/fork.c index 3f7e192e29f2..77059b211608 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -429,7 +429,7 @@ static void release_task_stack(struct task_struct *tsk) #ifdef CONFIG_THREAD_INFO_IN_TASK void put_task_stack(struct task_struct *tsk) { - if (atomic_dec_and_test(&tsk->stack_refcount)) + if (refcount_dec_and_test(&tsk->stack_refcount)) release_task_stack(tsk); } #endif @@ -447,7 +447,7 @@ void free_task(struct task_struct *tsk) * If the task had a separate stack allocation, it should be gone * by now. */ - WARN_ON_ONCE(atomic_read(&tsk->stack_refcount) != 0); + WARN_ON_ONCE(refcount_read(&tsk->stack_refcount) != 0); #endif rt_mutex_debug_task_free(tsk); ftrace_graph_exit_task(tsk); @@ -867,7 +867,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) tsk->stack_vm_area = stack_vm_area; #endif #ifdef CONFIG_THREAD_INFO_IN_TASK - atomic_set(&tsk->stack_refcount, 1); + refcount_set(&tsk->stack_refcount, 1); #endif if (err) -- cgit From 07879c6a3740fbbf3c8891a0ab484c20a12794d8 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Tue, 18 Dec 2018 11:53:52 -0800 Subject: sched/wake_q: Reduce reference counting for special users Some users, specifically futexes and rwsems, required fixes that allowed the callers to be safe when wakeups occur before they are expected by wake_up_q(). Such scenarios also play games and rely on reference counting, and until now were pivoting on wake_q doing it. With the wake_q_add() call being moved down, this can no longer be the case. As such we end up with a a double task refcounting overhead; and these callers care enough about this (being rather core-ish). This patch introduces a wake_q_add_safe() call that serves for callers that have already done refcounting and therefore the task is 'safe' from wake_q point of view (int that it requires reference throughout the entire queue/>wakeup cycle). In the one case it has internal reference counting, in the other case it consumes the reference counting. Signed-off-by: Davidlohr Bueso Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Waiman Long Cc: Will Deacon Cc: Xie Yongji Cc: Yongji Xie Cc: andrea.parri@amarulasolutions.com Cc: lilin24@baidu.com Cc: liuqi16@baidu.com Cc: nixun@baidu.com Cc: yuanlinsi01@baidu.com Cc: zhangyu31@baidu.com Link: https://lkml.kernel.org/r/20181218195352.7orq3upiwfdbrdne@linux-r8p5 Signed-off-by: Ingo Molnar --- include/linux/sched/wake_q.h | 4 +-- kernel/futex.c | 3 +-- kernel/locking/rwsem-xadd.c | 4 +-- kernel/sched/core.c | 60 ++++++++++++++++++++++++++++++++------------ 4 files changed, 48 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/wake_q.h b/include/linux/sched/wake_q.h index 545f37138057..ad826d2a4557 100644 --- a/include/linux/sched/wake_q.h +++ b/include/linux/sched/wake_q.h @@ -51,8 +51,8 @@ static inline void wake_q_init(struct wake_q_head *head) head->lastp = &head->first; } -extern void wake_q_add(struct wake_q_head *head, - struct task_struct *task); +extern void wake_q_add(struct wake_q_head *head, struct task_struct *task); +extern void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task); extern void wake_up_q(struct wake_q_head *head); #endif /* _LINUX_SCHED_WAKE_Q_H */ diff --git a/kernel/futex.c b/kernel/futex.c index 69e619baf709..2abe1a0b3062 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1463,8 +1463,7 @@ static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q) * Queue the task for later wakeup for after we've released * the hb->lock. wake_q_add() grabs reference to p. */ - wake_q_add(wake_q, p); - put_task_struct(p); + wake_q_add_safe(wake_q, p); } /* diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index 50d9af615dc4..fbe96341beee 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -211,9 +211,7 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem, * Ensure issuing the wakeup (either by us or someone else) * after setting the reader waiter to nil. */ - wake_q_add(wake_q, tsk); - /* wake_q_add() already take the task ref */ - put_task_struct(tsk); + wake_q_add_safe(wake_q, tsk); } adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 3c8b4dba3d2d..64ceaa5158c5 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -396,19 +396,7 @@ static bool set_nr_if_polling(struct task_struct *p) #endif #endif -/** - * wake_q_add() - queue a wakeup for 'later' waking. - * @head: the wake_q_head to add @task to - * @task: the task to queue for 'later' wakeup - * - * Queue a task for later wakeup, most likely by the wake_up_q() call in the - * same context, _HOWEVER_ this is not guaranteed, the wakeup can come - * instantly. - * - * This function must be used as-if it were wake_up_process(); IOW the task - * must be ready to be woken at this location. - */ -void wake_q_add(struct wake_q_head *head, struct task_struct *task) +static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task) { struct wake_q_node *node = &task->wake_q; @@ -422,15 +410,55 @@ void wake_q_add(struct wake_q_head *head, struct task_struct *task) */ smp_mb__before_atomic(); if (unlikely(cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL))) - return; - - get_task_struct(task); + return false; /* * The head is context local, there can be no concurrency. */ *head->lastp = node; head->lastp = &node->next; + return true; +} + +/** + * wake_q_add() - queue a wakeup for 'later' waking. + * @head: the wake_q_head to add @task to + * @task: the task to queue for 'later' wakeup + * + * Queue a task for later wakeup, most likely by the wake_up_q() call in the + * same context, _HOWEVER_ this is not guaranteed, the wakeup can come + * instantly. + * + * This function must be used as-if it were wake_up_process(); IOW the task + * must be ready to be woken at this location. + */ +void wake_q_add(struct wake_q_head *head, struct task_struct *task) +{ + if (__wake_q_add(head, task)) + get_task_struct(task); +} + +/** + * wake_q_add_safe() - safely queue a wakeup for 'later' waking. + * @head: the wake_q_head to add @task to + * @task: the task to queue for 'later' wakeup + * + * Queue a task for later wakeup, most likely by the wake_up_q() call in the + * same context, _HOWEVER_ this is not guaranteed, the wakeup can come + * instantly. + * + * This function must be used as-if it were wake_up_process(); IOW the task + * must be ready to be woken at this location. + * + * This function is essentially a task-safe equivalent to wake_q_add(). Callers + * that already hold reference to @task can call the 'safe' version and trust + * wake_q to do the right thing depending whether or not the @task is already + * queued for wakeup. + */ +void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task) +{ + if (!__wake_q_add(head, task)) + put_task_struct(task); } void wake_up_q(struct wake_q_head *head) -- cgit From 23127296889fe84b0762b191b5d041e8ba6f2599 Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Wed, 23 Jan 2019 16:26:53 +0100 Subject: sched/fair: Update scale invariance of PELT The current implementation of load tracking invariance scales the contribution with current frequency and uarch performance (only for utilization) of the CPU. One main result of this formula is that the figures are capped by current capacity of CPU. Another one is that the load_avg is not invariant because not scaled with uarch. The util_avg of a periodic task that runs r time slots every p time slots varies in the range : U * (1-y^r)/(1-y^p) * y^i < Utilization < U * (1-y^r)/(1-y^p) with U is the max util_avg value = SCHED_CAPACITY_SCALE At a lower capacity, the range becomes: U * C * (1-y^r')/(1-y^p) * y^i' < Utilization < U * C * (1-y^r')/(1-y^p) with C reflecting the compute capacity ratio between current capacity and max capacity. so C tries to compensate changes in (1-y^r') but it can't be accurate. Instead of scaling the contribution value of PELT algo, we should scale the running time. The PELT signal aims to track the amount of computation of tasks and/or rq so it seems more correct to scale the running time to reflect the effective amount of computation done since the last update. In order to be fully invariant, we need to apply the same amount of running time and idle time whatever the current capacity. Because running at lower capacity implies that the task will run longer, we have to ensure that the same amount of idle time will be applied when system becomes idle and no idle time has been "stolen". But reaching the maximum utilization value (SCHED_CAPACITY_SCALE) means that the task is seen as an always-running task whatever the capacity of the CPU (even at max compute capacity). In this case, we can discard this "stolen" idle times which becomes meaningless. In order to achieve this time scaling, a new clock_pelt is created per rq. The increase of this clock scales with current capacity when something is running on rq and synchronizes with clock_task when rq is idle. With this mechanism, we ensure the same running and idle time whatever the current capacity. This also enables to simplify the pelt algorithm by removing all references of uarch and frequency and applying the same contribution to utilization and loads. Furthermore, the scaling is done only once per update of clock (update_rq_clock_task()) instead of during each update of sched_entities and cfs/rt/dl_rq of the rq like the current implementation. This is interesting when cgroup are involved as shown in the results below: On a hikey (octo Arm64 platform). Performance cpufreq governor and only shallowest c-state to remove variance generated by those power features so we only track the impact of pelt algo. each test runs 16 times: ./perf bench sched pipe (higher is better) kernel tip/sched/core + patch ops/seconds ops/seconds diff cgroup root 59652(+/- 0.18%) 59876(+/- 0.24%) +0.38% level1 55608(+/- 0.27%) 55923(+/- 0.24%) +0.57% level2 52115(+/- 0.29%) 52564(+/- 0.22%) +0.86% hackbench -l 1000 (lower is better) kernel tip/sched/core + patch duration(sec) duration(sec) diff cgroup root 4.453(+/- 2.37%) 4.383(+/- 2.88%) -1.57% level1 4.859(+/- 8.50%) 4.830(+/- 7.07%) -0.60% level2 5.063(+/- 9.83%) 4.928(+/- 9.66%) -2.66% Then, the responsiveness of PELT is improved when CPU is not running at max capacity with this new algorithm. I have put below some examples of duration to reach some typical load values according to the capacity of the CPU with current implementation and with this patch. These values has been computed based on the geometric series and the half period value: Util (%) max capacity half capacity(mainline) half capacity(w/ patch) 972 (95%) 138ms not reachable 276ms 486 (47.5%) 30ms 138ms 60ms 256 (25%) 13ms 32ms 26ms On my hikey (octo Arm64 platform) with schedutil governor, the time to reach max OPP when starting from a null utilization, decreases from 223ms with current scale invariance down to 121ms with the new algorithm. Signed-off-by: Vincent Guittot Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Morten.Rasmussen@arm.com Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: bsegall@google.com Cc: dietmar.eggemann@arm.com Cc: patrick.bellasi@arm.com Cc: pjt@google.com Cc: pkondeti@codeaurora.org Cc: quentin.perret@arm.com Cc: rjw@rjwysocki.net Cc: srinivas.pandruvada@linux.intel.com Cc: thara.gopinath@linaro.org Link: https://lkml.kernel.org/r/1548257214-13745-3-git-send-email-vincent.guittot@linaro.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 23 +++------- kernel/sched/core.c | 1 + kernel/sched/deadline.c | 6 +-- kernel/sched/fair.c | 45 ++++++++++--------- kernel/sched/pelt.c | 45 ++++++++++--------- kernel/sched/pelt.h | 114 ++++++++++++++++++++++++++++++++++++++++++++++-- kernel/sched/rt.c | 6 +-- kernel/sched/sched.h | 5 ++- 8 files changed, 177 insertions(+), 68 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 628bf13cb5a5..351c0fe64c85 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -357,12 +357,6 @@ struct util_est { * For cfs_rq, it is the aggregated load_avg of all runnable and * blocked sched_entities. * - * load_avg may also take frequency scaling into account: - * - * load_avg = runnable% * scale_load_down(load) * freq% - * - * where freq% is the CPU frequency normalized to the highest frequency. - * * [util_avg definition] * * util_avg = running% * SCHED_CAPACITY_SCALE @@ -371,17 +365,14 @@ struct util_est { * a CPU. For cfs_rq, it is the aggregated util_avg of all runnable * and blocked sched_entities. * - * util_avg may also factor frequency scaling and CPU capacity scaling: - * - * util_avg = running% * SCHED_CAPACITY_SCALE * freq% * capacity% - * - * where freq% is the same as above, and capacity% is the CPU capacity - * normalized to the greatest capacity (due to uarch differences, etc). + * load_avg and util_avg don't direcly factor frequency scaling and CPU + * capacity scaling. The scaling is done through the rq_clock_pelt that + * is used for computing those signals (see update_rq_clock_pelt()) * - * N.B., the above ratios (runnable%, running%, freq%, and capacity%) - * themselves are in the range of [0, 1]. To do fixed point arithmetics, - * we therefore scale them to as large a range as necessary. This is for - * example reflected by util_avg's SCHED_CAPACITY_SCALE. + * N.B., the above ratios (runnable% and running%) themselves are in the + * range of [0, 1]. To do fixed point arithmetics, we therefore scale them + * to as large a range as necessary. This is for example reflected by + * util_avg's SCHED_CAPACITY_SCALE. * * [Overflow issue] * diff --git a/kernel/sched/core.c b/kernel/sched/core.c index a674c7db2f29..32e06704565e 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -180,6 +180,7 @@ static void update_rq_clock_task(struct rq *rq, s64 delta) if ((irq_delta + steal) && sched_feat(NONTASK_CAPACITY)) update_irq_load_avg(rq, irq_delta + steal); #endif + update_rq_clock_pelt(rq, delta); } void update_rq_clock(struct rq *rq) diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index fb8b7b5d745d..6a73e41a2016 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1767,7 +1767,7 @@ pick_next_task_dl(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) deadline_queue_push_tasks(rq); if (rq->curr->sched_class != &dl_sched_class) - update_dl_rq_load_avg(rq_clock_task(rq), rq, 0); + update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 0); return p; } @@ -1776,7 +1776,7 @@ static void put_prev_task_dl(struct rq *rq, struct task_struct *p) { update_curr_dl(rq); - update_dl_rq_load_avg(rq_clock_task(rq), rq, 1); + update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 1); if (on_dl_rq(&p->dl) && p->nr_cpus_allowed > 1) enqueue_pushable_dl_task(rq, p); } @@ -1793,7 +1793,7 @@ static void task_tick_dl(struct rq *rq, struct task_struct *p, int queued) { update_curr_dl(rq); - update_dl_rq_load_avg(rq_clock_task(rq), rq, 1); + update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 1); /* * Even when we have runtime, update_curr_dl() might have resulted in us * not being the leftmost task anymore. In that case NEED_RESCHED will diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index da13e834e990..f41f2eec6186 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -673,9 +673,8 @@ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se) return calc_delta_fair(sched_slice(cfs_rq, se), se); } -#ifdef CONFIG_SMP #include "pelt.h" -#include "sched-pelt.h" +#ifdef CONFIG_SMP static int select_idle_sibling(struct task_struct *p, int prev_cpu, int cpu); static unsigned long task_h_load(struct task_struct *p); @@ -763,7 +762,7 @@ void post_init_entity_util_avg(struct sched_entity *se) * such that the next switched_to_fair() has the * expected state. */ - se->avg.last_update_time = cfs_rq_clock_task(cfs_rq); + se->avg.last_update_time = cfs_rq_clock_pelt(cfs_rq); return; } } @@ -3109,7 +3108,7 @@ void set_task_rq_fair(struct sched_entity *se, p_last_update_time = prev->avg.last_update_time; n_last_update_time = next->avg.last_update_time; #endif - __update_load_avg_blocked_se(p_last_update_time, cpu_of(rq_of(prev)), se); + __update_load_avg_blocked_se(p_last_update_time, se); se->avg.last_update_time = n_last_update_time; } @@ -3244,11 +3243,11 @@ update_tg_cfs_runnable(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cf /* * runnable_sum can't be lower than running_sum - * As running sum is scale with CPU capacity wehreas the runnable sum - * is not we rescale running_sum 1st + * Rescale running sum to be in the same range as runnable sum + * running_sum is in [0 : LOAD_AVG_MAX << SCHED_CAPACITY_SHIFT] + * runnable_sum is in [0 : LOAD_AVG_MAX] */ - running_sum = se->avg.util_sum / - arch_scale_cpu_capacity(NULL, cpu_of(rq_of(cfs_rq))); + running_sum = se->avg.util_sum >> SCHED_CAPACITY_SHIFT; runnable_sum = max(runnable_sum, running_sum); load_sum = (s64)se_weight(se) * runnable_sum; @@ -3351,7 +3350,7 @@ static inline void add_tg_cfs_propagate(struct cfs_rq *cfs_rq, long runnable_sum /** * update_cfs_rq_load_avg - update the cfs_rq's load/util averages - * @now: current time, as per cfs_rq_clock_task() + * @now: current time, as per cfs_rq_clock_pelt() * @cfs_rq: cfs_rq to update * * The cfs_rq avg is the direct sum of all its entities (blocked and runnable) @@ -3396,7 +3395,7 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq) decayed = 1; } - decayed |= __update_load_avg_cfs_rq(now, cpu_of(rq_of(cfs_rq)), cfs_rq); + decayed |= __update_load_avg_cfs_rq(now, cfs_rq); #ifndef CONFIG_64BIT smp_wmb(); @@ -3486,9 +3485,7 @@ static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s /* Update task and its cfs_rq load average */ static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) { - u64 now = cfs_rq_clock_task(cfs_rq); - struct rq *rq = rq_of(cfs_rq); - int cpu = cpu_of(rq); + u64 now = cfs_rq_clock_pelt(cfs_rq); int decayed; /* @@ -3496,7 +3493,7 @@ static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s * track group sched_entity load average for task_h_load calc in migration */ if (se->avg.last_update_time && !(flags & SKIP_AGE_LOAD)) - __update_load_avg_se(now, cpu, cfs_rq, se); + __update_load_avg_se(now, cfs_rq, se); decayed = update_cfs_rq_load_avg(now, cfs_rq); decayed |= propagate_entity_load_avg(se); @@ -3548,7 +3545,7 @@ void sync_entity_load_avg(struct sched_entity *se) u64 last_update_time; last_update_time = cfs_rq_last_update_time(cfs_rq); - __update_load_avg_blocked_se(last_update_time, cpu_of(rq_of(cfs_rq)), se); + __update_load_avg_blocked_se(last_update_time, se); } /* @@ -7015,6 +7012,12 @@ idle: if (new_tasks > 0) goto again; + /* + * rq is about to be idle, check if we need to update the + * lost_idle_time of clock_pelt + */ + update_idle_rq_clock_pelt(rq); + return NULL; } @@ -7657,7 +7660,7 @@ static void update_blocked_averages(int cpu) if (throttled_hierarchy(cfs_rq)) continue; - if (update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq)) + if (update_cfs_rq_load_avg(cfs_rq_clock_pelt(cfs_rq), cfs_rq)) update_tg_load_avg(cfs_rq, 0); /* Propagate pending load changes to the parent, if any: */ @@ -7671,8 +7674,8 @@ static void update_blocked_averages(int cpu) } curr_class = rq->curr->sched_class; - update_rt_rq_load_avg(rq_clock_task(rq), rq, curr_class == &rt_sched_class); - update_dl_rq_load_avg(rq_clock_task(rq), rq, curr_class == &dl_sched_class); + update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class); + update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class); update_irq_load_avg(rq, 0); /* Don't need periodic decay once load/util_avg are null */ if (others_have_blocked(rq)) @@ -7742,11 +7745,11 @@ static inline void update_blocked_averages(int cpu) rq_lock_irqsave(rq, &rf); update_rq_clock(rq); - update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq); + update_cfs_rq_load_avg(cfs_rq_clock_pelt(cfs_rq), cfs_rq); curr_class = rq->curr->sched_class; - update_rt_rq_load_avg(rq_clock_task(rq), rq, curr_class == &rt_sched_class); - update_dl_rq_load_avg(rq_clock_task(rq), rq, curr_class == &dl_sched_class); + update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class); + update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class); update_irq_load_avg(rq, 0); #ifdef CONFIG_NO_HZ_COMMON rq->last_blocked_load_update_tick = jiffies; diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c index 90fb5bc12ad4..befce29bd882 100644 --- a/kernel/sched/pelt.c +++ b/kernel/sched/pelt.c @@ -26,7 +26,6 @@ #include #include "sched.h" -#include "sched-pelt.h" #include "pelt.h" /* @@ -106,16 +105,12 @@ static u32 __accumulate_pelt_segments(u64 periods, u32 d1, u32 d3) * n=1 */ static __always_inline u32 -accumulate_sum(u64 delta, int cpu, struct sched_avg *sa, +accumulate_sum(u64 delta, struct sched_avg *sa, unsigned long load, unsigned long runnable, int running) { - unsigned long scale_freq, scale_cpu; u32 contrib = (u32)delta; /* p == 0 -> delta < 1024 */ u64 periods; - scale_freq = arch_scale_freq_capacity(cpu); - scale_cpu = arch_scale_cpu_capacity(NULL, cpu); - delta += sa->period_contrib; periods = delta / 1024; /* A period is 1024us (~1ms) */ @@ -137,13 +132,12 @@ accumulate_sum(u64 delta, int cpu, struct sched_avg *sa, } sa->period_contrib = delta; - contrib = cap_scale(contrib, scale_freq); if (load) sa->load_sum += load * contrib; if (runnable) sa->runnable_load_sum += runnable * contrib; if (running) - sa->util_sum += contrib * scale_cpu; + sa->util_sum += contrib << SCHED_CAPACITY_SHIFT; return periods; } @@ -177,7 +171,7 @@ accumulate_sum(u64 delta, int cpu, struct sched_avg *sa, * = u_0 + u_1*y + u_2*y^2 + ... [re-labeling u_i --> u_{i+1}] */ static __always_inline int -___update_load_sum(u64 now, int cpu, struct sched_avg *sa, +___update_load_sum(u64 now, struct sched_avg *sa, unsigned long load, unsigned long runnable, int running) { u64 delta; @@ -221,7 +215,7 @@ ___update_load_sum(u64 now, int cpu, struct sched_avg *sa, * Step 1: accumulate *_sum since last_update_time. If we haven't * crossed period boundaries, finish. */ - if (!accumulate_sum(delta, cpu, sa, load, runnable, running)) + if (!accumulate_sum(delta, sa, load, runnable, running)) return 0; return 1; @@ -267,9 +261,9 @@ ___update_load_avg(struct sched_avg *sa, unsigned long load, unsigned long runna * runnable_load_avg = \Sum se->avg.runable_load_avg */ -int __update_load_avg_blocked_se(u64 now, int cpu, struct sched_entity *se) +int __update_load_avg_blocked_se(u64 now, struct sched_entity *se) { - if (___update_load_sum(now, cpu, &se->avg, 0, 0, 0)) { + if (___update_load_sum(now, &se->avg, 0, 0, 0)) { ___update_load_avg(&se->avg, se_weight(se), se_runnable(se)); return 1; } @@ -277,9 +271,9 @@ int __update_load_avg_blocked_se(u64 now, int cpu, struct sched_entity *se) return 0; } -int __update_load_avg_se(u64 now, int cpu, struct cfs_rq *cfs_rq, struct sched_entity *se) +int __update_load_avg_se(u64 now, struct cfs_rq *cfs_rq, struct sched_entity *se) { - if (___update_load_sum(now, cpu, &se->avg, !!se->on_rq, !!se->on_rq, + if (___update_load_sum(now, &se->avg, !!se->on_rq, !!se->on_rq, cfs_rq->curr == se)) { ___update_load_avg(&se->avg, se_weight(se), se_runnable(se)); @@ -290,9 +284,9 @@ int __update_load_avg_se(u64 now, int cpu, struct cfs_rq *cfs_rq, struct sched_e return 0; } -int __update_load_avg_cfs_rq(u64 now, int cpu, struct cfs_rq *cfs_rq) +int __update_load_avg_cfs_rq(u64 now, struct cfs_rq *cfs_rq) { - if (___update_load_sum(now, cpu, &cfs_rq->avg, + if (___update_load_sum(now, &cfs_rq->avg, scale_load_down(cfs_rq->load.weight), scale_load_down(cfs_rq->runnable_weight), cfs_rq->curr != NULL)) { @@ -317,7 +311,7 @@ int __update_load_avg_cfs_rq(u64 now, int cpu, struct cfs_rq *cfs_rq) int update_rt_rq_load_avg(u64 now, struct rq *rq, int running) { - if (___update_load_sum(now, rq->cpu, &rq->avg_rt, + if (___update_load_sum(now, &rq->avg_rt, running, running, running)) { @@ -340,7 +334,7 @@ int update_rt_rq_load_avg(u64 now, struct rq *rq, int running) int update_dl_rq_load_avg(u64 now, struct rq *rq, int running) { - if (___update_load_sum(now, rq->cpu, &rq->avg_dl, + if (___update_load_sum(now, &rq->avg_dl, running, running, running)) { @@ -365,22 +359,31 @@ int update_dl_rq_load_avg(u64 now, struct rq *rq, int running) int update_irq_load_avg(struct rq *rq, u64 running) { int ret = 0; + + /* + * We can't use clock_pelt because irq time is not accounted in + * clock_task. Instead we directly scale the running time to + * reflect the real amount of computation + */ + running = cap_scale(running, arch_scale_freq_capacity(cpu_of(rq))); + running = cap_scale(running, arch_scale_cpu_capacity(NULL, cpu_of(rq))); + /* * We know the time that has been used by interrupt since last update * but we don't when. Let be pessimistic and assume that interrupt has * happened just before the update. This is not so far from reality * because interrupt will most probably wake up task and trig an update - * of rq clock during which the metric si updated. + * of rq clock during which the metric is updated. * We start to decay with normal context time and then we add the * interrupt context time. * We can safely remove running from rq->clock because * rq->clock += delta with delta >= running */ - ret = ___update_load_sum(rq->clock - running, rq->cpu, &rq->avg_irq, + ret = ___update_load_sum(rq->clock - running, &rq->avg_irq, 0, 0, 0); - ret += ___update_load_sum(rq->clock, rq->cpu, &rq->avg_irq, + ret += ___update_load_sum(rq->clock, &rq->avg_irq, 1, 1, 1); diff --git a/kernel/sched/pelt.h b/kernel/sched/pelt.h index 7e56b489ff32..7489d5f56960 100644 --- a/kernel/sched/pelt.h +++ b/kernel/sched/pelt.h @@ -1,8 +1,9 @@ #ifdef CONFIG_SMP +#include "sched-pelt.h" -int __update_load_avg_blocked_se(u64 now, int cpu, struct sched_entity *se); -int __update_load_avg_se(u64 now, int cpu, struct cfs_rq *cfs_rq, struct sched_entity *se); -int __update_load_avg_cfs_rq(u64 now, int cpu, struct cfs_rq *cfs_rq); +int __update_load_avg_blocked_se(u64 now, struct sched_entity *se); +int __update_load_avg_se(u64 now, struct cfs_rq *cfs_rq, struct sched_entity *se); +int __update_load_avg_cfs_rq(u64 now, struct cfs_rq *cfs_rq); int update_rt_rq_load_avg(u64 now, struct rq *rq, int running); int update_dl_rq_load_avg(u64 now, struct rq *rq, int running); @@ -42,6 +43,101 @@ static inline void cfs_se_util_change(struct sched_avg *avg) WRITE_ONCE(avg->util_est.enqueued, enqueued); } +/* + * The clock_pelt scales the time to reflect the effective amount of + * computation done during the running delta time but then sync back to + * clock_task when rq is idle. + * + * + * absolute time | 1| 2| 3| 4| 5| 6| 7| 8| 9|10|11|12|13|14|15|16 + * @ max capacity ------******---------------******--------------- + * @ half capacity ------************---------************--------- + * clock pelt | 1| 2| 3| 4| 7| 8| 9| 10| 11|14|15|16 + * + */ +static inline void update_rq_clock_pelt(struct rq *rq, s64 delta) +{ + if (unlikely(is_idle_task(rq->curr))) { + /* The rq is idle, we can sync to clock_task */ + rq->clock_pelt = rq_clock_task(rq); + return; + } + + /* + * When a rq runs at a lower compute capacity, it will need + * more time to do the same amount of work than at max + * capacity. In order to be invariant, we scale the delta to + * reflect how much work has been really done. + * Running longer results in stealing idle time that will + * disturb the load signal compared to max capacity. This + * stolen idle time will be automatically reflected when the + * rq will be idle and the clock will be synced with + * rq_clock_task. + */ + + /* + * Scale the elapsed time to reflect the real amount of + * computation + */ + delta = cap_scale(delta, arch_scale_cpu_capacity(NULL, cpu_of(rq))); + delta = cap_scale(delta, arch_scale_freq_capacity(cpu_of(rq))); + + rq->clock_pelt += delta; +} + +/* + * When rq becomes idle, we have to check if it has lost idle time + * because it was fully busy. A rq is fully used when the /Sum util_sum + * is greater or equal to: + * (LOAD_AVG_MAX - 1024 + rq->cfs.avg.period_contrib) << SCHED_CAPACITY_SHIFT; + * For optimization and computing rounding purpose, we don't take into account + * the position in the current window (period_contrib) and we use the higher + * bound of util_sum to decide. + */ +static inline void update_idle_rq_clock_pelt(struct rq *rq) +{ + u32 divider = ((LOAD_AVG_MAX - 1024) << SCHED_CAPACITY_SHIFT) - LOAD_AVG_MAX; + u32 util_sum = rq->cfs.avg.util_sum; + util_sum += rq->avg_rt.util_sum; + util_sum += rq->avg_dl.util_sum; + + /* + * Reflecting stolen time makes sense only if the idle + * phase would be present at max capacity. As soon as the + * utilization of a rq has reached the maximum value, it is + * considered as an always runnig rq without idle time to + * steal. This potential idle time is considered as lost in + * this case. We keep track of this lost idle time compare to + * rq's clock_task. + */ + if (util_sum >= divider) + rq->lost_idle_time += rq_clock_task(rq) - rq->clock_pelt; +} + +static inline u64 rq_clock_pelt(struct rq *rq) +{ + lockdep_assert_held(&rq->lock); + assert_clock_updated(rq); + + return rq->clock_pelt - rq->lost_idle_time; +} + +#ifdef CONFIG_CFS_BANDWIDTH +/* rq->task_clock normalized against any time this cfs_rq has spent throttled */ +static inline u64 cfs_rq_clock_pelt(struct cfs_rq *cfs_rq) +{ + if (unlikely(cfs_rq->throttle_count)) + return cfs_rq->throttled_clock_task - cfs_rq->throttled_clock_task_time; + + return rq_clock_pelt(rq_of(cfs_rq)) - cfs_rq->throttled_clock_task_time; +} +#else +static inline u64 cfs_rq_clock_pelt(struct cfs_rq *cfs_rq) +{ + return rq_clock_pelt(rq_of(cfs_rq)); +} +#endif + #else static inline int @@ -67,6 +163,18 @@ update_irq_load_avg(struct rq *rq, u64 running) { return 0; } + +static inline u64 rq_clock_pelt(struct rq *rq) +{ + return rq_clock_task(rq); +} + +static inline void +update_rq_clock_pelt(struct rq *rq, s64 delta) { } + +static inline void +update_idle_rq_clock_pelt(struct rq *rq) { } + #endif diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index e4f398ad9e73..90fa23d36565 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1587,7 +1587,7 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) * rt task */ if (rq->curr->sched_class != &rt_sched_class) - update_rt_rq_load_avg(rq_clock_task(rq), rq, 0); + update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 0); return p; } @@ -1596,7 +1596,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p) { update_curr_rt(rq); - update_rt_rq_load_avg(rq_clock_task(rq), rq, 1); + update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 1); /* * The previous task needs to be made eligible for pushing @@ -2325,7 +2325,7 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued) struct sched_rt_entity *rt_se = &p->rt; update_curr_rt(rq); - update_rt_rq_load_avg(rq_clock_task(rq), rq, 1); + update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 1); watchdog(rq, p); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 0ed130fae2a9..fe31bc472f3e 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -861,7 +861,10 @@ struct rq { unsigned int clock_update_flags; u64 clock; - u64 clock_task; + /* Ensure that all clocks are in the same cache line */ + u64 clock_task ____cacheline_aligned; + u64 clock_pelt; + unsigned long lost_idle_time; atomic_t nr_iowait; -- cgit From c546951d9c9300065bad253ecdf1ac59ce9d06c8 Mon Sep 17 00:00:00 2001 From: Andrea Parri Date: Mon, 21 Jan 2019 16:52:40 +0100 Subject: sched/core: Use READ_ONCE()/WRITE_ONCE() in move_queued_task()/task_rq_lock() move_queued_task() synchronizes with task_rq_lock() as follows: move_queued_task() task_rq_lock() [S] ->on_rq = MIGRATING [L] rq = task_rq() WMB (__set_task_cpu()) ACQUIRE (rq->lock); [S] ->cpu = new_cpu [L] ->on_rq where "[L] rq = task_rq()" is ordered before "ACQUIRE (rq->lock)" by an address dependency and, in turn, "ACQUIRE (rq->lock)" is ordered before "[L] ->on_rq" by the ACQUIRE itself. Use READ_ONCE() to load ->cpu in task_rq() (c.f., task_cpu()) to honor this address dependency. Also, mark the accesses to ->cpu and ->on_rq with READ_ONCE()/WRITE_ONCE() to comply with the LKMM. Signed-off-by: Andrea Parri Signed-off-by: Peter Zijlstra (Intel) Cc: Alan Stern Cc: Linus Torvalds Cc: Mike Galbraith Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Link: https://lkml.kernel.org/r/20190121155240.27173-1-andrea.parri@amarulasolutions.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 ++-- kernel/sched/core.c | 9 +++++---- kernel/sched/sched.h | 6 +++--- 3 files changed, 10 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 351c0fe64c85..4112639c2a85 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1745,9 +1745,9 @@ static __always_inline bool need_resched(void) static inline unsigned int task_cpu(const struct task_struct *p) { #ifdef CONFIG_THREAD_INFO_IN_TASK - return p->cpu; + return READ_ONCE(p->cpu); #else - return task_thread_info(p)->cpu; + return READ_ONCE(task_thread_info(p)->cpu); #endif } diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 32e06704565e..ec1b67a195cc 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -107,11 +107,12 @@ struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf) * [L] ->on_rq * RELEASE (rq->lock) * - * If we observe the old CPU in task_rq_lock, the acquire of + * If we observe the old CPU in task_rq_lock(), the acquire of * the old rq->lock will fully serialize against the stores. * - * If we observe the new CPU in task_rq_lock, the acquire will - * pair with the WMB to ensure we must then also see migrating. + * If we observe the new CPU in task_rq_lock(), the address + * dependency headed by '[L] rq = task_rq()' and the acquire + * will pair with the WMB to ensure we then also see migrating. */ if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) { rq_pin_lock(rq, rf); @@ -916,7 +917,7 @@ static struct rq *move_queued_task(struct rq *rq, struct rq_flags *rf, { lockdep_assert_held(&rq->lock); - p->on_rq = TASK_ON_RQ_MIGRATING; + WRITE_ONCE(p->on_rq, TASK_ON_RQ_MIGRATING); dequeue_task(rq, p, DEQUEUE_NOCLOCK); set_task_cpu(p, new_cpu); rq_unlock(rq, rf); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 99e2a7772d16..c688ef5012e5 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1479,9 +1479,9 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) */ smp_wmb(); #ifdef CONFIG_THREAD_INFO_IN_TASK - p->cpu = cpu; + WRITE_ONCE(p->cpu, cpu); #else - task_thread_info(p)->cpu = cpu; + WRITE_ONCE(task_thread_info(p)->cpu, cpu); #endif p->wake_cpu = cpu; #endif @@ -1582,7 +1582,7 @@ static inline int task_on_rq_queued(struct task_struct *p) static inline int task_on_rq_migrating(struct task_struct *p) { - return p->on_rq == TASK_ON_RQ_MIGRATING; + return READ_ONCE(p->on_rq) == TASK_ON_RQ_MIGRATING; } /* -- cgit From 77000bc43da17d5d6bc4ebfaf44d52d43bb69492 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 4 Feb 2019 16:31:04 +0100 Subject: uio: remove the unused iov_for_each macro Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- .clang-format | 1 - include/linux/uio.h | 8 -------- 2 files changed, 9 deletions(-) (limited to 'include/linux') diff --git a/.clang-format b/.clang-format index e6080f5834a3..c144d9c24d5d 100644 --- a/.clang-format +++ b/.clang-format @@ -259,7 +259,6 @@ ForEachMacros: - 'idr_for_each_entry_ul' - 'inet_bind_bucket_for_each' - 'inet_lhash2_for_each_icsk_rcu' - - 'iov_for_each' - 'key_for_each' - 'key_for_each_safe' - 'klp_for_each_func' diff --git a/include/linux/uio.h b/include/linux/uio.h index ecf584f6b82d..87477e1640f9 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -110,14 +110,6 @@ static inline struct iovec iov_iter_iovec(const struct iov_iter *iter) }; } -#define iov_for_each(iov, iter, start) \ - if (iov_iter_type(start) == ITER_IOVEC || \ - iov_iter_type(start) == ITER_KVEC) \ - for (iter = (start); \ - (iter).count && \ - ((iov = iov_iter_iovec(&(iter))), 1); \ - iov_iter_advance(&(iter), (iov).iov_len)) - size_t iov_iter_copy_from_user_atomic(struct page *page, struct iov_iter *i, unsigned long offset, size_t bytes); void iov_iter_advance(struct iov_iter *i, size_t bytes); -- cgit From 278311e417be60f7caef6fcb12bda4da2711ceff Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Mon, 21 Jan 2019 17:59:29 +0800 Subject: kexec, KEYS: Make use of platform keyring for signature verify This patch allows the kexec_file_load syscall to verify the PE signed kernel image signature based on the preboot keys stored in the .platform keyring, as fall back, if the signature verification failed due to not finding the public key in the secondary or builtin keyrings. This commit adds a VERIFY_USE_PLATFORM_KEYRING similar to previous VERIFY_USE_SECONDARY_KEYRING indicating that verify_pkcs7_signature should verify the signature using platform keyring. Also, decrease the error message log level when verification failed with -ENOKEY, so that if called tried multiple time with different keyring it won't generate extra noises. Signed-off-by: Kairui Song Cc: David Howells Acked-by: Dave Young (for kexec_file_load part) [zohar@linux.ibm.com: tweaked the first paragraph of the patch description, and fixed checkpatch warning.] Signed-off-by: Mimi Zohar --- arch/x86/kernel/kexec-bzimage64.c | 14 +++++++++++--- certs/system_keyring.c | 13 ++++++++++++- include/linux/verification.h | 1 + 3 files changed, 24 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index 278cd07228dd..e1215a600064 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -531,9 +531,17 @@ static int bzImage64_cleanup(void *loader_data) #ifdef CONFIG_KEXEC_BZIMAGE_VERIFY_SIG static int bzImage64_verify_sig(const char *kernel, unsigned long kernel_len) { - return verify_pefile_signature(kernel, kernel_len, - VERIFY_USE_SECONDARY_KEYRING, - VERIFYING_KEXEC_PE_SIGNATURE); + int ret; + + ret = verify_pefile_signature(kernel, kernel_len, + VERIFY_USE_SECONDARY_KEYRING, + VERIFYING_KEXEC_PE_SIGNATURE); + if (ret == -ENOKEY && IS_ENABLED(CONFIG_INTEGRITY_PLATFORM_KEYRING)) { + ret = verify_pefile_signature(kernel, kernel_len, + VERIFY_USE_PLATFORM_KEYRING, + VERIFYING_KEXEC_PE_SIGNATURE); + } + return ret; } #endif diff --git a/certs/system_keyring.c b/certs/system_keyring.c index da055e901df4..c05c29ae4d5d 100644 --- a/certs/system_keyring.c +++ b/certs/system_keyring.c @@ -240,11 +240,22 @@ int verify_pkcs7_signature(const void *data, size_t len, #else trusted_keys = builtin_trusted_keys; #endif + } else if (trusted_keys == VERIFY_USE_PLATFORM_KEYRING) { +#ifdef CONFIG_INTEGRITY_PLATFORM_KEYRING + trusted_keys = platform_trusted_keys; +#else + trusted_keys = NULL; +#endif + if (!trusted_keys) { + ret = -ENOKEY; + pr_devel("PKCS#7 platform keyring is not available\n"); + goto error; + } } ret = pkcs7_validate_trust(pkcs7, trusted_keys); if (ret < 0) { if (ret == -ENOKEY) - pr_err("PKCS#7 signature not signed with a trusted key\n"); + pr_devel("PKCS#7 signature not signed with a trusted key\n"); goto error; } diff --git a/include/linux/verification.h b/include/linux/verification.h index cfa4730d607a..018fb5f13d44 100644 --- a/include/linux/verification.h +++ b/include/linux/verification.h @@ -17,6 +17,7 @@ * should be used. */ #define VERIFY_USE_SECONDARY_KEYRING ((struct key *)1UL) +#define VERIFY_USE_PLATFORM_KEYRING ((struct key *)2UL) /* * The use to which an asymmetric key is being put. -- cgit From fdb2410f7702f25f82804a261f90ad03422bd2c3 Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Tue, 22 Jan 2019 14:06:49 -0600 Subject: ima: define ima_post_create_tmpfile() hook and add missing call If tmpfiles can be made persistent, then newly created tmpfiles need to be treated like any other new files in policy. This patch indicates which newly created tmpfiles are in policy, causing the file hash to be calculated on __fput(). Reported-by: Ignaz Forster [rgoldwyn@suse.com: Call ima_post_create_tmpfile() in vfs_tmpfile() as opposed to do_tmpfile(). This will help the case for overlayfs where copy_up is denied while overwriting a file.] Signed-off-by: Goldwyn Rodrigues Signed-off-by: Mimi Zohar --- fs/namei.c | 1 + include/linux/ima.h | 5 +++++ security/integrity/ima/ima_main.c | 35 +++++++++++++++++++++++++++++++++-- 3 files changed, 39 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/namei.c b/fs/namei.c index 914178cdbe94..373a7ec4b09d 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3462,6 +3462,7 @@ struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode, int open_flag) inode->i_state |= I_LINKABLE; spin_unlock(&inode->i_lock); } + ima_post_create_tmpfile(inode); return child; out_err: diff --git a/include/linux/ima.h b/include/linux/ima.h index b5e16b8c50b7..dc12fbcf484c 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -18,6 +18,7 @@ struct linux_binprm; #ifdef CONFIG_IMA extern int ima_bprm_check(struct linux_binprm *bprm); extern int ima_file_check(struct file *file, int mask); +extern void ima_post_create_tmpfile(struct inode *inode); extern void ima_file_free(struct file *file); extern int ima_file_mmap(struct file *file, unsigned long prot); extern int ima_load_data(enum kernel_load_data_id id); @@ -56,6 +57,10 @@ static inline int ima_file_check(struct file *file, int mask) return 0; } +static inline void ima_post_create_tmpfile(struct inode *inode) +{ +} + static inline void ima_file_free(struct file *file) { return; diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index 4ffac4f5c647..357edd140c09 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -396,6 +396,33 @@ int ima_file_check(struct file *file, int mask) } EXPORT_SYMBOL_GPL(ima_file_check); +/** + * ima_post_create_tmpfile - mark newly created tmpfile as new + * @file : newly created tmpfile + * + * No measuring, appraising or auditing of newly created tmpfiles is needed. + * Skip calling process_measurement(), but indicate which newly, created + * tmpfiles are in policy. + */ +void ima_post_create_tmpfile(struct inode *inode) +{ + struct integrity_iint_cache *iint; + int must_appraise; + + must_appraise = ima_must_appraise(inode, MAY_ACCESS, FILE_CHECK); + if (!must_appraise) + return; + + /* Nothing to do if we can't allocate memory */ + iint = integrity_inode_get(inode); + if (!iint) + return; + + /* needed for writing the security xattrs */ + set_bit(IMA_UPDATE_XATTR, &iint->atomic_flags); + iint->ima_file_status = INTEGRITY_PASS; +} + /** * ima_post_path_mknod - mark as a new inode * @dentry: newly created dentry @@ -413,9 +440,13 @@ void ima_post_path_mknod(struct dentry *dentry) if (!must_appraise) return; + /* Nothing to do if we can't allocate memory */ iint = integrity_inode_get(inode); - if (iint) - iint->flags |= IMA_NEW_FILE; + if (!iint) + return; + + /* needed for re-opening empty files */ + iint->flags |= IMA_NEW_FILE; } /** -- cgit From 809ab9371ca0a96b44d9866ad82849410759a45b Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Sat, 26 Jan 2019 00:52:26 -0500 Subject: XArray: Update xa_erase family descriptions xa_erase does not allocate memory and doesn't have a gfp parameter. Update the descriptions of all four variants to be more useful. Signed-off-by: Matthew Wilcox --- include/linux/xarray.h | 12 ++++++------ lib/xarray.c | 17 ++++++++--------- 2 files changed, 14 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/xarray.h b/include/linux/xarray.h index 5d9d318bcf7a..e11841537631 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -526,9 +526,9 @@ static inline void *xa_store_irq(struct xarray *xa, unsigned long index, * @xa: XArray. * @index: Index of entry. * - * This function is the equivalent of calling xa_store() with %NULL as - * the third argument. The XArray does not need to allocate memory, so - * the user does not need to provide GFP flags. + * After this function returns, loading from @index will return %NULL. + * If the index is part of a multi-index entry, all indices will be erased + * and none of the entries will be part of a multi-index entry. * * Context: Any context. Takes and releases the xa_lock while * disabling softirqs. @@ -550,9 +550,9 @@ static inline void *xa_erase_bh(struct xarray *xa, unsigned long index) * @xa: XArray. * @index: Index of entry. * - * This function is the equivalent of calling xa_store() with %NULL as - * the third argument. The XArray does not need to allocate memory, so - * the user does not need to provide GFP flags. + * After this function returns, loading from @index will return %NULL. + * If the index is part of a multi-index entry, all indices will be erased + * and none of the entries will be part of a multi-index entry. * * Context: Process context. Takes and releases the xa_lock while * disabling interrupts. diff --git a/lib/xarray.c b/lib/xarray.c index 81c3171ddde9..fb783bf2a441 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -1294,13 +1294,12 @@ static void *xas_result(struct xa_state *xas, void *curr) * @xa: XArray. * @index: Index into array. * - * If the entry at this index is a multi-index entry then all indices will - * be erased, and the entry will no longer be a multi-index entry. - * This function expects the xa_lock to be held on entry. + * After this function returns, loading from @index will return %NULL. + * If the index is part of a multi-index entry, all indices will be erased + * and none of the entries will be part of a multi-index entry. * - * Context: Any context. Expects xa_lock to be held on entry. May - * release and reacquire xa_lock if @gfp flags permit. - * Return: The old entry at this index. + * Context: Any context. Expects xa_lock to be held on entry. + * Return: The entry which used to be at this index. */ void *__xa_erase(struct xarray *xa, unsigned long index) { @@ -1314,9 +1313,9 @@ EXPORT_SYMBOL(__xa_erase); * @xa: XArray. * @index: Index of entry. * - * This function is the equivalent of calling xa_store() with %NULL as - * the third argument. The XArray does not need to allocate memory, so - * the user does not need to provide GFP flags. + * After this function returns, loading from @index will return %NULL. + * If the index is part of a multi-index entry, all indices will be erased + * and none of the entries will be part of a multi-index entry. * * Context: Any context. Takes and releases the xa_lock. * Return: The entry which used to be at this index. -- cgit From fe6f42cf6eb3183ebd6ab6b0b7dcbee2600c2baa Mon Sep 17 00:00:00 2001 From: Nava kishore Manne Date: Wed, 6 Feb 2019 16:37:19 +0530 Subject: firmware: xilinx: Add zynqmp_pm_get_chipid() API This patch adds a new API to provide access to the hardware related data like soc revision, IDCODE... etc. Signed-off-by: Nava kishore Manne Signed-off-by: Michal Simek --- drivers/firmware/xilinx/zynqmp.c | 24 ++++++++++++++++++++++++ include/linux/firmware/xlnx-zynqmp.h | 2 ++ 2 files changed, 26 insertions(+) (limited to 'include/linux') diff --git a/drivers/firmware/xilinx/zynqmp.c b/drivers/firmware/xilinx/zynqmp.c index 70b50377ae5f..16a23bc4c2c3 100644 --- a/drivers/firmware/xilinx/zynqmp.c +++ b/drivers/firmware/xilinx/zynqmp.c @@ -186,6 +186,29 @@ static int zynqmp_pm_get_api_version(u32 *version) return ret; } +/** + * zynqmp_pm_get_chipid - Get silicon ID registers + * @idcode: IDCODE register + * @version: version register + * + * Return: Returns the status of the operation and the idcode and version + * registers in @idcode and @version. + */ +static int zynqmp_pm_get_chipid(u32 *idcode, u32 *version) +{ + u32 ret_payload[PAYLOAD_ARG_CNT]; + int ret; + + if (!idcode || !version) + return -EINVAL; + + ret = zynqmp_pm_invoke_fn(PM_GET_CHIPID, 0, 0, 0, 0, ret_payload); + *idcode = ret_payload[1]; + *version = ret_payload[2]; + + return ret; +} + /** * zynqmp_pm_get_trustzone_version() - Get secure trustzone firmware version * @version: Returned version value @@ -509,6 +532,7 @@ static int zynqmp_pm_reset_get_status(const enum zynqmp_pm_reset reset, static const struct zynqmp_eemi_ops eemi_ops = { .get_api_version = zynqmp_pm_get_api_version, + .get_chipid = zynqmp_pm_get_chipid, .query_data = zynqmp_pm_query_data, .clock_enable = zynqmp_pm_clock_enable, .clock_disable = zynqmp_pm_clock_disable, diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index 07c587a0b06e..5a1f19848100 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -36,6 +36,7 @@ enum pm_api_id { PM_GET_API_VERSION = 1, PM_RESET_ASSERT = 17, PM_RESET_GET_STATUS, + PM_GET_CHIPID = 24, PM_IOCTL = 34, PM_QUERY_DATA, PM_CLOCK_ENABLE, @@ -224,6 +225,7 @@ struct zynqmp_pm_query_data { struct zynqmp_eemi_ops { int (*get_api_version)(u32 *version); + int (*get_chipid)(u32 *idcode, u32 *version); int (*query_data)(struct zynqmp_pm_query_data qdata, u32 *out); int (*clock_enable)(u32 clock_id); int (*clock_disable)(u32 clock_id); -- cgit From 2292822e1576c89191a65c3d0da584d75d3c033f Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sat, 19 Jan 2019 13:16:53 +0100 Subject: i2c: algo-bit: include main i2c header We are using symbols from it, so we should include it directly. Found after sorting includes in a driver. Signed-off-by: Wolfram Sang Reviewed-by: Simon Horman Signed-off-by: Wolfram Sang --- include/linux/i2c-algo-bit.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/i2c-algo-bit.h b/include/linux/i2c-algo-bit.h index 63904ba6887e..d64cebc6e65a 100644 --- a/include/linux/i2c-algo-bit.h +++ b/include/linux/i2c-algo-bit.h @@ -25,6 +25,8 @@ #ifndef _LINUX_I2C_ALGO_BIT_H #define _LINUX_I2C_ALGO_BIT_H +#include + /* --- Defines for bit-adapters --------------------------------------- */ /* * This struct contains the hw-dependent functions of bit-style adapters to -- cgit From 738ac0679b969776a638daf2cfb5011049d467da Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sat, 19 Jan 2019 13:16:54 +0100 Subject: i2c: algo-bit: convert to SPDX header And use kernel style for the remaining comments in the header. Signed-off-by: Wolfram Sang Reviewed-by: Simon Horman Signed-off-by: Wolfram Sang --- drivers/i2c/algos/i2c-algo-bit.c | 25 ++++++++----------------- include/linux/i2c-algo-bit.h | 31 ++++++++----------------------- 2 files changed, 16 insertions(+), 40 deletions(-) (limited to 'include/linux') diff --git a/drivers/i2c/algos/i2c-algo-bit.c b/drivers/i2c/algos/i2c-algo-bit.c index c33dcfb87993..5e5990a83da5 100644 --- a/drivers/i2c/algos/i2c-algo-bit.c +++ b/drivers/i2c/algos/i2c-algo-bit.c @@ -1,21 +1,12 @@ -/* ------------------------------------------------------------------------- - * i2c-algo-bit.c i2c driver algorithms for bit-shift adapters - * ------------------------------------------------------------------------- +// SPDX-License-Identifier: GPL-2.0+ +/* + * i2c-algo-bit.c: i2c driver algorithms for bit-shift adapters + * * Copyright (C) 1995-2000 Simon G. Vogl - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - * ------------------------------------------------------------------------- */ - -/* With some changes from Frodo Looijaard , Kyösti Mälkki - and Jean Delvare */ + * + * With some changes from Frodo Looijaard , Kyösti Mälkki + * and Jean Delvare + */ #include #include diff --git a/include/linux/i2c-algo-bit.h b/include/linux/i2c-algo-bit.h index d64cebc6e65a..69045df78e2d 100644 --- a/include/linux/i2c-algo-bit.h +++ b/include/linux/i2c-algo-bit.h @@ -1,26 +1,11 @@ -/* ------------------------------------------------------------------------- */ -/* i2c-algo-bit.h i2c driver algorithms for bit-shift adapters */ -/* ------------------------------------------------------------------------- */ -/* Copyright (C) 1995-99 Simon G. Vogl - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301 USA. */ -/* ------------------------------------------------------------------------- */ - -/* With some changes from Kyösti Mälkki and even - Frodo Looijaard */ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * i2c-algo-bit.h: i2c driver algorithms for bit-shift adapters + * + * Copyright (C) 1995-99 Simon G. Vogl + * With some changes from Kyösti Mälkki and even + * Frodo Looijaard + */ #ifndef _LINUX_I2C_ALGO_BIT_H #define _LINUX_I2C_ALGO_BIT_H -- cgit From b525903c254dab2491410f0f23707691b7c2c317 Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Thu, 31 Jan 2019 14:53:58 +0000 Subject: genirq: Provide basic NMI management for interrupt lines Add functionality to allocate interrupt lines that will deliver IRQs as Non-Maskable Interrupts. These allocations are only successful if the irqchip provides the necessary support and allows NMI delivery for the interrupt line. Interrupt lines allocated for NMI delivery must be enabled/disabled through enable_nmi/disable_nmi_nosync to keep their state consistent. To treat a PERCPU IRQ as NMI, the interrupt must not be shared nor threaded, the irqchip directly managing the IRQ must be the root irqchip and the irqchip cannot be behind a slow bus. Signed-off-by: Julien Thierry Reviewed-by: Marc Zyngier Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Marc Zyngier Signed-off-by: Marc Zyngier --- include/linux/interrupt.h | 9 ++ include/linux/irq.h | 7 ++ kernel/irq/debugfs.c | 6 +- kernel/irq/internals.h | 2 + kernel/irq/manage.c | 228 +++++++++++++++++++++++++++++++++++++++++++++- 5 files changed, 249 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index c672f34235e7..9941d1a8d83c 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -156,6 +156,10 @@ __request_percpu_irq(unsigned int irq, irq_handler_t handler, unsigned long flags, const char *devname, void __percpu *percpu_dev_id); +extern int __must_check +request_nmi(unsigned int irq, irq_handler_t handler, unsigned long flags, + const char *name, void *dev); + static inline int __must_check request_percpu_irq(unsigned int irq, irq_handler_t handler, const char *devname, void __percpu *percpu_dev_id) @@ -167,6 +171,8 @@ request_percpu_irq(unsigned int irq, irq_handler_t handler, extern const void *free_irq(unsigned int, void *); extern void free_percpu_irq(unsigned int, void __percpu *); +extern const void *free_nmi(unsigned int irq, void *dev_id); + struct device; extern int __must_check @@ -217,6 +223,9 @@ extern void enable_percpu_irq(unsigned int irq, unsigned int type); extern bool irq_percpu_is_enabled(unsigned int irq); extern void irq_wake_thread(unsigned int irq, void *dev_id); +extern void disable_nmi_nosync(unsigned int irq); +extern void enable_nmi(unsigned int irq); + /* The following three functions are for the core kernel use only. */ extern void suspend_device_irqs(void); extern void resume_device_irqs(void); diff --git a/include/linux/irq.h b/include/linux/irq.h index def2b2aac8b1..a7298e4998c8 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -442,6 +442,8 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) * @irq_set_vcpu_affinity: optional to target a vCPU in a virtual machine * @ipi_send_single: send a single IPI to destination cpus * @ipi_send_mask: send an IPI to destination cpus in cpumask + * @irq_nmi_setup: function called from core code before enabling an NMI + * @irq_nmi_teardown: function called from core code after disabling an NMI * @flags: chip specific flags */ struct irq_chip { @@ -490,6 +492,9 @@ struct irq_chip { void (*ipi_send_single)(struct irq_data *data, unsigned int cpu); void (*ipi_send_mask)(struct irq_data *data, const struct cpumask *dest); + int (*irq_nmi_setup)(struct irq_data *data); + void (*irq_nmi_teardown)(struct irq_data *data); + unsigned long flags; }; @@ -505,6 +510,7 @@ struct irq_chip { * IRQCHIP_ONESHOT_SAFE: One shot does not require mask/unmask * IRQCHIP_EOI_THREADED: Chip requires eoi() on unmask in threaded mode * IRQCHIP_SUPPORTS_LEVEL_MSI Chip can provide two doorbells for Level MSIs + * IRQCHIP_SUPPORTS_NMI: Chip can deliver NMIs, only for root irqchips */ enum { IRQCHIP_SET_TYPE_MASKED = (1 << 0), @@ -515,6 +521,7 @@ enum { IRQCHIP_ONESHOT_SAFE = (1 << 5), IRQCHIP_EOI_THREADED = (1 << 6), IRQCHIP_SUPPORTS_LEVEL_MSI = (1 << 7), + IRQCHIP_SUPPORTS_NMI = (1 << 8), }; #include diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c index 6f636136cccc..59a04d2a66df 100644 --- a/kernel/irq/debugfs.c +++ b/kernel/irq/debugfs.c @@ -56,6 +56,7 @@ static const struct irq_bit_descr irqchip_flags[] = { BIT_MASK_DESCR(IRQCHIP_ONESHOT_SAFE), BIT_MASK_DESCR(IRQCHIP_EOI_THREADED), BIT_MASK_DESCR(IRQCHIP_SUPPORTS_LEVEL_MSI), + BIT_MASK_DESCR(IRQCHIP_SUPPORTS_NMI), }; static void @@ -140,6 +141,7 @@ static const struct irq_bit_descr irqdesc_istates[] = { BIT_MASK_DESCR(IRQS_WAITING), BIT_MASK_DESCR(IRQS_PENDING), BIT_MASK_DESCR(IRQS_SUSPENDED), + BIT_MASK_DESCR(IRQS_NMI), }; @@ -203,8 +205,8 @@ static ssize_t irq_debug_write(struct file *file, const char __user *user_buf, chip_bus_lock(desc); raw_spin_lock_irqsave(&desc->lock, flags); - if (irq_settings_is_level(desc)) { - /* Can't do level, sorry */ + if (irq_settings_is_level(desc) || desc->istate & IRQS_NMI) { + /* Can't do level nor NMIs, sorry */ err = -EINVAL; } else { desc->istate |= IRQS_PENDING; diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index ca6afa267070..2a77cdd27ca9 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -49,6 +49,7 @@ enum { * IRQS_WAITING - irq is waiting * IRQS_PENDING - irq is pending and replayed later * IRQS_SUSPENDED - irq is suspended + * IRQS_NMI - irq line is used to deliver NMIs */ enum { IRQS_AUTODETECT = 0x00000001, @@ -60,6 +61,7 @@ enum { IRQS_PENDING = 0x00000200, IRQS_SUSPENDED = 0x00000800, IRQS_TIMINGS = 0x00001000, + IRQS_NMI = 0x00002000, }; #include "debug.h" diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index a4888ce4667a..9472ae987946 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -341,7 +341,7 @@ irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify) /* The release function is promised process context */ might_sleep(); - if (!desc) + if (!desc || desc->istate & IRQS_NMI) return -EINVAL; /* Complete initialisation of *notify */ @@ -550,6 +550,21 @@ bool disable_hardirq(unsigned int irq) } EXPORT_SYMBOL_GPL(disable_hardirq); +/** + * disable_nmi_nosync - disable an nmi without waiting + * @irq: Interrupt to disable + * + * Disable the selected interrupt line. Disables and enables are + * nested. + * The interrupt to disable must have been requested through request_nmi. + * Unlike disable_nmi(), this function does not ensure existing + * instances of the IRQ handler have completed before returning. + */ +void disable_nmi_nosync(unsigned int irq) +{ + disable_irq_nosync(irq); +} + void __enable_irq(struct irq_desc *desc) { switch (desc->depth) { @@ -606,6 +621,20 @@ out: } EXPORT_SYMBOL(enable_irq); +/** + * enable_nmi - enable handling of an nmi + * @irq: Interrupt to enable + * + * The interrupt to enable must have been requested through request_nmi. + * Undoes the effect of one call to disable_nmi(). If this + * matches the last disable, processing of interrupts on this + * IRQ line is re-enabled. + */ +void enable_nmi(unsigned int irq) +{ + enable_irq(irq); +} + static int set_irq_wake_real(unsigned int irq, unsigned int on) { struct irq_desc *desc = irq_to_desc(irq); @@ -641,6 +670,12 @@ int irq_set_irq_wake(unsigned int irq, unsigned int on) if (!desc) return -EINVAL; + /* Don't use NMIs as wake up interrupts please */ + if (desc->istate & IRQS_NMI) { + ret = -EINVAL; + goto out_unlock; + } + /* wakeup-capable irqs can be shared between drivers that * don't need to have the same sleep mode behaviors. */ @@ -663,6 +698,8 @@ int irq_set_irq_wake(unsigned int irq, unsigned int on) irqd_clear(&desc->irq_data, IRQD_WAKEUP_STATE); } } + +out_unlock: irq_put_desc_busunlock(desc, flags); return ret; } @@ -1125,6 +1162,39 @@ static void irq_release_resources(struct irq_desc *desc) c->irq_release_resources(d); } +static bool irq_supports_nmi(struct irq_desc *desc) +{ + struct irq_data *d = irq_desc_get_irq_data(desc); + +#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY + /* Only IRQs directly managed by the root irqchip can be set as NMI */ + if (d->parent_data) + return false; +#endif + /* Don't support NMIs for chips behind a slow bus */ + if (d->chip->irq_bus_lock || d->chip->irq_bus_sync_unlock) + return false; + + return d->chip->flags & IRQCHIP_SUPPORTS_NMI; +} + +static int irq_nmi_setup(struct irq_desc *desc) +{ + struct irq_data *d = irq_desc_get_irq_data(desc); + struct irq_chip *c = d->chip; + + return c->irq_nmi_setup ? c->irq_nmi_setup(d) : -EINVAL; +} + +static void irq_nmi_teardown(struct irq_desc *desc) +{ + struct irq_data *d = irq_desc_get_irq_data(desc); + struct irq_chip *c = d->chip; + + if (c->irq_nmi_teardown) + c->irq_nmi_teardown(d); +} + static int setup_irq_thread(struct irqaction *new, unsigned int irq, bool secondary) { @@ -1299,9 +1369,17 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) * fields must have IRQF_SHARED set and the bits which * set the trigger type must match. Also all must * agree on ONESHOT. + * Interrupt lines used for NMIs cannot be shared. */ unsigned int oldtype; + if (desc->istate & IRQS_NMI) { + pr_err("Invalid attempt to share NMI for %s (irq %d) on irqchip %s.\n", + new->name, irq, desc->irq_data.chip->name); + ret = -EINVAL; + goto out_unlock; + } + /* * If nobody did set the configuration before, inherit * the one provided by the requester. @@ -1753,6 +1831,59 @@ const void *free_irq(unsigned int irq, void *dev_id) } EXPORT_SYMBOL(free_irq); +/* This function must be called with desc->lock held */ +static const void *__cleanup_nmi(unsigned int irq, struct irq_desc *desc) +{ + const char *devname = NULL; + + desc->istate &= ~IRQS_NMI; + + if (!WARN_ON(desc->action == NULL)) { + irq_pm_remove_action(desc, desc->action); + devname = desc->action->name; + unregister_handler_proc(irq, desc->action); + + kfree(desc->action); + desc->action = NULL; + } + + irq_settings_clr_disable_unlazy(desc); + irq_shutdown(desc); + + irq_release_resources(desc); + + irq_chip_pm_put(&desc->irq_data); + module_put(desc->owner); + + return devname; +} + +const void *free_nmi(unsigned int irq, void *dev_id) +{ + struct irq_desc *desc = irq_to_desc(irq); + unsigned long flags; + const void *devname; + + if (!desc || WARN_ON(!(desc->istate & IRQS_NMI))) + return NULL; + + if (WARN_ON(irq_settings_is_per_cpu_devid(desc))) + return NULL; + + /* NMI still enabled */ + if (WARN_ON(desc->depth == 0)) + disable_nmi_nosync(irq); + + raw_spin_lock_irqsave(&desc->lock, flags); + + irq_nmi_teardown(desc); + devname = __cleanup_nmi(irq, desc); + + raw_spin_unlock_irqrestore(&desc->lock, flags); + + return devname; +} + /** * request_threaded_irq - allocate an interrupt line * @irq: Interrupt line to allocate @@ -1922,6 +2053,101 @@ int request_any_context_irq(unsigned int irq, irq_handler_t handler, } EXPORT_SYMBOL_GPL(request_any_context_irq); +/** + * request_nmi - allocate an interrupt line for NMI delivery + * @irq: Interrupt line to allocate + * @handler: Function to be called when the IRQ occurs. + * Threaded handler for threaded interrupts. + * @irqflags: Interrupt type flags + * @name: An ascii name for the claiming device + * @dev_id: A cookie passed back to the handler function + * + * This call allocates interrupt resources and enables the + * interrupt line and IRQ handling. It sets up the IRQ line + * to be handled as an NMI. + * + * An interrupt line delivering NMIs cannot be shared and IRQ handling + * cannot be threaded. + * + * Interrupt lines requested for NMI delivering must produce per cpu + * interrupts and have auto enabling setting disabled. + * + * Dev_id must be globally unique. Normally the address of the + * device data structure is used as the cookie. Since the handler + * receives this value it makes sense to use it. + * + * If the interrupt line cannot be used to deliver NMIs, function + * will fail and return a negative value. + */ +int request_nmi(unsigned int irq, irq_handler_t handler, + unsigned long irqflags, const char *name, void *dev_id) +{ + struct irqaction *action; + struct irq_desc *desc; + unsigned long flags; + int retval; + + if (irq == IRQ_NOTCONNECTED) + return -ENOTCONN; + + /* NMI cannot be shared, used for Polling */ + if (irqflags & (IRQF_SHARED | IRQF_COND_SUSPEND | IRQF_IRQPOLL)) + return -EINVAL; + + if (!(irqflags & IRQF_PERCPU)) + return -EINVAL; + + if (!handler) + return -EINVAL; + + desc = irq_to_desc(irq); + + if (!desc || irq_settings_can_autoenable(desc) || + !irq_settings_can_request(desc) || + WARN_ON(irq_settings_is_per_cpu_devid(desc)) || + !irq_supports_nmi(desc)) + return -EINVAL; + + action = kzalloc(sizeof(struct irqaction), GFP_KERNEL); + if (!action) + return -ENOMEM; + + action->handler = handler; + action->flags = irqflags | IRQF_NO_THREAD | IRQF_NOBALANCING; + action->name = name; + action->dev_id = dev_id; + + retval = irq_chip_pm_get(&desc->irq_data); + if (retval < 0) + goto err_out; + + retval = __setup_irq(irq, desc, action); + if (retval) + goto err_irq_setup; + + raw_spin_lock_irqsave(&desc->lock, flags); + + /* Setup NMI state */ + desc->istate |= IRQS_NMI; + retval = irq_nmi_setup(desc); + if (retval) { + __cleanup_nmi(irq, desc); + raw_spin_unlock_irqrestore(&desc->lock, flags); + return -EINVAL; + } + + raw_spin_unlock_irqrestore(&desc->lock, flags); + + return 0; + +err_irq_setup: + irq_chip_pm_put(&desc->irq_data); +err_out: + kfree(action); + + return retval; +} + void enable_percpu_irq(unsigned int irq, unsigned int type) { unsigned int cpu = smp_processor_id(); -- cgit From 4b078c3f1a26487c39363089ba0d5c6b09f2a89f Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Thu, 31 Jan 2019 14:53:59 +0000 Subject: genirq: Provide NMI management for percpu_devid interrupts Add support for percpu_devid interrupts treated as NMIs. Percpu_devid NMIs need to be setup/torn down on each CPU they target. The same restrictions as for global NMIs still apply for percpu_devid NMIs. Signed-off-by: Julien Thierry Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Marc Zyngier Signed-off-by: Marc Zyngier --- include/linux/interrupt.h | 9 +++ kernel/irq/manage.c | 177 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 186 insertions(+) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 9941d1a8d83c..831ddcdc5597 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -168,10 +168,15 @@ request_percpu_irq(unsigned int irq, irq_handler_t handler, devname, percpu_dev_id); } +extern int __must_check +request_percpu_nmi(unsigned int irq, irq_handler_t handler, + const char *devname, void __percpu *dev); + extern const void *free_irq(unsigned int, void *); extern void free_percpu_irq(unsigned int, void __percpu *); extern const void *free_nmi(unsigned int irq, void *dev_id); +extern void free_percpu_nmi(unsigned int irq, void __percpu *percpu_dev_id); struct device; @@ -224,7 +229,11 @@ extern bool irq_percpu_is_enabled(unsigned int irq); extern void irq_wake_thread(unsigned int irq, void *dev_id); extern void disable_nmi_nosync(unsigned int irq); +extern void disable_percpu_nmi(unsigned int irq); extern void enable_nmi(unsigned int irq); +extern void enable_percpu_nmi(unsigned int irq, unsigned int type); +extern int prepare_percpu_nmi(unsigned int irq); +extern void teardown_percpu_nmi(unsigned int irq); /* The following three functions are for the core kernel use only. */ extern void suspend_device_irqs(void); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 9472ae987946..0a1ebc004a59 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -2182,6 +2182,11 @@ out: } EXPORT_SYMBOL_GPL(enable_percpu_irq); +void enable_percpu_nmi(unsigned int irq, unsigned int type) +{ + enable_percpu_irq(irq, type); +} + /** * irq_percpu_is_enabled - Check whether the per cpu irq is enabled * @irq: Linux irq number to check for @@ -2221,6 +2226,11 @@ void disable_percpu_irq(unsigned int irq) } EXPORT_SYMBOL_GPL(disable_percpu_irq); +void disable_percpu_nmi(unsigned int irq) +{ + disable_percpu_irq(irq); +} + /* * Internal function to unregister a percpu irqaction. */ @@ -2252,6 +2262,8 @@ static struct irqaction *__free_percpu_irq(unsigned int irq, void __percpu *dev_ /* Found it - now remove it from the list of entries: */ desc->action = NULL; + desc->istate &= ~IRQS_NMI; + raw_spin_unlock_irqrestore(&desc->lock, flags); unregister_handler_proc(irq, action); @@ -2305,6 +2317,19 @@ void free_percpu_irq(unsigned int irq, void __percpu *dev_id) } EXPORT_SYMBOL_GPL(free_percpu_irq); +void free_percpu_nmi(unsigned int irq, void __percpu *dev_id) +{ + struct irq_desc *desc = irq_to_desc(irq); + + if (!desc || !irq_settings_is_per_cpu_devid(desc)) + return; + + if (WARN_ON(!(desc->istate & IRQS_NMI))) + return; + + kfree(__free_percpu_irq(irq, dev_id)); +} + /** * setup_percpu_irq - setup a per-cpu interrupt * @irq: Interrupt line to setup @@ -2394,6 +2419,158 @@ int __request_percpu_irq(unsigned int irq, irq_handler_t handler, } EXPORT_SYMBOL_GPL(__request_percpu_irq); +/** + * request_percpu_nmi - allocate a percpu interrupt line for NMI delivery + * @irq: Interrupt line to allocate + * @handler: Function to be called when the IRQ occurs. + * @name: An ascii name for the claiming device + * @dev_id: A percpu cookie passed back to the handler function + * + * This call allocates interrupt resources for a per CPU NMI. Per CPU NMIs + * have to be setup on each CPU by calling ready_percpu_nmi() before being + * enabled on the same CPU by using enable_percpu_nmi(). + * + * Dev_id must be globally unique. It is a per-cpu variable, and + * the handler gets called with the interrupted CPU's instance of + * that variable. + * + * Interrupt lines requested for NMI delivering should have auto enabling + * setting disabled. + * + * If the interrupt line cannot be used to deliver NMIs, function + * will fail returning a negative value. + */ +int request_percpu_nmi(unsigned int irq, irq_handler_t handler, + const char *name, void __percpu *dev_id) +{ + struct irqaction *action; + struct irq_desc *desc; + unsigned long flags; + int retval; + + if (!handler) + return -EINVAL; + + desc = irq_to_desc(irq); + + if (!desc || !irq_settings_can_request(desc) || + !irq_settings_is_per_cpu_devid(desc) || + irq_settings_can_autoenable(desc) || + !irq_supports_nmi(desc)) + return -EINVAL; + + /* The line cannot already be NMI */ + if (desc->istate & IRQS_NMI) + return -EINVAL; + + action = kzalloc(sizeof(struct irqaction), GFP_KERNEL); + if (!action) + return -ENOMEM; + + action->handler = handler; + action->flags = IRQF_PERCPU | IRQF_NO_SUSPEND | IRQF_NO_THREAD + | IRQF_NOBALANCING; + action->name = name; + action->percpu_dev_id = dev_id; + + retval = irq_chip_pm_get(&desc->irq_data); + if (retval < 0) + goto err_out; + + retval = __setup_irq(irq, desc, action); + if (retval) + goto err_irq_setup; + + raw_spin_lock_irqsave(&desc->lock, flags); + desc->istate |= IRQS_NMI; + raw_spin_unlock_irqrestore(&desc->lock, flags); + + return 0; + +err_irq_setup: + irq_chip_pm_put(&desc->irq_data); +err_out: + kfree(action); + + return retval; +} + +/** + * prepare_percpu_nmi - performs CPU local setup for NMI delivery + * @irq: Interrupt line to prepare for NMI delivery + * + * This call prepares an interrupt line to deliver NMI on the current CPU, + * before that interrupt line gets enabled with enable_percpu_nmi(). + * + * As a CPU local operation, this should be called from non-preemptible + * context. + * + * If the interrupt line cannot be used to deliver NMIs, function + * will fail returning a negative value. + */ +int prepare_percpu_nmi(unsigned int irq) +{ + unsigned long flags; + struct irq_desc *desc; + int ret = 0; + + WARN_ON(preemptible()); + + desc = irq_get_desc_lock(irq, &flags, + IRQ_GET_DESC_CHECK_PERCPU); + if (!desc) + return -EINVAL; + + if (WARN(!(desc->istate & IRQS_NMI), + KERN_ERR "prepare_percpu_nmi called for a non-NMI interrupt: irq %u\n", + irq)) { + ret = -EINVAL; + goto out; + } + + ret = irq_nmi_setup(desc); + if (ret) { + pr_err("Failed to setup NMI delivery: irq %u\n", irq); + goto out; + } + +out: + irq_put_desc_unlock(desc, flags); + return ret; +} + +/** + * teardown_percpu_nmi - undoes NMI setup of IRQ line + * @irq: Interrupt line from which CPU local NMI configuration should be + * removed + * + * This call undoes the setup done by prepare_percpu_nmi(). + * + * IRQ line should not be enabled for the current CPU. + * + * As a CPU local operation, this should be called from non-preemptible + * context. + */ +void teardown_percpu_nmi(unsigned int irq) +{ + unsigned long flags; + struct irq_desc *desc; + + WARN_ON(preemptible()); + + desc = irq_get_desc_lock(irq, &flags, + IRQ_GET_DESC_CHECK_PERCPU); + if (!desc) + return; + + if (WARN_ON(!(desc->istate & IRQS_NMI))) + goto out; + + irq_nmi_teardown(desc); +out: + irq_put_desc_unlock(desc, flags); +} + /** * irq_get_irqchip_state - returns the irqchip state of a interrupt. * @irq: Interrupt line that is forwarded to a VM -- cgit From 2dcf1fbcad352baaa5f47b17e57c5743c8eedbad Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Thu, 31 Jan 2019 14:54:00 +0000 Subject: genirq: Provide NMI handlers Provide flow handlers that are NMI safe for interrupts and percpu_devid interrupts. Signed-off-by: Julien Thierry Acked-by: Marc Zyngier Cc: Thomas Gleixner Cc: Marc Zyngier Cc: Peter Zijlstra Signed-off-by: Marc Zyngier --- include/linux/irq.h | 3 +++ kernel/irq/chip.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index a7298e4998c8..5e91f6bcaacd 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -601,6 +601,9 @@ extern void handle_percpu_devid_irq(struct irq_desc *desc); extern void handle_bad_irq(struct irq_desc *desc); extern void handle_nested_irq(unsigned int irq); +extern void handle_fasteoi_nmi(struct irq_desc *desc); +extern void handle_percpu_devid_fasteoi_nmi(struct irq_desc *desc); + extern int irq_chip_compose_msi_msg(struct irq_data *data, struct msi_msg *msg); extern int irq_chip_pm_get(struct irq_data *data); extern int irq_chip_pm_put(struct irq_data *data); diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 34e969069488..c32d5f386f68 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -729,6 +729,37 @@ out: } EXPORT_SYMBOL_GPL(handle_fasteoi_irq); +/** + * handle_fasteoi_nmi - irq handler for NMI interrupt lines + * @desc: the interrupt description structure for this irq + * + * A simple NMI-safe handler, considering the restrictions + * from request_nmi. + * + * Only a single callback will be issued to the chip: an ->eoi() + * call when the interrupt has been serviced. This enables support + * for modern forms of interrupt handlers, which handle the flow + * details in hardware, transparently. + */ +void handle_fasteoi_nmi(struct irq_desc *desc) +{ + struct irq_chip *chip = irq_desc_get_chip(desc); + struct irqaction *action = desc->action; + unsigned int irq = irq_desc_get_irq(desc); + irqreturn_t res; + + trace_irq_handler_entry(irq, action); + /* + * NMIs cannot be shared, there is only one action. + */ + res = action->handler(irq, action->dev_id); + trace_irq_handler_exit(irq, action, res); + + if (chip->irq_eoi) + chip->irq_eoi(&desc->irq_data); +} +EXPORT_SYMBOL_GPL(handle_fasteoi_nmi); + /** * handle_edge_irq - edge type IRQ handler * @desc: the interrupt description structure for this irq @@ -908,6 +939,29 @@ void handle_percpu_devid_irq(struct irq_desc *desc) chip->irq_eoi(&desc->irq_data); } +/** + * handle_percpu_devid_fasteoi_nmi - Per CPU local NMI handler with per cpu + * dev ids + * @desc: the interrupt description structure for this irq + * + * Similar to handle_fasteoi_nmi, but handling the dev_id cookie + * as a percpu pointer. + */ +void handle_percpu_devid_fasteoi_nmi(struct irq_desc *desc) +{ + struct irq_chip *chip = irq_desc_get_chip(desc); + struct irqaction *action = desc->action; + unsigned int irq = irq_desc_get_irq(desc); + irqreturn_t res; + + trace_irq_handler_entry(irq, action); + res = action->handler(irq, raw_cpu_ptr(action->percpu_dev_id)); + trace_irq_handler_exit(irq, action, res); + + if (chip->irq_eoi) + chip->irq_eoi(&desc->irq_data); +} + static void __irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle, int is_chained, const char *name) -- cgit From 6e4933a006616343f66c4702dc4fc56bb25e7b02 Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Thu, 31 Jan 2019 14:54:01 +0000 Subject: irqdesc: Add domain handler for NMIs NMI handling code should be executed between calls to nmi_enter and nmi_exit. Add a separate domain handler to properly setup NMI context when handling an interrupt requested as NMI. Signed-off-by: Julien Thierry Acked-by: Marc Zyngier Cc: Thomas Gleixner Cc: Marc Zyngier Cc: Will Deacon Cc: Peter Zijlstra Signed-off-by: Marc Zyngier --- include/linux/irqdesc.h | 5 +++++ kernel/irq/irqdesc.c | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index dd1e40ddac7d..ba05b0d6401a 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -171,6 +171,11 @@ static inline int handle_domain_irq(struct irq_domain *domain, { return __handle_domain_irq(domain, hwirq, true, regs); } + +#ifdef CONFIG_IRQ_DOMAIN +int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq, + struct pt_regs *regs); +#endif #endif /* Test to see if a driver has successfully requested an irq */ diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index ee062b7939d3..a1d7a7d484e0 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -669,6 +669,41 @@ int __handle_domain_irq(struct irq_domain *domain, unsigned int hwirq, set_irq_regs(old_regs); return ret; } + +#ifdef CONFIG_IRQ_DOMAIN +/** + * handle_domain_nmi - Invoke the handler for a HW irq belonging to a domain + * @domain: The domain where to perform the lookup + * @hwirq: The HW irq number to convert to a logical one + * @regs: Register file coming from the low-level handling code + * + * Returns: 0 on success, or -EINVAL if conversion has failed + */ +int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq, + struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + unsigned int irq; + int ret = 0; + + nmi_enter(); + + irq = irq_find_mapping(domain, hwirq); + + /* + * ack_bad_irq is not NMI-safe, just report + * an invalid interrupt. + */ + if (likely(irq)) + generic_handle_irq(irq); + else + ret = -EINVAL; + + nmi_exit(); + set_irq_regs(old_regs); + return ret; +} +#endif #endif /* Dynamic interrupt handling */ -- cgit From 972248e9111ee6fe9fb56c24ecfd7434f3d713ac Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 29 Jan 2019 09:32:03 +0100 Subject: scsi: bsg-lib: handle bidi requests without block layer help We can just stash away the second request in struct bsg_job instead of using the block layer req->next_rq field, allowing for the eventual removal of the latter. Signed-off-by: Christoph Hellwig Acked-by: Jens Axboe Signed-off-by: Martin K. Petersen --- block/bsg-lib.c | 44 +++++++++++++++++++++---- block/bsg.c | 68 ++++++++------------------------------- drivers/scsi/scsi_transport_sas.c | 1 - include/linux/bsg-lib.h | 4 +++ 4 files changed, 56 insertions(+), 61 deletions(-) (limited to 'include/linux') diff --git a/block/bsg-lib.c b/block/bsg-lib.c index 192129856342..005e2b75d775 100644 --- a/block/bsg-lib.c +++ b/block/bsg-lib.c @@ -51,11 +51,40 @@ static int bsg_transport_fill_hdr(struct request *rq, struct sg_io_v4 *hdr, fmode_t mode) { struct bsg_job *job = blk_mq_rq_to_pdu(rq); + int ret; job->request_len = hdr->request_len; job->request = memdup_user(uptr64(hdr->request), hdr->request_len); + if (IS_ERR(job->request)) + return PTR_ERR(job->request); + + if (hdr->dout_xfer_len && hdr->din_xfer_len) { + job->bidi_rq = blk_get_request(rq->q, REQ_OP_SCSI_IN, 0); + if (IS_ERR(job->bidi_rq)) { + ret = PTR_ERR(job->bidi_rq); + goto out; + } + + ret = blk_rq_map_user(rq->q, job->bidi_rq, NULL, + uptr64(hdr->din_xferp), hdr->din_xfer_len, + GFP_KERNEL); + if (ret) + goto out_free_bidi_rq; + + job->bidi_bio = job->bidi_rq->bio; + } else { + job->bidi_rq = NULL; + job->bidi_bio = NULL; + } - return PTR_ERR_OR_ZERO(job->request); + return 0; + +out_free_bidi_rq: + if (job->bidi_rq) + blk_put_request(job->bidi_rq); +out: + kfree(job->request); + return ret; } static int bsg_transport_complete_rq(struct request *rq, struct sg_io_v4 *hdr) @@ -93,7 +122,7 @@ static int bsg_transport_complete_rq(struct request *rq, struct sg_io_v4 *hdr) /* we assume all request payload was transferred, residual == 0 */ hdr->dout_resid = 0; - if (rq->next_rq) { + if (job->bidi_rq) { unsigned int rsp_len = job->reply_payload.payload_len; if (WARN_ON(job->reply_payload_rcv_len > rsp_len)) @@ -111,6 +140,11 @@ static void bsg_transport_free_rq(struct request *rq) { struct bsg_job *job = blk_mq_rq_to_pdu(rq); + if (job->bidi_rq) { + blk_rq_unmap_user(job->bidi_bio); + blk_put_request(job->bidi_rq); + } + kfree(job->request); } @@ -200,7 +234,6 @@ static int bsg_map_buffer(struct bsg_buffer *buf, struct request *req) */ static bool bsg_prepare_job(struct device *dev, struct request *req) { - struct request *rsp = req->next_rq; struct bsg_job *job = blk_mq_rq_to_pdu(req); int ret; @@ -211,8 +244,8 @@ static bool bsg_prepare_job(struct device *dev, struct request *req) if (ret) goto failjob_rls_job; } - if (rsp && rsp->bio) { - ret = bsg_map_buffer(&job->reply_payload, rsp); + if (job->bidi_rq) { + ret = bsg_map_buffer(&job->reply_payload, job->bidi_rq); if (ret) goto failjob_rls_rqst_payload; } @@ -369,7 +402,6 @@ struct request_queue *bsg_setup_queue(struct device *dev, const char *name, } q->queuedata = dev; - blk_queue_flag_set(QUEUE_FLAG_BIDI, q); blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT); ret = bsg_register_queue(q, dev, name, &bsg_transport_ops); diff --git a/block/bsg.c b/block/bsg.c index a799b0ace55c..f306853c6b08 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -74,6 +74,11 @@ static int bsg_scsi_fill_hdr(struct request *rq, struct sg_io_v4 *hdr, { struct scsi_request *sreq = scsi_req(rq); + if (hdr->dout_xfer_len && hdr->din_xfer_len) { + pr_warn_once("BIDI support in bsg has been removed.\n"); + return -EOPNOTSUPP; + } + sreq->cmd_len = hdr->request_len; if (sreq->cmd_len > BLK_MAX_CDB) { sreq->cmd = kzalloc(sreq->cmd_len, GFP_KERNEL); @@ -114,14 +119,10 @@ static int bsg_scsi_complete_rq(struct request *rq, struct sg_io_v4 *hdr) hdr->response_len = len; } - if (rq->next_rq) { - hdr->dout_resid = sreq->resid_len; - hdr->din_resid = scsi_req(rq->next_rq)->resid_len; - } else if (rq_data_dir(rq) == READ) { + if (rq_data_dir(rq) == READ) hdr->din_resid = sreq->resid_len; - } else { + else hdr->dout_resid = sreq->resid_len; - } return ret; } @@ -140,8 +141,8 @@ static const struct bsg_ops bsg_scsi_ops = { static int bsg_sg_io(struct request_queue *q, fmode_t mode, void __user *uarg) { - struct request *rq, *next_rq = NULL; - struct bio *bio, *bidi_bio = NULL; + struct request *rq; + struct bio *bio; struct sg_io_v4 hdr; int ret; @@ -164,7 +165,7 @@ static int bsg_sg_io(struct request_queue *q, fmode_t mode, void __user *uarg) ret = q->bsg_dev.ops->fill_hdr(rq, &hdr, mode); if (ret) - goto out; + return ret; rq->timeout = msecs_to_jiffies(hdr.timeout); if (!rq->timeout) @@ -174,29 +175,6 @@ static int bsg_sg_io(struct request_queue *q, fmode_t mode, void __user *uarg) if (rq->timeout < BLK_MIN_SG_TIMEOUT) rq->timeout = BLK_MIN_SG_TIMEOUT; - if (hdr.dout_xfer_len && hdr.din_xfer_len) { - if (!test_bit(QUEUE_FLAG_BIDI, &q->queue_flags)) { - ret = -EOPNOTSUPP; - goto out; - } - - pr_warn_once( - "BIDI support in bsg has been deprecated and might be removed. " - "Please report your use case to linux-scsi@vger.kernel.org\n"); - - next_rq = blk_get_request(q, REQ_OP_SCSI_IN, 0); - if (IS_ERR(next_rq)) { - ret = PTR_ERR(next_rq); - goto out; - } - - rq->next_rq = next_rq; - ret = blk_rq_map_user(q, next_rq, NULL, uptr64(hdr.din_xferp), - hdr.din_xfer_len, GFP_KERNEL); - if (ret) - goto out_free_nextrq; - } - if (hdr.dout_xfer_len) { ret = blk_rq_map_user(q, rq, NULL, uptr64(hdr.dout_xferp), hdr.dout_xfer_len, GFP_KERNEL); @@ -206,38 +184,20 @@ static int bsg_sg_io(struct request_queue *q, fmode_t mode, void __user *uarg) } if (ret) - goto out_unmap_nextrq; + goto out_free_rq; bio = rq->bio; - if (rq->next_rq) - bidi_bio = rq->next_rq->bio; blk_execute_rq(q, NULL, rq, !(hdr.flags & BSG_FLAG_Q_AT_TAIL)); ret = rq->q->bsg_dev.ops->complete_rq(rq, &hdr); - - if (rq->next_rq) { - blk_rq_unmap_user(bidi_bio); - blk_put_request(rq->next_rq); - } - blk_rq_unmap_user(bio); + +out_free_rq: rq->q->bsg_dev.ops->free_rq(rq); blk_put_request(rq); - - if (copy_to_user(uarg, &hdr, sizeof(hdr))) + if (!ret && copy_to_user(uarg, &hdr, sizeof(hdr))) return -EFAULT; return ret; - -out_unmap_nextrq: - if (rq->next_rq) - blk_rq_unmap_user(rq->next_rq->bio); -out_free_nextrq: - if (rq->next_rq) - blk_put_request(rq->next_rq); -out: - q->bsg_dev.ops->free_rq(rq); - blk_put_request(rq); - return ret; } static struct bsg_device *bsg_alloc_device(void) diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c index 692b46937e52..60f1a81d2034 100644 --- a/drivers/scsi/scsi_transport_sas.c +++ b/drivers/scsi/scsi_transport_sas.c @@ -213,7 +213,6 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy) to_sas_host_attrs(shost)->q = q; } - blk_queue_flag_set(QUEUE_FLAG_BIDI, q); return 0; } diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h index b356e0006731..7f14517a559b 100644 --- a/include/linux/bsg-lib.h +++ b/include/linux/bsg-lib.h @@ -69,6 +69,10 @@ struct bsg_job { int result; unsigned int reply_payload_rcv_len; + /* BIDI support */ + struct request *bidi_rq; + struct bio *bidi_bio; + void *dd_data; /* Used for driver-specific storage */ }; -- cgit From 69ed175c195595c73901e18366cb0ebeaeb68b8a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 9 Nov 2018 19:35:11 +0100 Subject: scsi: block: remove req->special No users left. Signed-off-by: Christoph Hellwig Acked-by: Jens Axboe Signed-off-by: Martin K. Petersen --- block/blk-mq.c | 1 - drivers/scsi/sd.c | 2 -- include/linux/blkdev.h | 2 -- 3 files changed, 5 deletions(-) (limited to 'include/linux') diff --git a/block/blk-mq.c b/block/blk-mq.c index 3ba37b9e15e9..502cbf964a3b 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -331,7 +331,6 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, #if defined(CONFIG_BLK_DEV_INTEGRITY) rq->nr_integrity_segments = 0; #endif - rq->special = NULL; /* tag was already set */ rq->extra_len = 0; WRITE_ONCE(rq->deadline, 0); diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 3db9b1fe7516..c124459041dc 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1171,8 +1171,6 @@ static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *cmd) if (ret != BLK_STS_OK) return ret; - WARN_ON_ONCE(cmd != rq->special); - if (!scsi_device_online(sdp) || sdp->changed) { scmd_printk(KERN_ERR, cmd, "device offline or changed\n"); return BLK_STS_IOERR; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 338604dff7d0..fd1450d53f1c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -216,8 +216,6 @@ struct request { unsigned short write_hint; unsigned short ioprio; - void *special; /* opaque pointer available for LLD use */ - unsigned int extra_len; /* length of alignment and padding */ enum mq_rq_state state; -- cgit From 8b3238cabd50e2715b6544e724e74685209b190a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 6 Dec 2018 08:01:10 -0800 Subject: scsi: block: remove bidi support Unused now, and another field in struct request bites the dust. Signed-off-by: Christoph Hellwig Acked-by: Jens Axboe Signed-off-by: Martin K. Petersen --- block/blk-mq-debugfs.c | 1 - block/blk-mq.c | 3 --- include/linux/blkdev.h | 6 ------ 3 files changed, 10 deletions(-) (limited to 'include/linux') diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 90d68760af08..ac832547160a 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -115,7 +115,6 @@ static int queue_pm_only_show(void *data, struct seq_file *m) static const char *const blk_queue_flag_name[] = { QUEUE_FLAG_NAME(STOPPED), QUEUE_FLAG_NAME(DYING), - QUEUE_FLAG_NAME(BIDI), QUEUE_FLAG_NAME(NOMERGES), QUEUE_FLAG_NAME(SAME_COMP), QUEUE_FLAG_NAME(FAIL_IO), diff --git a/block/blk-mq.c b/block/blk-mq.c index 502cbf964a3b..820d131a6893 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -339,7 +339,6 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, rq->end_io = NULL; rq->end_io_data = NULL; - rq->next_rq = NULL; data->ctx->rq_dispatched[op_is_sync(op)]++; refcount_set(&rq->ref, 1); @@ -549,8 +548,6 @@ inline void __blk_mq_end_request(struct request *rq, blk_status_t error) rq_qos_done(rq->q, rq); rq->end_io(rq, error); } else { - if (unlikely(blk_bidi_rq(rq))) - blk_mq_free_request(rq->next_rq); blk_mq_free_request(rq); } } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index fd1450d53f1c..21beb456b97a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -234,9 +234,6 @@ struct request { */ rq_end_io_fn *end_io; void *end_io_data; - - /* for bidi */ - struct request *next_rq; }; static inline bool blk_op_is_scsi(unsigned int op) @@ -572,7 +569,6 @@ struct request_queue { #define QUEUE_FLAG_STOPPED 1 /* queue is stopped */ #define QUEUE_FLAG_DYING 2 /* queue being torn down */ -#define QUEUE_FLAG_BIDI 4 /* queue supports bidi requests */ #define QUEUE_FLAG_NOMERGES 5 /* disable merge attempts */ #define QUEUE_FLAG_SAME_COMP 6 /* complete on same CPU-group */ #define QUEUE_FLAG_FAIL_IO 7 /* fake timeout */ @@ -644,8 +640,6 @@ static inline bool blk_account_rq(struct request *rq) return (rq->rq_flags & RQF_STARTED) && !blk_rq_is_passthrough(rq); } -#define blk_bidi_rq(rq) ((rq)->next_rq != NULL) - #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) #define rq_data_dir(rq) (op_is_write(req_op(rq)) ? WRITE : READ) -- cgit From 5870970b9a828d8693aa6d15742573289d7dbcd0 Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Thu, 31 Jan 2019 14:58:39 +0000 Subject: arm64: Fix HCR.TGE status for NMI contexts When using VHE, the host needs to clear HCR_EL2.TGE bit in order to interact with guest TLBs, switching from EL2&0 translation regime to EL1&0. However, some non-maskable asynchronous event could happen while TGE is cleared like SDEI. Because of this address translation operations relying on EL2&0 translation regime could fail (tlb invalidation, userspace access, ...). Fix this by properly setting HCR_EL2.TGE when entering NMI context and clear it if necessary when returning to the interrupted context. Signed-off-by: Julien Thierry Suggested-by: Marc Zyngier Reviewed-by: Marc Zyngier Reviewed-by: James Morse Cc: Arnd Bergmann Cc: Will Deacon Cc: Marc Zyngier Cc: James Morse Cc: linux-arch@vger.kernel.org Cc: stable@vger.kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/hardirq.h | 31 +++++++++++++++++++++++++++++++ arch/arm64/kernel/irq.c | 3 +++ include/linux/hardirq.h | 7 +++++++ 3 files changed, 41 insertions(+) (limited to 'include/linux') diff --git a/arch/arm64/include/asm/hardirq.h b/arch/arm64/include/asm/hardirq.h index 1473fc2f7ab7..89691c86640a 100644 --- a/arch/arm64/include/asm/hardirq.h +++ b/arch/arm64/include/asm/hardirq.h @@ -17,8 +17,12 @@ #define __ASM_HARDIRQ_H #include +#include #include +#include #include +#include +#include #define NR_IPI 7 @@ -37,6 +41,33 @@ u64 smp_irq_stat_cpu(unsigned int cpu); #define __ARCH_IRQ_EXIT_IRQS_DISABLED 1 +struct nmi_ctx { + u64 hcr; +}; + +DECLARE_PER_CPU(struct nmi_ctx, nmi_contexts); + +#define arch_nmi_enter() \ + do { \ + if (is_kernel_in_hyp_mode()) { \ + struct nmi_ctx *nmi_ctx = this_cpu_ptr(&nmi_contexts); \ + nmi_ctx->hcr = read_sysreg(hcr_el2); \ + if (!(nmi_ctx->hcr & HCR_TGE)) { \ + write_sysreg(nmi_ctx->hcr | HCR_TGE, hcr_el2); \ + isb(); \ + } \ + } \ + } while (0) + +#define arch_nmi_exit() \ + do { \ + if (is_kernel_in_hyp_mode()) { \ + struct nmi_ctx *nmi_ctx = this_cpu_ptr(&nmi_contexts); \ + if (!(nmi_ctx->hcr & HCR_TGE)) \ + write_sysreg(nmi_ctx->hcr, hcr_el2); \ + } \ + } while (0) + static inline void ack_bad_irq(unsigned int irq) { extern unsigned long irq_err_count; diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index 780a12f59a8f..92fa81798fb9 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -33,6 +33,9 @@ unsigned long irq_err_count; +/* Only access this in an NMI enter/exit */ +DEFINE_PER_CPU(struct nmi_ctx, nmi_contexts); + DEFINE_PER_CPU(unsigned long *, irq_stack_ptr); int arch_show_interrupts(struct seq_file *p, int prec) diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 0fbbcdf0c178..da0af631ded5 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -60,8 +60,14 @@ extern void irq_enter(void); */ extern void irq_exit(void); +#ifndef arch_nmi_enter +#define arch_nmi_enter() do { } while (0) +#define arch_nmi_exit() do { } while (0) +#endif + #define nmi_enter() \ do { \ + arch_nmi_enter(); \ printk_nmi_enter(); \ lockdep_off(); \ ftrace_nmi_enter(); \ @@ -80,6 +86,7 @@ extern void irq_exit(void); ftrace_nmi_exit(); \ lockdep_on(); \ printk_nmi_exit(); \ + arch_nmi_exit(); \ } while (0) #endif /* LINUX_HARDIRQ_H */ -- cgit From 13b210ddf474d9f3368766008a89fe82a6f90b48 Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Thu, 31 Jan 2019 14:58:49 +0000 Subject: efi: Let architectures decide the flags that should be saved/restored Currently, irqflags are saved before calling runtime services and checked for mismatch on return. Provide a pair of overridable macros to save and restore (if needed) the state that need to be preserved on return from a runtime service. This allows to check for flags that are not necesarly related to irqflags. Signed-off-by: Julien Thierry Acked-by: Catalin Marinas Acked-by: Ard Biesheuvel Acked-by: Marc Zyngier Cc: Ard Biesheuvel Cc: linux-efi@vger.kernel.org Signed-off-by: Catalin Marinas --- drivers/firmware/efi/runtime-wrappers.c | 17 +++++++++++++++-- include/linux/efi.h | 5 +++-- 2 files changed, 18 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c index 8903b9ccfc2b..c70df5ae7c4a 100644 --- a/drivers/firmware/efi/runtime-wrappers.c +++ b/drivers/firmware/efi/runtime-wrappers.c @@ -89,11 +89,24 @@ exit: \ efi_rts_work.status; \ }) +#ifndef arch_efi_save_flags +#define arch_efi_save_flags(state_flags) local_save_flags(state_flags) +#define arch_efi_restore_flags(state_flags) local_irq_restore(state_flags) +#endif + +unsigned long efi_call_virt_save_flags(void) +{ + unsigned long flags; + + arch_efi_save_flags(flags); + return flags; +} + void efi_call_virt_check_flags(unsigned long flags, const char *call) { unsigned long cur_flags, mismatch; - local_save_flags(cur_flags); + cur_flags = efi_call_virt_save_flags(); mismatch = flags ^ cur_flags; if (!WARN_ON_ONCE(mismatch & ARCH_EFI_IRQ_FLAGS_MASK)) @@ -102,7 +115,7 @@ void efi_call_virt_check_flags(unsigned long flags, const char *call) add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_NOW_UNRELIABLE); pr_err_ratelimited(FW_BUG "IRQ flags corrupted (0x%08lx=>0x%08lx) by EFI %s\n", flags, cur_flags, call); - local_irq_restore(flags); + arch_efi_restore_flags(flags); } /* diff --git a/include/linux/efi.h b/include/linux/efi.h index 45ff763fba76..bd80b7ec35db 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1607,6 +1607,7 @@ efi_status_t efi_setup_gop(efi_system_table_t *sys_table_arg, bool efi_runtime_disabled(void); extern void efi_call_virt_check_flags(unsigned long flags, const char *call); +extern unsigned long efi_call_virt_save_flags(void); enum efi_secureboot_mode { efi_secureboot_mode_unset, @@ -1652,7 +1653,7 @@ void efi_retrieve_tpm2_eventlog(efi_system_table_t *sys_table); \ arch_efi_call_virt_setup(); \ \ - local_save_flags(__flags); \ + __flags = efi_call_virt_save_flags(); \ __s = arch_efi_call_virt(p, f, args); \ efi_call_virt_check_flags(__flags, __stringify(f)); \ \ @@ -1667,7 +1668,7 @@ void efi_retrieve_tpm2_eventlog(efi_system_table_t *sys_table); \ arch_efi_call_virt_setup(); \ \ - local_save_flags(__flags); \ + __flags = efi_call_virt_save_flags(); \ arch_efi_call_virt(p, f, args); \ efi_call_virt_check_flags(__flags, __stringify(f)); \ \ -- cgit From 840018668ce2d96783356204ff282d6c9b0e5f66 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Thu, 31 Jan 2019 11:47:08 -0700 Subject: perf/aux: Make perf_event accessible to setup_aux() When pmu::setup_aux() is called the coresight PMU needs to know which sink to use for the session by looking up the information in the event's attr::config2 field. As such simply replace the cpu information by the complete perf_event structure and change all affected customers. Signed-off-by: Mathieu Poirier Reviewed-by: Suzuki Poulouse Acked-by: Peter Zijlstra Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Greg Kroah-Hartman Cc: H. Peter Anvin Cc: Heiko Carstens Cc: Jiri Olsa Cc: Mark Rutland Cc: Martin Schwidefsky Cc: Namhyung Kim Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-s390@vger.kernel.org Link: http://lkml.kernel.org/r/20190131184714.20388-2-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- arch/s390/kernel/perf_cpum_sf.c | 6 +++--- arch/x86/events/intel/bts.c | 4 +++- arch/x86/events/intel/pt.c | 5 +++-- drivers/hwtracing/coresight/coresight-etm-perf.c | 6 +++--- drivers/perf/arm_spe_pmu.c | 6 +++--- include/linux/perf_event.h | 2 +- kernel/events/ring_buffer.c | 2 +- 7 files changed, 17 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index bfabeb1889cc..1266194afb02 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -1600,7 +1600,7 @@ static void aux_sdb_init(unsigned long sdb) /* * aux_buffer_setup() - Setup AUX buffer for diagnostic mode sampling - * @cpu: On which to allocate, -1 means current + * @event: Event the buffer is setup for, event->cpu == -1 means current * @pages: Array of pointers to buffer pages passed from perf core * @nr_pages: Total pages * @snapshot: Flag for snapshot mode @@ -1612,8 +1612,8 @@ static void aux_sdb_init(unsigned long sdb) * * Return the private AUX buffer structure if success or NULL if fails. */ -static void *aux_buffer_setup(int cpu, void **pages, int nr_pages, - bool snapshot) +static void *aux_buffer_setup(struct perf_event *event, void **pages, + int nr_pages, bool snapshot) { struct sf_buffer *sfb; struct aux_buffer *aux; diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index a01ef1b0f883..7cdd7b13bbda 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -77,10 +77,12 @@ static size_t buf_size(struct page *page) } static void * -bts_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool overwrite) +bts_buffer_setup_aux(struct perf_event *event, void **pages, + int nr_pages, bool overwrite) { struct bts_buffer *buf; struct page *page; + int cpu = event->cpu; int node = (cpu == -1) ? cpu : cpu_to_node(cpu); unsigned long offset; size_t size = nr_pages << PAGE_SHIFT; diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 9494ca68fd9d..c0e86ff21f81 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -1114,10 +1114,11 @@ static int pt_buffer_init_topa(struct pt_buffer *buf, unsigned long nr_pages, * Return: Our private PT buffer structure. */ static void * -pt_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool snapshot) +pt_buffer_setup_aux(struct perf_event *event, void **pages, + int nr_pages, bool snapshot) { struct pt_buffer *buf; - int node, ret; + int node, ret, cpu = event->cpu; if (!nr_pages) return NULL; diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c index abe8249b893b..f21eb28b6782 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.c +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c @@ -177,15 +177,15 @@ static void etm_free_aux(void *data) schedule_work(&event_data->work); } -static void *etm_setup_aux(int event_cpu, void **pages, +static void *etm_setup_aux(struct perf_event *event, void **pages, int nr_pages, bool overwrite) { - int cpu; + int cpu = event->cpu; cpumask_t *mask; struct coresight_device *sink; struct etm_event_data *event_data = NULL; - event_data = alloc_event_data(event_cpu); + event_data = alloc_event_data(cpu); if (!event_data) return NULL; INIT_WORK(&event_data->work, free_event_data); diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c index 8e46a9dad2fa..7cb766dafe85 100644 --- a/drivers/perf/arm_spe_pmu.c +++ b/drivers/perf/arm_spe_pmu.c @@ -824,10 +824,10 @@ static void arm_spe_pmu_read(struct perf_event *event) { } -static void *arm_spe_pmu_setup_aux(int cpu, void **pages, int nr_pages, - bool snapshot) +static void *arm_spe_pmu_setup_aux(struct perf_event *event, void **pages, + int nr_pages, bool snapshot) { - int i; + int i, cpu = event->cpu; struct page **pglist; struct arm_spe_pmu_buf *buf; diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 6cb5d483ab34..d9c3610e0e25 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -410,7 +410,7 @@ struct pmu { /* * Set up pmu-private data structures for an AUX area */ - void *(*setup_aux) (int cpu, void **pages, + void *(*setup_aux) (struct perf_event *event, void **pages, int nr_pages, bool overwrite); /* optional */ diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 805f0423ee0b..70ae2422cbaf 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -657,7 +657,7 @@ int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event, goto out; } - rb->aux_priv = event->pmu->setup_aux(event->cpu, rb->aux_pages, nr_pages, + rb->aux_priv = event->pmu->setup_aux(event, rb->aux_pages, nr_pages, overwrite); if (!rb->aux_priv) goto out; -- cgit From bb8e370bdc141ddff526e5e5ee74210c91fee0b8 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Thu, 31 Jan 2019 11:47:09 -0700 Subject: coresight: perf: Add "sinks" group to PMU directory Add a "sinks" directory entry so that users can see all the sinks available in the system in a single place. Individual sink are added as they are registered with the coresight bus. Committer tests: Test built on a ubuntu 18.04 container with a cross build environment to arm64, the new field is there, need to find a machine with this feature to do further testing in the future. root@d15263e5734a:/git/perf# grep CORESIGHT /tmp/build/v5.0-rc2+/.config CONFIG_CORESIGHT=y CONFIG_CORESIGHT_LINKS_AND_SINKS=y CONFIG_CORESIGHT_LINK_AND_SINK_TMC=y CONFIG_CORESIGHT_CATU=y CONFIG_CORESIGHT_SINK_TPIU=y CONFIG_CORESIGHT_SINK_ETBV10=y CONFIG_CORESIGHT_SOURCE_ETM4X=y CONFIG_CORESIGHT_DYNAMIC_REPLICATOR=y CONFIG_CORESIGHT_STM=y CONFIG_CORESIGHT_CPU_DEBUG=m root@d15263e5734a:/git/perf# root@d15263e5734a:/git/perf# file /tmp/build/v5.0-rc2+/drivers/hwtracing/coresight/*.o .../coresight/coresight-catu.o: ELF 64-bit MSB relocatable, ARM aarch64, version 1 (SYSV), not stripped .../coresight/coresight-cpu-debug.mod.o: ELF 64-bit MSB relocatable, ARM aarch64, version 1 (SYSV), not stripped .../coresight/coresight-cpu-debug.o: ELF 64-bit MSB relocatable, ARM aarch64, version 1 (SYSV), not stripped .../coresight/coresight-dynamic-replicator.o: ELF 64-bit MSB relocatable, ARM aarch64, version 1 (SYSV), not stripped .../coresight/coresight-etb10.o: ELF 64-bit MSB relocatable, ARM aarch64, version 1 (SYSV), not stripped .../coresight/coresight-etm-perf.o: ELF 64-bit MSB relocatable, ARM aarch64, version 1 (SYSV), not stripped .../coresight/coresight-etm4x-sysfs.o: ELF 64-bit MSB relocatable, ARM aarch64, version 1 (SYSV), not stripped .../coresight/coresight-etm4x.o: ELF 64-bit MSB relocatable, ARM aarch64, version 1 (SYSV), not stripped .../coresight/coresight-funnel.o: ELF 64-bit MSB relocatable, ARM aarch64, version 1 (SYSV), not stripped .../coresight/coresight-replicator.o: ELF 64-bit MSB relocatable, ARM aarch64, version 1 (SYSV), not stripped .../coresight/coresight-stm.o: ELF 64-bit MSB relocatable, ARM aarch64, version 1 (SYSV), not stripped .../coresight/coresight-tmc-etf.o: ELF 64-bit MSB relocatable, ARM aarch64, version 1 (SYSV), not stripped .../coresight/coresight-tmc-etr.o: ELF 64-bit MSB relocatable, ARM aarch64, version 1 (SYSV), not stripped .../coresight/coresight-tmc.o: ELF 64-bit MSB relocatable, ARM aarch64, version 1 (SYSV), not stripped .../coresight/coresight-tpiu.o: ELF 64-bit MSB relocatable, ARM aarch64, version 1 (SYSV), not stripped .../coresight/coresight.o: ELF 64-bit MSB relocatable, ARM aarch64, version 1 (SYSV), not stripped .../coresight/of_coresight.o: ELF 64-bit MSB relocatable, ARM aarch64, version 1 (SYSV), not stripped root@d15263e5734a:/git/perf# root@d15263e5734a:/git/perf# pahole -C coresight_device /tmp/build/v5.0-rc2+/drivers/hwtracing/coresight/coresight.o struct coresight_device { struct coresight_connection * conns; /* 0 8 */ int nr_inport; /* 8 4 */ int nr_outport; /* 12 4 */ enum coresight_dev_type type; /* 16 4 */ union coresight_dev_subtype subtype; /* 20 8 */ /* XXX 4 bytes hole, try to pack */ const struct coresight_ops * ops; /* 32 8 */ struct device dev; /* 40 1408 */ /* XXX last struct has 7 bytes of padding */ /* --- cacheline 22 boundary (1408 bytes) was 40 bytes ago --- */ atomic_t * refcnt; /* 1448 8 */ bool orphan; /* 1456 1 */ bool enable; /* 1457 1 */ bool activated; /* 1458 1 */ /* XXX 5 bytes hole, try to pack */ struct dev_ext_attribute * ea; /* 1464 8 */ /* size: 1472, cachelines: 23, members: 12 */ /* sum members: 1463, holes: 2, sum holes: 9 */ /* paddings: 1, sum paddings: 7 */ }; root@d15263e5734a:/git/perf# Signed-off-by: Mathieu Poirier Reviewed-by: Suzuki K Poulose Acked-by: Peter Zijlstra Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Greg Kroah-Hartman Cc: H. Peter Anvin Cc: Heiko Carstens Cc: Jiri Olsa Cc: Mark Rutland Cc: Martin Schwidefsky Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-s390@vger.kernel.org Link: http://lkml.kernel.org/r/20190131184714.20388-3-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- drivers/hwtracing/coresight/coresight-etm-perf.c | 82 ++++++++++++++++++++++++ drivers/hwtracing/coresight/coresight-etm-perf.h | 6 +- drivers/hwtracing/coresight/coresight.c | 18 ++++++ include/linux/coresight.h | 7 +- 4 files changed, 110 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c index f21eb28b6782..cdbdb28dc175 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.c +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -43,8 +44,18 @@ static const struct attribute_group etm_pmu_format_group = { .attrs = etm_config_formats_attr, }; +static struct attribute *etm_config_sinks_attr[] = { + NULL, +}; + +static const struct attribute_group etm_pmu_sinks_group = { + .name = "sinks", + .attrs = etm_config_sinks_attr, +}; + static const struct attribute_group *etm_pmu_attr_groups[] = { &etm_pmu_format_group, + &etm_pmu_sinks_group, NULL, }; @@ -479,6 +490,77 @@ int etm_perf_symlink(struct coresight_device *csdev, bool link) return 0; } +static ssize_t etm_perf_sink_name_show(struct device *dev, + struct device_attribute *dattr, + char *buf) +{ + struct dev_ext_attribute *ea; + + ea = container_of(dattr, struct dev_ext_attribute, attr); + return scnprintf(buf, PAGE_SIZE, "0x%lx\n", (unsigned long)(ea->var)); +} + +int etm_perf_add_symlink_sink(struct coresight_device *csdev) +{ + int ret; + unsigned long hash; + const char *name; + struct device *pmu_dev = etm_pmu.dev; + struct device *pdev = csdev->dev.parent; + struct dev_ext_attribute *ea; + + if (csdev->type != CORESIGHT_DEV_TYPE_SINK && + csdev->type != CORESIGHT_DEV_TYPE_LINKSINK) + return -EINVAL; + + if (csdev->ea != NULL) + return -EINVAL; + + if (!etm_perf_up) + return -EPROBE_DEFER; + + ea = devm_kzalloc(pdev, sizeof(*ea), GFP_KERNEL); + if (!ea) + return -ENOMEM; + + name = dev_name(pdev); + /* See function coresight_get_sink_by_id() to know where this is used */ + hash = hashlen_hash(hashlen_string(NULL, name)); + + ea->attr.attr.name = devm_kstrdup(pdev, name, GFP_KERNEL); + if (!ea->attr.attr.name) + return -ENOMEM; + + ea->attr.attr.mode = 0444; + ea->attr.show = etm_perf_sink_name_show; + ea->var = (unsigned long *)hash; + + ret = sysfs_add_file_to_group(&pmu_dev->kobj, + &ea->attr.attr, "sinks"); + + if (!ret) + csdev->ea = ea; + + return ret; +} + +void etm_perf_del_symlink_sink(struct coresight_device *csdev) +{ + struct device *pmu_dev = etm_pmu.dev; + struct dev_ext_attribute *ea = csdev->ea; + + if (csdev->type != CORESIGHT_DEV_TYPE_SINK && + csdev->type != CORESIGHT_DEV_TYPE_LINKSINK) + return; + + if (!ea) + return; + + sysfs_remove_file_from_group(&pmu_dev->kobj, + &ea->attr.attr, "sinks"); + csdev->ea = NULL; +} + static int __init etm_perf_init(void) { int ret; diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.h b/drivers/hwtracing/coresight/coresight-etm-perf.h index da7d9336a15c..015213abe00a 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.h +++ b/drivers/hwtracing/coresight/coresight-etm-perf.h @@ -59,6 +59,8 @@ struct etm_event_data { #ifdef CONFIG_CORESIGHT int etm_perf_symlink(struct coresight_device *csdev, bool link); +int etm_perf_add_symlink_sink(struct coresight_device *csdev); +void etm_perf_del_symlink_sink(struct coresight_device *csdev); static inline void *etm_perf_sink_config(struct perf_output_handle *handle) { struct etm_event_data *data = perf_get_aux(handle); @@ -70,7 +72,9 @@ static inline void *etm_perf_sink_config(struct perf_output_handle *handle) #else static inline int etm_perf_symlink(struct coresight_device *csdev, bool link) { return -EINVAL; } - +int etm_perf_add_symlink_sink(struct coresight_device *csdev) +{ return -EINVAL; } +void etm_perf_del_symlink_sink(struct coresight_device *csdev) {} static inline void *etm_perf_sink_config(struct perf_output_handle *handle) { return NULL; diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c index 2b0df1a0a8df..d7fa90be6f42 100644 --- a/drivers/hwtracing/coresight/coresight.c +++ b/drivers/hwtracing/coresight/coresight.c @@ -18,6 +18,7 @@ #include #include +#include "coresight-etm-perf.h" #include "coresight-priv.h" static DEFINE_MUTEX(coresight_mutex); @@ -1167,6 +1168,22 @@ struct coresight_device *coresight_register(struct coresight_desc *desc) goto err_out; } + if (csdev->type == CORESIGHT_DEV_TYPE_SINK || + csdev->type == CORESIGHT_DEV_TYPE_LINKSINK) { + ret = etm_perf_add_symlink_sink(csdev); + + if (ret) { + device_unregister(&csdev->dev); + /* + * As with the above, all resources are free'd + * explicitly via coresight_device_release() triggered + * from put_device(), which is in turn called from + * function device_unregister(). + */ + goto err_out; + } + } + mutex_lock(&coresight_mutex); coresight_fixup_device_conns(csdev); @@ -1185,6 +1202,7 @@ EXPORT_SYMBOL_GPL(coresight_register); void coresight_unregister(struct coresight_device *csdev) { + etm_perf_del_symlink_sink(csdev); /* Remove references of that device in the topology */ coresight_remove_conns(csdev); device_unregister(&csdev->dev); diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 46c67a764877..7b87965f7a65 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -154,8 +154,9 @@ struct coresight_connection { * @orphan: true if the component has connections that haven't been linked. * @enable: 'true' if component is currently part of an active path. * @activated: 'true' only if a _sink_ has been activated. A sink can be - activated but not yet enabled. Enabling for a _sink_ - happens when a source has been selected for that it. + * activated but not yet enabled. Enabling for a _sink_ + * appens when a source has been selected for that it. + * @ea: Device attribute for sink representation under PMU directory. */ struct coresight_device { struct coresight_connection *conns; @@ -168,7 +169,9 @@ struct coresight_device { atomic_t *refcnt; bool orphan; bool enable; /* true only if configured as part of a path */ + /* sink specific fields */ bool activated; /* true only if a sink is part of a path */ + struct dev_ext_attribute *ea; }; #define to_coresight_device(d) container_of(d, struct coresight_device, dev) -- cgit From 5f02a877638472e83cb5e335f9eec27052b1c7c2 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 10 Jan 2019 19:04:28 +0200 Subject: fsnotify: annotate directory entry modification events "dirent" events are referring to events that modify directory entries, such as create,delete,rename. Those events should always be reported on a watched directory, regardless if FS_EVENT_ON_CHILD is set on the watch mask. fsnotify_nameremove() and fsnotify_move() were modified to no longer set the FS_EVENT_ON_CHILD event bit. This is a semantic change to align with the "dirent" event definition. It has no effect on any existing backend, because dnotify, inotify and audit always requets the child events and fanotify does not get the delete,rename events. The fsnotify_dirent() helper is used instead of fsnotify_parent() to report a dirent event to dentry->d_parent without FS_EVENT_ON_CHILD and regardless if parent has the FS_EVENT_ON_CHILD bit set. Unlike fsnotify_parent(), fsnotify_dirent() assumes that dentry->d_name and dentry->d_parent are stable. For fsnotify_create()/fsnotify_mkdir(), this assumption is abviously correct. For fsnotify_nameremove(), it is less trivial, so we use dget_parent() and take_dentry_name_snapshot() to grab stable references. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify.h | 49 +++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 40 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 2ccb08cb5d6a..39b22e88423d 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -17,8 +17,22 @@ #include #include +/* + * Notify this @dir inode about a change in the directory entry @dentry. + * + * Unlike fsnotify_parent(), the event will be reported regardless of the + * FS_EVENT_ON_CHILD mask on the parent inode. + */ +static inline int fsnotify_dirent(struct inode *dir, struct dentry *dentry, + __u32 mask) +{ + return fsnotify(dir, mask, d_inode(dentry), FSNOTIFY_EVENT_INODE, + dentry->d_name.name, 0); +} + /* Notify this dentry's parent about a child's events. */ -static inline int fsnotify_parent(const struct path *path, struct dentry *dentry, __u32 mask) +static inline int fsnotify_parent(const struct path *path, + struct dentry *dentry, __u32 mask) { if (!dentry) dentry = path->dentry; @@ -85,8 +99,8 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, { struct inode *source = moved->d_inode; u32 fs_cookie = fsnotify_get_cookie(); - __u32 old_dir_mask = (FS_EVENT_ON_CHILD | FS_MOVED_FROM); - __u32 new_dir_mask = (FS_EVENT_ON_CHILD | FS_MOVED_TO); + __u32 old_dir_mask = FS_MOVED_FROM; + __u32 new_dir_mask = FS_MOVED_TO; const unsigned char *new_name = moved->d_name.name; if (old_dir == new_dir) @@ -128,15 +142,35 @@ static inline void fsnotify_vfsmount_delete(struct vfsmount *mnt) /* * fsnotify_nameremove - a filename was removed from a directory + * + * This is mostly called under parent vfs inode lock so name and + * dentry->d_parent should be stable. However there are some corner cases where + * inode lock is not held. So to be on the safe side and be reselient to future + * callers and out of tree users of d_delete(), we do not assume that d_parent + * and d_name are stable and we use dget_parent() and + * take_dentry_name_snapshot() to grab stable references. */ static inline void fsnotify_nameremove(struct dentry *dentry, int isdir) { + struct dentry *parent; + struct name_snapshot name; __u32 mask = FS_DELETE; + /* d_delete() of pseudo inode? (e.g. __ns_get_path() playing tricks) */ + if (IS_ROOT(dentry)) + return; + if (isdir) mask |= FS_ISDIR; - fsnotify_parent(NULL, dentry, mask); + parent = dget_parent(dentry); + take_dentry_name_snapshot(&name, dentry); + + fsnotify(d_inode(parent), mask, d_inode(dentry), FSNOTIFY_EVENT_INODE, + name.name, 0); + + release_dentry_name_snapshot(&name); + dput(parent); } /* @@ -155,7 +189,7 @@ static inline void fsnotify_create(struct inode *inode, struct dentry *dentry) { audit_inode_child(inode, dentry, AUDIT_TYPE_CHILD_CREATE); - fsnotify(inode, FS_CREATE, dentry->d_inode, FSNOTIFY_EVENT_INODE, dentry->d_name.name, 0); + fsnotify_dirent(inode, dentry, FS_CREATE); } /* @@ -176,12 +210,9 @@ static inline void fsnotify_link(struct inode *dir, struct inode *inode, struct */ static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry) { - __u32 mask = (FS_CREATE | FS_ISDIR); - struct inode *d_inode = dentry->d_inode; - audit_inode_child(inode, dentry, AUDIT_TYPE_CHILD_CREATE); - fsnotify(inode, mask, d_inode, FSNOTIFY_EVENT_INODE, dentry->d_name.name, 0); + fsnotify_dirent(inode, dentry, FS_CREATE | FS_ISDIR); } /* -- cgit From e220140ff6241e180d0c2fc294e61ee6bbc6a18e Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 10 Jan 2019 19:04:29 +0200 Subject: fsnotify: remove dirent events from FS_EVENTS_POSS_ON_CHILD mask "dirent" events are referring to events that modify directory entries, such as create,delete,rename. Those events are always be reported on a watched directory, regardless if FS_EVENT_ON_CHILD is set on the watch mask. ALL_FSNOTIFY_DIRENT_EVENTS defines all the dirent event types and those event types are removed from FS_EVENTS_POSS_ON_CHILD. That means for a directory with an inotify watch and only dirent events in the mask (i.e. create,delete,move), all children dentries will no longer have the DCACHE_FSNOTIFY_PARENT_WATCHED flag set. This will allow all events that happen on children to be optimized away in __fsnotify_parent() without the need to dereference child->d_parent->d_inode->i_fsnotify_mask. Since the dirent events are never repoted via __fsnotify_parent(), this results in no change of logic, but only an optimization. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify_backend.h | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 7639774e7475..7f195d43efaf 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -59,27 +59,33 @@ * dnotify and inotify. */ #define FS_EVENT_ON_CHILD 0x08000000 -/* This is a list of all events that may get sent to a parernt based on fs event - * happening to inodes inside that directory */ -#define FS_EVENTS_POSS_ON_CHILD (FS_ACCESS | FS_MODIFY | FS_ATTRIB |\ - FS_CLOSE_WRITE | FS_CLOSE_NOWRITE | FS_OPEN |\ - FS_MOVED_FROM | FS_MOVED_TO | FS_CREATE |\ - FS_DELETE | FS_OPEN_PERM | FS_ACCESS_PERM | \ - FS_OPEN_EXEC | FS_OPEN_EXEC_PERM) - #define FS_MOVE (FS_MOVED_FROM | FS_MOVED_TO) +/* + * Directory entry modification events - reported only to directory + * where entry is modified and not to a watching parent. + * The watching parent may get an FS_ATTRIB|FS_EVENT_ON_CHILD event + * when a directory entry inside a child subdir changes. + */ +#define ALL_FSNOTIFY_DIRENT_EVENTS (FS_CREATE | FS_DELETE | FS_MOVE) + #define ALL_FSNOTIFY_PERM_EVENTS (FS_OPEN_PERM | FS_ACCESS_PERM | \ FS_OPEN_EXEC_PERM) +/* + * This is a list of all events that may get sent to a parent based on fs event + * happening to inodes inside that directory. + */ +#define FS_EVENTS_POSS_ON_CHILD (ALL_FSNOTIFY_PERM_EVENTS | \ + FS_ACCESS | FS_MODIFY | FS_ATTRIB | \ + FS_CLOSE_WRITE | FS_CLOSE_NOWRITE | \ + FS_OPEN | FS_OPEN_EXEC) + /* Events that can be reported to backends */ -#define ALL_FSNOTIFY_EVENTS (FS_ACCESS | FS_MODIFY | FS_ATTRIB | \ - FS_CLOSE_WRITE | FS_CLOSE_NOWRITE | FS_OPEN | \ - FS_MOVED_FROM | FS_MOVED_TO | FS_CREATE | \ - FS_DELETE | FS_DELETE_SELF | FS_MOVE_SELF | \ - FS_UNMOUNT | FS_Q_OVERFLOW | FS_IN_IGNORED | \ - FS_OPEN_PERM | FS_ACCESS_PERM | FS_DN_RENAME | \ - FS_OPEN_EXEC | FS_OPEN_EXEC_PERM) +#define ALL_FSNOTIFY_EVENTS (ALL_FSNOTIFY_DIRENT_EVENTS | \ + FS_EVENTS_POSS_ON_CHILD | \ + FS_DELETE_SELF | FS_MOVE_SELF | FS_DN_RENAME | \ + FS_UNMOUNT | FS_Q_OVERFLOW | FS_IN_IGNORED) /* Extra flags that may be reported with event or control handling of events */ #define ALL_FSNOTIFY_FLAGS (FS_EXCL_UNLINK | FS_ISDIR | FS_IN_ONESHOT | \ -- cgit From a0a92d261f2922f4b5d2c0a98d6c41a89c7f5edd Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 10 Jan 2019 19:04:31 +0200 Subject: fsnotify: move mask out of struct fsnotify_event Common fsnotify_event helpers have no need for the mask field. It is only used by backend code, so move the field out of the abstract fsnotify_event struct and into the concrete backend event structs. This change packs struct inotify_event_info better on 64bit machine and will allow us to cram some more fields into struct fanotify_event_info. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify.c | 11 +++++++---- fs/notify/fanotify/fanotify.h | 1 + fs/notify/fanotify/fanotify_user.c | 10 +++++----- fs/notify/inotify/inotify.h | 1 + fs/notify/inotify/inotify_fsnotify.c | 9 +++++---- fs/notify/inotify/inotify_user.c | 5 +++-- fs/notify/notification.c | 22 +--------------------- include/linux/fsnotify_backend.h | 10 ++++++---- 8 files changed, 29 insertions(+), 40 deletions(-) (limited to 'include/linux') diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 3723f3d18d20..98197802bbfb 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -36,20 +36,22 @@ static bool should_merge(struct fsnotify_event *old_fsn, static int fanotify_merge(struct list_head *list, struct fsnotify_event *event) { struct fsnotify_event *test_event; + struct fanotify_event_info *new; pr_debug("%s: list=%p event=%p\n", __func__, list, event); + new = FANOTIFY_E(event); /* * Don't merge a permission event with any other event so that we know * the event structure we have created in fanotify_handle_event() is the * one we should check for permission response. */ - if (fanotify_is_perm_event(event->mask)) + if (fanotify_is_perm_event(new->mask)) return 0; list_for_each_entry_reverse(test_event, list, list) { if (should_merge(test_event, event)) { - test_event->mask |= event->mask; + FANOTIFY_E(test_event)->mask |= new->mask; return 1; } } @@ -173,7 +175,8 @@ struct fanotify_event_info *fanotify_alloc_event(struct fsnotify_group *group, if (!event) goto out; init: __maybe_unused - fsnotify_init_event(&event->fse, inode, mask); + fsnotify_init_event(&event->fse, inode); + event->mask = mask; if (FAN_GROUP_FLAG(group, FAN_REPORT_TID)) event->pid = get_pid(task_pid(current)); else @@ -280,7 +283,7 @@ static void fanotify_free_event(struct fsnotify_event *fsn_event) event = FANOTIFY_E(fsn_event); path_put(&event->path); put_pid(event->pid); - if (fanotify_is_perm_event(fsn_event->mask)) { + if (fanotify_is_perm_event(event->mask)) { kmem_cache_free(fanotify_perm_event_cachep, FANOTIFY_PE(fsn_event)); return; diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h index ea05b8a401e7..e630d787d4c3 100644 --- a/fs/notify/fanotify/fanotify.h +++ b/fs/notify/fanotify/fanotify.h @@ -14,6 +14,7 @@ extern struct kmem_cache *fanotify_perm_event_cachep; */ struct fanotify_event_info { struct fsnotify_event fse; + u32 mask; /* * We hold ref to this path so it may be dereferenced at any point * during this object's lifetime diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 9c870b0d2b56..dea47d07cc29 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -131,9 +131,9 @@ static int fill_event_metadata(struct fsnotify_group *group, metadata->metadata_len = FAN_EVENT_METADATA_LEN; metadata->vers = FANOTIFY_METADATA_VERSION; metadata->reserved = 0; - metadata->mask = fsn_event->mask & FANOTIFY_OUTGOING_EVENTS; + metadata->mask = event->mask & FANOTIFY_OUTGOING_EVENTS; metadata->pid = pid_vnr(event->pid); - if (unlikely(fsn_event->mask & FAN_Q_OVERFLOW)) + if (unlikely(event->mask & FAN_Q_OVERFLOW)) metadata->fd = FAN_NOFD; else { metadata->fd = create_fd(group, event, file); @@ -230,7 +230,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, fanotify_event_metadata.event_len)) goto out_close_fd; - if (fanotify_is_perm_event(event->mask)) + if (fanotify_is_perm_event(FANOTIFY_E(event)->mask)) FANOTIFY_PE(event)->fd = fd; if (fd != FAN_NOFD) @@ -316,7 +316,7 @@ static ssize_t fanotify_read(struct file *file, char __user *buf, * Permission events get queued to wait for response. Other * events can be destroyed now. */ - if (!fanotify_is_perm_event(kevent->mask)) { + if (!fanotify_is_perm_event(FANOTIFY_E(kevent)->mask)) { fsnotify_destroy_event(group, kevent); } else { if (ret <= 0) { @@ -401,7 +401,7 @@ static int fanotify_release(struct inode *ignored, struct file *file) */ while (!fsnotify_notify_queue_is_empty(group)) { fsn_event = fsnotify_remove_first_event(group); - if (!(fsn_event->mask & FANOTIFY_PERM_EVENTS)) { + if (!(FANOTIFY_E(fsn_event)->mask & FANOTIFY_PERM_EVENTS)) { spin_unlock(&group->notification_lock); fsnotify_destroy_event(group, fsn_event); spin_lock(&group->notification_lock); diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h index 7e4578d35b61..74ae60305189 100644 --- a/fs/notify/inotify/inotify.h +++ b/fs/notify/inotify/inotify.h @@ -5,6 +5,7 @@ struct inotify_event_info { struct fsnotify_event fse; + u32 mask; int wd; u32 sync_cookie; int name_len; diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index f4184b4f3815..fe97299975f2 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c @@ -43,11 +43,11 @@ static bool event_compare(struct fsnotify_event *old_fsn, { struct inotify_event_info *old, *new; - if (old_fsn->mask & FS_IN_IGNORED) - return false; old = INOTIFY_E(old_fsn); new = INOTIFY_E(new_fsn); - if ((old_fsn->mask == new_fsn->mask) && + if (old->mask & FS_IN_IGNORED) + return false; + if ((old->mask == new->mask) && (old_fsn->inode == new_fsn->inode) && (old->name_len == new->name_len) && (!old->name_len || !strcmp(old->name, new->name))) @@ -114,7 +114,8 @@ int inotify_handle_event(struct fsnotify_group *group, } fsn_event = &event->fse; - fsnotify_init_event(fsn_event, inode, mask); + fsnotify_init_event(fsn_event, inode); + event->mask = mask; event->wd = i_mark->wd; event->sync_cookie = cookie; event->name_len = len; diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 798f1253141a..e2901fbb9f76 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -189,7 +189,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, */ pad_name_len = round_event_name_len(fsn_event); inotify_event.len = pad_name_len; - inotify_event.mask = inotify_mask_to_arg(fsn_event->mask); + inotify_event.mask = inotify_mask_to_arg(event->mask); inotify_event.wd = event->wd; inotify_event.cookie = event->sync_cookie; @@ -634,7 +634,8 @@ static struct fsnotify_group *inotify_new_group(unsigned int max_events) return ERR_PTR(-ENOMEM); } group->overflow_event = &oevent->fse; - fsnotify_init_event(group->overflow_event, NULL, FS_Q_OVERFLOW); + fsnotify_init_event(group->overflow_event, NULL); + oevent->mask = FS_Q_OVERFLOW; oevent->wd = -1; oevent->sync_cookie = 0; oevent->name_len = 0; diff --git a/fs/notify/notification.c b/fs/notify/notification.c index 3c3e36745f59..027d5d5bb90e 100644 --- a/fs/notify/notification.c +++ b/fs/notify/notification.c @@ -71,7 +71,7 @@ void fsnotify_destroy_event(struct fsnotify_group *group, struct fsnotify_event *event) { /* Overflow events are per-group and we don't want to free them */ - if (!event || event->mask == FS_Q_OVERFLOW) + if (!event || event == group->overflow_event) return; /* * If the event is still queued, we have a problem... Do an unreliable @@ -194,23 +194,3 @@ void fsnotify_flush_notify(struct fsnotify_group *group) } spin_unlock(&group->notification_lock); } - -/* - * fsnotify_create_event - Allocate a new event which will be sent to each - * group's handle_event function if the group was interested in this - * particular event. - * - * @inode the inode which is supposed to receive the event (sometimes a - * parent of the inode to which the event happened. - * @mask what actually happened. - * @data pointer to the object which was actually affected - * @data_type flag indication if the data is a file, path, inode, nothing... - * @name the filename, if available - */ -void fsnotify_init_event(struct fsnotify_event *event, struct inode *inode, - u32 mask) -{ - INIT_LIST_HEAD(&event->list); - event->inode = inode; - event->mask = mask; -} diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 7f195d43efaf..1e4b88bd1443 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -135,7 +135,6 @@ struct fsnotify_event { struct list_head list; /* inode may ONLY be dereferenced during handle_event(). */ struct inode *inode; /* either the inode the event happened to or its parent */ - u32 mask; /* the type of access, bitwise OR for FS_* event types */ }; /* @@ -485,9 +484,12 @@ extern void fsnotify_put_mark(struct fsnotify_mark *mark); extern void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info); extern bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info); -/* put here because inotify does some weird stuff when destroying watches */ -extern void fsnotify_init_event(struct fsnotify_event *event, - struct inode *to_tell, u32 mask); +static inline void fsnotify_init_event(struct fsnotify_event *event, + struct inode *inode) +{ + INIT_LIST_HEAD(&event->list); + event->inode = inode; +} #else -- cgit From d325c402964e7c63db94e9138c530832269a1297 Mon Sep 17 00:00:00 2001 From: Miroslav Benes Date: Fri, 28 Dec 2018 14:38:47 +0100 Subject: ring-buffer: Remove unused function ring_buffer_page_len() Commit 6b7e633fe9c2 ("tracing: Remove extra zeroing out of the ring buffer page") removed the only caller of ring_buffer_page_len(). The function is now unused and may be removed. Link: http://lkml.kernel.org/r/20181228133847.106177-1-mbenes@suse.cz Signed-off-by: Miroslav Benes Signed-off-by: Steven Rostedt (VMware) --- include/linux/ring_buffer.h | 2 -- kernel/trace/ring_buffer.c | 14 -------------- 2 files changed, 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 5b9ae62272bb..f1429675f252 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -187,8 +187,6 @@ void ring_buffer_set_clock(struct ring_buffer *buffer, void ring_buffer_set_time_stamp_abs(struct ring_buffer *buffer, bool abs); bool ring_buffer_time_stamp_abs(struct ring_buffer *buffer); -size_t ring_buffer_page_len(void *page); - size_t ring_buffer_nr_pages(struct ring_buffer *buffer, int cpu); size_t ring_buffer_nr_dirty_pages(struct ring_buffer *buffer, int cpu); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 06e864a334bb..9a91479bbbfe 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -353,20 +353,6 @@ static void rb_init_page(struct buffer_data_page *bpage) local_set(&bpage->commit, 0); } -/** - * ring_buffer_page_len - the size of data on the page. - * @page: The page to read - * - * Returns the amount of data on the page, including buffer page header. - */ -size_t ring_buffer_page_len(void *page) -{ - struct buffer_data_page *bpage = page; - - return (local_read(&bpage->commit) & ~RB_MISSED_FLAGS) - + BUF_PAGE_HDR_SIZE; -} - /* * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing * this issue out. -- cgit From fd9dc93e36231fb6d520e0edd467058fad4fd12d Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 6 Feb 2019 13:07:11 -0500 Subject: XArray: Change xa_insert to return -EBUSY Userspace translates EEXIST to "File exists" which isn't a very good error message for the problem. "Device or resource busy" is a better indication of what went wrong. Signed-off-by: Matthew Wilcox --- Documentation/core-api/xarray.rst | 2 +- fs/nilfs2/btnode.c | 2 +- include/linux/xarray.h | 6 +++--- lib/test_xarray.c | 4 ++-- lib/xarray.c | 4 ++-- 5 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/Documentation/core-api/xarray.rst b/Documentation/core-api/xarray.rst index 5d54b27c6eba..42bb1a62650f 100644 --- a/Documentation/core-api/xarray.rst +++ b/Documentation/core-api/xarray.rst @@ -85,7 +85,7 @@ which was at that index; if it returns the same entry which was passed as If you want to only store a new entry to an index if the current entry at that index is ``NULL``, you can use :c:func:`xa_insert` which -returns ``-EEXIST`` if the entry is not empty. +returns ``-EBUSY`` if the entry is not empty. You can enquire whether a mark is set on an entry by using :c:func:`xa_get_mark`. If the entry is not ``NULL``, you can set a mark diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c index f2129a5d9f23..4391fd3abd8f 100644 --- a/fs/nilfs2/btnode.c +++ b/fs/nilfs2/btnode.c @@ -189,7 +189,7 @@ retry: */ if (!err) return 0; - else if (err != -EEXIST) + else if (err != -EBUSY) goto failed_unlock; err = invalidate_inode_pages2_range(btnc, newkey, newkey); diff --git a/include/linux/xarray.h b/include/linux/xarray.h index e11841537631..57cf35c4d094 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -664,7 +664,7 @@ static inline void *xa_cmpxchg_irq(struct xarray *xa, unsigned long index, * * Context: Any context. Takes and releases the xa_lock. May sleep if * the @gfp flags permit. - * Return: 0 if the store succeeded. -EEXIST if another entry was present. + * Return: 0 if the store succeeded. -EBUSY if another entry was present. * -ENOMEM if memory could not be allocated. */ static inline int xa_insert(struct xarray *xa, unsigned long index, @@ -693,7 +693,7 @@ static inline int xa_insert(struct xarray *xa, unsigned long index, * * Context: Any context. Takes and releases the xa_lock while * disabling softirqs. May sleep if the @gfp flags permit. - * Return: 0 if the store succeeded. -EEXIST if another entry was present. + * Return: 0 if the store succeeded. -EBUSY if another entry was present. * -ENOMEM if memory could not be allocated. */ static inline int xa_insert_bh(struct xarray *xa, unsigned long index, @@ -722,7 +722,7 @@ static inline int xa_insert_bh(struct xarray *xa, unsigned long index, * * Context: Process context. Takes and releases the xa_lock while * disabling interrupts. May sleep if the @gfp flags permit. - * Return: 0 if the store succeeded. -EEXIST if another entry was present. + * Return: 0 if the store succeeded. -EBUSY if another entry was present. * -ENOMEM if memory could not be allocated. */ static inline int xa_insert_irq(struct xarray *xa, unsigned long index, diff --git a/lib/test_xarray.c b/lib/test_xarray.c index 671a93ee09e6..9d894e93456c 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -346,7 +346,7 @@ static noinline void check_cmpxchg(struct xarray *xa) XA_BUG_ON(xa, !xa_empty(xa)); XA_BUG_ON(xa, xa_store_index(xa, 12345678, GFP_KERNEL) != NULL); - XA_BUG_ON(xa, xa_insert(xa, 12345678, xa, GFP_KERNEL) != -EEXIST); + XA_BUG_ON(xa, xa_insert(xa, 12345678, xa, GFP_KERNEL) != -EBUSY); XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, SIX, FIVE, GFP_KERNEL) != LOTS); XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, LOTS, FIVE, GFP_KERNEL) != LOTS); XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, FIVE, LOTS, GFP_KERNEL) != FIVE); @@ -388,7 +388,7 @@ static noinline void check_reserve(struct xarray *xa) /* But xa_insert does not */ xa_reserve(xa, 12345678, GFP_KERNEL); XA_BUG_ON(xa, xa_insert(xa, 12345678, xa_mk_value(12345678), 0) != - -EEXIST); + -EBUSY); XA_BUG_ON(xa, xa_empty(xa)); XA_BUG_ON(xa, xa_erase(xa, 12345678) != NULL); XA_BUG_ON(xa, !xa_empty(xa)); diff --git a/lib/xarray.c b/lib/xarray.c index fb783bf2a441..1b97ca58bd15 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -1451,7 +1451,7 @@ EXPORT_SYMBOL(__xa_cmpxchg); * * Context: Any context. Expects xa_lock to be held on entry. May * release and reacquire xa_lock if @gfp flags permit. - * Return: 0 if the store succeeded. -EEXIST if another entry was present. + * Return: 0 if the store succeeded. -EBUSY if another entry was present. * -ENOMEM if memory could not be allocated. */ int __xa_insert(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp) @@ -1471,7 +1471,7 @@ int __xa_insert(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp) if (xa_track_free(xa)) xas_clear_mark(&xas, XA_FREE_MARK); } else { - xas_set_err(&xas, -EEXIST); + xas_set_err(&xas, -EBUSY); } } while (__xas_nomem(&xas, gfp)); -- cgit From 3ccaf57a6a63ad171a951dcaddffc453b2414c7b Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 26 Oct 2018 14:43:22 -0400 Subject: XArray: Add support for 1s-based allocation A lot of places want to allocate IDs starting at 1 instead of 0. While the xa_alloc() API supports this, it's not very efficient if lots of IDs are allocated, due to having to walk down to the bottom of the tree to see if ID 1 is available, then all the way over to the next non-allocated ID. This method marks ID 0 as being occupied which wastes one slot in the XArray, but preserves xa_empty() as working. Signed-off-by: Matthew Wilcox --- Documentation/core-api/xarray.rst | 10 +++-- include/linux/xarray.h | 14 ++++++- lib/test_xarray.c | 88 ++++++++++++++++++++++++--------------- lib/xarray.c | 11 +++++ 4 files changed, 86 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/Documentation/core-api/xarray.rst b/Documentation/core-api/xarray.rst index 42bb1a62650f..e90c4925cd37 100644 --- a/Documentation/core-api/xarray.rst +++ b/Documentation/core-api/xarray.rst @@ -131,17 +131,21 @@ If you use :c:func:`DEFINE_XARRAY_ALLOC` to define the XArray, or initialise it by passing ``XA_FLAGS_ALLOC`` to :c:func:`xa_init_flags`, the XArray changes to track whether entries are in use or not. -You can call :c:func:`xa_alloc` to store the entry at any unused index +You can call :c:func:`xa_alloc` to store the entry at an unused index in the XArray. If you need to modify the array from interrupt context, you can use :c:func:`xa_alloc_bh` or :c:func:`xa_alloc_irq` to disable interrupts while allocating the ID. -Using :c:func:`xa_store`, :c:func:`xa_cmpxchg` or :c:func:`xa_insert` -will mark the entry as being allocated. Unlike a normal XArray, storing +Using :c:func:`xa_store`, :c:func:`xa_cmpxchg` or :c:func:`xa_insert` will +also mark the entry as being allocated. Unlike a normal XArray, storing ``NULL`` will mark the entry as being in use, like :c:func:`xa_reserve`. To free an entry, use :c:func:`xa_erase` (or :c:func:`xa_release` if you only want to free the entry if it's ``NULL``). +By default, the lowest free entry is allocated starting from 0. If you +want to allocate entries starting at 1, it is more efficient to use +:c:func:`DEFINE_XARRAY_ALLOC1` or ``XA_FLAGS_ALLOC1``. + You cannot use ``XA_MARK_0`` with an allocating XArray as this mark is used to track whether an entry is free or not. The other marks are available for your use. diff --git a/include/linux/xarray.h b/include/linux/xarray.h index 57cf35c4d094..99dd0838b4ba 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -220,10 +220,13 @@ enum xa_lock_type { #define XA_FLAGS_LOCK_IRQ ((__force gfp_t)XA_LOCK_IRQ) #define XA_FLAGS_LOCK_BH ((__force gfp_t)XA_LOCK_BH) #define XA_FLAGS_TRACK_FREE ((__force gfp_t)4U) +#define XA_FLAGS_ZERO_BUSY ((__force gfp_t)8U) #define XA_FLAGS_MARK(mark) ((__force gfp_t)((1U << __GFP_BITS_SHIFT) << \ (__force unsigned)(mark))) +/* ALLOC is for a normal 0-based alloc. ALLOC1 is for an 1-based alloc */ #define XA_FLAGS_ALLOC (XA_FLAGS_TRACK_FREE | XA_FLAGS_MARK(XA_FREE_MARK)) +#define XA_FLAGS_ALLOC1 (XA_FLAGS_TRACK_FREE | XA_FLAGS_ZERO_BUSY) /** * struct xarray - The anchor of the XArray. @@ -279,7 +282,7 @@ struct xarray { #define DEFINE_XARRAY(name) DEFINE_XARRAY_FLAGS(name, 0) /** - * DEFINE_XARRAY_ALLOC() - Define an XArray which can allocate IDs. + * DEFINE_XARRAY_ALLOC() - Define an XArray which allocates IDs starting at 0. * @name: A string that names your XArray. * * This is intended for file scope definitions of allocating XArrays. @@ -287,6 +290,15 @@ struct xarray { */ #define DEFINE_XARRAY_ALLOC(name) DEFINE_XARRAY_FLAGS(name, XA_FLAGS_ALLOC) +/** + * DEFINE_XARRAY_ALLOC1() - Define an XArray which allocates IDs starting at 1. + * @name: A string that names your XArray. + * + * This is intended for file scope definitions of allocating XArrays. + * See also DEFINE_XARRAY(). + */ +#define DEFINE_XARRAY_ALLOC1(name) DEFINE_XARRAY_FLAGS(name, XA_FLAGS_ALLOC1) + void *xa_load(struct xarray *, unsigned long index); void *xa_store(struct xarray *, unsigned long index, void *entry, gfp_t); void *xa_erase(struct xarray *, unsigned long index); diff --git a/lib/test_xarray.c b/lib/test_xarray.c index 9d894e93456c..cd74f8f32abe 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -589,64 +589,86 @@ static noinline void check_multi_store(struct xarray *xa) #endif } -static DEFINE_XARRAY_ALLOC(xa0); - -static noinline void check_xa_alloc(void) +static noinline void check_xa_alloc_1(struct xarray *xa, unsigned int base) { int i; u32 id; - /* An empty array should assign 0 to the first alloc */ - xa_alloc_index(&xa0, 0, GFP_KERNEL); + XA_BUG_ON(xa, !xa_empty(xa)); + /* An empty array should assign %base to the first alloc */ + xa_alloc_index(xa, base, GFP_KERNEL); /* Erasing it should make the array empty again */ - xa_erase_index(&xa0, 0); - XA_BUG_ON(&xa0, !xa_empty(&xa0)); + xa_erase_index(xa, base); + XA_BUG_ON(xa, !xa_empty(xa)); + + /* And it should assign %base again */ + xa_alloc_index(xa, base, GFP_KERNEL); + + /* Allocating and then erasing a lot should not lose base */ + for (i = base + 1; i < 2 * XA_CHUNK_SIZE; i++) + xa_alloc_index(xa, i, GFP_KERNEL); + for (i = base; i < 2 * XA_CHUNK_SIZE; i++) + xa_erase_index(xa, i); + xa_alloc_index(xa, base, GFP_KERNEL); + + /* Destroying the array should do the same as erasing */ + xa_destroy(xa); - /* And it should assign 0 again */ - xa_alloc_index(&xa0, 0, GFP_KERNEL); + /* And it should assign %base again */ + xa_alloc_index(xa, base, GFP_KERNEL); - /* The next assigned ID should be 1 */ - xa_alloc_index(&xa0, 1, GFP_KERNEL); - xa_erase_index(&xa0, 1); + /* The next assigned ID should be base+1 */ + xa_alloc_index(xa, base + 1, GFP_KERNEL); + xa_erase_index(xa, base + 1); /* Storing a value should mark it used */ - xa_store_index(&xa0, 1, GFP_KERNEL); - xa_alloc_index(&xa0, 2, GFP_KERNEL); + xa_store_index(xa, base + 1, GFP_KERNEL); + xa_alloc_index(xa, base + 2, GFP_KERNEL); - /* If we then erase 0, it should be free */ - xa_erase_index(&xa0, 0); - xa_alloc_index(&xa0, 0, GFP_KERNEL); + /* If we then erase base, it should be free */ + xa_erase_index(xa, base); + xa_alloc_index(xa, base, GFP_KERNEL); - xa_erase_index(&xa0, 1); - xa_erase_index(&xa0, 2); + xa_erase_index(xa, base + 1); + xa_erase_index(xa, base + 2); for (i = 1; i < 5000; i++) { - xa_alloc_index(&xa0, i, GFP_KERNEL); + xa_alloc_index(xa, base + i, GFP_KERNEL); } - xa_destroy(&xa0); + xa_destroy(xa); + /* Check that we fail properly at the limit of allocation */ id = 0xfffffffeU; - XA_BUG_ON(&xa0, xa_alloc(&xa0, &id, UINT_MAX, xa_mk_index(id), + XA_BUG_ON(xa, xa_alloc(xa, &id, UINT_MAX, xa_mk_index(id), GFP_KERNEL) != 0); - XA_BUG_ON(&xa0, id != 0xfffffffeU); - XA_BUG_ON(&xa0, xa_alloc(&xa0, &id, UINT_MAX, xa_mk_index(id), + XA_BUG_ON(xa, id != 0xfffffffeU); + XA_BUG_ON(xa, xa_alloc(xa, &id, UINT_MAX, xa_mk_index(id), GFP_KERNEL) != 0); - XA_BUG_ON(&xa0, id != 0xffffffffU); - XA_BUG_ON(&xa0, xa_alloc(&xa0, &id, UINT_MAX, xa_mk_index(id), + XA_BUG_ON(xa, id != 0xffffffffU); + XA_BUG_ON(xa, xa_alloc(xa, &id, UINT_MAX, xa_mk_index(id), GFP_KERNEL) != -ENOSPC); - XA_BUG_ON(&xa0, id != 0xffffffffU); - xa_destroy(&xa0); + XA_BUG_ON(xa, id != 0xffffffffU); + xa_destroy(xa); id = 10; - XA_BUG_ON(&xa0, xa_alloc(&xa0, &id, 5, xa_mk_index(id), + XA_BUG_ON(xa, xa_alloc(xa, &id, 5, xa_mk_index(id), GFP_KERNEL) != -ENOSPC); - XA_BUG_ON(&xa0, xa_store_index(&xa0, 3, GFP_KERNEL) != 0); - XA_BUG_ON(&xa0, xa_alloc(&xa0, &id, 5, xa_mk_index(id), + XA_BUG_ON(xa, xa_store_index(xa, 3, GFP_KERNEL) != 0); + XA_BUG_ON(xa, xa_alloc(xa, &id, 5, xa_mk_index(id), GFP_KERNEL) != -ENOSPC); - xa_erase_index(&xa0, 3); - XA_BUG_ON(&xa0, !xa_empty(&xa0)); + xa_erase_index(xa, 3); + XA_BUG_ON(xa, !xa_empty(xa)); +} + +static DEFINE_XARRAY_ALLOC(xa0); +static DEFINE_XARRAY_ALLOC1(xa1); + +static noinline void check_xa_alloc(void) +{ + check_xa_alloc_1(&xa0, 0); + check_xa_alloc_1(&xa1, 1); } static noinline void __check_store_iter(struct xarray *xa, unsigned long start, diff --git a/lib/xarray.c b/lib/xarray.c index 1b97ca58bd15..468fb7b7963f 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -57,6 +57,11 @@ static inline bool xa_track_free(const struct xarray *xa) return xa->xa_flags & XA_FLAGS_TRACK_FREE; } +static inline bool xa_zero_busy(const struct xarray *xa) +{ + return xa->xa_flags & XA_FLAGS_ZERO_BUSY; +} + static inline void xa_mark_set(struct xarray *xa, xa_mark_t mark) { if (!(xa->xa_flags & XA_FLAGS_MARK(mark))) @@ -432,6 +437,8 @@ static void xas_shrink(struct xa_state *xas) break; if (!xa_is_node(entry) && node->shift) break; + if (xa_is_zero(entry) && xa_zero_busy(xa)) + entry = NULL; xas->xa_node = XAS_BOUNDS; RCU_INIT_POINTER(xa->xa_head, entry); @@ -628,6 +635,8 @@ static void *xas_create(struct xa_state *xas, bool allow_root) if (xas_top(node)) { entry = xa_head_locked(xa); xas->xa_node = NULL; + if (!entry && xa_zero_busy(xa)) + entry = XA_ZERO_ENTRY; shift = xas_expand(xas, entry); if (shift < 0) return NULL; @@ -1942,6 +1951,8 @@ void xa_destroy(struct xarray *xa) entry = xa_head_locked(xa); RCU_INIT_POINTER(xa->xa_head, NULL); xas_init_marks(&xas); + if (xa_zero_busy(xa)) + xa_mark_clear(xa, XA_FREE_MARK); /* lockdep checks we're still holding the lock in xas_free_nodes() */ if (xa_is_node(entry)) xas_free_nodes(&xas, xa_to_node(entry)); -- cgit From a3e4d3f97ec844de005a679585c04c5c03dfbdb6 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Mon, 31 Dec 2018 10:41:01 -0500 Subject: XArray: Redesign xa_alloc API It was too easy to forget to initialise the start index. Add an xa_limit data structure which can be used to pass min & max, and define a couple of special values for common cases. Also add some more tests cribbed from the IDR test suite. Change the return value from -ENOSPC to -EBUSY to match xa_insert(). Signed-off-by: Matthew Wilcox --- include/linux/xarray.h | 80 +++++++++++++++++++++++++++++----------------- lib/test_xarray.c | 86 ++++++++++++++++++++++++++++++++++++++++---------- lib/xarray.c | 29 ++++++++--------- 3 files changed, 135 insertions(+), 60 deletions(-) (limited to 'include/linux') diff --git a/include/linux/xarray.h b/include/linux/xarray.h index 99dd0838b4ba..883bb958e462 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -200,6 +200,27 @@ static inline int xa_err(void *entry) return 0; } +/** + * struct xa_limit - Represents a range of IDs. + * @min: The lowest ID to allocate (inclusive). + * @max: The maximum ID to allocate (inclusive). + * + * This structure is used either directly or via the XA_LIMIT() macro + * to communicate the range of IDs that are valid for allocation. + * Two common ranges are predefined for you: + * * xa_limit_32b - [0 - UINT_MAX] + * * xa_limit_31b - [0 - INT_MAX] + */ +struct xa_limit { + u32 max; + u32 min; +}; + +#define XA_LIMIT(_min, _max) (struct xa_limit) { .min = _min, .max = _max } + +#define xa_limit_32b XA_LIMIT(0, UINT_MAX) +#define xa_limit_31b XA_LIMIT(0, INT_MAX) + typedef unsigned __bitwise xa_mark_t; #define XA_MARK_0 ((__force xa_mark_t)0U) #define XA_MARK_1 ((__force xa_mark_t)1U) @@ -476,7 +497,8 @@ void *__xa_store(struct xarray *, unsigned long index, void *entry, gfp_t); void *__xa_cmpxchg(struct xarray *, unsigned long index, void *old, void *entry, gfp_t); int __xa_insert(struct xarray *, unsigned long index, void *entry, gfp_t); -int __xa_alloc(struct xarray *, u32 *id, u32 max, void *entry, gfp_t); +int __must_check __xa_alloc(struct xarray *, u32 *id, void *entry, + struct xa_limit, gfp_t); int __xa_reserve(struct xarray *, unsigned long index, gfp_t); void __xa_set_mark(struct xarray *, unsigned long index, xa_mark_t); void __xa_clear_mark(struct xarray *, unsigned long index, xa_mark_t); @@ -753,26 +775,26 @@ static inline int xa_insert_irq(struct xarray *xa, unsigned long index, * xa_alloc() - Find somewhere to store this entry in the XArray. * @xa: XArray. * @id: Pointer to ID. - * @max: Maximum ID to allocate (inclusive). * @entry: New entry. + * @limit: Range of ID to allocate. * @gfp: Memory allocation flags. * - * Allocates an unused ID in the range specified by @id and @max. - * Updates the @id pointer with the index, then stores the entry at that - * index. A concurrent lookup will not see an uninitialised @id. + * Finds an empty entry in @xa between @limit.min and @limit.max, + * stores the index into the @id pointer, then stores the entry at + * that index. A concurrent lookup will not see an uninitialised @id. * - * Context: Process context. Takes and releases the xa_lock. May sleep if + * Context: Any context. Takes and releases the xa_lock. May sleep if * the @gfp flags permit. - * Return: 0 on success, -ENOMEM if memory allocation fails or -ENOSPC if - * there is no more space in the XArray. + * Return: 0 on success, -ENOMEM if memory could not be allocated or + * -EBUSY if there are no free entries in @limit. */ -static inline int xa_alloc(struct xarray *xa, u32 *id, u32 max, void *entry, - gfp_t gfp) +static inline __must_check int xa_alloc(struct xarray *xa, u32 *id, + void *entry, struct xa_limit limit, gfp_t gfp) { int err; xa_lock(xa); - err = __xa_alloc(xa, id, max, entry, gfp); + err = __xa_alloc(xa, id, entry, limit, gfp); xa_unlock(xa); return err; @@ -782,26 +804,26 @@ static inline int xa_alloc(struct xarray *xa, u32 *id, u32 max, void *entry, * xa_alloc_bh() - Find somewhere to store this entry in the XArray. * @xa: XArray. * @id: Pointer to ID. - * @max: Maximum ID to allocate (inclusive). * @entry: New entry. + * @limit: Range of ID to allocate. * @gfp: Memory allocation flags. * - * Allocates an unused ID in the range specified by @id and @max. - * Updates the @id pointer with the index, then stores the entry at that - * index. A concurrent lookup will not see an uninitialised @id. + * Finds an empty entry in @xa between @limit.min and @limit.max, + * stores the index into the @id pointer, then stores the entry at + * that index. A concurrent lookup will not see an uninitialised @id. * * Context: Any context. Takes and releases the xa_lock while * disabling softirqs. May sleep if the @gfp flags permit. - * Return: 0 on success, -ENOMEM if memory allocation fails or -ENOSPC if - * there is no more space in the XArray. + * Return: 0 on success, -ENOMEM if memory could not be allocated or + * -EBUSY if there are no free entries in @limit. */ -static inline int xa_alloc_bh(struct xarray *xa, u32 *id, u32 max, void *entry, - gfp_t gfp) +static inline int __must_check xa_alloc_bh(struct xarray *xa, u32 *id, + void *entry, struct xa_limit limit, gfp_t gfp) { int err; xa_lock_bh(xa); - err = __xa_alloc(xa, id, max, entry, gfp); + err = __xa_alloc(xa, id, entry, limit, gfp); xa_unlock_bh(xa); return err; @@ -811,26 +833,26 @@ static inline int xa_alloc_bh(struct xarray *xa, u32 *id, u32 max, void *entry, * xa_alloc_irq() - Find somewhere to store this entry in the XArray. * @xa: XArray. * @id: Pointer to ID. - * @max: Maximum ID to allocate (inclusive). * @entry: New entry. + * @limit: Range of ID to allocate. * @gfp: Memory allocation flags. * - * Allocates an unused ID in the range specified by @id and @max. - * Updates the @id pointer with the index, then stores the entry at that - * index. A concurrent lookup will not see an uninitialised @id. + * Finds an empty entry in @xa between @limit.min and @limit.max, + * stores the index into the @id pointer, then stores the entry at + * that index. A concurrent lookup will not see an uninitialised @id. * * Context: Process context. Takes and releases the xa_lock while * disabling interrupts. May sleep if the @gfp flags permit. - * Return: 0 on success, -ENOMEM if memory allocation fails or -ENOSPC if - * there is no more space in the XArray. + * Return: 0 on success, -ENOMEM if memory could not be allocated or + * -EBUSY if there are no free entries in @limit. */ -static inline int xa_alloc_irq(struct xarray *xa, u32 *id, u32 max, void *entry, - gfp_t gfp) +static inline int __must_check xa_alloc_irq(struct xarray *xa, u32 *id, + void *entry, struct xa_limit limit, gfp_t gfp) { int err; xa_lock_irq(xa); - err = __xa_alloc(xa, id, max, entry, gfp); + err = __xa_alloc(xa, id, entry, limit, gfp); xa_unlock_irq(xa); return err; diff --git a/lib/test_xarray.c b/lib/test_xarray.c index cd74f8f32abe..b5a6b981454d 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -40,9 +40,9 @@ static void *xa_store_index(struct xarray *xa, unsigned long index, gfp_t gfp) static void xa_alloc_index(struct xarray *xa, unsigned long index, gfp_t gfp) { - u32 id = 0; + u32 id; - XA_BUG_ON(xa, xa_alloc(xa, &id, UINT_MAX, xa_mk_index(index), + XA_BUG_ON(xa, xa_alloc(xa, &id, xa_mk_index(index), xa_limit_32b, gfp) != 0); XA_BUG_ON(xa, id != index); } @@ -640,28 +640,81 @@ static noinline void check_xa_alloc_1(struct xarray *xa, unsigned int base) xa_destroy(xa); /* Check that we fail properly at the limit of allocation */ - id = 0xfffffffeU; - XA_BUG_ON(xa, xa_alloc(xa, &id, UINT_MAX, xa_mk_index(id), + XA_BUG_ON(xa, xa_alloc(xa, &id, xa_mk_index(UINT_MAX - 1), + XA_LIMIT(UINT_MAX - 1, UINT_MAX), GFP_KERNEL) != 0); XA_BUG_ON(xa, id != 0xfffffffeU); - XA_BUG_ON(xa, xa_alloc(xa, &id, UINT_MAX, xa_mk_index(id), + XA_BUG_ON(xa, xa_alloc(xa, &id, xa_mk_index(UINT_MAX), + XA_LIMIT(UINT_MAX - 1, UINT_MAX), GFP_KERNEL) != 0); XA_BUG_ON(xa, id != 0xffffffffU); - XA_BUG_ON(xa, xa_alloc(xa, &id, UINT_MAX, xa_mk_index(id), - GFP_KERNEL) != -ENOSPC); - XA_BUG_ON(xa, id != 0xffffffffU); + id = 3; + XA_BUG_ON(xa, xa_alloc(xa, &id, xa_mk_index(0), + XA_LIMIT(UINT_MAX - 1, UINT_MAX), + GFP_KERNEL) != -EBUSY); + XA_BUG_ON(xa, id != 3); xa_destroy(xa); - id = 10; - XA_BUG_ON(xa, xa_alloc(xa, &id, 5, xa_mk_index(id), - GFP_KERNEL) != -ENOSPC); + XA_BUG_ON(xa, xa_alloc(xa, &id, xa_mk_index(10), XA_LIMIT(10, 5), + GFP_KERNEL) != -EBUSY); XA_BUG_ON(xa, xa_store_index(xa, 3, GFP_KERNEL) != 0); - XA_BUG_ON(xa, xa_alloc(xa, &id, 5, xa_mk_index(id), - GFP_KERNEL) != -ENOSPC); + XA_BUG_ON(xa, xa_alloc(xa, &id, xa_mk_index(10), XA_LIMIT(10, 5), + GFP_KERNEL) != -EBUSY); xa_erase_index(xa, 3); XA_BUG_ON(xa, !xa_empty(xa)); } +static noinline void check_xa_alloc_2(struct xarray *xa, unsigned int base) +{ + unsigned int i, id; + unsigned long index; + void *entry; + + /* Allocate and free a NULL and check xa_empty() behaves */ + XA_BUG_ON(xa, !xa_empty(xa)); + XA_BUG_ON(xa, xa_alloc(xa, &id, NULL, xa_limit_32b, GFP_KERNEL) != 0); + XA_BUG_ON(xa, id != base); + XA_BUG_ON(xa, xa_empty(xa)); + XA_BUG_ON(xa, xa_erase(xa, id) != NULL); + XA_BUG_ON(xa, !xa_empty(xa)); + + /* Ditto, but check destroy instead of erase */ + XA_BUG_ON(xa, !xa_empty(xa)); + XA_BUG_ON(xa, xa_alloc(xa, &id, NULL, xa_limit_32b, GFP_KERNEL) != 0); + XA_BUG_ON(xa, id != base); + XA_BUG_ON(xa, xa_empty(xa)); + xa_destroy(xa); + XA_BUG_ON(xa, !xa_empty(xa)); + + for (i = base; i < base + 10; i++) { + XA_BUG_ON(xa, xa_alloc(xa, &id, NULL, xa_limit_32b, + GFP_KERNEL) != 0); + XA_BUG_ON(xa, id != i); + } + + XA_BUG_ON(xa, xa_store(xa, 3, xa_mk_index(3), GFP_KERNEL) != NULL); + XA_BUG_ON(xa, xa_store(xa, 4, xa_mk_index(4), GFP_KERNEL) != NULL); + XA_BUG_ON(xa, xa_store(xa, 4, NULL, GFP_KERNEL) != xa_mk_index(4)); + XA_BUG_ON(xa, xa_erase(xa, 5) != NULL); + XA_BUG_ON(xa, xa_alloc(xa, &id, NULL, xa_limit_32b, GFP_KERNEL) != 0); + XA_BUG_ON(xa, id != 5); + + xa_for_each(xa, index, entry) { + xa_erase_index(xa, index); + } + + for (i = base; i < base + 9; i++) { + XA_BUG_ON(xa, xa_erase(xa, i) != NULL); + XA_BUG_ON(xa, xa_empty(xa)); + } + XA_BUG_ON(xa, xa_erase(xa, 8) != NULL); + XA_BUG_ON(xa, xa_empty(xa)); + XA_BUG_ON(xa, xa_erase(xa, base + 9) != NULL); + XA_BUG_ON(xa, !xa_empty(xa)); + + xa_destroy(xa); +} + static DEFINE_XARRAY_ALLOC(xa0); static DEFINE_XARRAY_ALLOC1(xa1); @@ -669,6 +722,8 @@ static noinline void check_xa_alloc(void) { check_xa_alloc_1(&xa0, 0); check_xa_alloc_1(&xa1, 1); + check_xa_alloc_2(&xa0, 0); + check_xa_alloc_2(&xa1, 1); } static noinline void __check_store_iter(struct xarray *xa, unsigned long start, @@ -1219,9 +1274,8 @@ static void check_align_1(struct xarray *xa, char *name) void *entry; for (i = 0; i < 8; i++) { - id = 0; - XA_BUG_ON(xa, xa_alloc(xa, &id, UINT_MAX, name + i, GFP_KERNEL) - != 0); + XA_BUG_ON(xa, xa_alloc(xa, &id, name + i, xa_limit_32b, + GFP_KERNEL) != 0); XA_BUG_ON(xa, id != i); } xa_for_each(xa, index, entry) diff --git a/lib/xarray.c b/lib/xarray.c index 468fb7b7963f..c707388fb05e 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -1615,23 +1615,23 @@ EXPORT_SYMBOL(xa_store_range); * __xa_alloc() - Find somewhere to store this entry in the XArray. * @xa: XArray. * @id: Pointer to ID. - * @max: Maximum ID to allocate (inclusive). + * @limit: Range for allocated ID. * @entry: New entry. * @gfp: Memory allocation flags. * - * Allocates an unused ID in the range specified by @id and @max. - * Updates the @id pointer with the index, then stores the entry at that - * index. A concurrent lookup will not see an uninitialised @id. + * Finds an empty entry in @xa between @limit.min and @limit.max, + * stores the index into the @id pointer, then stores the entry at + * that index. A concurrent lookup will not see an uninitialised @id. * * Context: Any context. Expects xa_lock to be held on entry. May * release and reacquire xa_lock if @gfp flags permit. - * Return: 0 on success, -ENOMEM if memory allocation fails or -ENOSPC if - * there is no more space in the XArray. + * Return: 0 on success, -ENOMEM if memory could not be allocated or + * -EBUSY if there are no free entries in @limit. */ -int __xa_alloc(struct xarray *xa, u32 *id, u32 max, void *entry, gfp_t gfp) +int __xa_alloc(struct xarray *xa, u32 *id, void *entry, + struct xa_limit limit, gfp_t gfp) { XA_STATE(xas, xa, 0); - int err; if (WARN_ON_ONCE(xa_is_advanced(entry))) return -EINVAL; @@ -1642,18 +1642,17 @@ int __xa_alloc(struct xarray *xa, u32 *id, u32 max, void *entry, gfp_t gfp) entry = XA_ZERO_ENTRY; do { - xas.xa_index = *id; - xas_find_marked(&xas, max, XA_FREE_MARK); + xas.xa_index = limit.min; + xas_find_marked(&xas, limit.max, XA_FREE_MARK); if (xas.xa_node == XAS_RESTART) - xas_set_err(&xas, -ENOSPC); + xas_set_err(&xas, -EBUSY); + else + *id = xas.xa_index; xas_store(&xas, entry); xas_clear_mark(&xas, XA_FREE_MARK); } while (__xas_nomem(&xas, gfp)); - err = xas_error(&xas); - if (!err) - *id = xas.xa_index; - return err; + return xas_error(&xas); } EXPORT_SYMBOL(__xa_alloc); -- cgit From 2fa044e51a1f35d7b04cbde07ec513b0ba195e38 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 6 Nov 2018 14:13:35 -0500 Subject: XArray: Add cyclic allocation This differs slightly from the IDR equivalent in five ways. 1. It can allocate up to UINT_MAX instead of being limited to INT_MAX, like xa_alloc(). Also like xa_alloc(), it will write to the 'id' pointer before placing the entry in the XArray. 2. The 'next' cursor is allocated separately from the XArray instead of being part of the IDR. This saves memory for all the users which do not use the cyclic allocation API and suits some users better. 3. It returns -EBUSY instead of -ENOSPC. 4. It will attempt to wrap back to the minimum value on memory allocation failure as well as on an -EBUSY error, assuming that a user would rather allocate a small ID than suffer an ID allocation failure. 5. It reports whether it has wrapped, which is important to some users. Signed-off-by: Matthew Wilcox --- Documentation/core-api/xarray.rst | 4 +- include/linux/xarray.h | 102 ++++++++++++++++++++++++++++++++++++++ lib/test_xarray.c | 53 ++++++++++++++++++++ lib/xarray.c | 50 +++++++++++++++++++ 4 files changed, 208 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/Documentation/core-api/xarray.rst b/Documentation/core-api/xarray.rst index e90c4925cd37..c7436da5c4ad 100644 --- a/Documentation/core-api/xarray.rst +++ b/Documentation/core-api/xarray.rst @@ -144,7 +144,9 @@ you only want to free the entry if it's ``NULL``). By default, the lowest free entry is allocated starting from 0. If you want to allocate entries starting at 1, it is more efficient to use -:c:func:`DEFINE_XARRAY_ALLOC1` or ``XA_FLAGS_ALLOC1``. +:c:func:`DEFINE_XARRAY_ALLOC1` or ``XA_FLAGS_ALLOC1``. If you want to +allocate IDs up to a maximum, then wrap back around to the lowest free +ID, you can use :c:func:`xa_alloc_cyclic`. You cannot use ``XA_MARK_0`` with an allocating XArray as this mark is used to track whether an entry is free or not. The other marks are diff --git a/include/linux/xarray.h b/include/linux/xarray.h index 883bb958e462..5ed6b462e754 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -242,6 +242,7 @@ enum xa_lock_type { #define XA_FLAGS_LOCK_BH ((__force gfp_t)XA_LOCK_BH) #define XA_FLAGS_TRACK_FREE ((__force gfp_t)4U) #define XA_FLAGS_ZERO_BUSY ((__force gfp_t)8U) +#define XA_FLAGS_ALLOC_WRAPPED ((__force gfp_t)16U) #define XA_FLAGS_MARK(mark) ((__force gfp_t)((1U << __GFP_BITS_SHIFT) << \ (__force unsigned)(mark))) @@ -499,6 +500,8 @@ void *__xa_cmpxchg(struct xarray *, unsigned long index, void *old, int __xa_insert(struct xarray *, unsigned long index, void *entry, gfp_t); int __must_check __xa_alloc(struct xarray *, u32 *id, void *entry, struct xa_limit, gfp_t); +int __must_check __xa_alloc_cyclic(struct xarray *, u32 *id, void *entry, + struct xa_limit, u32 *next, gfp_t); int __xa_reserve(struct xarray *, unsigned long index, gfp_t); void __xa_set_mark(struct xarray *, unsigned long index, xa_mark_t); void __xa_clear_mark(struct xarray *, unsigned long index, xa_mark_t); @@ -858,6 +861,105 @@ static inline int __must_check xa_alloc_irq(struct xarray *xa, u32 *id, return err; } +/** + * xa_alloc_cyclic() - Find somewhere to store this entry in the XArray. + * @xa: XArray. + * @id: Pointer to ID. + * @entry: New entry. + * @limit: Range of allocated ID. + * @next: Pointer to next ID to allocate. + * @gfp: Memory allocation flags. + * + * Finds an empty entry in @xa between @limit.min and @limit.max, + * stores the index into the @id pointer, then stores the entry at + * that index. A concurrent lookup will not see an uninitialised @id. + * The search for an empty entry will start at @next and will wrap + * around if necessary. + * + * Context: Any context. Takes and releases the xa_lock. May sleep if + * the @gfp flags permit. + * Return: 0 if the allocation succeeded without wrapping. 1 if the + * allocation succeeded after wrapping, -ENOMEM if memory could not be + * allocated or -EBUSY if there are no free entries in @limit. + */ +static inline int xa_alloc_cyclic(struct xarray *xa, u32 *id, void *entry, + struct xa_limit limit, u32 *next, gfp_t gfp) +{ + int err; + + xa_lock(xa); + err = __xa_alloc_cyclic(xa, id, entry, limit, next, gfp); + xa_unlock(xa); + + return err; +} + +/** + * xa_alloc_cyclic_bh() - Find somewhere to store this entry in the XArray. + * @xa: XArray. + * @id: Pointer to ID. + * @entry: New entry. + * @limit: Range of allocated ID. + * @next: Pointer to next ID to allocate. + * @gfp: Memory allocation flags. + * + * Finds an empty entry in @xa between @limit.min and @limit.max, + * stores the index into the @id pointer, then stores the entry at + * that index. A concurrent lookup will not see an uninitialised @id. + * The search for an empty entry will start at @next and will wrap + * around if necessary. + * + * Context: Any context. Takes and releases the xa_lock while + * disabling softirqs. May sleep if the @gfp flags permit. + * Return: 0 if the allocation succeeded without wrapping. 1 if the + * allocation succeeded after wrapping, -ENOMEM if memory could not be + * allocated or -EBUSY if there are no free entries in @limit. + */ +static inline int xa_alloc_cyclic_bh(struct xarray *xa, u32 *id, void *entry, + struct xa_limit limit, u32 *next, gfp_t gfp) +{ + int err; + + xa_lock_bh(xa); + err = __xa_alloc_cyclic(xa, id, entry, limit, next, gfp); + xa_unlock_bh(xa); + + return err; +} + +/** + * xa_alloc_cyclic_irq() - Find somewhere to store this entry in the XArray. + * @xa: XArray. + * @id: Pointer to ID. + * @entry: New entry. + * @limit: Range of allocated ID. + * @next: Pointer to next ID to allocate. + * @gfp: Memory allocation flags. + * + * Finds an empty entry in @xa between @limit.min and @limit.max, + * stores the index into the @id pointer, then stores the entry at + * that index. A concurrent lookup will not see an uninitialised @id. + * The search for an empty entry will start at @next and will wrap + * around if necessary. + * + * Context: Process context. Takes and releases the xa_lock while + * disabling interrupts. May sleep if the @gfp flags permit. + * Return: 0 if the allocation succeeded without wrapping. 1 if the + * allocation succeeded after wrapping, -ENOMEM if memory could not be + * allocated or -EBUSY if there are no free entries in @limit. + */ +static inline int xa_alloc_cyclic_irq(struct xarray *xa, u32 *id, void *entry, + struct xa_limit limit, u32 *next, gfp_t gfp) +{ + int err; + + xa_lock_irq(xa); + err = __xa_alloc_cyclic(xa, id, entry, limit, next, gfp); + xa_unlock_irq(xa); + + return err; +} + /** * xa_reserve() - Reserve this index in the XArray. * @xa: XArray. diff --git a/lib/test_xarray.c b/lib/test_xarray.c index b5a6b981454d..eaf53f742c72 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -715,6 +715,57 @@ static noinline void check_xa_alloc_2(struct xarray *xa, unsigned int base) xa_destroy(xa); } +static noinline void check_xa_alloc_3(struct xarray *xa, unsigned int base) +{ + struct xa_limit limit = XA_LIMIT(1, 0x3fff); + u32 next = 0; + unsigned int i, id; + unsigned long index; + void *entry; + + XA_BUG_ON(xa, xa_alloc_cyclic(xa, &id, xa_mk_index(1), limit, + &next, GFP_KERNEL) != 0); + XA_BUG_ON(xa, id != 1); + + next = 0x3ffd; + XA_BUG_ON(xa, xa_alloc_cyclic(xa, &id, xa_mk_index(0x3ffd), limit, + &next, GFP_KERNEL) != 0); + XA_BUG_ON(xa, id != 0x3ffd); + xa_erase_index(xa, 0x3ffd); + xa_erase_index(xa, 1); + XA_BUG_ON(xa, !xa_empty(xa)); + + for (i = 0x3ffe; i < 0x4003; i++) { + if (i < 0x4000) + entry = xa_mk_index(i); + else + entry = xa_mk_index(i - 0x3fff); + XA_BUG_ON(xa, xa_alloc_cyclic(xa, &id, entry, limit, + &next, GFP_KERNEL) != (id == 1)); + XA_BUG_ON(xa, xa_mk_index(id) != entry); + } + + /* Check wrap-around is handled correctly */ + if (base != 0) + xa_erase_index(xa, base); + xa_erase_index(xa, base + 1); + next = UINT_MAX; + XA_BUG_ON(xa, xa_alloc_cyclic(xa, &id, xa_mk_index(UINT_MAX), + xa_limit_32b, &next, GFP_KERNEL) != 0); + XA_BUG_ON(xa, id != UINT_MAX); + XA_BUG_ON(xa, xa_alloc_cyclic(xa, &id, xa_mk_index(base), + xa_limit_32b, &next, GFP_KERNEL) != 1); + XA_BUG_ON(xa, id != base); + XA_BUG_ON(xa, xa_alloc_cyclic(xa, &id, xa_mk_index(base + 1), + xa_limit_32b, &next, GFP_KERNEL) != 0); + XA_BUG_ON(xa, id != base + 1); + + xa_for_each(xa, index, entry) + xa_erase_index(xa, index); + + XA_BUG_ON(xa, !xa_empty(xa)); +} + static DEFINE_XARRAY_ALLOC(xa0); static DEFINE_XARRAY_ALLOC1(xa1); @@ -724,6 +775,8 @@ static noinline void check_xa_alloc(void) check_xa_alloc_1(&xa1, 1); check_xa_alloc_2(&xa0, 0); check_xa_alloc_2(&xa1, 1); + check_xa_alloc_3(&xa0, 0); + check_xa_alloc_3(&xa1, 1); } static noinline void __check_store_iter(struct xarray *xa, unsigned long start, diff --git a/lib/xarray.c b/lib/xarray.c index c707388fb05e..89e37ac50850 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -1656,6 +1656,56 @@ int __xa_alloc(struct xarray *xa, u32 *id, void *entry, } EXPORT_SYMBOL(__xa_alloc); +/** + * __xa_alloc_cyclic() - Find somewhere to store this entry in the XArray. + * @xa: XArray. + * @id: Pointer to ID. + * @entry: New entry. + * @limit: Range of allocated ID. + * @next: Pointer to next ID to allocate. + * @gfp: Memory allocation flags. + * + * Finds an empty entry in @xa between @limit.min and @limit.max, + * stores the index into the @id pointer, then stores the entry at + * that index. A concurrent lookup will not see an uninitialised @id. + * The search for an empty entry will start at @next and will wrap + * around if necessary. + * + * Context: Any context. Expects xa_lock to be held on entry. May + * release and reacquire xa_lock if @gfp flags permit. + * Return: 0 if the allocation succeeded without wrapping. 1 if the + * allocation succeeded after wrapping, -ENOMEM if memory could not be + * allocated or -EBUSY if there are no free entries in @limit. + */ +int __xa_alloc_cyclic(struct xarray *xa, u32 *id, void *entry, + struct xa_limit limit, u32 *next, gfp_t gfp) +{ + u32 min = limit.min; + int ret; + + limit.min = max(min, *next); + ret = __xa_alloc(xa, id, entry, limit, gfp); + if ((xa->xa_flags & XA_FLAGS_ALLOC_WRAPPED) && ret == 0) { + xa->xa_flags &= ~XA_FLAGS_ALLOC_WRAPPED; + ret = 1; + } + + if (ret < 0 && limit.min > min) { + limit.min = min; + ret = __xa_alloc(xa, id, entry, limit, gfp); + if (ret == 0) + ret = 1; + } + + if (ret >= 0) { + *next = *id + 1; + if (*next == 0) + xa->xa_flags |= XA_FLAGS_ALLOC_WRAPPED; + } + return ret; +} +EXPORT_SYMBOL(__xa_alloc_cyclic); + /** * __xa_set_mark() - Set this mark on this entry while locked. * @xa: XArray. -- cgit From 60b8f0ddf1a927ef02141a6610fd52575134f821 Mon Sep 17 00:00:00 2001 From: Phil Edworthy Date: Mon, 3 Dec 2018 11:13:09 +0000 Subject: clk: Add (devm_)clk_get_optional() functions This adds clk_get_optional() and devm_clk_get_optional() functions to get optional clocks. They behave the same as (devm_)clk_get() except where there is no clock producer. In this case, instead of returning -ENOENT, the function returns NULL. This makes error checking simpler and allows clk_prepare_enable, etc to be called on the returned reference without additional checks. Signed-off-by: Phil Edworthy Reviewed-by: Andy Shevchenko Cc: Russell King [sboyd@kernel.org: Document in devres.txt] Signed-off-by: Stephen Boyd --- Documentation/driver-model/devres.txt | 1 + drivers/clk/clk-devres.c | 11 +++++++++++ include/linux/clk.h | 36 +++++++++++++++++++++++++++++++++++ 3 files changed, 48 insertions(+) (limited to 'include/linux') diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt index b277cafce71e..83f38c0439cd 100644 --- a/Documentation/driver-model/devres.txt +++ b/Documentation/driver-model/devres.txt @@ -242,6 +242,7 @@ certainly invest a bit more effort into libata core layer). CLOCK devm_clk_get() + devm_clk_get_optional() devm_clk_put() devm_clk_hw_register() devm_of_clk_add_hw_provider() diff --git a/drivers/clk/clk-devres.c b/drivers/clk/clk-devres.c index c9a86156ced8..daa1fc8fba53 100644 --- a/drivers/clk/clk-devres.c +++ b/drivers/clk/clk-devres.c @@ -29,6 +29,17 @@ struct clk *devm_clk_get(struct device *dev, const char *id) } EXPORT_SYMBOL(devm_clk_get); +struct clk *devm_clk_get_optional(struct device *dev, const char *id) +{ + struct clk *clk = devm_clk_get(dev, id); + + if (clk == ERR_PTR(-ENOENT)) + return NULL; + + return clk; +} +EXPORT_SYMBOL(devm_clk_get_optional); + struct clk_bulk_devres { struct clk_bulk_data *clks; int num_clks; diff --git a/include/linux/clk.h b/include/linux/clk.h index a7773b5c0b9f..d8bc1a856b39 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -383,6 +383,17 @@ int __must_check devm_clk_bulk_get_all(struct device *dev, */ struct clk *devm_clk_get(struct device *dev, const char *id); +/** + * devm_clk_get_optional - lookup and obtain a managed reference to an optional + * clock producer. + * @dev: device for clock "consumer" + * @id: clock consumer ID + * + * Behaves the same as devm_clk_get() except where there is no clock producer. + * In this case, instead of returning -ENOENT, the function returns NULL. + */ +struct clk *devm_clk_get_optional(struct device *dev, const char *id); + /** * devm_get_clk_from_child - lookup and obtain a managed reference to a * clock producer from child node. @@ -718,6 +729,12 @@ static inline struct clk *devm_clk_get(struct device *dev, const char *id) return NULL; } +static inline struct clk *devm_clk_get_optional(struct device *dev, + const char *id) +{ + return NULL; +} + static inline int __must_check devm_clk_bulk_get(struct device *dev, int num_clks, struct clk_bulk_data *clks) { @@ -862,6 +879,25 @@ static inline void clk_bulk_disable_unprepare(int num_clks, clk_bulk_unprepare(num_clks, clks); } +/** + * clk_get_optional - lookup and obtain a reference to an optional clock + * producer. + * @dev: device for clock "consumer" + * @id: clock consumer ID + * + * Behaves the same as clk_get() except where there is no clock producer. In + * this case, instead of returning -ENOENT, the function returns NULL. + */ +static inline struct clk *clk_get_optional(struct device *dev, const char *id) +{ + struct clk *clk = clk_get(dev, id); + + if (clk == ERR_PTR(-ENOENT)) + return NULL; + + return clk; +} + #if defined(CONFIG_OF) && defined(CONFIG_COMMON_CLK) struct clk *of_clk_get(struct device_node *np, int index); struct clk *of_clk_get_by_name(struct device_node *np, const char *name); -- cgit From 3eee6c7d119cd8563ad25898f94d6c1b514da548 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Fri, 7 Dec 2018 13:09:39 +0200 Subject: clkdev: add managed clkdev lookup registration Clkdev registration lacks of managed registration functions and it seems few drivers do not drop clkdev lookups at exit. Add devm_clk_hw_register_clkdev and devm_clk_release_clkdev to ease lookup releasing at exit. Signed-off-by: Matti Vaittinen Signed-off-by: Stephen Boyd --- Documentation/driver-model/devres.txt | 1 + drivers/clk/clkdev.c | 111 +++++++++++++++++++++++++++------- include/linux/clkdev.h | 4 ++ 3 files changed, 93 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt index b277cafce71e..805b7bf5d98f 100644 --- a/Documentation/driver-model/devres.txt +++ b/Documentation/driver-model/devres.txt @@ -245,6 +245,7 @@ CLOCK devm_clk_put() devm_clk_hw_register() devm_of_clk_add_hw_provider() + devm_clk_hw_register_clkdev() DMA dmaenginem_async_device_register() diff --git a/drivers/clk/clkdev.c b/drivers/clk/clkdev.c index 9ab3db8b3988..4621f8a91fc0 100644 --- a/drivers/clk/clkdev.c +++ b/drivers/clk/clkdev.c @@ -401,6 +401,23 @@ static struct clk_lookup *__clk_register_clkdev(struct clk_hw *hw, return cl; } +static int do_clk_register_clkdev(struct clk_hw *hw, + struct clk_lookup **cl, const char *con_id, const char *dev_id) +{ + if (IS_ERR(hw)) + return PTR_ERR(hw); + /* + * Since dev_id can be NULL, and NULL is handled specially, we must + * pass it as either a NULL format string, or with "%s". + */ + if (dev_id) + *cl = __clk_register_clkdev(hw, con_id, "%s", dev_id); + else + *cl = __clk_register_clkdev(hw, con_id, NULL); + + return *cl ? 0 : -ENOMEM; +} + /** * clk_register_clkdev - register one clock lookup for a struct clk * @clk: struct clk to associate with all clk_lookups @@ -423,17 +440,8 @@ int clk_register_clkdev(struct clk *clk, const char *con_id, if (IS_ERR(clk)) return PTR_ERR(clk); - /* - * Since dev_id can be NULL, and NULL is handled specially, we must - * pass it as either a NULL format string, or with "%s". - */ - if (dev_id) - cl = __clk_register_clkdev(__clk_get_hw(clk), con_id, "%s", - dev_id); - else - cl = __clk_register_clkdev(__clk_get_hw(clk), con_id, NULL); - - return cl ? 0 : -ENOMEM; + return do_clk_register_clkdev(__clk_get_hw(clk), &cl, con_id, + dev_id); } EXPORT_SYMBOL(clk_register_clkdev); @@ -456,18 +464,75 @@ int clk_hw_register_clkdev(struct clk_hw *hw, const char *con_id, { struct clk_lookup *cl; - if (IS_ERR(hw)) - return PTR_ERR(hw); + return do_clk_register_clkdev(hw, &cl, con_id, dev_id); +} +EXPORT_SYMBOL(clk_hw_register_clkdev); - /* - * Since dev_id can be NULL, and NULL is handled specially, we must - * pass it as either a NULL format string, or with "%s". - */ - if (dev_id) - cl = __clk_register_clkdev(hw, con_id, "%s", dev_id); - else - cl = __clk_register_clkdev(hw, con_id, NULL); +static void devm_clkdev_release(struct device *dev, void *res) +{ + clkdev_drop(*(struct clk_lookup **)res); +} - return cl ? 0 : -ENOMEM; +static int devm_clk_match_clkdev(struct device *dev, void *res, void *data) +{ + struct clk_lookup **l = res; + + return *l == data; } -EXPORT_SYMBOL(clk_hw_register_clkdev); + +/** + * devm_clk_release_clkdev - Resource managed clkdev lookup release + * @dev: device this lookup is bound + * @con_id: connection ID string on device + * @dev_id: format string describing device name + * + * Drop the clkdev lookup created with devm_clk_hw_register_clkdev. + * Normally this function will not need to be called and the resource + * management code will ensure that the resource is freed. + */ +void devm_clk_release_clkdev(struct device *dev, const char *con_id, + const char *dev_id) +{ + struct clk_lookup *cl; + int rval; + + cl = clk_find(dev_id, con_id); + WARN_ON(!cl); + rval = devres_release(dev, devm_clkdev_release, + devm_clk_match_clkdev, cl); + WARN_ON(rval); +} +EXPORT_SYMBOL(devm_clk_release_clkdev); + +/** + * devm_clk_hw_register_clkdev - managed clk lookup registration for clk_hw + * @dev: device this lookup is bound + * @hw: struct clk_hw to associate with all clk_lookups + * @con_id: connection ID string on device + * @dev_id: format string describing device name + * + * con_id or dev_id may be NULL as a wildcard, just as in the rest of + * clkdev. + * + * To make things easier for mass registration, we detect error clk_hws + * from a previous clk_hw_register_*() call, and return the error code for + * those. This is to permit this function to be called immediately + * after clk_hw_register_*(). + */ +int devm_clk_hw_register_clkdev(struct device *dev, struct clk_hw *hw, + const char *con_id, const char *dev_id) +{ + int rval = -ENOMEM; + struct clk_lookup **cl; + + cl = devres_alloc(devm_clkdev_release, sizeof(*cl), GFP_KERNEL); + if (cl) { + rval = do_clk_register_clkdev(hw, cl, con_id, dev_id); + if (!rval) + devres_add(dev, cl); + else + devres_free(cl); + } + return rval; +} +EXPORT_SYMBOL(devm_clk_hw_register_clkdev); diff --git a/include/linux/clkdev.h b/include/linux/clkdev.h index 4890ff033220..ccb32af5848b 100644 --- a/include/linux/clkdev.h +++ b/include/linux/clkdev.h @@ -52,4 +52,8 @@ int clk_add_alias(const char *, const char *, const char *, struct device *); int clk_register_clkdev(struct clk *, const char *, const char *); int clk_hw_register_clkdev(struct clk_hw *, const char *, const char *); +int devm_clk_hw_register_clkdev(struct device *dev, struct clk_hw *hw, + const char *con_id, const char *dev_id); +void devm_clk_release_clkdev(struct device *dev, const char *con_id, + const char *dev_id); #endif -- cgit From 4d5f007eedb74d71a7bde2bff69b6a31ad8ab427 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 2 Jan 2019 13:28:47 +0100 Subject: time: make adjtime compat handling available for 32 bit We want to reuse the compat_timex handling on 32-bit architectures the same way we are using the compat handling for timespec when moving to 64-bit time_t. Move all definitions related to compat_timex out of the compat code into the normal timekeeping code, along with a rename to old_timex32, corresponding to the timespec/timeval structures, and make it controlled by CONFIG_COMPAT_32BIT_TIME, which 32-bit architectures will then select. Signed-off-by: Arnd Bergmann --- include/linux/compat.h | 35 ++--------------------- include/linux/time32.h | 32 ++++++++++++++++++++- kernel/compat.c | 64 ------------------------------------------ kernel/time/posix-timers.c | 14 ++-------- kernel/time/time.c | 70 +++++++++++++++++++++++++++++++++++++++++++--- 5 files changed, 102 insertions(+), 113 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index 056be0d03722..657ca6abd855 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -132,37 +132,6 @@ struct compat_tms { compat_clock_t tms_cstime; }; -struct compat_timex { - compat_uint_t modes; - compat_long_t offset; - compat_long_t freq; - compat_long_t maxerror; - compat_long_t esterror; - compat_int_t status; - compat_long_t constant; - compat_long_t precision; - compat_long_t tolerance; - struct old_timeval32 time; - compat_long_t tick; - compat_long_t ppsfreq; - compat_long_t jitter; - compat_int_t shift; - compat_long_t stabil; - compat_long_t jitcnt; - compat_long_t calcnt; - compat_long_t errcnt; - compat_long_t stbcnt; - compat_int_t tai; - - compat_int_t:32; compat_int_t:32; compat_int_t:32; compat_int_t:32; - compat_int_t:32; compat_int_t:32; compat_int_t:32; compat_int_t:32; - compat_int_t:32; compat_int_t:32; compat_int_t:32; -}; - -struct timex; -int compat_get_timex(struct timex *, const struct compat_timex __user *); -int compat_put_timex(struct compat_timex __user *, const struct timex *); - #define _COMPAT_NSIG_WORDS (_COMPAT_NSIG / _COMPAT_NSIG_BPW) typedef struct { @@ -808,7 +777,7 @@ asmlinkage long compat_sys_gettimeofday(struct old_timeval32 __user *tv, struct timezone __user *tz); asmlinkage long compat_sys_settimeofday(struct old_timeval32 __user *tv, struct timezone __user *tz); -asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp); +asmlinkage long compat_sys_adjtimex(struct old_timex32 __user *utp); /* kernel/timer.c */ asmlinkage long compat_sys_sysinfo(struct compat_sysinfo __user *info); @@ -911,7 +880,7 @@ asmlinkage long compat_sys_open_by_handle_at(int mountdirfd, struct file_handle __user *handle, int flags); asmlinkage long compat_sys_clock_adjtime(clockid_t which_clock, - struct compat_timex __user *tp); + struct old_timex32 __user *tp); asmlinkage long compat_sys_sendmmsg(int fd, struct compat_mmsghdr __user *mmsg, unsigned vlen, unsigned int flags); asmlinkage ssize_t compat_sys_process_vm_readv(compat_pid_t pid, diff --git a/include/linux/time32.h b/include/linux/time32.h index 118b9977080c..820a22e2b98b 100644 --- a/include/linux/time32.h +++ b/include/linux/time32.h @@ -10,6 +10,7 @@ */ #include +#include #define TIME_T_MAX (time_t)((1UL << ((sizeof(time_t) << 3) - 1)) - 1) @@ -35,13 +36,42 @@ struct old_utimbuf32 { old_time32_t modtime; }; +struct old_timex32 { + u32 modes; + s32 offset; + s32 freq; + s32 maxerror; + s32 esterror; + s32 status; + s32 constant; + s32 precision; + s32 tolerance; + struct old_timeval32 time; + s32 tick; + s32 ppsfreq; + s32 jitter; + s32 shift; + s32 stabil; + s32 jitcnt; + s32 calcnt; + s32 errcnt; + s32 stbcnt; + s32 tai; + + s32:32; s32:32; s32:32; s32:32; + s32:32; s32:32; s32:32; s32:32; + s32:32; s32:32; s32:32; +}; + extern int get_old_timespec32(struct timespec64 *, const void __user *); extern int put_old_timespec32(const struct timespec64 *, void __user *); extern int get_old_itimerspec32(struct itimerspec64 *its, const struct old_itimerspec32 __user *uits); extern int put_old_itimerspec32(const struct itimerspec64 *its, struct old_itimerspec32 __user *uits); - +struct timex; +int get_old_timex32(struct timex *, const struct old_timex32 __user *); +int put_old_timex32(struct old_timex32 __user *, const struct timex *); #if __BITS_PER_LONG == 64 diff --git a/kernel/compat.c b/kernel/compat.c index f01affa17e22..d8a36c6ad7c9 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include @@ -30,69 +29,6 @@ #include -int compat_get_timex(struct timex *txc, const struct compat_timex __user *utp) -{ - struct compat_timex tx32; - - memset(txc, 0, sizeof(struct timex)); - if (copy_from_user(&tx32, utp, sizeof(struct compat_timex))) - return -EFAULT; - - txc->modes = tx32.modes; - txc->offset = tx32.offset; - txc->freq = tx32.freq; - txc->maxerror = tx32.maxerror; - txc->esterror = tx32.esterror; - txc->status = tx32.status; - txc->constant = tx32.constant; - txc->precision = tx32.precision; - txc->tolerance = tx32.tolerance; - txc->time.tv_sec = tx32.time.tv_sec; - txc->time.tv_usec = tx32.time.tv_usec; - txc->tick = tx32.tick; - txc->ppsfreq = tx32.ppsfreq; - txc->jitter = tx32.jitter; - txc->shift = tx32.shift; - txc->stabil = tx32.stabil; - txc->jitcnt = tx32.jitcnt; - txc->calcnt = tx32.calcnt; - txc->errcnt = tx32.errcnt; - txc->stbcnt = tx32.stbcnt; - - return 0; -} - -int compat_put_timex(struct compat_timex __user *utp, const struct timex *txc) -{ - struct compat_timex tx32; - - memset(&tx32, 0, sizeof(struct compat_timex)); - tx32.modes = txc->modes; - tx32.offset = txc->offset; - tx32.freq = txc->freq; - tx32.maxerror = txc->maxerror; - tx32.esterror = txc->esterror; - tx32.status = txc->status; - tx32.constant = txc->constant; - tx32.precision = txc->precision; - tx32.tolerance = txc->tolerance; - tx32.time.tv_sec = txc->time.tv_sec; - tx32.time.tv_usec = txc->time.tv_usec; - tx32.tick = txc->tick; - tx32.ppsfreq = txc->ppsfreq; - tx32.jitter = txc->jitter; - tx32.shift = txc->shift; - tx32.stabil = txc->stabil; - tx32.jitcnt = txc->jitcnt; - tx32.calcnt = txc->calcnt; - tx32.errcnt = txc->errcnt; - tx32.stbcnt = txc->stbcnt; - tx32.tai = txc->tai; - if (copy_to_user(utp, &tx32, sizeof(struct compat_timex))) - return -EFAULT; - return 0; -} - static int __compat_get_timeval(struct timeval *tv, const struct old_timeval32 __user *ctv) { return (!access_ok(ctv, sizeof(*ctv)) || diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 0e84bb72a3da..8955f32f2a36 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -1123,12 +1123,8 @@ COMPAT_SYSCALL_DEFINE2(clock_gettime, clockid_t, which_clock, return err; } -#endif - -#ifdef CONFIG_COMPAT - COMPAT_SYSCALL_DEFINE2(clock_adjtime, clockid_t, which_clock, - struct compat_timex __user *, utp) + struct old_timex32 __user *, utp) { const struct k_clock *kc = clockid_to_kclock(which_clock); struct timex ktx; @@ -1139,22 +1135,18 @@ COMPAT_SYSCALL_DEFINE2(clock_adjtime, clockid_t, which_clock, if (!kc->clock_adj) return -EOPNOTSUPP; - err = compat_get_timex(&ktx, utp); + err = get_old_timex32(&ktx, utp); if (err) return err; err = kc->clock_adj(which_clock, &ktx); if (err >= 0) - err = compat_put_timex(utp, &ktx); + err = put_old_timex32(utp, &ktx); return err; } -#endif - -#ifdef CONFIG_COMPAT_32BIT_TIME - COMPAT_SYSCALL_DEFINE2(clock_getres, clockid_t, which_clock, struct old_timespec32 __user *, tp) { diff --git a/kernel/time/time.c b/kernel/time/time.c index 2edb5088a70b..2d013bc2b271 100644 --- a/kernel/time/time.c +++ b/kernel/time/time.c @@ -278,20 +278,82 @@ SYSCALL_DEFINE1(adjtimex, struct timex __user *, txc_p) return copy_to_user(txc_p, &txc, sizeof(struct timex)) ? -EFAULT : ret; } -#ifdef CONFIG_COMPAT +#ifdef CONFIG_COMPAT_32BIT_TIME +int get_old_timex32(struct timex *txc, const struct old_timex32 __user *utp) +{ + struct old_timex32 tx32; + + memset(txc, 0, sizeof(struct timex)); + if (copy_from_user(&tx32, utp, sizeof(struct old_timex32))) + return -EFAULT; + + txc->modes = tx32.modes; + txc->offset = tx32.offset; + txc->freq = tx32.freq; + txc->maxerror = tx32.maxerror; + txc->esterror = tx32.esterror; + txc->status = tx32.status; + txc->constant = tx32.constant; + txc->precision = tx32.precision; + txc->tolerance = tx32.tolerance; + txc->time.tv_sec = tx32.time.tv_sec; + txc->time.tv_usec = tx32.time.tv_usec; + txc->tick = tx32.tick; + txc->ppsfreq = tx32.ppsfreq; + txc->jitter = tx32.jitter; + txc->shift = tx32.shift; + txc->stabil = tx32.stabil; + txc->jitcnt = tx32.jitcnt; + txc->calcnt = tx32.calcnt; + txc->errcnt = tx32.errcnt; + txc->stbcnt = tx32.stbcnt; + + return 0; +} + +int put_old_timex32(struct old_timex32 __user *utp, const struct timex *txc) +{ + struct old_timex32 tx32; + + memset(&tx32, 0, sizeof(struct old_timex32)); + tx32.modes = txc->modes; + tx32.offset = txc->offset; + tx32.freq = txc->freq; + tx32.maxerror = txc->maxerror; + tx32.esterror = txc->esterror; + tx32.status = txc->status; + tx32.constant = txc->constant; + tx32.precision = txc->precision; + tx32.tolerance = txc->tolerance; + tx32.time.tv_sec = txc->time.tv_sec; + tx32.time.tv_usec = txc->time.tv_usec; + tx32.tick = txc->tick; + tx32.ppsfreq = txc->ppsfreq; + tx32.jitter = txc->jitter; + tx32.shift = txc->shift; + tx32.stabil = txc->stabil; + tx32.jitcnt = txc->jitcnt; + tx32.calcnt = txc->calcnt; + tx32.errcnt = txc->errcnt; + tx32.stbcnt = txc->stbcnt; + tx32.tai = txc->tai; + if (copy_to_user(utp, &tx32, sizeof(struct old_timex32))) + return -EFAULT; + return 0; +} -COMPAT_SYSCALL_DEFINE1(adjtimex, struct compat_timex __user *, utp) +COMPAT_SYSCALL_DEFINE1(adjtimex, struct old_timex32 __user *, utp) { struct timex txc; int err, ret; - err = compat_get_timex(&txc, utp); + err = get_old_timex32(&txc, utp); if (err) return err; ret = do_adjtimex(&txc); - err = compat_put_timex(utp, &txc); + err = put_old_timex32(utp, &txc); if (err) return err; -- cgit From 2c620ff93d9fbd5d644760d4c21d389078ec1080 Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Mon, 2 Jul 2018 22:44:20 -0700 Subject: time: Add struct __kernel_timex struct timex uses struct timeval internally. struct timeval is not y2038 safe. Introduce a new UAPI type struct __kernel_timex that is y2038 safe. struct __kernel_timex uses a timeval type that is similar to struct __kernel_timespec which preserves the same structure size across 32 bit and 64 bit ABIs. struct __kernel_timex also restructures other members of the structure to make the structure the same on 64 bit and 32 bit architectures. Note that struct __kernel_timex is the same as struct timex on a 64 bit architecture. The above solution is similar to other new y2038 syscalls that are being introduced: both 32 bit and 64 bit ABIs have a common entry, and the compat entry supports the old 32 bit syscall interface. Alternatives considered were: 1. Add new time type to struct timex that makes use of padded bits. This time type could be based on the struct __kernel_timespec. modes will use a flag to notify which time structure should be used internally. This needs some application level changes on both 64 bit and 32 bit architectures. Although 64 bit machines could continue to use the older timeval structure without any changes. 2. Add a new u8 type to struct timex that makes use of padded bits. This can be used to save higher order tv_sec bits. modes will use a flag to notify presence of such a type. This will need some application level changes on 32 bit architectures. 3. Add a new compat_timex structure that differs in only the size of the time type; keep rest of struct timex the same. This requires extra syscalls to manage all 3 cases on 64 bit architectures. This will not need any application level changes but will add more complexity from kernel side. Signed-off-by: Deepa Dinamani Signed-off-by: Arnd Bergmann --- include/linux/timex.h | 7 +++++++ include/uapi/linux/timex.h | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) (limited to 'include/linux') diff --git a/include/linux/timex.h b/include/linux/timex.h index 39c25dbebfe8..7f40e9e42ecc 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -53,6 +53,13 @@ #ifndef _LINUX_TIMEX_H #define _LINUX_TIMEX_H +/* CONFIG_64BIT_TIME enables new 64 bit time_t syscalls in the compat path + * and 32-bit emulation. + */ +#ifndef CONFIG_64BIT_TIME +#define __kernel_timex timex +#endif + #include #define ADJ_ADJTIME 0x8000 /* switch between adjtime/adjtimex modes */ diff --git a/include/uapi/linux/timex.h b/include/uapi/linux/timex.h index 92685d826444..a1c6b73016a5 100644 --- a/include/uapi/linux/timex.h +++ b/include/uapi/linux/timex.h @@ -92,6 +92,47 @@ struct timex { int :32; int :32; int :32; }; +struct __kernel_timex_timeval { + __kernel_time64_t tv_sec; + long long tv_usec; +}; + +#ifndef __kernel_timex +struct __kernel_timex { + unsigned int modes; /* mode selector */ + int :32; /* pad */ + long long offset; /* time offset (usec) */ + long long freq; /* frequency offset (scaled ppm) */ + long long maxerror;/* maximum error (usec) */ + long long esterror;/* estimated error (usec) */ + int status; /* clock command/status */ + int :32; /* pad */ + long long constant;/* pll time constant */ + long long precision;/* clock precision (usec) (read only) */ + long long tolerance;/* clock frequency tolerance (ppm) + * (read only) + */ + struct __kernel_timex_timeval time; /* (read only, except for ADJ_SETOFFSET) */ + long long tick; /* (modified) usecs between clock ticks */ + + long long ppsfreq;/* pps frequency (scaled ppm) (ro) */ + long long jitter; /* pps jitter (us) (ro) */ + int shift; /* interval duration (s) (shift) (ro) */ + int :32; /* pad */ + long long stabil; /* pps stability (scaled ppm) (ro) */ + long long jitcnt; /* jitter limit exceeded (ro) */ + long long calcnt; /* calibration intervals (ro) */ + long long errcnt; /* calibration errors (ro) */ + long long stbcnt; /* stability limit exceeded (ro) */ + + int tai; /* TAI offset (ro) */ + + int :32; int :32; int :32; int :32; + int :32; int :32; int :32; int :32; + int :32; int :32; int :32; +}; +#endif + /* * Mode codes (timex.mode) */ -- cgit From 50b93f30f6d8672f9ec80e90af94d733f11a20e0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 1 Jan 2019 17:34:39 +0100 Subject: time: fix sys_timer_settime prototype A small typo has crept into the y2038 conversion of the timer_settime system call. So far this was completely harmless, but once we start using the new version, this has to be fixed. Fixes: 6ff847350702 ("time: Change types to new y2038 safe __kernel_itimerspec") Signed-off-by: Arnd Bergmann --- include/linux/syscalls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 938d8908b9e0..baa4b70b02d3 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -591,7 +591,7 @@ asmlinkage long sys_timer_gettime(timer_t timer_id, asmlinkage long sys_timer_getoverrun(timer_t timer_id); asmlinkage long sys_timer_settime(timer_t timer_id, int flags, const struct __kernel_itimerspec __user *new_setting, - struct itimerspec __user *old_setting); + struct __kernel_itimerspec __user *old_setting); asmlinkage long sys_timer_delete(timer_t timer_id); asmlinkage long sys_clock_settime(clockid_t which_clock, const struct __kernel_timespec __user *tp); -- cgit From 1a596398a3d75f966b75f428e992cf1f242f9a5b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 3 Jan 2019 21:12:39 +0100 Subject: sparc64: add custom adjtimex/clock_adjtime functions sparc64 is the only architecture on Linux that has a 'timeval' definition with a 32-bit tv_usec but a 64-bit tv_sec. This causes problems for sparc32 compat mode when we convert it to use the new __kernel_timex type that has the same layout as all other 64-bit architectures. To avoid adding sparc64 specific code into the generic adjtimex implementation, this adds a wrapper in the sparc64 system call handling that converts the sparc64 'timex' into the new '__kernel_timex'. At this point, the two structures are defined to be identical, but that will change in the next step once we convert sparc32. Signed-off-by: Arnd Bergmann --- arch/sparc/kernel/sys_sparc_64.c | 59 +++++++++++++++++++++++++++++++++- arch/sparc/kernel/syscalls/syscall.tbl | 6 ++-- include/linux/timex.h | 2 ++ kernel/time/posix-timers.c | 24 +++++++------- 4 files changed, 76 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index 1c079e7bab09..37de18a11207 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -28,8 +28,9 @@ #include #include #include - +#include #include + #include #include @@ -544,6 +545,62 @@ out_unlock: return err; } +SYSCALL_DEFINE1(sparc_adjtimex, struct timex __user *, txc_p) +{ + struct timex txc; /* Local copy of parameter */ + struct timex *kt = (void *)&txc; + int ret; + + /* Copy the user data space into the kernel copy + * structure. But bear in mind that the structures + * may change + */ + if (copy_from_user(&txc, txc_p, sizeof(struct timex))) + return -EFAULT; + + /* + * override for sparc64 specific timeval type: tv_usec + * is 32 bit wide instead of 64-bit in __kernel_timex + */ + kt->time.tv_usec = txc.time.tv_usec; + ret = do_adjtimex(kt); + txc.time.tv_usec = kt->time.tv_usec; + + return copy_to_user(txc_p, &txc, sizeof(struct timex)) ? -EFAULT : ret; +} + +SYSCALL_DEFINE2(sparc_clock_adjtime, const clockid_t, which_clock,struct timex __user *, txc_p) +{ + struct timex txc; /* Local copy of parameter */ + struct timex *kt = (void *)&txc; + int ret; + + if (!IS_ENABLED(CONFIG_POSIX_TIMERS)) { + pr_err_once("process %d (%s) attempted a POSIX timer syscall " + "while CONFIG_POSIX_TIMERS is not set\n", + current->pid, current->comm); + + return -ENOSYS; + } + + /* Copy the user data space into the kernel copy + * structure. But bear in mind that the structures + * may change + */ + if (copy_from_user(&txc, txc_p, sizeof(struct timex))) + return -EFAULT; + + /* + * override for sparc64 specific timeval type: tv_usec + * is 32 bit wide instead of 64-bit in __kernel_timex + */ + kt->time.tv_usec = txc.time.tv_usec; + ret = do_clock_adjtime(which_clock, kt); + txc.time.tv_usec = kt->time.tv_usec; + + return copy_to_user(txc_p, &txc, sizeof(struct timex)) ? -EFAULT : ret; +} + SYSCALL_DEFINE5(utrap_install, utrap_entry_t, type, utrap_handler_t, new_p, utrap_handler_t, new_d, utrap_handler_t __user *, old_p, diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index 6992d17cce37..e63cd013cc77 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -258,7 +258,8 @@ 216 64 sigreturn sys_nis_syscall 217 common clone sys_clone 218 common ioprio_get sys_ioprio_get -219 common adjtimex sys_adjtimex compat_sys_adjtimex +219 32 adjtimex sys_adjtimex compat_sys_adjtimex +219 64 adjtimex sys_sparc_adjtimex 220 32 sigprocmask sys_sigprocmask compat_sys_sigprocmask 220 64 sigprocmask sys_nis_syscall 221 common create_module sys_ni_syscall @@ -377,7 +378,8 @@ 331 common prlimit64 sys_prlimit64 332 common name_to_handle_at sys_name_to_handle_at 333 common open_by_handle_at sys_open_by_handle_at compat_sys_open_by_handle_at -334 common clock_adjtime sys_clock_adjtime compat_sys_clock_adjtime +334 32 clock_adjtime sys_clock_adjtime compat_sys_clock_adjtime +334 64 clock_adjtime sys_sparc_clock_adjtime 335 common syncfs sys_syncfs 336 common sendmmsg sys_sendmmsg compat_sys_sendmmsg 337 common setns sys_setns diff --git a/include/linux/timex.h b/include/linux/timex.h index 7f40e9e42ecc..a15e6aeb8d49 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -159,6 +159,8 @@ extern unsigned long tick_nsec; /* SHIFTED_HZ period (nsec) */ #define NTP_INTERVAL_LENGTH (NSEC_PER_SEC/NTP_INTERVAL_FREQ) extern int do_adjtimex(struct timex *); +extern int do_clock_adjtime(const clockid_t which_clock, struct timex * ktx); + extern void hardpps(const struct timespec64 *, const struct timespec64 *); int read_current_timer(unsigned long *timer_val); diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 8955f32f2a36..8f7f1dd95940 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -1047,22 +1047,28 @@ SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock, return error; } -SYSCALL_DEFINE2(clock_adjtime, const clockid_t, which_clock, - struct timex __user *, utx) +int do_clock_adjtime(const clockid_t which_clock, struct timex * ktx) { const struct k_clock *kc = clockid_to_kclock(which_clock); - struct timex ktx; - int err; if (!kc) return -EINVAL; if (!kc->clock_adj) return -EOPNOTSUPP; + return kc->clock_adj(which_clock, ktx); +} + +SYSCALL_DEFINE2(clock_adjtime, const clockid_t, which_clock, + struct timex __user *, utx) +{ + struct timex ktx; + int err; + if (copy_from_user(&ktx, utx, sizeof(ktx))) return -EFAULT; - err = kc->clock_adj(which_clock, &ktx); + err = do_clock_adjtime(which_clock, &ktx); if (err >= 0 && copy_to_user(utx, &ktx, sizeof(ktx))) return -EFAULT; @@ -1126,20 +1132,14 @@ COMPAT_SYSCALL_DEFINE2(clock_gettime, clockid_t, which_clock, COMPAT_SYSCALL_DEFINE2(clock_adjtime, clockid_t, which_clock, struct old_timex32 __user *, utp) { - const struct k_clock *kc = clockid_to_kclock(which_clock); struct timex ktx; int err; - if (!kc) - return -EINVAL; - if (!kc->clock_adj) - return -EOPNOTSUPP; - err = get_old_timex32(&ktx, utp); if (err) return err; - err = kc->clock_adj(which_clock, &ktx); + err = do_clock_adjtime(which_clock, &ktx); if (err >= 0) err = put_old_timex32(utp, &ktx); -- cgit From ead25417f82ed7f8a21da4dcefc768169f7da884 Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Mon, 2 Jul 2018 22:44:21 -0700 Subject: timex: use __kernel_timex internally struct timex is not y2038 safe. Replace all uses of timex with y2038 safe __kernel_timex. Note that struct __kernel_timex is an ABI interface definition. We could define a new structure based on __kernel_timex that is only available internally instead. Right now, there isn't a strong motivation for this as the structure is isolated to a few defined struct timex interfaces and such a structure would be exactly the same as struct timex. The patch was generated by the following coccinelle script: virtual patch @depends on patch forall@ identifier ts; expression e; @@ ( - struct timex ts; + struct __kernel_timex ts; | - struct timex ts = {}; + struct __kernel_timex ts = {}; | - struct timex ts = e; + struct __kernel_timex ts = e; | - struct timex *ts; + struct __kernel_timex *ts; | (memset \| copy_from_user \| copy_to_user \)(..., - sizeof(struct timex)) + sizeof(struct __kernel_timex)) ) @depends on patch forall@ identifier ts; identifier fn; @@ fn(..., - struct timex *ts, + struct __kernel_timex *ts, ...) { ... } @depends on patch forall@ identifier ts; identifier fn; @@ fn(..., - struct timex *ts) { + struct __kernel_timex *ts) { ... } Signed-off-by: Deepa Dinamani Cc: linux-alpha@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Arnd Bergmann --- arch/alpha/kernel/osf_sys.c | 5 +++-- arch/sparc/kernel/sys_sparc_64.c | 4 ++-- drivers/ptp/ptp_clock.c | 2 +- include/linux/posix-clock.h | 2 +- include/linux/time32.h | 6 +++--- include/linux/timex.h | 4 ++-- kernel/time/ntp.c | 18 ++++++++++-------- kernel/time/ntp_internal.h | 2 +- kernel/time/posix-clock.c | 2 +- kernel/time/posix-timers.c | 8 ++++---- kernel/time/posix-timers.h | 2 +- kernel/time/time.c | 14 +++++++------- kernel/time/timekeeping.c | 4 ++-- 13 files changed, 38 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 792586038808..bf497b8b0ec6 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -1253,7 +1253,7 @@ struct timex32 { SYSCALL_DEFINE1(old_adjtimex, struct timex32 __user *, txc_p) { - struct timex txc; + struct __kernel_timex txc; int ret; /* copy relevant bits of struct timex. */ @@ -1270,7 +1270,8 @@ SYSCALL_DEFINE1(old_adjtimex, struct timex32 __user *, txc_p) if (copy_to_user(txc_p, &txc, offsetof(struct timex32, time)) || (copy_to_user(&txc_p->tick, &txc.tick, sizeof(struct timex32) - offsetof(struct timex32, tick))) || - (put_tv_to_tv32(&txc_p->time, &txc.time))) + (put_user(txc.time.tv_sec, &txc_p->time.tv_sec)) || + (put_user(txc.time.tv_usec, &txc_p->time.tv_usec))) return -EFAULT; return ret; diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index 37de18a11207..9825ca6a6020 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -548,7 +548,7 @@ out_unlock: SYSCALL_DEFINE1(sparc_adjtimex, struct timex __user *, txc_p) { struct timex txc; /* Local copy of parameter */ - struct timex *kt = (void *)&txc; + struct __kernel_timex *kt = (void *)&txc; int ret; /* Copy the user data space into the kernel copy @@ -572,7 +572,7 @@ SYSCALL_DEFINE1(sparc_adjtimex, struct timex __user *, txc_p) SYSCALL_DEFINE2(sparc_clock_adjtime, const clockid_t, which_clock,struct timex __user *, txc_p) { struct timex txc; /* Local copy of parameter */ - struct timex *kt = (void *)&txc; + struct __kernel_timex *kt = (void *)&txc; int ret; if (!IS_ENABLED(CONFIG_POSIX_TIMERS)) { diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c index 48f3594a7458..79bd102c9bbc 100644 --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@ -124,7 +124,7 @@ static int ptp_clock_gettime(struct posix_clock *pc, struct timespec64 *tp) return err; } -static int ptp_clock_adjtime(struct posix_clock *pc, struct timex *tx) +static int ptp_clock_adjtime(struct posix_clock *pc, struct __kernel_timex *tx) { struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); struct ptp_clock_info *ops; diff --git a/include/linux/posix-clock.h b/include/linux/posix-clock.h index 3a3bc71017d5..18674d7d5b1c 100644 --- a/include/linux/posix-clock.h +++ b/include/linux/posix-clock.h @@ -51,7 +51,7 @@ struct posix_clock; struct posix_clock_operations { struct module *owner; - int (*clock_adjtime)(struct posix_clock *pc, struct timex *tx); + int (*clock_adjtime)(struct posix_clock *pc, struct __kernel_timex *tx); int (*clock_gettime)(struct posix_clock *pc, struct timespec64 *ts); diff --git a/include/linux/time32.h b/include/linux/time32.h index 820a22e2b98b..0a1f302a1753 100644 --- a/include/linux/time32.h +++ b/include/linux/time32.h @@ -69,9 +69,9 @@ extern int get_old_itimerspec32(struct itimerspec64 *its, const struct old_itimerspec32 __user *uits); extern int put_old_itimerspec32(const struct itimerspec64 *its, struct old_itimerspec32 __user *uits); -struct timex; -int get_old_timex32(struct timex *, const struct old_timex32 __user *); -int put_old_timex32(struct old_timex32 __user *, const struct timex *); +struct __kernel_timex; +int get_old_timex32(struct __kernel_timex *, const struct old_timex32 __user *); +int put_old_timex32(struct old_timex32 __user *, const struct __kernel_timex *); #if __BITS_PER_LONG == 64 diff --git a/include/linux/timex.h b/include/linux/timex.h index a15e6aeb8d49..4aff9f0d1367 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -158,8 +158,8 @@ extern unsigned long tick_nsec; /* SHIFTED_HZ period (nsec) */ #define NTP_INTERVAL_FREQ (HZ) #define NTP_INTERVAL_LENGTH (NSEC_PER_SEC/NTP_INTERVAL_FREQ) -extern int do_adjtimex(struct timex *); -extern int do_clock_adjtime(const clockid_t which_clock, struct timex * ktx); +extern int do_adjtimex(struct __kernel_timex *); +extern int do_clock_adjtime(const clockid_t which_clock, struct __kernel_timex * ktx); extern void hardpps(const struct timespec64 *, const struct timespec64 *); diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 36a2bef00125..92a90014a925 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -188,13 +188,13 @@ static inline int is_error_status(int status) && (status & (STA_PPSWANDER|STA_PPSERROR))); } -static inline void pps_fill_timex(struct timex *txc) +static inline void pps_fill_timex(struct __kernel_timex *txc) { txc->ppsfreq = shift_right((pps_freq >> PPM_SCALE_INV_SHIFT) * PPM_SCALE_INV, NTP_SCALE_SHIFT); txc->jitter = pps_jitter; if (!(time_status & STA_NANO)) - txc->jitter /= NSEC_PER_USEC; + txc->jitter = pps_jitter / NSEC_PER_USEC; txc->shift = pps_shift; txc->stabil = pps_stabil; txc->jitcnt = pps_jitcnt; @@ -220,7 +220,7 @@ static inline int is_error_status(int status) return status & (STA_UNSYNC|STA_CLOCKERR); } -static inline void pps_fill_timex(struct timex *txc) +static inline void pps_fill_timex(struct __kernel_timex *txc) { /* PPS is not implemented, so these are zero */ txc->ppsfreq = 0; @@ -633,7 +633,7 @@ void ntp_notify_cmos_timer(void) /* * Propagate a new txc->status value into the NTP state: */ -static inline void process_adj_status(const struct timex *txc) +static inline void process_adj_status(const struct __kernel_timex *txc) { if ((time_status & STA_PLL) && !(txc->status & STA_PLL)) { time_state = TIME_OK; @@ -656,7 +656,8 @@ static inline void process_adj_status(const struct timex *txc) } -static inline void process_adjtimex_modes(const struct timex *txc, s32 *time_tai) +static inline void process_adjtimex_modes(const struct __kernel_timex *txc, + s32 *time_tai) { if (txc->modes & ADJ_STATUS) process_adj_status(txc); @@ -707,7 +708,8 @@ static inline void process_adjtimex_modes(const struct timex *txc, s32 *time_tai * adjtimex mainly allows reading (and writing, if superuser) of * kernel time-keeping variables. used by xntpd. */ -int __do_adjtimex(struct timex *txc, const struct timespec64 *ts, s32 *time_tai) +int __do_adjtimex(struct __kernel_timex *txc, const struct timespec64 *ts, + s32 *time_tai) { int result; @@ -729,7 +731,7 @@ int __do_adjtimex(struct timex *txc, const struct timespec64 *ts, s32 *time_tai) txc->offset = shift_right(time_offset * NTP_INTERVAL_FREQ, NTP_SCALE_SHIFT); if (!(time_status & STA_NANO)) - txc->offset /= NSEC_PER_USEC; + txc->offset = (u32)txc->offset / NSEC_PER_USEC; } result = time_state; /* mostly `TIME_OK' */ @@ -754,7 +756,7 @@ int __do_adjtimex(struct timex *txc, const struct timespec64 *ts, s32 *time_tai) txc->time.tv_sec = (time_t)ts->tv_sec; txc->time.tv_usec = ts->tv_nsec; if (!(time_status & STA_NANO)) - txc->time.tv_usec /= NSEC_PER_USEC; + txc->time.tv_usec = ts->tv_nsec / NSEC_PER_USEC; /* Handle leapsec adjustments */ if (unlikely(ts->tv_sec >= ntp_next_leap_sec)) { diff --git a/kernel/time/ntp_internal.h b/kernel/time/ntp_internal.h index c24b0e13f011..40e6122e634e 100644 --- a/kernel/time/ntp_internal.h +++ b/kernel/time/ntp_internal.h @@ -8,6 +8,6 @@ extern void ntp_clear(void); extern u64 ntp_tick_length(void); extern ktime_t ntp_get_next_leap(void); extern int second_overflow(time64_t secs); -extern int __do_adjtimex(struct timex *txc, const struct timespec64 *ts, s32 *time_tai); +extern int __do_adjtimex(struct __kernel_timex *txc, const struct timespec64 *ts, s32 *time_tai); extern void __hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_ts); #endif /* _LINUX_NTP_INTERNAL_H */ diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c index 425bbfce6819..ec960bb939fd 100644 --- a/kernel/time/posix-clock.c +++ b/kernel/time/posix-clock.c @@ -228,7 +228,7 @@ static void put_clock_desc(struct posix_clock_desc *cd) fput(cd->fp); } -static int pc_clock_adjtime(clockid_t id, struct timex *tx) +static int pc_clock_adjtime(clockid_t id, struct __kernel_timex *tx) { struct posix_clock_desc cd; int err; diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 8f7f1dd95940..2d84b3db1ade 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -179,7 +179,7 @@ static int posix_clock_realtime_set(const clockid_t which_clock, } static int posix_clock_realtime_adj(const clockid_t which_clock, - struct timex *t) + struct __kernel_timex *t) { return do_adjtimex(t); } @@ -1047,7 +1047,7 @@ SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock, return error; } -int do_clock_adjtime(const clockid_t which_clock, struct timex * ktx) +int do_clock_adjtime(const clockid_t which_clock, struct __kernel_timex * ktx) { const struct k_clock *kc = clockid_to_kclock(which_clock); @@ -1062,7 +1062,7 @@ int do_clock_adjtime(const clockid_t which_clock, struct timex * ktx) SYSCALL_DEFINE2(clock_adjtime, const clockid_t, which_clock, struct timex __user *, utx) { - struct timex ktx; + struct __kernel_timex ktx; int err; if (copy_from_user(&ktx, utx, sizeof(ktx))) @@ -1132,7 +1132,7 @@ COMPAT_SYSCALL_DEFINE2(clock_gettime, clockid_t, which_clock, COMPAT_SYSCALL_DEFINE2(clock_adjtime, clockid_t, which_clock, struct old_timex32 __user *, utp) { - struct timex ktx; + struct __kernel_timex ktx; int err; err = get_old_timex32(&ktx, utp); diff --git a/kernel/time/posix-timers.h b/kernel/time/posix-timers.h index ddb21145211a..de5daa6d975a 100644 --- a/kernel/time/posix-timers.h +++ b/kernel/time/posix-timers.h @@ -8,7 +8,7 @@ struct k_clock { const struct timespec64 *tp); int (*clock_get)(const clockid_t which_clock, struct timespec64 *tp); - int (*clock_adj)(const clockid_t which_clock, struct timex *tx); + int (*clock_adj)(const clockid_t which_clock, struct __kernel_timex *tx); int (*timer_create)(struct k_itimer *timer); int (*nsleep)(const clockid_t which_clock, int flags, const struct timespec64 *); diff --git a/kernel/time/time.c b/kernel/time/time.c index 2d013bc2b271..d179d33f639a 100644 --- a/kernel/time/time.c +++ b/kernel/time/time.c @@ -265,25 +265,25 @@ COMPAT_SYSCALL_DEFINE2(settimeofday, struct old_timeval32 __user *, tv, SYSCALL_DEFINE1(adjtimex, struct timex __user *, txc_p) { - struct timex txc; /* Local copy of parameter */ + struct __kernel_timex txc; /* Local copy of parameter */ int ret; /* Copy the user data space into the kernel copy * structure. But bear in mind that the structures * may change */ - if (copy_from_user(&txc, txc_p, sizeof(struct timex))) + if (copy_from_user(&txc, txc_p, sizeof(struct __kernel_timex))) return -EFAULT; ret = do_adjtimex(&txc); - return copy_to_user(txc_p, &txc, sizeof(struct timex)) ? -EFAULT : ret; + return copy_to_user(txc_p, &txc, sizeof(struct __kernel_timex)) ? -EFAULT : ret; } #ifdef CONFIG_COMPAT_32BIT_TIME -int get_old_timex32(struct timex *txc, const struct old_timex32 __user *utp) +int get_old_timex32(struct __kernel_timex *txc, const struct old_timex32 __user *utp) { struct old_timex32 tx32; - memset(txc, 0, sizeof(struct timex)); + memset(txc, 0, sizeof(struct __kernel_timex)); if (copy_from_user(&tx32, utp, sizeof(struct old_timex32))) return -EFAULT; @@ -311,7 +311,7 @@ int get_old_timex32(struct timex *txc, const struct old_timex32 __user *utp) return 0; } -int put_old_timex32(struct old_timex32 __user *utp, const struct timex *txc) +int put_old_timex32(struct old_timex32 __user *utp, const struct __kernel_timex *txc) { struct old_timex32 tx32; @@ -344,7 +344,7 @@ int put_old_timex32(struct old_timex32 __user *utp, const struct timex *txc) COMPAT_SYSCALL_DEFINE1(adjtimex, struct old_timex32 __user *, utp) { - struct timex txc; + struct __kernel_timex txc; int err, ret; err = get_old_timex32(&txc, utp); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index ac5dbf2cd4a2..f986e1918d12 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -2234,7 +2234,7 @@ ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, ktime_t *offs_real, /** * timekeeping_validate_timex - Ensures the timex is ok for use in do_adjtimex */ -static int timekeeping_validate_timex(const struct timex *txc) +static int timekeeping_validate_timex(const struct __kernel_timex *txc) { if (txc->modes & ADJ_ADJTIME) { /* singleshot must not be used with any other mode bits */ @@ -2300,7 +2300,7 @@ static int timekeeping_validate_timex(const struct timex *txc) /** * do_adjtimex() - Accessor function to NTP __do_adjtimex function */ -int do_adjtimex(struct timex *txc) +int do_adjtimex(struct __kernel_timex *txc) { struct timekeeper *tk = &tk_core.timekeeper; unsigned long flags; -- cgit From 3876ced476c8ec17265d1739467e726ada88b660 Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Mon, 2 Jul 2018 22:44:22 -0700 Subject: timex: change syscalls to use struct __kernel_timex struct timex is not y2038 safe. Switch all the syscall apis to use y2038 safe __kernel_timex. Note that sys_adjtimex() does not have a y2038 safe solution. C libraries can implement it by calling clock_adjtime(CLOCK_REALTIME, ...). Signed-off-by: Deepa Dinamani Signed-off-by: Arnd Bergmann --- include/linux/syscalls.h | 6 +++--- kernel/time/posix-timers.c | 2 +- kernel/time/time.c | 4 +++- 3 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index baa4b70b02d3..09330d5bda0c 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -54,7 +54,7 @@ struct __sysctl_args; struct sysinfo; struct timespec; struct timeval; -struct timex; +struct __kernel_timex; struct timezone; struct tms; struct utimbuf; @@ -695,7 +695,7 @@ asmlinkage long sys_gettimeofday(struct timeval __user *tv, struct timezone __user *tz); asmlinkage long sys_settimeofday(struct timeval __user *tv, struct timezone __user *tz); -asmlinkage long sys_adjtimex(struct timex __user *txc_p); +asmlinkage long sys_adjtimex(struct __kernel_timex __user *txc_p); /* kernel/timer.c */ asmlinkage long sys_getpid(void); @@ -870,7 +870,7 @@ asmlinkage long sys_open_by_handle_at(int mountdirfd, struct file_handle __user *handle, int flags); asmlinkage long sys_clock_adjtime(clockid_t which_clock, - struct timex __user *tx); + struct __kernel_timex __user *tx); asmlinkage long sys_syncfs(int fd); asmlinkage long sys_setns(int fd, int nstype); asmlinkage long sys_sendmmsg(int fd, struct mmsghdr __user *msg, diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 2d84b3db1ade..de79f85ae14f 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -1060,7 +1060,7 @@ int do_clock_adjtime(const clockid_t which_clock, struct __kernel_timex * ktx) } SYSCALL_DEFINE2(clock_adjtime, const clockid_t, which_clock, - struct timex __user *, utx) + struct __kernel_timex __user *, utx) { struct __kernel_timex ktx; int err; diff --git a/kernel/time/time.c b/kernel/time/time.c index d179d33f639a..78b5c8f1495a 100644 --- a/kernel/time/time.c +++ b/kernel/time/time.c @@ -263,7 +263,8 @@ COMPAT_SYSCALL_DEFINE2(settimeofday, struct old_timeval32 __user *, tv, } #endif -SYSCALL_DEFINE1(adjtimex, struct timex __user *, txc_p) +#if !defined(CONFIG_64BIT_TIME) || defined(CONFIG_64BIT) +SYSCALL_DEFINE1(adjtimex, struct __kernel_timex __user *, txc_p) { struct __kernel_timex txc; /* Local copy of parameter */ int ret; @@ -277,6 +278,7 @@ SYSCALL_DEFINE1(adjtimex, struct timex __user *, txc_p) ret = do_adjtimex(&txc); return copy_to_user(txc_p, &txc, sizeof(struct __kernel_timex)) ? -EFAULT : ret; } +#endif #ifdef CONFIG_COMPAT_32BIT_TIME int get_old_timex32(struct __kernel_timex *txc, const struct old_timex32 __user *utp) -- cgit From 8dabe7245bbc134f2cfcc12cde75c019dab924cc Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 7 Jan 2019 00:33:08 +0100 Subject: y2038: syscalls: rename y2038 compat syscalls A lot of system calls that pass a time_t somewhere have an implementation using a COMPAT_SYSCALL_DEFINEx() on 64-bit architectures, and have been reworked so that this implementation can now be used on 32-bit architectures as well. The missing step is to redefine them using the regular SYSCALL_DEFINEx() to get them out of the compat namespace and make it possible to build them on 32-bit architectures. Any system call that ends in 'time' gets a '32' suffix on its name for that version, while the others get a '_time32' suffix, to distinguish them from the normal version, which takes a 64-bit time argument in the future. In this step, only 64-bit architectures are changed, doing this rename first lets us avoid touching the 32-bit architectures twice. Acked-by: Catalin Marinas Signed-off-by: Arnd Bergmann --- arch/arm64/include/asm/unistd32.h | 48 ++++++++++---------- arch/mips/kernel/syscalls/syscall_n32.tbl | 50 ++++++++++----------- arch/mips/kernel/syscalls/syscall_o32.tbl | 52 +++++++++++----------- arch/parisc/kernel/syscalls/syscall.tbl | 54 +++++++++++------------ arch/powerpc/kernel/syscalls/syscall.tbl | 52 +++++++++++----------- arch/s390/kernel/syscalls/syscall.tbl | 52 +++++++++++----------- arch/sparc/kernel/syscalls/syscall.tbl | 52 +++++++++++----------- arch/x86/entry/syscalls/syscall_32.tbl | 52 +++++++++++----------- fs/aio.c | 10 ++--- fs/select.c | 4 +- fs/timerfd.c | 4 +- fs/utimes.c | 10 ++--- include/linux/compat.h | 73 ++----------------------------- include/linux/syscalls.h | 57 ++++++++++++++++++++++++ include/uapi/asm-generic/unistd.h | 44 +++++++++---------- ipc/mqueue.c | 16 +++---- ipc/sem.c | 2 +- kernel/futex.c | 2 +- kernel/sched/core.c | 5 +-- kernel/signal.c | 2 +- kernel/sys_ni.c | 18 ++++---- kernel/time/hrtimer.c | 2 +- kernel/time/posix-stubs.c | 25 ++++++----- kernel/time/posix-timers.c | 32 +++++++------- kernel/time/time.c | 8 ++-- net/compat.c | 2 +- 26 files changed, 361 insertions(+), 367 deletions(-) (limited to 'include/linux') diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index d10cce69a4b0..1ded82857161 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -270,7 +270,7 @@ __SYSCALL(__NR_uname, sys_newuname) /* 123 was sys_modify_ldt */ __SYSCALL(123, sys_ni_syscall) #define __NR_adjtimex 124 -__SYSCALL(__NR_adjtimex, compat_sys_adjtimex) +__SYSCALL(__NR_adjtimex, sys_adjtimex_time32) #define __NR_mprotect 125 __SYSCALL(__NR_mprotect, sys_mprotect) #define __NR_sigprocmask 126 @@ -344,9 +344,9 @@ __SYSCALL(__NR_sched_get_priority_max, sys_sched_get_priority_max) #define __NR_sched_get_priority_min 160 __SYSCALL(__NR_sched_get_priority_min, sys_sched_get_priority_min) #define __NR_sched_rr_get_interval 161 -__SYSCALL(__NR_sched_rr_get_interval, compat_sys_sched_rr_get_interval) +__SYSCALL(__NR_sched_rr_get_interval, sys_sched_rr_get_interval_time32) #define __NR_nanosleep 162 -__SYSCALL(__NR_nanosleep, compat_sys_nanosleep) +__SYSCALL(__NR_nanosleep, sys_nanosleep_time32) #define __NR_mremap 163 __SYSCALL(__NR_mremap, sys_mremap) #define __NR_setresuid 164 @@ -376,7 +376,7 @@ __SYSCALL(__NR_rt_sigprocmask, compat_sys_rt_sigprocmask) #define __NR_rt_sigpending 176 __SYSCALL(__NR_rt_sigpending, compat_sys_rt_sigpending) #define __NR_rt_sigtimedwait 177 -__SYSCALL(__NR_rt_sigtimedwait, compat_sys_rt_sigtimedwait) +__SYSCALL(__NR_rt_sigtimedwait, compat_sys_rt_sigtimedwait_time32) #define __NR_rt_sigqueueinfo 178 __SYSCALL(__NR_rt_sigqueueinfo, compat_sys_rt_sigqueueinfo) #define __NR_rt_sigsuspend 179 @@ -502,7 +502,7 @@ __SYSCALL(__NR_tkill, sys_tkill) #define __NR_sendfile64 239 __SYSCALL(__NR_sendfile64, sys_sendfile64) #define __NR_futex 240 -__SYSCALL(__NR_futex, compat_sys_futex) +__SYSCALL(__NR_futex, sys_futex_time32) #define __NR_sched_setaffinity 241 __SYSCALL(__NR_sched_setaffinity, compat_sys_sched_setaffinity) #define __NR_sched_getaffinity 242 @@ -512,7 +512,7 @@ __SYSCALL(__NR_io_setup, compat_sys_io_setup) #define __NR_io_destroy 244 __SYSCALL(__NR_io_destroy, sys_io_destroy) #define __NR_io_getevents 245 -__SYSCALL(__NR_io_getevents, compat_sys_io_getevents) +__SYSCALL(__NR_io_getevents, sys_io_getevents_time32) #define __NR_io_submit 246 __SYSCALL(__NR_io_submit, compat_sys_io_submit) #define __NR_io_cancel 247 @@ -538,21 +538,21 @@ __SYSCALL(__NR_set_tid_address, sys_set_tid_address) #define __NR_timer_create 257 __SYSCALL(__NR_timer_create, compat_sys_timer_create) #define __NR_timer_settime 258 -__SYSCALL(__NR_timer_settime, compat_sys_timer_settime) +__SYSCALL(__NR_timer_settime, sys_timer_settime32) #define __NR_timer_gettime 259 -__SYSCALL(__NR_timer_gettime, compat_sys_timer_gettime) +__SYSCALL(__NR_timer_gettime, sys_timer_gettime32) #define __NR_timer_getoverrun 260 __SYSCALL(__NR_timer_getoverrun, sys_timer_getoverrun) #define __NR_timer_delete 261 __SYSCALL(__NR_timer_delete, sys_timer_delete) #define __NR_clock_settime 262 -__SYSCALL(__NR_clock_settime, compat_sys_clock_settime) +__SYSCALL(__NR_clock_settime, sys_clock_settime32) #define __NR_clock_gettime 263 -__SYSCALL(__NR_clock_gettime, compat_sys_clock_gettime) +__SYSCALL(__NR_clock_gettime, sys_clock_gettime32) #define __NR_clock_getres 264 -__SYSCALL(__NR_clock_getres, compat_sys_clock_getres) +__SYSCALL(__NR_clock_getres, sys_clock_getres_time32) #define __NR_clock_nanosleep 265 -__SYSCALL(__NR_clock_nanosleep, compat_sys_clock_nanosleep) +__SYSCALL(__NR_clock_nanosleep, sys_clock_nanosleep_time32) #define __NR_statfs64 266 __SYSCALL(__NR_statfs64, compat_sys_aarch32_statfs64) #define __NR_fstatfs64 267 @@ -560,7 +560,7 @@ __SYSCALL(__NR_fstatfs64, compat_sys_aarch32_fstatfs64) #define __NR_tgkill 268 __SYSCALL(__NR_tgkill, sys_tgkill) #define __NR_utimes 269 -__SYSCALL(__NR_utimes, compat_sys_utimes) +__SYSCALL(__NR_utimes, sys_utimes_time32) #define __NR_arm_fadvise64_64 270 __SYSCALL(__NR_arm_fadvise64_64, compat_sys_aarch32_fadvise64_64) #define __NR_pciconfig_iobase 271 @@ -574,9 +574,9 @@ __SYSCALL(__NR_mq_open, compat_sys_mq_open) #define __NR_mq_unlink 275 __SYSCALL(__NR_mq_unlink, sys_mq_unlink) #define __NR_mq_timedsend 276 -__SYSCALL(__NR_mq_timedsend, compat_sys_mq_timedsend) +__SYSCALL(__NR_mq_timedsend, sys_mq_timedsend_time32) #define __NR_mq_timedreceive 277 -__SYSCALL(__NR_mq_timedreceive, compat_sys_mq_timedreceive) +__SYSCALL(__NR_mq_timedreceive, sys_mq_timedreceive_time32) #define __NR_mq_notify 278 __SYSCALL(__NR_mq_notify, compat_sys_mq_notify) #define __NR_mq_getsetattr 279 @@ -646,7 +646,7 @@ __SYSCALL(__NR_request_key, sys_request_key) #define __NR_keyctl 311 __SYSCALL(__NR_keyctl, compat_sys_keyctl) #define __NR_semtimedop 312 -__SYSCALL(__NR_semtimedop, compat_sys_semtimedop) +__SYSCALL(__NR_semtimedop, sys_semtimedop_time32) #define __NR_vserver 313 __SYSCALL(__NR_vserver, sys_ni_syscall) #define __NR_ioprio_set 314 @@ -674,7 +674,7 @@ __SYSCALL(__NR_mknodat, sys_mknodat) #define __NR_fchownat 325 __SYSCALL(__NR_fchownat, sys_fchownat) #define __NR_futimesat 326 -__SYSCALL(__NR_futimesat, compat_sys_futimesat) +__SYSCALL(__NR_futimesat, sys_futimesat_time32) #define __NR_fstatat64 327 __SYSCALL(__NR_fstatat64, sys_fstatat64) #define __NR_unlinkat 328 @@ -692,9 +692,9 @@ __SYSCALL(__NR_fchmodat, sys_fchmodat) #define __NR_faccessat 334 __SYSCALL(__NR_faccessat, sys_faccessat) #define __NR_pselect6 335 -__SYSCALL(__NR_pselect6, compat_sys_pselect6) +__SYSCALL(__NR_pselect6, compat_sys_pselect6_time32) #define __NR_ppoll 336 -__SYSCALL(__NR_ppoll, compat_sys_ppoll) +__SYSCALL(__NR_ppoll, compat_sys_ppoll_time32) #define __NR_unshare 337 __SYSCALL(__NR_unshare, sys_unshare) #define __NR_set_robust_list 338 @@ -718,7 +718,7 @@ __SYSCALL(__NR_epoll_pwait, compat_sys_epoll_pwait) #define __NR_kexec_load 347 __SYSCALL(__NR_kexec_load, compat_sys_kexec_load) #define __NR_utimensat 348 -__SYSCALL(__NR_utimensat, compat_sys_utimensat) +__SYSCALL(__NR_utimensat, sys_utimensat_time32) #define __NR_signalfd 349 __SYSCALL(__NR_signalfd, compat_sys_signalfd) #define __NR_timerfd_create 350 @@ -728,9 +728,9 @@ __SYSCALL(__NR_eventfd, sys_eventfd) #define __NR_fallocate 352 __SYSCALL(__NR_fallocate, compat_sys_aarch32_fallocate) #define __NR_timerfd_settime 353 -__SYSCALL(__NR_timerfd_settime, compat_sys_timerfd_settime) +__SYSCALL(__NR_timerfd_settime, sys_timerfd_settime32) #define __NR_timerfd_gettime 354 -__SYSCALL(__NR_timerfd_gettime, compat_sys_timerfd_gettime) +__SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime32) #define __NR_signalfd4 355 __SYSCALL(__NR_signalfd4, compat_sys_signalfd4) #define __NR_eventfd2 356 @@ -752,7 +752,7 @@ __SYSCALL(__NR_rt_tgsigqueueinfo, compat_sys_rt_tgsigqueueinfo) #define __NR_perf_event_open 364 __SYSCALL(__NR_perf_event_open, sys_perf_event_open) #define __NR_recvmmsg 365 -__SYSCALL(__NR_recvmmsg, compat_sys_recvmmsg) +__SYSCALL(__NR_recvmmsg, compat_sys_recvmmsg_time32) #define __NR_accept4 366 __SYSCALL(__NR_accept4, sys_accept4) #define __NR_fanotify_init 367 @@ -766,7 +766,7 @@ __SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at) #define __NR_open_by_handle_at 371 __SYSCALL(__NR_open_by_handle_at, compat_sys_open_by_handle_at) #define __NR_clock_adjtime 372 -__SYSCALL(__NR_clock_adjtime, compat_sys_clock_adjtime) +__SYSCALL(__NR_clock_adjtime, sys_clock_adjtime32) #define __NR_syncfs 373 __SYSCALL(__NR_syncfs, sys_syncfs) #define __NR_sendmmsg 374 diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index cc134b1211aa..6d1e019817c8 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -41,7 +41,7 @@ 31 n32 dup sys_dup 32 n32 dup2 sys_dup2 33 n32 pause sys_pause -34 n32 nanosleep compat_sys_nanosleep +34 n32 nanosleep sys_nanosleep_time32 35 n32 getitimer compat_sys_getitimer 36 n32 setitimer compat_sys_setitimer 37 n32 alarm sys_alarm @@ -133,11 +133,11 @@ 123 n32 capget sys_capget 124 n32 capset sys_capset 125 n32 rt_sigpending compat_sys_rt_sigpending -126 n32 rt_sigtimedwait compat_sys_rt_sigtimedwait +126 n32 rt_sigtimedwait compat_sys_rt_sigtimedwait_time32 127 n32 rt_sigqueueinfo compat_sys_rt_sigqueueinfo 128 n32 rt_sigsuspend compat_sys_rt_sigsuspend 129 n32 sigaltstack compat_sys_sigaltstack -130 n32 utime compat_sys_utime +130 n32 utime sys_utime32 131 n32 mknod sys_mknod 132 n32 personality sys_32_personality 133 n32 ustat compat_sys_ustat @@ -152,7 +152,7 @@ 142 n32 sched_getscheduler sys_sched_getscheduler 143 n32 sched_get_priority_max sys_sched_get_priority_max 144 n32 sched_get_priority_min sys_sched_get_priority_min -145 n32 sched_rr_get_interval compat_sys_sched_rr_get_interval +145 n32 sched_rr_get_interval sys_sched_rr_get_interval_time32 146 n32 mlock sys_mlock 147 n32 munlock sys_munlock 148 n32 mlockall sys_mlockall @@ -161,7 +161,7 @@ 151 n32 pivot_root sys_pivot_root 152 n32 _sysctl compat_sys_sysctl 153 n32 prctl sys_prctl -154 n32 adjtimex compat_sys_adjtimex +154 n32 adjtimex sys_adjtimex_time32 155 n32 setrlimit compat_sys_setrlimit 156 n32 chroot sys_chroot 157 n32 sync sys_sync @@ -202,7 +202,7 @@ 191 n32 fremovexattr sys_fremovexattr 192 n32 tkill sys_tkill 193 n32 reserved193 sys_ni_syscall -194 n32 futex compat_sys_futex +194 n32 futex sys_futex_time32 195 n32 sched_setaffinity compat_sys_sched_setaffinity 196 n32 sched_getaffinity compat_sys_sched_getaffinity 197 n32 cacheflush sys_cacheflush @@ -210,7 +210,7 @@ 199 n32 sysmips __sys_sysmips 200 n32 io_setup compat_sys_io_setup 201 n32 io_destroy sys_io_destroy -202 n32 io_getevents compat_sys_io_getevents +202 n32 io_getevents sys_io_getevents_time32 203 n32 io_submit compat_sys_io_submit 204 n32 io_cancel sys_io_cancel 205 n32 exit_group sys_exit_group @@ -223,29 +223,29 @@ 212 n32 fcntl64 compat_sys_fcntl64 213 n32 set_tid_address sys_set_tid_address 214 n32 restart_syscall sys_restart_syscall -215 n32 semtimedop compat_sys_semtimedop +215 n32 semtimedop sys_semtimedop_time32 216 n32 fadvise64 sys_fadvise64_64 217 n32 statfs64 compat_sys_statfs64 218 n32 fstatfs64 compat_sys_fstatfs64 219 n32 sendfile64 sys_sendfile64 220 n32 timer_create compat_sys_timer_create -221 n32 timer_settime compat_sys_timer_settime -222 n32 timer_gettime compat_sys_timer_gettime +221 n32 timer_settime sys_timer_settime32 +222 n32 timer_gettime sys_timer_gettime32 223 n32 timer_getoverrun sys_timer_getoverrun 224 n32 timer_delete sys_timer_delete -225 n32 clock_settime compat_sys_clock_settime -226 n32 clock_gettime compat_sys_clock_gettime -227 n32 clock_getres compat_sys_clock_getres -228 n32 clock_nanosleep compat_sys_clock_nanosleep +225 n32 clock_settime sys_clock_settime32 +226 n32 clock_gettime sys_clock_gettime32 +227 n32 clock_getres sys_clock_getres_time32 +228 n32 clock_nanosleep sys_clock_nanosleep_time32 229 n32 tgkill sys_tgkill -230 n32 utimes compat_sys_utimes +230 n32 utimes sys_utimes_time32 231 n32 mbind compat_sys_mbind 232 n32 get_mempolicy compat_sys_get_mempolicy 233 n32 set_mempolicy compat_sys_set_mempolicy 234 n32 mq_open compat_sys_mq_open 235 n32 mq_unlink sys_mq_unlink -236 n32 mq_timedsend compat_sys_mq_timedsend -237 n32 mq_timedreceive compat_sys_mq_timedreceive +236 n32 mq_timedsend sys_mq_timedsend_time32 +237 n32 mq_timedreceive sys_mq_timedreceive_time32 238 n32 mq_notify compat_sys_mq_notify 239 n32 mq_getsetattr compat_sys_mq_getsetattr 240 n32 vserver sys_ni_syscall @@ -263,7 +263,7 @@ 252 n32 mkdirat sys_mkdirat 253 n32 mknodat sys_mknodat 254 n32 fchownat sys_fchownat -255 n32 futimesat compat_sys_futimesat +255 n32 futimesat sys_futimesat_time32 256 n32 newfstatat sys_newfstatat 257 n32 unlinkat sys_unlinkat 258 n32 renameat sys_renameat @@ -272,8 +272,8 @@ 261 n32 readlinkat sys_readlinkat 262 n32 fchmodat sys_fchmodat 263 n32 faccessat sys_faccessat -264 n32 pselect6 compat_sys_pselect6 -265 n32 ppoll compat_sys_ppoll +264 n32 pselect6 compat_sys_pselect6_time32 +265 n32 ppoll compat_sys_ppoll_time32 266 n32 unshare sys_unshare 267 n32 splice sys_splice 268 n32 sync_file_range sys_sync_file_range @@ -287,14 +287,14 @@ 276 n32 epoll_pwait compat_sys_epoll_pwait 277 n32 ioprio_set sys_ioprio_set 278 n32 ioprio_get sys_ioprio_get -279 n32 utimensat compat_sys_utimensat +279 n32 utimensat sys_utimensat_time32 280 n32 signalfd compat_sys_signalfd 281 n32 timerfd sys_ni_syscall 282 n32 eventfd sys_eventfd 283 n32 fallocate sys_fallocate 284 n32 timerfd_create sys_timerfd_create -285 n32 timerfd_gettime compat_sys_timerfd_gettime -286 n32 timerfd_settime compat_sys_timerfd_settime +285 n32 timerfd_gettime sys_timerfd_gettime32 +286 n32 timerfd_settime sys_timerfd_settime32 287 n32 signalfd4 compat_sys_signalfd4 288 n32 eventfd2 sys_eventfd2 289 n32 epoll_create1 sys_epoll_create1 @@ -306,14 +306,14 @@ 295 n32 rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo 296 n32 perf_event_open sys_perf_event_open 297 n32 accept4 sys_accept4 -298 n32 recvmmsg compat_sys_recvmmsg +298 n32 recvmmsg compat_sys_recvmmsg_time32 299 n32 getdents64 sys_getdents64 300 n32 fanotify_init sys_fanotify_init 301 n32 fanotify_mark sys_fanotify_mark 302 n32 prlimit64 sys_prlimit64 303 n32 name_to_handle_at sys_name_to_handle_at 304 n32 open_by_handle_at sys_open_by_handle_at -305 n32 clock_adjtime compat_sys_clock_adjtime +305 n32 clock_adjtime sys_clock_adjtime32 306 n32 syncfs sys_syncfs 307 n32 sendmmsg compat_sys_sendmmsg 308 n32 setns sys_setns diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index fa47ea8cc6ef..e9fec7bac5a9 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -20,7 +20,7 @@ 10 o32 unlink sys_unlink 11 o32 execve sys_execve compat_sys_execve 12 o32 chdir sys_chdir -13 o32 time sys_time compat_sys_time +13 o32 time sys_time sys_time32 14 o32 mknod sys_mknod 15 o32 chmod sys_chmod 16 o32 lchown sys_lchown @@ -33,13 +33,13 @@ 22 o32 umount sys_oldumount 23 o32 setuid sys_setuid 24 o32 getuid sys_getuid -25 o32 stime sys_stime compat_sys_stime +25 o32 stime sys_stime sys_stime32 26 o32 ptrace sys_ptrace compat_sys_ptrace 27 o32 alarm sys_alarm # 28 was sys_fstat 28 o32 unused28 sys_ni_syscall 29 o32 pause sys_pause -30 o32 utime sys_utime compat_sys_utime +30 o32 utime sys_utime sys_utime32 31 o32 stty sys_ni_syscall 32 o32 gtty sys_ni_syscall 33 o32 access sys_access @@ -135,7 +135,7 @@ 121 o32 setdomainname sys_setdomainname 122 o32 uname sys_newuname 123 o32 modify_ldt sys_ni_syscall -124 o32 adjtimex sys_adjtimex compat_sys_adjtimex +124 o32 adjtimex sys_adjtimex sys_adjtimex_time32 125 o32 mprotect sys_mprotect 126 o32 sigprocmask sys_sigprocmask compat_sys_sigprocmask 127 o32 create_module sys_ni_syscall @@ -176,8 +176,8 @@ 162 o32 sched_yield sys_sched_yield 163 o32 sched_get_priority_max sys_sched_get_priority_max 164 o32 sched_get_priority_min sys_sched_get_priority_min -165 o32 sched_rr_get_interval sys_sched_rr_get_interval compat_sys_sched_rr_get_interval -166 o32 nanosleep sys_nanosleep compat_sys_nanosleep +165 o32 sched_rr_get_interval sys_sched_rr_get_interval sys_sched_rr_get_interval_time32 +166 o32 nanosleep sys_nanosleep sys_nanosleep_time32 167 o32 mremap sys_mremap 168 o32 accept sys_accept 169 o32 bind sys_bind @@ -208,7 +208,7 @@ 194 o32 rt_sigaction sys_rt_sigaction compat_sys_rt_sigaction 195 o32 rt_sigprocmask sys_rt_sigprocmask compat_sys_rt_sigprocmask 196 o32 rt_sigpending sys_rt_sigpending compat_sys_rt_sigpending -197 o32 rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait +197 o32 rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time32 198 o32 rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo 199 o32 rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend 200 o32 pread64 sys_pread64 sys_32_pread @@ -249,12 +249,12 @@ 235 o32 fremovexattr sys_fremovexattr 236 o32 tkill sys_tkill 237 o32 sendfile64 sys_sendfile64 -238 o32 futex sys_futex compat_sys_futex +238 o32 futex sys_futex sys_futex_time32 239 o32 sched_setaffinity sys_sched_setaffinity compat_sys_sched_setaffinity 240 o32 sched_getaffinity sys_sched_getaffinity compat_sys_sched_getaffinity 241 o32 io_setup sys_io_setup compat_sys_io_setup 242 o32 io_destroy sys_io_destroy -243 o32 io_getevents sys_io_getevents compat_sys_io_getevents +243 o32 io_getevents sys_io_getevents sys_io_getevents_time32 244 o32 io_submit sys_io_submit compat_sys_io_submit 245 o32 io_cancel sys_io_cancel 246 o32 exit_group sys_exit_group @@ -269,23 +269,23 @@ 255 o32 statfs64 sys_statfs64 compat_sys_statfs64 256 o32 fstatfs64 sys_fstatfs64 compat_sys_fstatfs64 257 o32 timer_create sys_timer_create compat_sys_timer_create -258 o32 timer_settime sys_timer_settime compat_sys_timer_settime -259 o32 timer_gettime sys_timer_gettime compat_sys_timer_gettime +258 o32 timer_settime sys_timer_settime sys_timer_settime32 +259 o32 timer_gettime sys_timer_gettime sys_timer_gettime32 260 o32 timer_getoverrun sys_timer_getoverrun 261 o32 timer_delete sys_timer_delete -262 o32 clock_settime sys_clock_settime compat_sys_clock_settime -263 o32 clock_gettime sys_clock_gettime compat_sys_clock_gettime -264 o32 clock_getres sys_clock_getres compat_sys_clock_getres -265 o32 clock_nanosleep sys_clock_nanosleep compat_sys_clock_nanosleep +262 o32 clock_settime sys_clock_settime sys_clock_settime32 +263 o32 clock_gettime sys_clock_gettime sys_clock_gettime32 +264 o32 clock_getres sys_clock_getres sys_clock_getres_time32 +265 o32 clock_nanosleep sys_clock_nanosleep sys_clock_nanosleep_time32 266 o32 tgkill sys_tgkill -267 o32 utimes sys_utimes compat_sys_utimes +267 o32 utimes sys_utimes sys_utimes_time32 268 o32 mbind sys_mbind compat_sys_mbind 269 o32 get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy 270 o32 set_mempolicy sys_set_mempolicy compat_sys_set_mempolicy 271 o32 mq_open sys_mq_open compat_sys_mq_open 272 o32 mq_unlink sys_mq_unlink -273 o32 mq_timedsend sys_mq_timedsend compat_sys_mq_timedsend -274 o32 mq_timedreceive sys_mq_timedreceive compat_sys_mq_timedreceive +273 o32 mq_timedsend sys_mq_timedsend sys_mq_timedsend_time32 +274 o32 mq_timedreceive sys_mq_timedreceive sys_mq_timedreceive_time32 275 o32 mq_notify sys_mq_notify compat_sys_mq_notify 276 o32 mq_getsetattr sys_mq_getsetattr compat_sys_mq_getsetattr 277 o32 vserver sys_ni_syscall @@ -303,7 +303,7 @@ 289 o32 mkdirat sys_mkdirat 290 o32 mknodat sys_mknodat 291 o32 fchownat sys_fchownat -292 o32 futimesat sys_futimesat compat_sys_futimesat +292 o32 futimesat sys_futimesat sys_futimesat_time32 293 o32 fstatat64 sys_fstatat64 sys_newfstatat 294 o32 unlinkat sys_unlinkat 295 o32 renameat sys_renameat @@ -312,8 +312,8 @@ 298 o32 readlinkat sys_readlinkat 299 o32 fchmodat sys_fchmodat 300 o32 faccessat sys_faccessat -301 o32 pselect6 sys_pselect6 compat_sys_pselect6 -302 o32 ppoll sys_ppoll compat_sys_ppoll +301 o32 pselect6 sys_pselect6 compat_sys_pselect6_time32 +302 o32 ppoll sys_ppoll compat_sys_ppoll_time32 303 o32 unshare sys_unshare 304 o32 splice sys_splice 305 o32 sync_file_range sys_sync_file_range sys32_sync_file_range @@ -327,14 +327,14 @@ 313 o32 epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait 314 o32 ioprio_set sys_ioprio_set 315 o32 ioprio_get sys_ioprio_get -316 o32 utimensat sys_utimensat compat_sys_utimensat +316 o32 utimensat sys_utimensat sys_utimensat_time32 317 o32 signalfd sys_signalfd compat_sys_signalfd 318 o32 timerfd sys_ni_syscall 319 o32 eventfd sys_eventfd 320 o32 fallocate sys_fallocate sys32_fallocate 321 o32 timerfd_create sys_timerfd_create -322 o32 timerfd_gettime sys_timerfd_gettime compat_sys_timerfd_gettime -323 o32 timerfd_settime sys_timerfd_settime compat_sys_timerfd_settime +322 o32 timerfd_gettime sys_timerfd_gettime sys_timerfd_gettime32 +323 o32 timerfd_settime sys_timerfd_settime sys_timerfd_settime32 324 o32 signalfd4 sys_signalfd4 compat_sys_signalfd4 325 o32 eventfd2 sys_eventfd2 326 o32 epoll_create1 sys_epoll_create1 @@ -346,13 +346,13 @@ 332 o32 rt_tgsigqueueinfo sys_rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo 333 o32 perf_event_open sys_perf_event_open 334 o32 accept4 sys_accept4 -335 o32 recvmmsg sys_recvmmsg compat_sys_recvmmsg +335 o32 recvmmsg sys_recvmmsg compat_sys_recvmmsg_time32 336 o32 fanotify_init sys_fanotify_init 337 o32 fanotify_mark sys_fanotify_mark compat_sys_fanotify_mark 338 o32 prlimit64 sys_prlimit64 339 o32 name_to_handle_at sys_name_to_handle_at 340 o32 open_by_handle_at sys_open_by_handle_at compat_sys_open_by_handle_at -341 o32 clock_adjtime sys_clock_adjtime compat_sys_clock_adjtime +341 o32 clock_adjtime sys_clock_adjtime sys_clock_adjtime32 342 o32 syncfs sys_syncfs 343 o32 sendmmsg sys_sendmmsg compat_sys_sendmmsg 344 o32 setns sys_setns diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index 71873bb72782..f7440427d459 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -20,7 +20,7 @@ 10 common unlink sys_unlink 11 common execve sys_execve compat_sys_execve 12 common chdir sys_chdir -13 common time sys_time compat_sys_time +13 common time sys_time sys_time32 14 common mknod sys_mknod 15 common chmod sys_chmod 16 common lchown sys_lchown @@ -32,12 +32,12 @@ 22 common bind sys_bind 23 common setuid sys_setuid 24 common getuid sys_getuid -25 common stime sys_stime compat_sys_stime +25 common stime sys_stime sys_stime32 26 common ptrace sys_ptrace compat_sys_ptrace 27 common alarm sys_alarm 28 common fstat sys_newfstat compat_sys_newfstat 29 common pause sys_pause -30 common utime sys_utime compat_sys_utime +30 common utime sys_utime sys_utime32 31 common connect sys_connect 32 common listen sys_listen 33 common access sys_access @@ -133,7 +133,7 @@ 121 common setdomainname sys_setdomainname 122 common sendfile sys_sendfile compat_sys_sendfile 123 common recvfrom sys_recvfrom -124 common adjtimex sys_adjtimex compat_sys_adjtimex +124 common adjtimex sys_adjtimex sys_adjtimex_time32 125 common mprotect sys_mprotect 126 common sigprocmask sys_sigprocmask compat_sys_sigprocmask # 127 was create_module @@ -171,8 +171,8 @@ 158 common sched_yield sys_sched_yield 159 common sched_get_priority_max sys_sched_get_priority_max 160 common sched_get_priority_min sys_sched_get_priority_min -161 common sched_rr_get_interval sys_sched_rr_get_interval compat_sys_sched_rr_get_interval -162 common nanosleep sys_nanosleep compat_sys_nanosleep +161 common sched_rr_get_interval sys_sched_rr_get_interval sys_sched_rr_get_interval_time32 +162 common nanosleep sys_nanosleep sys_nanosleep_time32 163 common mremap sys_mremap 164 common setresuid sys_setresuid 165 common getresuid sys_getresuid @@ -187,7 +187,7 @@ 174 common rt_sigaction sys_rt_sigaction compat_sys_rt_sigaction 175 common rt_sigprocmask sys_rt_sigprocmask compat_sys_rt_sigprocmask 176 common rt_sigpending sys_rt_sigpending compat_sys_rt_sigpending -177 common rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait +177 common rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time32 178 common rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo 179 common rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend 180 common chown sys_chown @@ -223,14 +223,14 @@ 207 64 readahead sys_readahead 208 common tkill sys_tkill 209 common sendfile64 sys_sendfile64 compat_sys_sendfile64 -210 common futex sys_futex compat_sys_futex +210 common futex sys_futex sys_futex_time32 211 common sched_setaffinity sys_sched_setaffinity compat_sys_sched_setaffinity 212 common sched_getaffinity sys_sched_getaffinity compat_sys_sched_getaffinity # 213 was set_thread_area # 214 was get_thread_area 215 common io_setup sys_io_setup compat_sys_io_setup 216 common io_destroy sys_io_destroy -217 common io_getevents sys_io_getevents compat_sys_io_getevents +217 common io_getevents sys_io_getevents sys_io_getevents_time32 218 common io_submit sys_io_submit compat_sys_io_submit 219 common io_cancel sys_io_cancel # 220 was alloc_hugepages @@ -241,11 +241,11 @@ 225 common epoll_ctl sys_epoll_ctl 226 common epoll_wait sys_epoll_wait 227 common remap_file_pages sys_remap_file_pages -228 common semtimedop sys_semtimedop compat_sys_semtimedop +228 common semtimedop sys_semtimedop sys_semtimedop_time32 229 common mq_open sys_mq_open compat_sys_mq_open 230 common mq_unlink sys_mq_unlink -231 common mq_timedsend sys_mq_timedsend compat_sys_mq_timedsend -232 common mq_timedreceive sys_mq_timedreceive compat_sys_mq_timedreceive +231 common mq_timedsend sys_mq_timedsend sys_mq_timedsend_time32 +232 common mq_timedreceive sys_mq_timedreceive sys_mq_timedreceive_time32 233 common mq_notify sys_mq_notify compat_sys_mq_notify 234 common mq_getsetattr sys_mq_getsetattr compat_sys_mq_getsetattr 235 common waitid sys_waitid compat_sys_waitid @@ -265,14 +265,14 @@ 248 common lremovexattr sys_lremovexattr 249 common fremovexattr sys_fremovexattr 250 common timer_create sys_timer_create compat_sys_timer_create -251 common timer_settime sys_timer_settime compat_sys_timer_settime -252 common timer_gettime sys_timer_gettime compat_sys_timer_gettime +251 common timer_settime sys_timer_settime sys_timer_settime32 +252 common timer_gettime sys_timer_gettime sys_timer_gettime32 253 common timer_getoverrun sys_timer_getoverrun 254 common timer_delete sys_timer_delete -255 common clock_settime sys_clock_settime compat_sys_clock_settime -256 common clock_gettime sys_clock_gettime compat_sys_clock_gettime -257 common clock_getres sys_clock_getres compat_sys_clock_getres -258 common clock_nanosleep sys_clock_nanosleep compat_sys_clock_nanosleep +255 common clock_settime sys_clock_settime sys_clock_settime32 +256 common clock_gettime sys_clock_gettime sys_clock_gettime32 +257 common clock_getres sys_clock_getres sys_clock_getres_time32 +258 common clock_nanosleep sys_clock_nanosleep sys_clock_nanosleep_time32 259 common tgkill sys_tgkill 260 common mbind sys_mbind compat_sys_mbind 261 common get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy @@ -287,13 +287,13 @@ 270 common inotify_add_watch sys_inotify_add_watch 271 common inotify_rm_watch sys_inotify_rm_watch 272 common migrate_pages sys_migrate_pages -273 common pselect6 sys_pselect6 compat_sys_pselect6 -274 common ppoll sys_ppoll compat_sys_ppoll +273 common pselect6 sys_pselect6 compat_sys_pselect6_time32 +274 common ppoll sys_ppoll compat_sys_ppoll_time32 275 common openat sys_openat compat_sys_openat 276 common mkdirat sys_mkdirat 277 common mknodat sys_mknodat 278 common fchownat sys_fchownat -279 common futimesat sys_futimesat compat_sys_futimesat +279 common futimesat sys_futimesat sys_futimesat_time32 280 common fstatat64 sys_fstatat64 281 common unlinkat sys_unlinkat 282 common renameat sys_renameat @@ -316,15 +316,15 @@ 298 common statfs64 sys_statfs64 compat_sys_statfs64 299 common fstatfs64 sys_fstatfs64 compat_sys_fstatfs64 300 common kexec_load sys_kexec_load compat_sys_kexec_load -301 common utimensat sys_utimensat compat_sys_utimensat +301 common utimensat sys_utimensat sys_utimensat_time32 302 common signalfd sys_signalfd compat_sys_signalfd # 303 was timerfd 304 common eventfd sys_eventfd 305 32 fallocate parisc_fallocate 305 64 fallocate sys_fallocate 306 common timerfd_create sys_timerfd_create -307 common timerfd_settime sys_timerfd_settime compat_sys_timerfd_settime -308 common timerfd_gettime sys_timerfd_gettime compat_sys_timerfd_gettime +307 common timerfd_settime sys_timerfd_settime sys_timerfd_settime32 +308 common timerfd_gettime sys_timerfd_gettime sys_timerfd_gettime32 309 common signalfd4 sys_signalfd4 compat_sys_signalfd4 310 common eventfd2 sys_eventfd2 311 common epoll_create1 sys_epoll_create1 @@ -335,12 +335,12 @@ 316 common pwritev sys_pwritev compat_sys_pwritev 317 common rt_tgsigqueueinfo sys_rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo 318 common perf_event_open sys_perf_event_open -319 common recvmmsg sys_recvmmsg compat_sys_recvmmsg +319 common recvmmsg sys_recvmmsg compat_sys_recvmmsg_time32 320 common accept4 sys_accept4 321 common prlimit64 sys_prlimit64 322 common fanotify_init sys_fanotify_init 323 common fanotify_mark sys_fanotify_mark sys32_fanotify_mark -324 common clock_adjtime sys_clock_adjtime compat_sys_clock_adjtime +324 common clock_adjtime sys_clock_adjtime sys_clock_adjtime32 325 common name_to_handle_at sys_name_to_handle_at 326 common open_by_handle_at sys_open_by_handle_at compat_sys_open_by_handle_at 327 common syncfs sys_syncfs @@ -352,7 +352,7 @@ 333 common finit_module sys_finit_module 334 common sched_setattr sys_sched_setattr 335 common sched_getattr sys_sched_getattr -336 common utimes sys_utimes compat_sys_utimes +336 common utimes sys_utimes sys_utimes_time32 337 common renameat2 sys_renameat2 338 common seccomp sys_seccomp 339 common getrandom sys_getrandom diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index 7555874ce39c..86650dcd2185 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -20,7 +20,7 @@ 10 common unlink sys_unlink 11 nospu execve sys_execve compat_sys_execve 12 common chdir sys_chdir -13 common time sys_time compat_sys_time +13 common time sys_time sys_time32 14 common mknod sys_mknod 15 common chmod sys_chmod 16 common lchown sys_lchown @@ -36,14 +36,14 @@ 22 spu umount sys_ni_syscall 23 common setuid sys_setuid 24 common getuid sys_getuid -25 common stime sys_stime compat_sys_stime +25 common stime sys_stime sys_stime32 26 nospu ptrace sys_ptrace compat_sys_ptrace 27 common alarm sys_alarm 28 32 oldfstat sys_fstat sys_ni_syscall 28 64 oldfstat sys_ni_syscall 28 spu oldfstat sys_ni_syscall 29 nospu pause sys_pause -30 nospu utime sys_utime compat_sys_utime +30 nospu utime sys_utime sys_utime32 31 common stty sys_ni_syscall 32 common gtty sys_ni_syscall 33 common access sys_access @@ -157,7 +157,7 @@ 121 common setdomainname sys_setdomainname 122 common uname sys_newuname 123 common modify_ldt sys_ni_syscall -124 common adjtimex sys_adjtimex compat_sys_adjtimex +124 common adjtimex sys_adjtimex sys_adjtimex_time32 125 common mprotect sys_mprotect 126 32 sigprocmask sys_sigprocmask compat_sys_sigprocmask 126 64 sigprocmask sys_ni_syscall @@ -198,8 +198,8 @@ 158 common sched_yield sys_sched_yield 159 common sched_get_priority_max sys_sched_get_priority_max 160 common sched_get_priority_min sys_sched_get_priority_min -161 common sched_rr_get_interval sys_sched_rr_get_interval compat_sys_sched_rr_get_interval -162 common nanosleep sys_nanosleep compat_sys_nanosleep +161 common sched_rr_get_interval sys_sched_rr_get_interval sys_sched_rr_get_interval_time32 +162 common nanosleep sys_nanosleep sys_nanosleep_time32 163 common mremap sys_mremap 164 common setresuid sys_setresuid 165 common getresuid sys_getresuid @@ -213,7 +213,7 @@ 173 nospu rt_sigaction sys_rt_sigaction compat_sys_rt_sigaction 174 nospu rt_sigprocmask sys_rt_sigprocmask compat_sys_rt_sigprocmask 175 nospu rt_sigpending sys_rt_sigpending compat_sys_rt_sigpending -176 nospu rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait +176 nospu rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time32 177 nospu rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo 178 nospu rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend 179 common pread64 sys_pread64 compat_sys_pread64 @@ -260,7 +260,7 @@ 218 common removexattr sys_removexattr 219 common lremovexattr sys_lremovexattr 220 common fremovexattr sys_fremovexattr -221 common futex sys_futex compat_sys_futex +221 common futex sys_futex sys_futex_time32 222 common sched_setaffinity sys_sched_setaffinity compat_sys_sched_setaffinity 223 common sched_getaffinity sys_sched_getaffinity compat_sys_sched_getaffinity # 224 unused @@ -268,7 +268,7 @@ 226 32 sendfile64 sys_sendfile64 compat_sys_sendfile64 227 common io_setup sys_io_setup compat_sys_io_setup 228 common io_destroy sys_io_destroy -229 common io_getevents sys_io_getevents compat_sys_io_getevents +229 common io_getevents sys_io_getevents sys_io_getevents_time32 230 common io_submit sys_io_submit compat_sys_io_submit 231 common io_cancel sys_io_cancel 232 nospu set_tid_address sys_set_tid_address @@ -280,19 +280,19 @@ 238 common epoll_wait sys_epoll_wait 239 common remap_file_pages sys_remap_file_pages 240 common timer_create sys_timer_create compat_sys_timer_create -241 common timer_settime sys_timer_settime compat_sys_timer_settime -242 common timer_gettime sys_timer_gettime compat_sys_timer_gettime +241 common timer_settime sys_timer_settime sys_timer_settime32 +242 common timer_gettime sys_timer_gettime sys_timer_gettime32 243 common timer_getoverrun sys_timer_getoverrun 244 common timer_delete sys_timer_delete -245 common clock_settime sys_clock_settime compat_sys_clock_settime -246 common clock_gettime sys_clock_gettime compat_sys_clock_gettime -247 common clock_getres sys_clock_getres compat_sys_clock_getres -248 common clock_nanosleep sys_clock_nanosleep compat_sys_clock_nanosleep +245 common clock_settime sys_clock_settime sys_clock_settime32 +246 common clock_gettime sys_clock_gettime sys_clock_gettime32 +247 common clock_getres sys_clock_getres sys_clock_getres_time32 +248 common clock_nanosleep sys_clock_nanosleep sys_clock_nanosleep_time32 249 32 swapcontext ppc_swapcontext ppc32_swapcontext 249 64 swapcontext ppc64_swapcontext 249 spu swapcontext sys_ni_syscall 250 common tgkill sys_tgkill -251 common utimes sys_utimes compat_sys_utimes +251 common utimes sys_utimes sys_utimes_time32 252 common statfs64 sys_statfs64 compat_sys_statfs64 253 common fstatfs64 sys_fstatfs64 compat_sys_fstatfs64 254 32 fadvise64_64 ppc_fadvise64_64 @@ -308,8 +308,8 @@ 261 nospu set_mempolicy sys_set_mempolicy compat_sys_set_mempolicy 262 nospu mq_open sys_mq_open compat_sys_mq_open 263 nospu mq_unlink sys_mq_unlink -264 nospu mq_timedsend sys_mq_timedsend compat_sys_mq_timedsend -265 nospu mq_timedreceive sys_mq_timedreceive compat_sys_mq_timedreceive +264 nospu mq_timedsend sys_mq_timedsend sys_mq_timedsend_time32 +265 nospu mq_timedreceive sys_mq_timedreceive sys_mq_timedreceive_time32 266 nospu mq_notify sys_mq_notify compat_sys_mq_notify 267 nospu mq_getsetattr sys_mq_getsetattr compat_sys_mq_getsetattr 268 nospu kexec_load sys_kexec_load compat_sys_kexec_load @@ -324,8 +324,8 @@ 277 nospu inotify_rm_watch sys_inotify_rm_watch 278 nospu spu_run sys_spu_run 279 nospu spu_create sys_spu_create -280 nospu pselect6 sys_pselect6 compat_sys_pselect6 -281 nospu ppoll sys_ppoll compat_sys_ppoll +280 nospu pselect6 sys_pselect6 compat_sys_pselect6_time32 +281 nospu ppoll sys_ppoll compat_sys_ppoll_time32 282 common unshare sys_unshare 283 common splice sys_splice 284 common tee sys_tee @@ -334,7 +334,7 @@ 287 common mkdirat sys_mkdirat 288 common mknodat sys_mknodat 289 common fchownat sys_fchownat -290 common futimesat sys_futimesat compat_sys_futimesat +290 common futimesat sys_futimesat sys_futimesat_time32 291 32 fstatat64 sys_fstatat64 291 64 newfstatat sys_newfstatat 291 spu newfstatat sys_newfstatat @@ -350,15 +350,15 @@ 301 common move_pages sys_move_pages compat_sys_move_pages 302 common getcpu sys_getcpu 303 nospu epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait -304 common utimensat sys_utimensat compat_sys_utimensat +304 common utimensat sys_utimensat sys_utimensat_time32 305 common signalfd sys_signalfd compat_sys_signalfd 306 common timerfd_create sys_timerfd_create 307 common eventfd sys_eventfd 308 common sync_file_range2 sys_sync_file_range2 compat_sys_sync_file_range2 309 nospu fallocate sys_fallocate compat_sys_fallocate 310 nospu subpage_prot sys_subpage_prot -311 common timerfd_settime sys_timerfd_settime compat_sys_timerfd_settime -312 common timerfd_gettime sys_timerfd_gettime compat_sys_timerfd_gettime +311 common timerfd_settime sys_timerfd_settime sys_timerfd_settime32 +312 common timerfd_gettime sys_timerfd_gettime sys_timerfd_gettime32 313 common signalfd4 sys_signalfd4 compat_sys_signalfd4 314 common eventfd2 sys_eventfd2 315 common epoll_create1 sys_epoll_create1 @@ -389,11 +389,11 @@ 340 common getsockopt sys_getsockopt compat_sys_getsockopt 341 common sendmsg sys_sendmsg compat_sys_sendmsg 342 common recvmsg sys_recvmsg compat_sys_recvmsg -343 common recvmmsg sys_recvmmsg compat_sys_recvmmsg +343 common recvmmsg sys_recvmmsg compat_sys_recvmmsg_time32 344 common accept4 sys_accept4 345 common name_to_handle_at sys_name_to_handle_at 346 common open_by_handle_at sys_open_by_handle_at compat_sys_open_by_handle_at -347 common clock_adjtime sys_clock_adjtime compat_sys_clock_adjtime +347 common clock_adjtime sys_clock_adjtime sys_clock_adjtime32 348 common syncfs sys_syncfs 349 common sendmmsg sys_sendmmsg compat_sys_sendmmsg 350 common setns sys_setns diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index 620e222003ca..285201cf1f83 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -20,7 +20,7 @@ 10 common unlink sys_unlink sys_unlink 11 common execve sys_execve compat_sys_execve 12 common chdir sys_chdir sys_chdir -13 32 time - compat_sys_time +13 32 time - sys_time32 14 common mknod sys_mknod sys_mknod 15 common chmod sys_chmod sys_chmod 16 32 lchown - sys_lchown16 @@ -30,11 +30,11 @@ 22 common umount sys_oldumount sys_oldumount 23 32 setuid - sys_setuid16 24 32 getuid - sys_getuid16 -25 32 stime - compat_sys_stime +25 32 stime - sys_stime32 26 common ptrace sys_ptrace compat_sys_ptrace 27 common alarm sys_alarm sys_alarm 29 common pause sys_pause sys_pause -30 common utime sys_utime compat_sys_utime +30 common utime sys_utime sys_utime32 33 common access sys_access sys_access 34 common nice sys_nice sys_nice 36 common sync sys_sync sys_sync @@ -112,7 +112,7 @@ 120 common clone sys_clone sys_clone 121 common setdomainname sys_setdomainname sys_setdomainname 122 common uname sys_newuname sys_newuname -124 common adjtimex sys_adjtimex compat_sys_adjtimex +124 common adjtimex sys_adjtimex sys_adjtimex_time32 125 common mprotect sys_mprotect sys_mprotect 126 common sigprocmask sys_sigprocmask compat_sys_sigprocmask 127 common create_module - - @@ -150,8 +150,8 @@ 158 common sched_yield sys_sched_yield sys_sched_yield 159 common sched_get_priority_max sys_sched_get_priority_max sys_sched_get_priority_max 160 common sched_get_priority_min sys_sched_get_priority_min sys_sched_get_priority_min -161 common sched_rr_get_interval sys_sched_rr_get_interval compat_sys_sched_rr_get_interval -162 common nanosleep sys_nanosleep compat_sys_nanosleep +161 common sched_rr_get_interval sys_sched_rr_get_interval sys_sched_rr_get_interval_time32 +162 common nanosleep sys_nanosleep sys_nanosleep_time32 163 common mremap sys_mremap sys_mremap 164 32 setresuid - sys_setresuid16 165 32 getresuid - sys_getresuid16 @@ -165,7 +165,7 @@ 174 common rt_sigaction sys_rt_sigaction compat_sys_rt_sigaction 175 common rt_sigprocmask sys_rt_sigprocmask compat_sys_rt_sigprocmask 176 common rt_sigpending sys_rt_sigpending compat_sys_rt_sigpending -177 common rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait +177 common rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time32 178 common rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo 179 common rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend 180 common pread64 sys_pread64 compat_sys_s390_pread64 @@ -246,13 +246,13 @@ 235 common fremovexattr sys_fremovexattr sys_fremovexattr 236 common gettid sys_gettid sys_gettid 237 common tkill sys_tkill sys_tkill -238 common futex sys_futex compat_sys_futex +238 common futex sys_futex sys_futex_time32 239 common sched_setaffinity sys_sched_setaffinity compat_sys_sched_setaffinity 240 common sched_getaffinity sys_sched_getaffinity compat_sys_sched_getaffinity 241 common tgkill sys_tgkill sys_tgkill 243 common io_setup sys_io_setup compat_sys_io_setup 244 common io_destroy sys_io_destroy sys_io_destroy -245 common io_getevents sys_io_getevents compat_sys_io_getevents +245 common io_getevents sys_io_getevents sys_io_getevents_time32 246 common io_submit sys_io_submit compat_sys_io_submit 247 common io_cancel sys_io_cancel sys_io_cancel 248 common exit_group sys_exit_group sys_exit_group @@ -262,14 +262,14 @@ 252 common set_tid_address sys_set_tid_address sys_set_tid_address 253 common fadvise64 sys_fadvise64_64 compat_sys_s390_fadvise64 254 common timer_create sys_timer_create compat_sys_timer_create -255 common timer_settime sys_timer_settime compat_sys_timer_settime -256 common timer_gettime sys_timer_gettime compat_sys_timer_gettime +255 common timer_settime sys_timer_settime sys_timer_settime32 +256 common timer_gettime sys_timer_gettime sys_timer_gettime32 257 common timer_getoverrun sys_timer_getoverrun sys_timer_getoverrun 258 common timer_delete sys_timer_delete sys_timer_delete -259 common clock_settime sys_clock_settime compat_sys_clock_settime -260 common clock_gettime sys_clock_gettime compat_sys_clock_gettime -261 common clock_getres sys_clock_getres compat_sys_clock_getres -262 common clock_nanosleep sys_clock_nanosleep compat_sys_clock_nanosleep +259 common clock_settime sys_clock_settime sys_clock_settime32 +260 common clock_gettime sys_clock_gettime sys_clock_gettime32 +261 common clock_getres sys_clock_getres sys_clock_getres_time32 +262 common clock_nanosleep sys_clock_nanosleep sys_clock_nanosleep_time32 264 32 fadvise64_64 - compat_sys_s390_fadvise64_64 265 common statfs64 sys_statfs64 compat_sys_statfs64 266 common fstatfs64 sys_fstatfs64 compat_sys_fstatfs64 @@ -279,8 +279,8 @@ 270 common set_mempolicy sys_set_mempolicy compat_sys_set_mempolicy 271 common mq_open sys_mq_open compat_sys_mq_open 272 common mq_unlink sys_mq_unlink sys_mq_unlink -273 common mq_timedsend sys_mq_timedsend compat_sys_mq_timedsend -274 common mq_timedreceive sys_mq_timedreceive compat_sys_mq_timedreceive +273 common mq_timedsend sys_mq_timedsend sys_mq_timedsend_time32 +274 common mq_timedreceive sys_mq_timedreceive sys_mq_timedreceive_time32 275 common mq_notify sys_mq_notify compat_sys_mq_notify 276 common mq_getsetattr sys_mq_getsetattr compat_sys_mq_getsetattr 277 common kexec_load sys_kexec_load compat_sys_kexec_load @@ -298,7 +298,7 @@ 289 common mkdirat sys_mkdirat sys_mkdirat 290 common mknodat sys_mknodat sys_mknodat 291 common fchownat sys_fchownat sys_fchownat -292 common futimesat sys_futimesat compat_sys_futimesat +292 common futimesat sys_futimesat sys_futimesat_time32 293 32 fstatat64 - compat_sys_s390_fstatat64 293 64 newfstatat sys_newfstatat - 294 common unlinkat sys_unlinkat sys_unlinkat @@ -308,8 +308,8 @@ 298 common readlinkat sys_readlinkat sys_readlinkat 299 common fchmodat sys_fchmodat sys_fchmodat 300 common faccessat sys_faccessat sys_faccessat -301 common pselect6 sys_pselect6 compat_sys_pselect6 -302 common ppoll sys_ppoll compat_sys_ppoll +301 common pselect6 sys_pselect6 compat_sys_pselect6_time32 +302 common ppoll sys_ppoll compat_sys_ppoll_time32 303 common unshare sys_unshare sys_unshare 304 common set_robust_list sys_set_robust_list compat_sys_set_robust_list 305 common get_robust_list sys_get_robust_list compat_sys_get_robust_list @@ -320,15 +320,15 @@ 310 common move_pages sys_move_pages compat_sys_move_pages 311 common getcpu sys_getcpu sys_getcpu 312 common epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait -313 common utimes sys_utimes compat_sys_utimes +313 common utimes sys_utimes sys_utimes_time32 314 common fallocate sys_fallocate compat_sys_s390_fallocate -315 common utimensat sys_utimensat compat_sys_utimensat +315 common utimensat sys_utimensat sys_utimensat_time32 316 common signalfd sys_signalfd compat_sys_signalfd 317 common timerfd - - 318 common eventfd sys_eventfd sys_eventfd 319 common timerfd_create sys_timerfd_create sys_timerfd_create -320 common timerfd_settime sys_timerfd_settime compat_sys_timerfd_settime -321 common timerfd_gettime sys_timerfd_gettime compat_sys_timerfd_gettime +320 common timerfd_settime sys_timerfd_settime sys_timerfd_settime32 +321 common timerfd_gettime sys_timerfd_gettime sys_timerfd_gettime32 322 common signalfd4 sys_signalfd4 compat_sys_signalfd4 323 common eventfd2 sys_eventfd2 sys_eventfd2 324 common inotify_init1 sys_inotify_init1 sys_inotify_init1 @@ -344,7 +344,7 @@ 334 common prlimit64 sys_prlimit64 sys_prlimit64 335 common name_to_handle_at sys_name_to_handle_at sys_name_to_handle_at 336 common open_by_handle_at sys_open_by_handle_at compat_sys_open_by_handle_at -337 common clock_adjtime sys_clock_adjtime compat_sys_clock_adjtime +337 common clock_adjtime sys_clock_adjtime sys_clock_adjtime32 338 common syncfs sys_syncfs sys_syncfs 339 common setns sys_setns sys_setns 340 common process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv @@ -364,7 +364,7 @@ 354 common execveat sys_execveat compat_sys_execveat 355 common userfaultfd sys_userfaultfd sys_userfaultfd 356 common membarrier sys_membarrier sys_membarrier -357 common recvmmsg sys_recvmmsg compat_sys_recvmmsg +357 common recvmmsg sys_recvmmsg compat_sys_recvmmsg_time32 358 common sendmmsg sys_sendmmsg compat_sys_sendmmsg 359 common socket sys_socket sys_socket 360 common socketpair sys_socketpair sys_socketpair diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index e63cd013cc77..7cb05b50aeaa 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -44,7 +44,7 @@ 28 common sigaltstack sys_sigaltstack compat_sys_sigaltstack 29 32 pause sys_pause 29 64 pause sys_nis_syscall -30 common utime sys_utime compat_sys_utime +30 common utime sys_utime sys_utime32 31 32 lchown32 sys_lchown 32 32 fchown32 sys_fchown 33 common access sys_access @@ -128,7 +128,7 @@ 102 common rt_sigaction sys_rt_sigaction compat_sys_rt_sigaction 103 common rt_sigprocmask sys_rt_sigprocmask compat_sys_rt_sigprocmask 104 common rt_sigpending sys_rt_sigpending compat_sys_rt_sigpending -105 common rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait +105 common rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time32 106 common rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo 107 common rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend 108 32 setresuid32 sys_setresuid @@ -168,11 +168,11 @@ 135 common socketpair sys_socketpair 136 common mkdir sys_mkdir 137 common rmdir sys_rmdir -138 common utimes sys_utimes compat_sys_utimes +138 common utimes sys_utimes sys_utimes_time32 139 common stat64 sys_stat64 compat_sys_stat64 140 common sendfile64 sys_sendfile64 141 common getpeername sys_getpeername -142 common futex sys_futex compat_sys_futex +142 common futex sys_futex sys_futex_time32 143 common gettid sys_gettid 144 common getrlimit sys_getrlimit compat_sys_getrlimit 145 common setrlimit sys_setrlimit compat_sys_setrlimit @@ -258,7 +258,7 @@ 216 64 sigreturn sys_nis_syscall 217 common clone sys_clone 218 common ioprio_get sys_ioprio_get -219 32 adjtimex sys_adjtimex compat_sys_adjtimex +219 32 adjtimex sys_adjtimex sys_adjtimex_time32 219 64 adjtimex sys_sparc_adjtimex 220 32 sigprocmask sys_sigprocmask compat_sys_sigprocmask 220 64 sigprocmask sys_nis_syscall @@ -272,9 +272,9 @@ 228 common setfsuid sys_setfsuid16 229 common setfsgid sys_setfsgid16 230 common _newselect sys_select compat_sys_select -231 32 time sys_time compat_sys_time +231 32 time sys_time sys_time32 232 common splice sys_splice -233 common stime sys_stime compat_sys_stime +233 common stime sys_stime sys_stime32 234 common statfs64 sys_statfs64 compat_sys_statfs64 235 common fstatfs64 sys_fstatfs64 compat_sys_fstatfs64 236 common _llseek sys_llseek @@ -289,8 +289,8 @@ 245 common sched_yield sys_sched_yield 246 common sched_get_priority_max sys_sched_get_priority_max 247 common sched_get_priority_min sys_sched_get_priority_min -248 common sched_rr_get_interval sys_sched_rr_get_interval compat_sys_sched_rr_get_interval -249 common nanosleep sys_nanosleep compat_sys_nanosleep +248 common sched_rr_get_interval sys_sched_rr_get_interval sys_sched_rr_get_interval_time32 +249 common nanosleep sys_nanosleep sys_nanosleep_time32 250 32 mremap sys_mremap 250 64 mremap sys_64_mremap 251 common _sysctl sys_sysctl compat_sys_sysctl @@ -299,14 +299,14 @@ 254 32 nfsservctl sys_ni_syscall sys_nis_syscall 254 64 nfsservctl sys_nis_syscall 255 common sync_file_range sys_sync_file_range compat_sys_sync_file_range -256 common clock_settime sys_clock_settime compat_sys_clock_settime -257 common clock_gettime sys_clock_gettime compat_sys_clock_gettime -258 common clock_getres sys_clock_getres compat_sys_clock_getres -259 common clock_nanosleep sys_clock_nanosleep compat_sys_clock_nanosleep +256 common clock_settime sys_clock_settime sys_clock_settime32 +257 common clock_gettime sys_clock_gettime sys_clock_gettime32 +258 common clock_getres sys_clock_getres sys_clock_getres_time32 +259 common clock_nanosleep sys_clock_nanosleep sys_clock_nanosleep_time32 260 common sched_getaffinity sys_sched_getaffinity compat_sys_sched_getaffinity 261 common sched_setaffinity sys_sched_setaffinity compat_sys_sched_setaffinity -262 common timer_settime sys_timer_settime compat_sys_timer_settime -263 common timer_gettime sys_timer_gettime compat_sys_timer_gettime +262 common timer_settime sys_timer_settime sys_timer_settime32 +263 common timer_gettime sys_timer_gettime sys_timer_gettime32 264 common timer_getoverrun sys_timer_getoverrun 265 common timer_delete sys_timer_delete 266 common timer_create sys_timer_create compat_sys_timer_create @@ -316,11 +316,11 @@ 269 common io_destroy sys_io_destroy 270 common io_submit sys_io_submit compat_sys_io_submit 271 common io_cancel sys_io_cancel -272 common io_getevents sys_io_getevents compat_sys_io_getevents +272 common io_getevents sys_io_getevents sys_io_getevents_time32 273 common mq_open sys_mq_open compat_sys_mq_open 274 common mq_unlink sys_mq_unlink -275 common mq_timedsend sys_mq_timedsend compat_sys_mq_timedsend -276 common mq_timedreceive sys_mq_timedreceive compat_sys_mq_timedreceive +275 common mq_timedsend sys_mq_timedsend sys_mq_timedsend_time32 +276 common mq_timedreceive sys_mq_timedreceive sys_mq_timedreceive_time32 277 common mq_notify sys_mq_notify compat_sys_mq_notify 278 common mq_getsetattr sys_mq_getsetattr compat_sys_mq_getsetattr 279 common waitid sys_waitid compat_sys_waitid @@ -332,7 +332,7 @@ 285 common mkdirat sys_mkdirat 286 common mknodat sys_mknodat 287 common fchownat sys_fchownat -288 common futimesat sys_futimesat compat_sys_futimesat +288 common futimesat sys_futimesat sys_futimesat_time32 289 common fstatat64 sys_fstatat64 compat_sys_fstatat64 290 common unlinkat sys_unlinkat 291 common renameat sys_renameat @@ -341,8 +341,8 @@ 294 common readlinkat sys_readlinkat 295 common fchmodat sys_fchmodat 296 common faccessat sys_faccessat -297 common pselect6 sys_pselect6 compat_sys_pselect6 -298 common ppoll sys_ppoll compat_sys_ppoll +297 common pselect6 sys_pselect6 compat_sys_pselect6_time32 +298 common ppoll sys_ppoll compat_sys_ppoll_time32 299 common unshare sys_unshare 300 common set_robust_list sys_set_robust_list compat_sys_set_robust_list 301 common get_robust_list sys_get_robust_list compat_sys_get_robust_list @@ -354,13 +354,13 @@ 307 common move_pages sys_move_pages compat_sys_move_pages 308 common getcpu sys_getcpu 309 common epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait -310 common utimensat sys_utimensat compat_sys_utimensat +310 common utimensat sys_utimensat sys_utimensat_time32 311 common signalfd sys_signalfd compat_sys_signalfd 312 common timerfd_create sys_timerfd_create 313 common eventfd sys_eventfd 314 common fallocate sys_fallocate compat_sys_fallocate -315 common timerfd_settime sys_timerfd_settime compat_sys_timerfd_settime -316 common timerfd_gettime sys_timerfd_gettime compat_sys_timerfd_gettime +315 common timerfd_settime sys_timerfd_settime sys_timerfd_settime32 +316 common timerfd_gettime sys_timerfd_gettime sys_timerfd_gettime32 317 common signalfd4 sys_signalfd4 compat_sys_signalfd4 318 common eventfd2 sys_eventfd2 319 common epoll_create1 sys_epoll_create1 @@ -372,13 +372,13 @@ 325 common pwritev sys_pwritev compat_sys_pwritev 326 common rt_tgsigqueueinfo sys_rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo 327 common perf_event_open sys_perf_event_open -328 common recvmmsg sys_recvmmsg compat_sys_recvmmsg +328 common recvmmsg sys_recvmmsg compat_sys_recvmmsg_time32 329 common fanotify_init sys_fanotify_init 330 common fanotify_mark sys_fanotify_mark compat_sys_fanotify_mark 331 common prlimit64 sys_prlimit64 332 common name_to_handle_at sys_name_to_handle_at 333 common open_by_handle_at sys_open_by_handle_at compat_sys_open_by_handle_at -334 32 clock_adjtime sys_clock_adjtime compat_sys_clock_adjtime +334 32 clock_adjtime sys_clock_adjtime sys_clock_adjtime32 334 64 clock_adjtime sys_sparc_clock_adjtime 335 common syncfs sys_syncfs 336 common sendmmsg sys_sendmmsg compat_sys_sendmmsg diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 2be1d0eb7754..7705d5ecad25 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -24,7 +24,7 @@ 10 i386 unlink sys_unlink __ia32_sys_unlink 11 i386 execve sys_execve __ia32_compat_sys_execve 12 i386 chdir sys_chdir __ia32_sys_chdir -13 i386 time sys_time __ia32_compat_sys_time +13 i386 time sys_time __ia32_sys_time32 14 i386 mknod sys_mknod __ia32_sys_mknod 15 i386 chmod sys_chmod __ia32_sys_chmod 16 i386 lchown sys_lchown16 __ia32_sys_lchown16 @@ -36,12 +36,12 @@ 22 i386 umount sys_oldumount __ia32_sys_oldumount 23 i386 setuid sys_setuid16 __ia32_sys_setuid16 24 i386 getuid sys_getuid16 __ia32_sys_getuid16 -25 i386 stime sys_stime __ia32_compat_sys_stime +25 i386 stime sys_stime __ia32_sys_stime32 26 i386 ptrace sys_ptrace __ia32_compat_sys_ptrace 27 i386 alarm sys_alarm __ia32_sys_alarm 28 i386 oldfstat sys_fstat __ia32_sys_fstat 29 i386 pause sys_pause __ia32_sys_pause -30 i386 utime sys_utime __ia32_compat_sys_utime +30 i386 utime sys_utime __ia32_sys_utime32 31 i386 stty 32 i386 gtty 33 i386 access sys_access __ia32_sys_access @@ -135,7 +135,7 @@ 121 i386 setdomainname sys_setdomainname __ia32_sys_setdomainname 122 i386 uname sys_newuname __ia32_sys_newuname 123 i386 modify_ldt sys_modify_ldt __ia32_sys_modify_ldt -124 i386 adjtimex sys_adjtimex __ia32_compat_sys_adjtimex +124 i386 adjtimex sys_adjtimex __ia32_sys_adjtimex_time32 125 i386 mprotect sys_mprotect __ia32_sys_mprotect 126 i386 sigprocmask sys_sigprocmask __ia32_compat_sys_sigprocmask 127 i386 create_module @@ -172,8 +172,8 @@ 158 i386 sched_yield sys_sched_yield __ia32_sys_sched_yield 159 i386 sched_get_priority_max sys_sched_get_priority_max __ia32_sys_sched_get_priority_max 160 i386 sched_get_priority_min sys_sched_get_priority_min __ia32_sys_sched_get_priority_min -161 i386 sched_rr_get_interval sys_sched_rr_get_interval __ia32_compat_sys_sched_rr_get_interval -162 i386 nanosleep sys_nanosleep __ia32_compat_sys_nanosleep +161 i386 sched_rr_get_interval sys_sched_rr_get_interval __ia32_sys_sched_rr_get_interval_time32 +162 i386 nanosleep sys_nanosleep __ia32_sys_nanosleep_time32 163 i386 mremap sys_mremap __ia32_sys_mremap 164 i386 setresuid sys_setresuid16 __ia32_sys_setresuid16 165 i386 getresuid sys_getresuid16 __ia32_sys_getresuid16 @@ -188,7 +188,7 @@ 174 i386 rt_sigaction sys_rt_sigaction __ia32_compat_sys_rt_sigaction 175 i386 rt_sigprocmask sys_rt_sigprocmask __ia32_sys_rt_sigprocmask 176 i386 rt_sigpending sys_rt_sigpending __ia32_compat_sys_rt_sigpending -177 i386 rt_sigtimedwait sys_rt_sigtimedwait __ia32_compat_sys_rt_sigtimedwait +177 i386 rt_sigtimedwait sys_rt_sigtimedwait __ia32_compat_sys_rt_sigtimedwait_time32 178 i386 rt_sigqueueinfo sys_rt_sigqueueinfo __ia32_compat_sys_rt_sigqueueinfo 179 i386 rt_sigsuspend sys_rt_sigsuspend __ia32_sys_rt_sigsuspend 180 i386 pread64 sys_pread64 __ia32_compat_sys_x86_pread @@ -251,14 +251,14 @@ 237 i386 fremovexattr sys_fremovexattr __ia32_sys_fremovexattr 238 i386 tkill sys_tkill __ia32_sys_tkill 239 i386 sendfile64 sys_sendfile64 __ia32_sys_sendfile64 -240 i386 futex sys_futex __ia32_compat_sys_futex +240 i386 futex sys_futex __ia32_sys_futex_time32 241 i386 sched_setaffinity sys_sched_setaffinity __ia32_compat_sys_sched_setaffinity 242 i386 sched_getaffinity sys_sched_getaffinity __ia32_compat_sys_sched_getaffinity 243 i386 set_thread_area sys_set_thread_area __ia32_sys_set_thread_area 244 i386 get_thread_area sys_get_thread_area __ia32_sys_get_thread_area 245 i386 io_setup sys_io_setup __ia32_compat_sys_io_setup 246 i386 io_destroy sys_io_destroy __ia32_sys_io_destroy -247 i386 io_getevents sys_io_getevents __ia32_compat_sys_io_getevents +247 i386 io_getevents sys_io_getevents __ia32_sys_io_getevents_time32 248 i386 io_submit sys_io_submit __ia32_compat_sys_io_submit 249 i386 io_cancel sys_io_cancel __ia32_sys_io_cancel 250 i386 fadvise64 sys_fadvise64 __ia32_compat_sys_x86_fadvise64 @@ -271,18 +271,18 @@ 257 i386 remap_file_pages sys_remap_file_pages __ia32_sys_remap_file_pages 258 i386 set_tid_address sys_set_tid_address __ia32_sys_set_tid_address 259 i386 timer_create sys_timer_create __ia32_compat_sys_timer_create -260 i386 timer_settime sys_timer_settime __ia32_compat_sys_timer_settime -261 i386 timer_gettime sys_timer_gettime __ia32_compat_sys_timer_gettime +260 i386 timer_settime sys_timer_settime __ia32_sys_timer_settime32 +261 i386 timer_gettime sys_timer_gettime __ia32_sys_timer_gettime32 262 i386 timer_getoverrun sys_timer_getoverrun __ia32_sys_timer_getoverrun 263 i386 timer_delete sys_timer_delete __ia32_sys_timer_delete -264 i386 clock_settime sys_clock_settime __ia32_compat_sys_clock_settime -265 i386 clock_gettime sys_clock_gettime __ia32_compat_sys_clock_gettime -266 i386 clock_getres sys_clock_getres __ia32_compat_sys_clock_getres -267 i386 clock_nanosleep sys_clock_nanosleep __ia32_compat_sys_clock_nanosleep +264 i386 clock_settime sys_clock_settime __ia32_sys_clock_settime32 +265 i386 clock_gettime sys_clock_gettime __ia32_sys_clock_gettime32 +266 i386 clock_getres sys_clock_getres __ia32_sys_clock_getres_time32 +267 i386 clock_nanosleep sys_clock_nanosleep __ia32_sys_clock_nanosleep_time32 268 i386 statfs64 sys_statfs64 __ia32_compat_sys_statfs64 269 i386 fstatfs64 sys_fstatfs64 __ia32_compat_sys_fstatfs64 270 i386 tgkill sys_tgkill __ia32_sys_tgkill -271 i386 utimes sys_utimes __ia32_compat_sys_utimes +271 i386 utimes sys_utimes __ia32_sys_utimes_time32 272 i386 fadvise64_64 sys_fadvise64_64 __ia32_compat_sys_x86_fadvise64_64 273 i386 vserver 274 i386 mbind sys_mbind __ia32_sys_mbind @@ -290,8 +290,8 @@ 276 i386 set_mempolicy sys_set_mempolicy __ia32_sys_set_mempolicy 277 i386 mq_open sys_mq_open __ia32_compat_sys_mq_open 278 i386 mq_unlink sys_mq_unlink __ia32_sys_mq_unlink -279 i386 mq_timedsend sys_mq_timedsend __ia32_compat_sys_mq_timedsend -280 i386 mq_timedreceive sys_mq_timedreceive __ia32_compat_sys_mq_timedreceive +279 i386 mq_timedsend sys_mq_timedsend __ia32_sys_mq_timedsend_time32 +280 i386 mq_timedreceive sys_mq_timedreceive __ia32_sys_mq_timedreceive_time32 281 i386 mq_notify sys_mq_notify __ia32_compat_sys_mq_notify 282 i386 mq_getsetattr sys_mq_getsetattr __ia32_compat_sys_mq_getsetattr 283 i386 kexec_load sys_kexec_load __ia32_compat_sys_kexec_load @@ -310,7 +310,7 @@ 296 i386 mkdirat sys_mkdirat __ia32_sys_mkdirat 297 i386 mknodat sys_mknodat __ia32_sys_mknodat 298 i386 fchownat sys_fchownat __ia32_sys_fchownat -299 i386 futimesat sys_futimesat __ia32_compat_sys_futimesat +299 i386 futimesat sys_futimesat __ia32_sys_futimesat_time32 300 i386 fstatat64 sys_fstatat64 __ia32_compat_sys_x86_fstatat 301 i386 unlinkat sys_unlinkat __ia32_sys_unlinkat 302 i386 renameat sys_renameat __ia32_sys_renameat @@ -319,8 +319,8 @@ 305 i386 readlinkat sys_readlinkat __ia32_sys_readlinkat 306 i386 fchmodat sys_fchmodat __ia32_sys_fchmodat 307 i386 faccessat sys_faccessat __ia32_sys_faccessat -308 i386 pselect6 sys_pselect6 __ia32_compat_sys_pselect6 -309 i386 ppoll sys_ppoll __ia32_compat_sys_ppoll +308 i386 pselect6 sys_pselect6 __ia32_compat_sys_pselect6_time32 +309 i386 ppoll sys_ppoll __ia32_compat_sys_ppoll_time32 310 i386 unshare sys_unshare __ia32_sys_unshare 311 i386 set_robust_list sys_set_robust_list __ia32_compat_sys_set_robust_list 312 i386 get_robust_list sys_get_robust_list __ia32_compat_sys_get_robust_list @@ -331,13 +331,13 @@ 317 i386 move_pages sys_move_pages __ia32_compat_sys_move_pages 318 i386 getcpu sys_getcpu __ia32_sys_getcpu 319 i386 epoll_pwait sys_epoll_pwait __ia32_sys_epoll_pwait -320 i386 utimensat sys_utimensat __ia32_compat_sys_utimensat +320 i386 utimensat sys_utimensat __ia32_sys_utimensat_time32 321 i386 signalfd sys_signalfd __ia32_compat_sys_signalfd 322 i386 timerfd_create sys_timerfd_create __ia32_sys_timerfd_create 323 i386 eventfd sys_eventfd __ia32_sys_eventfd 324 i386 fallocate sys_fallocate __ia32_compat_sys_x86_fallocate -325 i386 timerfd_settime sys_timerfd_settime __ia32_compat_sys_timerfd_settime -326 i386 timerfd_gettime sys_timerfd_gettime __ia32_compat_sys_timerfd_gettime +325 i386 timerfd_settime sys_timerfd_settime __ia32_sys_timerfd_settime32 +326 i386 timerfd_gettime sys_timerfd_gettime __ia32_sys_timerfd_gettime32 327 i386 signalfd4 sys_signalfd4 __ia32_compat_sys_signalfd4 328 i386 eventfd2 sys_eventfd2 __ia32_sys_eventfd2 329 i386 epoll_create1 sys_epoll_create1 __ia32_sys_epoll_create1 @@ -348,13 +348,13 @@ 334 i386 pwritev sys_pwritev __ia32_compat_sys_pwritev 335 i386 rt_tgsigqueueinfo sys_rt_tgsigqueueinfo __ia32_compat_sys_rt_tgsigqueueinfo 336 i386 perf_event_open sys_perf_event_open __ia32_sys_perf_event_open -337 i386 recvmmsg sys_recvmmsg __ia32_compat_sys_recvmmsg +337 i386 recvmmsg sys_recvmmsg __ia32_compat_sys_recvmmsg_time32 338 i386 fanotify_init sys_fanotify_init __ia32_sys_fanotify_init 339 i386 fanotify_mark sys_fanotify_mark __ia32_compat_sys_fanotify_mark 340 i386 prlimit64 sys_prlimit64 __ia32_sys_prlimit64 341 i386 name_to_handle_at sys_name_to_handle_at __ia32_sys_name_to_handle_at 342 i386 open_by_handle_at sys_open_by_handle_at __ia32_compat_sys_open_by_handle_at -343 i386 clock_adjtime sys_clock_adjtime __ia32_compat_sys_clock_adjtime +343 i386 clock_adjtime sys_clock_adjtime __ia32_sys_clock_adjtime32 344 i386 syncfs sys_syncfs __ia32_sys_syncfs 345 i386 sendmmsg sys_sendmmsg __ia32_compat_sys_sendmmsg 346 i386 setns sys_setns __ia32_sys_setns diff --git a/fs/aio.c b/fs/aio.c index b906ff70c90f..4394d3fe116a 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -2198,11 +2198,11 @@ SYSCALL_DEFINE6(io_pgetevents_time32, #if defined(CONFIG_COMPAT_32BIT_TIME) -COMPAT_SYSCALL_DEFINE5(io_getevents, compat_aio_context_t, ctx_id, - compat_long_t, min_nr, - compat_long_t, nr, - struct io_event __user *, events, - struct old_timespec32 __user *, timeout) +SYSCALL_DEFINE5(io_getevents_time32, __u32, ctx_id, + __s32, min_nr, + __s32, nr, + struct io_event __user *, events, + struct old_timespec32 __user *, timeout) { struct timespec64 t; int ret; diff --git a/fs/select.c b/fs/select.c index d0f35dbc0e8f..6cbc9ff56ba0 100644 --- a/fs/select.c +++ b/fs/select.c @@ -1379,7 +1379,7 @@ COMPAT_SYSCALL_DEFINE6(pselect6_time64, int, n, compat_ulong_t __user *, inp, #if defined(CONFIG_COMPAT_32BIT_TIME) -COMPAT_SYSCALL_DEFINE6(pselect6, int, n, compat_ulong_t __user *, inp, +COMPAT_SYSCALL_DEFINE6(pselect6_time32, int, n, compat_ulong_t __user *, inp, compat_ulong_t __user *, outp, compat_ulong_t __user *, exp, struct old_timespec32 __user *, tsp, void __user *, sig) { @@ -1402,7 +1402,7 @@ COMPAT_SYSCALL_DEFINE6(pselect6, int, n, compat_ulong_t __user *, inp, #endif #if defined(CONFIG_COMPAT_32BIT_TIME) -COMPAT_SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, +COMPAT_SYSCALL_DEFINE5(ppoll_time32, struct pollfd __user *, ufds, unsigned int, nfds, struct old_timespec32 __user *, tsp, const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize) { diff --git a/fs/timerfd.c b/fs/timerfd.c index 803ca070d42e..6a6fc8aa1de7 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -560,7 +560,7 @@ SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct __kernel_itimerspec __user *, } #ifdef CONFIG_COMPAT_32BIT_TIME -COMPAT_SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, +SYSCALL_DEFINE4(timerfd_settime32, int, ufd, int, flags, const struct old_itimerspec32 __user *, utmr, struct old_itimerspec32 __user *, otmr) { @@ -577,7 +577,7 @@ COMPAT_SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, return ret; } -COMPAT_SYSCALL_DEFINE2(timerfd_gettime, int, ufd, +SYSCALL_DEFINE2(timerfd_gettime32, int, ufd, struct old_itimerspec32 __user *, otmr) { struct itimerspec64 kotmr; diff --git a/fs/utimes.c b/fs/utimes.c index bdcf2daf39c1..350c9c16ace1 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -224,8 +224,8 @@ SYSCALL_DEFINE2(utime, char __user *, filename, struct utimbuf __user *, times) * of sys_utimes. */ #ifdef __ARCH_WANT_SYS_UTIME32 -COMPAT_SYSCALL_DEFINE2(utime, const char __user *, filename, - struct old_utimbuf32 __user *, t) +SYSCALL_DEFINE2(utime32, const char __user *, filename, + struct old_utimbuf32 __user *, t) { struct timespec64 tv[2]; @@ -240,7 +240,7 @@ COMPAT_SYSCALL_DEFINE2(utime, const char __user *, filename, } #endif -COMPAT_SYSCALL_DEFINE4(utimensat, unsigned int, dfd, const char __user *, filename, struct old_timespec32 __user *, t, int, flags) +SYSCALL_DEFINE4(utimensat_time32, unsigned int, dfd, const char __user *, filename, struct old_timespec32 __user *, t, int, flags) { struct timespec64 tv[2]; @@ -276,14 +276,14 @@ static long do_compat_futimesat(unsigned int dfd, const char __user *filename, return do_utimes(dfd, filename, t ? tv : NULL, 0); } -COMPAT_SYSCALL_DEFINE3(futimesat, unsigned int, dfd, +SYSCALL_DEFINE3(futimesat_time32, unsigned int, dfd, const char __user *, filename, struct old_timeval32 __user *, t) { return do_compat_futimesat(dfd, filename, t); } -COMPAT_SYSCALL_DEFINE2(utimes, const char __user *, filename, struct old_timeval32 __user *, t) +SYSCALL_DEFINE2(utimes_time32, const char __user *, filename, struct old_timeval32 __user *, t) { return do_compat_futimesat(AT_FDCWD, filename, t); } diff --git a/include/linux/compat.h b/include/linux/compat.h index 657ca6abd855..ebddcb6cfcf8 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -520,11 +520,6 @@ int __compat_save_altstack(compat_stack_t __user *, unsigned long); asmlinkage long compat_sys_io_setup(unsigned nr_reqs, u32 __user *ctx32p); asmlinkage long compat_sys_io_submit(compat_aio_context_t ctx_id, int nr, u32 __user *iocb); -asmlinkage long compat_sys_io_getevents(compat_aio_context_t ctx_id, - compat_long_t min_nr, - compat_long_t nr, - struct io_event __user *events, - struct old_timespec32 __user *timeout); asmlinkage long compat_sys_io_pgetevents(compat_aio_context_t ctx_id, compat_long_t min_nr, compat_long_t nr, @@ -617,7 +612,7 @@ asmlinkage long compat_sys_sendfile64(int out_fd, int in_fd, compat_loff_t __user *offset, compat_size_t count); /* fs/select.c */ -asmlinkage long compat_sys_pselect6(int n, compat_ulong_t __user *inp, +asmlinkage long compat_sys_pselect6_time32(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp, compat_ulong_t __user *exp, struct old_timespec32 __user *tsp, @@ -627,7 +622,7 @@ asmlinkage long compat_sys_pselect6_time64(int n, compat_ulong_t __user *inp, compat_ulong_t __user *exp, struct __kernel_timespec __user *tsp, void __user *sig); -asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds, +asmlinkage long compat_sys_ppoll_time32(struct pollfd __user *ufds, unsigned int nfds, struct old_timespec32 __user *tsp, const compat_sigset_t __user *sigmask, @@ -657,19 +652,6 @@ asmlinkage long compat_sys_newfstat(unsigned int fd, /* fs/sync.c: No generic prototype for sync_file_range and sync_file_range2 */ -/* fs/timerfd.c */ -asmlinkage long compat_sys_timerfd_gettime(int ufd, - struct old_itimerspec32 __user *otmr); -asmlinkage long compat_sys_timerfd_settime(int ufd, int flags, - const struct old_itimerspec32 __user *utmr, - struct old_itimerspec32 __user *otmr); - -/* fs/utimes.c */ -asmlinkage long compat_sys_utimensat(unsigned int dfd, - const char __user *filename, - struct old_timespec32 __user *t, - int flags); - /* kernel/exit.c */ asmlinkage long compat_sys_waitid(int, compat_pid_t, struct compat_siginfo __user *, int, @@ -678,9 +660,6 @@ asmlinkage long compat_sys_waitid(int, compat_pid_t, /* kernel/futex.c */ -asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val, - struct old_timespec32 __user *utime, u32 __user *uaddr2, - u32 val3); asmlinkage long compat_sys_set_robust_list(struct compat_robust_list_head __user *head, compat_size_t len); @@ -688,10 +667,6 @@ asmlinkage long compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr, compat_size_t __user *len_ptr); -/* kernel/hrtimer.c */ -asmlinkage long compat_sys_nanosleep(struct old_timespec32 __user *rqtp, - struct old_timespec32 __user *rmtp); - /* kernel/itimer.c */ asmlinkage long compat_sys_getitimer(int which, struct compat_itimerval __user *it); @@ -709,20 +684,6 @@ asmlinkage long compat_sys_kexec_load(compat_ulong_t entry, asmlinkage long compat_sys_timer_create(clockid_t which_clock, struct compat_sigevent __user *timer_event_spec, timer_t __user *created_timer_id); -asmlinkage long compat_sys_timer_gettime(timer_t timer_id, - struct old_itimerspec32 __user *setting); -asmlinkage long compat_sys_timer_settime(timer_t timer_id, int flags, - struct old_itimerspec32 __user *new, - struct old_itimerspec32 __user *old); -asmlinkage long compat_sys_clock_settime(clockid_t which_clock, - struct old_timespec32 __user *tp); -asmlinkage long compat_sys_clock_gettime(clockid_t which_clock, - struct old_timespec32 __user *tp); -asmlinkage long compat_sys_clock_getres(clockid_t which_clock, - struct old_timespec32 __user *tp); -asmlinkage long compat_sys_clock_nanosleep(clockid_t which_clock, int flags, - struct old_timespec32 __user *rqtp, - struct old_timespec32 __user *rmtp); /* kernel/ptrace.c */ asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid, @@ -735,8 +696,6 @@ asmlinkage long compat_sys_sched_setaffinity(compat_pid_t pid, asmlinkage long compat_sys_sched_getaffinity(compat_pid_t pid, unsigned int len, compat_ulong_t __user *user_mask_ptr); -asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid, - struct old_timespec32 __user *interval); /* kernel/signal.c */ asmlinkage long compat_sys_sigaltstack(const compat_stack_t __user *uss_ptr, @@ -754,7 +713,7 @@ asmlinkage long compat_sys_rt_sigprocmask(int how, compat_sigset_t __user *set, compat_size_t sigsetsize); asmlinkage long compat_sys_rt_sigpending(compat_sigset_t __user *uset, compat_size_t sigsetsize); -asmlinkage long compat_sys_rt_sigtimedwait(compat_sigset_t __user *uthese, +asmlinkage long compat_sys_rt_sigtimedwait_time32(compat_sigset_t __user *uthese, struct compat_siginfo __user *uinfo, struct old_timespec32 __user *uts, compat_size_t sigsetsize); asmlinkage long compat_sys_rt_sigtimedwait_time64(compat_sigset_t __user *uthese, @@ -777,7 +736,6 @@ asmlinkage long compat_sys_gettimeofday(struct old_timeval32 __user *tv, struct timezone __user *tz); asmlinkage long compat_sys_settimeofday(struct old_timeval32 __user *tv, struct timezone __user *tz); -asmlinkage long compat_sys_adjtimex(struct old_timex32 __user *utp); /* kernel/timer.c */ asmlinkage long compat_sys_sysinfo(struct compat_sysinfo __user *info); @@ -786,14 +744,6 @@ asmlinkage long compat_sys_sysinfo(struct compat_sysinfo __user *info); asmlinkage long compat_sys_mq_open(const char __user *u_name, int oflag, compat_mode_t mode, struct compat_mq_attr __user *u_attr); -asmlinkage long compat_sys_mq_timedsend(mqd_t mqdes, - const char __user *u_msg_ptr, - compat_size_t msg_len, unsigned int msg_prio, - const struct old_timespec32 __user *u_abs_timeout); -asmlinkage ssize_t compat_sys_mq_timedreceive(mqd_t mqdes, - char __user *u_msg_ptr, - compat_size_t msg_len, unsigned int __user *u_msg_prio, - const struct old_timespec32 __user *u_abs_timeout); asmlinkage long compat_sys_mq_notify(mqd_t mqdes, const struct compat_sigevent __user *u_notification); asmlinkage long compat_sys_mq_getsetattr(mqd_t mqdes, @@ -809,8 +759,6 @@ asmlinkage long compat_sys_msgsnd(int msqid, compat_uptr_t msgp, /* ipc/sem.c */ asmlinkage long compat_sys_semctl(int semid, int semnum, int cmd, int arg); -asmlinkage long compat_sys_semtimedop(int semid, struct sembuf __user *tsems, - unsigned nsems, const struct old_timespec32 __user *timeout); /* ipc/shm.c */ asmlinkage long compat_sys_shmctl(int first, int second, void __user *uptr); @@ -868,7 +816,7 @@ asmlinkage long compat_sys_rt_tgsigqueueinfo(compat_pid_t tgid, asmlinkage long compat_sys_recvmmsg_time64(int fd, struct compat_mmsghdr __user *mmsg, unsigned vlen, unsigned int flags, struct __kernel_timespec __user *timeout); -asmlinkage long compat_sys_recvmmsg(int fd, struct compat_mmsghdr __user *mmsg, +asmlinkage long compat_sys_recvmmsg_time32(int fd, struct compat_mmsghdr __user *mmsg, unsigned vlen, unsigned int flags, struct old_timespec32 __user *timeout); asmlinkage long compat_sys_wait4(compat_pid_t pid, @@ -879,8 +827,6 @@ asmlinkage long compat_sys_fanotify_mark(int, unsigned int, __u32, __u32, asmlinkage long compat_sys_open_by_handle_at(int mountdirfd, struct file_handle __user *handle, int flags); -asmlinkage long compat_sys_clock_adjtime(clockid_t which_clock, - struct old_timex32 __user *tp); asmlinkage long compat_sys_sendmmsg(int fd, struct compat_mmsghdr __user *mmsg, unsigned vlen, unsigned int flags); asmlinkage ssize_t compat_sys_process_vm_readv(compat_pid_t pid, @@ -921,8 +867,6 @@ asmlinkage long compat_sys_pwritev64v2(unsigned long fd, /* __ARCH_WANT_SYSCALL_NO_AT */ asmlinkage long compat_sys_open(const char __user *filename, int flags, umode_t mode); -asmlinkage long compat_sys_utimes(const char __user *filename, - struct old_timeval32 __user *t); /* __ARCH_WANT_SYSCALL_NO_FLAGS */ asmlinkage long compat_sys_signalfd(int ufd, @@ -936,12 +880,6 @@ asmlinkage long compat_sys_newlstat(const char __user *filename, struct compat_stat __user *statbuf); /* __ARCH_WANT_SYSCALL_DEPRECATED */ -asmlinkage long compat_sys_time(old_time32_t __user *tloc); -asmlinkage long compat_sys_utime(const char __user *filename, - struct old_utimbuf32 __user *t); -asmlinkage long compat_sys_futimesat(unsigned int dfd, - const char __user *filename, - struct old_timeval32 __user *t); asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp, compat_ulong_t __user *exp, struct old_timeval32 __user *tvp); @@ -976,9 +914,6 @@ asmlinkage long compat_sys_sigaction(int sig, struct compat_old_sigaction __user *oact); #endif -/* obsolete: kernel/time/time.c */ -asmlinkage long compat_sys_stime(old_time32_t __user *tptr); - /* obsolete: net/socket.c */ asmlinkage long compat_sys_socketcall(int call, u32 __user *args); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 09330d5bda0c..94369f5bd8e5 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -297,6 +297,11 @@ asmlinkage long sys_io_getevents(aio_context_t ctx_id, long nr, struct io_event __user *events, struct __kernel_timespec __user *timeout); +asmlinkage long sys_io_getevents_time32(__u32 ctx_id, + __s32 min_nr, + __s32 nr, + struct io_event __user *events, + struct old_timespec32 __user *timeout); asmlinkage long sys_io_pgetevents(aio_context_t ctx_id, long min_nr, long nr, @@ -522,11 +527,19 @@ asmlinkage long sys_timerfd_settime(int ufd, int flags, const struct __kernel_itimerspec __user *utmr, struct __kernel_itimerspec __user *otmr); asmlinkage long sys_timerfd_gettime(int ufd, struct __kernel_itimerspec __user *otmr); +asmlinkage long sys_timerfd_gettime32(int ufd, + struct old_itimerspec32 __user *otmr); +asmlinkage long sys_timerfd_settime32(int ufd, int flags, + const struct old_itimerspec32 __user *utmr, + struct old_itimerspec32 __user *otmr); /* fs/utimes.c */ asmlinkage long sys_utimensat(int dfd, const char __user *filename, struct __kernel_timespec __user *utimes, int flags); +asmlinkage long sys_utimensat_time32(unsigned int dfd, + const char __user *filename, + struct old_timespec32 __user *t, int flags); /* kernel/acct.c */ asmlinkage long sys_acct(const char __user *name); @@ -555,6 +568,9 @@ asmlinkage long sys_unshare(unsigned long unshare_flags); asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val, struct __kernel_timespec __user *utime, u32 __user *uaddr2, u32 val3); +asmlinkage long sys_futex_time32(u32 __user *uaddr, int op, u32 val, + struct old_timespec32 __user *utime, u32 __user *uaddr2, + u32 val3); asmlinkage long sys_get_robust_list(int pid, struct robust_list_head __user * __user *head_ptr, size_t __user *len_ptr); @@ -564,6 +580,8 @@ asmlinkage long sys_set_robust_list(struct robust_list_head __user *head, /* kernel/hrtimer.c */ asmlinkage long sys_nanosleep(struct __kernel_timespec __user *rqtp, struct __kernel_timespec __user *rmtp); +asmlinkage long sys_nanosleep_time32(struct old_timespec32 __user *rqtp, + struct old_timespec32 __user *rmtp); /* kernel/itimer.c */ asmlinkage long sys_getitimer(int which, struct itimerval __user *value); @@ -602,6 +620,20 @@ asmlinkage long sys_clock_getres(clockid_t which_clock, asmlinkage long sys_clock_nanosleep(clockid_t which_clock, int flags, const struct __kernel_timespec __user *rqtp, struct __kernel_timespec __user *rmtp); +asmlinkage long sys_timer_gettime32(timer_t timer_id, + struct old_itimerspec32 __user *setting); +asmlinkage long sys_timer_settime32(timer_t timer_id, int flags, + struct old_itimerspec32 __user *new, + struct old_itimerspec32 __user *old); +asmlinkage long sys_clock_settime32(clockid_t which_clock, + struct old_timespec32 __user *tp); +asmlinkage long sys_clock_gettime32(clockid_t which_clock, + struct old_timespec32 __user *tp); +asmlinkage long sys_clock_getres_time32(clockid_t which_clock, + struct old_timespec32 __user *tp); +asmlinkage long sys_clock_nanosleep_time32(clockid_t which_clock, int flags, + struct old_timespec32 __user *rqtp, + struct old_timespec32 __user *rmtp); /* kernel/printk.c */ asmlinkage long sys_syslog(int type, char __user *buf, int len); @@ -627,6 +659,8 @@ asmlinkage long sys_sched_get_priority_max(int policy); asmlinkage long sys_sched_get_priority_min(int policy); asmlinkage long sys_sched_rr_get_interval(pid_t pid, struct __kernel_timespec __user *interval); +asmlinkage long sys_sched_rr_get_interval_time32(pid_t pid, + struct old_timespec32 __user *interval); /* kernel/signal.c */ asmlinkage long sys_restart_syscall(void); @@ -696,6 +730,7 @@ asmlinkage long sys_gettimeofday(struct timeval __user *tv, asmlinkage long sys_settimeofday(struct timeval __user *tv, struct timezone __user *tz); asmlinkage long sys_adjtimex(struct __kernel_timex __user *txc_p); +asmlinkage long sys_adjtimex_time32(struct old_timex32 __user *txc_p); /* kernel/timer.c */ asmlinkage long sys_getpid(void); @@ -714,6 +749,14 @@ asmlinkage long sys_mq_timedsend(mqd_t mqdes, const char __user *msg_ptr, size_t asmlinkage long sys_mq_timedreceive(mqd_t mqdes, char __user *msg_ptr, size_t msg_len, unsigned int __user *msg_prio, const struct __kernel_timespec __user *abs_timeout); asmlinkage long sys_mq_notify(mqd_t mqdes, const struct sigevent __user *notification); asmlinkage long sys_mq_getsetattr(mqd_t mqdes, const struct mq_attr __user *mqstat, struct mq_attr __user *omqstat); +asmlinkage long sys_mq_timedreceive_time32(mqd_t mqdes, + char __user *u_msg_ptr, + unsigned int msg_len, unsigned int __user *u_msg_prio, + const struct old_timespec32 __user *u_abs_timeout); +asmlinkage long sys_mq_timedsend_time32(mqd_t mqdes, + const char __user *u_msg_ptr, + unsigned int msg_len, unsigned int msg_prio, + const struct old_timespec32 __user *u_abs_timeout); /* ipc/msg.c */ asmlinkage long sys_msgget(key_t key, int msgflg); @@ -731,6 +774,9 @@ asmlinkage long sys_old_semctl(int semid, int semnum, int cmd, unsigned long arg asmlinkage long sys_semtimedop(int semid, struct sembuf __user *sops, unsigned nsops, const struct __kernel_timespec __user *timeout); +asmlinkage long sys_semtimedop_time32(int semid, struct sembuf __user *sops, + unsigned nsops, + const struct old_timespec32 __user *timeout); asmlinkage long sys_semop(int semid, struct sembuf __user *sops, unsigned nsops); @@ -871,6 +917,8 @@ asmlinkage long sys_open_by_handle_at(int mountdirfd, int flags); asmlinkage long sys_clock_adjtime(clockid_t which_clock, struct __kernel_timex __user *tx); +asmlinkage long sys_clock_adjtime32(clockid_t which_clock, + struct old_timex32 __user *tx); asmlinkage long sys_syncfs(int fd); asmlinkage long sys_setns(int fd, int nstype); asmlinkage long sys_sendmmsg(int fd, struct mmsghdr __user *msg, @@ -1006,6 +1054,7 @@ asmlinkage long sys_alarm(unsigned int seconds); asmlinkage long sys_getpgrp(void); asmlinkage long sys_pause(void); asmlinkage long sys_time(time_t __user *tloc); +asmlinkage long sys_time32(old_time32_t __user *tloc); #ifdef __ARCH_WANT_SYS_UTIME asmlinkage long sys_utime(char __user *filename, struct utimbuf __user *times); @@ -1014,6 +1063,13 @@ asmlinkage long sys_utimes(char __user *filename, asmlinkage long sys_futimesat(int dfd, const char __user *filename, struct timeval __user *utimes); #endif +asmlinkage long sys_futimesat_time32(unsigned int dfd, + const char __user *filename, + struct old_timeval32 __user *t); +asmlinkage long sys_utime32(const char __user *filename, + struct old_utimbuf32 __user *t); +asmlinkage long sys_utimes_time32(const char __user *filename, + struct old_timeval32 __user *t); asmlinkage long sys_creat(const char __user *pathname, umode_t mode); asmlinkage long sys_getdents(unsigned int fd, struct linux_dirent __user *dirent, @@ -1038,6 +1094,7 @@ asmlinkage long sys_fork(void); /* obsolete: kernel/time/time.c */ asmlinkage long sys_stime(time_t __user *tptr); +asmlinkage long sys_stime32(old_time32_t __user *tptr); /* obsolete: kernel/signal.c */ asmlinkage long sys_sigpending(old_sigset_t __user *uset); diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 509484dbfd5d..153b55b94234 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -39,7 +39,7 @@ __SC_COMP(__NR_io_submit, sys_io_submit, compat_sys_io_submit) #define __NR_io_cancel 3 __SYSCALL(__NR_io_cancel, sys_io_cancel) #define __NR_io_getevents 4 -__SC_COMP(__NR_io_getevents, sys_io_getevents, compat_sys_io_getevents) +__SC_COMP(__NR_io_getevents, sys_io_getevents, sys_io_getevents_time32) /* fs/xattr.c */ #define __NR_setxattr 5 @@ -223,9 +223,9 @@ __SYSCALL(__NR3264_sendfile, sys_sendfile64) /* fs/select.c */ #define __NR_pselect6 72 -__SC_COMP(__NR_pselect6, sys_pselect6, compat_sys_pselect6) +__SC_COMP(__NR_pselect6, sys_pselect6, compat_sys_pselect6_time32) #define __NR_ppoll 73 -__SC_COMP(__NR_ppoll, sys_ppoll, compat_sys_ppoll) +__SC_COMP(__NR_ppoll, sys_ppoll, compat_sys_ppoll_time32) /* fs/signalfd.c */ #define __NR_signalfd4 74 @@ -271,14 +271,14 @@ __SC_COMP(__NR_sync_file_range, sys_sync_file_range, \ __SYSCALL(__NR_timerfd_create, sys_timerfd_create) #define __NR_timerfd_settime 86 __SC_COMP(__NR_timerfd_settime, sys_timerfd_settime, \ - compat_sys_timerfd_settime) + sys_timerfd_settime32) #define __NR_timerfd_gettime 87 __SC_COMP(__NR_timerfd_gettime, sys_timerfd_gettime, \ - compat_sys_timerfd_gettime) + sys_timerfd_gettime32) /* fs/utimes.c */ #define __NR_utimensat 88 -__SC_COMP(__NR_utimensat, sys_utimensat, compat_sys_utimensat) +__SC_COMP(__NR_utimensat, sys_utimensat, sys_utimensat_time32) /* kernel/acct.c */ #define __NR_acct 89 @@ -310,7 +310,7 @@ __SYSCALL(__NR_unshare, sys_unshare) /* kernel/futex.c */ #define __NR_futex 98 -__SC_COMP(__NR_futex, sys_futex, compat_sys_futex) +__SC_COMP(__NR_futex, sys_futex, sys_futex_time32) #define __NR_set_robust_list 99 __SC_COMP(__NR_set_robust_list, sys_set_robust_list, \ compat_sys_set_robust_list) @@ -320,7 +320,7 @@ __SC_COMP(__NR_get_robust_list, sys_get_robust_list, \ /* kernel/hrtimer.c */ #define __NR_nanosleep 101 -__SC_COMP(__NR_nanosleep, sys_nanosleep, compat_sys_nanosleep) +__SC_COMP(__NR_nanosleep, sys_nanosleep, sys_nanosleep_time32) /* kernel/itimer.c */ #define __NR_getitimer 102 @@ -342,22 +342,22 @@ __SYSCALL(__NR_delete_module, sys_delete_module) #define __NR_timer_create 107 __SC_COMP(__NR_timer_create, sys_timer_create, compat_sys_timer_create) #define __NR_timer_gettime 108 -__SC_COMP(__NR_timer_gettime, sys_timer_gettime, compat_sys_timer_gettime) +__SC_COMP(__NR_timer_gettime, sys_timer_gettime, sys_timer_gettime32) #define __NR_timer_getoverrun 109 __SYSCALL(__NR_timer_getoverrun, sys_timer_getoverrun) #define __NR_timer_settime 110 -__SC_COMP(__NR_timer_settime, sys_timer_settime, compat_sys_timer_settime) +__SC_COMP(__NR_timer_settime, sys_timer_settime, sys_timer_settime32) #define __NR_timer_delete 111 __SYSCALL(__NR_timer_delete, sys_timer_delete) #define __NR_clock_settime 112 -__SC_COMP(__NR_clock_settime, sys_clock_settime, compat_sys_clock_settime) +__SC_COMP(__NR_clock_settime, sys_clock_settime, sys_clock_settime32) #define __NR_clock_gettime 113 -__SC_COMP(__NR_clock_gettime, sys_clock_gettime, compat_sys_clock_gettime) +__SC_COMP(__NR_clock_gettime, sys_clock_gettime, sys_clock_gettime32) #define __NR_clock_getres 114 -__SC_COMP(__NR_clock_getres, sys_clock_getres, compat_sys_clock_getres) +__SC_COMP(__NR_clock_getres, sys_clock_getres, sys_clock_getres_time32) #define __NR_clock_nanosleep 115 __SC_COMP(__NR_clock_nanosleep, sys_clock_nanosleep, \ - compat_sys_clock_nanosleep) + sys_clock_nanosleep_time32) /* kernel/printk.c */ #define __NR_syslog 116 @@ -390,7 +390,7 @@ __SYSCALL(__NR_sched_get_priority_max, sys_sched_get_priority_max) __SYSCALL(__NR_sched_get_priority_min, sys_sched_get_priority_min) #define __NR_sched_rr_get_interval 127 __SC_COMP(__NR_sched_rr_get_interval, sys_sched_rr_get_interval, \ - compat_sys_sched_rr_get_interval) + sys_sched_rr_get_interval_time32) /* kernel/signal.c */ #define __NR_restart_syscall 128 @@ -413,7 +413,7 @@ __SC_COMP(__NR_rt_sigprocmask, sys_rt_sigprocmask, compat_sys_rt_sigprocmask) __SC_COMP(__NR_rt_sigpending, sys_rt_sigpending, compat_sys_rt_sigpending) #define __NR_rt_sigtimedwait 137 __SC_COMP(__NR_rt_sigtimedwait, sys_rt_sigtimedwait, \ - compat_sys_rt_sigtimedwait) + compat_sys_rt_sigtimedwait_time32) #define __NR_rt_sigqueueinfo 138 __SC_COMP(__NR_rt_sigqueueinfo, sys_rt_sigqueueinfo, \ compat_sys_rt_sigqueueinfo) @@ -486,7 +486,7 @@ __SC_COMP(__NR_gettimeofday, sys_gettimeofday, compat_sys_gettimeofday) #define __NR_settimeofday 170 __SC_COMP(__NR_settimeofday, sys_settimeofday, compat_sys_settimeofday) #define __NR_adjtimex 171 -__SC_COMP(__NR_adjtimex, sys_adjtimex, compat_sys_adjtimex) +__SC_COMP(__NR_adjtimex, sys_adjtimex, sys_adjtimex_time32) /* kernel/timer.c */ #define __NR_getpid 172 @@ -512,10 +512,10 @@ __SC_COMP(__NR_mq_open, sys_mq_open, compat_sys_mq_open) #define __NR_mq_unlink 181 __SYSCALL(__NR_mq_unlink, sys_mq_unlink) #define __NR_mq_timedsend 182 -__SC_COMP(__NR_mq_timedsend, sys_mq_timedsend, compat_sys_mq_timedsend) +__SC_COMP(__NR_mq_timedsend, sys_mq_timedsend, sys_mq_timedsend_time32) #define __NR_mq_timedreceive 183 __SC_COMP(__NR_mq_timedreceive, sys_mq_timedreceive, \ - compat_sys_mq_timedreceive) + sys_mq_timedreceive_time32) #define __NR_mq_notify 184 __SC_COMP(__NR_mq_notify, sys_mq_notify, compat_sys_mq_notify) #define __NR_mq_getsetattr 185 @@ -537,7 +537,7 @@ __SYSCALL(__NR_semget, sys_semget) #define __NR_semctl 191 __SC_COMP(__NR_semctl, sys_semctl, compat_sys_semctl) #define __NR_semtimedop 192 -__SC_COMP(__NR_semtimedop, sys_semtimedop, compat_sys_semtimedop) +__SC_COMP(__NR_semtimedop, sys_semtimedop, sys_semtimedop_time32) #define __NR_semop 193 __SYSCALL(__NR_semop, sys_semop) @@ -659,7 +659,7 @@ __SYSCALL(__NR_perf_event_open, sys_perf_event_open) #define __NR_accept4 242 __SYSCALL(__NR_accept4, sys_accept4) #define __NR_recvmmsg 243 -__SC_COMP(__NR_recvmmsg, sys_recvmmsg, compat_sys_recvmmsg) +__SC_COMP(__NR_recvmmsg, sys_recvmmsg, compat_sys_recvmmsg_time32) /* * Architectures may provide up to 16 syscalls of their own @@ -681,7 +681,7 @@ __SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at) __SC_COMP(__NR_open_by_handle_at, sys_open_by_handle_at, \ compat_sys_open_by_handle_at) #define __NR_clock_adjtime 266 -__SC_COMP(__NR_clock_adjtime, sys_clock_adjtime, compat_sys_clock_adjtime) +__SC_COMP(__NR_clock_adjtime, sys_clock_adjtime, sys_clock_adjtime32) #define __NR_syncfs 267 __SYSCALL(__NR_syncfs, sys_syncfs) #define __NR_setns 268 diff --git a/ipc/mqueue.c b/ipc/mqueue.c index c595bed7bfcb..c839bf83231d 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -1471,10 +1471,10 @@ static int compat_prepare_timeout(const struct old_timespec32 __user *p, return 0; } -COMPAT_SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, - const char __user *, u_msg_ptr, - compat_size_t, msg_len, unsigned int, msg_prio, - const struct old_timespec32 __user *, u_abs_timeout) +SYSCALL_DEFINE5(mq_timedsend_time32, mqd_t, mqdes, + const char __user *, u_msg_ptr, + unsigned int, msg_len, unsigned int, msg_prio, + const struct old_timespec32 __user *, u_abs_timeout) { struct timespec64 ts, *p = NULL; if (u_abs_timeout) { @@ -1486,10 +1486,10 @@ COMPAT_SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, return do_mq_timedsend(mqdes, u_msg_ptr, msg_len, msg_prio, p); } -COMPAT_SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, - char __user *, u_msg_ptr, - compat_size_t, msg_len, unsigned int __user *, u_msg_prio, - const struct old_timespec32 __user *, u_abs_timeout) +SYSCALL_DEFINE5(mq_timedreceive_time32, mqd_t, mqdes, + char __user *, u_msg_ptr, + unsigned int, msg_len, unsigned int __user *, u_msg_prio, + const struct old_timespec32 __user *, u_abs_timeout) { struct timespec64 ts, *p = NULL; if (u_abs_timeout) { diff --git a/ipc/sem.c b/ipc/sem.c index d1efff3a81bb..80909464acff 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -2250,7 +2250,7 @@ long compat_ksys_semtimedop(int semid, struct sembuf __user *tsems, return do_semtimedop(semid, tsems, nsops, NULL); } -COMPAT_SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsems, +SYSCALL_DEFINE4(semtimedop_time32, int, semid, struct sembuf __user *, tsems, unsigned int, nsops, const struct old_timespec32 __user *, timeout) { diff --git a/kernel/futex.c b/kernel/futex.c index be3bff2315ff..caead6c113d4 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -3812,7 +3812,7 @@ err_unlock: #endif /* CONFIG_COMPAT */ #ifdef CONFIG_COMPAT_32BIT_TIME -COMPAT_SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, +SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, struct old_timespec32 __user *, utime, u32 __user *, uaddr2, u32, val3) { diff --git a/kernel/sched/core.c b/kernel/sched/core.c index a674c7db2f29..62862419cd05 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5252,9 +5252,8 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, } #ifdef CONFIG_COMPAT_32BIT_TIME -COMPAT_SYSCALL_DEFINE2(sched_rr_get_interval, - compat_pid_t, pid, - struct old_timespec32 __user *, interval) +SYSCALL_DEFINE2(sched_rr_get_interval_time32, pid_t, pid, + struct old_timespec32 __user *, interval) { struct timespec64 t; int retval = sched_rr_get_interval(pid, &t); diff --git a/kernel/signal.c b/kernel/signal.c index e1d7ad8e6ab1..af27629918cf 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -3397,7 +3397,7 @@ COMPAT_SYSCALL_DEFINE4(rt_sigtimedwait_time64, compat_sigset_t __user *, uthese, } #ifdef CONFIG_COMPAT_32BIT_TIME -COMPAT_SYSCALL_DEFINE4(rt_sigtimedwait, compat_sigset_t __user *, uthese, +COMPAT_SYSCALL_DEFINE4(rt_sigtimedwait_time32, compat_sigset_t __user *, uthese, struct compat_siginfo __user *, uinfo, struct old_timespec32 __user *, uts, compat_size_t, sigsetsize) { diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index ce04431a40d1..85e5ccec0955 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -42,9 +42,11 @@ COND_SYSCALL(io_destroy); COND_SYSCALL(io_submit); COND_SYSCALL_COMPAT(io_submit); COND_SYSCALL(io_cancel); +COND_SYSCALL(io_getevents_time32); COND_SYSCALL(io_getevents); +COND_SYSCALL(io_pgetevents_time32); COND_SYSCALL(io_pgetevents); -COND_SYSCALL_COMPAT(io_getevents); +COND_SYSCALL_COMPAT(io_pgetevents_time32); COND_SYSCALL_COMPAT(io_pgetevents); /* fs/xattr.c */ @@ -114,9 +116,9 @@ COND_SYSCALL_COMPAT(signalfd4); /* fs/timerfd.c */ COND_SYSCALL(timerfd_create); COND_SYSCALL(timerfd_settime); -COND_SYSCALL_COMPAT(timerfd_settime); +COND_SYSCALL(timerfd_settime32); COND_SYSCALL(timerfd_gettime); -COND_SYSCALL_COMPAT(timerfd_gettime); +COND_SYSCALL(timerfd_gettime32); /* fs/utimes.c */ @@ -135,7 +137,7 @@ COND_SYSCALL(capset); /* kernel/futex.c */ COND_SYSCALL(futex); -COND_SYSCALL_COMPAT(futex); +COND_SYSCALL(futex_time32); COND_SYSCALL(set_robust_list); COND_SYSCALL_COMPAT(set_robust_list); COND_SYSCALL(get_robust_list); @@ -187,9 +189,9 @@ COND_SYSCALL(mq_open); COND_SYSCALL_COMPAT(mq_open); COND_SYSCALL(mq_unlink); COND_SYSCALL(mq_timedsend); -COND_SYSCALL_COMPAT(mq_timedsend); +COND_SYSCALL(mq_timedsend_time32); COND_SYSCALL(mq_timedreceive); -COND_SYSCALL_COMPAT(mq_timedreceive); +COND_SYSCALL(mq_timedreceive_time32); COND_SYSCALL(mq_notify); COND_SYSCALL_COMPAT(mq_notify); COND_SYSCALL(mq_getsetattr); @@ -211,7 +213,7 @@ COND_SYSCALL(old_semctl); COND_SYSCALL(semctl); COND_SYSCALL_COMPAT(semctl); COND_SYSCALL(semtimedop); -COND_SYSCALL_COMPAT(semtimedop); +COND_SYSCALL(semtimedop_time32); COND_SYSCALL(semop); /* ipc/shm.c */ @@ -288,7 +290,7 @@ COND_SYSCALL(perf_event_open); COND_SYSCALL(accept4); COND_SYSCALL(recvmmsg); COND_SYSCALL(recvmmsg_time32); -COND_SYSCALL_COMPAT(recvmmsg); +COND_SYSCALL_COMPAT(recvmmsg_time32); COND_SYSCALL_COMPAT(recvmmsg_time64); /* diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index f5cfa1b73d6f..0f5f96075110 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1771,7 +1771,7 @@ SYSCALL_DEFINE2(nanosleep, struct __kernel_timespec __user *, rqtp, #ifdef CONFIG_COMPAT_32BIT_TIME -COMPAT_SYSCALL_DEFINE2(nanosleep, struct old_timespec32 __user *, rqtp, +SYSCALL_DEFINE2(nanosleep_time32, struct old_timespec32 __user *, rqtp, struct old_timespec32 __user *, rmtp) { struct timespec64 tu; diff --git a/kernel/time/posix-stubs.c b/kernel/time/posix-stubs.c index a51895486e5e..67df65f887ac 100644 --- a/kernel/time/posix-stubs.c +++ b/kernel/time/posix-stubs.c @@ -45,6 +45,7 @@ SYS_NI(timer_delete); SYS_NI(clock_adjtime); SYS_NI(getitimer); SYS_NI(setitimer); +SYS_NI(clock_adjtime32); #ifdef __ARCH_WANT_SYS_ALARM SYS_NI(alarm); #endif @@ -150,16 +151,16 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags, #ifdef CONFIG_COMPAT COMPAT_SYS_NI(timer_create); -COMPAT_SYS_NI(clock_adjtime); -COMPAT_SYS_NI(timer_settime); -COMPAT_SYS_NI(timer_gettime); COMPAT_SYS_NI(getitimer); COMPAT_SYS_NI(setitimer); #endif #ifdef CONFIG_COMPAT_32BIT_TIME -COMPAT_SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock, - struct old_timespec32 __user *, tp) +SYS_NI(timer_settime32); +SYS_NI(timer_gettime32); + +SYSCALL_DEFINE2(clock_settime32, const clockid_t, which_clock, + struct old_timespec32 __user *, tp) { struct timespec64 new_tp; @@ -171,8 +172,8 @@ COMPAT_SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock, return do_sys_settimeofday64(&new_tp, NULL); } -COMPAT_SYSCALL_DEFINE2(clock_gettime, clockid_t, which_clock, - struct old_timespec32 __user *, tp) +SYSCALL_DEFINE2(clock_gettime32, clockid_t, which_clock, + struct old_timespec32 __user *, tp) { int ret; struct timespec64 kernel_tp; @@ -186,8 +187,8 @@ COMPAT_SYSCALL_DEFINE2(clock_gettime, clockid_t, which_clock, return 0; } -COMPAT_SYSCALL_DEFINE2(clock_getres, clockid_t, which_clock, - struct old_timespec32 __user *, tp) +SYSCALL_DEFINE2(clock_getres_time32, clockid_t, which_clock, + struct old_timespec32 __user *, tp) { struct timespec64 rtn_tp = { .tv_sec = 0, @@ -206,9 +207,9 @@ COMPAT_SYSCALL_DEFINE2(clock_getres, clockid_t, which_clock, } } -COMPAT_SYSCALL_DEFINE4(clock_nanosleep, clockid_t, which_clock, int, flags, - struct old_timespec32 __user *, rqtp, - struct old_timespec32 __user *, rmtp) +SYSCALL_DEFINE4(clock_nanosleep_time32, clockid_t, which_clock, int, flags, + struct old_timespec32 __user *, rqtp, + struct old_timespec32 __user *, rmtp) { struct timespec64 t; diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index de79f85ae14f..29176635991f 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -730,8 +730,8 @@ SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id, #ifdef CONFIG_COMPAT_32BIT_TIME -COMPAT_SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id, - struct old_itimerspec32 __user *, setting) +SYSCALL_DEFINE2(timer_gettime32, timer_t, timer_id, + struct old_itimerspec32 __user *, setting) { struct itimerspec64 cur_setting; @@ -903,9 +903,9 @@ SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags, } #ifdef CONFIG_COMPAT_32BIT_TIME -COMPAT_SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags, - struct old_itimerspec32 __user *, new, - struct old_itimerspec32 __user *, old) +SYSCALL_DEFINE4(timer_settime32, timer_t, timer_id, int, flags, + struct old_itimerspec32 __user *, new, + struct old_itimerspec32 __user *, old) { struct itimerspec64 new_spec, old_spec; struct itimerspec64 *rtn = old ? &old_spec : NULL; @@ -1096,8 +1096,8 @@ SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock, #ifdef CONFIG_COMPAT_32BIT_TIME -COMPAT_SYSCALL_DEFINE2(clock_settime, clockid_t, which_clock, - struct old_timespec32 __user *, tp) +SYSCALL_DEFINE2(clock_settime32, clockid_t, which_clock, + struct old_timespec32 __user *, tp) { const struct k_clock *kc = clockid_to_kclock(which_clock); struct timespec64 ts; @@ -1111,8 +1111,8 @@ COMPAT_SYSCALL_DEFINE2(clock_settime, clockid_t, which_clock, return kc->clock_set(which_clock, &ts); } -COMPAT_SYSCALL_DEFINE2(clock_gettime, clockid_t, which_clock, - struct old_timespec32 __user *, tp) +SYSCALL_DEFINE2(clock_gettime32, clockid_t, which_clock, + struct old_timespec32 __user *, tp) { const struct k_clock *kc = clockid_to_kclock(which_clock); struct timespec64 ts; @@ -1129,8 +1129,8 @@ COMPAT_SYSCALL_DEFINE2(clock_gettime, clockid_t, which_clock, return err; } -COMPAT_SYSCALL_DEFINE2(clock_adjtime, clockid_t, which_clock, - struct old_timex32 __user *, utp) +SYSCALL_DEFINE2(clock_adjtime32, clockid_t, which_clock, + struct old_timex32 __user *, utp) { struct __kernel_timex ktx; int err; @@ -1147,8 +1147,8 @@ COMPAT_SYSCALL_DEFINE2(clock_adjtime, clockid_t, which_clock, return err; } -COMPAT_SYSCALL_DEFINE2(clock_getres, clockid_t, which_clock, - struct old_timespec32 __user *, tp) +SYSCALL_DEFINE2(clock_getres_time32, clockid_t, which_clock, + struct old_timespec32 __user *, tp) { const struct k_clock *kc = clockid_to_kclock(which_clock); struct timespec64 ts; @@ -1204,9 +1204,9 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags, #ifdef CONFIG_COMPAT_32BIT_TIME -COMPAT_SYSCALL_DEFINE4(clock_nanosleep, clockid_t, which_clock, int, flags, - struct old_timespec32 __user *, rqtp, - struct old_timespec32 __user *, rmtp) +SYSCALL_DEFINE4(clock_nanosleep_time32, clockid_t, which_clock, int, flags, + struct old_timespec32 __user *, rqtp, + struct old_timespec32 __user *, rmtp) { const struct k_clock *kc = clockid_to_kclock(which_clock); struct timespec64 t; diff --git a/kernel/time/time.c b/kernel/time/time.c index 78b5c8f1495a..6261f969dcb7 100644 --- a/kernel/time/time.c +++ b/kernel/time/time.c @@ -98,11 +98,11 @@ SYSCALL_DEFINE1(stime, time_t __user *, tptr) #endif /* __ARCH_WANT_SYS_TIME */ -#ifdef CONFIG_COMPAT +#ifdef CONFIG_COMPAT_32BIT_TIME #ifdef __ARCH_WANT_COMPAT_SYS_TIME /* old_time32_t is a 32 bit "long" and needs to get converted. */ -COMPAT_SYSCALL_DEFINE1(time, old_time32_t __user *, tloc) +SYSCALL_DEFINE1(time32, old_time32_t __user *, tloc) { old_time32_t i; @@ -116,7 +116,7 @@ COMPAT_SYSCALL_DEFINE1(time, old_time32_t __user *, tloc) return i; } -COMPAT_SYSCALL_DEFINE1(stime, old_time32_t __user *, tptr) +SYSCALL_DEFINE1(stime32, old_time32_t __user *, tptr) { struct timespec64 tv; int err; @@ -344,7 +344,7 @@ int put_old_timex32(struct old_timex32 __user *utp, const struct __kernel_timex return 0; } -COMPAT_SYSCALL_DEFINE1(adjtimex, struct old_timex32 __user *, utp) +SYSCALL_DEFINE1(adjtimex_time32, struct old_timex32 __user *, utp) { struct __kernel_timex txc; int err, ret; diff --git a/net/compat.c b/net/compat.c index 959d1c51826d..2fef7b9db434 100644 --- a/net/compat.c +++ b/net/compat.c @@ -822,7 +822,7 @@ COMPAT_SYSCALL_DEFINE5(recvmmsg_time64, int, fd, struct compat_mmsghdr __user *, } #ifdef CONFIG_COMPAT_32BIT_TIME -COMPAT_SYSCALL_DEFINE5(recvmmsg, int, fd, struct compat_mmsghdr __user *, mmsg, +COMPAT_SYSCALL_DEFINE5(recvmmsg_time32, int, fd, struct compat_mmsghdr __user *, mmsg, unsigned int, vlen, unsigned int, flags, struct old_timespec32 __user *, timeout) { -- cgit From c70a772fda11570ebddecbce1543a3fda008db4a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 7 Jan 2019 00:00:34 +0100 Subject: y2038: remove struct definition redirects We now use 64-bit time_t on all architectures, so the __kernel_timex, __kernel_timeval and __kernel_timespec redirects can be removed after having served their purpose. This makes it all much less confusing, as the __kernel_* types now always refer to the same layout based on 64-bit time_t across all 32-bit and 64-bit architectures. Signed-off-by: Arnd Bergmann --- include/linux/time64.h | 8 -------- include/linux/timex.h | 7 ------- include/uapi/linux/time.h | 4 ---- include/uapi/linux/timex.h | 2 -- 4 files changed, 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/time64.h b/include/linux/time64.h index 05634afba0db..f38d382ffec1 100644 --- a/include/linux/time64.h +++ b/include/linux/time64.h @@ -7,14 +7,6 @@ typedef __s64 time64_t; typedef __u64 timeu64_t; -/* CONFIG_64BIT_TIME enables new 64 bit time_t syscalls in the compat path - * and 32-bit emulation. - */ -#ifndef CONFIG_64BIT_TIME -#define __kernel_timespec timespec -#define __kernel_itimerspec itimerspec -#endif - #include struct timespec64 { diff --git a/include/linux/timex.h b/include/linux/timex.h index 4aff9f0d1367..ce0859763670 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -53,13 +53,6 @@ #ifndef _LINUX_TIMEX_H #define _LINUX_TIMEX_H -/* CONFIG_64BIT_TIME enables new 64 bit time_t syscalls in the compat path - * and 32-bit emulation. - */ -#ifndef CONFIG_64BIT_TIME -#define __kernel_timex timex -#endif - #include #define ADJ_ADJTIME 0x8000 /* switch between adjtime/adjtimex modes */ diff --git a/include/uapi/linux/time.h b/include/uapi/linux/time.h index 6b56a2208be7..b03f8717c312 100644 --- a/include/uapi/linux/time.h +++ b/include/uapi/linux/time.h @@ -42,19 +42,15 @@ struct itimerval { struct timeval it_value; /* current value */ }; -#ifndef __kernel_timespec struct __kernel_timespec { __kernel_time64_t tv_sec; /* seconds */ long long tv_nsec; /* nanoseconds */ }; -#endif -#ifndef __kernel_itimerspec struct __kernel_itimerspec { struct __kernel_timespec it_interval; /* timer period */ struct __kernel_timespec it_value; /* timer expiration */ }; -#endif /* * legacy timeval structure, only embedded in structures that diff --git a/include/uapi/linux/timex.h b/include/uapi/linux/timex.h index a1c6b73016a5..9f517f9010bb 100644 --- a/include/uapi/linux/timex.h +++ b/include/uapi/linux/timex.h @@ -97,7 +97,6 @@ struct __kernel_timex_timeval { long long tv_usec; }; -#ifndef __kernel_timex struct __kernel_timex { unsigned int modes; /* mode selector */ int :32; /* pad */ @@ -131,7 +130,6 @@ struct __kernel_timex { int :32; int :32; int :32; int :32; int :32; int :32; int :32; }; -#endif /* * Mode codes (timex.mode) -- cgit From a4f342b9607d8c2034d3135cbbb11b4028be3678 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 4 Feb 2019 11:09:48 +0000 Subject: PM / OPP: Introduce a power estimation helper The Energy Model (EM) framework provides an API to let drivers register the active power of CPUs. The drivers are expected to provide a callback method which estimates the power consumed by a CPU at each available performance levels. How exactly this should be implemented, however, depends on the platform. On some systems, PM_OPP knows the voltage and frequency at which CPUs can run. When coupled with the CPU 'capacitance' (as provided by the 'dynamic-power-coefficient' devicetree binding), it is possible to estimate the dynamic power consumption of a CPU as P = C * V^2 * f, with C its capacitance and V and f respectively the voltage and frequency of the OPP. The Intelligent Power Allocator (IPA) thermal governor already implements that estimation method, in the thermal framework. However, this power estimation method can be applied to any platform where all the parameters are known (C, V and f), and not only those suffering thermal issues. As such, the code implementing this feature can be re-used to also populate the EM framework now used by EAS. As a first step, introduce in PM_OPP a helper function which CPUFreq drivers can use to register into the EM framework. This duplicates the power estimation done in IPA until it can be migrated to using the EM framework. This will be done later, once the EM framework has support for at least all platforms currently supported by IPA. Signed-off-by: Quentin Perret Tested-by: Matthias Kaehlcke Reviewed-by: Matthias Kaehlcke Signed-off-by: Viresh Kumar --- drivers/opp/of.c | 99 ++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/pm_opp.h | 6 +++ 2 files changed, 105 insertions(+) (limited to 'include/linux') diff --git a/drivers/opp/of.c b/drivers/opp/of.c index 06f0f632ec47..cd58959e5158 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "opp.h" @@ -1047,3 +1048,101 @@ struct device_node *dev_pm_opp_get_of_node(struct dev_pm_opp *opp) return of_node_get(opp->np); } EXPORT_SYMBOL_GPL(dev_pm_opp_get_of_node); + +/* + * Callback function provided to the Energy Model framework upon registration. + * This computes the power estimated by @CPU at @kHz if it is the frequency + * of an existing OPP, or at the frequency of the first OPP above @kHz otherwise + * (see dev_pm_opp_find_freq_ceil()). This function updates @kHz to the ceiled + * frequency and @mW to the associated power. The power is estimated as + * P = C * V^2 * f with C being the CPU's capacitance and V and f respectively + * the voltage and frequency of the OPP. + * + * Returns -ENODEV if the CPU device cannot be found, -EINVAL if the power + * calculation failed because of missing parameters, 0 otherwise. + */ +static int __maybe_unused _get_cpu_power(unsigned long *mW, unsigned long *kHz, + int cpu) +{ + struct device *cpu_dev; + struct dev_pm_opp *opp; + struct device_node *np; + unsigned long mV, Hz; + u32 cap; + u64 tmp; + int ret; + + cpu_dev = get_cpu_device(cpu); + if (!cpu_dev) + return -ENODEV; + + np = of_node_get(cpu_dev->of_node); + if (!np) + return -EINVAL; + + ret = of_property_read_u32(np, "dynamic-power-coefficient", &cap); + of_node_put(np); + if (ret) + return -EINVAL; + + Hz = *kHz * 1000; + opp = dev_pm_opp_find_freq_ceil(cpu_dev, &Hz); + if (IS_ERR(opp)) + return -EINVAL; + + mV = dev_pm_opp_get_voltage(opp) / 1000; + dev_pm_opp_put(opp); + if (!mV) + return -EINVAL; + + tmp = (u64)cap * mV * mV * (Hz / 1000000); + do_div(tmp, 1000000000); + + *mW = (unsigned long)tmp; + *kHz = Hz / 1000; + + return 0; +} + +/** + * dev_pm_opp_of_register_em() - Attempt to register an Energy Model + * @cpus : CPUs for which an Energy Model has to be registered + * + * This checks whether the "dynamic-power-coefficient" devicetree property has + * been specified, and tries to register an Energy Model with it if it has. + */ +void dev_pm_opp_of_register_em(struct cpumask *cpus) +{ + struct em_data_callback em_cb = EM_DATA_CB(_get_cpu_power); + int ret, nr_opp, cpu = cpumask_first(cpus); + struct device *cpu_dev; + struct device_node *np; + u32 cap; + + cpu_dev = get_cpu_device(cpu); + if (!cpu_dev) + return; + + nr_opp = dev_pm_opp_get_opp_count(cpu_dev); + if (nr_opp <= 0) + return; + + np = of_node_get(cpu_dev->of_node); + if (!np) + return; + + /* + * Register an EM only if the 'dynamic-power-coefficient' property is + * set in devicetree. It is assumed the voltage values are known if that + * property is set since it is useless otherwise. If voltages are not + * known, just let the EM registration fail with an error to alert the + * user about the inconsistent configuration. + */ + ret = of_property_read_u32(np, "dynamic-power-coefficient", &cap); + of_node_put(np); + if (ret || !cap) + return; + + em_register_perf_domain(cpus, nr_opp, &em_cb); +} +EXPORT_SYMBOL_GPL(dev_pm_opp_of_register_em); diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 0a2a88e5a383..1470c57933cf 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -322,6 +322,7 @@ int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpuma struct device_node *dev_pm_opp_of_get_opp_desc_node(struct device *dev); struct device_node *dev_pm_opp_get_of_node(struct dev_pm_opp *opp); int of_get_required_opp_performance_state(struct device_node *np, int index); +void dev_pm_opp_of_register_em(struct cpumask *cpus); #else static inline int dev_pm_opp_of_add_table(struct device *dev) { @@ -360,6 +361,11 @@ static inline struct device_node *dev_pm_opp_get_of_node(struct dev_pm_opp *opp) { return NULL; } + +static inline void dev_pm_opp_of_register_em(struct cpumask *cpus) +{ +} + static inline int of_get_required_opp_performance_state(struct device_node *np, int index) { return -ENOTSUPP; -- cgit From 752b5da2359fee342d5264e2c10352daf5b9a199 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 21 Jan 2019 16:45:46 +0100 Subject: phy: dphy: Remove unused header The videomode.h header inclusion is an artifact from the patches development, remove it. Suggested-by: Sakari Ailus Acked-by: Sakari Ailus Signed-off-by: Maxime Ripard Signed-off-by: Kishon Vijay Abraham I --- include/linux/phy/phy-mipi-dphy.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phy/phy-mipi-dphy.h b/include/linux/phy/phy-mipi-dphy.h index c08aacc0ac35..9cf97cd1d303 100644 --- a/include/linux/phy/phy-mipi-dphy.h +++ b/include/linux/phy/phy-mipi-dphy.h @@ -6,8 +6,6 @@ #ifndef __PHY_MIPI_DPHY_H_ #define __PHY_MIPI_DPHY_H_ -#include