From 9807a54cd74149988f5d20088bf7a7957c205bfb Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Mon, 7 Jan 2013 09:54:31 -0800 Subject: linux/openvswitch.h: Make OVSP_LOCAL 32-bit. OVS ports are now 32-bit, so OVSP_LOCAL should be too. (Internally, kernel module still keeps port numbers 16-bit, though.) Signed-off-by: Jarno Rajahalme Signed-off-by: Jesse Gross --- include/linux/openvswitch.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h index d42e174bd0c8..99e6414a40d9 100644 --- a/include/linux/openvswitch.h +++ b/include/linux/openvswitch.h @@ -94,7 +94,7 @@ struct ovs_vport_stats { }; /* Fixed logical ports. */ -#define OVSP_LOCAL ((__u16)0) +#define OVSP_LOCAL ((__u32)0) /* Packet transfer. */ -- cgit From 345046673449b5c35840e5cc34a60059cbec9305 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Thu, 10 Jan 2013 19:00:53 +0000 Subject: slab: Move kmalloc related function defs Move these functions higher up in slab.h so that they are grouped with other generic kmalloc related definitions. Acked-by: Glauber Costa Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- include/linux/slab.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 5d168d7e0a28..ccbb37685c6c 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -147,6 +147,15 @@ void kmem_cache_free(struct kmem_cache *, void *); sizeof(struct __struct), __alignof__(struct __struct),\ (__flags), NULL) +/* + * Common kmalloc functions provided by all allocators + */ +void * __must_check __krealloc(const void *, size_t, gfp_t); +void * __must_check krealloc(const void *, size_t, gfp_t); +void kfree(const void *); +void kzfree(const void *); +size_t ksize(const void *); + /* * The largest kmalloc size supported by the slab allocators is * 32 megabyte (2^25) or the maximum allocatable page order if that is @@ -224,15 +233,6 @@ struct seq_file; int cache_show(struct kmem_cache *s, struct seq_file *m); void print_slabinfo_header(struct seq_file *m); -/* - * Common kmalloc functions provided by all allocators - */ -void * __must_check __krealloc(const void *, size_t, gfp_t); -void * __must_check krealloc(const void *, size_t, gfp_t); -void kfree(const void *); -void kzfree(const void *); -size_t ksize(const void *); - /* * Allocator specific definitions. These are mainly used to establish optimized * ways to convert kmalloc() calls to kmem_cache_alloc() invocations by -- cgit From ce6a50263d4ddeba1f0d08f16716a82770c03690 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Thu, 10 Jan 2013 19:14:19 +0000 Subject: slab: Common kmalloc slab index determination Extract the function to determine the index of the slab within the array of kmalloc caches as well as a function to determine maximum object size from the nr of the kmalloc slab. This is used here only to simplify slub bootstrap but will be used later also for SLAB. 
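For illustration, the mapping this commit extracts behaves like the following minimal userspace sketch. This is not the kernel code itself: it assumes KMALLOC_MIN_SIZE == 8 (so the smallest cache sits at index 3, i.e. 2^3 bytes), uses a loop where the kernel unrolls the comparisons by hand, and omits the DMA and per-allocator details; the real helpers are kmalloc_index() and kmalloc_size() in include/linux/slab.h.

#include <stdio.h>

/*
 * Standalone sketch of the size -> kmalloc-cache-index mapping.
 * Indexes 1 and 2 are the odd-sized 96- and 192-byte caches; all
 * other caches hold power-of-two sizes, with index n == log2(size).
 */
static int sketch_kmalloc_index(size_t size)
{
	int n = 3;

	if (!size)
		return 0;
	if (size <= 8)
		return 3;
	if (size > 64 && size <= 96)
		return 1;
	if (size > 128 && size <= 192)
		return 2;
	while (((size_t)1 << n) < size)	/* smallest power of two >= size */
		n++;
	return n;
}

/* Inverse: object size served by the nth cache (0 if that slot is unused). */
static size_t sketch_kmalloc_size(int n)
{
	if (n > 2)
		return (size_t)1 << n;
	if (n == 1)
		return 96;
	if (n == 2)
		return 192;
	return 0;
}

int main(void)
{
	size_t sizes[] = { 8, 33, 96, 100, 192, 193, 4096 };

	for (int i = 0; i < 7; i++) {
		int idx = sketch_kmalloc_index(sizes[i]);
		printf("kmalloc(%4zu) -> index %2d -> %4zu-byte cache\n",
		       sizes[i], idx, sketch_kmalloc_size(idx));
	}
	return 0;
}

Note how sizes 65..96 and 129..192 short-circuit into the two odd-sized caches before the power-of-two scan; everything else rounds up to the next power of two.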
Acked-by: Glauber Costa Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- include/linux/slab.h | 172 +++++++++++++++++++++++++++++++++-------------- include/linux/slub_def.h | 63 ----------------- 2 files changed, 122 insertions(+), 113 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index ccbb37685c6c..c97fe92532d1 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -94,29 +94,6 @@ #define ZERO_OR_NULL_PTR(x) ((unsigned long)(x) <= \ (unsigned long)ZERO_SIZE_PTR) -/* - * Common fields provided in kmem_cache by all slab allocators - * This struct is either used directly by the allocator (SLOB) - * or the allocator must include definitions for all fields - * provided in kmem_cache_common in their definition of kmem_cache. - * - * Once we can do anonymous structs (C11 standard) we could put a - * anonymous struct definition in these allocators so that the - * separate allocations in the kmem_cache structure of SLAB and - * SLUB is no longer needed. - */ -#ifdef CONFIG_SLOB -struct kmem_cache { - unsigned int object_size;/* The original size of the object */ - unsigned int size; /* The aligned/padded/added on size */ - unsigned int align; /* Alignment as calculated */ - unsigned long flags; /* Active flags on the slab */ - const char *name; /* Slab name for sysfs */ - int refcount; /* Use counter */ - void (*ctor)(void *); /* Called on object slot creation */ - struct list_head list; /* List of all slab caches on the system */ -}; -#endif struct mem_cgroup; /* @@ -156,6 +133,35 @@ void kfree(const void *); void kzfree(const void *); size_t ksize(const void *); +#ifdef CONFIG_SLOB +/* + * Common fields provided in kmem_cache by all slab allocators + * This struct is either used directly by the allocator (SLOB) + * or the allocator must include definitions for all fields + * provided in kmem_cache_common in their definition of kmem_cache. + * + * Once we can do anonymous structs (C11 standard) we could put a + * anonymous struct definition in these allocators so that the + * separate allocations in the kmem_cache structure of SLAB and + * SLUB is no longer needed. + */ +struct kmem_cache { + unsigned int object_size;/* The original size of the object */ + unsigned int size; /* The aligned/padded/added on size */ + unsigned int align; /* Alignment as calculated */ + unsigned long flags; /* Active flags on the slab */ + const char *name; /* Slab name for sysfs */ + int refcount; /* Use counter */ + void (*ctor)(void *); /* Called on object slot creation */ + struct list_head list; /* List of all slab caches on the system */ +}; + +#define KMALLOC_MAX_SIZE (1UL << 30) + +#include + +#else /* CONFIG_SLOB */ + /* * The largest kmalloc size supported by the slab allocators is * 32 megabyte (2^25) or the maximum allocatable page order if that is @@ -171,6 +177,99 @@ size_t ksize(const void *); #define KMALLOC_MAX_SIZE (1UL << KMALLOC_SHIFT_HIGH) #define KMALLOC_MAX_ORDER (KMALLOC_SHIFT_HIGH - PAGE_SHIFT) +/* + * Kmalloc subsystem. + */ +#if defined(ARCH_DMA_MINALIGN) && ARCH_DMA_MINALIGN > 8 +#define KMALLOC_MIN_SIZE ARCH_DMA_MINALIGN +#else +#ifdef CONFIG_SLAB +#define KMALLOC_MIN_SIZE 32 +#else +#define KMALLOC_MIN_SIZE 8 +#endif +#endif + +#define KMALLOC_SHIFT_LOW ilog2(KMALLOC_MIN_SIZE) + +/* + * Figure out which kmalloc slab an allocation of a certain size + * belongs to. + * 0 = zero alloc + * 1 = 65 .. 96 bytes + * 2 = 120 .. 192 bytes + * n = 2^(n-1) .. 
2^n -1 + */ +static __always_inline int kmalloc_index(size_t size) +{ + if (!size) + return 0; + + if (size <= KMALLOC_MIN_SIZE) + return KMALLOC_SHIFT_LOW; + + if (KMALLOC_MIN_SIZE <= 32 && size > 64 && size <= 96) + return 1; + if (KMALLOC_MIN_SIZE <= 64 && size > 128 && size <= 192) + return 2; + if (size <= 8) return 3; + if (size <= 16) return 4; + if (size <= 32) return 5; + if (size <= 64) return 6; + if (size <= 128) return 7; + if (size <= 256) return 8; + if (size <= 512) return 9; + if (size <= 1024) return 10; + if (size <= 2 * 1024) return 11; + if (size <= 4 * 1024) return 12; + if (size <= 8 * 1024) return 13; + if (size <= 16 * 1024) return 14; + if (size <= 32 * 1024) return 15; + if (size <= 64 * 1024) return 16; + if (size <= 128 * 1024) return 17; + if (size <= 256 * 1024) return 18; + if (size <= 512 * 1024) return 19; + if (size <= 1024 * 1024) return 20; + if (size <= 2 * 1024 * 1024) return 21; + if (size <= 4 * 1024 * 1024) return 22; + if (size <= 8 * 1024 * 1024) return 23; + if (size <= 16 * 1024 * 1024) return 24; + if (size <= 32 * 1024 * 1024) return 25; + if (size <= 64 * 1024 * 1024) return 26; + BUG(); + + /* Will never be reached. Needed because the compiler may complain */ + return -1; +} + +#ifdef CONFIG_SLAB +#include +#elif defined(CONFIG_SLUB) +#include +#else +#error "Unknown slab allocator" +#endif + +/* + * Determine size used for the nth kmalloc cache. + * return size or 0 if a kmalloc cache for that + * size does not exist + */ +static __always_inline int kmalloc_size(int n) +{ + if (n > 2) + return 1 << n; + + if (n == 1 && KMALLOC_MIN_SIZE <= 32) + return 96; + + if (n == 2 && KMALLOC_MIN_SIZE <= 64) + return 192; + + return 0; +} +#endif /* !CONFIG_SLOB */ + /* * Some archs want to perform DMA into kmalloc caches and need a guaranteed * alignment larger than the alignment of a 64-bit integer. @@ -233,33 +332,6 @@ struct seq_file; int cache_show(struct kmem_cache *s, struct seq_file *m); void print_slabinfo_header(struct seq_file *m); -/* - * Allocator specific definitions. These are mainly used to establish optimized - * ways to convert kmalloc() calls to kmem_cache_alloc() invocations by - * selecting the appropriate general cache at compile time. - * - * Allocators must define at least: - * - * kmem_cache_alloc() - * __kmalloc() - * kmalloc() - * - * Those wishing to support NUMA must also define: - * - * kmem_cache_alloc_node() - * kmalloc_node() - * - * See each allocator definition file for additional comments and - * implementation notes. - */ -#ifdef CONFIG_SLUB -#include -#elif defined(CONFIG_SLOB) -#include -#else -#include -#endif - /** * kmalloc_array - allocate memory for an array. * @n: number of elements. diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 9db4825cd393..99c3e05ff1f0 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -115,17 +115,6 @@ struct kmem_cache { struct kmem_cache_node *node[MAX_NUMNODES]; }; -/* - * Kmalloc subsystem. - */ -#if defined(ARCH_DMA_MINALIGN) && ARCH_DMA_MINALIGN > 8 -#define KMALLOC_MIN_SIZE ARCH_DMA_MINALIGN -#else -#define KMALLOC_MIN_SIZE 8 -#endif - -#define KMALLOC_SHIFT_LOW ilog2(KMALLOC_MIN_SIZE) - /* * Maximum kmalloc object size handled by SLUB. Larger object allocations * are passed through to the page allocator. 
The page allocator "fastpath" @@ -152,58 +141,6 @@ struct kmem_cache { */ extern struct kmem_cache *kmalloc_caches[SLUB_PAGE_SHIFT]; -/* - * Sorry that the following has to be that ugly but some versions of GCC - * have trouble with constant propagation and loops. - */ -static __always_inline int kmalloc_index(size_t size) -{ - if (!size) - return 0; - - if (size <= KMALLOC_MIN_SIZE) - return KMALLOC_SHIFT_LOW; - - if (KMALLOC_MIN_SIZE <= 32 && size > 64 && size <= 96) - return 1; - if (KMALLOC_MIN_SIZE <= 64 && size > 128 && size <= 192) - return 2; - if (size <= 8) return 3; - if (size <= 16) return 4; - if (size <= 32) return 5; - if (size <= 64) return 6; - if (size <= 128) return 7; - if (size <= 256) return 8; - if (size <= 512) return 9; - if (size <= 1024) return 10; - if (size <= 2 * 1024) return 11; - if (size <= 4 * 1024) return 12; -/* - * The following is only needed to support architectures with a larger page - * size than 4k. We need to support 2 * PAGE_SIZE here. So for a 64k page - * size we would have to go up to 128k. - */ - if (size <= 8 * 1024) return 13; - if (size <= 16 * 1024) return 14; - if (size <= 32 * 1024) return 15; - if (size <= 64 * 1024) return 16; - if (size <= 128 * 1024) return 17; - if (size <= 256 * 1024) return 18; - if (size <= 512 * 1024) return 19; - if (size <= 1024 * 1024) return 20; - if (size <= 2 * 1024 * 1024) return 21; - BUG(); - return -1; /* Will never be reached */ - -/* - * What we really wanted to do and cannot do because of compiler issues is: - * int i; - * for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) - * if (size <= (1 << i)) - * return i; - */ -} - /* * Find the slab cache for a given combination of allocation flags and size. * -- cgit From e33660165c901d18e7d3df2290db070d3e4b46df Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Thu, 10 Jan 2013 19:14:18 +0000 Subject: slab: Use common kmalloc_index/kmalloc_size functions Make slab use the common functions. We can get rid of a lot of old ugly stuff as a result. Among them the sizes array and the weird include/linux/kmalloc_sizes file and some pretty bad #include statements in slab_def.h. The one thing that is different in slab is that the 32 byte cache will also be created for arches that have page sizes larger than 4K. There are numerous smaller allocations that SLOB and SLUB can handle better because of their support for smaller allocation sizes so let's keep the 32 byte slab also for arches with > 4K pages.
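For readers unfamiliar with what is being retired here: kmalloc_sizes.h was an X-macro style header, included repeatedly with a different CACHE() definition at each point of use. A compressed single-file sketch of that idiom follows (the size list is made up, and a macro list stands in for the repeated #include; the real header selected entries with preprocessor conditionals on PAGE_SIZE, L1_CACHE_BYTES and KMALLOC_MAX_SIZE):

#include <stdio.h>

/* One authoritative list of general-cache sizes... */
#define CACHE_SIZES \
	CACHE(32)   \
	CACHE(64)   \
	CACHE(128)  \
	CACHE(192)

struct cache_sizes {
	size_t cs_size;
};

/* ...expanded once to build the table of sizes... */
#define CACHE(x) { .cs_size = (x) },
static struct cache_sizes malloc_sizes[] = { CACHE_SIZES };
#undef CACHE

/* ...and expanded again to count the entries at compile time. */
#define CACHE(x) + 1
enum { NR_GENERAL_CACHES = 0 CACHE_SIZES };
#undef CACHE

int main(void)
{
	for (int i = 0; i < NR_GENERAL_CACHES; i++)
		printf("general cache %d: %zu bytes\n",
		       i, malloc_sizes[i].cs_size);
	return 0;
}

The commit replaces this table-plus-linear-scan scheme with direct indexing by kmalloc_index(), which the compiler can constant-fold.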
Reviewed-by: Glauber Costa Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- include/linux/kmalloc_sizes.h | 45 ----------- include/linux/slab_def.h | 47 +++--------- mm/slab.c | 169 +++++++++++++++++++----------------------- 3 files changed, 88 insertions(+), 173 deletions(-) delete mode 100644 include/linux/kmalloc_sizes.h (limited to 'include/linux') diff --git a/include/linux/kmalloc_sizes.h b/include/linux/kmalloc_sizes.h deleted file mode 100644 index e576b848ce10..000000000000 --- a/include/linux/kmalloc_sizes.h +++ /dev/null @@ -1,45 +0,0 @@ -#if (PAGE_SIZE == 4096) - CACHE(32) -#endif - CACHE(64) -#if L1_CACHE_BYTES < 64 - CACHE(96) -#endif - CACHE(128) -#if L1_CACHE_BYTES < 128 - CACHE(192) -#endif - CACHE(256) - CACHE(512) - CACHE(1024) - CACHE(2048) - CACHE(4096) - CACHE(8192) - CACHE(16384) - CACHE(32768) - CACHE(65536) - CACHE(131072) -#if KMALLOC_MAX_SIZE >= 262144 - CACHE(262144) -#endif -#if KMALLOC_MAX_SIZE >= 524288 - CACHE(524288) -#endif -#if KMALLOC_MAX_SIZE >= 1048576 - CACHE(1048576) -#endif -#if KMALLOC_MAX_SIZE >= 2097152 - CACHE(2097152) -#endif -#if KMALLOC_MAX_SIZE >= 4194304 - CACHE(4194304) -#endif -#if KMALLOC_MAX_SIZE >= 8388608 - CACHE(8388608) -#endif -#if KMALLOC_MAX_SIZE >= 16777216 - CACHE(16777216) -#endif -#if KMALLOC_MAX_SIZE >= 33554432 - CACHE(33554432) -#endif diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index 8bb6e0eaf3c6..e0f30ef9525d 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -11,8 +11,6 @@ */ #include -#include /* kmalloc_sizes.h needs PAGE_SIZE */ -#include /* kmalloc_sizes.h needs L1_CACHE_BYTES */ #include /* @@ -104,15 +102,8 @@ struct kmem_cache { */ }; -/* Size description struct for general caches. */ -struct cache_sizes { - size_t cs_size; - struct kmem_cache *cs_cachep; -#ifdef CONFIG_ZONE_DMA - struct kmem_cache *cs_dmacachep; -#endif -}; -extern struct cache_sizes malloc_sizes[]; +extern struct kmem_cache *kmalloc_caches[PAGE_SHIFT + MAX_ORDER]; +extern struct kmem_cache *kmalloc_dma_caches[PAGE_SHIFT + MAX_ORDER]; void *kmem_cache_alloc(struct kmem_cache *, gfp_t); void *__kmalloc(size_t size, gfp_t flags); @@ -133,26 +124,19 @@ static __always_inline void *kmalloc(size_t size, gfp_t flags) void *ret; if (__builtin_constant_p(size)) { - int i = 0; + int i; if (!size) return ZERO_SIZE_PTR; -#define CACHE(x) \ - if (size <= x) \ - goto found; \ - else \ - i++; -#include -#undef CACHE - return NULL; -found: + i = kmalloc_index(size); + #ifdef CONFIG_ZONE_DMA if (flags & GFP_DMA) - cachep = malloc_sizes[i].cs_dmacachep; + cachep = kmalloc_dma_caches[i]; else #endif - cachep = malloc_sizes[i].cs_cachep; + cachep = kmalloc_caches[i]; ret = kmem_cache_alloc_trace(cachep, flags, size); @@ -186,26 +170,19 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) struct kmem_cache *cachep; if (__builtin_constant_p(size)) { - int i = 0; + int i; if (!size) return ZERO_SIZE_PTR; -#define CACHE(x) \ - if (size <= x) \ - goto found; \ - else \ - i++; -#include -#undef CACHE - return NULL; -found: + i = kmalloc_index(size); + #ifdef CONFIG_ZONE_DMA if (flags & GFP_DMA) - cachep = malloc_sizes[i].cs_dmacachep; + cachep = kmalloc_dma_caches[i]; else #endif - cachep = malloc_sizes[i].cs_cachep; + cachep = kmalloc_caches[i]; return kmem_cache_alloc_node_trace(cachep, flags, node, size); } diff --git a/mm/slab.c b/mm/slab.c index e7667a3584bc..2a7132ec4ff6 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -318,34 +318,18 @@ static void free_block(struct kmem_cache *cachep, 
void **objpp, int len, static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp); static void cache_reap(struct work_struct *unused); -/* - * This function must be completely optimized away if a constant is passed to - * it. Mostly the same as what is in linux/slab.h except it returns an index. - */ -static __always_inline int index_of(const size_t size) -{ - extern void __bad_size(void); - - if (__builtin_constant_p(size)) { - int i = 0; +struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1]; +EXPORT_SYMBOL(kmalloc_caches); -#define CACHE(x) \ - if (size <=x) \ - return i; \ - else \ - i++; -#include -#undef CACHE - __bad_size(); - } else - __bad_size(); - return 0; -} +#ifdef CONFIG_ZONE_DMA +struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1]; +EXPORT_SYMBOL(kmalloc_dma_caches); +#endif static int slab_early_init = 1; -#define INDEX_AC index_of(sizeof(struct arraycache_init)) -#define INDEX_L3 index_of(sizeof(struct kmem_list3)) +#define INDEX_AC kmalloc_index(sizeof(struct arraycache_init)) +#define INDEX_L3 kmalloc_index(sizeof(struct kmem_list3)) static void kmem_list3_init(struct kmem_list3 *parent) { @@ -524,30 +508,6 @@ static inline unsigned int obj_to_index(const struct kmem_cache *cache, return reciprocal_divide(offset, cache->reciprocal_buffer_size); } -/* - * These are the default caches for kmalloc. Custom caches can have other sizes. - */ -struct cache_sizes malloc_sizes[] = { -#define CACHE(x) { .cs_size = (x) }, -#include - CACHE(ULONG_MAX) -#undef CACHE -}; -EXPORT_SYMBOL(malloc_sizes); - -/* Must match cache_sizes above. Out of line to keep cache footprint low. */ -struct cache_names { - char *name; - char *name_dma; -}; - -static struct cache_names __initdata cache_names[] = { -#define CACHE(x) { .name = "size-" #x, .name_dma = "size-" #x "(DMA)" }, -#include - {NULL,} -#undef CACHE -}; - static struct arraycache_init initarray_generic = { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} }; @@ -625,19 +585,23 @@ static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep) static void init_node_lock_keys(int q) { - struct cache_sizes *s = malloc_sizes; + int i; if (slab_state < UP) return; - for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) { + for (i = 1; i < PAGE_SHIFT + MAX_ORDER; i++) { struct kmem_list3 *l3; + struct kmem_cache *cache = kmalloc_caches[i]; + + if (!cache) + continue; - l3 = s->cs_cachep->nodelists[q]; - if (!l3 || OFF_SLAB(s->cs_cachep)) + l3 = cache->nodelists[q]; + if (!l3 || OFF_SLAB(cache)) continue; - slab_set_lock_classes(s->cs_cachep, &on_slab_l3_key, + slab_set_lock_classes(cache, &on_slab_l3_key, &on_slab_alc_key, q); } } @@ -705,20 +669,19 @@ static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep) static inline struct kmem_cache *__find_general_cachep(size_t size, gfp_t gfpflags) { - struct cache_sizes *csizep = malloc_sizes; + int i; #if DEBUG /* This happens if someone tries to call * kmem_cache_create(), or __kmalloc(), before * the generic caches are initialized. 
*/ - BUG_ON(malloc_sizes[INDEX_AC].cs_cachep == NULL); + BUG_ON(kmalloc_caches[INDEX_AC] == NULL); #endif if (!size) return ZERO_SIZE_PTR; - while (size > csizep->cs_size) - csizep++; + i = kmalloc_index(size); /* * Really subtle: The last entry with cs->cs_size==ULONG_MAX @@ -727,9 +690,9 @@ static inline struct kmem_cache *__find_general_cachep(size_t size, */ #ifdef CONFIG_ZONE_DMA if (unlikely(gfpflags & GFP_DMA)) - return csizep->cs_dmacachep; + return kmalloc_dma_caches[i]; #endif - return csizep->cs_cachep; + return kmalloc_caches[i]; } static struct kmem_cache *kmem_find_general_cachep(size_t size, gfp_t gfpflags) @@ -1602,8 +1565,6 @@ static void setup_nodelists_pointer(struct kmem_cache *cachep) */ void __init kmem_cache_init(void) { - struct cache_sizes *sizes; - struct cache_names *names; int i; kmem_cache = &kmem_cache_boot; @@ -1657,8 +1618,6 @@ void __init kmem_cache_init(void) list_add(&kmem_cache->list, &slab_caches); /* 2+3) create the kmalloc caches */ - sizes = malloc_sizes; - names = cache_names; /* * Initialize the caches that provide memory for the array cache and the @@ -1666,35 +1625,39 @@ void __init kmem_cache_init(void) * bug. */ - sizes[INDEX_AC].cs_cachep = create_kmalloc_cache(names[INDEX_AC].name, - sizes[INDEX_AC].cs_size, ARCH_KMALLOC_FLAGS); + kmalloc_caches[INDEX_AC] = create_kmalloc_cache("kmalloc-ac", + kmalloc_size(INDEX_AC), ARCH_KMALLOC_FLAGS); if (INDEX_AC != INDEX_L3) - sizes[INDEX_L3].cs_cachep = - create_kmalloc_cache(names[INDEX_L3].name, - sizes[INDEX_L3].cs_size, ARCH_KMALLOC_FLAGS); + kmalloc_caches[INDEX_L3] = + create_kmalloc_cache("kmalloc-l3", + kmalloc_size(INDEX_L3), ARCH_KMALLOC_FLAGS); slab_early_init = 0; - while (sizes->cs_size != ULONG_MAX) { - /* - * For performance, all the general caches are L1 aligned. - * This should be particularly beneficial on SMP boxes, as it - * eliminates "false sharing". - * Note for systems short on memory removing the alignment will - * allow tighter packing of the smaller caches. - */ - if (!sizes->cs_cachep) - sizes->cs_cachep = create_kmalloc_cache(names->name, - sizes->cs_size, ARCH_KMALLOC_FLAGS); + for (i = 1; i < PAGE_SHIFT + MAX_ORDER; i++) { + size_t cs_size = kmalloc_size(i); + + if (cs_size < KMALLOC_MIN_SIZE) + continue; + + if (!kmalloc_caches[i]) { + /* + * For performance, all the general caches are L1 aligned. + * This should be particularly beneficial on SMP boxes, as it + * eliminates "false sharing". + * Note for systems short on memory removing the alignment will + * allow tighter packing of the smaller caches. 
+ */ + kmalloc_caches[i] = create_kmalloc_cache("kmalloc", + cs_size, ARCH_KMALLOC_FLAGS); + } #ifdef CONFIG_ZONE_DMA - sizes->cs_dmacachep = create_kmalloc_cache( - names->name_dma, sizes->cs_size, + kmalloc_dma_caches[i] = create_kmalloc_cache( + "kmalloc-dma", cs_size, SLAB_CACHE_DMA|ARCH_KMALLOC_FLAGS); #endif - sizes++; - names++; } /* 4) Replace the bootstrap head arrays */ { @@ -1713,17 +1676,16 @@ void __init kmem_cache_init(void) ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT); - BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep) + BUG_ON(cpu_cache_get(kmalloc_caches[INDEX_AC]) != &initarray_generic.cache); - memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep), + memcpy(ptr, cpu_cache_get(kmalloc_caches[INDEX_AC]), sizeof(struct arraycache_init)); /* * Do not assume that spinlocks can be initialized via memcpy: */ spin_lock_init(&ptr->lock); - malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] = - ptr; + kmalloc_caches[INDEX_AC]->array[smp_processor_id()] = ptr; } /* 5) Replace the bootstrap kmem_list3's */ { @@ -1732,17 +1694,39 @@ void __init kmem_cache_init(void) for_each_online_node(nid) { init_list(kmem_cache, &initkmem_list3[CACHE_CACHE + nid], nid); - init_list(malloc_sizes[INDEX_AC].cs_cachep, + init_list(kmalloc_caches[INDEX_AC], &initkmem_list3[SIZE_AC + nid], nid); if (INDEX_AC != INDEX_L3) { - init_list(malloc_sizes[INDEX_L3].cs_cachep, + init_list(kmalloc_caches[INDEX_L3], &initkmem_list3[SIZE_L3 + nid], nid); } } } slab_state = UP; + + /* Create the proper names */ + for (i = 1; i < PAGE_SHIFT + MAX_ORDER; i++) { + char *s; + struct kmem_cache *c = kmalloc_caches[i]; + + if (!c) + continue; + + s = kasprintf(GFP_NOWAIT, "kmalloc-%d", kmalloc_size(i)); + + BUG_ON(!s); + c->name = s; + +#ifdef CONFIG_ZONE_DMA + c = kmalloc_dma_caches[i]; + BUG_ON(!c); + s = kasprintf(GFP_NOWAIT, "dma-kmalloc-%d", kmalloc_size(i)); + BUG_ON(!s); + c->name = s; +#endif + } } void __init kmem_cache_init_late(void) @@ -2428,10 +2412,9 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) size += BYTES_PER_WORD; } #if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC) - if (size >= malloc_sizes[INDEX_L3 + 1].cs_size - && cachep->object_size > cache_line_size() - && ALIGN(size, cachep->align) < PAGE_SIZE) { - cachep->obj_offset += PAGE_SIZE - ALIGN(size, cachep->align); + if (size >= kmalloc_size(INDEX_L3 + 1) + && cachep->object_size > cache_line_size() && ALIGN(size, align) < PAGE_SIZE) { + cachep->obj_offset += PAGE_SIZE - ALIGN(size, align); size = PAGE_SIZE; } #endif -- cgit From 6744f087ba2a49f6d6935d9daa0b20a0f03567b5 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Thu, 10 Jan 2013 19:12:17 +0000 Subject: slab: Common name for the per node structures Rename the structure used for the per node structures in slab to have a name that expresses that fact. Acked-by: Glauber Costa Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- include/linux/slab_def.h | 2 +- mm/slab.c | 87 ++++++++++++++++++++++++------------------------ 2 files changed, 44 insertions(+), 45 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index e0f30ef9525d..8b5b2f6b36d3 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -95,7 +95,7 @@ struct kmem_cache { * pointer for each node since "nodelists" uses the remainder of * available pointers. 
*/ - struct kmem_list3 **nodelists; + struct kmem_cache_node **nodelists; struct array_cache *array[NR_CPUS + MAX_NUMNODES]; /* * Do not add fields after array[] diff --git a/mm/slab.c b/mm/slab.c index 2a7132ec4ff6..7c0da4c86973 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -288,7 +288,7 @@ struct arraycache_init { /* * The slab lists for all objects. */ -struct kmem_list3 { +struct kmem_cache_node { struct list_head slabs_partial; /* partial list first, better asm code */ struct list_head slabs_full; struct list_head slabs_free; @@ -306,13 +306,13 @@ struct kmem_list3 { * Need this for bootstrapping a per node allocator. */ #define NUM_INIT_LISTS (3 * MAX_NUMNODES) -static struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS]; +static struct kmem_cache_node __initdata initkmem_list3[NUM_INIT_LISTS]; #define CACHE_CACHE 0 #define SIZE_AC MAX_NUMNODES #define SIZE_L3 (2 * MAX_NUMNODES) static int drain_freelist(struct kmem_cache *cache, - struct kmem_list3 *l3, int tofree); + struct kmem_cache_node *l3, int tofree); static void free_block(struct kmem_cache *cachep, void **objpp, int len, int node); static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp); @@ -329,9 +329,9 @@ EXPORT_SYMBOL(kmalloc_dma_caches); static int slab_early_init = 1; #define INDEX_AC kmalloc_index(sizeof(struct arraycache_init)) -#define INDEX_L3 kmalloc_index(sizeof(struct kmem_list3)) +#define INDEX_L3 kmalloc_index(sizeof(struct kmem_cache_node)) -static void kmem_list3_init(struct kmem_list3 *parent) +static void kmem_list3_init(struct kmem_cache_node *parent) { INIT_LIST_HEAD(&parent->slabs_full); INIT_LIST_HEAD(&parent->slabs_partial); @@ -546,7 +546,7 @@ static void slab_set_lock_classes(struct kmem_cache *cachep, int q) { struct array_cache **alc; - struct kmem_list3 *l3; + struct kmem_cache_node *l3; int r; l3 = cachep->nodelists[q]; @@ -591,7 +591,7 @@ static void init_node_lock_keys(int q) return; for (i = 1; i < PAGE_SHIFT + MAX_ORDER; i++) { - struct kmem_list3 *l3; + struct kmem_cache_node *l3; struct kmem_cache *cache = kmalloc_caches[i]; if (!cache) @@ -608,9 +608,8 @@ static void init_node_lock_keys(int q) static void on_slab_lock_classes_node(struct kmem_cache *cachep, int q) { - struct kmem_list3 *l3; - l3 = cachep->nodelists[q]; - if (!l3) + + if (!cachep->nodelists[q]) return; slab_set_lock_classes(cachep, &on_slab_l3_key, @@ -901,7 +900,7 @@ static inline bool is_slab_pfmemalloc(struct slab *slabp) static void recheck_pfmemalloc_active(struct kmem_cache *cachep, struct array_cache *ac) { - struct kmem_list3 *l3 = cachep->nodelists[numa_mem_id()]; + struct kmem_cache_node *l3 = cachep->nodelists[numa_mem_id()]; struct slab *slabp; unsigned long flags; @@ -934,7 +933,7 @@ static void *__ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac, /* Ensure the caller is allowed to use objects from PFMEMALLOC slab */ if (unlikely(is_obj_pfmemalloc(objp))) { - struct kmem_list3 *l3; + struct kmem_cache_node *l3; if (gfp_pfmemalloc_allowed(flags)) { clear_obj_pfmemalloc(&objp); @@ -1106,7 +1105,7 @@ static void free_alien_cache(struct array_cache **ac_ptr) static void __drain_alien_cache(struct kmem_cache *cachep, struct array_cache *ac, int node) { - struct kmem_list3 *rl3 = cachep->nodelists[node]; + struct kmem_cache_node *rl3 = cachep->nodelists[node]; if (ac->avail) { spin_lock(&rl3->list_lock); @@ -1127,7 +1126,7 @@ static void __drain_alien_cache(struct kmem_cache *cachep, /* * Called from cache_reap() to regularly drain alien caches round robin. 
*/ -static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3) +static void reap_alien(struct kmem_cache *cachep, struct kmem_cache_node *l3) { int node = __this_cpu_read(slab_reap_node); @@ -1162,7 +1161,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) { struct slab *slabp = virt_to_slab(objp); int nodeid = slabp->nodeid; - struct kmem_list3 *l3; + struct kmem_cache_node *l3; struct array_cache *alien = NULL; int node; @@ -1207,8 +1206,8 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) static int init_cache_nodelists_node(int node) { struct kmem_cache *cachep; - struct kmem_list3 *l3; - const int memsize = sizeof(struct kmem_list3); + struct kmem_cache_node *l3; + const int memsize = sizeof(struct kmem_cache_node); list_for_each_entry(cachep, &slab_caches, list) { /* @@ -1244,7 +1243,7 @@ static int init_cache_nodelists_node(int node) static void __cpuinit cpuup_canceled(long cpu) { struct kmem_cache *cachep; - struct kmem_list3 *l3 = NULL; + struct kmem_cache_node *l3 = NULL; int node = cpu_to_mem(cpu); const struct cpumask *mask = cpumask_of_node(node); @@ -1309,7 +1308,7 @@ free_array_cache: static int __cpuinit cpuup_prepare(long cpu) { struct kmem_cache *cachep; - struct kmem_list3 *l3 = NULL; + struct kmem_cache_node *l3 = NULL; int node = cpu_to_mem(cpu); int err; @@ -1463,7 +1462,7 @@ static int __meminit drain_cache_nodelists_node(int node) int ret = 0; list_for_each_entry(cachep, &slab_caches, list) { - struct kmem_list3 *l3; + struct kmem_cache_node *l3; l3 = cachep->nodelists[node]; if (!l3) @@ -1516,15 +1515,15 @@ out: /* * swap the static kmem_list3 with kmalloced memory */ -static void __init init_list(struct kmem_cache *cachep, struct kmem_list3 *list, +static void __init init_list(struct kmem_cache *cachep, struct kmem_cache_node *list, int nodeid) { - struct kmem_list3 *ptr; + struct kmem_cache_node *ptr; - ptr = kmalloc_node(sizeof(struct kmem_list3), GFP_NOWAIT, nodeid); + ptr = kmalloc_node(sizeof(struct kmem_cache_node), GFP_NOWAIT, nodeid); BUG_ON(!ptr); - memcpy(ptr, list, sizeof(struct kmem_list3)); + memcpy(ptr, list, sizeof(struct kmem_cache_node)); /* * Do not assume that spinlocks can be initialized via memcpy: */ @@ -1556,7 +1555,7 @@ static void __init set_up_list3s(struct kmem_cache *cachep, int index) */ static void setup_nodelists_pointer(struct kmem_cache *cachep) { - cachep->nodelists = (struct kmem_list3 **)&cachep->array[nr_cpu_ids]; + cachep->nodelists = (struct kmem_cache_node **)&cachep->array[nr_cpu_ids]; } /* @@ -1613,7 +1612,7 @@ void __init kmem_cache_init(void) */ create_boot_cache(kmem_cache, "kmem_cache", offsetof(struct kmem_cache, array[nr_cpu_ids]) + - nr_node_ids * sizeof(struct kmem_list3 *), + nr_node_ids * sizeof(struct kmem_cache_node *), SLAB_HWCACHE_ALIGN); list_add(&kmem_cache->list, &slab_caches); @@ -1787,7 +1786,7 @@ __initcall(cpucache_init); static noinline void slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid) { - struct kmem_list3 *l3; + struct kmem_cache_node *l3; struct slab *slabp; unsigned long flags; int node; @@ -2279,7 +2278,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp) int node; for_each_online_node(node) { cachep->nodelists[node] = - kmalloc_node(sizeof(struct kmem_list3), + kmalloc_node(sizeof(struct kmem_cache_node), gfp, node); BUG_ON(!cachep->nodelists[node]); kmem_list3_init(cachep->nodelists[node]); @@ -2547,7 +2546,7 @@ static void check_spinlock_acquired_node(struct kmem_cache 
*cachep, int node) #define check_spinlock_acquired_node(x, y) do { } while(0) #endif -static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, +static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *l3, struct array_cache *ac, int force, int node); @@ -2567,7 +2566,7 @@ static void do_drain(void *arg) static void drain_cpu_caches(struct kmem_cache *cachep) { - struct kmem_list3 *l3; + struct kmem_cache_node *l3; int node; on_each_cpu(do_drain, cachep, 1); @@ -2592,7 +2591,7 @@ static void drain_cpu_caches(struct kmem_cache *cachep) * Returns the actual number of slabs released. */ static int drain_freelist(struct kmem_cache *cache, - struct kmem_list3 *l3, int tofree) + struct kmem_cache_node *l3, int tofree) { struct list_head *p; int nr_freed; @@ -2630,7 +2629,7 @@ out: static int __cache_shrink(struct kmem_cache *cachep) { int ret = 0, i = 0; - struct kmem_list3 *l3; + struct kmem_cache_node *l3; drain_cpu_caches(cachep); @@ -2672,7 +2671,7 @@ EXPORT_SYMBOL(kmem_cache_shrink); int __kmem_cache_shutdown(struct kmem_cache *cachep) { int i; - struct kmem_list3 *l3; + struct kmem_cache_node *l3; int rc = __cache_shrink(cachep); if (rc) @@ -2869,7 +2868,7 @@ static int cache_grow(struct kmem_cache *cachep, struct slab *slabp; size_t offset; gfp_t local_flags; - struct kmem_list3 *l3; + struct kmem_cache_node *l3; /* * Be lazy and only check for valid flags here, keeping it out of the @@ -3059,7 +3058,7 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags, bool force_refill) { int batchcount; - struct kmem_list3 *l3; + struct kmem_cache_node *l3; struct array_cache *ac; int node; @@ -3391,7 +3390,7 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, { struct list_head *entry; struct slab *slabp; - struct kmem_list3 *l3; + struct kmem_cache_node *l3; void *obj; int x; @@ -3586,7 +3585,7 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, int node) { int i; - struct kmem_list3 *l3; + struct kmem_cache_node *l3; for (i = 0; i < nr_objects; i++) { void *objp; @@ -3632,7 +3631,7 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac) { int batchcount; - struct kmem_list3 *l3; + struct kmem_cache_node *l3; int node = numa_mem_id(); batchcount = ac->batchcount; @@ -3924,7 +3923,7 @@ EXPORT_SYMBOL(kfree); static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp) { int node; - struct kmem_list3 *l3; + struct kmem_cache_node *l3; struct array_cache *new_shared; struct array_cache **new_alien = NULL; @@ -3969,7 +3968,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp) free_alien_cache(new_alien); continue; } - l3 = kmalloc_node(sizeof(struct kmem_list3), gfp, node); + l3 = kmalloc_node(sizeof(struct kmem_cache_node), gfp, node); if (!l3) { free_alien_cache(new_alien); kfree(new_shared); @@ -4165,7 +4164,7 @@ skip_setup: * necessary. Note that the l3 listlock also protects the array_cache * if drain_array() is used on the shared array. 
*/ -static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, +static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *l3, struct array_cache *ac, int force, int node) { int tofree; @@ -4204,7 +4203,7 @@ static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, static void cache_reap(struct work_struct *w) { struct kmem_cache *searchp; - struct kmem_list3 *l3; + struct kmem_cache_node *l3; int node = numa_mem_id(); struct delayed_work *work = to_delayed_work(w); @@ -4268,7 +4267,7 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo) const char *name; char *error = NULL; int node; - struct kmem_list3 *l3; + struct kmem_cache_node *l3; active_objs = 0; num_slabs = 0; @@ -4482,7 +4481,7 @@ static int leaks_show(struct seq_file *m, void *p) { struct kmem_cache *cachep = list_entry(p, struct kmem_cache, list); struct slab *slabp; - struct kmem_list3 *l3; + struct kmem_cache_node *l3; const char *name; unsigned long *n = m->private; int node; -- cgit From 6a67368c36e2c0c2578ba62f6264ab739af08cce Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Thu, 10 Jan 2013 19:14:19 +0000 Subject: slab: Rename nodelists to node Have a common naming between both slab caches for future changes. Acked-by: Glauber Costa Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- include/linux/slab_def.h | 2 +- mm/slab.c | 135 +++++++++++++++++++++++------------------------ 2 files changed, 68 insertions(+), 69 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index 8b5b2f6b36d3..4ff50e8d1a2c 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -95,7 +95,7 @@ struct kmem_cache { * pointer for each node since "nodelists" uses the remainder of * available pointers. 
*/ - struct kmem_cache_node **nodelists; + struct kmem_cache_node **node; struct array_cache *array[NR_CPUS + MAX_NUMNODES]; /* * Do not add fields after array[] diff --git a/mm/slab.c b/mm/slab.c index 7c0da4c86973..3416f4c544b3 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -347,7 +347,7 @@ static void kmem_list3_init(struct kmem_cache_node *parent) #define MAKE_LIST(cachep, listp, slab, nodeid) \ do { \ INIT_LIST_HEAD(listp); \ - list_splice(&(cachep->nodelists[nodeid]->slab), listp); \ + list_splice(&(cachep->node[nodeid]->slab), listp); \ } while (0) #define MAKE_ALL_LISTS(cachep, ptr, nodeid) \ @@ -549,7 +549,7 @@ static void slab_set_lock_classes(struct kmem_cache *cachep, struct kmem_cache_node *l3; int r; - l3 = cachep->nodelists[q]; + l3 = cachep->node[q]; if (!l3) return; @@ -597,7 +597,7 @@ static void init_node_lock_keys(int q) if (!cache) continue; - l3 = cache->nodelists[q]; + l3 = cache->node[q]; if (!l3 || OFF_SLAB(cache)) continue; @@ -608,8 +608,7 @@ static void init_node_lock_keys(int q) static void on_slab_lock_classes_node(struct kmem_cache *cachep, int q) { - - if (!cachep->nodelists[q]) + if (!cachep->node[q]) return; slab_set_lock_classes(cachep, &on_slab_l3_key, @@ -900,7 +899,7 @@ static inline bool is_slab_pfmemalloc(struct slab *slabp) static void recheck_pfmemalloc_active(struct kmem_cache *cachep, struct array_cache *ac) { - struct kmem_cache_node *l3 = cachep->nodelists[numa_mem_id()]; + struct kmem_cache_node *l3 = cachep->node[numa_mem_id()]; struct slab *slabp; unsigned long flags; @@ -955,7 +954,7 @@ static void *__ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac, * If there are empty slabs on the slabs_free list and we are * being forced to refill the cache, mark this one !pfmemalloc. */ - l3 = cachep->nodelists[numa_mem_id()]; + l3 = cachep->node[numa_mem_id()]; if (!list_empty(&l3->slabs_free) && force_refill) { struct slab *slabp = virt_to_slab(objp); ClearPageSlabPfmemalloc(virt_to_head_page(slabp->s_mem)); @@ -1105,7 +1104,7 @@ static void free_alien_cache(struct array_cache **ac_ptr) static void __drain_alien_cache(struct kmem_cache *cachep, struct array_cache *ac, int node) { - struct kmem_cache_node *rl3 = cachep->nodelists[node]; + struct kmem_cache_node *rl3 = cachep->node[node]; if (ac->avail) { spin_lock(&rl3->list_lock); @@ -1174,7 +1173,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) if (likely(slabp->nodeid == node)) return 0; - l3 = cachep->nodelists[node]; + l3 = cachep->node[node]; STATS_INC_NODEFREES(cachep); if (l3->alien && l3->alien[nodeid]) { alien = l3->alien[nodeid]; @@ -1186,24 +1185,24 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) ac_put_obj(cachep, alien, objp); spin_unlock(&alien->lock); } else { - spin_lock(&(cachep->nodelists[nodeid])->list_lock); + spin_lock(&(cachep->node[nodeid])->list_lock); free_block(cachep, &objp, 1, nodeid); - spin_unlock(&(cachep->nodelists[nodeid])->list_lock); + spin_unlock(&(cachep->node[nodeid])->list_lock); } return 1; } #endif /* - * Allocates and initializes nodelists for a node on each slab cache, used for + * Allocates and initializes node for a node on each slab cache, used for * either memory or cpu hotplug. If memory is being hot-added, the kmem_list3 * will be allocated off-node since memory is not yet online for the new node. - * When hotplugging memory or a cpu, existing nodelists are not replaced if + * When hotplugging memory or a cpu, existing node are not replaced if * already in use. * * Must hold slab_mutex. 
*/ -static int init_cache_nodelists_node(int node) +static int init_cache_node_node(int node) { struct kmem_cache *cachep; struct kmem_cache_node *l3; @@ -1215,7 +1214,7 @@ static int init_cache_nodelists_node(int node) * begin anything. Make sure some other cpu on this * node has not already allocated this */ - if (!cachep->nodelists[node]) { + if (!cachep->node[node]) { l3 = kmalloc_node(memsize, GFP_KERNEL, node); if (!l3) return -ENOMEM; @@ -1228,14 +1227,14 @@ static int init_cache_nodelists_node(int node) * go. slab_mutex is sufficient * protection here. */ - cachep->nodelists[node] = l3; + cachep->node[node] = l3; } - spin_lock_irq(&cachep->nodelists[node]->list_lock); - cachep->nodelists[node]->free_limit = + spin_lock_irq(&cachep->node[node]->list_lock); + cachep->node[node]->free_limit = (1 + nr_cpus_node(node)) * cachep->batchcount + cachep->num; - spin_unlock_irq(&cachep->nodelists[node]->list_lock); + spin_unlock_irq(&cachep->node[node]->list_lock); } return 0; } @@ -1255,7 +1254,7 @@ static void __cpuinit cpuup_canceled(long cpu) /* cpu is dead; no one can alloc from it. */ nc = cachep->array[cpu]; cachep->array[cpu] = NULL; - l3 = cachep->nodelists[node]; + l3 = cachep->node[node]; if (!l3) goto free_array_cache; @@ -1298,7 +1297,7 @@ free_array_cache: * shrink each nodelist to its limit. */ list_for_each_entry(cachep, &slab_caches, list) { - l3 = cachep->nodelists[node]; + l3 = cachep->node[node]; if (!l3) continue; drain_freelist(cachep, l3, l3->free_objects); @@ -1318,7 +1317,7 @@ static int __cpuinit cpuup_prepare(long cpu) * kmalloc_node allows us to add the slab to the right * kmem_list3 and not this cpu's kmem_list3 */ - err = init_cache_nodelists_node(node); + err = init_cache_node_node(node); if (err < 0) goto bad; @@ -1353,7 +1352,7 @@ static int __cpuinit cpuup_prepare(long cpu) } } cachep->array[cpu] = nc; - l3 = cachep->nodelists[node]; + l3 = cachep->node[node]; BUG_ON(!l3); spin_lock_irq(&l3->list_lock); @@ -1456,7 +1455,7 @@ static struct notifier_block __cpuinitdata cpucache_notifier = { * * Must hold slab_mutex. 
*/ -static int __meminit drain_cache_nodelists_node(int node) +static int __meminit drain_cache_node_node(int node) { struct kmem_cache *cachep; int ret = 0; @@ -1464,7 +1463,7 @@ static int __meminit drain_cache_nodelists_node(int node) list_for_each_entry(cachep, &slab_caches, list) { struct kmem_cache_node *l3; - l3 = cachep->nodelists[node]; + l3 = cachep->node[node]; if (!l3) continue; @@ -1493,12 +1492,12 @@ static int __meminit slab_memory_callback(struct notifier_block *self, switch (action) { case MEM_GOING_ONLINE: mutex_lock(&slab_mutex); - ret = init_cache_nodelists_node(nid); + ret = init_cache_node_node(nid); mutex_unlock(&slab_mutex); break; case MEM_GOING_OFFLINE: mutex_lock(&slab_mutex); - ret = drain_cache_nodelists_node(nid); + ret = drain_cache_node_node(nid); mutex_unlock(&slab_mutex); break; case MEM_ONLINE: @@ -1530,7 +1529,7 @@ static void __init init_list(struct kmem_cache *cachep, struct kmem_cache_node * spin_lock_init(&ptr->list_lock); MAKE_ALL_LISTS(cachep, ptr, nodeid); - cachep->nodelists[nodeid] = ptr; + cachep->node[nodeid] = ptr; } /* @@ -1542,8 +1541,8 @@ static void __init set_up_list3s(struct kmem_cache *cachep, int index) int node; for_each_online_node(node) { - cachep->nodelists[node] = &initkmem_list3[index + node]; - cachep->nodelists[node]->next_reap = jiffies + + cachep->node[node] = &initkmem_list3[index + node]; + cachep->node[node]->next_reap = jiffies + REAPTIMEOUT_LIST3 + ((unsigned long)cachep) % REAPTIMEOUT_LIST3; } @@ -1551,11 +1550,11 @@ static void __init set_up_list3s(struct kmem_cache *cachep, int index) /* * The memory after the last cpu cache pointer is used for the - * the nodelists pointer. + * the node pointer. */ -static void setup_nodelists_pointer(struct kmem_cache *cachep) +static void setup_node_pointer(struct kmem_cache *cachep) { - cachep->nodelists = (struct kmem_cache_node **)&cachep->array[nr_cpu_ids]; + cachep->node = (struct kmem_cache_node **)&cachep->array[nr_cpu_ids]; } /* @@ -1567,7 +1566,7 @@ void __init kmem_cache_init(void) int i; kmem_cache = &kmem_cache_boot; - setup_nodelists_pointer(kmem_cache); + setup_node_pointer(kmem_cache); if (num_possible_nodes() == 1) use_alien_caches = 0; @@ -1756,7 +1755,7 @@ void __init kmem_cache_init_late(void) #ifdef CONFIG_NUMA /* * Register a memory hotplug callback that initializes and frees - * nodelists. + * node. 
*/ hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI); #endif @@ -1801,7 +1800,7 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid) unsigned long active_objs = 0, num_objs = 0, free_objects = 0; unsigned long active_slabs = 0, num_slabs = 0; - l3 = cachep->nodelists[node]; + l3 = cachep->node[node]; if (!l3) continue; @@ -2277,15 +2276,15 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp) } else { int node; for_each_online_node(node) { - cachep->nodelists[node] = + cachep->node[node] = kmalloc_node(sizeof(struct kmem_cache_node), gfp, node); - BUG_ON(!cachep->nodelists[node]); - kmem_list3_init(cachep->nodelists[node]); + BUG_ON(!cachep->node[node]); + kmem_list3_init(cachep->node[node]); } } } - cachep->nodelists[numa_mem_id()]->next_reap = + cachep->node[numa_mem_id()]->next_reap = jiffies + REAPTIMEOUT_LIST3 + ((unsigned long)cachep) % REAPTIMEOUT_LIST3; @@ -2388,7 +2387,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) else gfp = GFP_NOWAIT; - setup_nodelists_pointer(cachep); + setup_node_pointer(cachep); #if DEBUG /* @@ -2527,7 +2526,7 @@ static void check_spinlock_acquired(struct kmem_cache *cachep) { #ifdef CONFIG_SMP check_irq_off(); - assert_spin_locked(&cachep->nodelists[numa_mem_id()]->list_lock); + assert_spin_locked(&cachep->node[numa_mem_id()]->list_lock); #endif } @@ -2535,7 +2534,7 @@ static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node) { #ifdef CONFIG_SMP check_irq_off(); - assert_spin_locked(&cachep->nodelists[node]->list_lock); + assert_spin_locked(&cachep->node[node]->list_lock); #endif } @@ -2558,9 +2557,9 @@ static void do_drain(void *arg) check_irq_off(); ac = cpu_cache_get(cachep); - spin_lock(&cachep->nodelists[node]->list_lock); + spin_lock(&cachep->node[node]->list_lock); free_block(cachep, ac->entry, ac->avail, node); - spin_unlock(&cachep->nodelists[node]->list_lock); + spin_unlock(&cachep->node[node]->list_lock); ac->avail = 0; } @@ -2572,13 +2571,13 @@ static void drain_cpu_caches(struct kmem_cache *cachep) on_each_cpu(do_drain, cachep, 1); check_irq_on(); for_each_online_node(node) { - l3 = cachep->nodelists[node]; + l3 = cachep->node[node]; if (l3 && l3->alien) drain_alien_cache(cachep, l3->alien); } for_each_online_node(node) { - l3 = cachep->nodelists[node]; + l3 = cachep->node[node]; if (l3) drain_array(cachep, l3, l3->shared, 1, node); } @@ -2635,7 +2634,7 @@ static int __cache_shrink(struct kmem_cache *cachep) check_irq_on(); for_each_online_node(i) { - l3 = cachep->nodelists[i]; + l3 = cachep->node[i]; if (!l3) continue; @@ -2682,7 +2681,7 @@ int __kmem_cache_shutdown(struct kmem_cache *cachep) /* NUMA: free the list3 structures */ for_each_online_node(i) { - l3 = cachep->nodelists[i]; + l3 = cachep->node[i]; if (l3) { kfree(l3->shared); free_alien_cache(l3->alien); @@ -2879,7 +2878,7 @@ static int cache_grow(struct kmem_cache *cachep, /* Take the l3 list lock to change the colour_next on this node */ check_irq_off(); - l3 = cachep->nodelists[nodeid]; + l3 = cachep->node[nodeid]; spin_lock(&l3->list_lock); /* Get colour for the slab, and cal the next value. 
*/ @@ -3077,7 +3076,7 @@ retry: */ batchcount = BATCHREFILL_LIMIT; } - l3 = cachep->nodelists[node]; + l3 = cachep->node[node]; BUG_ON(ac->avail > 0 || !l3); spin_lock(&l3->list_lock); @@ -3299,7 +3298,7 @@ static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags) /* * Fallback function if there was no memory available and no objects on a * certain node and fall back is permitted. First we scan all the - * available nodelists for available objects. If that fails then we + * available node for available objects. If that fails then we * perform an allocation without specifying a node. This allows the page * allocator to do its reclaim / fallback magic. We then insert the * slab into the proper nodelist and then allocate from it. @@ -3333,8 +3332,8 @@ retry: nid = zone_to_nid(zone); if (cpuset_zone_allowed_hardwall(zone, flags) && - cache->nodelists[nid] && - cache->nodelists[nid]->free_objects) { + cache->node[nid] && + cache->node[nid]->free_objects) { obj = ____cache_alloc_node(cache, flags | GFP_THISNODE, nid); if (obj) @@ -3394,7 +3393,7 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, void *obj; int x; - l3 = cachep->nodelists[nodeid]; + l3 = cachep->node[nodeid]; BUG_ON(!l3); retry: @@ -3479,7 +3478,7 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, if (nodeid == NUMA_NO_NODE) nodeid = slab_node; - if (unlikely(!cachep->nodelists[nodeid])) { + if (unlikely(!cachep->node[nodeid])) { /* Node not bootstrapped yet */ ptr = fallback_alloc(cachep, flags); goto out; @@ -3595,7 +3594,7 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, objp = objpp[i]; slabp = virt_to_slab(objp); - l3 = cachep->nodelists[node]; + l3 = cachep->node[node]; list_del(&slabp->list); check_spinlock_acquired_node(cachep, node); check_slabp(cachep, slabp); @@ -3639,7 +3638,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac) BUG_ON(!batchcount || batchcount > ac->avail); #endif check_irq_off(); - l3 = cachep->nodelists[node]; + l3 = cachep->node[node]; spin_lock(&l3->list_lock); if (l3->shared) { struct array_cache *shared_array = l3->shared; @@ -3946,7 +3945,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp) } } - l3 = cachep->nodelists[node]; + l3 = cachep->node[node]; if (l3) { struct array_cache *shared = l3->shared; @@ -3982,7 +3981,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp) l3->alien = new_alien; l3->free_limit = (1 + nr_cpus_node(node)) * cachep->batchcount + cachep->num; - cachep->nodelists[node] = l3; + cachep->node[node] = l3; } return 0; @@ -3991,13 +3990,13 @@ fail: /* Cache is not active yet. 
Roll back what we did */ node--; while (node >= 0) { - if (cachep->nodelists[node]) { - l3 = cachep->nodelists[node]; + if (cachep->node[node]) { + l3 = cachep->node[node]; kfree(l3->shared); free_alien_cache(l3->alien); kfree(l3); - cachep->nodelists[node] = NULL; + cachep->node[node] = NULL; } node--; } @@ -4057,9 +4056,9 @@ static int __do_tune_cpucache(struct kmem_cache *cachep, int limit, struct array_cache *ccold = new->new[i]; if (!ccold) continue; - spin_lock_irq(&cachep->nodelists[cpu_to_mem(i)]->list_lock); + spin_lock_irq(&cachep->node[cpu_to_mem(i)]->list_lock); free_block(cachep, ccold->entry, ccold->avail, cpu_to_mem(i)); - spin_unlock_irq(&cachep->nodelists[cpu_to_mem(i)]->list_lock); + spin_unlock_irq(&cachep->node[cpu_to_mem(i)]->list_lock); kfree(ccold); } kfree(new); @@ -4219,7 +4218,7 @@ static void cache_reap(struct work_struct *w) * have established with reasonable certainty that * we can do some work if the lock was obtained. */ - l3 = searchp->nodelists[node]; + l3 = searchp->node[node]; reap_alien(searchp, l3); @@ -4272,7 +4271,7 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo) active_objs = 0; num_slabs = 0; for_each_online_node(node) { - l3 = cachep->nodelists[node]; + l3 = cachep->node[node]; if (!l3) continue; @@ -4497,7 +4496,7 @@ static int leaks_show(struct seq_file *m, void *p) n[1] = 0; for_each_online_node(node) { - l3 = cachep->nodelists[node]; + l3 = cachep->node[node]; if (!l3) continue; -- cgit From 95a05b428cc675694321c8f762591984f3fd2b1e Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Thu, 10 Jan 2013 19:14:19 +0000 Subject: slab: Common constants for kmalloc boundaries Standardize the constants that describe the smallest and largest object kept in the kmalloc arrays for SLAB and SLUB. Differentiate between the maximum size for which a slab cache is used (KMALLOC_MAX_CACHE_SIZE) and the maximum allocatable size (KMALLOC_MAX_SIZE, KMALLOC_MAX_ORDER). Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- include/linux/slab.h | 34 ++++++++++++++++++++++++---------- include/linux/slub_def.h | 19 +++---------------- mm/slub.c | 22 +++++++++++----------- 3 files changed, 38 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index c97fe92532d1..c01780540054 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -163,7 +163,12 @@ struct kmem_cache { #else /* CONFIG_SLOB */ /* - * The largest kmalloc size supported by the slab allocators is + * Kmalloc array related definitions + */ + +#ifdef CONFIG_SLAB +/* + * The largest kmalloc size supported by the SLAB allocators is * 32 megabyte (2^25) or the maximum allocatable page order if that is * less than 32 MB. * @@ -173,9 +178,24 @@ struct kmem_cache { */ #define KMALLOC_SHIFT_HIGH ((MAX_ORDER + PAGE_SHIFT - 1) <= 25 ? \ (MAX_ORDER + PAGE_SHIFT - 1) : 25) +#define KMALLOC_SHIFT_MAX KMALLOC_SHIFT_HIGH +#define KMALLOC_SHIFT_LOW 5 +#else +/* + * SLUB allocates up to order 2 pages directly and otherwise + * passes the request to the page allocator. 
+ */ +#define KMALLOC_SHIFT_HIGH (PAGE_SHIFT + 1) +#define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT) +#define KMALLOC_SHIFT_LOW 3 +#endif -#define KMALLOC_MAX_SIZE (1UL << KMALLOC_SHIFT_HIGH) -#define KMALLOC_MAX_ORDER (KMALLOC_SHIFT_HIGH - PAGE_SHIFT) +/* Maximum allocatable size */ +#define KMALLOC_MAX_SIZE (1UL << KMALLOC_SHIFT_MAX) +/* Maximum size for which we actually use a slab cache */ +#define KMALLOC_MAX_CACHE_SIZE (1UL << KMALLOC_SHIFT_HIGH) +/* Maximum order allocatable via the slab allocator */ +#define KMALLOC_MAX_ORDER (KMALLOC_SHIFT_MAX - PAGE_SHIFT) /* * Kmalloc subsystem. @@ -183,15 +203,9 @@ struct kmem_cache { #if defined(ARCH_DMA_MINALIGN) && ARCH_DMA_MINALIGN > 8 #define KMALLOC_MIN_SIZE ARCH_DMA_MINALIGN #else -#ifdef CONFIG_SLAB -#define KMALLOC_MIN_SIZE 32 -#else -#define KMALLOC_MIN_SIZE 8 -#endif +#define KMALLOC_MIN_SIZE (1 << KMALLOC_SHIFT_LOW) #endif -#define KMALLOC_SHIFT_LOW ilog2(KMALLOC_MIN_SIZE) - /* * Figure out which kmalloc slab an allocation of a certain size * belongs to. diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 99c3e05ff1f0..032028ef9a34 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -115,19 +115,6 @@ struct kmem_cache { struct kmem_cache_node *node[MAX_NUMNODES]; }; -/* - * Maximum kmalloc object size handled by SLUB. Larger object allocations - * are passed through to the page allocator. The page allocator "fastpath" - * is relatively slow so we need this value sufficiently high so that - * performance critical objects are allocated through the SLUB fastpath. - * - * This should be dropped to PAGE_SIZE / 2 once the page allocator - * "fastpath" becomes competitive with the slab allocator fastpaths. - */ -#define SLUB_MAX_SIZE (2 * PAGE_SIZE) - -#define SLUB_PAGE_SHIFT (PAGE_SHIFT + 2) - #ifdef CONFIG_ZONE_DMA #define SLUB_DMA __GFP_DMA #else /* Disable DMA functionality */ #define SLUB_DMA (__force gfp_t)0 #endif /* * We keep the general caches in an array of slab caches that are used for * 2^x bytes of allocations. */ -extern struct kmem_cache *kmalloc_caches[SLUB_PAGE_SHIFT]; +extern struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1]; /* * Find the slab cache for a given combination of allocation flags and size.
@@ -211,7 +198,7 @@ static __always_inline void *kmalloc_large(size_t size, gfp_t flags) static __always_inline void *kmalloc(size_t size, gfp_t flags) { if (__builtin_constant_p(size)) { - if (size > SLUB_MAX_SIZE) + if (size > KMALLOC_MAX_CACHE_SIZE) return kmalloc_large(size, flags); if (!(flags & SLUB_DMA)) { @@ -247,7 +234,7 @@ kmem_cache_alloc_node_trace(struct kmem_cache *s, static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) { if (__builtin_constant_p(size) && - size <= SLUB_MAX_SIZE && !(flags & SLUB_DMA)) { + size <= KMALLOC_MAX_CACHE_SIZE && !(flags & SLUB_DMA)) { struct kmem_cache *s = kmalloc_slab(size); if (!s) diff --git a/mm/slub.c b/mm/slub.c index ba2ca53f6c3a..d0f72ee06310 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2775,7 +2775,7 @@ init_kmem_cache_node(struct kmem_cache_node *n) static inline int alloc_kmem_cache_cpus(struct kmem_cache *s) { BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE < - SLUB_PAGE_SHIFT * sizeof(struct kmem_cache_cpu)); + KMALLOC_SHIFT_HIGH * sizeof(struct kmem_cache_cpu)); /* * Must align to double word boundary for the double cmpxchg @@ -3174,11 +3174,11 @@ int __kmem_cache_shutdown(struct kmem_cache *s) * Kmalloc subsystem *******************************************************************/ -struct kmem_cache *kmalloc_caches[SLUB_PAGE_SHIFT]; +struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1]; EXPORT_SYMBOL(kmalloc_caches); #ifdef CONFIG_ZONE_DMA -static struct kmem_cache *kmalloc_dma_caches[SLUB_PAGE_SHIFT]; +static struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1]; #endif static int __init setup_slub_min_order(char *str) @@ -3280,7 +3280,7 @@ void *__kmalloc(size_t size, gfp_t flags) struct kmem_cache *s; void *ret; - if (unlikely(size > SLUB_MAX_SIZE)) + if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) return kmalloc_large(size, flags); s = get_slab(size, flags); @@ -3316,7 +3316,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node) struct kmem_cache *s; void *ret; - if (unlikely(size > SLUB_MAX_SIZE)) { + if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) { ret = kmalloc_large_node(size, flags, node); trace_kmalloc_node(_RET_IP_, ret, @@ -3721,7 +3721,7 @@ void __init kmem_cache_init(void) caches++; } - for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) { + for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) { kmalloc_caches[i] = create_kmalloc_cache("kmalloc", 1 << i, 0); caches++; } @@ -3739,7 +3739,7 @@ void __init kmem_cache_init(void) BUG_ON(!kmalloc_caches[2]->name); } - for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) { + for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) { char *s = kasprintf(GFP_NOWAIT, "kmalloc-%d", 1 << i); BUG_ON(!s); @@ -3751,7 +3751,7 @@ void __init kmem_cache_init(void) #endif #ifdef CONFIG_ZONE_DMA - for (i = 0; i < SLUB_PAGE_SHIFT; i++) { + for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) { struct kmem_cache *s = kmalloc_caches[i]; if (s && s->size) { @@ -3930,7 +3930,7 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller) struct kmem_cache *s; void *ret; - if (unlikely(size > SLUB_MAX_SIZE)) + if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) return kmalloc_large(size, gfpflags); s = get_slab(size, gfpflags); @@ -3953,7 +3953,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, struct kmem_cache *s; void *ret; - if (unlikely(size > SLUB_MAX_SIZE)) { + if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) { ret = kmalloc_large_node(size, gfpflags, node); trace_kmalloc_node(caller, ret, @@ -4312,7 +4312,7 @@ static void 
resiliency_test(void) { u8 *p; - BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || SLUB_PAGE_SHIFT < 10); + BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || KMALLOC_SHIFT_HIGH < 10); printk(KERN_ERR "SLUB resiliency testing\n"); printk(KERN_ERR "-----------------------\n"); -- cgit From 9425c58e5445277699ff3c2a87bac1cfebc1b48d Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Thu, 10 Jan 2013 19:12:17 +0000 Subject: slab: Common definition for the array of kmalloc caches Have a common definition for the kmalloc cache arrays in SLAB and SLUB. Acked-by: Glauber Costa Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- include/linux/slab.h | 5 +++++ include/linux/slab_def.h | 3 --- include/linux/slub_def.h | 6 ------ mm/slab.c | 8 -------- mm/slab_common.c | 8 ++++++++ mm/slub.c | 7 ------- 6 files changed, 13 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index c01780540054..f2327a898a85 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -206,6 +206,11 @@ struct kmem_cache { #define KMALLOC_MIN_SIZE (1 << KMALLOC_SHIFT_LOW) #endif +extern struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1]; +#ifdef CONFIG_ZONE_DMA +extern struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1]; +#endif + /* * Figure out which kmalloc slab an allocation of a certain size * belongs to. diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index 4ff50e8d1a2c..113ec080313f 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -102,9 +102,6 @@ struct kmem_cache { */ }; -extern struct kmem_cache *kmalloc_caches[PAGE_SHIFT + MAX_ORDER]; -extern struct kmem_cache *kmalloc_dma_caches[PAGE_SHIFT + MAX_ORDER]; - void *kmem_cache_alloc(struct kmem_cache *, gfp_t); void *__kmalloc(size_t size, gfp_t flags); diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 032028ef9a34..3701896f7f8a 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -122,12 +122,6 @@ struct kmem_cache { #define SLUB_DMA (__force gfp_t)0 #endif -/* - * We keep the general caches in an array of slab caches that are used for - * 2^x bytes of allocations. - */ -extern struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1]; - /* * Find the slab cache for a given combination of allocation flags and size.
* diff --git a/mm/slab.c b/mm/slab.c index 3416f4c544b3..357f0bdc5e43 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -318,14 +318,6 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int len, static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp); static void cache_reap(struct work_struct *unused); -struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1]; -EXPORT_SYMBOL(kmalloc_caches); - -#ifdef CONFIG_ZONE_DMA -struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1]; -EXPORT_SYMBOL(kmalloc_dma_caches); -#endif - static int slab_early_init = 1; #define INDEX_AC kmalloc_index(sizeof(struct arraycache_init)) diff --git a/mm/slab_common.c b/mm/slab_common.c index 53adfbf2f3b2..0437b8189b8a 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -319,6 +319,14 @@ struct kmem_cache *__init create_kmalloc_cache(const char *name, size_t size, return s; } +struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1]; +EXPORT_SYMBOL(kmalloc_caches); + +#ifdef CONFIG_ZONE_DMA +struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1]; +EXPORT_SYMBOL(kmalloc_dma_caches); +#endif + #endif /* !CONFIG_SLOB */ diff --git a/mm/slub.c b/mm/slub.c index d0f72ee06310..527cbfb5c49b 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3174,13 +3174,6 @@ int __kmem_cache_shutdown(struct kmem_cache *s) * Kmalloc subsystem *******************************************************************/ -struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1]; -EXPORT_SYMBOL(kmalloc_caches); - -#ifdef CONFIG_ZONE_DMA -static struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1]; -#endif - static int __init setup_slub_min_order(char *str) { get_option(&str, &slub_min_order); -- cgit From 2c59dd6544212faa5ce761920d2251f4152f408d Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Thu, 10 Jan 2013 19:14:19 +0000 Subject: slab: Common Kmalloc cache determination Extract the optimized lookup functions from slub and put them into slab_common.c. Then make slab use these functions as well. Joonsoo notes that this fixes some issues with constant folding which also reduces the code size for slub. https://lkml.org/lkml/2012/10/20/82 Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- include/linux/slub_def.h | 41 +++++------------- mm/slab.c | 40 ++---------------- mm/slab.h | 3 ++ mm/slab_common.c | 105 ++++++++++++++++++++++++++++++++++++++++++++- mm/slub.c | 108 +++-------------------------------------------- 5 files changed, 124 insertions(+), 173 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 3701896f7f8a..16341e5316de 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -115,29 +115,6 @@ struct kmem_cache { struct kmem_cache_node *node[MAX_NUMNODES]; }; -#ifdef CONFIG_ZONE_DMA -#define SLUB_DMA __GFP_DMA -#else -/* Disable DMA functionality */ -#define SLUB_DMA (__force gfp_t)0 -#endif - -/* - * Find the slab cache for a given combination of allocation flags and size. - * - * This ought to end up with a global pointer to the right cache - * in kmalloc_caches. 
- */ -static __always_inline struct kmem_cache *kmalloc_slab(size_t size) -{ - int index = kmalloc_index(size); - - if (index == 0) - return NULL; - - return kmalloc_caches[index]; -} - void *kmem_cache_alloc(struct kmem_cache *, gfp_t); void *__kmalloc(size_t size, gfp_t flags); @@ -195,13 +172,14 @@ static __always_inline void *kmalloc(size_t size, gfp_t flags) if (size > KMALLOC_MAX_CACHE_SIZE) return kmalloc_large(size, flags); - if (!(flags & SLUB_DMA)) { - struct kmem_cache *s = kmalloc_slab(size); + if (!(flags & GFP_DMA)) { + int index = kmalloc_index(size); - if (!s) + if (!index) return ZERO_SIZE_PTR; - return kmem_cache_alloc_trace(s, flags, size); + return kmem_cache_alloc_trace(kmalloc_caches[index], + flags, size); } } return __kmalloc(size, flags); @@ -228,13 +206,14 @@ kmem_cache_alloc_node_trace(struct kmem_cache *s, static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) { if (__builtin_constant_p(size) && - size <= KMALLOC_MAX_CACHE_SIZE && !(flags & SLUB_DMA)) { - struct kmem_cache *s = kmalloc_slab(size); + size <= KMALLOC_MAX_CACHE_SIZE && !(flags & GFP_DMA)) { + int index = kmalloc_index(size); - if (!s) + if (!index) return ZERO_SIZE_PTR; - return kmem_cache_alloc_node_trace(s, flags, node, size); + return kmem_cache_alloc_node_trace(kmalloc_caches[index], + flags, node, size); } return __kmalloc_node(size, flags, node); } diff --git a/mm/slab.c b/mm/slab.c index 08ba44f81a28..62629b11df38 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -656,40 +656,6 @@ static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep) return cachep->array[smp_processor_id()]; } -static inline struct kmem_cache *__find_general_cachep(size_t size, - gfp_t gfpflags) -{ - int i; - -#if DEBUG - /* This happens if someone tries to call - * kmem_cache_create(), or __kmalloc(), before - * the generic caches are initialized. - */ - BUG_ON(kmalloc_caches[INDEX_AC] == NULL); -#endif - if (!size) - return ZERO_SIZE_PTR; - - i = kmalloc_index(size); - - /* - * Really subtle: The last entry with cs->cs_size==ULONG_MAX - * has cs_{dma,}cachep==NULL. Thus no special case - * for large kmalloc calls required. - */ -#ifdef CONFIG_ZONE_DMA - if (unlikely(gfpflags & GFP_DMA)) - return kmalloc_dma_caches[i]; -#endif - return kmalloc_caches[i]; -} - -static struct kmem_cache *kmem_find_general_cachep(size_t size, gfp_t gfpflags) -{ - return __find_general_cachep(size, gfpflags); -} - static size_t slab_mgmt_size(size_t nr_objs, size_t align) { return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align); @@ -2426,7 +2392,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) cachep->reciprocal_buffer_size = reciprocal_value(size); if (flags & CFLGS_OFF_SLAB) { - cachep->slabp_cache = kmem_find_general_cachep(slab_size, 0u); + cachep->slabp_cache = kmalloc_slab(slab_size, 0u); /* * This is a possibility for one of the malloc_sizes caches. * But since we go off slab only for object size greater than @@ -3729,7 +3695,7 @@ __do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller) { struct kmem_cache *cachep; - cachep = kmem_find_general_cachep(size, flags); + cachep = kmalloc_slab(size, flags); if (unlikely(ZERO_OR_NULL_PTR(cachep))) return cachep; return kmem_cache_alloc_node_trace(cachep, flags, node, size); @@ -3774,7 +3740,7 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags, * Then kmalloc uses the uninlined functions instead of the inline * functions. 
 */ - cachep = __find_general_cachep(size, flags); + cachep = kmalloc_slab(size, flags); if (unlikely(ZERO_OR_NULL_PTR(cachep))) return cachep; ret = slab_alloc(cachep, flags, caller); diff --git a/mm/slab.h b/mm/slab.h index 44c0bd6dc19e..c01bc8921ac5 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -38,6 +38,9 @@ unsigned long calculate_alignment(unsigned long flags, #ifndef CONFIG_SLOB /* Kmalloc array related functions */ void create_kmalloc_caches(unsigned long); + +/* Find the kmalloc slab corresponding to a given size */ +struct kmem_cache *kmalloc_slab(size_t, gfp_t); #endif diff --git a/mm/slab_common.c b/mm/slab_common.c index 2b0ebb6d071d..6d73f0b7f21c 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -327,6 +327,68 @@ struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1]; EXPORT_SYMBOL(kmalloc_dma_caches); #endif +/* + * Conversion table for small slab sizes / 8 to the index in the + * kmalloc array. This is necessary for slabs < 192 since we have non power + * of two cache sizes there. The size of larger slabs can be determined using + * fls. + */ +static s8 size_index[24] = { + 3, /* 8 */ + 4, /* 16 */ + 5, /* 24 */ + 5, /* 32 */ + 6, /* 40 */ + 6, /* 48 */ + 6, /* 56 */ + 6, /* 64 */ + 1, /* 72 */ + 1, /* 80 */ + 1, /* 88 */ + 1, /* 96 */ + 7, /* 104 */ + 7, /* 112 */ + 7, /* 120 */ + 7, /* 128 */ + 2, /* 136 */ + 2, /* 144 */ + 2, /* 152 */ + 2, /* 160 */ + 2, /* 168 */ + 2, /* 176 */ + 2, /* 184 */ + 2 /* 192 */ +}; + +static inline int size_index_elem(size_t bytes) +{ + return (bytes - 1) / 8; +} + +/* + * Find the kmem_cache structure that serves a given size of + * allocation + */ +struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags) +{ + int index; + + if (size <= 192) { + if (!size) + return ZERO_SIZE_PTR; + + index = size_index[size_index_elem(size)]; + } else + index = fls(size - 1); + +#ifdef CONFIG_ZONE_DMA + if (unlikely((flags & SLAB_CACHE_DMA))) + return kmalloc_dma_caches[index]; + +#endif + return kmalloc_caches[index]; +} + /* * Create the kmalloc array. Some of the regular kmalloc arrays * may already have been created because they were needed to @@ -336,6 +398,47 @@ void __init create_kmalloc_caches(unsigned long flags) { int i; + /* + * Patch up the size_index table if we have strange large alignment + * requirements for the kmalloc array. This is only the case for + * MIPS it seems. The standard arches will not generate any code here. + * + * Largest permitted alignment is 256 bytes due to the way we + * handle the index determination for the smaller caches. + * + * Make sure that nothing crazy happens if someone starts tinkering + * around with ARCH_KMALLOC_MINALIGN + */ + BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 || + (KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1))); + + for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) { + int elem = size_index_elem(i); + + if (elem >= ARRAY_SIZE(size_index)) + break; + size_index[elem] = KMALLOC_SHIFT_LOW; + } + + if (KMALLOC_MIN_SIZE >= 64) { + /* + * The 96 byte size cache is not used if the alignment + * is 64 byte. + */ + for (i = 64 + 8; i <= 96; i += 8) + size_index[size_index_elem(i)] = 7; + + } + + if (KMALLOC_MIN_SIZE >= 128) { + /* + * The 192 byte sized cache is not used if the alignment + * is 128 byte. Redirect kmalloc to use the 256 byte cache + * instead.
+ */ + for (i = 128 + 8; i <= 192; i += 8) + size_index[size_index_elem(i)] = 8; + } /* Caches that are not of the two-to-the-power-of size */ if (KMALLOC_MIN_SIZE <= 32 && !kmalloc_caches[1]) kmalloc_caches[1] = create_kmalloc_cache(NULL, 96, flags); @@ -379,8 +482,6 @@ void __init create_kmalloc_caches(unsigned long flags) } #endif } - - #endif /* !CONFIG_SLOB */ diff --git a/mm/slub.c b/mm/slub.c index e813c2d30fe0..6184b0821f7e 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2982,7 +2982,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) s->allocflags |= __GFP_COMP; if (s->flags & SLAB_CACHE_DMA) - s->allocflags |= SLUB_DMA; + s->allocflags |= GFP_DMA; if (s->flags & SLAB_RECLAIM_ACCOUNT) s->allocflags |= __GFP_RECLAIMABLE; @@ -3210,64 +3210,6 @@ static int __init setup_slub_nomerge(char *str) __setup("slub_nomerge", setup_slub_nomerge); -/* - * Conversion table for small slabs sizes / 8 to the index in the - * kmalloc array. This is necessary for slabs < 192 since we have non power - * of two cache sizes there. The size of larger slabs can be determined using - * fls. - */ -static s8 size_index[24] = { - 3, /* 8 */ - 4, /* 16 */ - 5, /* 24 */ - 5, /* 32 */ - 6, /* 40 */ - 6, /* 48 */ - 6, /* 56 */ - 6, /* 64 */ - 1, /* 72 */ - 1, /* 80 */ - 1, /* 88 */ - 1, /* 96 */ - 7, /* 104 */ - 7, /* 112 */ - 7, /* 120 */ - 7, /* 128 */ - 2, /* 136 */ - 2, /* 144 */ - 2, /* 152 */ - 2, /* 160 */ - 2, /* 168 */ - 2, /* 176 */ - 2, /* 184 */ - 2 /* 192 */ -}; - -static inline int size_index_elem(size_t bytes) -{ - return (bytes - 1) / 8; -} - -static struct kmem_cache *get_slab(size_t size, gfp_t flags) -{ - int index; - - if (size <= 192) { - if (!size) - return ZERO_SIZE_PTR; - - index = size_index[size_index_elem(size)]; - } else - index = fls(size - 1); - -#ifdef CONFIG_ZONE_DMA - if (unlikely((flags & SLUB_DMA))) - return kmalloc_dma_caches[index]; - -#endif - return kmalloc_caches[index]; -} - void *__kmalloc(size_t size, gfp_t flags) { struct kmem_cache *s; @@ -3276,7 +3218,7 @@ void *__kmalloc(size_t size, gfp_t flags) if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) return kmalloc_large(size, flags); - s = get_slab(size, flags); + s = kmalloc_slab(size, flags); if (unlikely(ZERO_OR_NULL_PTR(s))) return s; @@ -3319,7 +3261,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node) return ret; } - s = get_slab(size, flags); + s = kmalloc_slab(size, flags); if (unlikely(ZERO_OR_NULL_PTR(s))) return s; @@ -3632,7 +3574,6 @@ void __init kmem_cache_init(void) { static __initdata struct kmem_cache boot_kmem_cache, boot_kmem_cache_node; - int i; if (debug_guardpage_minorder()) slub_max_order = 0; @@ -3663,45 +3604,6 @@ void __init kmem_cache_init(void) kmem_cache_node = bootstrap(&boot_kmem_cache_node); /* Now we can use the kmem_cache to allocate kmalloc slabs */ - - /* - * Patch up the size_index table if we have strange large alignment - * requirements for the kmalloc array. This is only the case for - * MIPS it seems. The standard arches will not generate any code here. - * - * Largest permitted alignment is 256 bytes due to the way we - * handle the index determination for the smaller caches. 
- * - * Make sure that nothing crazy happens if someone starts tinkering - * around with ARCH_KMALLOC_MINALIGN - */ - BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 || - (KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1))); - - for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) { - int elem = size_index_elem(i); - if (elem >= ARRAY_SIZE(size_index)) - break; - size_index[elem] = KMALLOC_SHIFT_LOW; - } - - if (KMALLOC_MIN_SIZE == 64) { - /* - * The 96 byte size cache is not used if the alignment - * is 64 byte. - */ - for (i = 64 + 8; i <= 96; i += 8) - size_index[size_index_elem(i)] = 7; - } else if (KMALLOC_MIN_SIZE == 128) { - /* - * The 192 byte sized cache is not used if the alignment - * is 128 byte. Redirect kmalloc to use the 256 byte cache - * instead. - */ - for (i = 128 + 8; i <= 192; i += 8) - size_index[size_index_elem(i)] = 8; - } - create_kmalloc_caches(0); #ifdef CONFIG_SMP @@ -3877,7 +3779,7 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller) if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) return kmalloc_large(size, gfpflags); - s = get_slab(size, gfpflags); + s = kmalloc_slab(size, gfpflags); if (unlikely(ZERO_OR_NULL_PTR(s))) return s; @@ -3907,7 +3809,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, return ret; } - s = get_slab(size, gfpflags); + s = kmalloc_slab(size, gfpflags); if (unlikely(ZERO_OR_NULL_PTR(s))) return s; -- cgit From ca34956b804b7554fc4e88826773380d9d5122a8 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Thu, 10 Jan 2013 19:14:19 +0000 Subject: slab: Common definition for kmem_cache_node Put the definitions for the kmem_cache_node structures together so that we have one structure. That will allow us to create more common fields in the future which could yield more opportunities to share code. Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- include/linux/slub_def.h | 11 ----------- mm/slab.c | 17 ----------------- mm/slab.h | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 32 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 16341e5316de..027276fa8713 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -53,17 +53,6 @@ struct kmem_cache_cpu { #endif }; -struct kmem_cache_node { - spinlock_t list_lock; /* Protect partial list and nr_partial */ - unsigned long nr_partial; - struct list_head partial; -#ifdef CONFIG_SLUB_DEBUG - atomic_long_t nr_slabs; - atomic_long_t total_objects; - struct list_head full; -#endif -}; - /* * Word size structure that can be atomically updated or read and that * contains both the order and the number of objects that a slab of the diff --git a/mm/slab.c b/mm/slab.c index c162b2eb493a..17f859614546 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -285,23 +285,6 @@ struct arraycache_init { void *entries[BOOT_CPUCACHE_ENTRIES]; }; -/* - * The slab lists for all objects. - */ -struct kmem_cache_node { - struct list_head slabs_partial; /* partial list first, better asm code */ - struct list_head slabs_full; - struct list_head slabs_free; - unsigned long free_objects; - unsigned int free_limit; - unsigned int colour_next; /* Per-node cache coloring */ - spinlock_t list_lock; - struct array_cache *shared; /* shared per node */ - struct array_cache **alien; /* on other nodes */ - unsigned long next_reap; /* updated without locking */ - int free_touched; /* updated without locking */ -}; - /* * Need this for bootstrapping a per node allocator. 
*/ diff --git a/mm/slab.h b/mm/slab.h index f0a552ff7b9b..f96b49e4704e 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -239,3 +239,35 @@ static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x) return s; } #endif + + +/* + * The slab lists for all objects. + */ +struct kmem_cache_node { + spinlock_t list_lock; + +#ifdef CONFIG_SLAB + struct list_head slabs_partial; /* partial list first, better asm code */ + struct list_head slabs_full; + struct list_head slabs_free; + unsigned long free_objects; + unsigned int free_limit; + unsigned int colour_next; /* Per-node cache coloring */ + struct array_cache *shared; /* shared per node */ + struct array_cache **alien; /* on other nodes */ + unsigned long next_reap; /* updated without locking */ + int free_touched; /* updated without locking */ +#endif + +#ifdef CONFIG_SLUB + unsigned long nr_partial; + struct list_head partial; +#ifdef CONFIG_SLUB_DEBUG + atomic_long_t nr_slabs; + atomic_long_t total_objects; + struct list_head full; +#endif +#endif + +}; -- cgit From c601fd6956e92b0eb268d4af754073c76155b99d Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 5 Feb 2013 16:36:47 +0000 Subject: slab: Handle ARCH_DMA_MINALIGN correctly James Hogan hit boot problems in next-20130204 on Meta: META213-Thread0 DSP [LogF] kobject (4fc03980): tried to init an initialized object, something is seriously wrong. META213-Thread0 DSP [LogF] META213-Thread0 DSP [LogF] Call trace: META213-Thread0 DSP [LogF] [<4000888c>] _show_stack+0x68/0x7c META213-Thread0 DSP [LogF] [<400088b4>] _dump_stack+0x14/0x28 META213-Thread0 DSP [LogF] [<40103794>] _kobject_init+0x58/0x9c META213-Thread0 DSP [LogF] [<40103810>] _kobject_create+0x38/0x64 META213-Thread0 DSP [LogF] [<40103eac>] _kobject_create_and_add+0x14/0x8c META213-Thread0 DSP [LogF] [<40190ac4>] _mnt_init+0xd8/0x220 META213-Thread0 DSP [LogF] [<40190508>] _vfs_caches_init+0xb0/0x160 META213-Thread0 DSP [LogF] [<401851f4>] _start_kernel+0x274/0x340 META213-Thread0 DSP [LogF] [<40188424>] _metag_start_kernel+0x58/0x6c META213-Thread0 DSP [LogF] [<40000044>] __start+0x44/0x48 META213-Thread0 DSP [LogF] META213-Thread0 DSP [LogF] devtmpfs: initialized META213-Thread0 DSP [LogF] L2 Cache: Not present META213-Thread0 DSP [LogF] BUG: failure at fs/sysfs/dir.c:736/sysfs_read_ns_type()! META213-Thread0 DSP [LogF] Kernel panic - not syncing: BUG! META213-Thread0 DSP [Thread Exit] Thread has exited - return code = 4294967295 And bisected the problem to commit 95a05b4 ("slab: Common constants for kmalloc boundaries"). As it turns out, a fixed KMALLOC_SHIFT_LOW does not work for arches with higher alignment requirements. Determine KMALLOC_SHIFT_LOW from ARCH_DMA_MINALIGN instead. Reported-and-tested-by: James Hogan Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- include/linux/slab.h | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index f2327a898a85..0c621752caa6 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -133,6 +133,19 @@ void kfree(const void *); void kzfree(const void *); size_t ksize(const void *); +/* + * Some archs want to perform DMA into kmalloc caches and need a guaranteed + * alignment larger than the alignment of a 64-bit integer. + * Setting ARCH_KMALLOC_MINALIGN in arch headers allows that. 
+ */ +#if defined(ARCH_DMA_MINALIGN) && ARCH_DMA_MINALIGN > 8 +#define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN +#define KMALLOC_MIN_SIZE ARCH_DMA_MINALIGN +#define KMALLOC_SHIFT_LOW ilog2(ARCH_DMA_MINALIGN) +#else +#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long) +#endif + #ifdef CONFIG_SLOB /* * Common fields provided in kmem_cache by all slab allocators @@ -179,7 +192,9 @@ struct kmem_cache { #define KMALLOC_SHIFT_HIGH ((MAX_ORDER + PAGE_SHIFT - 1) <= 25 ? \ (MAX_ORDER + PAGE_SHIFT - 1) : 25) #define KMALLOC_SHIFT_MAX KMALLOC_SHIFT_HIGH +#ifndef KMALLOC_SHIFT_LOW #define KMALLOC_SHIFT_LOW 5 +#endif #else /* * SLUB allocates up to order 2 pages directly and otherwise @@ -187,8 +202,10 @@ struct kmem_cache { */ #define KMALLOC_SHIFT_HIGH (PAGE_SHIFT + 1) #define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT) +#ifndef KMALLOC_SHIFT_LOW #define KMALLOC_SHIFT_LOW 3 #endif +#endif /* Maximum allocatable size */ #define KMALLOC_MAX_SIZE (1UL << KMALLOC_SHIFT_MAX) @@ -200,9 +217,7 @@ struct kmem_cache { /* * Kmalloc subsystem. */ -#if defined(ARCH_DMA_MINALIGN) && ARCH_DMA_MINALIGN > 8 -#define KMALLOC_MIN_SIZE ARCH_DMA_MINALIGN -#else +#ifndef KMALLOC_MIN_SIZE #define KMALLOC_MIN_SIZE (1 << KMALLOC_SHIFT_LOW) #endif @@ -289,17 +304,6 @@ static __always_inline int kmalloc_size(int n) } #endif /* !CONFIG_SLOB */ -/* - * Some archs want to perform DMA into kmalloc caches and need a guaranteed - * alignment larger than the alignment of a 64-bit integer. - * Setting ARCH_KMALLOC_MINALIGN in arch headers allows that. - */ -#ifdef ARCH_DMA_MINALIGN -#define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN -#else -#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long) -#endif - /* * Setting ARCH_SLAB_MINALIGN in arch headers allows a different alignment. * Intended for arches that get misalignment faults even for 64 bit integer -- cgit From f43f627d2f17e95c78647eeddf968d12f5c286b1 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Mon, 4 Feb 2013 13:37:20 +0000 Subject: PM: make VT switching to the suspend console optional v3 KMS drivers can potentially restore the display configuration without userspace help. Such drivers can call a new function, pm_vt_switch_required(false), if they support this feature. In that case, the PM layer won't VT switch to the suspend console at suspend time and then back to the original VT on resume, but rather leave things alone for a nicer-looking suspend and resume sequence. v2: make a function so we can handle multiple drivers (Alan) v3: use a list to track device requests (Rafael) v4: Squash in build fix from Jesse for CONFIG_VT_CONSOLE_SLEEP=n v5: Squash in patch from Wu Fengguang to add a few missing static qualifiers. v6: Add missing EXPORT_SYMBOL. Signed-off-by: Jesse Barnes Reviewed-by: Rafael J.
Wysocki (v3) Signed-off-by: Daniel Vetter --- include/linux/pm.h | 13 ++++++ kernel/power/console.c | 116 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 129 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm.h b/include/linux/pm.h index 03d7bb145311..e5da2f353e8f 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -35,6 +35,19 @@ extern void (*pm_idle)(void); extern void (*pm_power_off)(void); extern void (*pm_power_off_prepare)(void); +struct device; /* we have a circular dep with device.h */ +#ifdef CONFIG_VT_CONSOLE_SLEEP +extern void pm_vt_switch_required(struct device *dev, bool required); +extern void pm_vt_switch_unregister(struct device *dev); +#else +static inline void pm_vt_switch_required(struct device *dev, bool required) +{ +} +static inline void pm_vt_switch_unregister(struct device *dev) +{ +} +#endif /* CONFIG_VT_CONSOLE_SLEEP */ + /* * Device power management */ diff --git a/kernel/power/console.c b/kernel/power/console.c index b1dc456474b5..463aa6736751 100644 --- a/kernel/power/console.c +++ b/kernel/power/console.c @@ -4,6 +4,7 @@ * Originally from swsusp. */ +#include #include #include #include @@ -14,8 +15,120 @@ static int orig_fgconsole, orig_kmsg; +static DEFINE_MUTEX(vt_switch_mutex); + +struct pm_vt_switch { + struct list_head head; + struct device *dev; + bool required; +}; + +static LIST_HEAD(pm_vt_switch_list); + + +/** + * pm_vt_switch_required - indicate VT switch at suspend requirements + * @dev: device + * @required: if true, caller needs VT switch at suspend/resume time + * + * The different console drivers may or may not require VT switches across + * suspend/resume, depending on how they handle restoring video state and + * what may be running. + * + * Drivers can indicate support for switchless suspend/resume, which can + * save time and flicker, by using this routine and passing 'false' as + * the argument. If any loaded driver needs VT switching, or the + * no_console_suspend argument has been passed on the command line, VT + * switches will occur. + */ +void pm_vt_switch_required(struct device *dev, bool required) +{ + struct pm_vt_switch *entry, *tmp; + + mutex_lock(&vt_switch_mutex); + list_for_each_entry(tmp, &pm_vt_switch_list, head) { + if (tmp->dev == dev) { + /* already registered, update requirement */ + tmp->required = required; + goto out; + } + } + + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + goto out; + + entry->required = required; + entry->dev = dev; + + list_add(&entry->head, &pm_vt_switch_list); +out: + mutex_unlock(&vt_switch_mutex); +} +EXPORT_SYMBOL(pm_vt_switch_required); + +/** + * pm_vt_switch_unregister - stop tracking a device's VT switching needs + * @dev: device + * + * Remove @dev from the vt switch list. 
+ */ +void pm_vt_switch_unregister(struct device *dev) +{ + struct pm_vt_switch *tmp; + + mutex_lock(&vt_switch_mutex); + list_for_each_entry(tmp, &pm_vt_switch_list, head) { + if (tmp->dev == dev) { + list_del(&tmp->head); + break; + } + } + mutex_unlock(&vt_switch_mutex); +} +EXPORT_SYMBOL(pm_vt_switch_unregister); + +/* + * There are three cases when a VT switch on suspend/resume is required: + * 1) no driver has indicated a requirement one way or another, so preserve + * the old behavior + * 2) console suspend is disabled, we want to see debug messages across + * suspend/resume + * 3) any registered driver indicates it needs a VT switch + * + * If none of these conditions is present, meaning we have at least one driver + * that doesn't need the switch, and none that do, we can avoid it to make + * resume look a little prettier (and suspend too, but that's usually hidden, + * e.g. when closing the lid on a laptop). + */ +static bool pm_vt_switch(void) +{ + struct pm_vt_switch *entry; + bool ret = true; + + mutex_lock(&vt_switch_mutex); + if (list_empty(&pm_vt_switch_list)) + goto out; + + if (!console_suspend_enabled) + goto out; + + list_for_each_entry(entry, &pm_vt_switch_list, head) { + if (entry->required) + goto out; + } + + ret = false; +out: + mutex_unlock(&vt_switch_mutex); + return ret; +} + int pm_prepare_console(void) { + if (!pm_vt_switch()) + return 0; + orig_fgconsole = vt_move_to_console(SUSPEND_CONSOLE, 1); if (orig_fgconsole < 0) return 1; @@ -26,6 +139,9 @@ int pm_prepare_console(void) void pm_restore_console(void) { + if (!pm_vt_switch()) + return; + if (orig_fgconsole >= 0) { vt_move_to_console(orig_fgconsole, 0); vt_kmsg_redirect(orig_kmsg); -- cgit From 3cf2667b9f8b2c2fe298a427deb399e52321da6b Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Mon, 4 Feb 2013 13:37:21 +0000 Subject: fb: add support for drivers not needing VT switch at suspend/resume time Use the new PM routines to indicate whether we need to VT switch at suspend and resume time. When a new driver is bound, set its flag accordingly, and when unbound, remove it from the PM's console tracking list. Signed-off-by: Jesse Barnes Acked-by: Rafael J.
Wysocki Signed-off-by: Daniel Vetter --- drivers/video/fbmem.c | 7 +++++++ include/linux/fb.h | 2 ++ 2 files changed, 9 insertions(+) (limited to 'include/linux') diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c index dc61c12ecf8c..2af7153da2e4 100644 --- a/drivers/video/fbmem.c +++ b/drivers/video/fbmem.c @@ -1645,6 +1645,11 @@ static int do_register_framebuffer(struct fb_info *fb_info) if (!fb_info->modelist.prev || !fb_info->modelist.next) INIT_LIST_HEAD(&fb_info->modelist); + if (fb_info->skip_vt_switch) + pm_vt_switch_required(fb_info->dev, false); + else + pm_vt_switch_required(fb_info->dev, true); + fb_var_to_videomode(&mode, &fb_info->var); fb_add_videomode(&mode, &fb_info->modelist); registered_fb[i] = fb_info; @@ -1679,6 +1684,8 @@ static int do_unregister_framebuffer(struct fb_info *fb_info) if (ret) return -EINVAL; + pm_vt_switch_unregister(fb_info->dev); + unlink_framebuffer(fb_info); if (fb_info->pixmap.addr && (fb_info->pixmap.flags & FB_PIXMAP_DEFAULT)) diff --git a/include/linux/fb.h b/include/linux/fb.h index 58b98606ac26..d49c60f5aa4c 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -501,6 +501,8 @@ struct fb_info { resource_size_t size; } ranges[0]; } *apertures; + + bool skip_vt_switch; /* no VT switch on suspend/resume required */ }; static inline struct apertures_struct *alloc_apertures(unsigned int max_num) { -- cgit From 4490108b4a5ada14c7be712260829faecc814ae5 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Fri, 15 Feb 2013 17:29:22 -0800 Subject: openvswitch: Allow OVS_USERSPACE_ATTR_USERDATA to be variable length. Until now, the optional OVS_USERSPACE_ATTR_USERDATA attribute had to be exactly 64 bits long, if it was present. However, 64 bits is not enough space to associate as much information with a flow as would be convenient for some userspace features now under development. This commit generalizes the attribute, allowing it to be any length. This generalization is backward-compatible: if userspace only uses 64-bit attributes, then it will not see any change in behavior. CC: Romain Lenglet Signed-off-by: Ben Pfaff Signed-off-by: Jesse Gross --- include/linux/openvswitch.h | 11 ++++++----- net/openvswitch/datapath.c | 11 ++++++----- net/openvswitch/datapath.h | 2 +- 3 files changed, 13 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h index 99e6414a40d9..67d6c7b03581 100644 --- a/include/linux/openvswitch.h +++ b/include/linux/openvswitch.h @@ -127,7 +127,8 @@ enum ovs_packet_cmd { * for %OVS_PACKET_CMD_EXECUTE. It has nested %OVS_ACTION_ATTR_* attributes. * @OVS_PACKET_ATTR_USERDATA: Present for an %OVS_PACKET_CMD_ACTION * notification if the %OVS_ACTION_ATTR_USERSPACE action specified an - * %OVS_USERSPACE_ATTR_USERDATA attribute. + * %OVS_USERSPACE_ATTR_USERDATA attribute, with the same length and content + * specified there. * * These attributes follow the &struct ovs_header within the Generic Netlink * payload for %OVS_PACKET_* commands. @@ -137,7 +138,7 @@ enum ovs_packet_attr { OVS_PACKET_ATTR_PACKET, /* Packet data. */ OVS_PACKET_ATTR_KEY, /* Nested OVS_KEY_ATTR_* attributes. */ OVS_PACKET_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */ - OVS_PACKET_ATTR_USERDATA, /* u64 OVS_ACTION_ATTR_USERSPACE arg. */ + OVS_PACKET_ATTR_USERDATA, /* OVS_ACTION_ATTR_USERSPACE arg. */ __OVS_PACKET_ATTR_MAX }; @@ -389,13 +390,13 @@ enum ovs_sample_attr { * enum ovs_userspace_attr - Attributes for %OVS_ACTION_ATTR_USERSPACE action. 
* @OVS_USERSPACE_ATTR_PID: u32 Netlink PID to which the %OVS_PACKET_CMD_ACTION * message should be sent. Required. - * @OVS_USERSPACE_ATTR_USERDATA: If present, its u64 argument is copied to the - * %OVS_PACKET_CMD_ACTION message as %OVS_PACKET_ATTR_USERDATA, + * @OVS_USERSPACE_ATTR_USERDATA: If present, its variable-length argument is + * copied to the %OVS_PACKET_CMD_ACTION message as %OVS_PACKET_ATTR_USERDATA. */ enum ovs_userspace_attr { OVS_USERSPACE_ATTR_UNSPEC, OVS_USERSPACE_ATTR_PID, /* u32 Netlink PID to receive upcalls. */ - OVS_USERSPACE_ATTR_USERDATA, /* u64 optional user-specified cookie. */ + OVS_USERSPACE_ATTR_USERDATA, /* Optional user-specified cookie. */ __OVS_USERSPACE_ATTR_MAX }; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index f9d2438e6437..96cd5b243d57 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -370,8 +370,8 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex, len = sizeof(struct ovs_header); len += nla_total_size(skb->len); len += nla_total_size(FLOW_BUFSIZE); - if (upcall_info->cmd == OVS_PACKET_CMD_ACTION) - len += nla_total_size(8); + if (upcall_info->userdata) + len += NLA_ALIGN(upcall_info->userdata->nla_len); user_skb = genlmsg_new(len, GFP_ATOMIC); if (!user_skb) { @@ -388,8 +388,9 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex, nla_nest_end(user_skb, nla); if (upcall_info->userdata) - nla_put_u64(user_skb, OVS_PACKET_ATTR_USERDATA, - nla_get_u64(upcall_info->userdata)); + __nla_put(user_skb, OVS_PACKET_ATTR_USERDATA, + nla_len(upcall_info->userdata), + nla_data(upcall_info->userdata)); nla = __nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, skb->len); @@ -544,7 +545,7 @@ static int validate_userspace(const struct nlattr *attr) { static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = { [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 }, - [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_U64 }, + [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC }, }; struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1]; int error; diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 031dfbf37c93..9125ad5c5aeb 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -119,7 +119,7 @@ struct ovs_skb_cb { * struct dp_upcall - metadata to include with a packet to send to userspace * @cmd: One of %OVS_PACKET_CMD_*. * @key: Becomes %OVS_PACKET_ATTR_KEY. Must be nonnull. - * @userdata: If nonnull, its u64 value is extracted and passed to userspace as + * @userdata: If nonnull, its variable-length value is passed to userspace as * %OVS_PACKET_ATTR_USERDATA. * @pid: Netlink PID to which packet should be sent. If @pid is 0 then no * packet is sent and the packet is accounted in the datapath's @n_lost -- cgit From 27cef8b47cfb27fa2955a8577637794f1f275db2 Mon Sep 17 00:00:00 2001 From: Heiko Stübner Date: Sat, 23 Feb 2013 12:06:44 -0800 Subject: Input: auo-pixcir-ts - handle reset gpio directly Devicetree based platforms don't handle device callbacks very well and until now no board has come along that needs more extended hwinit than pulling the rst gpio high. Therefore pull the reset handling directly into the driver and remove the callbacks from the driver. If extended device setup is needed at some later point, power-sequences would probably be the solution of choice. 
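For illustration only (not part of this patch), a board file now passes just the two GPIO numbers in the platform data instead of init_hw/exit_hw callbacks; the GPIO numbers below are hypothetical, and the interrupt-mode constant is assumed to come from the driver header:

static const struct auo_pixcir_ts_platdata example_pdata = {
	.gpio_int    = 17,	/* hypothetical interrupt GPIO */
	.gpio_rst    = 18,	/* hypothetical reset GPIO, driven high by the driver at probe */
	.int_setting = AUO_PIXCIR_INT_COMP_COORD,	/* assumed constant from auo-pixcir-ts.h */
	.x_max       = 800,
	.y_max       = 480,
};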
Signed-off-by: Heiko Stuebner Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/auo-pixcir-ts.c | 26 ++++++++++++++++++++------ include/linux/input/auo-pixcir-ts.h | 4 +--- 2 files changed, 21 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/input/touchscreen/auo-pixcir-ts.c b/drivers/input/touchscreen/auo-pixcir-ts.c index 813413eebab7..6317a9c7884c 100644 --- a/drivers/input/touchscreen/auo-pixcir-ts.c +++ b/drivers/input/touchscreen/auo-pixcir-ts.c @@ -511,8 +511,21 @@ static int auo_pixcir_probe(struct i2c_client *client, goto err_gpio_dir; } - if (pdata->init_hw) - pdata->init_hw(client); + ret = gpio_request(pdata->gpio_rst, "auo_pixcir_ts_rst"); + if (ret) { + dev_err(&client->dev, "request of gpio %d failed, %d\n", + pdata->gpio_rst, ret); + goto err_gpio_dir; + } + + ret = gpio_direction_output(pdata->gpio_rst, 1); + if (ret) { + dev_err(&client->dev, "setting direction of gpio %d failed %d\n", + pdata->gpio_rst, ret); + goto err_gpio_rst; + } + + msleep(200); ts->client = client; ts->touch_ind_mode = 0; @@ -597,8 +610,9 @@ err_input_register: err_fw_vers: input_free_device(input_dev); err_input_alloc: - if (pdata->exit_hw) - pdata->exit_hw(client); + gpio_set_value(pdata->gpio_rst, 0); +err_gpio_rst: + gpio_free(pdata->gpio_rst); err_gpio_dir: gpio_free(pdata->gpio_int); err_gpio_int: @@ -616,8 +630,8 @@ static int auo_pixcir_remove(struct i2c_client *client) input_unregister_device(ts->input); - if (pdata->exit_hw) - pdata->exit_hw(client); + gpio_set_value(pdata->gpio_rst, 0); + gpio_free(pdata->gpio_rst); gpio_free(pdata->gpio_int); diff --git a/include/linux/input/auo-pixcir-ts.h b/include/linux/input/auo-pixcir-ts.h index 75d4be717714..5049f21928e4 100644 --- a/include/linux/input/auo-pixcir-ts.h +++ b/include/linux/input/auo-pixcir-ts.h @@ -43,12 +43,10 @@ */ struct auo_pixcir_ts_platdata { int gpio_int; + int gpio_rst; int int_setting; - void (*init_hw)(struct i2c_client *); - void (*exit_hw)(struct i2c_client *); - unsigned int x_max; unsigned int y_max; }; -- cgit From e90a6df80dc45ab53d2f4f4db297434e48c0208e Mon Sep 17 00:00:00 2001 From: Henrik Rydberg Date: Mon, 25 Feb 2013 11:31:43 +0100 Subject: HID: Extend the interface with report requests Some drivers send reports directly to the underlying device, creating an unwanted dependency on the underlying transport layer. This patch adds hid_hw_request() to the interface, thereby removing usbhid from the lion's share of the drivers.
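As a sketch (not code from this patch), a HID driver could now issue a feature-report request through the transport-independent helper instead of calling usbhid_submit_report() directly; the report lookup shown here is illustrative:

static void example_get_feature(struct hid_device *hdev, unsigned int id)
{
	struct hid_report *report;

	report = hdev->report_enum[HID_FEATURE_REPORT].report_id_hash[id];
	if (!report)
		return;

	/* queue a GET_REPORT; the bound ll_driver performs the transfer */
	hid_hw_request(hdev, report, HID_REQ_GET_REPORT);
}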
Signed-off-by: Henrik Rydberg Signed-off-by: Benjamin Tissoires Reviewed-by: Mika Westerberg Signed-off-by: Jiri Kosina --- drivers/hid/usbhid/hid-core.c | 13 +++++++++++++ include/linux/hid.h | 20 ++++++++++++++++++++ 2 files changed, 33 insertions(+) (limited to 'include/linux') diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c index 8e0c4bf94ebc..366fd09d257d 100644 --- a/drivers/hid/usbhid/hid-core.c +++ b/drivers/hid/usbhid/hid-core.c @@ -1243,6 +1243,18 @@ static int usbhid_power(struct hid_device *hid, int lvl) return r; } +static void usbhid_request(struct hid_device *hid, struct hid_report *rep, int reqtype) +{ + switch (reqtype) { + case HID_REQ_GET_REPORT: + usbhid_submit_report(hid, rep, USB_DIR_IN); + break; + case HID_REQ_SET_REPORT: + usbhid_submit_report(hid, rep, USB_DIR_OUT); + break; + } +} + static struct hid_ll_driver usb_hid_driver = { .parse = usbhid_parse, .start = usbhid_start, @@ -1251,6 +1263,7 @@ static struct hid_ll_driver usb_hid_driver = { .close = usbhid_close, .power = usbhid_power, .hidinput_input_event = usb_hidinput_input_event, + .request = usbhid_request, }; static int usbhid_probe(struct usb_interface *intf, const struct usb_device_id *id) diff --git a/include/linux/hid.h b/include/linux/hid.h index e14b465b1146..261c713d4842 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -662,6 +662,7 @@ struct hid_driver { * @hidinput_input_event: event input event (e.g. ff or leds) * @parse: this method is called only once to parse the device data, * shouldn't allocate anything to not leak memory + * @request: send report request to device (e.g. feature report) */ struct hid_ll_driver { int (*start)(struct hid_device *hdev); @@ -676,6 +677,10 @@ struct hid_ll_driver { unsigned int code, int value); int (*parse)(struct hid_device *hdev); + + void (*request)(struct hid_device *hdev, + struct hid_report *report, int reqtype); + }; #define PM_HINT_FULLON 1<<5 @@ -883,6 +888,21 @@ static inline int hid_hw_power(struct hid_device *hdev, int level) return hdev->ll_driver->power ? hdev->ll_driver->power(hdev, level) : 0; } + +/** + * hid_hw_request - send report request to device + * + * @hdev: hid device + * @report: report to send + * @reqtype: hid request type + */ +static inline void hid_hw_request(struct hid_device *hdev, + struct hid_report *report, int reqtype) +{ + if (hdev->ll_driver->request) + hdev->ll_driver->request(hdev, report, reqtype); +} + int hid_report_raw_event(struct hid_device *hid, int type, u8 *data, int size, int interrupt); -- cgit From 3373443befa73ee60e4275e7699b26058b01455a Mon Sep 17 00:00:00 2001 From: Henrik Rydberg Date: Mon, 25 Feb 2013 11:31:44 +0100 Subject: HID: Extend the interface with wait io request Some drivers need to wait for I/O from the underlying device, creating an unwanted dependency on the underlying transport layer. This patch adds wait() to the interface, thereby removing usbhid from the lion's share of the drivers.
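A sketch (not from this patch) of combining the two new helpers for a synchronous report update:

static void example_set_feature_sync(struct hid_device *hdev,
				     struct hid_report *report)
{
	hid_hw_request(hdev, report, HID_REQ_SET_REPORT);
	hid_hw_wait(hdev);	/* block until buffered transfers have completed */
}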
Signed-off-by: Henrik Rydberg Signed-off-by: Benjamin Tissoires Reviewed-by: Mika Westerberg Signed-off-by: Jiri Kosina --- drivers/hid/usbhid/hid-core.c | 1 + include/linux/hid.h | 14 ++++++++++++++ 2 files changed, 15 insertions(+) (limited to 'include/linux') diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c index 366fd09d257d..99d95d3368b5 100644 --- a/drivers/hid/usbhid/hid-core.c +++ b/drivers/hid/usbhid/hid-core.c @@ -1264,6 +1264,7 @@ static struct hid_ll_driver usb_hid_driver = { .power = usbhid_power, .hidinput_input_event = usb_hidinput_input_event, .request = usbhid_request, + .wait = usbhid_wait_io, }; static int usbhid_probe(struct usb_interface *intf, const struct usb_device_id *id) diff --git a/include/linux/hid.h b/include/linux/hid.h index 261c713d4842..7071eb3d36c7 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -663,6 +663,7 @@ struct hid_driver { * @parse: this method is called only once to parse the device data, * shouldn't allocate anything to not leak memory * @request: send report request to device (e.g. feature report) + * @wait: wait for buffered io to complete (send/recv reports) */ struct hid_ll_driver { int (*start)(struct hid_device *hdev); @@ -681,6 +682,8 @@ struct hid_ll_driver { void (*request)(struct hid_device *hdev, struct hid_report *report, int reqtype); + int (*wait)(struct hid_device *hdev); + }; #define PM_HINT_FULLON 1<<5 @@ -903,6 +906,17 @@ static inline void hid_hw_request(struct hid_device *hdev, hdev->ll_driver->request(hdev, report, reqtype); } +/** + * hid_hw_wait - wait for buffered io to complete + * + * @hdev: hid device + */ +static inline void hid_hw_wait(struct hid_device *hdev) +{ + if (hdev->ll_driver->wait) + hdev->ll_driver->wait(hdev); +} + int hid_report_raw_event(struct hid_device *hid, int type, u8 *data, int size, int interrupt); -- cgit From d6b0c58048d2c8c6f4955c37f670125b2792cd14 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sat, 23 Feb 2013 13:11:14 -0800 Subject: devres: allow adding custom actions to the stack Sometimes drivers need to execute one-off actions in their error handling or device teardown paths. An example would be toggling a GPIO line to reset the controlled device into a predefined state. To allow performing such actions when using managed resources, let's allow adding them to the stack/group of devres resources. Acked-by: Tejun Heo Acked-by: Greg Kroah-Hartman Signed-off-by: Dmitry Torokhov --- drivers/base/devres.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/device.h | 4 +++ 2 files changed, 78 insertions(+) (limited to 'include/linux') diff --git a/drivers/base/devres.c b/drivers/base/devres.c index 8731979d668a..724957a13d48 100644 --- a/drivers/base/devres.c +++ b/drivers/base/devres.c @@ -670,6 +670,80 @@ int devres_release_group(struct device *dev, void *id) } EXPORT_SYMBOL_GPL(devres_release_group); +/* + * Custom devres actions allow inserting a simple function call + * into the teardown sequence.
+ */ + +struct action_devres { + void *data; + void (*action)(void *); +}; + +static int devm_action_match(struct device *dev, void *res, void *p) +{ + struct action_devres *devres = res; + struct action_devres *target = p; + + return devres->action == target->action && + devres->data == target->data; +} + +static void devm_action_release(struct device *dev, void *res) +{ + struct action_devres *devres = res; + + devres->action(devres->data); +} + +/** + * devm_add_action() - add a custom action to list of managed resources + * @dev: Device that owns the action + * @action: Function that should be called + * @data: Pointer to data passed to @action implementation + * + * This adds a custom action to the list of managed resources so that + * it gets executed as part of standard resource unwinding. + */ +int devm_add_action(struct device *dev, void (*action)(void *), void *data) +{ + struct action_devres *devres; + + devres = devres_alloc(devm_action_release, + sizeof(struct action_devres), GFP_KERNEL); + if (!devres) + return -ENOMEM; + + devres->data = data; + devres->action = action; + + devres_add(dev, devres); + return 0; +} +EXPORT_SYMBOL_GPL(devm_add_action); + +/** + * devm_remove_action() - removes previously added custom action + * @dev: Device that owns the action + * @action: Function implementing the action + * @data: Pointer to data passed to @action implementation + * + * Removes instance of @action previously added by devm_add_action(). + * Both action and data should match one of the existing entries. + */ +void devm_remove_action(struct device *dev, void (*action)(void *), void *data) +{ + struct action_devres devres = { + .data = data, + .action = action, + }; + + WARN_ON(devres_destroy(dev, devm_action_release, devm_action_match, + &devres)); + +} +EXPORT_SYMBOL_GPL(devm_remove_action); + /* * Managed kzalloc/kfree */ diff --git a/include/linux/device.h b/include/linux/device.h index 86ef6ab553b1..854b247bf5f9 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -567,6 +567,10 @@ extern void devm_kfree(struct device *dev, void *p); void __iomem *devm_request_and_ioremap(struct device *dev, struct resource *res); +/* allows adding/removing a custom action to the devres stack */ +int devm_add_action(struct device *dev, void (*action)(void *), void *data); +void devm_remove_action(struct device *dev, void (*action)(void *), void *data); + struct device_dma_parameters { /* * a low level driver may set these to teach IOMMU code about -- cgit From c849a6143bec520aff2a6646518b0d041402428b Mon Sep 17 00:00:00 2001 From: Andrew de los Reyes Date: Mon, 18 Feb 2013 09:20:21 -0800 Subject: HID: Separate struct hid_device's driver_lock into two locks. This patch separates struct hid_device's driver_lock into two. The goal is to allow hid device drivers to receive input during their probe() or remove() function calls. This is necessary because some drivers need to communicate with the device to determine parameters needed during probe (e.g., size of a multi-touch surface), and if possible, may prefer to communicate with a device on host-initiated disconnect (e.g., to put it into a low-power state). Historically, three functions used driver_lock: - hid_device_probe: blocks to acquire lock - hid_device_remove: blocks to acquire lock - hid_input_report: if locked returns -EBUSY, else acquires lock This patch adds another lock (driver_input_lock) which is used to block input from occurring. The lock behavior is now: - hid_device_probe: blocks to acq.
driver_lock, then driver_input_lock - hid_device_remove: blocks to acq. driver_lock, then driver_input_lock - hid_input_report: if driver_input_lock locked returns -EBUSY, else acquires driver_input_lock This patch also adds two helper functions to be called during probe() or remove(): hid_device_io_start() and hid_device_io_stop(). These functions lock and unlock, respectively, driver_input_lock; they also make a note of whether they did so that hid-core knows if a driver has changed the lock state. This patch results in no behavior change for existing devices and drivers. However, during a probe() or remove() function call in a driver, that driver may now selectively call hid_device_io_start() to let input events come through, then optionally call hid_device_io_stop() to stop them. Signed-off-by: Andrew de los Reyes Signed-off-by: Jiri Kosina --- drivers/hid/hid-core.c | 24 +++++++++++++++++++++--- include/linux/hid.h | 46 +++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 66 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index ff75cabf7393..680068c0c46a 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1267,7 +1267,7 @@ int hid_input_report(struct hid_device *hid, int type, u8 *data, int size, int i if (!hid) return -ENODEV; - if (down_trylock(&hid->driver_lock)) + if (down_trylock(&hid->driver_input_lock)) return -EBUSY; if (!hid->driver) { @@ -1324,7 +1324,7 @@ nomem: ret = hid_report_raw_event(hid, type, data, size, interrupt); unlock: - up(&hid->driver_lock); + up(&hid->driver_input_lock); return ret; } EXPORT_SYMBOL_GPL(hid_input_report); @@ -1845,6 +1845,11 @@ static int hid_device_probe(struct device *dev) if (down_interruptible(&hdev->driver_lock)) return -EINTR; + if (down_interruptible(&hdev->driver_input_lock)) { + ret = -EINTR; + goto unlock_driver_lock; + } + hdev->io_started = false; if (!hdev->driver) { id = hid_match_device(hdev, hdrv); @@ -1867,6 +1872,9 @@ static int hid_device_probe(struct device *dev) } } unlock: + if (!hdev->io_started) + up(&hdev->driver_input_lock); +unlock_driver_lock: up(&hdev->driver_lock); return ret; } @@ -1875,9 +1883,15 @@ static int hid_device_remove(struct device *dev) { struct hid_device *hdev = container_of(dev, struct hid_device, dev); struct hid_driver *hdrv; + int ret = 0; if (down_interruptible(&hdev->driver_lock)) return -EINTR; + if (down_interruptible(&hdev->driver_input_lock)) { + ret = -EINTR; + goto unlock_driver_lock; + } + hdev->io_started = false; hdrv = hdev->driver; if (hdrv) { @@ -1889,8 +1903,11 @@ static int hid_device_remove(struct device *dev) hdev->driver = NULL; } + if (!hdev->io_started) + up(&hdev->driver_input_lock); +unlock_driver_lock: up(&hdev->driver_lock); - return 0; + return ret; } static ssize_t modalias_show(struct device *dev, struct device_attribute *a, @@ -2329,6 +2346,7 @@ struct hid_device *hid_allocate_device(void) init_waitqueue_head(&hdev->debug_wait); INIT_LIST_HEAD(&hdev->debug_list); sema_init(&hdev->driver_lock, 1); + sema_init(&hdev->driver_input_lock, 1); return hdev; } diff --git a/include/linux/hid.h b/include/linux/hid.h index e14b465b1146..895b85639dec 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -456,7 +456,8 @@ struct hid_device { /* device report descriptor */ unsigned country; /* HID country */ struct hid_report_enum report_enum[HID_REPORT_TYPES]; - struct semaphore driver_lock; /* protects the current driver */ + struct semaphore driver_lock; /* protects the 
current driver, except during input */ + struct semaphore driver_input_lock; /* protects the current driver */ struct device dev; /* device */ struct hid_driver *driver; struct hid_ll_driver *ll_driver; @@ -477,6 +478,7 @@ struct hid_device { /* device report descriptor */ unsigned int status; /* see STAT flags above */ unsigned claimed; /* Claimed by hidinput, hiddev? */ unsigned quirks; /* Various quirks the device can pull on us */ + bool io_started; /* Protected by driver_lock. If IO has started */ struct list_head inputs; /* The list of inputs */ void *hiddev; /* The hiddev structure */ @@ -599,6 +601,10 @@ struct hid_usage_id { * @resume: invoked on resume if device was not reset (NULL means nop) * @reset_resume: invoked on resume if device was reset (NULL means nop) * + * probe should return -errno on error, or 0 on success. During probe, + * input will not be passed to raw_event unless hid_device_io_start is + * called. + * * raw_event and event should return 0 on no action performed, 1 when no * further processing should be done and negative on error * @@ -737,6 +743,44 @@ const struct hid_device_id *hid_match_id(struct hid_device *hdev, const struct hid_device_id *id); s32 hid_snto32(__u32 value, unsigned n); +/** + * hid_device_io_start - enable HID input during probe, remove + * + * @hid - the device + * + * This should only be called during probe or remove and only be + * called by the thread calling probe or remove. It will allow + * incoming packets to be delivered to the driver. + */ +static inline void hid_device_io_start(struct hid_device *hid) { + if (hid->io_started) { + dev_warn(&hid->dev, "io already started"); + return; + } + hid->io_started = true; + up(&hid->driver_input_lock); +} + +/** + * hid_device_io_stop - disable HID input during probe, remove + * + * @hid - the device + * + * Should only be called after hid_device_io_start. It will prevent + * incoming packets from going to the driver for the duration of + * probe, remove. If called during probe, packets will still go to the + * driver after probe is complete. This function should only be called + * by the thread calling probe or remove. + */ +static inline void hid_device_io_stop(struct hid_device *hid) { + if (!hid->io_started) { + dev_warn(&hid->dev, "io already stopped"); + return; + } + hid->io_started = false; + down(&hid->driver_input_lock); +} + /** + hid_map_usage - map usage input bits * -- cgit From c8801a8e715d7793e1e7bcd2f6fe132234741753 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 19 Mar 2012 16:35:48 +0000 Subject: regulator: core: Mark all get and enable calls as __must_check It's generally important that devices have power when they expect it, so drivers really ought to be checking for errors on the power-up paths.
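With the annotation in place, callers that ignore the return value get a compiler warning. A typical consumer pattern that satisfies __must_check, shown here only as an illustration (the function and variable names are hypothetical):

static int example_power_up(struct device *dev, struct regulator *vcc)
{
	int ret;

	ret = regulator_enable(vcc);	/* ignoring this return value now warns */
	if (ret) {
		dev_err(dev, "failed to enable vcc: %d\n", ret);
		return ret;
	}

	return 0;
}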
Signed-off-by: Mark Brown --- include/linux/regulator/consumer.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index 7bc732ce6e50..145022a83085 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -141,18 +141,18 @@ void regulator_put(struct regulator *regulator); void devm_regulator_put(struct regulator *regulator); /* regulator output control and status */ -int regulator_enable(struct regulator *regulator); +int __must_check regulator_enable(struct regulator *regulator); int regulator_disable(struct regulator *regulator); int regulator_force_disable(struct regulator *regulator); int regulator_is_enabled(struct regulator *regulator); int regulator_disable_deferred(struct regulator *regulator, int ms); -int regulator_bulk_get(struct device *dev, int num_consumers, - struct regulator_bulk_data *consumers); -int devm_regulator_bulk_get(struct device *dev, int num_consumers, - struct regulator_bulk_data *consumers); -int regulator_bulk_enable(int num_consumers, - struct regulator_bulk_data *consumers); +int __must_check regulator_bulk_get(struct device *dev, int num_consumers, + struct regulator_bulk_data *consumers); +int __must_check devm_regulator_bulk_get(struct device *dev, int num_consumers, + struct regulator_bulk_data *consumers); +int __must_check regulator_bulk_enable(int num_consumers, + struct regulator_bulk_data *consumers); int regulator_bulk_disable(int num_consumers, struct regulator_bulk_data *consumers); int regulator_bulk_force_disable(int num_consumers, -- cgit From f19b00da8ed37db4e3891fe534fcf3a605a0e562 Mon Sep 17 00:00:00 2001 From: "Kim, Milo" Date: Mon, 18 Feb 2013 06:50:39 +0000 Subject: regulator: core: support shared enable GPIO concept A regulator can be enabled by an external GPIO pin; this is configurable in the regulator_config. Until now, a GPIO could be owned by only one regulator device, but on some boards multiple regulators are enabled by one shared GPIO pin. This patch removes that limitation, allowing regulators to share an enable GPIO. New list for enable GPIOs: 'regulator_ena_gpio_list' manages the enable GPIOs in use. New structure for supporting a shared enable GPIO: 'regulator_enable_gpio'. Its enable count is used for balancing GPIO control: the count is incremented when the GPIO is enabled and decremented when it is disabled. Reference count: 'request_count' is incremented/decremented on requesting/freeing the GPIO, making sure the GPIO is only freed when it has no users. How it works: if the GPIO is already in use, requesting it again is skipped; if the GPIO is new, it is requested and added to the enable GPIO list. This list is used for balancing the enable GPIO count and pin control. GPIO and invert handling moved: 'ena_gpio' and 'ena_gpio_invert' of the regulator_config are now handled in the new function regulator_ena_gpio_request(), which uses the regulator_enable_gpio structure rather than regulator_dev.
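For illustration (not part of this patch), two regulators on one board can now pass the same enable GPIO number in their configs; the GPIO number below is hypothetical, and the second registration only bumps request_count instead of requesting the pin again:

static struct regulator_config ldo1_config = {
	.ena_gpio	= 100,			/* hypothetical shared enable pin */
	.ena_gpio_flags	= GPIOF_OUT_INIT_HIGH,
};

static struct regulator_config ldo2_config = {
	.ena_gpio	= 100,			/* same pin: reuses the first request */
	.ena_gpio_flags	= GPIOF_OUT_INIT_HIGH,
};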
Signed-off-by: Milo(Woogyom) Kim Reviewed-by: Axel Lin Signed-off-by: Mark Brown --- drivers/regulator/core.c | 86 +++++++++++++++++++++++++++++++++++----- include/linux/regulator/driver.h | 2 + 2 files changed, 78 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index da9782bd27d0..71d6adc4eeab 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -51,6 +51,7 @@ static DEFINE_MUTEX(regulator_list_mutex); static LIST_HEAD(regulator_list); static LIST_HEAD(regulator_map_list); +static LIST_HEAD(regulator_ena_gpio_list); static bool has_full_constraints; static bool board_wants_dummy_regulator; @@ -68,6 +69,19 @@ struct regulator_map { struct regulator_dev *regulator; }; +/* + * struct regulator_enable_gpio + * + * Management for shared enable GPIO pin + */ +struct regulator_enable_gpio { + struct list_head list; + int gpio; + u32 enable_count; /* a number of enabled shared GPIO */ + u32 request_count; /* a number of requested shared GPIO */ + unsigned int ena_gpio_invert:1; +}; + /* * struct regulator * @@ -1456,6 +1470,65 @@ void devm_regulator_put(struct regulator *regulator) } EXPORT_SYMBOL_GPL(devm_regulator_put); +/* Manage enable GPIO list. Same GPIO pin can be shared among regulators */ +static int regulator_ena_gpio_request(struct regulator_dev *rdev, + const struct regulator_config *config) +{ + struct regulator_enable_gpio *pin; + int ret; + + list_for_each_entry(pin, ®ulator_ena_gpio_list, list) { + if (pin->gpio == config->ena_gpio) { + rdev_dbg(rdev, "GPIO %d is already used\n", + config->ena_gpio); + goto update_ena_gpio_to_rdev; + } + } + + ret = gpio_request_one(config->ena_gpio, + GPIOF_DIR_OUT | config->ena_gpio_flags, + rdev_get_name(rdev)); + if (ret) + return ret; + + pin = kzalloc(sizeof(struct regulator_enable_gpio), GFP_KERNEL); + if (pin == NULL) { + gpio_free(config->ena_gpio); + return -ENOMEM; + } + + pin->gpio = config->ena_gpio; + pin->ena_gpio_invert = config->ena_gpio_invert; + list_add(&pin->list, ®ulator_ena_gpio_list); + +update_ena_gpio_to_rdev: + pin->request_count++; + rdev->ena_pin = pin; + return 0; +} + +static void regulator_ena_gpio_free(struct regulator_dev *rdev) +{ + struct regulator_enable_gpio *pin, *n; + + if (!rdev->ena_pin) + return; + + /* Free the GPIO only in case of no use */ + list_for_each_entry_safe(pin, n, ®ulator_ena_gpio_list, list) { + if (pin->gpio == rdev->ena_pin->gpio) { + if (pin->request_count <= 1) { + pin->request_count = 0; + gpio_free(pin->gpio); + list_del(&pin->list); + kfree(pin); + } else { + pin->request_count--; + } + } + } +} + static int _regulator_do_enable(struct regulator_dev *rdev) { int ret, delay; @@ -3435,18 +3508,13 @@ regulator_register(const struct regulator_desc *regulator_desc, dev_set_drvdata(&rdev->dev, rdev); if (config->ena_gpio && gpio_is_valid(config->ena_gpio)) { - ret = gpio_request_one(config->ena_gpio, - GPIOF_DIR_OUT | config->ena_gpio_flags, - rdev_get_name(rdev)); + ret = regulator_ena_gpio_request(rdev, config); if (ret != 0) { rdev_err(rdev, "Failed to request enable GPIO%d: %d\n", config->ena_gpio, ret); goto wash; } - rdev->ena_gpio = config->ena_gpio; - rdev->ena_gpio_invert = config->ena_gpio_invert; - if (config->ena_gpio_flags & GPIOF_OUT_INIT_HIGH) rdev->ena_gpio_state = 1; @@ -3522,8 +3590,7 @@ unset_supplies: scrub: if (rdev->supply) _regulator_put(rdev->supply); - if (rdev->ena_gpio) - gpio_free(rdev->ena_gpio); + regulator_ena_gpio_free(rdev); kfree(rdev->constraints); wash: 
device_unregister(&rdev->dev); @@ -3558,8 +3625,7 @@ void regulator_unregister(struct regulator_dev *rdev) unset_regulator_supplies(rdev); list_del(&rdev->list); kfree(rdev->constraints); - if (rdev->ena_gpio) - gpio_free(rdev->ena_gpio); + regulator_ena_gpio_free(rdev); device_unregister(&rdev->dev); mutex_unlock(&regulator_list_mutex); } diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 23070fd83872..a467d11dd67d 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -22,6 +22,7 @@ struct regmap; struct regulator_dev; struct regulator_init_data; +struct regulator_enable_gpio; enum regulator_status { REGULATOR_STATUS_OFF, @@ -300,6 +301,7 @@ struct regulator_dev { struct dentry *debugfs; + struct regulator_enable_gpio *ena_pin; int ena_gpio; unsigned int ena_gpio_invert:1; unsigned int ena_gpio_state:1; }; -- cgit From 7b74d149247c8972da1cec3e4c70b67049aaeb69 Mon Sep 17 00:00:00 2001 From: "Kim, Milo" Date: Mon, 18 Feb 2013 06:50:55 +0000 Subject: regulator: core: use regulator_ena_pin member

The regulator_dev now carries a pointer to the regulator_enable_gpio structure: 'ena_gpio' and 'ena_gpio_invert' were moved into regulator_enable_gpio.

regulator_dev         --->   regulator_enable_gpio
  .ena_gpio                     .gpio
  .ena_gpio_invert              .ena_gpio_invert

The 'ena_pin' pointer is used to check for a valid enable GPIO pin.

Signed-off-by: Milo(Woogyom) Kim Reviewed-by: Axel Lin Signed-off-by: Mark Brown --- drivers/regulator/core.c | 6 +++--- include/linux/regulator/driver.h | 2 -- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 57d434d3145a..6c8c82406cd9 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -1945,7 +1945,7 @@ EXPORT_SYMBOL_GPL(regulator_disable_regmap); static int _regulator_is_enabled(struct regulator_dev *rdev) { /* A GPIO control always takes precedence */ - if (rdev->ena_gpio) + if (rdev->ena_pin) return rdev->ena_gpio_state; /* If we don't know then assume that the regulator is always on */ @@ -3344,7 +3344,7 @@ static int add_regulator_attributes(struct regulator_dev *rdev) if (status < 0) return status; } - if (rdev->ena_gpio || ops->is_enabled) { + if (rdev->ena_pin || ops->is_enabled) { status = device_create_file(dev, &dev_attr_state); if (status < 0) return status; @@ -3556,7 +3556,7 @@ regulator_register(const struct regulator_desc *regulator_desc, if (config->ena_gpio_flags & GPIOF_OUT_INIT_HIGH) rdev->ena_gpio_state = 1; - if (rdev->ena_gpio_invert) + if (config->ena_gpio_invert) rdev->ena_gpio_state = !rdev->ena_gpio_state; } diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index a467d11dd67d..7b7aeec04f86 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -302,8 +302,6 @@ struct regulator_dev { struct dentry *debugfs; struct regulator_enable_gpio *ena_pin; - int ena_gpio; - unsigned int ena_gpio_invert:1; unsigned int ena_gpio_state:1; }; -- cgit From 07fe6e00f6cca6fef85a14a1dc3ed4f2e35d3f0b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 21 Jan 2013 15:03:44 -0500 Subject: get rid of duplicate logics in __SC_....[1-6] definitions

All those guys have the same form - "take a list of type/name pairs, apply some macro to each of them". Abstract that part away, convert all __SC_FOO##x(__VA_ARGS__) to __MAP(x,__SC_FOO,__VA_ARGS__).
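To make the mechanics concrete, a small userspace sketch; the __MAP*, __SC_DECL and __SC_STR_ADECL definitions mirror the patch below, while demo_write and its argument list are invented for the example:

#include <stdio.h>

#define __MAP1(m,t,a) m(t,a)
#define __MAP2(m,t,a,...) m(t,a), __MAP1(m,__VA_ARGS__)
#define __MAP3(m,t,a,...) m(t,a), __MAP2(m,__VA_ARGS__)
#define __MAP(n,...) __MAP##n(__VA_ARGS__)

#define __SC_DECL(t, a) t a
#define __SC_STR_ADECL(t, a) #a

/* Expands to: static long demo_write(int fd, const char * buf, unsigned len) */
static long demo_write(__MAP(3, __SC_DECL, int, fd, const char *, buf, unsigned, len))
{
	return (long)len + (buf != NULL) + fd;
}

int main(void)
{
	/* The same pair list fed through a different macro; expands to { "fd", "buf", "len" } */
	const char *names[] = { __MAP(3, __SC_STR_ADECL, int, fd, const char *, buf, unsigned, len) };

	printf("%s %s %s -> %ld\n", names[0], names[1], names[2],
	       demo_write(0, "x", 1));
	return 0;
}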
Signed-off-by: Al Viro --- include/linux/compat.h | 18 ++++------- include/linux/syscalls.h | 82 +++++++++++++++++++----------------------------- 2 files changed, 38 insertions(+), 62 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index 76a87fb57ac2..8c1dfc8d830d 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -27,12 +27,6 @@ #define __SC_DELOUSE(t,v) ((t)(unsigned long)(v)) #endif -#define __SC_CCAST1(t1, a1) __SC_DELOUSE(t1,a1) -#define __SC_CCAST2(t2, a2, ...) __SC_DELOUSE(t2,a2), __SC_CCAST1(__VA_ARGS__) -#define __SC_CCAST3(t3, a3, ...) __SC_DELOUSE(t3,a3), __SC_CCAST2(__VA_ARGS__) -#define __SC_CCAST4(t4, a4, ...) __SC_DELOUSE(t4,a4), __SC_CCAST3(__VA_ARGS__) -#define __SC_CCAST5(t5, a5, ...) __SC_DELOUSE(t5,a5), __SC_CCAST4(__VA_ARGS__) -#define __SC_CCAST6(t6, a6, ...) __SC_DELOUSE(t6,a6), __SC_CCAST5(__VA_ARGS__) #define COMPAT_SYSCALL_DEFINE1(name, ...) \ COMPAT_SYSCALL_DEFINEx(1, _##name, __VA_ARGS__) #define COMPAT_SYSCALL_DEFINE2(name, ...) \ @@ -49,19 +43,19 @@ #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS #define COMPAT_SYSCALL_DEFINEx(x, name, ...) \ - asmlinkage long compat_sys##name(__SC_DECL##x(__VA_ARGS__)); \ - static inline long C_SYSC##name(__SC_DECL##x(__VA_ARGS__)); \ - asmlinkage long compat_SyS##name(__SC_LONG##x(__VA_ARGS__)) \ + asmlinkage long compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ + static inline long C_SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ + asmlinkage long compat_SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__))\ { \ - return (long) C_SYSC##name(__SC_CCAST##x(__VA_ARGS__)); \ + return C_SYSC##name(__MAP(x,__SC_DELOUSE,__VA_ARGS__)); \ } \ SYSCALL_ALIAS(compat_sys##name, compat_SyS##name); \ - static inline long C_SYSC##name(__SC_DECL##x(__VA_ARGS__)) + static inline long C_SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__)) #else /* CONFIG_HAVE_SYSCALL_WRAPPERS */ #define COMPAT_SYSCALL_DEFINEx(x, name, ...) \ - asmlinkage long compat_sys##name(__SC_DECL##x(__VA_ARGS__)) + asmlinkage long compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) #endif /* CONFIG_HAVE_SYSCALL_WRAPPERS */ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 313a8e0a6553..f9411f0c1c80 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -78,49 +78,31 @@ struct sigaltstack; #include #include -#define __SC_DECL1(t1, a1) t1 a1 -#define __SC_DECL2(t2, a2, ...) t2 a2, __SC_DECL1(__VA_ARGS__) -#define __SC_DECL3(t3, a3, ...) t3 a3, __SC_DECL2(__VA_ARGS__) -#define __SC_DECL4(t4, a4, ...) t4 a4, __SC_DECL3(__VA_ARGS__) -#define __SC_DECL5(t5, a5, ...) t5 a5, __SC_DECL4(__VA_ARGS__) -#define __SC_DECL6(t6, a6, ...) t6 a6, __SC_DECL5(__VA_ARGS__) - -#define __SC_LONG1(t1, a1) long a1 -#define __SC_LONG2(t2, a2, ...) long a2, __SC_LONG1(__VA_ARGS__) -#define __SC_LONG3(t3, a3, ...) long a3, __SC_LONG2(__VA_ARGS__) -#define __SC_LONG4(t4, a4, ...) long a4, __SC_LONG3(__VA_ARGS__) -#define __SC_LONG5(t5, a5, ...) long a5, __SC_LONG4(__VA_ARGS__) -#define __SC_LONG6(t6, a6, ...) long a6, __SC_LONG5(__VA_ARGS__) - -#define __SC_CAST1(t1, a1) (t1) a1 -#define __SC_CAST2(t2, a2, ...) (t2) a2, __SC_CAST1(__VA_ARGS__) -#define __SC_CAST3(t3, a3, ...) (t3) a3, __SC_CAST2(__VA_ARGS__) -#define __SC_CAST4(t4, a4, ...) (t4) a4, __SC_CAST3(__VA_ARGS__) -#define __SC_CAST5(t5, a5, ...) (t5) a5, __SC_CAST4(__VA_ARGS__) -#define __SC_CAST6(t6, a6, ...) 
(t6) a6, __SC_CAST5(__VA_ARGS__) - -#define __SC_TEST(type) BUILD_BUG_ON(sizeof(type) > sizeof(long)) -#define __SC_TEST1(t1, a1) __SC_TEST(t1) -#define __SC_TEST2(t2, a2, ...) __SC_TEST(t2); __SC_TEST1(__VA_ARGS__) -#define __SC_TEST3(t3, a3, ...) __SC_TEST(t3); __SC_TEST2(__VA_ARGS__) -#define __SC_TEST4(t4, a4, ...) __SC_TEST(t4); __SC_TEST3(__VA_ARGS__) -#define __SC_TEST5(t5, a5, ...) __SC_TEST(t5); __SC_TEST4(__VA_ARGS__) -#define __SC_TEST6(t6, a6, ...) __SC_TEST(t6); __SC_TEST5(__VA_ARGS__) +/* + * __MAP - apply a macro to syscall arguments + * __MAP(n, m, t1, a1, t2, a2, ..., tn, an) will expand to + * m(t1, a1), m(t2, a2), ..., m(tn, an) + * The first argument must be equal to the amount of type/name + * pairs given. Note that this list of pairs (i.e. the arguments + * of __MAP starting at the third one) is in the same format as + * for SYSCALL_DEFINE/COMPAT_SYSCALL_DEFINE + */ +#define __MAP1(m,t,a) m(t,a) +#define __MAP2(m,t,a,...) m(t,a), __MAP1(m,__VA_ARGS__) +#define __MAP3(m,t,a,...) m(t,a), __MAP2(m,__VA_ARGS__) +#define __MAP4(m,t,a,...) m(t,a), __MAP3(m,__VA_ARGS__) +#define __MAP5(m,t,a,...) m(t,a), __MAP4(m,__VA_ARGS__) +#define __MAP6(m,t,a,...) m(t,a), __MAP5(m,__VA_ARGS__) +#define __MAP(n,...) __MAP##n(__VA_ARGS__) + +#define __SC_DECL(t, a) t a +#define __SC_LONG(t, a) long a +#define __SC_CAST(t, a) (t) a +#define __SC_TEST(t, a) (void)BUILD_BUG_ON_ZERO(sizeof(type) > sizeof(long)) #ifdef CONFIG_FTRACE_SYSCALLS -#define __SC_STR_ADECL1(t, a) #a -#define __SC_STR_ADECL2(t, a, ...) #a, __SC_STR_ADECL1(__VA_ARGS__) -#define __SC_STR_ADECL3(t, a, ...) #a, __SC_STR_ADECL2(__VA_ARGS__) -#define __SC_STR_ADECL4(t, a, ...) #a, __SC_STR_ADECL3(__VA_ARGS__) -#define __SC_STR_ADECL5(t, a, ...) #a, __SC_STR_ADECL4(__VA_ARGS__) -#define __SC_STR_ADECL6(t, a, ...) #a, __SC_STR_ADECL5(__VA_ARGS__) - -#define __SC_STR_TDECL1(t, a) #t -#define __SC_STR_TDECL2(t, a, ...) #t, __SC_STR_TDECL1(__VA_ARGS__) -#define __SC_STR_TDECL3(t, a, ...) #t, __SC_STR_TDECL2(__VA_ARGS__) -#define __SC_STR_TDECL4(t, a, ...) #t, __SC_STR_TDECL3(__VA_ARGS__) -#define __SC_STR_TDECL5(t, a, ...) #t, __SC_STR_TDECL4(__VA_ARGS__) -#define __SC_STR_TDECL6(t, a, ...) #t, __SC_STR_TDECL5(__VA_ARGS__) +#define __SC_STR_ADECL(t, a) #a +#define __SC_STR_TDECL(t, a) #t extern struct ftrace_event_class event_class_syscall_enter; extern struct ftrace_event_class event_class_syscall_exit; @@ -217,10 +199,10 @@ extern struct trace_event_functions exit_syscall_print_funcs; #ifdef CONFIG_FTRACE_SYSCALLS #define SYSCALL_DEFINEx(x, sname, ...) \ static const char *types_##sname[] = { \ - __SC_STR_TDECL##x(__VA_ARGS__) \ + __MAP(x,__SC_STR_TDECL,__VA_ARGS__) \ }; \ static const char *args_##sname[] = { \ - __SC_STR_ADECL##x(__VA_ARGS__) \ + __MAP(x,__SC_STR_ADECL,__VA_ARGS__) \ }; \ SYSCALL_METADATA(sname, x); \ __SYSCALL_DEFINEx(x, sname, __VA_ARGS__) @@ -234,21 +216,21 @@ extern struct trace_event_functions exit_syscall_print_funcs; #define SYSCALL_DEFINE(name) static inline long SYSC_##name #define __SYSCALL_DEFINEx(x, name, ...) 
\ - asmlinkage long sys##name(__SC_DECL##x(__VA_ARGS__)); \ - static inline long SYSC##name(__SC_DECL##x(__VA_ARGS__)); \ - asmlinkage long SyS##name(__SC_LONG##x(__VA_ARGS__)) \ + asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ + static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ + asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ { \ - __SC_TEST##x(__VA_ARGS__); \ - return (long) SYSC##name(__SC_CAST##x(__VA_ARGS__)); \ + __MAP(x,__SC_TEST,__VA_ARGS__); \ + return SYSC##name(__MAP(x,__SC_CAST,__VA_ARGS__)); \ } \ SYSCALL_ALIAS(sys##name, SyS##name); \ - static inline long SYSC##name(__SC_DECL##x(__VA_ARGS__)) + static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__)) #else /* CONFIG_HAVE_SYSCALL_WRAPPERS */ #define SYSCALL_DEFINE(name) asmlinkage long sys_##name #define __SYSCALL_DEFINEx(x, name, ...) \ - asmlinkage long sys##name(__SC_DECL##x(__VA_ARGS__)) + asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) #endif /* CONFIG_HAVE_SYSCALL_WRAPPERS */ -- cgit From 4a0fd5bf0fd0795af8f1be3b261f5cf146a4cb9b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 21 Jan 2013 15:16:58 -0500 Subject: teach SYSCALL_DEFINE how to deal with long long/unsigned long long ... and convert a bunch of SYSCALL_DEFINE ones to SYSCALL_DEFINE, killing the boilerplate crap around them. Signed-off-by: Al Viro --- arch/s390/kernel/sys_s390.c | 14 ++------------ fs/dcookies.c | 9 +-------- fs/notify/fanotify/fanotify_user.c | 17 +++-------------- fs/open.c | 28 +++------------------------- fs/read_write.c | 24 ++++-------------------- fs/sync.c | 26 ++++---------------------- include/linux/syscalls.h | 5 +++-- mm/fadvise.c | 18 ++---------------- mm/readahead.c | 9 +-------- 9 files changed, 23 insertions(+), 127 deletions(-) (limited to 'include/linux') diff --git a/arch/s390/kernel/sys_s390.c b/arch/s390/kernel/sys_s390.c index d0964d22adb5..23eb222c1658 100644 --- a/arch/s390/kernel/sys_s390.c +++ b/arch/s390/kernel/sys_s390.c @@ -132,19 +132,9 @@ SYSCALL_DEFINE1(s390_fadvise64_64, struct fadvise64_64_args __user *, args) * to * %r2: fd, %r3: mode, %r4/%r5: offset, 96(%r15)-103(%r15): len */ -SYSCALL_DEFINE(s390_fallocate)(int fd, int mode, loff_t offset, - u32 len_high, u32 len_low) +SYSCALL_DEFINE5(s390_fallocate, int, fd, int, mode, loff_t, offset, + u32, len_high, u32, len_low) { return sys_fallocate(fd, mode, offset, ((u64)len_high << 32) | len_low); } -#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS -asmlinkage long SyS_s390_fallocate(long fd, long mode, loff_t offset, - long len_high, long len_low) -{ - return SYSC_s390_fallocate((int) fd, (int) mode, offset, - (u32) len_high, (u32) len_low); -} -SYSCALL_ALIAS(sys_s390_fallocate, SyS_s390_fallocate); -#endif - #endif diff --git a/fs/dcookies.c b/fs/dcookies.c index 17c779967828..f08375b97ffb 100644 --- a/fs/dcookies.c +++ b/fs/dcookies.c @@ -145,7 +145,7 @@ out: /* And here is where the userspace process can look up the cookie value * to retrieve the path. 
*/ -SYSCALL_DEFINE(lookup_dcookie)(u64 cookie64, char __user * buf, size_t len) +SYSCALL_DEFINE3(lookup_dcookie, u64, cookie64, char __user *, buf, size_t, len) { unsigned long cookie = (unsigned long)cookie64; int err = -EINVAL; @@ -201,13 +201,6 @@ out: mutex_unlock(&dcookie_mutex); return err; } -#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS -asmlinkage long SyS_lookup_dcookie(u64 cookie64, long buf, long len) -{ - return SYSC_lookup_dcookie(cookie64, (char __user *) buf, (size_t) len); -} -SYSCALL_ALIAS(sys_lookup_dcookie, SyS_lookup_dcookie); -#endif static int dcookie_init(void) { diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 5d8444268a16..d0be29fa94cf 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -755,9 +755,9 @@ out_destroy_group: return fd; } -SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags, - __u64 mask, int dfd, - const char __user * pathname) +SYSCALL_DEFINE5(fanotify_mark, int, fanotify_fd, unsigned int, flags, + __u64, mask, int, dfd, + const char __user *, pathname) { struct inode *inode = NULL; struct vfsmount *mnt = NULL; @@ -857,17 +857,6 @@ fput_and_out: return ret; } -#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS -asmlinkage long SyS_fanotify_mark(long fanotify_fd, long flags, __u64 mask, - long dfd, long pathname) -{ - return SYSC_fanotify_mark((int) fanotify_fd, (unsigned int) flags, - mask, (int) dfd, - (const char __user *) pathname); -} -SYSCALL_ALIAS(sys_fanotify_mark, SyS_fanotify_mark); -#endif - /* * fanotify_user_setup - Our initialization function. Note that we cannot return * error because we have compiled-in VFS hooks. So an (unlikely) failure here diff --git a/fs/open.c b/fs/open.c index 68354466879f..a53922450448 100644 --- a/fs/open.c +++ b/fs/open.c @@ -212,32 +212,18 @@ COMPAT_SYSCALL_DEFINE2(ftruncate, unsigned int, fd, compat_ulong_t, length) /* LFS versions of truncate are only needed on 32 bit machines */ #if BITS_PER_LONG == 32 -SYSCALL_DEFINE(truncate64)(const char __user * path, loff_t length) +SYSCALL_DEFINE2(truncate64, const char __user *, path, loff_t, length) { return do_sys_truncate(path, length); } -#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS -asmlinkage long SyS_truncate64(long path, loff_t length) -{ - return SYSC_truncate64((const char __user *) path, length); -} -SYSCALL_ALIAS(sys_truncate64, SyS_truncate64); -#endif -SYSCALL_DEFINE(ftruncate64)(unsigned int fd, loff_t length) +SYSCALL_DEFINE2(ftruncate64, unsigned int, fd, loff_t, length) { long ret = do_sys_ftruncate(fd, length, 0); /* avoid REGPARM breakage on x86: */ asmlinkage_protect(2, ret, fd, length); return ret; } -#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS -asmlinkage long SyS_ftruncate64(long fd, loff_t length) -{ - return SYSC_ftruncate64((unsigned int) fd, length); -} -SYSCALL_ALIAS(sys_ftruncate64, SyS_ftruncate64); -#endif #endif /* BITS_PER_LONG == 32 */ @@ -299,7 +285,7 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len) return ret; } -SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len) +SYSCALL_DEFINE4(fallocate, int, fd, int, mode, loff_t, offset, loff_t, len) { struct fd f = fdget(fd); int error = -EBADF; @@ -311,14 +297,6 @@ SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len) return error; } -#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS -asmlinkage long SyS_fallocate(long fd, long mode, loff_t offset, loff_t len) -{ - return SYSC_fallocate((int)fd, (int)mode, offset, len); -} -SYSCALL_ALIAS(sys_fallocate, SyS_fallocate); -#endif - /* * 
access() needs to use the real uid/gid, not the effective uid/gid. * We do this by temporarily clearing all FS-related capabilities and diff --git a/fs/read_write.c b/fs/read_write.c index a698eff457fb..dcfd58d95f44 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -487,8 +487,8 @@ SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, return ret; } -SYSCALL_DEFINE(pread64)(unsigned int fd, char __user *buf, - size_t count, loff_t pos) +SYSCALL_DEFINE4(pread64, unsigned int, fd, char __user *, buf, + size_t, count, loff_t, pos) { struct fd f; ssize_t ret = -EBADF; @@ -506,17 +506,9 @@ SYSCALL_DEFINE(pread64)(unsigned int fd, char __user *buf, return ret; } -#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS -asmlinkage long SyS_pread64(long fd, long buf, long count, loff_t pos) -{ - return SYSC_pread64((unsigned int) fd, (char __user *) buf, - (size_t) count, pos); -} -SYSCALL_ALIAS(sys_pread64, SyS_pread64); -#endif -SYSCALL_DEFINE(pwrite64)(unsigned int fd, const char __user *buf, - size_t count, loff_t pos) +SYSCALL_DEFINE4(pwrite64, unsigned int, fd, const char __user *, buf, + size_t, count, loff_t, pos) { struct fd f; ssize_t ret = -EBADF; @@ -534,14 +526,6 @@ SYSCALL_DEFINE(pwrite64)(unsigned int fd, const char __user *buf, return ret; } -#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS -asmlinkage long SyS_pwrite64(long fd, long buf, long count, loff_t pos) -{ - return SYSC_pwrite64((unsigned int) fd, (const char __user *) buf, - (size_t) count, pos); -} -SYSCALL_ALIAS(sys_pwrite64, SyS_pwrite64); -#endif /* * Reduce an iovec's length in-place. Return the resulting number of segments diff --git a/fs/sync.c b/fs/sync.c index 2c5d6639a66a..905f3f6b3d85 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -283,8 +283,8 @@ EXPORT_SYMBOL(generic_write_sync); * already-instantiated disk blocks, there are no guarantees here that the data * will be available after a crash. */ -SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes, - unsigned int flags) +SYSCALL_DEFINE4(sync_file_range, int, fd, loff_t, offset, loff_t, nbytes, + unsigned int, flags) { int ret; struct fd f; @@ -365,29 +365,11 @@ out_put: out: return ret; } -#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS -asmlinkage long SyS_sync_file_range(long fd, loff_t offset, loff_t nbytes, - long flags) -{ - return SYSC_sync_file_range((int) fd, offset, nbytes, - (unsigned int) flags); -} -SYSCALL_ALIAS(sys_sync_file_range, SyS_sync_file_range); -#endif /* It would be nice if people remember that not all the world's an i386 when they introduce new system calls */ -SYSCALL_DEFINE(sync_file_range2)(int fd, unsigned int flags, - loff_t offset, loff_t nbytes) +SYSCALL_DEFINE4(sync_file_range2, int, fd, unsigned int, flags, + loff_t, offset, loff_t, nbytes) { return sys_sync_file_range(fd, offset, nbytes, flags); } -#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS -asmlinkage long SyS_sync_file_range2(long fd, long flags, - loff_t offset, loff_t nbytes) -{ - return SYSC_sync_file_range2((int) fd, (unsigned int) flags, - offset, nbytes); -} -SYSCALL_ALIAS(sys_sync_file_range2, SyS_sync_file_range2); -#endif diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index f9411f0c1c80..3e07b92efbf6 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -96,9 +96,10 @@ struct sigaltstack; #define __MAP(n,...) 
__MAP##n(__VA_ARGS__) #define __SC_DECL(t, a) t a -#define __SC_LONG(t, a) long a +#define __TYPE_IS_LL(t) (__same_type((t)0, 0LL) || __same_type((t)0, 0ULL)) +#define __SC_LONG(t, a) __typeof(__builtin_choose_expr(__TYPE_IS_LL(t), 0LL, 0L)) a #define __SC_CAST(t, a) (t) a -#define __SC_TEST(t, a) (void)BUILD_BUG_ON_ZERO(sizeof(type) > sizeof(long)) +#define __SC_TEST(t, a) (void)BUILD_BUG_ON_ZERO(!__TYPE_IS_LL(t) && sizeof(t) > sizeof(long)) #ifdef CONFIG_FTRACE_SYSCALLS #define __SC_STR_ADECL(t, a) #a diff --git a/mm/fadvise.c b/mm/fadvise.c index 7e092689a12a..3bcfd81db45e 100644 --- a/mm/fadvise.c +++ b/mm/fadvise.c @@ -25,7 +25,7 @@ * POSIX_FADV_WILLNEED could set PG_Referenced, and POSIX_FADV_NOREUSE could * deactivate the pages and clear PG_Referenced. */ -SYSCALL_DEFINE(fadvise64_64)(int fd, loff_t offset, loff_t len, int advice) +SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice) { struct fd f = fdget(fd); struct address_space *mapping; @@ -145,26 +145,12 @@ out: fdput(f); return ret; } -#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS -asmlinkage long SyS_fadvise64_64(long fd, loff_t offset, loff_t len, long advice) -{ - return SYSC_fadvise64_64((int) fd, offset, len, (int) advice); -} -SYSCALL_ALIAS(sys_fadvise64_64, SyS_fadvise64_64); -#endif #ifdef __ARCH_WANT_SYS_FADVISE64 -SYSCALL_DEFINE(fadvise64)(int fd, loff_t offset, size_t len, int advice) +SYSCALL_DEFINE4(fadvise64, int, fd, loff_t, offset, size_t, len, int, advice) { return sys_fadvise64_64(fd, offset, len, advice); } -#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS -asmlinkage long SyS_fadvise64(long fd, loff_t offset, long len, long advice) -{ - return SYSC_fadvise64((int) fd, offset, (size_t)len, (int)advice); -} -SYSCALL_ALIAS(sys_fadvise64, SyS_fadvise64); -#endif #endif diff --git a/mm/readahead.c b/mm/readahead.c index 7963f2391236..daed28dd5830 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -576,7 +576,7 @@ do_readahead(struct address_space *mapping, struct file *filp, return 0; } -SYSCALL_DEFINE(readahead)(int fd, loff_t offset, size_t count) +SYSCALL_DEFINE3(readahead, int, fd, loff_t, offset, size_t, count) { ssize_t ret; struct fd f; @@ -595,10 +595,3 @@ SYSCALL_DEFINE(readahead)(int fd, loff_t offset, size_t count) } return ret; } -#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS -asmlinkage long SyS_readahead(long fd, loff_t offset, long count) -{ - return SYSC_readahead((int) fd, offset, (size_t) count); -} -SYSCALL_ALIAS(sys_readahead, SyS_readahead); -#endif -- cgit From e1b5bb6d1236d4ad2084c53aa83dde7cdf6f8eea Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 21 Jan 2013 17:16:07 -0500 Subject: consolidate cond_syscall and SYSCALL_ALIAS declarations take them to asm/linkage.h, with default in linux/linkage.h Signed-off-by: Al Viro --- arch/alpha/include/asm/linkage.h | 4 +++- arch/alpha/include/asm/unistd.h | 12 ------------ arch/arm/include/asm/unistd.h | 8 -------- arch/avr32/include/asm/unistd.h | 8 -------- arch/blackfin/include/asm/unistd.h | 8 -------- arch/cris/include/asm/unistd.h | 8 -------- arch/frv/include/asm/unistd.h | 10 ---------- arch/h8300/include/asm/linkage.h | 2 -- arch/h8300/include/asm/unistd.h | 7 ------- arch/ia64/include/asm/linkage.h | 4 ++++ arch/ia64/include/asm/unistd.h | 10 ---------- arch/m32r/include/asm/unistd.h | 10 ---------- arch/m68k/include/asm/unistd.h | 8 -------- arch/microblaze/include/asm/unistd.h | 8 -------- arch/mips/include/asm/linkage.h | 3 +++ arch/mips/include/asm/unistd.h | 8 -------- arch/mn10300/include/asm/unistd.h | 10 ---------- 
arch/parisc/include/asm/unistd.h | 8 -------- arch/powerpc/include/asm/linkage.h | 13 +++++++++++++ arch/powerpc/include/asm/unistd.h | 6 ------ arch/powerpc/include/uapi/asm/linkage.h | 6 ------ arch/s390/include/asm/unistd.h | 8 -------- arch/sh/include/asm/unistd.h | 8 -------- arch/sparc/include/asm/unistd.h | 8 -------- arch/x86/include/asm/unistd.h | 8 -------- arch/xtensa/include/asm/unistd.h | 8 -------- include/asm-generic/unistd.h | 17 ----------------- include/linux/linkage.h | 21 +++++++++++++++++++++ include/linux/syscalls.h | 14 -------------- 29 files changed, 44 insertions(+), 209 deletions(-) create mode 100644 arch/powerpc/include/asm/linkage.h delete mode 100644 arch/powerpc/include/uapi/asm/linkage.h (limited to 'include/linux') diff --git a/arch/alpha/include/asm/linkage.h b/arch/alpha/include/asm/linkage.h index 291c2d01c44f..7cfd06e8c935 100644 --- a/arch/alpha/include/asm/linkage.h +++ b/arch/alpha/include/asm/linkage.h @@ -1,6 +1,8 @@ #ifndef __ASM_LINKAGE_H #define __ASM_LINKAGE_H -/* Nothing to see here... */ +#define cond_syscall(x) asm(".weak\t" #x "\n" #x " = sys_ni_syscall") +#define SYSCALL_ALIAS(alias, name) \ + asm ( #alias " = " #name "\n\t.globl " #alias) #endif diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h index 6d6fe7ab5473..43baee17acdf 100644 --- a/arch/alpha/include/asm/unistd.h +++ b/arch/alpha/include/asm/unistd.h @@ -18,16 +18,4 @@ #define __ARCH_WANT_SYS_VFORK #define __ARCH_WANT_SYS_CLONE -/* "Conditional" syscalls. What we want is - - __attribute__((weak,alias("sys_ni_syscall"))) - - but that raises the problem of what type to give the symbol. If we use - a prototype, it'll conflict with the definition given in this file and - others. If we use __typeof, we discover that not all symbols actually - have declarations. If we use no prototype, then we get warnings from - -Wstrict-prototypes. Ho hum. 
*/ - -#define cond_syscall(x) asm(".weak\t" #x "\n" #x " = sys_ni_syscall") - #endif /* _ALPHA_UNISTD_H */ diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h index e4ddfb39ca34..141baa3f9a72 100644 --- a/arch/arm/include/asm/unistd.h +++ b/arch/arm/include/asm/unistd.h @@ -43,14 +43,6 @@ #define __ARCH_WANT_SYS_VFORK #define __ARCH_WANT_SYS_CLONE -/* - * "Conditional" syscalls - * - * What we want is __attribute__((weak,alias("sys_ni_syscall"))), - * but it doesn't work on all toolchains, so we just do it by hand - */ -#define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall") - /* * Unimplemented (or alternatively implemented) syscalls */ diff --git a/arch/avr32/include/asm/unistd.h b/arch/avr32/include/asm/unistd.h index dc4d5a931112..c1eb080e45fe 100644 --- a/arch/avr32/include/asm/unistd.h +++ b/arch/avr32/include/asm/unistd.h @@ -41,12 +41,4 @@ #define __ARCH_WANT_SYS_VFORK #define __ARCH_WANT_SYS_CLONE -/* - * "Conditional" syscalls - * - * What we want is __attribute__((weak,alias("sys_ni_syscall"))), - * but it doesn't work on all toolchains, so we just do it by hand - */ -#define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall"); - #endif /* __ASM_AVR32_UNISTD_H */ diff --git a/arch/blackfin/include/asm/unistd.h b/arch/blackfin/include/asm/unistd.h index 04e83ea8d5cc..c35414bdf7bd 100644 --- a/arch/blackfin/include/asm/unistd.h +++ b/arch/blackfin/include/asm/unistd.h @@ -20,12 +20,4 @@ #define __ARCH_WANT_SYS_NICE #define __ARCH_WANT_SYS_VFORK -/* - * "Conditional" syscalls - * - * What we want is __attribute__((weak,alias("sys_ni_syscall"))), - * but it doesn't work on all toolchains, so we just do it by hand - */ -#define cond_syscall(x) asm(".weak\t_" #x "\n\t.set\t_" #x ",_sys_ni_syscall"); - #endif /* __ASM_BFIN_UNISTD_H */ diff --git a/arch/cris/include/asm/unistd.h b/arch/cris/include/asm/unistd.h index be57a988bfb9..0ff3f6889842 100644 --- a/arch/cris/include/asm/unistd.h +++ b/arch/cris/include/asm/unistd.h @@ -34,12 +34,4 @@ #define __ARCH_WANT_SYS_VFORK #define __ARCH_WANT_SYS_CLONE -/* - * "Conditional" syscalls - * - * What we want is __attribute__((weak,alias("sys_ni_syscall"))), - * but it doesn't work on all toolchains, so we just do it by hand - */ -#define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall") - #endif /* _ASM_CRIS_UNISTD_H_ */ diff --git a/arch/frv/include/asm/unistd.h b/arch/frv/include/asm/unistd.h index 4cfcc7bba25a..70ec7293dce7 100644 --- a/arch/frv/include/asm/unistd.h +++ b/arch/frv/include/asm/unistd.h @@ -31,14 +31,4 @@ #define __ARCH_WANT_SYS_VFORK #define __ARCH_WANT_SYS_CLONE -/* - * "Conditional" syscalls - * - * What we want is __attribute__((weak,alias("sys_ni_syscall"))), - * but it doesn't work on all toolchains, so we just do it by hand - */ -#ifndef cond_syscall -#define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall") -#endif - #endif /* _ASM_UNISTD_H_ */ diff --git a/arch/h8300/include/asm/linkage.h b/arch/h8300/include/asm/linkage.h index 6f4df7d46180..1d81604fb0ad 100644 --- a/arch/h8300/include/asm/linkage.h +++ b/arch/h8300/include/asm/linkage.h @@ -2,7 +2,5 @@ #define _H8300_LINKAGE_H #undef SYMBOL_NAME_LABEL -#undef SYMBOL_NAME #define SYMBOL_NAME_LABEL(_name_) _##_name_##: -#define SYMBOL_NAME(_name_) _##_name_ #endif diff --git a/arch/h8300/include/asm/unistd.h b/arch/h8300/include/asm/unistd.h index 6721856d841b..ab671ecf5196 100644 --- a/arch/h8300/include/asm/unistd.h +++ b/arch/h8300/include/asm/unistd.h @@ -33,11 +33,4 @@ 
#define __ARCH_WANT_SYS_VFORK #define __ARCH_WANT_SYS_CLONE -/* - * "Conditional" syscalls - */ -#define cond_syscall(name) \ - asm (".weak\t_" #name "\n" \ - ".set\t_" #name ",_sys_ni_syscall"); - #endif /* _ASM_H8300_UNISTD_H_ */ diff --git a/arch/ia64/include/asm/linkage.h b/arch/ia64/include/asm/linkage.h index ef22a45c1890..787575701f1c 100644 --- a/arch/ia64/include/asm/linkage.h +++ b/arch/ia64/include/asm/linkage.h @@ -11,4 +11,8 @@ #endif +#define cond_syscall(x) asm(".weak\t" #x "#\n" #x "#\t=\tsys_ni_syscall#") +#define SYSCALL_ALIAS(alias, name) \ + asm ( #alias "# = " #name "#\n\t.globl " #alias "#") + #endif diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h index 096373800f73..afd45e0d552e 100644 --- a/arch/ia64/include/asm/unistd.h +++ b/arch/ia64/include/asm/unistd.h @@ -46,15 +46,5 @@ asmlinkage unsigned long sys_mmap2( struct pt_regs; asmlinkage long sys_ia64_pipe(void); -/* - * "Conditional" syscalls - * - * Note, this macro can only be used in the file which defines sys_ni_syscall, i.e., in - * kernel/sys_ni.c. This version causes warnings because the declaration isn't a - * proper prototype, but we can't use __typeof__ either, because not all cond_syscall() - * declarations have prototypes at the moment. - */ -#define cond_syscall(x) asmlinkage long x (void) __attribute__((weak,alias("sys_ni_syscall"))) - #endif /* !__ASSEMBLY__ */ #endif /* _ASM_IA64_UNISTD_H */ diff --git a/arch/m32r/include/asm/unistd.h b/arch/m32r/include/asm/unistd.h index 555629b05267..59db80193454 100644 --- a/arch/m32r/include/asm/unistd.h +++ b/arch/m32r/include/asm/unistd.h @@ -48,14 +48,4 @@ #define __IGNORE_getresgid #define __IGNORE_chown -/* - * "Conditional" syscalls - * - * What we want is __attribute__((weak,alias("sys_ni_syscall"))), - * but it doesn't work on all toolchains, so we just do it by hand - */ -#ifndef cond_syscall -#define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall") -#endif - #endif /* _ASM_M32R_UNISTD_H */ diff --git a/arch/m68k/include/asm/unistd.h b/arch/m68k/include/asm/unistd.h index 6cd92671ca5e..014f288fc813 100644 --- a/arch/m68k/include/asm/unistd.h +++ b/arch/m68k/include/asm/unistd.h @@ -32,12 +32,4 @@ #define __ARCH_WANT_SYS_FORK #define __ARCH_WANT_SYS_VFORK -/* - * "Conditional" syscalls - * - * What we want is __attribute__((weak,alias("sys_ni_syscall"))), - * but it doesn't work on all toolchains, so we just do it by hand - */ -#define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall") - #endif /* _ASM_M68K_UNISTD_H_ */ diff --git a/arch/microblaze/include/asm/unistd.h b/arch/microblaze/include/asm/unistd.h index b3778391d9cc..6dece2d002dc 100644 --- a/arch/microblaze/include/asm/unistd.h +++ b/arch/microblaze/include/asm/unistd.h @@ -37,13 +37,5 @@ #define __ARCH_WANT_SYS_VFORK #define __ARCH_WANT_SYS_FORK -/* - * "Conditional" syscalls - * - * What we want is __attribute__((weak,alias("sys_ni_syscall"))), - * but it doesn't work on all toolchains, so we just do it by hand - */ -#define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall"); - #endif /* __ASSEMBLY__ */ #endif /* _ASM_MICROBLAZE_UNISTD_H */ diff --git a/arch/mips/include/asm/linkage.h b/arch/mips/include/asm/linkage.h index e9a940d1b0c6..2767dda9e309 100644 --- a/arch/mips/include/asm/linkage.h +++ b/arch/mips/include/asm/linkage.h @@ -6,5 +6,8 @@ #endif #define __weak __attribute__((weak)) +#define cond_syscall(x) asm(".weak\t" #x "\n" #x "\t=\tsys_ni_syscall") +#define SYSCALL_ALIAS(alias, name) \ + asm ( 
#alias " = " #name "\n\t.globl " #alias) #endif diff --git a/arch/mips/include/asm/unistd.h b/arch/mips/include/asm/unistd.h index 64f661e32879..63c9c886173a 100644 --- a/arch/mips/include/asm/unistd.h +++ b/arch/mips/include/asm/unistd.h @@ -63,12 +63,4 @@ #endif /* !__ASSEMBLY__ */ -/* - * "Conditional" syscalls - * - * What we want is __attribute__((weak,alias("sys_ni_syscall"))), - * but it doesn't work on all toolchains, so we just do it by hand - */ -#define cond_syscall(x) asm(".weak\t" #x "\n" #x "\t=\tsys_ni_syscall") - #endif /* _ASM_UNISTD_H */ diff --git a/arch/mn10300/include/asm/unistd.h b/arch/mn10300/include/asm/unistd.h index 7f9d9adfa51e..9d4e2d1ef90e 100644 --- a/arch/mn10300/include/asm/unistd.h +++ b/arch/mn10300/include/asm/unistd.h @@ -45,14 +45,4 @@ #define __ARCH_WANT_SYS_VFORK #define __ARCH_WANT_SYS_CLONE -/* - * "Conditional" syscalls - * - * What we want is __attribute__((weak,alias("sys_ni_syscall"))), - * but it doesn't work on all toolchains, so we just do it by hand - */ -#ifndef cond_syscall -#define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall"); -#endif - #endif /* _ASM_UNISTD_H */ diff --git a/arch/parisc/include/asm/unistd.h b/arch/parisc/include/asm/unistd.h index ae9a46cbfd92..74d835820ee7 100644 --- a/arch/parisc/include/asm/unistd.h +++ b/arch/parisc/include/asm/unistd.h @@ -170,12 +170,4 @@ type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5) \ #undef STR -/* - * "Conditional" syscalls - * - * What we want is __attribute__((weak,alias("sys_ni_syscall"))), - * but it doesn't work on all toolchains, so we just do it by hand - */ -#define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall") - #endif /* _ASM_PARISC_UNISTD_H_ */ diff --git a/arch/powerpc/include/asm/linkage.h b/arch/powerpc/include/asm/linkage.h new file mode 100644 index 000000000000..b36f650a13ff --- /dev/null +++ b/arch/powerpc/include/asm/linkage.h @@ -0,0 +1,13 @@ +#ifndef _ASM_POWERPC_LINKAGE_H +#define _ASM_POWERPC_LINKAGE_H + +#ifdef CONFIG_PPC64 +#define cond_syscall(x) \ + asm ("\t.weak " #x "\n\t.set " #x ", sys_ni_syscall\n" \ + "\t.weak ." #x "\n\t.set ." #x ", .sys_ni_syscall\n") +#define SYSCALL_ALIAS(alias, name) \ + asm ("\t.globl " #alias "\n\t.set " #alias ", " #name "\n" \ + "\t.globl ." #alias "\n\t.set ." #alias ", ." #name) +#endif + +#endif /* _ASM_POWERPC_LINKAGE_H */ diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index f25b5c45c435..91586d979c99 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -56,11 +56,5 @@ #define __ARCH_WANT_SYS_VFORK #define __ARCH_WANT_SYS_CLONE -/* - * "Conditional" syscalls - */ -#define cond_syscall(x) \ - asmlinkage long x (void) __attribute__((weak,alias("sys_ni_syscall"))) - #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_UNISTD_H_ */ diff --git a/arch/powerpc/include/uapi/asm/linkage.h b/arch/powerpc/include/uapi/asm/linkage.h deleted file mode 100644 index e1c4ac1cc4ba..000000000000 --- a/arch/powerpc/include/uapi/asm/linkage.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _ASM_POWERPC_LINKAGE_H -#define _ASM_POWERPC_LINKAGE_H - -/* Nothing to see here... 
*/ - -#endif /* _ASM_POWERPC_LINKAGE_H */ diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h index a6667a952969..651886353551 100644 --- a/arch/s390/include/asm/unistd.h +++ b/arch/s390/include/asm/unistd.h @@ -54,12 +54,4 @@ #define __ARCH_WANT_SYS_VFORK #define __ARCH_WANT_SYS_CLONE -/* - * "Conditional" syscalls - * - * What we want is __attribute__((weak,alias("sys_ni_syscall"))), - * but it doesn't work on all toolchains, so we just do it by hand - */ -#define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall") - #endif /* _ASM_S390_UNISTD_H_ */ diff --git a/arch/sh/include/asm/unistd.h b/arch/sh/include/asm/unistd.h index 5e90fa2b7eed..e77816c4b9bc 100644 --- a/arch/sh/include/asm/unistd.h +++ b/arch/sh/include/asm/unistd.h @@ -30,12 +30,4 @@ # define __ARCH_WANT_SYS_VFORK # define __ARCH_WANT_SYS_CLONE -/* - * "Conditional" syscalls - * - * What we want is __attribute__((weak,alias("sys_ni_syscall"))), - * but it doesn't work on all toolchains, so we just do it by hand - */ -# define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall") - #include diff --git a/arch/sparc/include/asm/unistd.h b/arch/sparc/include/asm/unistd.h index 5356810bd7e7..dfa53fdd5cbc 100644 --- a/arch/sparc/include/asm/unistd.h +++ b/arch/sparc/include/asm/unistd.h @@ -45,12 +45,4 @@ #define __ARCH_WANT_COMPAT_SYS_SENDFILE #endif -/* - * "Conditional" syscalls - * - * What we want is __attribute__((weak,alias("sys_ni_syscall"))), - * but it doesn't work on all toolchains, so we just do it by hand - */ -#define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall") - #endif /* _SPARC_UNISTD_H */ diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h index 3d5df1c4447f..c2a48139c340 100644 --- a/arch/x86/include/asm/unistd.h +++ b/arch/x86/include/asm/unistd.h @@ -50,12 +50,4 @@ # define __ARCH_WANT_SYS_VFORK # define __ARCH_WANT_SYS_CLONE -/* - * "Conditional" syscalls - * - * What we want is __attribute__((weak,alias("sys_ni_syscall"))), - * but it doesn't work on all toolchains, so we just do it by hand - */ -# define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall") - #endif /* _ASM_X86_UNISTD_H */ diff --git a/arch/xtensa/include/asm/unistd.h b/arch/xtensa/include/asm/unistd.h index c38834de9ac7..cb4c2ce8d447 100644 --- a/arch/xtensa/include/asm/unistd.h +++ b/arch/xtensa/include/asm/unistd.h @@ -4,14 +4,6 @@ #define __ARCH_WANT_SYS_CLONE #include -/* - * "Conditional" syscalls - * - * What we want is __attribute__((weak,alias("sys_ni_syscall"))), - * but it doesn't work on all toolchains, so we just do it by hand - */ -#define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall"); - #define __ARCH_WANT_STAT64 #define __ARCH_WANT_SYS_UTIME #define __ARCH_WANT_SYS_LLSEEK diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h index 4077b5d9ff81..0501fa3f783d 100644 --- a/include/asm-generic/unistd.h +++ b/include/asm-generic/unistd.h @@ -9,20 +9,3 @@ #define __ARCH_WANT_STAT64 #define __ARCH_WANT_SYS_LLSEEK #endif - -/* - * "Conditional" syscalls - * - * What we want is __attribute__((weak,alias("sys_ni_syscall"))), - * but it doesn't work on all toolchains, so we just do it by hand - */ -#ifndef cond_syscall -#ifdef CONFIG_SYMBOL_PREFIX -#define __SYMBOL_PREFIX CONFIG_SYMBOL_PREFIX -#else -#define __SYMBOL_PREFIX -#endif -#define cond_syscall(x) asm(".weak\t" __SYMBOL_PREFIX #x "\n\t" \ - ".set\t" __SYMBOL_PREFIX #x "," \ - __SYMBOL_PREFIX "sys_ni_syscall") -#endif diff 
--git a/include/linux/linkage.h b/include/linux/linkage.h index 807f1e533226..829d66c67fc2 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -2,6 +2,7 @@ #define _LINUX_LINKAGE_H #include +#include #include #ifdef __cplusplus @@ -14,6 +15,26 @@ #define asmlinkage CPP_ASMLINKAGE #endif +#ifndef SYMBOL_NAME +#ifdef CONFIG_SYMBOL_PREFIX +#define SYMBOL_NAME(x) CONFIG_SYMBOL_PREFIX ## x +#else +#define SYMBOL_NAME(x) x +#endif +#endif +#define __SYMBOL_NAME(x) __stringify(SYMBOL_NAME(x)) + +#ifndef cond_syscall +#define cond_syscall(x) asm(".weak\t" __SYMBOL_NAME(x) \ + "\n\t.set\t" __SYMBOL_NAME(x) "," __SYMBOL_NAME(sys_ni_syscall)); +#endif + +#ifndef SYSCALL_ALIAS +#define SYSCALL_ALIAS(alias, name) \ + asm ("\t.globl " __SYMBOL_NAME(alias) \ + "\n\t.set\t" __SYMBOL_NAME(alias) "," __SYMBOL_NAME(name)) +#endif + #define __page_aligned_data __section(.data..page_aligned) __aligned(PAGE_SIZE) #define __page_aligned_bss __section(.bss..page_aligned) __aligned(PAGE_SIZE) diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 3e07b92efbf6..87584373305d 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -183,20 +183,6 @@ extern struct trace_event_functions exit_syscall_print_funcs; #define SYSCALL_DEFINE5(name, ...) SYSCALL_DEFINEx(5, _##name, __VA_ARGS__) #define SYSCALL_DEFINE6(name, ...) SYSCALL_DEFINEx(6, _##name, __VA_ARGS__) -#ifdef CONFIG_PPC64 -#define SYSCALL_ALIAS(alias, name) \ - asm ("\t.globl " #alias "\n\t.set " #alias ", " #name "\n" \ - "\t.globl ." #alias "\n\t.set ." #alias ", ." #name) -#else -#if defined(CONFIG_ALPHA) || defined(CONFIG_MIPS) -#define SYSCALL_ALIAS(alias, name) \ - asm ( #alias " = " #name "\n\t.globl " #alias) -#else -#define SYSCALL_ALIAS(alias, name) \ - asm ("\t.globl " #alias "\n\t.set " #alias ", " #name) -#endif -#endif - #ifdef CONFIG_FTRACE_SYSCALLS #define SYSCALL_DEFINEx(x, sname, ...) \ static const char *types_##sname[] = { \ -- cgit From 22d1a35da0e247a006c286842a1846acb4ffed4f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 21 Jan 2013 17:18:07 -0500 Subject: make HAVE_SYSCALL_WRAPPERS unconditional Signed-off-by: Al Viro --- arch/Kconfig | 3 --- arch/alpha/Kconfig | 1 - arch/mips/Kconfig | 1 - arch/powerpc/Kconfig | 1 - arch/s390/Kconfig | 1 - arch/sparc/Kconfig | 1 - arch/tile/Kconfig | 1 - include/linux/compat.h | 9 --------- include/linux/syscalls.h | 10 ---------- ipc/sem.c | 2 -- 10 files changed, 30 deletions(-) (limited to 'include/linux') diff --git a/arch/Kconfig b/arch/Kconfig index 5a1779c93940..892d6176fcf3 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -157,9 +157,6 @@ config ARCH_USE_BUILTIN_BSWAP instructions should set this. And it shouldn't hurt to set it on architectures that don't have such instructions. 
-config HAVE_SYSCALL_WRAPPERS - bool - config KRETPROBES def_bool y depends on KPROBES && HAVE_KRETPROBES diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 5833aa441481..5469f7b444ab 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -4,7 +4,6 @@ config ALPHA select HAVE_AOUT select HAVE_IDE select HAVE_OPROFILE - select HAVE_SYSCALL_WRAPPERS select HAVE_PCSPKR_PLATFORM select HAVE_PERF_EVENTS select HAVE_DMA_ATTRS diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index ae9c716c46bb..32eb3d67bbef 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -1737,7 +1737,6 @@ config 32BIT config 64BIT bool "64-bit kernel" depends on CPU_SUPPORTS_64BIT_KERNEL && SYS_SUPPORTS_64BIT_KERNEL - select HAVE_SYSCALL_WRAPPERS help Select this option if you want to build a 64-bit kernel. diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index b89d7eb730a2..f460e32fe2a0 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -113,7 +113,6 @@ config PPC select USE_GENERIC_SMP_HELPERS if SMP select HAVE_OPROFILE select HAVE_DEBUG_KMEMLEAK - select HAVE_SYSCALL_WRAPPERS if PPC64 select GENERIC_ATOMIC64 if PPC32 select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select HAVE_PERF_EVENTS diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 4b505370a1d5..f6cc1528df89 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -131,7 +131,6 @@ config S390 select HAVE_PERF_EVENTS select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_SYSCALL_TRACEPOINTS - select HAVE_SYSCALL_WRAPPERS select HAVE_UID16 if 32BIT select HAVE_VIRT_CPU_ACCOUNTING select HAVE_VIRT_TO_BUS diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 289127d5241c..67388cdb18c1 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -62,7 +62,6 @@ config SPARC64 select HAVE_RCU_TABLE_FREE if SMP select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP - select HAVE_SYSCALL_WRAPPERS select HAVE_ARCH_TRANSPARENT_HUGEPAGE select HAVE_DYNAMIC_FTRACE select HAVE_FTRACE_MCOUNT_RECORD diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index ff496ab1e794..95bd2ef6c943 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -16,7 +16,6 @@ config TILE select GENERIC_PENDING_IRQ if SMP select GENERIC_IRQ_SHOW select HAVE_DEBUG_BUGVERBOSE - select HAVE_SYSCALL_WRAPPERS if TILEGX select HAVE_VIRT_TO_BUS select SYS_HYPERVISOR select ARCH_HAVE_NMI_SAFE_CMPXCHG diff --git a/include/linux/compat.h b/include/linux/compat.h index 8c1dfc8d830d..110132527e4c 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -40,8 +40,6 @@ #define COMPAT_SYSCALL_DEFINE6(name, ...) \ COMPAT_SYSCALL_DEFINEx(6, _##name, __VA_ARGS__) -#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS - #define COMPAT_SYSCALL_DEFINEx(x, name, ...) \ asmlinkage long compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ static inline long C_SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ @@ -52,13 +50,6 @@ SYSCALL_ALIAS(compat_sys##name, compat_SyS##name); \ static inline long C_SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__)) -#else /* CONFIG_HAVE_SYSCALL_WRAPPERS */ - -#define COMPAT_SYSCALL_DEFINEx(x, name, ...) 
\ - asmlinkage long compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) - -#endif /* CONFIG_HAVE_SYSCALL_WRAPPERS */ - #ifndef compat_user_stack_pointer #define compat_user_stack_pointer() current_user_stack_pointer() #endif diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 87584373305d..3b6fc13cb46a 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -198,8 +198,6 @@ extern struct trace_event_functions exit_syscall_print_funcs; __SYSCALL_DEFINEx(x, sname, __VA_ARGS__) #endif -#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS - #define SYSCALL_DEFINE(name) static inline long SYSC_##name #define __SYSCALL_DEFINEx(x, name, ...) \ @@ -213,14 +211,6 @@ extern struct trace_event_functions exit_syscall_print_funcs; SYSCALL_ALIAS(sys##name, SyS##name); \ static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__)) -#else /* CONFIG_HAVE_SYSCALL_WRAPPERS */ - -#define SYSCALL_DEFINE(name) asmlinkage long sys_##name -#define __SYSCALL_DEFINEx(x, name, ...) \ - asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) - -#endif /* CONFIG_HAVE_SYSCALL_WRAPPERS */ - asmlinkage long sys_time(time_t __user *tloc); asmlinkage long sys_stime(time_t __user *tptr); asmlinkage long sys_gettimeofday(struct timeval __user *tv, diff --git a/ipc/sem.c b/ipc/sem.c index 58d31f1c1eb5..e7236df7a470 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -1156,13 +1156,11 @@ SYSCALL_DEFINE(semctl)(int semid, int semnum, int cmd, union semun arg) return -EINVAL; } } -#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS asmlinkage long SyS_semctl(int semid, int semnum, int cmd, union semun arg) { return SYSC_semctl((int) semid, (int) semnum, (int) cmd, arg); } SYSCALL_ALIAS(sys_semctl, SyS_semctl); -#endif /* If the task doesn't already have a undo_list, then allocate one * here. We guarantee there is only one thread using this undo list, -- cgit From 2cf0966683430b6468f36ca20515a33ca7f2403c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 21 Jan 2013 15:25:54 -0500 Subject: make SYSCALL_DEFINE-generated wrappers do asmlinkage_protect ... and switch i386 to HAVE_SYSCALL_WRAPPERS, killing open-coded uses of asmlinkage_protect() in a bunch of syscalls. 
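For reference, a hand expansion (approximate, not literal preprocessor output) of what the reworked __SYSCALL_DEFINEx below generates for SYSCALL_DEFINE2(ftruncate, unsigned int, fd, unsigned long, length):

asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length);
static inline long SYSC_ftruncate(unsigned int fd, unsigned long length);
asmlinkage long SyS_ftruncate(long fd, long length)
{
	long ret = SYSC_ftruncate((unsigned int) fd, (unsigned long) length);
	/* the BUILD_BUG_ON_ZERO() size checks from __SC_TEST are elided here */
	asmlinkage_protect(2, ret, fd, length);
	return ret;
}
SYSCALL_ALIAS(sys_ftruncate, SyS_ftruncate);
static inline long SYSC_ftruncate(unsigned int fd, unsigned long length)
{
	return do_sys_ftruncate(fd, length, 1);
}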
Signed-off-by: Al Viro --- arch/x86/include/asm/syscalls.h | 4 +-- arch/x86/kernel/tls.c | 14 ++++------- arch/x86/um/tls_32.c | 5 ++-- fs/aio.c | 2 -- fs/open.c | 24 +++--------------- include/linux/syscalls.h | 6 ++++- kernel/exit.c | 5 ---- kernel/fork.c | 5 +--- kernel/uid16.c | 55 +++++++++-------------------------------- 9 files changed, 31 insertions(+), 89 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index 6cf0a9cc60cd..5f87b35fd2ef 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -27,8 +27,8 @@ asmlinkage int sys_modify_ldt(int, void __user *, unsigned long); long sys_rt_sigreturn(void); /* kernel/tls.c */ -asmlinkage int sys_set_thread_area(struct user_desc __user *); -asmlinkage int sys_get_thread_area(struct user_desc __user *); +asmlinkage long sys_set_thread_area(struct user_desc __user *); +asmlinkage long sys_get_thread_area(struct user_desc __user *); /* X86_32 only */ #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c index 9d9d2f9e77a5..f7fec09e3e3a 100644 --- a/arch/x86/kernel/tls.c +++ b/arch/x86/kernel/tls.c @@ -3,13 +3,13 @@ #include #include #include +#include #include #include #include #include #include -#include #include "tls.h" @@ -89,11 +89,9 @@ int do_set_thread_area(struct task_struct *p, int idx, return 0; } -asmlinkage int sys_set_thread_area(struct user_desc __user *u_info) +SYSCALL_DEFINE1(set_thread_area, struct user_desc __user *, u_info) { - int ret = do_set_thread_area(current, -1, u_info, 1); - asmlinkage_protect(1, ret, u_info); - return ret; + return do_set_thread_area(current, -1, u_info, 1); } @@ -139,11 +137,9 @@ int do_get_thread_area(struct task_struct *p, int idx, return 0; } -asmlinkage int sys_get_thread_area(struct user_desc __user *u_info) +SYSCALL_DEFINE1(get_thread_area, struct user_desc __user *, u_info) { - int ret = do_get_thread_area(current, -1, u_info); - asmlinkage_protect(1, ret, u_info); - return ret; + return do_get_thread_area(current, -1, u_info); } int regset_tls_active(struct task_struct *target, diff --git a/arch/x86/um/tls_32.c b/arch/x86/um/tls_32.c index 5f5feff3d24c..80ffa5b9982d 100644 --- a/arch/x86/um/tls_32.c +++ b/arch/x86/um/tls_32.c @@ -5,6 +5,7 @@ #include #include +#include #include #include #include @@ -274,7 +275,7 @@ clear: goto out; } -int sys_set_thread_area(struct user_desc __user *user_desc) +SYSCALL_DEFINE1(set_thread_area, struct user_desc __user *, user_desc) { struct user_desc info; int idx, ret; @@ -322,7 +323,7 @@ int ptrace_set_thread_area(struct task_struct *child, int idx, return set_tls_entry(child, &info, idx, 0); } -int sys_get_thread_area(struct user_desc __user *user_desc) +SYSCALL_DEFINE1(get_thread_area, struct user_desc __user *, user_desc) { struct user_desc info; int idx, ret; diff --git a/fs/aio.c b/fs/aio.c index 3f941f2a3059..c3ebb98a527b 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1790,7 +1790,5 @@ SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id, ret = read_events(ioctx, min_nr, nr, events, timeout); put_ioctx(ioctx); } - - asmlinkage_protect(5, ret, ctx_id, min_nr, nr, events, timeout); return ret; } diff --git a/fs/open.c b/fs/open.c index a53922450448..8c741002f947 100644 --- a/fs/open.c +++ b/fs/open.c @@ -197,10 +197,7 @@ out: SYSCALL_DEFINE2(ftruncate, unsigned int, fd, unsigned long, length) { - long ret = do_sys_ftruncate(fd, length, 1); - /* avoid REGPARM breakage on x86: */ - asmlinkage_protect(2, ret, fd, length); - return ret; + 
return do_sys_ftruncate(fd, length, 1); } #ifdef CONFIG_COMPAT @@ -219,10 +216,7 @@ SYSCALL_DEFINE2(truncate64, const char __user *, path, loff_t, length) SYSCALL_DEFINE2(ftruncate64, unsigned int, fd, loff_t, length) { - long ret = do_sys_ftruncate(fd, length, 0); - /* avoid REGPARM breakage on x86: */ - asmlinkage_protect(2, ret, fd, length); - return ret; + return do_sys_ftruncate(fd, length, 0); } #endif /* BITS_PER_LONG == 32 */ @@ -961,29 +955,19 @@ long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode) SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode) { - long ret; - if (force_o_largefile()) flags |= O_LARGEFILE; - ret = do_sys_open(AT_FDCWD, filename, flags, mode); - /* avoid REGPARM breakage on x86: */ - asmlinkage_protect(3, ret, filename, flags, mode); - return ret; + return do_sys_open(AT_FDCWD, filename, flags, mode); } SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags, umode_t, mode) { - long ret; - if (force_o_largefile()) flags |= O_LARGEFILE; - ret = do_sys_open(dfd, filename, flags, mode); - /* avoid REGPARM breakage on x86: */ - asmlinkage_protect(4, ret, dfd, filename, flags, mode); - return ret; + return do_sys_open(dfd, filename, flags, mode); } #ifndef __alpha__ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 3b6fc13cb46a..9660a8bdcbbe 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -99,6 +99,7 @@ struct sigaltstack; #define __TYPE_IS_LL(t) (__same_type((t)0, 0LL) || __same_type((t)0, 0ULL)) #define __SC_LONG(t, a) __typeof(__builtin_choose_expr(__TYPE_IS_LL(t), 0LL, 0L)) a #define __SC_CAST(t, a) (t) a +#define __SC_ARGS(t, a) a #define __SC_TEST(t, a) (void)BUILD_BUG_ON_ZERO(!__TYPE_IS_LL(t) && sizeof(t) > sizeof(long)) #ifdef CONFIG_FTRACE_SYSCALLS @@ -200,13 +201,16 @@ extern struct trace_event_functions exit_syscall_print_funcs; #define SYSCALL_DEFINE(name) static inline long SYSC_##name +#define __PROTECT(...) asmlinkage_protect(__VA_ARGS__) #define __SYSCALL_DEFINEx(x, name, ...) 
\ asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ { \ + long ret = SYSC##name(__MAP(x,__SC_CAST,__VA_ARGS__)); \ __MAP(x,__SC_TEST,__VA_ARGS__); \ - return SYSC##name(__MAP(x,__SC_CAST,__VA_ARGS__)); \ + __PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__)); \ + return ret; \ } \ SYSCALL_ALIAS(sys##name, SyS##name); \ static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__)) diff --git a/kernel/exit.c b/kernel/exit.c index 51e485ca9935..25d0108d7452 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1629,9 +1629,6 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *, } put_pid(pid); - - /* avoid REGPARM breakage on x86: */ - asmlinkage_protect(5, ret, which, upid, infop, options, ru); return ret; } @@ -1669,8 +1666,6 @@ SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr, ret = do_wait(&wo); put_pid(pid); - /* avoid REGPARM breakage on x86: */ - asmlinkage_protect(4, ret, upid, stat_addr, options, ru); return ret; } diff --git a/kernel/fork.c b/kernel/fork.c index 8d932b1c9056..e1f34abe5887 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1674,10 +1674,7 @@ SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp, int, tls_val) #endif { - long ret = do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr); - asmlinkage_protect(5, ret, clone_flags, newsp, - parent_tidptr, child_tidptr, tls_val); - return ret; + return do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr); } #endif diff --git a/kernel/uid16.c b/kernel/uid16.c index d7948eb10225..f6c83d7ef000 100644 --- a/kernel/uid16.c +++ b/kernel/uid16.c @@ -18,67 +18,43 @@ SYSCALL_DEFINE3(chown16, const char __user *, filename, old_uid_t, user, old_gid_t, group) { - long ret = sys_chown(filename, low2highuid(user), low2highgid(group)); - /* avoid REGPARM breakage on x86: */ - asmlinkage_protect(3, ret, filename, user, group); - return ret; + return sys_chown(filename, low2highuid(user), low2highgid(group)); } SYSCALL_DEFINE3(lchown16, const char __user *, filename, old_uid_t, user, old_gid_t, group) { - long ret = sys_lchown(filename, low2highuid(user), low2highgid(group)); - /* avoid REGPARM breakage on x86: */ - asmlinkage_protect(3, ret, filename, user, group); - return ret; + return sys_lchown(filename, low2highuid(user), low2highgid(group)); } SYSCALL_DEFINE3(fchown16, unsigned int, fd, old_uid_t, user, old_gid_t, group) { - long ret = sys_fchown(fd, low2highuid(user), low2highgid(group)); - /* avoid REGPARM breakage on x86: */ - asmlinkage_protect(3, ret, fd, user, group); - return ret; + return sys_fchown(fd, low2highuid(user), low2highgid(group)); } SYSCALL_DEFINE2(setregid16, old_gid_t, rgid, old_gid_t, egid) { - long ret = sys_setregid(low2highgid(rgid), low2highgid(egid)); - /* avoid REGPARM breakage on x86: */ - asmlinkage_protect(2, ret, rgid, egid); - return ret; + return sys_setregid(low2highgid(rgid), low2highgid(egid)); } SYSCALL_DEFINE1(setgid16, old_gid_t, gid) { - long ret = sys_setgid(low2highgid(gid)); - /* avoid REGPARM breakage on x86: */ - asmlinkage_protect(1, ret, gid); - return ret; + return sys_setgid(low2highgid(gid)); } SYSCALL_DEFINE2(setreuid16, old_uid_t, ruid, old_uid_t, euid) { - long ret = sys_setreuid(low2highuid(ruid), low2highuid(euid)); - /* avoid REGPARM breakage on x86: */ - asmlinkage_protect(2, ret, ruid, euid); - return ret; + return sys_setreuid(low2highuid(ruid), low2highuid(euid)); } 
SYSCALL_DEFINE1(setuid16, old_uid_t, uid) { - long ret = sys_setuid(low2highuid(uid)); - /* avoid REGPARM breakage on x86: */ - asmlinkage_protect(1, ret, uid); - return ret; + return sys_setuid(low2highuid(uid)); } SYSCALL_DEFINE3(setresuid16, old_uid_t, ruid, old_uid_t, euid, old_uid_t, suid) { - long ret = sys_setresuid(low2highuid(ruid), low2highuid(euid), + return sys_setresuid(low2highuid(ruid), low2highuid(euid), low2highuid(suid)); - /* avoid REGPARM breakage on x86: */ - asmlinkage_protect(3, ret, ruid, euid, suid); - return ret; } SYSCALL_DEFINE3(getresuid16, old_uid_t __user *, ruidp, old_uid_t __user *, euidp, old_uid_t __user *, suidp) @@ -100,11 +76,8 @@ SYSCALL_DEFINE3(getresuid16, old_uid_t __user *, ruidp, old_uid_t __user *, euid SYSCALL_DEFINE3(setresgid16, old_gid_t, rgid, old_gid_t, egid, old_gid_t, sgid) { - long ret = sys_setresgid(low2highgid(rgid), low2highgid(egid), + return sys_setresgid(low2highgid(rgid), low2highgid(egid), low2highgid(sgid)); - /* avoid REGPARM breakage on x86: */ - asmlinkage_protect(3, ret, rgid, egid, sgid); - return ret; } @@ -127,18 +100,12 @@ SYSCALL_DEFINE3(getresgid16, old_gid_t __user *, rgidp, old_gid_t __user *, egid SYSCALL_DEFINE1(setfsuid16, old_uid_t, uid) { - long ret = sys_setfsuid(low2highuid(uid)); - /* avoid REGPARM breakage on x86: */ - asmlinkage_protect(1, ret, uid); - return ret; + return sys_setfsuid(low2highuid(uid)); } SYSCALL_DEFINE1(setfsgid16, old_gid_t, gid) { - long ret = sys_setfsgid(low2highgid(gid)); - /* avoid REGPARM breakage on x86: */ - asmlinkage_protect(1, ret, gid); - return ret; + return sys_setfsgid(low2highgid(gid)); } static int groups16_to_user(old_gid_t __user *grouplist, -- cgit From 19f4fc3aee180000fe45952691bbe69dde1d9e95 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 24 Feb 2013 02:17:03 -0500 Subject: convert sendfile{,64} to COMPAT_SYSCALL_DEFINE Signed-off-by: Al Viro --- arch/mips/kernel/linux32.c | 20 ----------------- arch/mips/kernel/scall64-n32.S | 2 +- arch/mips/kernel/scall64-o32.S | 2 +- arch/parisc/kernel/sys_parisc32.c | 19 ---------------- arch/parisc/kernel/syscall_table.S | 4 ++-- arch/powerpc/include/asm/systbl.h | 4 ++-- arch/powerpc/kernel/sys_ppc32.c | 18 ---------------- arch/s390/kernel/compat_linux.c | 42 ------------------------------------ arch/s390/kernel/compat_linux.h | 4 ---- arch/s390/kernel/compat_wrapper.S | 14 ------------ arch/s390/kernel/syscalls.S | 4 ++-- arch/sparc/kernel/sys32.S | 1 - arch/sparc/kernel/systbls_64.S | 2 +- arch/x86/ia32/sys_ia32.c | 20 ----------------- arch/x86/include/asm/sys_ia32.h | 1 - arch/x86/syscalls/syscall_32.tbl | 2 +- fs/compat.c | 22 ------------------- fs/read_write.c | 44 ++++++++++++++++++++++++++++++++++++-- fs/read_write.h | 2 -- include/linux/compat.h | 2 ++ 20 files changed, 54 insertions(+), 175 deletions(-) (limited to 'include/linux') diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c index 8eeee1c860c0..b0cc2a7df59f 100644 --- a/arch/mips/kernel/linux32.c +++ b/arch/mips/kernel/linux32.c @@ -226,26 +226,6 @@ SYSCALL_DEFINE1(32_personality, unsigned long, personality) return ret; } -SYSCALL_DEFINE4(32_sendfile, long, out_fd, long, in_fd, - compat_off_t __user *, offset, s32, count) -{ - mm_segment_t old_fs = get_fs(); - int ret; - off_t of; - - if (offset && get_user(of, offset)) - return -EFAULT; - - set_fs(KERNEL_DS); - ret = sys_sendfile(out_fd, in_fd, offset ? 
(off_t __user *)&of : NULL, count); - set_fs(old_fs); - - if (offset && put_user(of, offset)) - return -EFAULT; - - return ret; -} - asmlinkage ssize_t sys32_readahead(int fd, u32 pad0, u64 a2, u64 a3, size_t count) { diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index 693d60b0855f..9b4df498fc5b 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -143,7 +143,7 @@ EXPORT(sysn32_call_table) PTR compat_sys_setitimer PTR sys_alarm PTR sys_getpid - PTR sys_32_sendfile + PTR compat_sys_sendfile PTR sys_socket /* 6040 */ PTR sys_connect PTR sys_accept diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index af8887f779f1..c1a70e805751 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -399,7 +399,7 @@ sys_call_table: PTR sys_capget PTR sys_capset /* 4205 */ PTR compat_sys_sigaltstack - PTR sys_32_sendfile + PTR compat_sys_sendfile PTR sys_ni_syscall PTR sys_ni_syscall PTR sys_mips_mmap2 /* 4210 */ diff --git a/arch/parisc/kernel/sys_parisc32.c b/arch/parisc/kernel/sys_parisc32.c index 051c8b90231f..035ab3f94814 100644 --- a/arch/parisc/kernel/sys_parisc32.c +++ b/arch/parisc/kernel/sys_parisc32.c @@ -60,25 +60,6 @@ asmlinkage long sys32_unimplemented(int r26, int r25, int r24, int r23, return -ENOSYS; } -/* Note: it is necessary to treat out_fd and in_fd as unsigned ints, with the - * corresponding cast to a signed int to insure that the proper conversion - * (sign extension) between the register representation of a signed int (msr in - * 32-bit mode) and the register representation of a signed int (msr in 64-bit - * mode) is performed. - */ -asmlinkage long sys32_sendfile(u32 out_fd, u32 in_fd, - compat_off_t __user *offset, compat_size_t count) -{ - return compat_sys_sendfile((int)out_fd, (int)in_fd, offset, count); -} - -asmlinkage long sys32_sendfile64(u32 out_fd, u32 in_fd, - compat_loff_t __user *offset, compat_size_t count) -{ - return sys_sendfile64((int)out_fd, (int)in_fd, - (loff_t __user *)offset, count); -} - asmlinkage long sys32_semctl(int semid, int semnum, int cmd, union semun arg) { union semun u; diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S index f57dc137b8dd..f232672a9e20 100644 --- a/arch/parisc/kernel/syscall_table.S +++ b/arch/parisc/kernel/syscall_table.S @@ -198,7 +198,7 @@ ENTRY_SAME(madvise) ENTRY_SAME(clone_wrapper) /* 120 */ ENTRY_SAME(setdomainname) - ENTRY_DIFF(sendfile) + ENTRY_COMP(sendfile) /* struct sockaddr... 
*/ ENTRY_SAME(recvfrom) /* struct timex contains longs */ @@ -304,7 +304,7 @@ ENTRY_SAME(gettid) ENTRY_OURS(readahead) ENTRY_SAME(tkill) - ENTRY_DIFF(sendfile64) + ENTRY_COMP(sendfile64) ENTRY_COMP(futex) /* 210 */ ENTRY_COMP(sched_setaffinity) ENTRY_COMP(sched_getaffinity) diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index 535b6d8a41cc..634db7d2dc92 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -190,7 +190,7 @@ SYSCALL_SPU(getcwd) SYSCALL_SPU(capget) SYSCALL_SPU(capset) COMPAT_SYS(sigaltstack) -SYSX_SPU(sys_sendfile,compat_sys_sendfile_wrapper,sys_sendfile) +COMPAT_SYS_SPU(sendfile) SYSCALL(ni_syscall) SYSCALL(ni_syscall) PPC_SYS(vfork) @@ -230,7 +230,7 @@ COMPAT_SYS_SPU(sched_setaffinity) COMPAT_SYS_SPU(sched_getaffinity) SYSCALL(ni_syscall) SYSCALL(ni_syscall) -SYSX(sys_ni_syscall,compat_sys_sendfile64_wrapper,sys_sendfile64) +SYS32ONLY(sendfile64) COMPAT_SYS_SPU(io_setup) SYSCALL_SPU(io_destroy) COMPAT_SYS_SPU(io_getevents) diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c index d0bafc0cdf06..6e7c2509bd2d 100644 --- a/arch/powerpc/kernel/sys_ppc32.c +++ b/arch/powerpc/kernel/sys_ppc32.c @@ -128,24 +128,6 @@ long compat_sys_ipc(u32 call, u32 first, u32 second, u32 third, compat_uptr_t pt } #endif -/* Note: it is necessary to treat out_fd and in_fd as unsigned ints, - * with the corresponding cast to a signed int to insure that the - * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) - * and the register representation of a signed int (msr in 64-bit mode) is performed. - */ -asmlinkage long compat_sys_sendfile_wrapper(u32 out_fd, u32 in_fd, - compat_off_t __user *offset, u32 count) -{ - return compat_sys_sendfile((int)out_fd, (int)in_fd, offset, count); -} - -asmlinkage long compat_sys_sendfile64_wrapper(u32 out_fd, u32 in_fd, - compat_loff_t __user *offset, u32 count) -{ - return sys_sendfile((int)out_fd, (int)in_fd, - (off_t __user *)offset, count); -} - unsigned long compat_sys_mmap2(unsigned long addr, size_t len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff) diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 19f26de27fae..fbd29c70a297 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -373,48 +373,6 @@ asmlinkage compat_ssize_t sys32_readahead(int fd, u32 offhi, u32 offlo, s32 coun return sys_readahead(fd, ((loff_t)AA(offhi) << 32) | AA(offlo), count); } -asmlinkage long sys32_sendfile(int out_fd, int in_fd, compat_off_t __user *offset, size_t count) -{ - mm_segment_t old_fs = get_fs(); - int ret; - off_t of; - - if (offset && get_user(of, offset)) - return -EFAULT; - - set_fs(KERNEL_DS); - ret = sys_sendfile(out_fd, in_fd, - offset ? (off_t __force __user *) &of : NULL, count); - set_fs(old_fs); - - if (offset && put_user(of, offset)) - return -EFAULT; - - return ret; -} - -asmlinkage long sys32_sendfile64(int out_fd, int in_fd, - compat_loff_t __user *offset, s32 count) -{ - mm_segment_t old_fs = get_fs(); - int ret; - loff_t lof; - - if (offset && get_user(lof, offset)) - return -EFAULT; - - set_fs(KERNEL_DS); - ret = sys_sendfile64(out_fd, in_fd, - offset ? 
(loff_t __force __user *) &lof : NULL, - count); - set_fs(old_fs); - - if (offset && put_user(lof, offset)) - return -EFAULT; - - return ret; -} - struct stat64_emu31 { unsigned long long st_dev; unsigned int __pad1; diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h index 00d92a5a6f6c..bce0b7aec8f9 100644 --- a/arch/s390/kernel/compat_linux.h +++ b/arch/s390/kernel/compat_linux.h @@ -106,10 +106,6 @@ long sys32_pread64(unsigned int fd, char __user *ubuf, size_t count, long sys32_pwrite64(unsigned int fd, const char __user *ubuf, size_t count, u32 poshi, u32 poslo); compat_ssize_t sys32_readahead(int fd, u32 offhi, u32 offlo, s32 count); -long sys32_sendfile(int out_fd, int in_fd, compat_off_t __user *offset, - size_t count); -long sys32_sendfile64(int out_fd, int in_fd, compat_loff_t __user *offset, - s32 count); long sys32_stat64(const char __user * filename, struct stat64_emu31 __user * statbuf); long sys32_lstat64(const char __user * filename, struct stat64_emu31 __user * statbuf); diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index 626cc6f0f446..a1dda9c67efe 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -666,13 +666,6 @@ ENTRY(sys32_capset_wrapper) llgtr %r3,%r3 # const cap_user_data_t jg sys_capset # branch to system call -ENTRY(sys32_sendfile_wrapper) - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - llgtr %r4,%r4 # __kernel_off_emu31_t * - llgfr %r5,%r5 # size_t - jg sys32_sendfile # branch to system call - #sys32_vfork_wrapper # done in vfork_glue ENTRY(sys32_truncate64_wrapper) @@ -1348,13 +1341,6 @@ ENTRY(sys32_readahead_wrapper) lgfr %r5,%r5 # s32 jg sys32_readahead # branch to system call -ENTRY(sys32_sendfile64_wrapper) - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - llgtr %r4,%r4 # compat_loff_t * - lgfr %r5,%r5 # s32 - jg sys32_sendfile64 # branch to system call - ENTRY(sys_tkill_wrapper) lgfr %r2,%r2 # pid_t lgfr %r3,%r3 # int diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 2695bb89699e..5f3f7fbc5465 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -195,7 +195,7 @@ SYSCALL(sys_getcwd,sys_getcwd,sys32_getcwd_wrapper) SYSCALL(sys_capget,sys_capget,sys32_capget_wrapper) SYSCALL(sys_capset,sys_capset,sys32_capset_wrapper) /* 185 */ SYSCALL(sys_sigaltstack,sys_sigaltstack,compat_sys_sigaltstack) -SYSCALL(sys_sendfile,sys_sendfile64,sys32_sendfile_wrapper) +SYSCALL(sys_sendfile,sys_sendfile64,compat_sys_sendfile) NI_SYSCALL /* streams1 */ NI_SYSCALL /* streams2 */ SYSCALL(sys_vfork,sys_vfork,sys_vfork) /* 190 */ @@ -231,7 +231,7 @@ SYSCALL(sys_madvise,sys_madvise,sys32_madvise_wrapper) SYSCALL(sys_getdents64,sys_getdents64,sys32_getdents64_wrapper) /* 220 */ SYSCALL(sys_fcntl64,sys_ni_syscall,compat_sys_fcntl64_wrapper) SYSCALL(sys_readahead,sys_readahead,sys32_readahead_wrapper) -SYSCALL(sys_sendfile64,sys_ni_syscall,sys32_sendfile64_wrapper) +SYSCALL(sys_sendfile64,sys_ni_syscall,compat_sys_sendfile64) SYSCALL(sys_setxattr,sys_setxattr,sys32_setxattr_wrapper) SYSCALL(sys_lsetxattr,sys_lsetxattr,sys32_lsetxattr_wrapper) /* 225 */ SYSCALL(sys_fsetxattr,sys_fsetxattr,sys32_fsetxattr_wrapper) diff --git a/arch/sparc/kernel/sys32.S b/arch/sparc/kernel/sys32.S index 240a3cecc11e..6c65d69c6635 100644 --- a/arch/sparc/kernel/sys32.S +++ b/arch/sparc/kernel/sys32.S @@ -46,7 +46,6 @@ SIGN1(sys32_io_submit, compat_sys_io_submit, %o1) SIGN1(sys32_mq_open, compat_sys_mq_open, %o1) SIGN1(sys32_select, compat_sys_select, %o0) 
SIGN3(sys32_futex, compat_sys_futex, %o1, %o2, %o5) -SIGN2(sys32_sendfile, compat_sys_sendfile, %o0, %o1) SIGN1(sys32_recvfrom, compat_sys_recvfrom, %o0) SIGN1(sys32_recvmsg, compat_sys_recvmsg, %o0) SIGN1(sys32_sendmsg, compat_sys_sendmsg, %o0) diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S index 088134834dab..a1444d0d08ee 100644 --- a/arch/sparc/kernel/systbls_64.S +++ b/arch/sparc/kernel/systbls_64.S @@ -25,7 +25,7 @@ sys_call_table32: /*20*/ .word sys_getpid, sys_capget, sys_capset, sys_setuid16, sys_getuid16 /*25*/ .word sys32_vmsplice, compat_sys_ptrace, sys_alarm, compat_sys_sigaltstack, sys_pause /*30*/ .word compat_sys_utime, sys_lchown, sys_fchown, sys_access, sys_nice - .word sys_chown, sys_sync, sys_kill, compat_sys_newstat, sys32_sendfile + .word sys_chown, sys_sync, sys_kill, compat_sys_newstat, compat_sys_sendfile /*40*/ .word compat_sys_newlstat, sys_dup, sys_sparc_pipe, compat_sys_times, sys_getuid .word sys_umount, sys_setgid16, sys_getgid16, sys_signal, sys_geteuid16 /*50*/ .word sys_getegid16, sys_acct, sys_nis_syscall, sys_getgid, compat_sys_ioctl diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index ad7a20cbc699..ad6ca0472722 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -194,26 +194,6 @@ asmlinkage long sys32_pwrite(unsigned int fd, const char __user *ubuf, } -asmlinkage long sys32_sendfile(int out_fd, int in_fd, - compat_off_t __user *offset, s32 count) -{ - mm_segment_t old_fs = get_fs(); - int ret; - off_t of; - - if (offset && get_user(of, offset)) - return -EFAULT; - - set_fs(KERNEL_DS); - ret = sys_sendfile(out_fd, in_fd, offset ? (off_t __user *)&of : NULL, - count); - set_fs(old_fs); - - if (offset && put_user(of, offset)) - return -EFAULT; - return ret; -} - /* * Some system calls that need sign extended arguments. This could be * done by a generic wrapper. 
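[Aside on the mechanism, not part of the patch: the "generic wrapper" the removed comment wishes for is exactly what COMPAT_SYSCALL_DEFINEn supplies. Every argument reaches the wrapper as a full register-width value and is narrowed through its declared 32-bit compat type, which re-extends the sign before the syscall body runs; that is why the hand-written sys32_sendfile stubs can be deleted across architectures. A minimal userspace sketch of that narrowing follows. All names are hypothetical and two's-complement narrowing is assumed; this is not the kernel macro itself.]

/* Illustrative only: models how a generic compat wrapper narrows
 * register-width arguments through 32-bit compat types, so each
 * architecture no longer needs its own sign-extending stub. */
#include <stdint.h>
#include <stdio.h>

typedef int32_t compat_int_t;	/* stand-in for a kernel compat type */

/* the syscall body sees properly typed, sign-extended values */
static long body_sendfile(int out_fd, int in_fd)
{
	if (out_fd < 0 || in_fd < 0)
		return -9;	/* -EBADF */
	return 0;
}

/* the generated wrapper: arguments arrive as raw register words */
static long wrapper_sendfile(unsigned long r2, unsigned long r3)
{
	/* truncate to 32 bits, then let the int conversion re-extend
	 * the sign; this replaces per-arch stubs like s390's
	 * "lgfr %r2,%r2" or sparc's SIGN2() entries seen above */
	return body_sendfile((compat_int_t)r2, (compat_int_t)r3);
}

int main(void)
{
	/* all register bits set: the low 32 bits encode fd -1, the
	 * high bits are junk left behind by the 32-bit caller */
	unsigned long reg = ~0UL;

	printf("%ld\n", wrapper_sendfile(reg, 3));	/* prints -9 */
	return 0;
}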
diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h index 8459efc39686..6d944e4bb524 100644 --- a/arch/x86/include/asm/sys_ia32.h +++ b/arch/x86/include/asm/sys_ia32.h @@ -41,7 +41,6 @@ asmlinkage long sys32_pread(unsigned int, char __user *, u32, u32, u32); asmlinkage long sys32_pwrite(unsigned int, const char __user *, u32, u32, u32); asmlinkage long sys32_personality(unsigned long); -asmlinkage long sys32_sendfile(int, int, compat_off_t __user *, s32); long sys32_kill(int, int); long sys32_fadvise64_64(int, __u32, __u32, __u32, __u32, int); diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index e6d55f0064df..6a00b1257d68 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -193,7 +193,7 @@ 184 i386 capget sys_capget 185 i386 capset sys_capset 186 i386 sigaltstack sys_sigaltstack compat_sys_sigaltstack -187 i386 sendfile sys_sendfile sys32_sendfile +187 i386 sendfile sys_sendfile compat_sys_sendfile 188 i386 getpmsg 189 i386 putpmsg 190 i386 vfork sys_vfork stub32_vfork diff --git a/fs/compat.c b/fs/compat.c index cc09312f9aed..2ae2a98891cd 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1718,25 +1718,3 @@ COMPAT_SYSCALL_DEFINE3(open_by_handle_at, int, mountdirfd, return do_handle_open(mountdirfd, handle, flags); } #endif - -#ifdef __ARCH_WANT_COMPAT_SYS_SENDFILE -asmlinkage long compat_sys_sendfile(int out_fd, int in_fd, - compat_off_t __user *offset, compat_size_t count) -{ - loff_t pos; - off_t off; - ssize_t ret; - - if (offset) { - if (unlikely(get_user(off, offset))) - return -EFAULT; - pos = off; - ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS); - if (unlikely(put_user(pos, offset))) - return -EFAULT; - return ret; - } - - return do_sendfile(out_fd, in_fd, NULL, count, 0); -} -#endif /* __ARCH_WANT_COMPAT_SYS_SENDFILE */ diff --git a/fs/read_write.c b/fs/read_write.c index dcfd58d95f44..f738e4dccfab 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -853,8 +853,8 @@ SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec, return ret; } -ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, size_t count, - loff_t max) +static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, + size_t count, loff_t max) { struct fd in, out; struct inode *in_inode, *out_inode; @@ -978,3 +978,43 @@ SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, loff_t __user *, offset, si return do_sendfile(out_fd, in_fd, NULL, count, 0); } + +#ifdef CONFIG_COMPAT +COMPAT_SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, + compat_off_t __user *, offset, compat_size_t, count) +{ + loff_t pos; + off_t off; + ssize_t ret; + + if (offset) { + if (unlikely(get_user(off, offset))) + return -EFAULT; + pos = off; + ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS); + if (unlikely(put_user(pos, offset))) + return -EFAULT; + return ret; + } + + return do_sendfile(out_fd, in_fd, NULL, count, 0); +} + +COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, + compat_loff_t __user *, offset, compat_size_t, count) +{ + loff_t pos; + ssize_t ret; + + if (offset) { + if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t)))) + return -EFAULT; + ret = do_sendfile(out_fd, in_fd, &pos, count, 0); + if (unlikely(put_user(pos, offset))) + return -EFAULT; + return ret; + } + + return do_sendfile(out_fd, in_fd, NULL, count, 0); +} +#endif diff --git a/fs/read_write.h b/fs/read_write.h index d3e00ef67420..d07b954c6e0c 100644 --- a/fs/read_write.h +++ b/fs/read_write.h @@ -12,5 +12,3 
@@ ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov, unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn); ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov, unsigned long nr_segs, loff_t *ppos, io_fn_t fn); -ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, size_t count, - loff_t max); diff --git a/include/linux/compat.h b/include/linux/compat.h index 110132527e4c..ad299afcd488 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -670,6 +670,8 @@ asmlinkage ssize_t compat_sys_process_vm_writev(compat_pid_t pid, asmlinkage long compat_sys_sendfile(int out_fd, int in_fd, compat_off_t __user *offset, compat_size_t count); +asmlinkage long compat_sys_sendfile64(int out_fd, int in_fd, + compat_loff_t __user *offset, compat_size_t count); asmlinkage long compat_sys_sigaltstack(const compat_stack_t __user *uss_ptr, compat_stack_t __user *uoss_ptr); -- cgit From 35280bd4a3fa841897e2638437607fdec6c34f31 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 24 Feb 2013 14:52:17 -0500 Subject: switch epoll_pwait to COMPAT_SYSCALL_DEFINE Signed-off-by: Al Viro --- arch/s390/kernel/compat_wrapper.S | 10 -------- arch/s390/kernel/syscalls.S | 2 +- fs/compat.c | 49 --------------------------------------- fs/eventpoll.c | 47 +++++++++++++++++++++++++++++++++++++ include/linux/compat.h | 5 ++-- 5 files changed, 50 insertions(+), 63 deletions(-) (limited to 'include/linux') diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index a1dda9c67efe..52bea71d93e6 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -1270,16 +1270,6 @@ ENTRY(sys_getcpu_wrapper) llgtr %r4,%r4 # struct getcpu_cache * jg sys_getcpu -ENTRY(compat_sys_epoll_pwait_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # struct compat_epoll_event * - lgfr %r4,%r4 # int - lgfr %r5,%r5 # int - llgtr %r6,%r6 # compat_sigset_t * - llgf %r0,164(%r15) # compat_size_t - stg %r0,160(%r15) - jg compat_sys_epoll_pwait - ENTRY(compat_sys_utimes_wrapper) llgtr %r2,%r2 # char * llgtr %r3,%r3 # struct compat_timeval * diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 5f3f7fbc5465..63d6b4343193 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -320,7 +320,7 @@ SYSCALL(sys_tee,sys_tee,sys_tee_wrapper) SYSCALL(sys_vmsplice,sys_vmsplice,compat_sys_vmsplice_wrapper) NI_SYSCALL /* 310 sys_move_pages */ SYSCALL(sys_getcpu,sys_getcpu,sys_getcpu_wrapper) -SYSCALL(sys_epoll_pwait,sys_epoll_pwait,compat_sys_epoll_pwait_wrapper) +SYSCALL(sys_epoll_pwait,sys_epoll_pwait,compat_sys_epoll_pwait) SYSCALL(sys_utimes,sys_utimes,compat_sys_utimes_wrapper) SYSCALL(sys_s390_fallocate,sys_fallocate,sys_fallocate_wrapper) SYSCALL(sys_utimensat,sys_utimensat,compat_sys_utimensat_wrapper) /* 315 */ diff --git a/fs/compat.c b/fs/compat.c index 2ae2a98891cd..45137a3832f3 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -44,7 +44,6 @@ #include #include #include -#include #include #include #include @@ -1659,54 +1658,6 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds, return ret; } -#ifdef CONFIG_EPOLL - -asmlinkage long compat_sys_epoll_pwait(int epfd, - struct compat_epoll_event __user *events, - int maxevents, int timeout, - const compat_sigset_t __user *sigmask, - compat_size_t sigsetsize) -{ - long err; - compat_sigset_t csigmask; - sigset_t ksigmask, sigsaved; - - /* - * If the caller wants a certain signal mask to be set during the wait, - * we apply it here. 
- */ - if (sigmask) { - if (sigsetsize != sizeof(compat_sigset_t)) - return -EINVAL; - if (copy_from_user(&csigmask, sigmask, sizeof(csigmask))) - return -EFAULT; - sigset_from_compat(&ksigmask, &csigmask); - sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP)); - sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); - } - - err = sys_epoll_wait(epfd, events, maxevents, timeout); - - /* - * If we changed the signal mask, we need to restore the original one. - * In case we've got a signal while waiting, we do not restore the - * signal mask yet, and we allow do_signal() to deliver the signal on - * the way back to userspace, before the signal mask is restored. - */ - if (sigmask) { - if (err == -EINTR) { - memcpy(&current->saved_sigmask, &sigsaved, - sizeof(sigsaved)); - set_restore_sigmask(); - } else - sigprocmask(SIG_SETMASK, &sigsaved, NULL); - } - - return err; -} - -#endif /* CONFIG_EPOLL */ - #ifdef CONFIG_FHANDLE /* * Exactly like fs/open.c:sys_open_by_handle_at(), except that it diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 9fec1836057a..495d15558f42 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -40,6 +40,7 @@ #include #include #include +#include <linux/compat.h> /* * LOCKING: @@ -1940,6 +1941,52 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events, return error; } +#ifdef CONFIG_COMPAT +COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd, + struct epoll_event __user *, events, + int, maxevents, int, timeout, + const compat_sigset_t __user *, sigmask, + compat_size_t, sigsetsize) +{ + long err; + compat_sigset_t csigmask; + sigset_t ksigmask, sigsaved; + + /* + * If the caller wants a certain signal mask to be set during the wait, + * we apply it here. + */ + if (sigmask) { + if (sigsetsize != sizeof(compat_sigset_t)) + return -EINVAL; + if (copy_from_user(&csigmask, sigmask, sizeof(csigmask))) + return -EFAULT; + sigset_from_compat(&ksigmask, &csigmask); + sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP)); + sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); + } + + err = sys_epoll_wait(epfd, events, maxevents, timeout); + + /* + * If we changed the signal mask, we need to restore the original one. + * In case we've got a signal while waiting, we do not restore the + * signal mask yet, and we allow do_signal() to deliver the signal on + * the way back to userspace, before the signal mask is restored. + */ + if (sigmask) { + if (err == -EINTR) { + memcpy(&current->saved_sigmask, &sigsaved, + sizeof(sigsaved)); + set_restore_sigmask(); + } else + sigprocmask(SIG_SETMASK, &sigsaved, NULL); + } + + return err; +} +#endif + static int __init eventpoll_init(void) { struct sysinfo si; diff --git a/include/linux/compat.h b/include/linux/compat.h index ad299afcd488..cdec8f2e9e21 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -432,10 +432,9 @@ asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid, /* * epoll (fs/eventpoll.c) compat bits follow ...
*/ -struct epoll_event; -#define compat_epoll_event epoll_event +struct epoll_event; /* fortunately, this one is fixed-layout */ asmlinkage long compat_sys_epoll_pwait(int epfd, - struct compat_epoll_event __user *events, + struct epoll_event __user *events, int maxevents, int timeout, const compat_sigset_t __user *sigmask, compat_size_t sigsetsize); -- cgit From d5dc77bfeeab0b03a32e3db5e31e2f64605634ab Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 25 Feb 2013 18:42:04 -0500 Subject: consolidate compat lookup_dcookie() Signed-off-by: Al Viro --- arch/arm64/kernel/sys32.S | 7 ------- arch/mips/kernel/linux32.c | 6 ------ arch/mips/kernel/scall64-o32.S | 2 +- arch/parisc/kernel/sys_parisc32.c | 7 ------- arch/parisc/kernel/syscall_table.S | 2 +- arch/powerpc/include/asm/systbl.h | 2 +- arch/powerpc/kernel/sys_ppc32.c | 7 ------- arch/s390/kernel/compat_wrapper.S | 7 ------- arch/s390/kernel/syscalls.S | 2 +- arch/sparc/kernel/sys_sparc32.c | 8 -------- arch/sparc/kernel/systbls_64.S | 2 +- arch/tile/kernel/compat.c | 5 ----- arch/x86/ia32/sys_ia32.c | 6 ------ arch/x86/include/asm/sys_ia32.h | 1 - arch/x86/syscalls/syscall_32.tbl | 2 +- fs/dcookies.c | 12 ++++++++++++ include/linux/compat.h | 1 + kernel/sys_ni.c | 1 + 18 files changed, 20 insertions(+), 60 deletions(-) (limited to 'include/linux') diff --git a/arch/arm64/kernel/sys32.S b/arch/arm64/kernel/sys32.S index 9416d045a687..db01aa978c41 100644 --- a/arch/arm64/kernel/sys32.S +++ b/arch/arm64/kernel/sys32.S @@ -84,13 +84,6 @@ compat_sys_readahead_wrapper: b sys_readahead ENDPROC(compat_sys_readahead_wrapper) -compat_sys_lookup_dcookie: - orr x0, x0, x1, lsl #32 - mov w1, w2 - mov w2, w3 - b sys_lookup_dcookie -ENDPROC(compat_sys_lookup_dcookie) - compat_sys_fadvise64_64_wrapper: mov w6, w1 orr x1, x2, x3, lsl #32 diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c index b0cc2a7df59f..6852d4876f82 100644 --- a/arch/mips/kernel/linux32.c +++ b/arch/mips/kernel/linux32.c @@ -259,12 +259,6 @@ asmlinkage long sys32_fallocate(int fd, int mode, unsigned offset_a2, merge_64(len_a4, len_a5)); } -asmlinkage long sys32_lookup_dcookie(u32 a0, u32 a1, char __user *buf, - size_t len) -{ - return sys_lookup_dcookie(merge_64(a0, a1), buf, len); -} - SYSCALL_DEFINE6(32_fanotify_mark, int, fanotify_fd, unsigned int, flags, u64, a3, u64, a4, int, dfd, const char __user *, pathname) { diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index c1a70e805751..91c8c6ea7b09 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -439,7 +439,7 @@ sys_call_table: PTR compat_sys_io_submit PTR sys_io_cancel /* 4245 */ PTR sys_exit_group - PTR sys32_lookup_dcookie + PTR compat_sys_lookup_dcookie PTR sys_epoll_create PTR sys_epoll_ctl PTR sys_epoll_wait /* 4250 */ diff --git a/arch/parisc/kernel/sys_parisc32.c b/arch/parisc/kernel/sys_parisc32.c index 035ab3f94814..46bdf6080fe4 100644 --- a/arch/parisc/kernel/sys_parisc32.c +++ b/arch/parisc/kernel/sys_parisc32.c @@ -75,13 +75,6 @@ asmlinkage long sys32_semctl(int semid, int semnum, int cmd, union semun arg) return sys_semctl (semid, semnum, cmd, arg); } -long sys32_lookup_dcookie(u32 cookie_high, u32 cookie_low, char __user *buf, - size_t len) -{ - return sys_lookup_dcookie((u64)cookie_high << 32 | cookie_low, - buf, len); -} - asmlinkage long compat_sys_fanotify_mark(int fan_fd, int flags, u32 mask_hi, u32 mask_lo, int fd, const char __user *pathname) diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S index 
f232672a9e20..30c9a3bba1cc 100644 --- a/arch/parisc/kernel/syscall_table.S +++ b/arch/parisc/kernel/syscall_table.S @@ -318,7 +318,7 @@ ENTRY_SAME(alloc_hugepages) /* 220 */ ENTRY_SAME(free_hugepages) ENTRY_SAME(exit_group) - ENTRY_DIFF(lookup_dcookie) + ENTRY_COMP(lookup_dcookie) ENTRY_SAME(epoll_create) ENTRY_SAME(epoll_ctl) /* 225 */ ENTRY_SAME(epoll_wait) diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index 634db7d2dc92..afef04d6ee52 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -239,7 +239,7 @@ SYSCALL_SPU(io_cancel) SYSCALL(set_tid_address) SYSX_SPU(sys_fadvise64,ppc32_fadvise64,sys_fadvise64) SYSCALL(exit_group) -SYSX(sys_lookup_dcookie,ppc32_lookup_dcookie,sys_lookup_dcookie) +COMPAT_SYS(lookup_dcookie) SYSCALL_SPU(epoll_create) SYSCALL_SPU(epoll_ctl) SYSCALL_SPU(epoll_wait) diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c index 6e7c2509bd2d..e695230ca181 100644 --- a/arch/powerpc/kernel/sys_ppc32.c +++ b/arch/powerpc/kernel/sys_ppc32.c @@ -177,13 +177,6 @@ asmlinkage int compat_sys_ftruncate64(unsigned int fd, u32 reg4, unsigned long h return sys_ftruncate(fd, (high << 32) | low); } -long ppc32_lookup_dcookie(u32 cookie_high, u32 cookie_low, char __user *buf, - size_t len) -{ - return sys_lookup_dcookie((u64)cookie_high << 32 | cookie_low, - buf, len); -} - long ppc32_fadvise64(int fd, u32 unused, u32 offset_high, u32 offset_low, size_t len, int advice) { diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index 68117a3dd252..6d4958ea390b 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -926,13 +926,6 @@ ENTRY(sys_epoll_wait_wrapper) lgfr %r5,%r5 # int jg sys_epoll_wait # branch to system call -ENTRY(sys32_lookup_dcookie_wrapper) - sllg %r2,%r2,32 # get high word of 64bit dcookie - or %r2,%r3 # get low word of 64bit dcookie - llgtr %r3,%r4 # char * - llgfr %r4,%r5 # size_t - jg sys_lookup_dcookie - ENTRY(sys32_fadvise64_wrapper) lgfr %r2,%r2 # int sllg %r3,%r3,32 # get high word of 64bit loff_t diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 102254a4397d..9154e17f25b9 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -118,7 +118,7 @@ SYSCALL(sys_newstat,sys_newstat,compat_sys_newstat_wrapper) SYSCALL(sys_newlstat,sys_newlstat,compat_sys_newlstat_wrapper) SYSCALL(sys_newfstat,sys_newfstat,compat_sys_newfstat_wrapper) NI_SYSCALL /* old uname syscall */ -SYSCALL(sys_lookup_dcookie,sys_lookup_dcookie,sys32_lookup_dcookie_wrapper) /* 110 */ +SYSCALL(sys_lookup_dcookie,sys_lookup_dcookie,compat_sys_lookup_dcookie) /* 110 */ SYSCALL(sys_vhangup,sys_vhangup,sys_vhangup) NI_SYSCALL /* old "idle" system call */ NI_SYSCALL /* vm86old for i386 */ diff --git a/arch/sparc/kernel/sys_sparc32.c b/arch/sparc/kernel/sys_sparc32.c index f38f2280fade..5d4ee8374c84 100644 --- a/arch/sparc/kernel/sys_sparc32.c +++ b/arch/sparc/kernel/sys_sparc32.c @@ -303,14 +303,6 @@ long compat_sys_fadvise64_64(int fd, advice); } -long sys32_lookup_dcookie(unsigned long cookie_high, - unsigned long cookie_low, - char __user *buf, size_t len) -{ - return sys_lookup_dcookie((cookie_high << 32) | cookie_low, - buf, len); -} - long compat_sync_file_range(int fd, unsigned long off_high, unsigned long off_low, unsigned long nb_high, unsigned long nb_low, int flags) { return sys_sync_file_range(fd, diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S index 
46d575b6f696..8fd932080215 100644 --- a/arch/sparc/kernel/systbls_64.S +++ b/arch/sparc/kernel/systbls_64.S @@ -59,7 +59,7 @@ sys_call_table32: /*190*/ .word sys_init_module, sys_sparc64_personality, sys_remap_file_pages, sys_epoll_create, sys_epoll_ctl .word sys_epoll_wait, sys_ioprio_set, sys_getppid, compat_sys_sparc_sigaction, sys_sgetmask /*200*/ .word sys_ssetmask, sys_sigsuspend, compat_sys_newlstat, sys_uselib, compat_sys_old_readdir - .word sys32_readahead, sys32_socketcall, sys_syslog, sys32_lookup_dcookie, sys32_fadvise64 + .word sys32_readahead, sys32_socketcall, sys_syslog, compat_sys_lookup_dcookie, sys32_fadvise64 /*210*/ .word sys32_fadvise64_64, sys_tgkill, sys_waitpid, sys_swapoff, compat_sys_sysinfo .word compat_sys_ipc, sys32_sigreturn, sys_clone, sys_ioprio_get, compat_sys_adjtimex /*220*/ .word compat_sys_sigprocmask, sys_ni_syscall, sys_delete_module, sys_ni_syscall, sys_getpgid diff --git a/arch/tile/kernel/compat.c b/arch/tile/kernel/compat.c index 7f72401b4f45..c262a02d8efa 100644 --- a/arch/tile/kernel/compat.c +++ b/arch/tile/kernel/compat.c @@ -54,11 +54,6 @@ long compat_sys_pwrite64(unsigned int fd, char __user *ubuf, size_t count, return sys_pwrite64(fd, ubuf, count, ((loff_t)high << 32) | low); } -long compat_sys_lookup_dcookie(u32 low, u32 high, char __user *buf, size_t len) -{ - return sys_lookup_dcookie(((loff_t)high << 32) | low, buf, len); -} - long compat_sys_sync_file_range2(int fd, unsigned int flags, u32 offset_lo, u32 offset_hi, u32 nbytes_lo, u32 nbytes_hi) diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index ad6ca0472722..c0df976b0b71 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -226,12 +226,6 @@ long sys32_vm86_warning(void) return -ENOSYS; } -long sys32_lookup_dcookie(u32 addr_low, u32 addr_high, - char __user *buf, size_t len) -{ - return sys_lookup_dcookie(((u64)addr_high << 32) | addr_low, buf, len); -} - asmlinkage ssize_t sys32_readahead(int fd, unsigned off_lo, unsigned off_hi, size_t count) { diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h index 6d944e4bb524..2b0e0c2d5379 100644 --- a/arch/x86/include/asm/sys_ia32.h +++ b/arch/x86/include/asm/sys_ia32.h @@ -45,7 +45,6 @@ asmlinkage long sys32_personality(unsigned long); long sys32_kill(int, int); long sys32_fadvise64_64(int, __u32, __u32, __u32, __u32, int); long sys32_vm86_warning(void); -long sys32_lookup_dcookie(u32, u32, char __user *, size_t); asmlinkage ssize_t sys32_readahead(int, unsigned, unsigned, size_t); asmlinkage long sys32_sync_file_range(int, unsigned, unsigned, diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index 6a00b1257d68..0b55cd773e4c 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -259,7 +259,7 @@ 250 i386 fadvise64 sys_fadvise64 sys32_fadvise64 # 251 is available for reuse (was briefly sys_set_zone_reclaim) 252 i386 exit_group sys_exit_group -253 i386 lookup_dcookie sys_lookup_dcookie sys32_lookup_dcookie +253 i386 lookup_dcookie sys_lookup_dcookie compat_sys_lookup_dcookie 254 i386 epoll_create sys_epoll_create 255 i386 epoll_ctl sys_epoll_ctl 256 i386 epoll_wait sys_epoll_wait diff --git a/fs/dcookies.c b/fs/dcookies.c index f08375b97ffb..ab5954b50267 100644 --- a/fs/dcookies.c +++ b/fs/dcookies.c @@ -25,6 +25,7 @@ #include #include #include +#include #include /* The dcookies are allocated from a kmem_cache and @@ -202,6 +203,17 @@ out: return err; } +#ifdef CONFIG_COMPAT +COMPAT_SYSCALL_DEFINE4(lookup_dcookie, 
u32, w0, u32, w1, char __user *, buf, size_t, len) +{ +#ifdef __BIG_ENDIAN + return sys_lookup_dcookie(((u64)w0 << 32) | w1, buf, len); +#else + return sys_lookup_dcookie(((u64)w1 << 32) | w0, buf, len); +#endif +} +#endif + static int dcookie_init(void) { struct list_head * d; diff --git a/include/linux/compat.h b/include/linux/compat.h index cdec8f2e9e21..482c9e65b5bf 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -429,6 +429,7 @@ extern long compat_arch_ptrace(struct task_struct *child, compat_long_t request, asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid, compat_long_t addr, compat_long_t data); +asmlinkage long compat_sys_lookup_dcookie(u32, u32, char __user *, size_t); /* * epoll (fs/eventpoll.c) compat bits follow ... */ diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 395084d4ce16..b50e2a003c5a 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -20,6 +20,7 @@ cond_syscall(sys_quotactl); cond_syscall(sys32_quotactl); cond_syscall(sys_acct); cond_syscall(sys_lookup_dcookie); +cond_syscall(compat_sys_lookup_dcookie); cond_syscall(sys_swapon); cond_syscall(sys_swapoff); cond_syscall(sys_kexec_load); -- cgit From 56e41d3c5aa84d679eebdb3cb8a70b03c5fbd6c3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 21 Jan 2013 23:15:25 -0500 Subject: merge compat sys_ipc instances Signed-off-by: Al Viro --- arch/mips/kernel/linux32.c | 69 --------------------------------------- arch/mips/kernel/scall64-o32.S | 2 +- arch/powerpc/kernel/sys_ppc32.c | 67 ------------------------------------- arch/s390/kernel/compat_linux.c | 44 ++----------------------- arch/s390/kernel/compat_linux.h | 1 - arch/s390/kernel/compat_wrapper.S | 8 ----- arch/s390/kernel/syscalls.S | 2 +- arch/sparc/kernel/sys_sparc32.c | 65 ------------------------------------ arch/x86/ia32/Makefile | 3 -- arch/x86/ia32/ipc32.c | 54 ------------------------------ arch/x86/include/asm/sys_ia32.h | 3 -- arch/x86/syscalls/syscall_32.tbl | 2 +- include/linux/compat.h | 1 + ipc/compat.c | 44 +++++++++++++++++++++++++ kernel/sys_ni.c | 2 +- 15 files changed, 52 insertions(+), 315 deletions(-) delete mode 100644 arch/x86/ia32/ipc32.c (limited to 'include/linux') diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c index 6852d4876f82..7c57b8d7b255 100644 --- a/arch/mips/kernel/linux32.c +++ b/arch/mips/kernel/linux32.c @@ -119,75 +119,6 @@ SYSCALL_DEFINE6(32_pwrite, unsigned int, fd, const char __user *, buf, return sys_pwrite64(fd, buf, count, merge_64(a4, a5)); } -#ifdef CONFIG_SYSVIPC - -SYSCALL_DEFINE6(32_ipc, u32, call, long, first, long, second, long, third, - unsigned long, ptr, unsigned long, fifth) -{ - int version, err; - - version = call >> 16; /* hack for backward compatibility */ - call &= 0xffff; - - switch (call) { - case SEMOP: - /* struct sembuf is the same on 32 and 64bit :)) */ - err = sys_semtimedop(first, compat_ptr(ptr), second, NULL); - break; - case SEMTIMEDOP: - err = compat_sys_semtimedop(first, compat_ptr(ptr), second, - compat_ptr(fifth)); - break; - case SEMGET: - err = sys_semget(first, second, third); - break; - case SEMCTL: - err = compat_sys_semctl(first, second, third, compat_ptr(ptr)); - break; - case MSGSND: - err = compat_sys_msgsnd(first, second, third, compat_ptr(ptr)); - break; - case MSGRCV: - err = compat_sys_msgrcv(first, second, fifth, third, - version, compat_ptr(ptr)); - break; - case MSGGET: - err = sys_msgget((key_t) first, second); - break; - case MSGCTL: - err = compat_sys_msgctl(first, second, compat_ptr(ptr)); - break; - 
case SHMAT: - err = compat_sys_shmat(first, second, third, version, - compat_ptr(ptr)); - break; - case SHMDT: - err = sys_shmdt(compat_ptr(ptr)); - break; - case SHMGET: - err = sys_shmget(first, (unsigned)second, third); - break; - case SHMCTL: - err = compat_sys_shmctl(first, second, compat_ptr(ptr)); - break; - default: - err = -EINVAL; - break; - } - - return err; -} - -#else - -SYSCALL_DEFINE6(32_ipc, u32, call, int, first, int, second, int, third, - u32, ptr, u32, fifth) -{ - return -ENOSYS; -} - -#endif /* CONFIG_SYSVIPC */ - #ifdef CONFIG_MIPS32_N32 SYSCALL_DEFINE4(n32_semctl, int, semid, int, semnum, int, cmd, u32, arg) { diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index 91c8c6ea7b09..103bfe570fe8 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -309,7 +309,7 @@ sys_call_table: PTR compat_sys_wait4 PTR sys_swapoff /* 4115 */ PTR compat_sys_sysinfo - PTR sys_32_ipc + PTR compat_sys_ipc PTR sys_fsync PTR sys32_sigreturn PTR __sys_clone /* 4120 */ diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c index e695230ca181..d78ad7b6c464 100644 --- a/arch/powerpc/kernel/sys_ppc32.c +++ b/arch/powerpc/kernel/sys_ppc32.c @@ -61,73 +61,6 @@ asmlinkage long ppc32_select(u32 n, compat_ulong_t __user *inp, return compat_sys_select((int)n, inp, outp, exp, compat_ptr(tvp_x)); } -#ifdef CONFIG_SYSVIPC -long compat_sys_ipc(u32 call, u32 first, u32 second, u32 third, compat_uptr_t ptr, - u32 fifth) -{ - int version; - - version = call >> 16; /* hack for backward compatibility */ - call &= 0xffff; - - switch (call) { - - case SEMTIMEDOP: - if (fifth) - /* sign extend semid */ - return compat_sys_semtimedop((int)first, - compat_ptr(ptr), second, - compat_ptr(fifth)); - /* else fall through for normal semop() */ - case SEMOP: - /* struct sembuf is the same on 32 and 64bit :)) */ - /* sign extend semid */ - return sys_semtimedop((int)first, compat_ptr(ptr), second, - NULL); - case SEMGET: - /* sign extend key, nsems */ - return sys_semget((int)first, (int)second, third); - case SEMCTL: - /* sign extend semid, semnum */ - return compat_sys_semctl((int)first, (int)second, third, - compat_ptr(ptr)); - - case MSGSND: - /* sign extend msqid */ - return compat_sys_msgsnd((int)first, (int)second, third, - compat_ptr(ptr)); - case MSGRCV: - /* sign extend msqid, msgtyp */ - return compat_sys_msgrcv((int)first, second, (int)fifth, - third, version, compat_ptr(ptr)); - case MSGGET: - /* sign extend key */ - return sys_msgget((int)first, second); - case MSGCTL: - /* sign extend msqid */ - return compat_sys_msgctl((int)first, second, compat_ptr(ptr)); - - case SHMAT: - /* sign extend shmid */ - return compat_sys_shmat((int)first, second, third, version, - compat_ptr(ptr)); - case SHMDT: - return sys_shmdt(compat_ptr(ptr)); - case SHMGET: - /* sign extend key_t */ - return sys_shmget((int)first, second, third); - case SHMCTL: - /* sign extend shmid */ - return compat_sys_shmctl((int)first, second, compat_ptr(ptr)); - - default: - return -ENOSYS; - } - - return -ENOSYS; -} -#endif - unsigned long compat_sys_mmap2(unsigned long addr, size_t len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff) diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index fbd29c70a297..8b6e4f5288a2 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -288,51 +288,13 @@ asmlinkage long sys32_getegid16(void) return 
high2lowgid(from_kgid_munged(current_user_ns(), current_egid())); } -/* - * sys32_ipc() is the de-multiplexer for the SysV IPC calls in 32bit emulation. - * - * This is really horribly ugly. - */ #ifdef CONFIG_SYSVIPC -asmlinkage long sys32_ipc(u32 call, int first, int second, int third, u32 ptr) +COMPAT_SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, unsigned long, second, + unsigned long, third, compat_uptr_t, ptr) { if (call >> 16) /* hack for backward compatibility */ return -EINVAL; - switch (call) { - case SEMTIMEDOP: - return compat_sys_semtimedop(first, compat_ptr(ptr), - second, compat_ptr(third)); - case SEMOP: - /* struct sembuf is the same on 32 and 64bit :)) */ - return sys_semtimedop(first, compat_ptr(ptr), - second, NULL); - case SEMGET: - return sys_semget(first, second, third); - case SEMCTL: - return compat_sys_semctl(first, second, third, - compat_ptr(ptr)); - case MSGSND: - return compat_sys_msgsnd(first, second, third, - compat_ptr(ptr)); - case MSGRCV: - return compat_sys_msgrcv(first, second, 0, third, - 0, compat_ptr(ptr)); - case MSGGET: - return sys_msgget((key_t) first, second); - case MSGCTL: - return compat_sys_msgctl(first, second, compat_ptr(ptr)); - case SHMAT: - return compat_sys_shmat(first, second, third, - 0, compat_ptr(ptr)); - case SHMDT: - return sys_shmdt(compat_ptr(ptr)); - case SHMGET: - return sys_shmget(first, (unsigned)second, third); - case SHMCTL: - return compat_sys_shmctl(first, second, compat_ptr(ptr)); - } - - return -ENOSYS; + return compat_sys_ipc(call, first, second, third, ptr, third); } #endif diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h index bce0b7aec8f9..976518c0592a 100644 --- a/arch/s390/kernel/compat_linux.h +++ b/arch/s390/kernel/compat_linux.h @@ -94,7 +94,6 @@ long sys32_getuid16(void); long sys32_geteuid16(void); long sys32_getgid16(void); long sys32_getegid16(void); -long sys32_ipc(u32 call, int first, int second, int third, u32 ptr); long sys32_truncate64(const char __user * path, unsigned long high, unsigned long low); long sys32_ftruncate64(unsigned int fd, unsigned long high, unsigned long low); diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index 6d4958ea390b..17644c8e10e1 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -388,14 +388,6 @@ ENTRY(compat_sys_sysinfo_wrapper) llgtr %r2,%r2 # struct sysinfo_emu31 * jg compat_sys_sysinfo # branch to system call -ENTRY(sys32_ipc_wrapper) - llgfr %r2,%r2 # uint - lgfr %r3,%r3 # int - lgfr %r4,%r4 # int - lgfr %r5,%r5 # int - llgfr %r6,%r6 # u32 - jg sys32_ipc # branch to system call - ENTRY(sys32_fsync_wrapper) llgfr %r2,%r2 # unsigned int jg sys_fsync # branch to system call diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 9154e17f25b9..d2baabed7148 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -125,7 +125,7 @@ NI_SYSCALL /* vm86old for i386 */ SYSCALL(sys_wait4,sys_wait4,compat_sys_wait4) SYSCALL(sys_swapoff,sys_swapoff,sys32_swapoff_wrapper) /* 115 */ SYSCALL(sys_sysinfo,sys_sysinfo,compat_sys_sysinfo_wrapper) -SYSCALL(sys_s390_ipc,sys_s390_ipc,sys32_ipc_wrapper) +SYSCALL(sys_s390_ipc,sys_s390_ipc,compat_sys_s390_ipc) SYSCALL(sys_fsync,sys_fsync,sys32_fsync_wrapper) SYSCALL(sys_sigreturn,sys_sigreturn,sys32_sigreturn) SYSCALL(sys_clone,sys_clone,sys_clone_wrapper) /* 120 */ diff --git a/arch/sparc/kernel/sys_sparc32.c b/arch/sparc/kernel/sys_sparc32.c index 5d4ee8374c84..d546188b13df 100644 --- 
a/arch/sparc/kernel/sys_sparc32.c +++ b/arch/sparc/kernel/sys_sparc32.c @@ -49,71 +49,6 @@ #include #include -#ifdef CONFIG_SYSVIPC -asmlinkage long compat_sys_ipc(u32 call, u32 first, u32 second, u32 third, compat_uptr_t ptr, u32 fifth) -{ - int version; - - version = call >> 16; /* hack for backward compatibility */ - call &= 0xffff; - - switch (call) { - case SEMTIMEDOP: - if (fifth) - /* sign extend semid */ - return compat_sys_semtimedop((int)first, - compat_ptr(ptr), second, - compat_ptr(fifth)); - /* else fall through for normal semop() */ - case SEMOP: - /* struct sembuf is the same on 32 and 64bit :)) */ - /* sign extend semid */ - return sys_semtimedop((int)first, compat_ptr(ptr), second, - NULL); - case SEMGET: - /* sign extend key, nsems */ - return sys_semget((int)first, (int)second, third); - case SEMCTL: - /* sign extend semid, semnum */ - return compat_sys_semctl((int)first, (int)second, third, - compat_ptr(ptr)); - - case MSGSND: - /* sign extend msqid */ - return compat_sys_msgsnd((int)first, (int)second, third, - compat_ptr(ptr)); - case MSGRCV: - /* sign extend msqid, msgtyp */ - return compat_sys_msgrcv((int)first, second, (int)fifth, - third, version, compat_ptr(ptr)); - case MSGGET: - /* sign extend key */ - return sys_msgget((int)first, second); - case MSGCTL: - /* sign extend msqid */ - return compat_sys_msgctl((int)first, second, compat_ptr(ptr)); - - case SHMAT: - /* sign extend shmid */ - return compat_sys_shmat((int)first, second, third, version, - compat_ptr(ptr)); - case SHMDT: - return sys_shmdt(compat_ptr(ptr)); - case SHMGET: - /* sign extend key_t */ - return sys_shmget((int)first, second, third); - case SHMCTL: - /* sign extend shmid */ - return compat_sys_shmctl((int)first, second, compat_ptr(ptr)); - - default: - return -ENOSYS; - } - - return -ENOSYS; -} -#endif - asmlinkage long sys32_truncate64(const char __user * path, unsigned long high, unsigned long low) { if ((int)high < 0) diff --git a/arch/x86/ia32/Makefile b/arch/x86/ia32/Makefile index 455646e0e532..e785b422b766 100644 --- a/arch/x86/ia32/Makefile +++ b/arch/x86/ia32/Makefile @@ -5,9 +5,6 @@ obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_signal.o obj-$(CONFIG_IA32_EMULATION) += nosyscall.o syscall_ia32.o -sysv-$(CONFIG_SYSVIPC) := ipc32.o -obj-$(CONFIG_IA32_EMULATION) += $(sysv-y) - obj-$(CONFIG_IA32_AOUT) += ia32_aout.o audit-class-$(CONFIG_AUDIT) := audit.o diff --git a/arch/x86/ia32/ipc32.c b/arch/x86/ia32/ipc32.c deleted file mode 100644 index 29cdcd02ead3..000000000000 --- a/arch/x86/ia32/ipc32.c +++ /dev/null @@ -1,54 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -asmlinkage long sys32_ipc(u32 call, int first, int second, int third, - compat_uptr_t ptr, u32 fifth) -{ - int version; - - version = call >> 16; /* hack for backward compatibility */ - call &= 0xffff; - - switch (call) { - case SEMOP: - /* struct sembuf is the same on 32 and 64bit :)) */ - return sys_semtimedop(first, compat_ptr(ptr), second, NULL); - case SEMTIMEDOP: - return compat_sys_semtimedop(first, compat_ptr(ptr), second, - compat_ptr(fifth)); - case SEMGET: - return sys_semget(first, second, third); - case SEMCTL: - return compat_sys_semctl(first, second, third, compat_ptr(ptr)); - - case MSGSND: - return compat_sys_msgsnd(first, second, third, compat_ptr(ptr)); - case MSGRCV: - return compat_sys_msgrcv(first, second, fifth, third, - version, compat_ptr(ptr)); - case MSGGET: - return sys_msgget((key_t) first, second); - case 
MSGCTL: - return compat_sys_msgctl(first, second, compat_ptr(ptr)); - - case SHMAT: - return compat_sys_shmat(first, second, third, version, - compat_ptr(ptr)); - case SHMDT: - return sys_shmdt(compat_ptr(ptr)); - case SHMGET: - return sys_shmget(first, (unsigned)second, third); - case SHMCTL: - return compat_sys_shmctl(first, second, compat_ptr(ptr)); - } - return -ENOSYS; -} diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h index 2b0e0c2d5379..df8ad3b3920a 100644 --- a/arch/x86/include/asm/sys_ia32.h +++ b/arch/x86/include/asm/sys_ia32.h @@ -57,9 +57,6 @@ asmlinkage long sys32_fallocate(int, int, unsigned, asmlinkage long sys32_sigreturn(void); asmlinkage long sys32_rt_sigreturn(void); -/* ia32/ipc32.c */ -asmlinkage long sys32_ipc(u32, int, int, int, compat_uptr_t, u32); - asmlinkage long sys32_fanotify_mark(int, unsigned int, u32, u32, int, const char __user *); diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index 0b55cd773e4c..0f6f5becab0d 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -123,7 +123,7 @@ 114 i386 wait4 sys_wait4 compat_sys_wait4 115 i386 swapoff sys_swapoff 116 i386 sysinfo sys_sysinfo compat_sys_sysinfo -117 i386 ipc sys_ipc sys32_ipc +117 i386 ipc sys_ipc compat_sys_ipc 118 i386 fsync sys_fsync 119 i386 sigreturn sys_sigreturn stub32_sigreturn 120 i386 clone sys_clone stub32_clone diff --git a/include/linux/compat.h b/include/linux/compat.h index 482c9e65b5bf..79a4781ac502 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -318,6 +318,7 @@ long compat_sys_msgrcv(int first, int second, int msgtyp, int third, int version, void __user *uptr); long compat_sys_shmat(int first, int second, compat_uptr_t third, int version, void __user *uptr); +asmlinkage long compat_sys_ipc(u32, int, int, u32, compat_uptr_t, u32); #else long compat_sys_semctl(int semid, int semnum, int cmd, int arg); long compat_sys_msgsnd(int msqid, struct compat_msgbuf __user *msgp, diff --git a/ipc/compat.c b/ipc/compat.c index 2547f29dcd1b..1da2e2eb9d70 100644 --- a/ipc/compat.c +++ b/ipc/compat.c @@ -368,6 +368,50 @@ long compat_sys_msgrcv(int first, int second, int msgtyp, int third, return do_msgrcv(first, uptr, second, msgtyp, third, compat_do_msg_fill); } + +COMPAT_SYSCALL_DEFINE6(ipc, u32, call, int, first, int, second, + u32, third, compat_uptr_t, ptr, u32, fifth) +{ + int version; + + version = call >> 16; /* hack for backward compatibility */ + call &= 0xffff; + + switch (call) { + case SEMOP: + /* struct sembuf is the same on 32 and 64bit :)) */ + return sys_semtimedop(first, compat_ptr(ptr), second, NULL); + case SEMTIMEDOP: + return compat_sys_semtimedop(first, compat_ptr(ptr), second, + compat_ptr(fifth)); + case SEMGET: + return sys_semget(first, second, third); + case SEMCTL: + return compat_sys_semctl(first, second, third, compat_ptr(ptr)); + + case MSGSND: + return compat_sys_msgsnd(first, second, third, compat_ptr(ptr)); + case MSGRCV: + return compat_sys_msgrcv(first, second, fifth, third, + version, compat_ptr(ptr)); + case MSGGET: + return sys_msgget(first, second); + case MSGCTL: + return compat_sys_msgctl(first, second, compat_ptr(ptr)); + + case SHMAT: + return compat_sys_shmat(first, second, third, version, + compat_ptr(ptr)); + case SHMDT: + return sys_shmdt(compat_ptr(ptr)); + case SHMGET: + return sys_shmget(first, (unsigned)second, third); + case SHMCTL: + return compat_sys_shmctl(first, second, compat_ptr(ptr)); + } + + return -ENOSYS; +} #else long 
compat_sys_semctl(int semid, int semnum, int cmd, int arg) { diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index b50e2a003c5a..bfd6787b355a 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -156,7 +156,7 @@ cond_syscall(compat_sys_process_vm_writev); cond_syscall(sys_pciconfig_read); cond_syscall(sys_pciconfig_write); cond_syscall(sys_pciconfig_iobase); -cond_syscall(sys32_ipc); +cond_syscall(compat_sys_s390_ipc); cond_syscall(ppc_rtas); cond_syscall(sys_spu_run); cond_syscall(sys_spu_create); -- cgit From 0e65a81b105a3f646793d46740ad90fa5c067986 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 3 Feb 2013 14:36:44 -0500 Subject: get rid of compat_sys_semctl() and friends in case of ARCH_WANT_OLD_COMPAT_IPC Signed-off-by: Al Viro --- arch/mips/kernel/linux32.c | 24 ------- arch/mips/kernel/scall64-n32.S | 6 +- include/linux/compat.h | 17 ++--- ipc/compat.c | 158 +++++++++++++++++------------------------ 4 files changed, 73 insertions(+), 132 deletions(-) (limited to 'include/linux') diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c index 7c57b8d7b255..d1d576b765f5 100644 --- a/arch/mips/kernel/linux32.c +++ b/arch/mips/kernel/linux32.c @@ -119,30 +119,6 @@ SYSCALL_DEFINE6(32_pwrite, unsigned int, fd, const char __user *, buf, return sys_pwrite64(fd, buf, count, merge_64(a4, a5)); } -#ifdef CONFIG_MIPS32_N32 -SYSCALL_DEFINE4(n32_semctl, int, semid, int, semnum, int, cmd, u32, arg) -{ - /* compat_sys_semctl expects a pointer to union semun */ - u32 __user *uptr = compat_alloc_user_space(sizeof(u32)); - if (put_user(arg, uptr)) - return -EFAULT; - return compat_sys_semctl(semid, semnum, cmd, uptr); -} - -SYSCALL_DEFINE4(n32_msgsnd, int, msqid, u32, msgp, unsigned int, msgsz, - int, msgflg) -{ - return compat_sys_msgsnd(msqid, msgsz, msgflg, compat_ptr(msgp)); -} - -SYSCALL_DEFINE5(n32_msgrcv, int, msqid, u32, msgp, size_t, msgsz, - int, msgtyp, int, msgflg) -{ - return compat_sys_msgrcv(msqid, msgsz, msgtyp, msgflg, IPC_64, - compat_ptr(msgp)); -} -#endif - SYSCALL_DEFINE1(32_personality, unsigned long, personality) { unsigned int p = personality & 0xffffffff; diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index 9b4df498fc5b..edcb6594e7b5 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -168,11 +168,11 @@ EXPORT(sysn32_call_table) PTR sys_newuname PTR sys_semget PTR sys_semop - PTR sys_n32_semctl + PTR compat_sys_semctl PTR sys_shmdt /* 6065 */ PTR sys_msgget - PTR sys_n32_msgsnd - PTR sys_n32_msgrcv + PTR compat_sys_msgsnd + PTR compat_sys_msgrcv PTR compat_sys_msgctl PTR compat_sys_fcntl /* 6070 */ PTR sys_flock diff --git a/include/linux/compat.h b/include/linux/compat.h index 79a4781ac502..2bfe67329dc4 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -311,22 +311,13 @@ asmlinkage long compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr, compat_size_t __user *len_ptr); -#ifdef CONFIG_ARCH_WANT_OLD_COMPAT_IPC -long compat_sys_semctl(int first, int second, int third, void __user *uptr); -long compat_sys_msgsnd(int first, int second, int third, void __user *uptr); -long compat_sys_msgrcv(int first, int second, int msgtyp, int third, - int version, void __user *uptr); -long compat_sys_shmat(int first, int second, compat_uptr_t third, int version, - void __user *uptr); asmlinkage long compat_sys_ipc(u32, int, int, u32, compat_uptr_t, u32); -#else -long compat_sys_semctl(int semid, int semnum, int cmd, int arg); -long compat_sys_msgsnd(int msqid, struct compat_msgbuf __user 
*msgp, +asmlinkage long compat_sys_shmat(int shmid, compat_uptr_t shmaddr, int shmflg); +asmlinkage long compat_sys_semctl(int semid, int semnum, int cmd, int arg); +asmlinkage long compat_sys_msgsnd(int msqid, compat_uptr_t msgp, compat_ssize_t msgsz, int msgflg); -long compat_sys_msgrcv(int msqid, struct compat_msgbuf __user *msgp, +asmlinkage long compat_sys_msgrcv(int msqid, compat_uptr_t msgp, compat_ssize_t msgsz, long msgtyp, int msgflg); -long compat_sys_shmat(int shmid, compat_uptr_t shmaddr, int shmflg); -#endif long compat_sys_msgctl(int first, int second, void __user *uptr); long compat_sys_shmctl(int first, int second, void __user *uptr); long compat_sys_semtimedop(int semid, struct sembuf __user *tsems, diff --git a/ipc/compat.c b/ipc/compat.c index 1da2e2eb9d70..6cb6a4df86e4 100644 --- a/ipc/compat.c +++ b/ipc/compat.c @@ -306,7 +306,7 @@ static long do_compat_semctl(int first, int second, int third, u32 pad) return err; } -long compat_do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bufsz) +static long compat_do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bufsz) { struct compat_msgbuf __user *msgp = dest; size_t msgsz; @@ -320,59 +320,16 @@ long compat_do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bufsz) return msgsz; } -#ifdef CONFIG_ARCH_WANT_OLD_COMPAT_IPC -long compat_sys_semctl(int first, int second, int third, void __user *uptr) -{ - u32 pad; - - if (!uptr) - return -EINVAL; - if (get_user(pad, (u32 __user *) uptr)) - return -EFAULT; - return do_compat_semctl(first, second, third, pad); -} - -long compat_sys_msgsnd(int first, int second, int third, void __user *uptr) -{ - struct compat_msgbuf __user *up = uptr; - long type; - - if (first < 0) - return -EINVAL; - if (second < 0) - return -EINVAL; - - if (get_user(type, &up->mtype)) - return -EFAULT; - - return do_msgsnd(first, type, up->mtext, second, third); -} - -long compat_sys_msgrcv(int first, int second, int msgtyp, int third, - int version, void __user *uptr) -{ - if (first < 0) - return -EINVAL; - if (second < 0) - return -EINVAL; - - if (!version) { - struct compat_ipc_kludge ipck; - if (!uptr) - return -EINVAL; - if (copy_from_user (&ipck, uptr, sizeof(ipck))) - return -EFAULT; - uptr = compat_ptr(ipck.msgp); - msgtyp = ipck.msgtyp; - } - return do_msgrcv(first, uptr, second, msgtyp, third, - compat_do_msg_fill); -} +#ifndef COMPAT_SHMLBA +#define COMPAT_SHMLBA SHMLBA +#endif +#ifdef CONFIG_ARCH_WANT_OLD_COMPAT_IPC COMPAT_SYSCALL_DEFINE6(ipc, u32, call, int, first, int, second, u32, third, compat_uptr_t, ptr, u32, fifth) { int version; + u32 pad; version = call >> 16; /* hack for backward compatibility */ call &= 0xffff; @@ -387,21 +344,59 @@ COMPAT_SYSCALL_DEFINE6(ipc, u32, call, int, first, int, second, case SEMGET: return sys_semget(first, second, third); case SEMCTL: - return compat_sys_semctl(first, second, third, compat_ptr(ptr)); + if (!ptr) + return -EINVAL; + if (get_user(pad, (u32 __user *) compat_ptr(ptr))) + return -EFAULT; + return do_compat_semctl(first, second, third, pad); + + case MSGSND: { + struct compat_msgbuf __user *up = compat_ptr(ptr); + compat_long_t type; + + if (first < 0 || second < 0) + return -EINVAL; - case MSGSND: - return compat_sys_msgsnd(first, second, third, compat_ptr(ptr)); - case MSGRCV: - return compat_sys_msgrcv(first, second, fifth, third, - version, compat_ptr(ptr)); + if (get_user(type, &up->mtype)) + return -EFAULT; + + return do_msgsnd(first, type, up->mtext, second, third); + } + case MSGRCV: { + void __user *uptr = 
compat_ptr(ptr); + + if (first < 0 || second < 0) + return -EINVAL; + + if (!version) { + struct compat_ipc_kludge ipck; + if (!uptr) + return -EINVAL; + if (copy_from_user (&ipck, uptr, sizeof(ipck))) + return -EFAULT; + uptr = compat_ptr(ipck.msgp); + fifth = ipck.msgtyp; + } + return do_msgrcv(first, uptr, second, fifth, third, + compat_do_msg_fill); + } case MSGGET: return sys_msgget(first, second); case MSGCTL: return compat_sys_msgctl(first, second, compat_ptr(ptr)); - case SHMAT: - return compat_sys_shmat(first, second, third, version, - compat_ptr(ptr)); + case SHMAT: { + int err; + unsigned long raddr; + + if (version == 1) + return -EINVAL; + err = do_shmat(first, compat_ptr(ptr), second, &raddr, + COMPAT_SHMLBA); + if (err < 0) + return err; + return put_user(raddr, (compat_ulong_t *)compat_ptr(third)); + } case SHMDT: return sys_shmdt(compat_ptr(ptr)); case SHMGET: @@ -412,29 +407,30 @@ COMPAT_SYSCALL_DEFINE6(ipc, u32, call, int, first, int, second, return -ENOSYS; } -#else -long compat_sys_semctl(int semid, int semnum, int cmd, int arg) +#endif + +COMPAT_SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, int, arg) { return do_compat_semctl(semid, semnum, cmd, arg); } -long compat_sys_msgsnd(int msqid, struct compat_msgbuf __user *msgp, - compat_ssize_t msgsz, int msgflg) +COMPAT_SYSCALL_DEFINE4(msgsnd, int, msqid, compat_uptr_t, msgp, + compat_ssize_t, msgsz, int, msgflg) { + struct compat_msgbuf __user *up = compat_ptr(msgp); compat_long_t mtype; - if (get_user(mtype, &msgp->mtype)) + if (get_user(mtype, &up->mtype)) return -EFAULT; - return do_msgsnd(msqid, mtype, msgp->mtext, (ssize_t)msgsz, msgflg); + return do_msgsnd(msqid, mtype, up->mtext, (ssize_t)msgsz, msgflg); } -long compat_sys_msgrcv(int msqid, struct compat_msgbuf __user *msgp, - compat_ssize_t msgsz, long msgtyp, int msgflg) +COMPAT_SYSCALL_DEFINE5(msgrcv, int, msqid, compat_uptr_t, msgp, + compat_ssize_t, msgsz, long, msgtyp, int, msgflg) { - return do_msgrcv(msqid, msgp, (ssize_t)msgsz, msgtyp, msgflg, - compat_do_msg_fill); + return do_msgrcv(msqid, compat_ptr(msgp), (ssize_t)msgsz, msgtyp, + msgflg, compat_do_msg_fill); } -#endif static inline int get_compat_msqid64(struct msqid64_ds *m64, struct compat_msqid64_ds __user *up64) @@ -552,28 +548,7 @@ long compat_sys_msgctl(int first, int second, void __user *uptr) return err; } -#ifndef COMPAT_SHMLBA -#define COMPAT_SHMLBA SHMLBA -#endif - -#ifdef CONFIG_ARCH_WANT_OLD_COMPAT_IPC -long compat_sys_shmat(int first, int second, compat_uptr_t third, int version, - void __user *uptr) -{ - int err; - unsigned long raddr; - compat_ulong_t __user *uaddr; - - if (version == 1) - return -EINVAL; - err = do_shmat(first, uptr, second, &raddr, COMPAT_SHMLBA); - if (err < 0) - return err; - uaddr = compat_ptr(third); - return put_user(raddr, uaddr); -} -#else -long compat_sys_shmat(int shmid, compat_uptr_t shmaddr, int shmflg) +COMPAT_SYSCALL_DEFINE3(shmat, int, shmid, compat_uptr_t, shmaddr, int, shmflg) { unsigned long ret; long err; @@ -584,7 +559,6 @@ long compat_sys_shmat(int shmid, compat_uptr_t shmaddr, int shmflg) force_successful_syscall_return(); return (long)ret; } -#endif static inline int get_compat_shmid64_ds(struct shmid64_ds *s64, struct compat_shmid64_ds __user *up64) -- cgit From 4b377bab29e6a241db42f27541e7fb63713ee178 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 4 Mar 2013 10:47:59 -0500 Subject: make do_mremap() static The extern in sys_sparc_64.c was a rudiment of time when do_mremap() used to exist in MMU case (it doesn't anymore). 
As for !MMU one, nothing uses it outside of mm/nommu.c... Signed-off-by: Al Viro --- arch/sparc/kernel/sys_sparc_64.c | 4 ---- include/linux/mm.h | 3 --- mm/nommu.c | 3 +-- 3 files changed, 1 insertion(+), 9 deletions(-) (limited to 'include/linux') diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index 708bc29d36a8..42beb6fc4ad8 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -470,10 +470,6 @@ SYSCALL_DEFINE2(64_munmap, unsigned long, addr, size_t, len) return vm_munmap(addr, len); } - -extern unsigned long do_mremap(unsigned long addr, - unsigned long old_len, unsigned long new_len, - unsigned long flags, unsigned long new_addr); SYSCALL_DEFINE5(64_mremap, unsigned long, addr, unsigned long, old_len, unsigned long, new_len, unsigned long, flags, diff --git a/include/linux/mm.h b/include/linux/mm.h index 7acc9dc73c9f..f4c8aa990442 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1079,9 +1079,6 @@ extern unsigned long move_page_tables(struct vm_area_struct *vma, unsigned long old_addr, struct vm_area_struct *new_vma, unsigned long new_addr, unsigned long len, bool need_rmap_locks); -extern unsigned long do_mremap(unsigned long addr, - unsigned long old_len, unsigned long new_len, - unsigned long flags, unsigned long new_addr); extern unsigned long change_protection(struct vm_area_struct *vma, unsigned long start, unsigned long end, pgprot_t newprot, int dirty_accountable, int prot_numa); diff --git a/mm/nommu.c b/mm/nommu.c index e19328087534..66737e0584ae 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1770,7 +1770,7 @@ unsigned long vm_brk(unsigned long addr, unsigned long len) * * MREMAP_FIXED is not supported under NOMMU conditions */ -unsigned long do_mremap(unsigned long addr, +static unsigned long do_mremap(unsigned long addr, unsigned long old_len, unsigned long new_len, unsigned long flags, unsigned long new_addr) { @@ -1805,7 +1805,6 @@ unsigned long do_mremap(unsigned long addr, vma->vm_end = vma->vm_start + new_len; return vma->vm_start; } -EXPORT_SYMBOL(do_mremap); SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, unsigned long, new_len, unsigned long, flags, -- cgit From 45d9550a0e7e9230606ca3c4c6f4dc6297848b2f Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 19 Feb 2013 12:17:01 -0800 Subject: workqueue: allow more off-queue flag space When a work item is off-queue, its work->data contains WORK_STRUCT_* and WORK_OFFQ_* flags. As WORK_OFFQ_* flags are used only while a work item is off-queue, it can occupy bits of work->data which aren't used while off-queue. WORK_OFFQ_* currently only use bits used by on-queue CWQ pointer. As color bits aren't used while off-queue, there's no reason to not use them. Lower WORK_OFFQ_FLAG_BASE from WORK_STRUCT_FLAG_BITS to WORK_STRUCT_COLOR_SHIFT thus giving 4 more bits to off-queue flag space which is also used to record worker_pool ID while off-queue. This doesn't introduce any visible behavior difference. tj: Rewrote the description. 
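To make the gained space concrete, here is a minimal stand-alone C sketch of the bit arithmetic, assuming the common !CONFIG_DEBUG_OBJECTS_WORK layout (an illustration, not the kernel header itself):

#include <stdio.h>

enum {
	WORK_STRUCT_COLOR_SHIFT	= 4,	/* flag bits sit below the color field */
	WORK_STRUCT_COLOR_BITS	= 4,
	WORK_STRUCT_FLAG_BITS	= WORK_STRUCT_COLOR_SHIFT + WORK_STRUCT_COLOR_BITS,

	OLD_OFFQ_FLAG_BASE	= WORK_STRUCT_FLAG_BITS,	/* before this patch */
	NEW_OFFQ_FLAG_BASE	= WORK_STRUCT_COLOR_SHIFT,	/* after: reuse the color bits */
};

int main(void)
{
	/* prints 4: the extra bits now available for OFFQ flags and pool IDs */
	printf("%d\n", OLD_OFFQ_FLAG_BASE - NEW_OFFQ_FLAG_BASE);
	return 0;
}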
Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 8afab27cdbc2..5bd030f630a9 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -68,7 +68,7 @@ enum { WORK_STRUCT_COLOR_BITS, /* data contains off-queue information when !WORK_STRUCT_PWQ */ - WORK_OFFQ_FLAG_BASE = WORK_STRUCT_FLAG_BITS, + WORK_OFFQ_FLAG_BASE = WORK_STRUCT_COLOR_SHIFT, WORK_OFFQ_CANCELING = (1 << WORK_OFFQ_FLAG_BASE), -- cgit From 65dff759d2948cf18e2029fc5c0c595b8b7da3a5 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 1 Mar 2013 15:01:56 +0800 Subject: cgroup: fix cgroup_path() vs rename() race rename() will change dentry->d_name. The result of this race can be worse than seeing a partially rewritten name: we might access a stale pointer, because rename() will re-allocate memory to hold a longer name. As accessing dentry->name must be protected by dentry->d_lock or parent inode's i_mutex, while on the other hand cgroup_path() can be called with some irq-safe spinlocks held, we can't generate cgroup path using dentry->d_name. Alternatively, we make a copy of dentry->d_name and save it in cgrp->name when a cgroup is created, and update cgrp->name at rename(). v5: use flexible array instead of zero-size array. v4: - allocate root_cgroup_name and all root_cgroup->name points to it. - add cgroup_name() wrapper. v3: use kfree_rcu() instead of synchronize_rcu() in user-visible path. v2: make cgrp->name RCU safe. Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- block/blk-cgroup.h | 2 - include/linux/cgroup.h | 24 +++++++++++ kernel/cgroup.c | 106 +++++++++++++++++++++++++++++++++-------------- 3 files changed, 100 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index f2b292925ccd..4e595ee8c915 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -247,9 +247,7 @@ static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen) { int ret; - rcu_read_lock(); ret = cgroup_path(blkg->blkcg->css.cgroup, buf, buflen); - rcu_read_unlock(); if (ret) strncpy(buf, "", buflen); return ret; } diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 900af5964f55..75c6ec1ba1ba 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -150,6 +150,11 @@ enum { CGRP_CPUSET_CLONE_CHILDREN, }; +struct cgroup_name { + struct rcu_head rcu_head; + char name[]; +}; + struct cgroup { unsigned long flags; /* "unsigned long" so bitops work */ @@ -172,6 +177,19 @@ struct cgroup { struct cgroup *parent; /* my parent */ struct dentry *dentry; /* cgroup fs entry, RCU protected */ + /* + * This is a copy of dentry->d_name, and it's needed because + * we can't use dentry->d_name in cgroup_path(). + * + * You must acquire rcu_read_lock() to access cgrp->name, and + * the only place that can change it is rename(), which is + * protected by parent dir's i_mutex. + * + * Normally you should use cgroup_name() wrapper rather than + * access it directly. 
+ */ + struct cgroup_name __rcu *name; + /* Private pointers for each registered subsystem */ struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; @@ -404,6 +422,12 @@ struct cgroup_scanner { void *data; }; +/* Caller should hold rcu_read_lock() */ +static inline const char *cgroup_name(const struct cgroup *cgrp) +{ + return rcu_dereference(cgrp->name)->name; +} + int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); int cgroup_rm_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index a32f9432666c..50682168abc2 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -238,6 +238,8 @@ static DEFINE_SPINLOCK(hierarchy_id_lock); /* dummytop is a shorthand for the dummy hierarchy's top cgroup */ #define dummytop (&rootnode.top_cgroup) +static struct cgroup_name root_cgroup_name = { .name = "/" }; + /* This flag indicates whether tasks in the fork and exit paths should * check for fork/exit handlers to call. This avoids us having to do * extra work in the fork/exit path if none of the subsystems need to @@ -859,6 +861,17 @@ static struct inode *cgroup_new_inode(umode_t mode, struct super_block *sb) return inode; } +static struct cgroup_name *cgroup_alloc_name(struct dentry *dentry) +{ + struct cgroup_name *name; + + name = kmalloc(sizeof(*name) + dentry->d_name.len + 1, GFP_KERNEL); + if (!name) + return NULL; + strcpy(name->name, dentry->d_name.name); + return name; +} + static void cgroup_free_fn(struct work_struct *work) { struct cgroup *cgrp = container_of(work, struct cgroup, free_work); @@ -889,6 +902,7 @@ static void cgroup_free_fn(struct work_struct *work) simple_xattrs_free(&cgrp->xattrs); ida_simple_remove(&cgrp->root->cgroup_ida, cgrp->id); + kfree(rcu_dereference_raw(cgrp->name)); kfree(cgrp); } @@ -1421,6 +1435,7 @@ static void init_cgroup_root(struct cgroupfs_root *root) INIT_LIST_HEAD(&root->allcg_list); root->number_of_cgroups = 1; cgrp->root = root; + cgrp->name = &root_cgroup_name; cgrp->top_cgroup = cgrp; init_cgroup_housekeeping(cgrp); list_add_tail(&cgrp->allcg_node, &root->allcg_list); @@ -1769,49 +1784,45 @@ static struct kobject *cgroup_kobj; * @buf: the buffer to write the path into * @buflen: the length of the buffer * - * Called with cgroup_mutex held or else with an RCU-protected cgroup - * reference. Writes path of cgroup into buf. Returns 0 on success, - * -errno on error. + * Writes path of cgroup into buf. Returns 0 on success, -errno on error. + * + * We can't generate cgroup path using dentry->d_name, as accessing + * dentry->name must be protected by irq-unsafe dentry->d_lock or parent + * inode's i_mutex, while on the other hand cgroup_path() can be called + * with some irq-safe spinlocks held. 
*/ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen) { - struct dentry *dentry = cgrp->dentry; + int ret = -ENAMETOOLONG; char *start; - rcu_lockdep_assert(rcu_read_lock_held() || cgroup_lock_is_held(), - "cgroup_path() called without proper locking"); - - if (cgrp == dummytop) { - /* - * Inactive subsystems have no dentry for their root - * cgroup - */ - strcpy(buf, "/"); - return 0; - } - start = buf + buflen - 1; - *start = '\0'; - for (;;) { - int len = dentry->d_name.len; + rcu_read_lock(); + while (cgrp) { + const char *name = cgroup_name(cgrp); + int len; + + len = strlen(name); if ((start -= len) < buf) - return -ENAMETOOLONG; - memcpy(start, dentry->d_name.name, len); - cgrp = cgrp->parent; - if (!cgrp) - break; + goto out; + memcpy(start, name, len); - dentry = cgrp->dentry; if (!cgrp->parent) - continue; + break; + if (--start < buf) - return -ENAMETOOLONG; + goto out; *start = '/'; + + cgrp = cgrp->parent; } + ret = 0; memmove(buf, start, buf + buflen - start); - return 0; +out: + rcu_read_unlock(); + return ret; } EXPORT_SYMBOL_GPL(cgroup_path); @@ -2537,13 +2548,40 @@ static int cgroup_file_release(struct inode *inode, struct file *file) static int cgroup_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) { + int ret; + struct cgroup_name *name, *old_name; + struct cgroup *cgrp; + + /* + * It's convenient to use parent dir's i_mutex to protect + * cgrp->name. + */ + lockdep_assert_held(&old_dir->i_mutex); + if (!S_ISDIR(old_dentry->d_inode->i_mode)) return -ENOTDIR; if (new_dentry->d_inode) return -EEXIST; if (old_dir != new_dir) return -EIO; - return simple_rename(old_dir, old_dentry, new_dir, new_dentry); + + cgrp = __d_cgrp(old_dentry); + + name = cgroup_alloc_name(new_dentry); + if (!name) + return -ENOMEM; + + ret = simple_rename(old_dir, old_dentry, new_dir, new_dentry); + if (ret) { + kfree(name); + return ret; + } + + old_name = cgrp->name; + rcu_assign_pointer(cgrp->name, name); + + kfree_rcu(old_name, rcu_head); + return 0; } static struct simple_xattrs *__d_xattrs(struct dentry *dentry) @@ -4158,6 +4196,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, umode_t mode) { struct cgroup *cgrp; + struct cgroup_name *name; struct cgroupfs_root *root = parent->root; int err = 0; struct cgroup_subsys *ss; @@ -4168,9 +4207,14 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, if (!cgrp) return -ENOMEM; + name = cgroup_alloc_name(dentry); + if (!name) + goto err_free_cgrp; + rcu_assign_pointer(cgrp->name, name); + cgrp->id = ida_simple_get(&root->cgroup_ida, 1, 0, GFP_KERNEL); if (cgrp->id < 0) - goto err_free_cgrp; + goto err_free_name; /* * Only live parents can have children. Note that the liveliness @@ -4276,6 +4320,8 @@ err_free_all: deactivate_super(sb); err_free_id: ida_simple_remove(&root->cgroup_ida, cgrp->id); +err_free_name: + kfree(rcu_dereference_raw(cgrp->name)); err_free_cgrp: kfree(cgrp); return err; -- cgit From 462fce46065ec4b200c08619c047b9e5a8fd154a Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Wed, 27 Feb 2013 19:41:56 +0900 Subject: KVM: set_memory_region: Drop user_alloc from prepare/commit_memory_region() X86 does not use this any more. The remaining user, s390's !user_alloc check, can be simply removed since KVM_SET_MEMORY_REGION ioctl is no longer supported. Note: fixed powerpc's indentations with spaces to suppress checkpatch errors. 
Signed-off-by: Takuya Yoshikawa Signed-off-by: Marcelo Tosatti --- arch/arm/kvm/arm.c | 6 ++---- arch/ia64/kvm/kvm-ia64.c | 6 ++---- arch/powerpc/kvm/powerpc.c | 12 +++++------- arch/s390/kvm/kvm-s390.c | 9 ++------- arch/x86/kvm/x86.c | 6 ++---- include/linux/kvm_host.h | 6 ++---- virt/kvm/kvm_main.c | 4 ++-- 7 files changed, 17 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 5a936988eb24..24cb5f66787d 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -231,16 +231,14 @@ int kvm_arch_set_memory_region(struct kvm *kvm, int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_memory_slot old, - struct kvm_userspace_memory_region *mem, - bool user_alloc) + struct kvm_userspace_memory_region *mem) { return 0; } void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old, - bool user_alloc) + struct kvm_memory_slot old) { } diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index ad3126a58644..cbc5b0417dab 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1579,8 +1579,7 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_memory_slot old, - struct kvm_userspace_memory_region *mem, - bool user_alloc) + struct kvm_userspace_memory_region *mem) { unsigned long i; unsigned long pfn; @@ -1610,8 +1609,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old, - bool user_alloc) + struct kvm_memory_slot old) { return; } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 934413cd3a1b..22b33159fbc4 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -411,18 +411,16 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) } int kvm_arch_prepare_memory_region(struct kvm *kvm, - struct kvm_memory_slot *memslot, - struct kvm_memory_slot old, - struct kvm_userspace_memory_region *mem, - bool user_alloc) + struct kvm_memory_slot *memslot, + struct kvm_memory_slot old, + struct kvm_userspace_memory_region *mem) { return kvmppc_core_prepare_memory_region(kvm, memslot, mem); } void kvm_arch_commit_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old, - bool user_alloc) + struct kvm_userspace_memory_region *mem, + struct kvm_memory_slot old) { kvmppc_core_commit_memory_region(kvm, mem, old); } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 4cf35a0a79e7..07ac302ce246 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -975,8 +975,7 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_memory_slot old, - struct kvm_userspace_memory_region *mem, - bool user_alloc) + struct kvm_userspace_memory_region *mem) { /* A few sanity checks. 
We can have exactly one memory slot which has to start at guest virtual zero and which has to be located at a @@ -997,16 +996,12 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, if (mem->memory_size & 0xffffful) return -EINVAL; - if (!user_alloc) - return -EINVAL; - return 0; } void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old, - bool user_alloc) + struct kvm_memory_slot old) { int rc; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 811c5c9c8880..26216bb4403f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6907,8 +6907,7 @@ out_free: int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_memory_slot old, - struct kvm_userspace_memory_region *mem, - bool user_alloc) + struct kvm_userspace_memory_region *mem) { int npages = memslot->npages; @@ -6938,8 +6937,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old, - bool user_alloc) + struct kvm_memory_slot old) { int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index cad77fe09d77..b4757a1cc4c4 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -464,12 +464,10 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages); int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_memory_slot old, - struct kvm_userspace_memory_region *mem, - bool user_alloc); + struct kvm_userspace_memory_region *mem); void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old, - bool user_alloc); + struct kvm_memory_slot old); bool kvm_largepages_enabled(void); void kvm_disable_largepages(void); /* flush all memory translations */ diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index adc68feb5c5a..fd3037010e75 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -875,7 +875,7 @@ int __kvm_set_memory_region(struct kvm *kvm, slots = old_memslots; } - r = kvm_arch_prepare_memory_region(kvm, &new, old, mem, user_alloc); + r = kvm_arch_prepare_memory_region(kvm, &new, old, mem); if (r) goto out_slots; @@ -915,7 +915,7 @@ int __kvm_set_memory_region(struct kvm *kvm, old_memslots = install_new_memslots(kvm, slots, &new); - kvm_arch_commit_memory_region(kvm, mem, old, user_alloc); + kvm_arch_commit_memory_region(kvm, mem, old); kvm_free_physmem_slot(&old, &new); kfree(old_memslots); -- cgit From 47ae31e257c548abdb199e0d26723139a9a967ba Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Wed, 27 Feb 2013 19:43:00 +0900 Subject: KVM: set_memory_region: Drop user_alloc from set_memory_region() Except ia64's stale code, KVM_SET_MEMORY_REGION support, this is only used for sanity checks in __kvm_set_memory_region() which can easily be changed to use slot id instead. 
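The replacement check can be sketched as follows; the bound is illustrative, since the real KVM_USER_MEM_SLOTS is arch-specific:

#include <stdbool.h>

#define KVM_USER_MEM_SLOTS 125	/* illustrative value only */

/*
 * Slots below KVM_USER_MEM_SLOTS are filled in by userspace via
 * KVM_SET_USER_MEMORY_REGION, so their userspace_addr must be
 * validated; ids at or above the bound belong to internal slots
 * that the kernel sets up itself.
 */
static bool slot_needs_userspace_checks(unsigned int slot)
{
	return slot < KVM_USER_MEM_SLOTS;
}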
Signed-off-by: Takuya Yoshikawa Signed-off-by: Marcelo Tosatti --- arch/ia64/kvm/kvm-ia64.c | 18 ------------------ arch/x86/kvm/vmx.c | 6 +++--- include/linux/kvm_host.h | 10 +++------- virt/kvm/kvm_main.c | 18 +++++++----------- 4 files changed, 13 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index cbc5b0417dab..43701f0c0f71 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -942,24 +942,6 @@ long kvm_arch_vm_ioctl(struct file *filp, int r = -ENOTTY; switch (ioctl) { - case KVM_SET_MEMORY_REGION: { - struct kvm_memory_region kvm_mem; - struct kvm_userspace_memory_region kvm_userspace_mem; - - r = -EFAULT; - if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem)) - goto out; - kvm_userspace_mem.slot = kvm_mem.slot; - kvm_userspace_mem.flags = kvm_mem.flags; - kvm_userspace_mem.guest_phys_addr = - kvm_mem.guest_phys_addr; - kvm_userspace_mem.memory_size = kvm_mem.memory_size; - r = kvm_vm_ioctl_set_memory_region(kvm, - &kvm_userspace_mem, false); - if (r) - goto out; - break; - } case KVM_CREATE_IRQCHIP: r = -EFAULT; r = kvm_ioapic_init(kvm); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 7cc566b09ff2..58fb7c27e3b5 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3694,7 +3694,7 @@ static int alloc_apic_access_page(struct kvm *kvm) kvm_userspace_mem.flags = 0; kvm_userspace_mem.guest_phys_addr = 0xfee00000ULL; kvm_userspace_mem.memory_size = PAGE_SIZE; - r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, false); + r = __kvm_set_memory_region(kvm, &kvm_userspace_mem); if (r) goto out; @@ -3724,7 +3724,7 @@ static int alloc_identity_pagetable(struct kvm *kvm) kvm_userspace_mem.guest_phys_addr = kvm->arch.ept_identity_map_addr; kvm_userspace_mem.memory_size = PAGE_SIZE; - r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, false); + r = __kvm_set_memory_region(kvm, &kvm_userspace_mem); if (r) goto out; @@ -4364,7 +4364,7 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) .flags = 0, }; - ret = kvm_set_memory_region(kvm, &tss_mem, false); + ret = kvm_set_memory_region(kvm, &tss_mem); if (ret) return ret; kvm->arch.tss_addr = addr; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b4757a1cc4c4..84a994c7a5c5 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -453,11 +453,9 @@ id_to_memslot(struct kvm_memslots *slots, int id) } int kvm_set_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, - bool user_alloc); + struct kvm_userspace_memory_region *mem); int __kvm_set_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, - bool user_alloc); + struct kvm_userspace_memory_region *mem); void kvm_arch_free_memslot(struct kvm_memory_slot *free, struct kvm_memory_slot *dont); int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages); @@ -553,9 +551,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log); int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, - struct - kvm_userspace_memory_region *mem, - bool user_alloc); + struct kvm_userspace_memory_region *mem); int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level); long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index fd3037010e75..5b3e41b81f0d 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -745,8 +745,7 @@ enum kvm_mr_change { * Must be called holding mmap_sem for 
write. */ int __kvm_set_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, - bool user_alloc) + struct kvm_userspace_memory_region *mem) { int r; gfn_t base_gfn; @@ -767,7 +766,7 @@ int __kvm_set_memory_region(struct kvm *kvm, if (mem->guest_phys_addr & (PAGE_SIZE - 1)) goto out; /* We can read the guest memory with __xxx_user() later on. */ - if (user_alloc && + if ((mem->slot < KVM_USER_MEM_SLOTS) && ((mem->userspace_addr & (PAGE_SIZE - 1)) || !access_ok(VERIFY_WRITE, (void __user *)(unsigned long)mem->userspace_addr, @@ -932,26 +931,23 @@ out: EXPORT_SYMBOL_GPL(__kvm_set_memory_region); int kvm_set_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, - bool user_alloc) + struct kvm_userspace_memory_region *mem) { int r; mutex_lock(&kvm->slots_lock); - r = __kvm_set_memory_region(kvm, mem, user_alloc); + r = __kvm_set_memory_region(kvm, mem); mutex_unlock(&kvm->slots_lock); return r; } EXPORT_SYMBOL_GPL(kvm_set_memory_region); int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, - struct - kvm_userspace_memory_region *mem, - bool user_alloc) + struct kvm_userspace_memory_region *mem) { if (mem->slot >= KVM_USER_MEM_SLOTS) return -EINVAL; - return kvm_set_memory_region(kvm, mem, user_alloc); + return kvm_set_memory_region(kvm, mem); } int kvm_get_dirty_log(struct kvm *kvm, @@ -2198,7 +2194,7 @@ static long kvm_vm_ioctl(struct file *filp, sizeof kvm_userspace_mem)) goto out; - r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, true); + r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem); break; } case KVM_GET_DIRTY_LOG: { -- cgit From 74d0727cb7aaaea48a6353209093be26abc8d160 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Wed, 27 Feb 2013 19:43:44 +0900 Subject: KVM: set_memory_region: Make kvm_mr_change available to arch code This will be used for cleaning up prepare/commit_memory_region() later. 
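As a rough sketch of how a slot update maps onto these values (a hypothetical helper mirroring the intent described above, not the exact kernel code):

enum kvm_mr_change {
	KVM_MR_CREATE,
	KVM_MR_DELETE,
	KVM_MR_MOVE,
	KVM_MR_FLAGS_ONLY,
};

static enum kvm_mr_change classify_change(unsigned long old_npages,
					  unsigned long new_npages,
					  unsigned long old_base_gfn,
					  unsigned long new_base_gfn)
{
	if (!old_npages)
		return KVM_MR_CREATE;		/* slot did not exist before */
	if (!new_npages)
		return KVM_MR_DELETE;		/* size 0 deletes the slot */
	if (new_base_gfn != old_base_gfn)
		return KVM_MR_MOVE;		/* moved in guest physical space */
	return KVM_MR_FLAGS_ONLY;		/* only the flags changed */
}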
Signed-off-by: Takuya Yoshikawa Signed-off-by: Marcelo Tosatti --- include/linux/kvm_host.h | 18 ++++++++++++++++++ virt/kvm/kvm_main.c | 18 ------------------ 2 files changed, 18 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 84a994c7a5c5..8eaf61f7b02d 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -452,6 +452,24 @@ id_to_memslot(struct kvm_memslots *slots, int id) return slot; } +/* + * KVM_SET_USER_MEMORY_REGION ioctl allows the following operations: + * - create a new memory slot + * - delete an existing memory slot + * - modify an existing memory slot + * -- move it in the guest physical memory space + * -- just change its flags + * + * Since flags can be changed by some of these operations, the following + * differentiation is the best we can do for __kvm_set_memory_region(): + */ +enum kvm_mr_change { + KVM_MR_CREATE, + KVM_MR_DELETE, + KVM_MR_MOVE, + KVM_MR_FLAGS_ONLY, +}; + int kvm_set_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem); int __kvm_set_memory_region(struct kvm *kvm, diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 5b3e41b81f0d..c7979ed41923 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -718,24 +718,6 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm, return old_memslots; } -/* - * KVM_SET_USER_MEMORY_REGION ioctl allows the following operations: - * - create a new memory slot - * - delete an existing memory slot - * - modify an existing memory slot - * -- move it in the guest physical memory space - * -- just change its flags - * - * Since flags can be changed by some of these operations, the following - * differentiation is the best we can do for __kvm_set_memory_region(): - */ -enum kvm_mr_change { - KVM_MR_CREATE, - KVM_MR_DELETE, - KVM_MR_MOVE, - KVM_MR_FLAGS_ONLY, -}; - /* * Allocate some memory and give it an address in the guest physical address * space. -- cgit From 7b6195a91d60909a2834ab7181e2b9476e6fe749 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Wed, 27 Feb 2013 19:44:34 +0900 Subject: KVM: set_memory_region: Refactor prepare_memory_region() This patch drops the parameter old, a copy of the old memory slot, and adds a new parameter named change to know the change being requested. This not only cleans up the code but also removes extra copying of the memory slot structure. 
Signed-off-by: Takuya Yoshikawa Signed-off-by: Marcelo Tosatti --- arch/arm/kvm/arm.c | 4 ++-- arch/ia64/kvm/kvm-ia64.c | 4 ++-- arch/powerpc/kvm/powerpc.c | 4 ++-- arch/s390/kvm/kvm-s390.c | 4 ++-- arch/x86/kvm/x86.c | 10 ++++------ include/linux/kvm_host.h | 4 ++-- virt/kvm/kvm_main.c | 2 +- 7 files changed, 15 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 24cb5f66787d..96ebab7a1959 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -230,8 +230,8 @@ int kvm_arch_set_memory_region(struct kvm *kvm, int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, - struct kvm_memory_slot old, - struct kvm_userspace_memory_region *mem) + struct kvm_userspace_memory_region *mem, + enum kvm_mr_change change) { return 0; } diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 43701f0c0f71..5c2b07e8c3d6 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1560,8 +1560,8 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, - struct kvm_memory_slot old, - struct kvm_userspace_memory_region *mem) + struct kvm_userspace_memory_region *mem, + enum kvm_mr_change change) { unsigned long i; unsigned long pfn; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 22b33159fbc4..8aa51cd67c28 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -412,8 +412,8 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, - struct kvm_memory_slot old, - struct kvm_userspace_memory_region *mem) + struct kvm_userspace_memory_region *mem, + enum kvm_mr_change change) { return kvmppc_core_prepare_memory_region(kvm, memslot, mem); } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 07ac302ce246..4288780c86b8 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -974,8 +974,8 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) /* Section: memory related */ int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, - struct kvm_memory_slot old, - struct kvm_userspace_memory_region *mem) + struct kvm_userspace_memory_region *mem, + enum kvm_mr_change change) { /* A few sanity checks. We can have exactly one memory slot which has to start at guest virtual zero and which has to be located at a diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 26216bb4403f..7198234fa088 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6906,23 +6906,21 @@ out_free: int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, - struct kvm_memory_slot old, - struct kvm_userspace_memory_region *mem) + struct kvm_userspace_memory_region *mem, + enum kvm_mr_change change) { - int npages = memslot->npages; - /* * Only private memory slots need to be mapped here since * KVM_SET_MEMORY_REGION ioctl is no longer supported. */ - if ((memslot->id >= KVM_USER_MEM_SLOTS) && npages && !old.npages) { + if ((memslot->id >= KVM_USER_MEM_SLOTS) && (change == KVM_MR_CREATE)) { unsigned long userspace_addr; /* * MAP_SHARED to prevent internal slot pages from being moved * by fork()/COW. 
*/ - userspace_addr = vm_mmap(NULL, 0, npages * PAGE_SIZE, + userspace_addr = vm_mmap(NULL, 0, memslot->npages * PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, 0); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8eaf61f7b02d..caa72cf7e8e7 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -479,8 +479,8 @@ void kvm_arch_free_memslot(struct kvm_memory_slot *free, int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages); int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, - struct kvm_memory_slot old, - struct kvm_userspace_memory_region *mem); + struct kvm_userspace_memory_region *mem, + enum kvm_mr_change change); void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, struct kvm_memory_slot old); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index c7979ed41923..8f85bae862c7 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -856,7 +856,7 @@ int __kvm_set_memory_region(struct kvm *kvm, slots = old_memslots; } - r = kvm_arch_prepare_memory_region(kvm, &new, old, mem); + r = kvm_arch_prepare_memory_region(kvm, &new, mem, change); if (r) goto out_slots; -- cgit From 8482644aea11e0647867732319ccf35879a9acc2 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Wed, 27 Feb 2013 19:45:25 +0900 Subject: KVM: set_memory_region: Refactor commit_memory_region() This patch makes the parameter old a const pointer to the old memory slot and adds a new parameter named change to know the change being requested: the former is for removing extra copying and the latter is for cleaning up the code. Signed-off-by: Takuya Yoshikawa Signed-off-by: Marcelo Tosatti --- arch/arm/kvm/arm.c | 3 ++- arch/ia64/kvm/kvm-ia64.c | 3 ++- arch/powerpc/include/asm/kvm_ppc.h | 2 +- arch/powerpc/kvm/book3s_hv.c | 4 ++-- arch/powerpc/kvm/book3s_pr.c | 2 +- arch/powerpc/kvm/booke.c | 2 +- arch/powerpc/kvm/powerpc.c | 3 ++- arch/s390/kvm/kvm-s390.c | 3 ++- arch/x86/kvm/x86.c | 15 ++++++++------- include/linux/kvm_host.h | 3 ++- virt/kvm/kvm_main.c | 2 +- 11 files changed, 24 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 96ebab7a1959..b32dc446e802 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -238,7 +238,8 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old) + const struct kvm_memory_slot *old, + enum kvm_mr_change change) { } diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 5c2b07e8c3d6..7a54455dde39 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1591,7 +1591,8 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old) + const struct kvm_memory_slot *old, + enum kvm_mr_change change) { return; } diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 44a657adf416..44fa9ad1d62c 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -152,7 +152,7 @@ extern int kvmppc_core_prepare_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem); extern void kvmppc_core_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old); + const struct kvm_memory_slot *old); 
extern int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info); extern void kvmppc_core_flush_memslot(struct kvm *kvm, diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 80dcc53a1aba..1e521baf9a7d 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1639,12 +1639,12 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm, void kvmppc_core_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old) + const struct kvm_memory_slot *old) { unsigned long npages = mem->memory_size >> PAGE_SHIFT; struct kvm_memory_slot *memslot; - if (npages && old.npages) { + if (npages && old->npages) { /* * If modifying a memslot, reset all the rmap dirty bits. * If this is a new memslot, we don't need to do anything diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 5e93438afb06..286e23e6b92d 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1283,7 +1283,7 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm, void kvmppc_core_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old) + const struct kvm_memory_slot *old) { } diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 020923e43134..eb88fa621073 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1531,7 +1531,7 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm, void kvmppc_core_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old) + const struct kvm_memory_slot *old) { } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 8aa51cd67c28..7b5d4d20cdc5 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -420,7 +420,8 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old) + const struct kvm_memory_slot *old, + enum kvm_mr_change change) { kvmppc_core_commit_memory_region(kvm, mem, old); } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 4288780c86b8..6cae4ad647a9 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1001,7 +1001,8 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old) + const struct kvm_memory_slot *old, + enum kvm_mr_change change) { int rc; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7198234fa088..35b491229c3a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6935,16 +6935,17 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old) + const struct kvm_memory_slot *old, + enum kvm_mr_change change) { - int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT; + int nr_mmu_pages = 0; - if ((mem->slot >= KVM_USER_MEM_SLOTS) && old.npages && !npages) { + if ((mem->slot >= KVM_USER_MEM_SLOTS) && (change == KVM_MR_DELETE)) { int ret; - ret = vm_munmap(old.userspace_addr, - old.npages * PAGE_SIZE); + ret = vm_munmap(old->userspace_addr, + old->npages * PAGE_SIZE); if (ret < 0) printk(KERN_WARNING "kvm_vm_ioctl_set_memory_region: " @@ -6961,13 +6962,13 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, * Existing largepage 
mappings are destroyed here and new ones will * not be created until the end of the logging. */ - if (npages && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES)) + if ((change != KVM_MR_DELETE) && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES)) kvm_mmu_slot_remove_write_access(kvm, mem->slot); /* * If memory slot is created, or moved, we need to clear all * mmio sptes. */ - if (npages && old.base_gfn != mem->guest_phys_addr >> PAGE_SHIFT) { + if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) { kvm_mmu_zap_all(kvm); kvm_reload_remote_mmus(kvm); } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index caa72cf7e8e7..ac584cc53581 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -483,7 +483,8 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, enum kvm_mr_change change); void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old); + const struct kvm_memory_slot *old, + enum kvm_mr_change change); bool kvm_largepages_enabled(void); void kvm_disable_largepages(void); /* flush all memory translations */ diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 8f85bae862c7..0e919a1d4d56 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -896,7 +896,7 @@ int __kvm_set_memory_region(struct kvm *kvm, old_memslots = install_new_memslots(kvm, slots, &new); - kvm_arch_commit_memory_region(kvm, mem, old); + kvm_arch_commit_memory_region(kvm, mem, &old, change); kvm_free_physmem_slot(&old, &new); kfree(old_memslots); -- cgit From 51dcdafcb720a9d1fd73b597d0ccf48837abc59f Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Tue, 5 Mar 2013 14:16:00 +0800 Subject: regulator: core: Add enable_is_inverted flag to indicate set enable_mask bits to disable Add an enable_is_inverted flag to indicate that setting the enable_mask bits disables the regulator when using regulator_enable_regmap and friends APIs. 
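In other words, on such hardware a set bit in enable_mask means the rail is off. A hypothetical driver description using the new flag could look like this (the MYPMIC_* names and mypmic_ldo_ops are assumed for illustration):

static const struct regulator_desc mypmic_ldo1_desc = {
	.name			= "mypmic-ldo1",
	.id			= 0,
	.type			= REGULATOR_VOLTAGE,
	.owner			= THIS_MODULE,
	.ops			= &mypmic_ldo_ops,	/* built on regulator_enable_regmap() etc. */
	.enable_reg		= MYPMIC_LDO1_CTRL,	/* assumed register */
	.enable_mask		= BIT(0),		/* on this chip: 1 = disabled */
	.enable_is_inverted	= true,			/* so the core writes 0 to enable */
};

With the flag set, regulator_enable_regmap() clears the mask bits and regulator_disable_regmap() sets them, matching the core changes below.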
Signed-off-by: Axel Lin Reviewed-by: Haojian Zhuang Signed-off-by: Mark Brown --- drivers/regulator/core.c | 24 ++++++++++++++++++++---- include/linux/regulator/driver.h | 3 +++ 2 files changed, 23 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 154bc8f0c1a0..d887b9f5b213 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -1794,7 +1794,10 @@ int regulator_is_enabled_regmap(struct regulator_dev *rdev) if (ret != 0) return ret; - return (val & rdev->desc->enable_mask) != 0; + if (rdev->desc->enable_is_inverted) + return (val & rdev->desc->enable_mask) == 0; + else + return (val & rdev->desc->enable_mask) != 0; } EXPORT_SYMBOL_GPL(regulator_is_enabled_regmap); @@ -1809,9 +1812,15 @@ EXPORT_SYMBOL_GPL(regulator_is_enabled_regmap); */ int regulator_enable_regmap(struct regulator_dev *rdev) { + unsigned int val; + + if (rdev->desc->enable_is_inverted) + val = 0; + else + val = rdev->desc->enable_mask; + return regmap_update_bits(rdev->regmap, rdev->desc->enable_reg, - rdev->desc->enable_mask, - rdev->desc->enable_mask); + rdev->desc->enable_mask, val); } EXPORT_SYMBOL_GPL(regulator_enable_regmap); @@ -1826,8 +1835,15 @@ EXPORT_SYMBOL_GPL(regulator_enable_regmap); */ int regulator_disable_regmap(struct regulator_dev *rdev) { + unsigned int val; + + if (rdev->desc->enable_is_inverted) + val = rdev->desc->enable_mask; + else + val = 0; + return regmap_update_bits(rdev->regmap, rdev->desc->enable_reg, - rdev->desc->enable_mask, 0); + rdev->desc->enable_mask, val); } EXPORT_SYMBOL_GPL(regulator_disable_regmap); diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 7df93f52db08..07ea8f1a127e 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -199,6 +199,8 @@ enum regulator_type { * output when using regulator_set_voltage_sel_regmap * @enable_reg: Register for control when using regmap enable/disable ops * @enable_mask: Mask for control when using regmap enable/disable ops + * @enable_is_inverted: A flag to indicate that setting enable_mask bits disables + * the regulator when using regulator_enable_regmap and friends APIs. * @bypass_reg: Register for control when using regmap set_bypass * @bypass_mask: Mask for control when using regmap set_bypass * @@ -228,6 +230,7 @@ struct regulator_desc { unsigned int apply_bit; unsigned int enable_reg; unsigned int enable_mask; + bool enable_is_inverted; unsigned int bypass_reg; unsigned int bypass_mask; -- cgit From 7d8e0bf56a66bab08d2f316dd87e56c08cecb899 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 5 Mar 2013 10:57:03 +0800 Subject: cgroup: avoid accessing modular cgroup subsys structure without locking subsys[i] is set to NULL in cgroup_unload_subsys() at modular unload, and that's protected by cgroup_mutex, and then the memory *subsys[i] resides in will be freed. So this is unsafe without any locking: if (!ss || ss->module) ... 
v2: - add a comment for enum cgroup_subsys_id - simplify the comment in cgroup_exit() Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 17 ++++++++++++++--- kernel/cgroup.c | 28 ++++++++++++++-------------- 2 files changed, 28 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 75c6ec1ba1ba..5f76829dd75e 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -44,14 +44,25 @@ extern void cgroup_unload_subsys(struct cgroup_subsys *ss); extern const struct file_operations proc_cgroup_operations; -/* Define the enumeration of all builtin cgroup subsystems */ +/* + * Define the enumeration of all cgroup subsystems. + * + * We define ids for builtin subsystems and then modular ones. + */ #define SUBSYS(_x) _x ## _subsys_id, -#define IS_SUBSYS_ENABLED(option) IS_ENABLED(option) enum cgroup_subsys_id { +#define IS_SUBSYS_ENABLED(option) IS_BUILTIN(option) +#include +#undef IS_SUBSYS_ENABLED + CGROUP_BUILTIN_SUBSYS_COUNT, + + __CGROUP_SUBSYS_TEMP_PLACEHOLDER = CGROUP_BUILTIN_SUBSYS_COUNT - 1, + +#define IS_SUBSYS_ENABLED(option) IS_MODULE(option) #include +#undef IS_SUBSYS_ENABLED CGROUP_SUBSYS_COUNT, }; -#undef IS_SUBSYS_ENABLED #undef SUBSYS /* Per-subsystem/per-cgroup state maintained by the system. */ diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 9df799d5d31c..7a6c4c72ca55 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -4940,17 +4940,17 @@ void cgroup_post_fork(struct task_struct *child) * and addition to css_set. */ if (need_forkexit_callback) { - for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { + /* + * fork/exit callbacks are supported only for builtin + * subsystems, and the builtin section of the subsys + * array is immutable, so we don't need to lock the + * subsys array here. On the other hand, modular section + * of the array can be freed at module unload, so we + * can't touch that. + */ + for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) { struct cgroup_subsys *ss = subsys[i]; - /* - * fork/exit callbacks are supported only for - * builtin subsystems and we don't need further - * synchronization as they never go away. - */ - if (!ss || ss->module) - continue; - if (ss->fork) ss->fork(child); } @@ -5015,13 +5015,13 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks) tsk->cgroups = &init_css_set; if (run_callbacks && need_forkexit_callback) { - for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { + /* + * fork/exit callbacks are supported only for builtin + * subsystems, see cgroup_post_fork() for details. + */ + for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) { struct cgroup_subsys *ss = subsys[i]; - /* modular subsystems can't use callbacks */ - if (!ss || ss->module) - continue; - if (ss->exit) { struct cgroup *old_cgrp = rcu_dereference_raw(cg->subsys[i])->cgroup; -- cgit From 9259826ccb8165f797e4c2c9d17925b41af5f6ae Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 5 Mar 2013 11:37:50 +0800 Subject: res_counter: remove include of cgroup.h from res_counter.h It's not needed at all. Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- include/linux/res_counter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h index 5ae8456d9670..a83a849bf1d3 100644 --- a/include/linux/res_counter.h +++ b/include/linux/res_counter.h @@ -13,7 +13,7 @@ * info about what this counter is. */ -#include +#include /* * The core object. 
the cgroup that wishes to account for some -- cgit From ff794dea52eaaa09017efea688a1d7f92ab0818e Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 5 Mar 2013 11:37:56 +0800 Subject: cpuset: remove include of cgroup.h from cpuset.h We don't need to include cgroup.h in cpuset.h. Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- include/linux/cpuset.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 8c8a60d29407..ccd1de8ad822 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -11,7 +11,6 @@ #include #include #include -#include #include #ifdef CONFIG_CPUSETS -- cgit From e1fd1f490fa4213bd3060efa823a39d299538f72 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 5 Mar 2013 15:04:55 -0500 Subject: get rid of union semop in sys_semctl(2) arguments just have the bugger take unsigned long and deal with SETVAL case (when we use an int member in the union) explicitly. Signed-off-by: Al Viro --- arch/parisc/kernel/sys_parisc32.c | 15 ----- arch/parisc/kernel/syscall_table.S | 2 +- arch/sparc/kernel/sys_sparc_64.c | 2 +- include/linux/syscalls.h | 2 +- ipc/compat.c | 14 +++-- ipc/sem.c | 121 +++++++++++++++++++++++-------------- ipc/syscall.c | 6 +- 7 files changed, 91 insertions(+), 71 deletions(-) (limited to 'include/linux') diff --git a/arch/parisc/kernel/sys_parisc32.c b/arch/parisc/kernel/sys_parisc32.c index 46bdf6080fe4..f517e08e7f0d 100644 --- a/arch/parisc/kernel/sys_parisc32.c +++ b/arch/parisc/kernel/sys_parisc32.c @@ -60,21 +60,6 @@ asmlinkage long sys32_unimplemented(int r26, int r25, int r24, int r23, return -ENOSYS; } -asmlinkage long sys32_semctl(int semid, int semnum, int cmd, union semun arg) -{ - union semun u; - - if (cmd == SETVAL) { - /* Ugh. arg is a union of int,ptr,ptr,ptr, so is 8 bytes. - * The int should be in the first 4, but our argument - * frobbing has left it in the last 4. 
- */ - u.val = *((int *)&arg + 1); - return sys_semctl (semid, semnum, cmd, u); - } - return sys_semctl (semid, semnum, cmd, arg); -} - asmlinkage long compat_sys_fanotify_mark(int fan_fd, int flags, u32 mask_hi, u32 mask_lo, int fd, const char __user *pathname) diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S index 30c9a3bba1cc..0c9107285e66 100644 --- a/arch/parisc/kernel/syscall_table.S +++ b/arch/parisc/kernel/syscall_table.S @@ -282,7 +282,7 @@ ENTRY_COMP(recvmsg) ENTRY_SAME(semop) /* 185 */ ENTRY_SAME(semget) - ENTRY_DIFF(semctl) + ENTRY_COMP(semctl) ENTRY_COMP(msgsnd) ENTRY_COMP(msgrcv) ENTRY_SAME(msgget) /* 190 */ diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index 42beb6fc4ad8..2daaaa6eda23 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -353,7 +353,7 @@ SYSCALL_DEFINE6(sparc_ipc, unsigned int, call, int, first, unsigned long, second case SEMCTL: { err = sys_semctl(first, second, (int)third | IPC_64, - (union semun) ptr); + (unsigned long) ptr); goto out; } default: diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 9660a8bdcbbe..65c001f7fa0b 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -657,7 +657,7 @@ asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf); asmlinkage long sys_semget(key_t key, int nsems, int semflg); asmlinkage long sys_semop(int semid, struct sembuf __user *sops, unsigned nsops); -asmlinkage long sys_semctl(int semid, int semnum, int cmd, union semun arg); +asmlinkage long sys_semctl(int semid, int semnum, int cmd, unsigned long arg); asmlinkage long sys_semtimedop(int semid, struct sembuf __user *sops, unsigned nsops, const struct timespec __user *timeout); diff --git a/ipc/compat.c b/ipc/compat.c index 6cb6a4df86e4..892f6585dd60 100644 --- a/ipc/compat.c +++ b/ipc/compat.c @@ -240,7 +240,7 @@ static inline int put_compat_semid_ds(struct semid64_ds *s, static long do_compat_semctl(int first, int second, int third, u32 pad) { - union semun fourth; + unsigned long fourth; int err, err2; struct semid64_ds s64; struct semid64_ds __user *up64; @@ -249,9 +249,13 @@ static long do_compat_semctl(int first, int second, int third, u32 pad) memset(&s64, 0, sizeof(s64)); if ((third & (~IPC_64)) == SETVAL) - fourth.val = (int) pad; +#ifdef __BIG_ENDIAN + fourth = (unsigned long)pad << 32; +#else + fourth = pad; +#endif else - fourth.__pad = compat_ptr(pad); + fourth = (unsigned long)compat_ptr(pad); switch (third & (~IPC_64)) { case IPC_INFO: case IPC_RMID: @@ -269,7 +273,7 @@ static long do_compat_semctl(int first, int second, int third, u32 pad) case IPC_STAT: case SEM_STAT: up64 = compat_alloc_user_space(sizeof(s64)); - fourth.__pad = up64; + fourth = (unsigned long)up64; err = sys_semctl(first, second, third, fourth); if (err < 0) break; @@ -295,7 +299,7 @@ static long do_compat_semctl(int first, int second, int third, u32 pad) if (err) break; - fourth.__pad = up64; + fourth = (unsigned long)up64; err = sys_semctl(first, second, third, fourth); break; diff --git a/ipc/sem.c b/ipc/sem.c index e7236df7a470..5b167d00efa6 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -799,7 +799,7 @@ static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, } static int semctl_nolock(struct ipc_namespace *ns, int semid, - int cmd, int version, union semun arg) + int cmd, int version, void __user *p) { int err; struct sem_array *sma; @@ -834,7 +834,7 @@ static int semctl_nolock(struct 
ipc_namespace *ns, int semid, } max_id = ipc_get_maxid(&sem_ids(ns)); up_read(&sem_ids(ns).rw_mutex); - if (copy_to_user (arg.__buf, &seminfo, sizeof(struct seminfo))) + if (copy_to_user(p, &seminfo, sizeof(struct seminfo))) return -EFAULT; return (max_id < 0) ? 0: max_id; } @@ -871,7 +871,7 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid, tbuf.sem_ctime = sma->sem_ctime; tbuf.sem_nsems = sma->sem_nsems; sem_unlock(sma); - if (copy_semid_to_user (arg.buf, &tbuf, version)) + if (copy_semid_to_user(p, &tbuf, version)) return -EFAULT; return id; } @@ -883,8 +883,67 @@ out_unlock: return err; } +static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum, + unsigned long arg) +{ + struct sem_undo *un; + struct sem_array *sma; + struct sem* curr; + int err; + int nsems; + struct list_head tasks; + int val; +#if defined(CONFIG_64BIT) && defined(__BIG_ENDIAN) + /* big-endian 64bit */ + val = arg >> 32; +#else + /* 32bit or little-endian 64bit */ + val = arg; +#endif + + sma = sem_lock_check(ns, semid); + if (IS_ERR(sma)) + return PTR_ERR(sma); + + INIT_LIST_HEAD(&tasks); + nsems = sma->sem_nsems; + + err = -EACCES; + if (ipcperms(ns, &sma->sem_perm, S_IWUGO)) + goto out_unlock; + + err = security_sem_semctl(sma, SETVAL); + if (err) + goto out_unlock; + + err = -EINVAL; + if(semnum < 0 || semnum >= nsems) + goto out_unlock; + + curr = &sma->sem_base[semnum]; + + err = -ERANGE; + if (val > SEMVMX || val < 0) + goto out_unlock; + + assert_spin_locked(&sma->sem_perm.lock); + list_for_each_entry(un, &sma->list_id, list_id) + un->semadj[semnum] = 0; + + curr->semval = val; + curr->sempid = task_tgid_vnr(current); + sma->sem_ctime = get_seconds(); + /* maybe some queued-up processes were waiting for this */ + do_smart_update(sma, NULL, 0, 0, &tasks); + err = 0; +out_unlock: + sem_unlock(sma); + wake_up_sem_queue_do(&tasks); + return err; +} + static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, - int cmd, int version, union semun arg) + int cmd, void __user *p) { struct sem_array *sma; struct sem* curr; @@ -903,7 +962,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, err = -EACCES; if (ipcperms(ns, &sma->sem_perm, - (cmd == SETVAL || cmd == SETALL) ? S_IWUGO : S_IRUGO)) + cmd == SETALL ? 
S_IWUGO : S_IRUGO)) goto out_unlock; err = security_sem_semctl(sma, cmd); @@ -914,7 +973,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, switch (cmd) { case GETALL: { - ushort __user *array = arg.array; + ushort __user *array = p; int i; if(nsems > SEMMSL_FAST) { @@ -957,7 +1016,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, } } - if (copy_from_user (sem_io, arg.array, nsems*sizeof(ushort))) { + if (copy_from_user (sem_io, p, nsems*sizeof(ushort))) { sem_putref(sma); err = -EFAULT; goto out_free; @@ -991,7 +1050,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, err = 0; goto out_unlock; } - /* GETVAL, GETPID, GETNCTN, GETZCNT, SETVAL: fall-through */ + /* GETVAL, GETPID, GETNCTN, GETZCNT: fall-through */ } err = -EINVAL; if(semnum < 0 || semnum >= nsems) @@ -1012,27 +1071,6 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, case GETZCNT: err = count_semzcnt(sma,semnum); goto out_unlock; - case SETVAL: - { - int val = arg.val; - struct sem_undo *un; - - err = -ERANGE; - if (val > SEMVMX || val < 0) - goto out_unlock; - - assert_spin_locked(&sma->sem_perm.lock); - list_for_each_entry(un, &sma->list_id, list_id) - un->semadj[semnum] = 0; - - curr->semval = val; - curr->sempid = task_tgid_vnr(current); - sma->sem_ctime = get_seconds(); - /* maybe some queued-up processes were waiting for this */ - do_smart_update(sma, NULL, 0, 0, &tasks); - err = 0; - goto out_unlock; - } } out_unlock: sem_unlock(sma); @@ -1076,7 +1114,7 @@ copy_semid_from_user(struct semid64_ds *out, void __user *buf, int version) * NOTE: no locks must be held, the rw_mutex is taken inside this function. */ static int semctl_down(struct ipc_namespace *ns, int semid, - int cmd, int version, union semun arg) + int cmd, int version, void __user *p) { struct sem_array *sma; int err; @@ -1084,7 +1122,7 @@ static int semctl_down(struct ipc_namespace *ns, int semid, struct kern_ipc_perm *ipcp; if(cmd == IPC_SET) { - if (copy_semid_from_user(&semid64, arg.buf, version)) + if (copy_semid_from_user(&semid64, p, version)) return -EFAULT; } @@ -1120,11 +1158,11 @@ out_up: return err; } -SYSCALL_DEFINE(semctl)(int semid, int semnum, int cmd, union semun arg) +SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, unsigned long, arg) { - int err = -EINVAL; int version; struct ipc_namespace *ns; + void __user *p = (void __user *)arg; if (semid < 0) return -EINVAL; @@ -1137,30 +1175,23 @@ SYSCALL_DEFINE(semctl)(int semid, int semnum, int cmd, union semun arg) case SEM_INFO: case IPC_STAT: case SEM_STAT: - err = semctl_nolock(ns, semid, cmd, version, arg); - return err; + return semctl_nolock(ns, semid, cmd, version, p); case GETALL: case GETVAL: case GETPID: case GETNCNT: case GETZCNT: - case SETVAL: case SETALL: - err = semctl_main(ns,semid,semnum,cmd,version,arg); - return err; + return semctl_main(ns, semid, semnum, cmd, p); + case SETVAL: + return semctl_setval(ns, semid, semnum, arg); case IPC_RMID: case IPC_SET: - err = semctl_down(ns, semid, cmd, version, arg); - return err; + return semctl_down(ns, semid, cmd, version, p); default: return -EINVAL; } } -asmlinkage long SyS_semctl(int semid, int semnum, int cmd, union semun arg) -{ - return SYSC_semctl((int) semid, (int) semnum, (int) cmd, arg); -} -SYSCALL_ALIAS(sys_semctl, SyS_semctl); /* If the task doesn't already have a undo_list, then allocate one * here. 
We guarantee there is only one thread using this undo list, diff --git a/ipc/syscall.c b/ipc/syscall.c index 0d1e32ce048e..52429489cde0 100644 --- a/ipc/syscall.c +++ b/ipc/syscall.c @@ -33,12 +33,12 @@ SYSCALL_DEFINE6(ipc, unsigned int, call, int, first, unsigned long, second, case SEMGET: return sys_semget(first, second, third); case SEMCTL: { - union semun fourth; + unsigned long arg; if (!ptr) return -EINVAL; - if (get_user(fourth.__pad, (void __user * __user *) ptr)) + if (get_user(arg, (unsigned long __user *) ptr)) return -EFAULT; - return sys_semctl(first, second, third, fourth); + return sys_semctl(first, second, third, arg); } case MSGSND: -- cgit From 99e621f796d7f0341a51e8cdf32b81663b10b448 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 5 Mar 2013 15:36:40 -0500 Subject: syscalls.h: slightly reduce the jungles of macros a) teach __MAP(num, m, ) to take empty list (with num being 0, of course) b) fold types__... and args__... declaration and initialization into SYSCALL_METADATA(num, ...), making their use conditional on num != 0. That allows to use the SYSCALL_METADATA instead of its near-duplicate in SYSCALL_DEFINE0. c) make SYSCALL_METADATA expand to nothing in case if CONFIG_FTRACE_SYSCALLS is not defined; that allows to make SYSCALL_DEFINE0 and SYSCALL_DEFINEx definitions independent from CONFIG_FTRACE_SYSCALLS. d) kill SYSCALL_DEFINE - no users left (SYSCALL_DEFINE[0-6] is, of course, still alive and well). Signed-off-by: Al Viro --- include/linux/syscalls.h | 49 +++++++++++++++--------------------------------- 1 file changed, 15 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 65c001f7fa0b..4147d700a293 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -87,6 +87,7 @@ struct sigaltstack; * of __MAP starting at the third one) is in the same format as * for SYSCALL_DEFINE/COMPAT_SYSCALL_DEFINE */ +#define __MAP0(m,...) #define __MAP1(m,t,a) m(t,a) #define __MAP2(m,t,a,...) m(t,a), __MAP1(m,__VA_ARGS__) #define __MAP3(m,t,a,...) m(t,a), __MAP2(m,__VA_ARGS__) @@ -139,7 +140,13 @@ extern struct trace_event_functions exit_syscall_print_funcs; __attribute__((section("_ftrace_events"))) \ *__event_exit_##sname = &event_exit_##sname; -#define SYSCALL_METADATA(sname, nb) \ +#define SYSCALL_METADATA(sname, nb, ...) \ + static const char *types_##sname[] = { \ + __MAP(nb,__SC_STR_TDECL,__VA_ARGS__) \ + }; \ + static const char *args_##sname[] = { \ + __MAP(nb,__SC_STR_ADECL,__VA_ARGS__) \ + }; \ SYSCALL_TRACE_ENTER_EVENT(sname); \ SYSCALL_TRACE_EXIT_EVENT(sname); \ static struct syscall_metadata __used \ @@ -147,8 +154,8 @@ extern struct trace_event_functions exit_syscall_print_funcs; .name = "sys"#sname, \ .syscall_nr = -1, /* Filled in at boot */ \ .nb_args = nb, \ - .types = types_##sname, \ - .args = args_##sname, \ + .types = nb ? types_##sname : NULL, \ + .args = nb ? args_##sname : NULL, \ .enter_event = &event_enter_##sname, \ .exit_event = &event_exit_##sname, \ .enter_fields = LIST_HEAD_INIT(__syscall_meta_##sname.enter_fields), \ @@ -156,26 +163,13 @@ extern struct trace_event_functions exit_syscall_print_funcs; static struct syscall_metadata __used \ __attribute__((section("__syscalls_metadata"))) \ *__p_syscall_meta_##sname = &__syscall_meta_##sname; +#else +#define SYSCALL_METADATA(sname, nb, ...) 
+#endif #define SYSCALL_DEFINE0(sname) \ - SYSCALL_TRACE_ENTER_EVENT(_##sname); \ - SYSCALL_TRACE_EXIT_EVENT(_##sname); \ - static struct syscall_metadata __used \ - __syscall_meta__##sname = { \ - .name = "sys_"#sname, \ - .syscall_nr = -1, /* Filled in at boot */ \ - .nb_args = 0, \ - .enter_event = &event_enter__##sname, \ - .exit_event = &event_exit__##sname, \ - .enter_fields = LIST_HEAD_INIT(__syscall_meta__##sname.enter_fields), \ - }; \ - static struct syscall_metadata __used \ - __attribute__((section("__syscalls_metadata"))) \ - *__p_syscall_meta_##sname = &__syscall_meta__##sname; \ + SYSCALL_METADATA(_##sname, 0); \ asmlinkage long sys_##sname(void) -#else -#define SYSCALL_DEFINE0(name) asmlinkage long sys_##name(void) -#endif #define SYSCALL_DEFINE1(name, ...) SYSCALL_DEFINEx(1, _##name, __VA_ARGS__) #define SYSCALL_DEFINE2(name, ...) SYSCALL_DEFINEx(2, _##name, __VA_ARGS__) @@ -184,22 +178,9 @@ extern struct trace_event_functions exit_syscall_print_funcs; #define SYSCALL_DEFINE5(name, ...) SYSCALL_DEFINEx(5, _##name, __VA_ARGS__) #define SYSCALL_DEFINE6(name, ...) SYSCALL_DEFINEx(6, _##name, __VA_ARGS__) -#ifdef CONFIG_FTRACE_SYSCALLS #define SYSCALL_DEFINEx(x, sname, ...) \ - static const char *types_##sname[] = { \ - __MAP(x,__SC_STR_TDECL,__VA_ARGS__) \ - }; \ - static const char *args_##sname[] = { \ - __MAP(x,__SC_STR_ADECL,__VA_ARGS__) \ - }; \ - SYSCALL_METADATA(sname, x); \ + SYSCALL_METADATA(sname, x, __VA_ARGS__) \ __SYSCALL_DEFINEx(x, sname, __VA_ARGS__) -#else -#define SYSCALL_DEFINEx(x, sname, ...) \ - __SYSCALL_DEFINEx(x, sname, __VA_ARGS__) -#endif - -#define SYSCALL_DEFINE(name) static inline long SYSC_##name #define __PROTECT(...) asmlinkage_protect(__VA_ARGS__) #define __SYSCALL_DEFINEx(x, name, ...) \ -- cgit From a0f155e9646d5f1c263f6f9aae880151100243bb Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Thu, 28 Feb 2013 12:33:18 +0100 Subject: KVM: Initialize irqfd from kvm_init(). Currently, eventfd introduces module_init/module_exit functions to initialize/cleanup the irqfd workqueue. This only works, however, if no other module_init/module_exit functions are built into the same module. Let's just move the initialization and cleanup to kvm_init and kvm_exit. This way, it is also clearer where kvm startup may fail. Signed-off-by: Cornelia Huck Signed-off-by: Marcelo Tosatti --- include/linux/kvm_host.h | 13 +++++++++++++ virt/kvm/eventfd.c | 7 ++----- virt/kvm/kvm_main.c | 6 ++++++ 3 files changed, 21 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ac584cc53581..d50fe173028b 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -424,6 +424,19 @@ void kvm_vcpu_uninit(struct kvm_vcpu *vcpu); int __must_check vcpu_load(struct kvm_vcpu *vcpu); void vcpu_put(struct kvm_vcpu *vcpu); +#ifdef __KVM_HAVE_IOAPIC +int kvm_irqfd_init(void); +void kvm_irqfd_exit(void); +#else +static inline int kvm_irqfd_init(void) +{ + return 0; +} + +static inline void kvm_irqfd_exit(void) +{ +} +#endif int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, struct module *module); void kvm_exit(void); diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index adb17f266b28..0b6fe69bb03d 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -543,7 +543,7 @@ void kvm_irq_routing_update(struct kvm *kvm, * aggregated from all vm* instances. We need our own isolated single-thread * queue to prevent deadlock against flushing the normal work-queue. 
*/ -static int __init irqfd_module_init(void) +int kvm_irqfd_init(void) { irqfd_cleanup_wq = create_singlethread_workqueue("kvm-irqfd-cleanup"); if (!irqfd_cleanup_wq) @@ -552,13 +552,10 @@ static int __init irqfd_module_init(void) return 0; } -static void __exit irqfd_module_exit(void) +void kvm_irqfd_exit(void) { destroy_workqueue(irqfd_cleanup_wq); } - -module_init(irqfd_module_init); -module_exit(irqfd_module_exit); #endif /* diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 0e919a1d4d56..faf05bddd131 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2898,6 +2898,9 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, int r; int cpu; + r = kvm_irqfd_init(); + if (r) + goto out_irqfd; r = kvm_arch_init(opaque); if (r) goto out_fail; @@ -2978,6 +2981,8 @@ out_free_0a: out_free_0: kvm_arch_exit(); out_fail: + kvm_irqfd_exit(); +out_irqfd: return r; } EXPORT_SYMBOL_GPL(kvm_init); @@ -2994,6 +2999,7 @@ void kvm_exit(void) on_each_cpu(hardware_disable_nolock, NULL, 1); kvm_arch_hardware_unsetup(); kvm_arch_exit(); + kvm_irqfd_exit(); free_cpumask_var(cpus_hardware_enabled); } EXPORT_SYMBOL_GPL(kvm_exit); -- cgit From 060f0ce6ff975decd1e0ee318c08e228bccbee1e Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Thu, 28 Feb 2013 12:33:19 +0100 Subject: KVM: Introduce KVM_VIRTIO_CCW_NOTIFY_BUS. Add a new bus type for virtio-ccw devices on s390. Signed-off-by: Cornelia Huck Signed-off-by: Marcelo Tosatti --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index d50fe173028b..9fa13ebc3381 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -149,6 +149,7 @@ struct kvm_io_bus { enum kvm_bus { KVM_MMIO_BUS, KVM_PIO_BUS, + KVM_VIRTIO_CCW_NOTIFY_BUS, KVM_NR_BUSES }; -- cgit From 82dc3c63c692b1e1d59378ecee948ac88e034aad Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 5 Mar 2013 15:57:22 +0000 Subject: net: introduce NAPI_POLL_WEIGHT Some drivers use a too big NAPI poll weight. This patch adds a NAPI_POLL_WEIGHT default value and issues an error message if a driver attempts to use a bigger weight. Signed-off-by: Eric Dumazet Cc: Eilon Greenstein Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 5 +++++ net/core/dev.c | 3 +++ 2 files changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b3d00fa4b314..896eb4985f97 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1475,6 +1475,11 @@ static inline void *netdev_priv(const struct net_device *dev) */ #define SET_NETDEV_DEVTYPE(net, devtype) ((net)->dev.type = (devtype)) +/* Default NAPI poll() weight + * Device drivers are strongly advised to not use bigger value + */ +#define NAPI_POLL_WEIGHT 64 + /** * netif_napi_add - initialize a napi context * @dev: network device diff --git a/net/core/dev.c b/net/core/dev.c index a06a7a58dd11..96103894ad69 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4057,6 +4057,9 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi, napi->gro_list = NULL; napi->skb = NULL; napi->poll = poll; + if (weight > NAPI_POLL_WEIGHT) + pr_err_once("netif_napi_add() called with weight %d on device %s\n", + weight, dev->name); napi->weight = weight; list_add(&napi->dev_list, &dev->napi_list); napi->dev = dev; -- cgit From 1fd9c467b4f7e08beee41f9771396f39265f4c08 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 5 Mar 2013 11:51:55 +0800 Subject: mfd: arizona: Define additional FLL control registers Signed-off-by: Mark Brown --- include/linux/mfd/arizona/registers.h | 40 +++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/arizona/registers.h b/include/linux/mfd/arizona/registers.h index 340355136069..a61ce90ecd3f 100644 --- a/include/linux/mfd/arizona/registers.h +++ b/include/linux/mfd/arizona/registers.h @@ -85,12 +85,14 @@ #define ARIZONA_FLL1_CONTROL_6 0x176 #define ARIZONA_FLL1_LOOP_FILTER_TEST_1 0x177 #define ARIZONA_FLL1_NCO_TEST_0 0x178 +#define ARIZONA_FLL1_CONTROL_7 0x179 #define ARIZONA_FLL1_SYNCHRONISER_1 0x181 #define ARIZONA_FLL1_SYNCHRONISER_2 0x182 #define ARIZONA_FLL1_SYNCHRONISER_3 0x183 #define ARIZONA_FLL1_SYNCHRONISER_4 0x184 #define ARIZONA_FLL1_SYNCHRONISER_5 0x185 #define ARIZONA_FLL1_SYNCHRONISER_6 0x186 +#define ARIZONA_FLL1_SYNCHRONISER_7 0x187 #define ARIZONA_FLL1_SPREAD_SPECTRUM 0x189 #define ARIZONA_FLL1_GPIO_CLOCK 0x18A #define ARIZONA_FLL2_CONTROL_1 0x191 @@ -101,12 +103,14 @@ #define ARIZONA_FLL2_CONTROL_6 0x196 #define ARIZONA_FLL2_LOOP_FILTER_TEST_1 0x197 #define ARIZONA_FLL2_NCO_TEST_0 0x198 +#define ARIZONA_FLL2_CONTROL_7 0x199 #define ARIZONA_FLL2_SYNCHRONISER_1 0x1A1 #define ARIZONA_FLL2_SYNCHRONISER_2 0x1A2 #define ARIZONA_FLL2_SYNCHRONISER_3 0x1A3 #define ARIZONA_FLL2_SYNCHRONISER_4 0x1A4 #define ARIZONA_FLL2_SYNCHRONISER_5 0x1A5 #define ARIZONA_FLL2_SYNCHRONISER_6 0x1A6 +#define ARIZONA_FLL2_SYNCHRONISER_7 0x1A7 #define ARIZONA_FLL2_SPREAD_SPECTRUM 0x1A9 #define ARIZONA_FLL2_GPIO_CLOCK 0x1AA #define ARIZONA_MIC_CHARGE_PUMP_1 0x200 @@ -1677,6 +1681,13 @@ #define ARIZONA_FLL1_FRC_INTEG_VAL_SHIFT 0 /* FLL1_FRC_INTEG_VAL - [11:0] */ #define ARIZONA_FLL1_FRC_INTEG_VAL_WIDTH 12 /* FLL1_FRC_INTEG_VAL - [11:0] */ +/* + * R377 (0x179) - FLL1 Control 7 + */ +#define ARIZONA_FLL1_GAIN_MASK 0x003c /* FLL1_GAIN */ +#define ARIZONA_FLL1_GAIN_SHIFT 2 /* FLL1_GAIN */ +#define ARIZONA_FLL1_GAIN_WIDTH 4 /* FLL1_GAIN */ + /* * R385 (0x181) - FLL1 Synchroniser 1 */ @@ -1723,6 +1734,17 @@ #define ARIZONA_FLL1_CLK_SYNC_SRC_SHIFT 0 /* FLL1_CLK_SYNC_SRC - [3:0] */ #define ARIZONA_FLL1_CLK_SYNC_SRC_WIDTH 4 /* FLL1_CLK_SYNC_SRC - [3:0] */ +/* + * R391 (0x187) - FLL1 
Synchroniser 7 + */ +#define ARIZONA_FLL1_SYNC_GAIN_MASK 0x003c /* FLL1_SYNC_GAIN */ +#define ARIZONA_FLL1_SYNC_GAIN_SHIFT 2 /* FLL1_SYNC_GAIN */ +#define ARIZONA_FLL1_SYNC_GAIN_WIDTH 4 /* FLL1_SYNC_GAIN */ +#define ARIZONA_FLL1_SYNC_BW 0x0001 /* FLL1_SYNC_BW */ +#define ARIZONA_FLL1_SYNC_BW_MASK 0x0001 /* FLL1_SYNC_BW */ +#define ARIZONA_FLL1_SYNC_BW_SHIFT 0 /* FLL1_SYNC_BW */ +#define ARIZONA_FLL1_SYNC_BW_WIDTH 1 /* FLL1_SYNC_BW */ + /* * R393 (0x189) - FLL1 Spread Spectrum */ @@ -1815,6 +1837,13 @@ #define ARIZONA_FLL2_FRC_INTEG_VAL_SHIFT 0 /* FLL2_FRC_INTEG_VAL - [11:0] */ #define ARIZONA_FLL2_FRC_INTEG_VAL_WIDTH 12 /* FLL2_FRC_INTEG_VAL - [11:0] */ +/* + * R409 (0x199) - FLL2 Control 7 + */ +#define ARIZONA_FLL2_GAIN_MASK 0x003c /* FLL2_GAIN */ +#define ARIZONA_FLL2_GAIN_SHIFT 2 /* FLL2_GAIN */ +#define ARIZONA_FLL2_GAIN_WIDTH 4 /* FLL2_GAIN */ + /* * R417 (0x1A1) - FLL2 Synchroniser 1 */ @@ -1861,6 +1890,17 @@ #define ARIZONA_FLL2_CLK_SYNC_SRC_SHIFT 0 /* FLL2_CLK_SYNC_SRC - [3:0] */ #define ARIZONA_FLL2_CLK_SYNC_SRC_WIDTH 4 /* FLL2_CLK_SYNC_SRC - [3:0] */ +/* + * R423 (0x1A7) - FLL2 Synchroniser 7 + */ +#define ARIZONA_FLL2_SYNC_GAIN_MASK 0x003c /* FLL2_SYNC_GAIN */ +#define ARIZONA_FLL2_SYNC_GAIN_SHIFT 2 /* FLL2_SYNC_GAIN */ +#define ARIZONA_FLL2_SYNC_GAIN_WIDTH 4 /* FLL2_SYNC_GAIN */ +#define ARIZONA_FLL2_SYNC_BW 0x0001 /* FLL2_SYNC_BW */ +#define ARIZONA_FLL2_SYNC_BW_MASK 0x0001 /* FLL2_SYNC_BW */ +#define ARIZONA_FLL2_SYNC_BW_SHIFT 0 /* FLL2_SYNC_BW */ +#define ARIZONA_FLL2_SYNC_BW_WIDTH 1 /* FLL2_SYNC_BW */ + /* * R425 (0x1A9) - FLL2 Spread Spectrum */ -- cgit From 19a37d1cd5465c10d669a296a2ea24b4c985363b Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 5 Mar 2013 16:05:28 +0800 Subject: sched: Remove some dummy functions No one will call those functions if CONFIG_SCHED_DEBUG=n. Signed-off-by: Li Zefan Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/5135A748.3050206@huawei.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index d35d2b6ddbfb..2715fbb9ea85 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -127,18 +127,6 @@ extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m); extern void proc_sched_set_task(struct task_struct *p); extern void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq); -#else -static inline void -proc_sched_show_task(struct task_struct *p, struct seq_file *m) -{ -} -static inline void proc_sched_set_task(struct task_struct *p) -{ -} -static inline void -print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) -{ -} #endif /* -- cgit From 090b582f27ac7b6714661020033160130e5297bd Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 5 Mar 2013 16:05:51 +0800 Subject: sched: Remove test_sd_parent() It's unused.
Signed-off-by: Li Zefan Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/5135A75F.4070202@huawei.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 2715fbb9ea85..e880d7d115ef 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -959,15 +959,6 @@ extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], cpumask_var_t *alloc_sched_domains(unsigned int ndoms); void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms); -/* Test a flag in parent sched domain */ -static inline int test_sd_parent(struct sched_domain *sd, int flag) -{ - if (sd->parent && (sd->parent->flags & flag)) - return 1; - - return 0; -} - unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu); unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu); -- cgit From cc1f4b1f3faed9f2040eff2a75f510b424b3cf18 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 5 Mar 2013 16:06:09 +0800 Subject: sched: Move SCHED_LOAD_SHIFT macros to kernel/sched/sched.h They are used internally only. Signed-off-by: Li Zefan Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/5135A771.4070104@huawei.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 25 ------------------------- kernel/sched/sched.h | 26 +++++++++++++++++++++++++- 2 files changed, 25 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index e880d7d115ef..f8826d04fb12 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -755,31 +755,6 @@ enum cpu_idle_type { CPU_MAX_IDLE_TYPES }; -/* - * Increase resolution of nice-level calculations for 64-bit architectures. - * The extra resolution improves shares distribution and load balancing of - * low-weight task groups (eg. nice +19 on an autogroup), deeper taskgroup - * hierarchies, especially on larger systems. This is not a user-visible change - * and does not change the user-interface for setting shares/weights. - * - * We increase resolution only if we have enough bits to allow this increased - * resolution (i.e. BITS_PER_LONG > 32). The costs for increasing resolution - * when BITS_PER_LONG <= 32 are pretty high and the returns do not justify the - * increased costs. - */ -#if 0 /* BITS_PER_LONG > 32 -- currently broken: it increases power usage under light load */ -# define SCHED_LOAD_RESOLUTION 10 -# define scale_load(w) ((w) << SCHED_LOAD_RESOLUTION) -# define scale_load_down(w) ((w) >> SCHED_LOAD_RESOLUTION) -#else -# define SCHED_LOAD_RESOLUTION 0 -# define scale_load(w) (w) -# define scale_load_down(w) (w) -#endif - -#define SCHED_LOAD_SHIFT (10 + SCHED_LOAD_RESOLUTION) -#define SCHED_LOAD_SCALE (1L << SCHED_LOAD_SHIFT) - /* * Increase resolution of cpu_power calculations */ diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index cc03cfdf469f..709a30cdfd85 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -33,6 +33,31 @@ extern __read_mostly int scheduler_running; */ #define NS_TO_JIFFIES(TIME) ((unsigned long)(TIME) / (NSEC_PER_SEC / HZ)) +/* + * Increase resolution of nice-level calculations for 64-bit architectures. + * The extra resolution improves shares distribution and load balancing of + * low-weight task groups (eg. nice +19 on an autogroup), deeper taskgroup + * hierarchies, especially on larger systems. 
This is not a user-visible change + * and does not change the user-interface for setting shares/weights. + * + * We increase resolution only if we have enough bits to allow this increased + * resolution (i.e. BITS_PER_LONG > 32). The costs for increasing resolution + * when BITS_PER_LONG <= 32 are pretty high and the returns do not justify the + * increased costs. + */ +#if 0 /* BITS_PER_LONG > 32 -- currently broken: it increases power usage under light load */ +# define SCHED_LOAD_RESOLUTION 10 +# define scale_load(w) ((w) << SCHED_LOAD_RESOLUTION) +# define scale_load_down(w) ((w) >> SCHED_LOAD_RESOLUTION) +#else +# define SCHED_LOAD_RESOLUTION 0 +# define scale_load(w) (w) +# define scale_load_down(w) (w) +#endif + +#define SCHED_LOAD_SHIFT (10 + SCHED_LOAD_RESOLUTION) +#define SCHED_LOAD_SCALE (1L << SCHED_LOAD_SHIFT) + #define NICE_0_LOAD SCHED_LOAD_SCALE #define NICE_0_SHIFT SCHED_LOAD_SHIFT @@ -784,7 +809,6 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) } #endif /* __ARCH_WANT_UNLOCKED_CTXSW */ - static inline void update_load_add(struct load_weight *lw, unsigned long inc) { lw->weight += inc; -- cgit From 5e6521eaa1ee581a13b904f35b80c5efeb2baccb Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 5 Mar 2013 16:06:23 +0800 Subject: sched: Move struct sched_group to kernel/sched/sched.h Move struct sched_group_power and sched_group and related inline functions to kernel/sched/sched.h, as they are used internally only. Signed-off-by: Li Zefan Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/5135A77F.2010705@huawei.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 58 ++------------------------------------------------- kernel/sched/sched.h | 56 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 56 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index f8826d04fb12..0d641304c0ff 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -780,62 +780,6 @@ enum cpu_idle_type { extern int __weak arch_sd_sibiling_asym_packing(void); -struct sched_group_power { - atomic_t ref; - /* - * CPU power of this group, SCHED_LOAD_SCALE being max power for a - * single CPU. - */ - unsigned int power, power_orig; - unsigned long next_update; - /* - * Number of busy cpus in this group. - */ - atomic_t nr_busy_cpus; - - unsigned long cpumask[0]; /* iteration mask */ -}; - -struct sched_group { - struct sched_group *next; /* Must be a circular list */ - atomic_t ref; - - unsigned int group_weight; - struct sched_group_power *sgp; - - /* - * The CPUs this group covers. - * - * NOTE: this field is variable length. (Allocated dynamically - * by attaching extra space to the end of the structure, - * depending on how many CPUs the kernel has booted up with) - */ - unsigned long cpumask[0]; -}; - -static inline struct cpumask *sched_group_cpus(struct sched_group *sg) -{ - return to_cpumask(sg->cpumask); -} - -/* - * cpumask masking which cpus in the group are allowed to iterate up the domain - * tree. - */ -static inline struct cpumask *sched_group_mask(struct sched_group *sg) -{ - return to_cpumask(sg->sgp->cpumask); -} - -/** - * group_first_cpu - Returns the first cpu in the cpumask of a sched_group. - * @group: The group whose first cpu is to be returned. 
- */ -static inline unsigned int group_first_cpu(struct sched_group *group) -{ - return cpumask_first(sched_group_cpus(group)); -} - struct sched_domain_attr { int relax_domain_level; }; @@ -846,6 +790,8 @@ struct sched_domain_attr { extern int sched_domain_level_max; +struct sched_group; + struct sched_domain { /* These fields must be setup */ struct sched_domain *parent; /* top domain must be null terminated */ diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 709a30cdfd85..1a4a2b19c2f4 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -572,6 +572,62 @@ static inline struct sched_domain *highest_flag_domain(int cpu, int flag) DECLARE_PER_CPU(struct sched_domain *, sd_llc); DECLARE_PER_CPU(int, sd_llc_id); +struct sched_group_power { + atomic_t ref; + /* + * CPU power of this group, SCHED_LOAD_SCALE being max power for a + * single CPU. + */ + unsigned int power, power_orig; + unsigned long next_update; + /* + * Number of busy cpus in this group. + */ + atomic_t nr_busy_cpus; + + unsigned long cpumask[0]; /* iteration mask */ +}; + +struct sched_group { + struct sched_group *next; /* Must be a circular list */ + atomic_t ref; + + unsigned int group_weight; + struct sched_group_power *sgp; + + /* + * The CPUs this group covers. + * + * NOTE: this field is variable length. (Allocated dynamically + * by attaching extra space to the end of the structure, + * depending on how many CPUs the kernel has booted up with) + */ + unsigned long cpumask[0]; +}; + +static inline struct cpumask *sched_group_cpus(struct sched_group *sg) +{ + return to_cpumask(sg->cpumask); +} + +/* + * cpumask masking which cpus in the group are allowed to iterate up the domain + * tree. + */ +static inline struct cpumask *sched_group_mask(struct sched_group *sg) +{ + return to_cpumask(sg->sgp->cpumask); +} + +/** + * group_first_cpu - Returns the first cpu in the cpumask of a sched_group. + * @group: The group whose first cpu is to be returned. + */ +static inline unsigned int group_first_cpu(struct sched_group *group) +{ + return cpumask_first(sched_group_cpus(group)); +} + extern int group_balance_cpu(struct sched_group *sg); #endif /* CONFIG_SMP */ -- cgit From b13095f07f25464de65f5ce5ea94e16813d67488 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 5 Mar 2013 16:06:38 +0800 Subject: sched: Move wake flags to kernel/sched/sched.h They are used internally only. 
Signed-off-by: Li Zefan Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/5135A78E.7040609@huawei.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 7 ------- kernel/sched/sched.h | 7 +++++++ 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 0d641304c0ff..863b505ac48e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -920,13 +920,6 @@ struct uts_namespace; struct rq; struct sched_domain; -/* - * wake flags - */ -#define WF_SYNC 0x01 /* waker goes to sleep after wakup */ -#define WF_FORK 0x02 /* child wakeup after fork */ -#define WF_MIGRATED 0x04 /* internal use, task got migrated */ - #define ENQUEUE_WAKEUP 1 #define ENQUEUE_HEAD 2 #ifdef CONFIG_SMP diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 1a4a2b19c2f4..4e5c2afdac91 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -865,6 +865,13 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) } #endif /* __ARCH_WANT_UNLOCKED_CTXSW */ +/* + * wake flags + */ +#define WF_SYNC 0x01 /* waker goes to sleep after wakeup */ +#define WF_FORK 0x02 /* child wakeup after fork */ +#define WF_MIGRATED 0x4 /* internal use, task got migrated */ + static inline void update_load_add(struct load_weight *lw, unsigned long inc) { lw->weight += inc; -- cgit From c82ba9fa7588dfd02d4dc99ad1af486304bc424c Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 5 Mar 2013 16:06:55 +0800 Subject: sched: Move struct sched_class to kernel/sched/sched.h It's used internally only. Signed-off-by: Li Zefan Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/5135A79F.8090502@huawei.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 59 --------------------------------------------------- kernel/sched/sched.h | 55 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 59 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 863b505ac48e..04b834fa14bc 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -917,65 +917,6 @@ struct mempolicy; struct pipe_inode_info; struct uts_namespace; -struct rq; -struct sched_domain; - -#define ENQUEUE_WAKEUP 1 -#define ENQUEUE_HEAD 2 -#ifdef CONFIG_SMP -#define ENQUEUE_WAKING 4 /* sched_class::task_waking was called */ -#else -#define ENQUEUE_WAKING 0 -#endif - -#define DEQUEUE_SLEEP 1 - -struct sched_class { - const struct sched_class *next; - - void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags); - void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags); - void (*yield_task) (struct rq *rq); - bool (*yield_to_task) (struct rq *rq, struct task_struct *p, bool preempt); - - void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int flags); - - struct task_struct * (*pick_next_task) (struct rq *rq); - void (*put_prev_task) (struct rq *rq, struct task_struct *p); - -#ifdef CONFIG_SMP - int (*select_task_rq)(struct task_struct *p, int sd_flag, int flags); - void (*migrate_task_rq)(struct task_struct *p, int next_cpu); - - void (*pre_schedule) (struct rq *this_rq, struct task_struct *task); - void (*post_schedule) (struct rq *this_rq); - void (*task_waking) (struct task_struct *task); - void (*task_woken) (struct rq *this_rq, struct task_struct *task); - - void (*set_cpus_allowed)(struct task_struct *p, - const struct cpumask *newmask); - - void (*rq_online)(struct rq *rq); - void (*rq_offline)(struct rq *rq); -#endif - - void 
(*set_curr_task) (struct rq *rq); - void (*task_tick) (struct rq *rq, struct task_struct *p, int queued); - void (*task_fork) (struct task_struct *p); - - void (*switched_from) (struct rq *this_rq, struct task_struct *task); - void (*switched_to) (struct rq *this_rq, struct task_struct *task); - void (*prio_changed) (struct rq *this_rq, struct task_struct *task, - int oldprio); - - unsigned int (*get_rr_interval) (struct rq *rq, - struct task_struct *task); - -#ifdef CONFIG_FAIR_GROUP_SCHED - void (*task_move_group) (struct task_struct *p, int on_rq); -#endif -}; - struct load_weight { unsigned long weight, inv_weight; }; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 4e5c2afdac91..eca526d7afbd 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -951,6 +951,61 @@ enum cpuacct_stat_index { CPUACCT_STAT_NSTATS, }; +#define ENQUEUE_WAKEUP 1 +#define ENQUEUE_HEAD 2 +#ifdef CONFIG_SMP +#define ENQUEUE_WAKING 4 /* sched_class::task_waking was called */ +#else +#define ENQUEUE_WAKING 0 +#endif + +#define DEQUEUE_SLEEP 1 + +struct sched_class { + const struct sched_class *next; + + void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags); + void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags); + void (*yield_task) (struct rq *rq); + bool (*yield_to_task) (struct rq *rq, struct task_struct *p, bool preempt); + + void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int flags); + + struct task_struct * (*pick_next_task) (struct rq *rq); + void (*put_prev_task) (struct rq *rq, struct task_struct *p); + +#ifdef CONFIG_SMP + int (*select_task_rq)(struct task_struct *p, int sd_flag, int flags); + void (*migrate_task_rq)(struct task_struct *p, int next_cpu); + + void (*pre_schedule) (struct rq *this_rq, struct task_struct *task); + void (*post_schedule) (struct rq *this_rq); + void (*task_waking) (struct task_struct *task); + void (*task_woken) (struct rq *this_rq, struct task_struct *task); + + void (*set_cpus_allowed)(struct task_struct *p, + const struct cpumask *newmask); + + void (*rq_online)(struct rq *rq); + void (*rq_offline)(struct rq *rq); +#endif + + void (*set_curr_task) (struct rq *rq); + void (*task_tick) (struct rq *rq, struct task_struct *p, int queued); + void (*task_fork) (struct task_struct *p); + + void (*switched_from) (struct rq *this_rq, struct task_struct *task); + void (*switched_to) (struct rq *this_rq, struct task_struct *task); + void (*prio_changed) (struct rq *this_rq, struct task_struct *task, + int oldprio); + + unsigned int (*get_rr_interval) (struct rq *rq, + struct task_struct *task); + +#ifdef CONFIG_FAIR_GROUP_SCHED + void (*task_move_group) (struct task_struct *p, int on_rq); +#endif +}; #define sched_class_highest (&stop_sched_class) #define for_each_class(class) \ -- cgit From 15f803c94bd92b17708aad9e74226fd0b2c9130c Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 5 Mar 2013 16:07:11 +0800 Subject: sched: Make default_scale_freq_power() static As default_scale_{freq,smt}_power() and update_rt_power() are used in kernel/sched/fair.c only, annotate them as static functions. 
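For illustration only, not part of the patch: the reason the arch_scale_*_power() hooks in the hunk below stay global while their defaults become static is the usual weak-symbol override pattern. A minimal standalone sketch, with hypothetical names:

#include <stdio.h>

/* Generic fallback, now private to the file that uses it. */
static unsigned long default_scale_power(void)
{
	return 1024;
}

/* Weak global hook: an architecture that defines a strong
 * arch_scale_power() elsewhere wins at link time; otherwise this
 * version, which calls the static default, is used. */
unsigned long __attribute__((weak)) arch_scale_power(void)
{
	return default_scale_power();
}

int main(void)
{
	printf("%lu\n", arch_scale_power()); /* 1024 unless overridden */
	return 0;
}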
Signed-off-by: Li Zefan Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/5135A7AF.8010900@huawei.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 3 --- kernel/sched/fair.c | 6 +++--- 2 files changed, 3 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 04b834fa14bc..eadd113e1eb2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -880,9 +880,6 @@ extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], cpumask_var_t *alloc_sched_domains(unsigned int ndoms); void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms); -unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu); -unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu); - bool cpus_share_cache(int this_cpu, int that_cpu); #else /* CONFIG_SMP */ diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 7a33e5986fc5..9f2311256ae0 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4245,7 +4245,7 @@ static inline int get_sd_load_idx(struct sched_domain *sd, return load_idx; } -unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu) +static unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu) { return SCHED_POWER_SCALE; } @@ -4255,7 +4255,7 @@ unsigned long __weak arch_scale_freq_power(struct sched_domain *sd, int cpu) return default_scale_freq_power(sd, cpu); } -unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu) +static unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu) { unsigned long weight = sd->span_weight; unsigned long smt_gain = sd->smt_gain; @@ -4270,7 +4270,7 @@ unsigned long __weak arch_scale_smt_power(struct sched_domain *sd, int cpu) return default_scale_smt_power(sd, cpu); } -unsigned long scale_rt_power(int cpu) +static unsigned long scale_rt_power(int cpu) { struct rq *rq = cpu_rq(cpu); u64 total, available, age_stamp, avg; -- cgit From 25cc7da7e6336d3bb6a5bad3d3fa96fce9a81d5b Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 5 Mar 2013 16:07:33 +0800 Subject: sched: Move group scheduling functions out of include/linux/sched.h - Make sched_group_{set_,}runtime(), sched_group_{set_,}period() and sched_rt_can_attach() static. - Move sched_{create,destroy,online,offline}_group() to kernel/sched/sched.h. - Remove declaration of sched_group_shares(). 
Signed-off-by: Li Zefan Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/5135A7C5.3000708@huawei.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 21 --------------------- kernel/sched/core.c | 10 +++++----- kernel/sched/sched.h | 12 ++++++++++++ 3 files changed, 17 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index eadd113e1eb2..fc039ceccbea 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2512,28 +2512,7 @@ extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask); extern long sched_getaffinity(pid_t pid, struct cpumask *mask); #ifdef CONFIG_CGROUP_SCHED - extern struct task_group root_task_group; - -extern struct task_group *sched_create_group(struct task_group *parent); -extern void sched_online_group(struct task_group *tg, - struct task_group *parent); -extern void sched_destroy_group(struct task_group *tg); -extern void sched_offline_group(struct task_group *tg); -extern void sched_move_task(struct task_struct *tsk); -#ifdef CONFIG_FAIR_GROUP_SCHED -extern int sched_group_set_shares(struct task_group *tg, unsigned long shares); -extern unsigned long sched_group_shares(struct task_group *tg); -#endif -#ifdef CONFIG_RT_GROUP_SCHED -extern int sched_group_set_rt_runtime(struct task_group *tg, - long rt_runtime_us); -extern long sched_group_rt_runtime(struct task_group *tg); -extern int sched_group_set_rt_period(struct task_group *tg, - long rt_period_us); -extern long sched_group_rt_period(struct task_group *tg); -extern int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk); -#endif #endif /* CONFIG_CGROUP_SCHED */ extern int task_can_switch_user(struct user_struct *up, diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 7f12624a393c..9ad26c986441 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7455,7 +7455,7 @@ unlock: return err; } -int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us) +static int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us) { u64 rt_runtime, rt_period; @@ -7467,7 +7467,7 @@ int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us) return tg_set_rt_bandwidth(tg, rt_period, rt_runtime); } -long sched_group_rt_runtime(struct task_group *tg) +static long sched_group_rt_runtime(struct task_group *tg) { u64 rt_runtime_us; @@ -7479,7 +7479,7 @@ long sched_group_rt_runtime(struct task_group *tg) return rt_runtime_us; } -int sched_group_set_rt_period(struct task_group *tg, long rt_period_us) +static int sched_group_set_rt_period(struct task_group *tg, long rt_period_us) { u64 rt_runtime, rt_period; @@ -7492,7 +7492,7 @@ int sched_group_set_rt_period(struct task_group *tg, long rt_period_us) return tg_set_rt_bandwidth(tg, rt_period, rt_runtime); } -long sched_group_rt_period(struct task_group *tg) +static long sched_group_rt_period(struct task_group *tg) { u64 rt_period_us; @@ -7527,7 +7527,7 @@ static int sched_rt_global_constraints(void) return ret; } -int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk) +static int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk) { /* Don't accept realtime tasks when there is no way for them to run */ if (rt_task(tsk) && tg->rt_bandwidth.rt_runtime == 0) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index eca526d7afbd..304fc1c77143 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -221,6 +221,18 @@ extern void init_tg_rt_entry(struct task_group *tg, struct rt_rq 
*rt_rq, struct sched_rt_entity *rt_se, int cpu, struct sched_rt_entity *parent); +extern struct task_group *sched_create_group(struct task_group *parent); +extern void sched_online_group(struct task_group *tg, + struct task_group *parent); +extern void sched_destroy_group(struct task_group *tg); +extern void sched_offline_group(struct task_group *tg); + +extern void sched_move_task(struct task_struct *tsk); + +#ifdef CONFIG_FAIR_GROUP_SCHED +extern int sched_group_set_shares(struct task_group *tg, unsigned long shares); +#endif + #else /* CONFIG_CGROUP_SCHED */ struct cfs_bandwidth { }; -- cgit From 877c685607925238e302cd3aa38788dca6c1b226 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 5 Mar 2013 11:38:08 +0800 Subject: perf: Remove include of cgroup.h from perf_event.h Move struct perf_cgroup_info and perf_cgroup to kernel/perf/core.c, and then we can remove include of cgroup.h. Signed-off-by: Li Zefan Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tejun Heo Link: http://lkml.kernel.org/r/513568A0.6020804@huawei.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 18 +----------------- kernel/events/core.c | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index e47ee462c2f2..8737e1cee8b2 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -21,7 +21,6 @@ */ #ifdef CONFIG_PERF_EVENTS -# include # include # include #endif @@ -299,22 +298,7 @@ struct swevent_hlist { #define PERF_ATTACH_GROUP 0x02 #define PERF_ATTACH_TASK 0x04 -#ifdef CONFIG_CGROUP_PERF -/* - * perf_cgroup_info keeps track of time_enabled for a cgroup. - * This is a per-cpu dynamically allocated data structure. - */ -struct perf_cgroup_info { - u64 time; - u64 timestamp; -}; - -struct perf_cgroup { - struct cgroup_subsys_state css; - struct perf_cgroup_info *info; /* timing info, one per cpu */ -}; -#endif - +struct perf_cgroup; struct ring_buffer; /** diff --git a/kernel/events/core.c b/kernel/events/core.c index b0cd86501c30..5976a2a6b4ce 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -37,6 +37,7 @@ #include #include #include +#include #include "internal.h" @@ -233,6 +234,20 @@ static void perf_ctx_unlock(struct perf_cpu_context *cpuctx, #ifdef CONFIG_CGROUP_PERF +/* + * perf_cgroup_info keeps track of time_enabled for a cgroup. + * This is a per-cpu dynamically allocated data structure. + */ +struct perf_cgroup_info { + u64 time; + u64 timestamp; +}; + +struct perf_cgroup { + struct cgroup_subsys_state css; + struct perf_cgroup_info *info; +}; + /* * Must ensure cgroup is pinned (css_get) before calling * this function. In other words, we cannot call this function -- cgit From 9a886586c82aa02cb49f8c85e961595716884545 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 15 Feb 2013 19:25:00 +0100 Subject: wireless: move sequence number arithmetic to ieee80211.h Move the sequence number arithmetic code from mac80211 to ieee80211.h so others can use it. Also rename the functions from _seq to _sn, they operate on the sequence number, not the sequence_control field. Also move macros to convert the sequence control to/from the sequence number value from various drivers. 
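For illustration only, not part of the patch: the helpers added below implement modulo-4096 arithmetic on the 12-bit sequence number. A minimal user-space sketch of the wraparound behaviour, with the constants restated from the hunk (IEEE80211_SCTL_SEQ is 0xFFF0, so the sequence-number mask is 0xfff):

#include <stdio.h>
#include <stdint.h>

#define IEEE80211_SN_MASK   0x0fff /* restated: IEEE80211_SCTL_SEQ >> 4 */
#define IEEE80211_SN_MODULO (IEEE80211_SN_MASK + 1)

static int ieee80211_sn_less(uint16_t sn1, uint16_t sn2)
{
	/* "less than" on a circular 12-bit space: true when sn1 trails
	 * sn2 by less than half the modulo. */
	return ((sn1 - sn2) & IEEE80211_SN_MASK) > (IEEE80211_SN_MODULO >> 1);
}

static uint16_t ieee80211_sn_add(uint16_t sn1, uint16_t sn2)
{
	return (sn1 + sn2) & IEEE80211_SN_MASK;
}

int main(void)
{
	printf("%d\n", ieee80211_sn_less(4094, 2)); /* 1: 4094 precedes 2 across the wrap */
	printf("%d\n", ieee80211_sn_less(2, 4094)); /* 0 */
	printf("%u\n", ieee80211_sn_add(4094, 4));  /* 2 */
	return 0;
}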
Signed-off-by: Johannes Berg --- drivers/net/wireless/iwlegacy/3945.h | 4 --- drivers/net/wireless/iwlegacy/4965-mac.c | 13 ++++---- drivers/net/wireless/iwlegacy/common.h | 4 --- drivers/net/wireless/iwlwifi/dvm/tx.c | 11 ++++--- drivers/net/wireless/iwlwifi/iwl-trans.h | 3 -- drivers/net/wireless/iwlwifi/mvm/sta.c | 4 +-- drivers/net/wireless/iwlwifi/mvm/tx.c | 2 +- drivers/net/wireless/iwlwifi/pcie/tx.c | 2 +- drivers/net/wireless/rtlwifi/wifi.h | 3 -- include/linux/ieee80211.h | 28 +++++++++++++++++ net/mac80211/rx.c | 53 +++++++++++++------------------- 11 files changed, 66 insertions(+), 61 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/wireless/iwlegacy/3945.h b/drivers/net/wireless/iwlegacy/3945.h index 1d45075e0d5b..9a8703def0ba 100644 --- a/drivers/net/wireless/iwlegacy/3945.h +++ b/drivers/net/wireless/iwlegacy/3945.h @@ -150,10 +150,6 @@ struct il3945_frame { struct list_head list; }; -#define SEQ_TO_SN(seq) (((seq) & IEEE80211_SCTL_SEQ) >> 4) -#define SN_TO_SEQ(ssn) (((ssn) << 4) & IEEE80211_SCTL_SEQ) -#define MAX_SN ((IEEE80211_SCTL_SEQ) >> 4) - #define SUP_RATE_11A_MAX_NUM_CHANNELS 8 #define SUP_RATE_11B_MAX_NUM_CHANNELS 4 #define SUP_RATE_11G_MAX_NUM_CHANNELS 12 diff --git a/drivers/net/wireless/iwlegacy/4965-mac.c b/drivers/net/wireless/iwlegacy/4965-mac.c index 7941eb3a0166..c092fcbbe965 100644 --- a/drivers/net/wireless/iwlegacy/4965-mac.c +++ b/drivers/net/wireless/iwlegacy/4965-mac.c @@ -2258,7 +2258,7 @@ il4965_tx_agg_start(struct il_priv *il, struct ieee80211_vif *vif, spin_lock_irqsave(&il->sta_lock, flags); tid_data = &il->stations[sta_id].tid[tid]; - *ssn = SEQ_TO_SN(tid_data->seq_number); + *ssn = IEEE80211_SEQ_TO_SN(tid_data->seq_number); tid_data->agg.txq_id = txq_id; il_set_swq_id(&il->txq[txq_id], il4965_get_ac_from_tid(tid), txq_id); spin_unlock_irqrestore(&il->sta_lock, flags); @@ -2408,7 +2408,7 @@ il4965_txq_check_empty(struct il_priv *il, int sta_id, u8 tid, int txq_id) /* aggregated HW queue */ if (txq_id == tid_data->agg.txq_id && q->read_ptr == q->write_ptr) { - u16 ssn = SEQ_TO_SN(tid_data->seq_number); + u16 ssn = IEEE80211_SEQ_TO_SN(tid_data->seq_number); int tx_fifo = il4965_get_fifo_from_tid(tid); D_HT("HW queue empty: continue DELBA flow\n"); il4965_txq_agg_disable(il, txq_id, ssn, tx_fifo); @@ -2627,7 +2627,8 @@ il4965_get_ra_sta_id(struct il_priv *il, struct ieee80211_hdr *hdr) static inline u32 il4965_get_scd_ssn(struct il4965_tx_resp *tx_resp) { - return le32_to_cpup(&tx_resp->u.status + tx_resp->frame_count) & MAX_SN; + return le32_to_cpup(&tx_resp->u.status + + tx_resp->frame_count) & IEEE80211_MAX_SN; } static inline u32 @@ -2717,15 +2718,15 @@ il4965_tx_status_reply_tx(struct il_priv *il, struct il_ht_agg *agg, hdr = (struct ieee80211_hdr *) skb->data; sc = le16_to_cpu(hdr->seq_ctrl); - if (idx != (SEQ_TO_SN(sc) & 0xff)) { + if (idx != (IEEE80211_SEQ_TO_SN(sc) & 0xff)) { IL_ERR("BUG_ON idx doesn't match seq control" " idx=%d, seq_idx=%d, seq=%d\n", idx, - SEQ_TO_SN(sc), hdr->seq_ctrl); + IEEE80211_SEQ_TO_SN(sc), hdr->seq_ctrl); return -1; } D_TX_REPLY("AGG Frame i=%d idx %d seq=%d\n", i, idx, - SEQ_TO_SN(sc)); + IEEE80211_SEQ_TO_SN(sc)); sh = idx - start; if (sh > 64) { diff --git a/drivers/net/wireless/iwlegacy/common.h b/drivers/net/wireless/iwlegacy/common.h index 96f2025d936e..73bd3ef316c8 100644 --- a/drivers/net/wireless/iwlegacy/common.h +++ b/drivers/net/wireless/iwlegacy/common.h @@ -541,10 +541,6 @@ struct il_frame { struct list_head list; }; -#define SEQ_TO_SN(seq) (((seq) & IEEE80211_SCTL_SEQ) >> 4) 
-#define SN_TO_SEQ(ssn) (((ssn) << 4) & IEEE80211_SCTL_SEQ) -#define MAX_SN ((IEEE80211_SCTL_SEQ) >> 4) - enum { CMD_SYNC = 0, CMD_SIZE_NORMAL = 0, diff --git a/drivers/net/wireless/iwlwifi/dvm/tx.c b/drivers/net/wireless/iwlwifi/dvm/tx.c index 6aec2df3bb27..d499a0366fa6 100644 --- a/drivers/net/wireless/iwlwifi/dvm/tx.c +++ b/drivers/net/wireless/iwlwifi/dvm/tx.c @@ -418,7 +418,8 @@ int iwlagn_tx_skb(struct iwl_priv *priv, " Tx flags = 0x%08x, agg.state = %d", info->flags, tid_data->agg.state); IWL_ERR(priv, "sta_id = %d, tid = %d seq_num = %d", - sta_id, tid, SEQ_TO_SN(tid_data->seq_number)); + sta_id, tid, + IEEE80211_SEQ_TO_SN(tid_data->seq_number)); goto drop_unlock_sta; } @@ -569,7 +570,7 @@ int iwlagn_tx_agg_stop(struct iwl_priv *priv, struct ieee80211_vif *vif, return 0; } - tid_data->agg.ssn = SEQ_TO_SN(tid_data->seq_number); + tid_data->agg.ssn = IEEE80211_SEQ_TO_SN(tid_data->seq_number); /* There are still packets for this RA / TID in the HW */ if (!test_bit(txq_id, priv->agg_q_alloc)) { @@ -651,7 +652,7 @@ int iwlagn_tx_agg_start(struct iwl_priv *priv, struct ieee80211_vif *vif, spin_lock_bh(&priv->sta_lock); tid_data = &priv->tid_data[sta_id][tid]; - tid_data->agg.ssn = SEQ_TO_SN(tid_data->seq_number); + tid_data->agg.ssn = IEEE80211_SEQ_TO_SN(tid_data->seq_number); tid_data->agg.txq_id = txq_id; *ssn = tid_data->agg.ssn; @@ -911,7 +912,7 @@ static void iwlagn_count_agg_tx_err_status(struct iwl_priv *priv, u16 status) static inline u32 iwlagn_get_scd_ssn(struct iwlagn_tx_resp *tx_resp) { return le32_to_cpup((__le32 *)&tx_resp->status + - tx_resp->frame_count) & MAX_SN; + tx_resp->frame_count) & IEEE80211_MAX_SN; } static void iwl_rx_reply_tx_agg(struct iwl_priv *priv, @@ -1148,7 +1149,7 @@ int iwlagn_rx_reply_tx(struct iwl_priv *priv, struct iwl_rx_cmd_buffer *rxb, if (tx_resp->frame_count == 1) { u16 next_reclaimed = le16_to_cpu(tx_resp->seq_ctl); - next_reclaimed = SEQ_TO_SN(next_reclaimed + 0x10); + next_reclaimed = IEEE80211_SEQ_TO_SN(next_reclaimed + 0x10); if (is_agg) { /* If this is an aggregation queue, we can rely on the diff --git a/drivers/net/wireless/iwlwifi/iwl-trans.h b/drivers/net/wireless/iwlwifi/iwl-trans.h index 8c7bec6b9a0b..00bdc5b00af3 100644 --- a/drivers/net/wireless/iwlwifi/iwl-trans.h +++ b/drivers/net/wireless/iwlwifi/iwl-trans.h @@ -114,9 +114,6 @@ * completely agnostic to these differences. * The transport does provide helper functionnality (i.e. 
SYNC / ASYNC mode), */ -#define SEQ_TO_SN(seq) (((seq) & IEEE80211_SCTL_SEQ) >> 4) -#define SN_TO_SEQ(ssn) (((ssn) << 4) & IEEE80211_SCTL_SEQ) -#define MAX_SN ((IEEE80211_SCTL_SEQ) >> 4) #define SEQ_TO_QUEUE(s) (((s) >> 8) & 0x1f) #define QUEUE_TO_SEQ(q) (((q) & 0x1f) << 8) #define SEQ_TO_INDEX(s) ((s) & 0xff) diff --git a/drivers/net/wireless/iwlwifi/mvm/sta.c b/drivers/net/wireless/iwlwifi/mvm/sta.c index 861a7f9f8e7f..52aecf20d0df 100644 --- a/drivers/net/wireless/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/iwlwifi/mvm/sta.c @@ -686,7 +686,7 @@ int iwl_mvm_sta_tx_agg_start(struct iwl_mvm *mvm, struct ieee80211_vif *vif, spin_lock_bh(&mvmsta->lock); tid_data = &mvmsta->tid_data[tid]; - tid_data->ssn = SEQ_TO_SN(tid_data->seq_number); + tid_data->ssn = IEEE80211_SEQ_TO_SN(tid_data->seq_number); tid_data->txq_id = txq_id; *ssn = tid_data->ssn; @@ -779,7 +779,7 @@ int iwl_mvm_sta_tx_agg_stop(struct iwl_mvm *mvm, struct ieee80211_vif *vif, switch (tid_data->state) { case IWL_AGG_ON: - tid_data->ssn = SEQ_TO_SN(tid_data->seq_number); + tid_data->ssn = IEEE80211_SEQ_TO_SN(tid_data->seq_number); IWL_DEBUG_TX_QUEUES(mvm, "ssn = %d, next_recl = %d\n", diff --git a/drivers/net/wireless/iwlwifi/mvm/tx.c b/drivers/net/wireless/iwlwifi/mvm/tx.c index 6b67ce3f679c..56df249b215e 100644 --- a/drivers/net/wireless/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/iwlwifi/mvm/tx.c @@ -641,7 +641,7 @@ static void iwl_mvm_rx_tx_cmd_single(struct iwl_mvm *mvm, next_reclaimed = ssn; } else { /* The next packet to be reclaimed is the one after this one */ - next_reclaimed = SEQ_TO_SN(seq_ctl + 0x10); + next_reclaimed = IEEE80211_SEQ_TO_SN(seq_ctl + 0x10); } IWL_DEBUG_TX_REPLY(mvm, diff --git a/drivers/net/wireless/iwlwifi/pcie/tx.c b/drivers/net/wireless/iwlwifi/pcie/tx.c index 8e9e3212fe78..ad7441dfa6fb 100644 --- a/drivers/net/wireless/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/iwlwifi/pcie/tx.c @@ -1581,7 +1581,7 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb, * Check here that the packets are in the right place on the ring. 
*/ #ifdef CONFIG_IWLWIFI_DEBUG - wifi_seq = SEQ_TO_SN(le16_to_cpu(hdr->seq_ctrl)); + wifi_seq = IEEE80211_SEQ_TO_SN(le16_to_cpu(hdr->seq_ctrl)); WARN_ONCE((iwl_read_prph(trans, SCD_AGGR_SEL) & BIT(txq_id)) && ((wifi_seq & 0xff) != q->write_ptr), "Q: %d WiFi Seq %d tfdNum %d", diff --git a/drivers/net/wireless/rtlwifi/wifi.h b/drivers/net/wireless/rtlwifi/wifi.h index f13258a8d995..c3eff32acf6c 100644 --- a/drivers/net/wireless/rtlwifi/wifi.h +++ b/drivers/net/wireless/rtlwifi/wifi.h @@ -2127,9 +2127,6 @@ value to host byte ordering.*/ #define WLAN_FC_GET_TYPE(fc) (le16_to_cpu(fc) & IEEE80211_FCTL_FTYPE) #define WLAN_FC_GET_STYPE(fc) (le16_to_cpu(fc) & IEEE80211_FCTL_STYPE) #define WLAN_FC_MORE_DATA(fc) (le16_to_cpu(fc) & IEEE80211_FCTL_MOREDATA) -#define SEQ_TO_SN(seq) (((seq) & IEEE80211_SCTL_SEQ) >> 4) -#define SN_TO_SEQ(ssn) (((ssn) << 4) & IEEE80211_SCTL_SEQ) -#define MAX_SN ((IEEE80211_SCTL_SEQ) >> 4) #define RT_RF_OFF_LEVL_ASPM BIT(0) /*PCI ASPM */ #define RT_RF_OFF_LEVL_CLK_REQ BIT(1) /*PCI clock request */ diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 7e24fe0cfbcd..a0c550fb65a6 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -113,6 +113,34 @@ #define IEEE80211_CTL_EXT_SSW_FBACK 0x9000 #define IEEE80211_CTL_EXT_SSW_ACK 0xa000 + +#define IEEE80211_SN_MASK ((IEEE80211_SCTL_SEQ) >> 4) +#define IEEE80211_MAX_SN IEEE80211_SN_MASK +#define IEEE80211_SN_MODULO (IEEE80211_MAX_SN + 1) + +static inline int ieee80211_sn_less(u16 sn1, u16 sn2) +{ + return ((sn1 - sn2) & IEEE80211_SN_MASK) > (IEEE80211_SN_MODULO >> 1); +} + +static inline u16 ieee80211_sn_add(u16 sn1, u16 sn2) +{ + return (sn1 + sn2) & IEEE80211_SN_MASK; +} + +static inline u16 ieee80211_sn_inc(u16 sn) +{ + return ieee80211_sn_add(sn, 1); +} + +static inline u16 ieee80211_sn_sub(u16 sn1, u16 sn2) +{ + return (sn1 - sn2) & IEEE80211_SN_MASK; +} + +#define IEEE80211_SEQ_TO_SN(seq) (((seq) & IEEE80211_SCTL_SEQ) >> 4) +#define IEEE80211_SN_TO_SEQ(ssn) (((ssn) << 4) & IEEE80211_SCTL_SEQ) + /* miscellaneous IEEE 802.11 constants */ #define IEEE80211_MAX_FRAG_THRESHOLD 2352 #define IEEE80211_MAX_RTS_THRESHOLD 2353 diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index acf006f2d61a..1f940e2b6f27 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -648,24 +648,6 @@ static ieee80211_rx_result ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx) return RX_CONTINUE; } -#define SEQ_MODULO 0x1000 -#define SEQ_MASK 0xfff - -static inline int seq_less(u16 sq1, u16 sq2) -{ - return ((sq1 - sq2) & SEQ_MASK) > (SEQ_MODULO >> 1); -} - -static inline u16 seq_inc(u16 sq) -{ - return (sq + 1) & SEQ_MASK; -} - -static inline u16 seq_sub(u16 sq1, u16 sq2) -{ - return (sq1 - sq2) & SEQ_MASK; -} - static void ieee80211_release_reorder_frame(struct ieee80211_sub_if_data *sdata, struct tid_ampdu_rx *tid_agg_rx, int index, @@ -687,7 +669,7 @@ static void ieee80211_release_reorder_frame(struct ieee80211_sub_if_data *sdata, __skb_queue_tail(frames, skb); no_frame: - tid_agg_rx->head_seq_num = seq_inc(tid_agg_rx->head_seq_num); + tid_agg_rx->head_seq_num = ieee80211_sn_inc(tid_agg_rx->head_seq_num); } static void ieee80211_release_reorder_frames(struct ieee80211_sub_if_data *sdata, @@ -699,8 +681,9 @@ static void ieee80211_release_reorder_frames(struct ieee80211_sub_if_data *sdata lockdep_assert_held(&tid_agg_rx->reorder_lock); - while (seq_less(tid_agg_rx->head_seq_num, head_seq_num)) { - index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) % + while 
(ieee80211_sn_less(tid_agg_rx->head_seq_num, head_seq_num)) { + index = ieee80211_sn_sub(tid_agg_rx->head_seq_num, + tid_agg_rx->ssn) % tid_agg_rx->buf_size; ieee80211_release_reorder_frame(sdata, tid_agg_rx, index, frames); @@ -727,8 +710,8 @@ static void ieee80211_sta_reorder_release(struct ieee80211_sub_if_data *sdata, lockdep_assert_held(&tid_agg_rx->reorder_lock); /* release the buffer until next missing frame */ - index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) % - tid_agg_rx->buf_size; + index = ieee80211_sn_sub(tid_agg_rx->head_seq_num, + tid_agg_rx->ssn) % tid_agg_rx->buf_size; if (!tid_agg_rx->reorder_buf[index] && tid_agg_rx->stored_mpdu_num) { /* @@ -756,19 +739,22 @@ static void ieee80211_sta_reorder_release(struct ieee80211_sub_if_data *sdata, * Increment the head seq# also for the skipped slots. */ tid_agg_rx->head_seq_num = - (tid_agg_rx->head_seq_num + skipped) & SEQ_MASK; + (tid_agg_rx->head_seq_num + + skipped) & IEEE80211_SN_MASK; skipped = 0; } } else while (tid_agg_rx->reorder_buf[index]) { ieee80211_release_reorder_frame(sdata, tid_agg_rx, index, frames); - index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) % + index = ieee80211_sn_sub(tid_agg_rx->head_seq_num, + tid_agg_rx->ssn) % tid_agg_rx->buf_size; } if (tid_agg_rx->stored_mpdu_num) { - j = index = seq_sub(tid_agg_rx->head_seq_num, - tid_agg_rx->ssn) % tid_agg_rx->buf_size; + j = index = ieee80211_sn_sub(tid_agg_rx->head_seq_num, + tid_agg_rx->ssn) % + tid_agg_rx->buf_size; for (; j != (index - 1) % tid_agg_rx->buf_size; j = (j + 1) % tid_agg_rx->buf_size) { @@ -809,7 +795,7 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata head_seq_num = tid_agg_rx->head_seq_num; /* frame with out of date sequence number */ - if (seq_less(mpdu_seq_num, head_seq_num)) { + if (ieee80211_sn_less(mpdu_seq_num, head_seq_num)) { dev_kfree_skb(skb); goto out; } @@ -818,8 +804,9 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata * If frame the sequence number exceeds our buffering window * size release some previous frames to make room for this one. 
*/ - if (!seq_less(mpdu_seq_num, head_seq_num + buf_size)) { - head_seq_num = seq_inc(seq_sub(mpdu_seq_num, buf_size)); + if (!ieee80211_sn_less(mpdu_seq_num, head_seq_num + buf_size)) { + head_seq_num = ieee80211_sn_inc( + ieee80211_sn_sub(mpdu_seq_num, buf_size)); /* release stored frames up to new head to stack */ ieee80211_release_reorder_frames(sdata, tid_agg_rx, head_seq_num, frames); @@ -827,7 +814,8 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata /* Now the new frame is always in the range of the reordering buffer */ - index = seq_sub(mpdu_seq_num, tid_agg_rx->ssn) % tid_agg_rx->buf_size; + index = ieee80211_sn_sub(mpdu_seq_num, + tid_agg_rx->ssn) % tid_agg_rx->buf_size; /* check if we already stored this frame */ if (tid_agg_rx->reorder_buf[index]) { @@ -843,7 +831,8 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata */ if (mpdu_seq_num == tid_agg_rx->head_seq_num && tid_agg_rx->stored_mpdu_num == 0) { - tid_agg_rx->head_seq_num = seq_inc(tid_agg_rx->head_seq_num); + tid_agg_rx->head_seq_num = + ieee80211_sn_inc(tid_agg_rx->head_seq_num); ret = false; goto out; } -- cgit From b8a31c9a5afff257cc5dd637cda5fef03e12d67b Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 22 Feb 2013 17:28:49 +0100 Subject: ieee80211: mark 802.11 related structs as being 2-byte aligned Regardless of what header features they use, or if they align the IP header or not, 802.11 packets from all drivers guarantee a 2-byte alignment (and there's a debug WARN_ON in case they don't). Annotate packet structs with __aligned(2) to allow the compiler to use 16-bit load/store operations on platforms with extremely inefficient unaligned access (e.g. MIPS). This reduces code size and improves performance on affected platforms and causes no binary code change on others. 
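For illustration only, not part of the patch: a standalone sketch of the annotation being applied; the struct here is hypothetical, the real targets are ieee80211_hdr and friends in the hunk below.

#include <stdio.h>
#include <stdint.h>

/* "packed" alone forces the compiler to assume 1-byte alignment, so
 * strict-alignment targets (e.g. MIPS) fall back to byte loads; adding
 * "aligned(2)" restores the 2-byte guarantee the 802.11 rx path already
 * enforces, re-enabling 16-bit loads/stores on the 16-bit fields. */
struct example_hdr {
	uint16_t frame_control;
	uint16_t duration_id;
	uint8_t  addr1[6];
} __attribute__((packed, aligned(2)));

int main(void)
{
	printf("size=%zu align=%zu\n", sizeof(struct example_hdr),
	       _Alignof(struct example_hdr)); /* size=10 align=2 */
	return 0;
}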
Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index a0c550fb65a6..6e352c31fee0 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -213,7 +213,7 @@ struct ieee80211_hdr { u8 addr3[6]; __le16 seq_ctrl; u8 addr4[6]; -} __packed; +} __packed __aligned(2); struct ieee80211_hdr_3addr { __le16 frame_control; @@ -222,7 +222,7 @@ struct ieee80211_hdr_3addr { u8 addr2[6]; u8 addr3[6]; __le16 seq_ctrl; -} __packed; +} __packed __aligned(2); struct ieee80211_qos_hdr { __le16 frame_control; @@ -232,7 +232,7 @@ struct ieee80211_qos_hdr { u8 addr3[6]; __le16 seq_ctrl; __le16 qos_ctrl; -} __packed; +} __packed __aligned(2); /** * ieee80211_has_tods - check if IEEE80211_FCTL_TODS is set @@ -609,7 +609,7 @@ struct ieee80211s_hdr { __le32 seqnum; u8 eaddr1[6]; u8 eaddr2[6]; -} __packed; +} __packed __aligned(2); /* Mesh flags */ #define MESH_FLAGS_AE_A4 0x1 @@ -903,7 +903,7 @@ struct ieee80211_mgmt { } u; } __packed action; } u; -} __packed; +} __packed __aligned(2); /* Supported Rates value encodings in 802.11n-2009 7.3.2.2 */ #define BSS_MEMBERSHIP_SELECTOR_HT_PHY 127 @@ -934,20 +934,20 @@ struct ieee80211_rts { __le16 duration; u8 ra[6]; u8 ta[6]; -} __packed; +} __packed __aligned(2); struct ieee80211_cts { __le16 frame_control; __le16 duration; u8 ra[6]; -} __packed; +} __packed __aligned(2); struct ieee80211_pspoll { __le16 frame_control; __le16 aid; u8 bssid[6]; u8 ta[6]; -} __packed; +} __packed __aligned(2); /* TDLS */ -- cgit From c8bb93f5f5d478a01db66127844d1d2dd30abec7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 21 Feb 2013 17:26:44 +0100 Subject: wireless: remove unused VHT MCS defines There's an enum with the same values (but slightly different names except for NOT_SUPPORTED) that is actually used, so remove the defines. Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 6e352c31fee0..35c1f96d9365 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1318,11 +1318,6 @@ struct ieee80211_vht_operation { } __packed; -#define IEEE80211_VHT_MCS_ZERO_TO_SEVEN_SUPPORT 0 -#define IEEE80211_VHT_MCS_ZERO_TO_EIGHT_SUPPORT 1 -#define IEEE80211_VHT_MCS_ZERO_TO_NINE_SUPPORT 2 -#define IEEE80211_VHT_MCS_NOT_SUPPORTED 3 - /* 802.11ac VHT Capabilities */ #define IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895 0x00000000 #define IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991 0x00000001 -- cgit From 55d942f4246c79a8f3f17f92c224e641c5c26125 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 1 Mar 2013 13:07:48 +0100 Subject: mac80211: restrict peer's VHT capabilities to own Implement restricting peer VHT capabilities to the device's own capabilities. This is useful when a single driver supports more than one device and the devices have different capabilities (often they will differ in the number of spatial streams), but in particular is also necessary for VHT capability overrides to work correctly -- otherwise it'd be possible to e.g. advertise, due to overrides, that TX-STBC is not supported, but then still use it to TX to the AP because it supports RX-STBC. 
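For illustration only, not part of the patch: the STBC case from the hunk below, reduced to a standalone sketch. Our transmit-side capability gates the peer's receive-side one and vice versa, so a capability we overrode away can never be used toward the AP. Mask values are taken from the patch; the helper name is hypothetical.

#include <stdio.h>
#include <stdint.h>

#define VHT_CAP_TXSTBC      0x00000080 /* IEEE80211_VHT_CAP_TXSTBC */
#define VHT_CAP_RXSTBC_MASK 0x00000700 /* IEEE80211_VHT_CAP_RXSTBC_MASK */

static uint32_t restrict_stbc(uint32_t own_cap, uint32_t peer_cap)
{
	uint32_t cap = 0;

	/* Keep the peer's RX-STBC bits only if we advertised TX-STBC... */
	if (own_cap & VHT_CAP_TXSTBC)
		cap |= peer_cap & VHT_CAP_RXSTBC_MASK;
	/* ...and its TX-STBC bit only if we can receive STBC ourselves. */
	if (own_cap & VHT_CAP_RXSTBC_MASK)
		cap |= peer_cap & VHT_CAP_TXSTBC;
	return cap;
}

int main(void)
{
	/* Peer advertises RX-STBC, but our (overridden) caps lack TX-STBC:
	 * nothing survives the intersection. */
	printf("%#x\n", restrict_stbc(0, VHT_CAP_RXSTBC_MASK)); /* 0 */
	return 0;
}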
Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 3 +- include/net/mac80211.h | 5 +- net/mac80211/vht.c | 114 +++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 117 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 35c1f96d9365..4cf0c9e4dd99 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1333,10 +1333,11 @@ struct ieee80211_vht_operation { #define IEEE80211_VHT_CAP_RXSTBC_2 0x00000200 #define IEEE80211_VHT_CAP_RXSTBC_3 0x00000300 #define IEEE80211_VHT_CAP_RXSTBC_4 0x00000400 +#define IEEE80211_VHT_CAP_RXSTBC_MASK 0x00000700 #define IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE 0x00000800 #define IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE 0x00001000 #define IEEE80211_VHT_CAP_BEAMFORMER_ANTENNAS_MAX 0x00006000 -#define IEEE80211_VHT_CAP_SOUNDING_DIMENTION_MAX 0x00030000 +#define IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_MAX 0x00030000 #define IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE 0x00080000 #define IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE 0x00100000 #define IEEE80211_VHT_CAP_VHT_TXOP_PS 0x00200000 diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 421c3ac8c521..cdd7cea1fd4c 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1228,9 +1228,8 @@ enum ieee80211_sta_rx_bandwidth { * @addr: MAC address * @aid: AID we assigned to the station if we're an AP * @supp_rates: Bitmap of supported rates (per band) - * @ht_cap: HT capabilities of this STA; restricted to our own TX capabilities - * @vht_cap: VHT capabilities of this STA; Not restricting any capabilities - * of remote STA. Taking as is. + * @ht_cap: HT capabilities of this STA; restricted to our own capabilities + * @vht_cap: VHT capabilities of this STA; restricted to our own capabilities * @wme: indicates whether the STA supports WME. Only valid during AP-mode. * @drv_priv: data area for driver use, will always be aligned to * sizeof(void *), size is determined in hw information. diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index cacc1c74556a..171344d4eb7c 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -118,6 +118,8 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, struct sta_info *sta) { struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.vht_cap; + struct ieee80211_sta_vht_cap own_cap; + u32 cap_info, i; memset(vht_cap, 0, sizeof(*vht_cap)); @@ -133,12 +135,122 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, vht_cap->vht_supported = true; - vht_cap->cap = le32_to_cpu(vht_cap_ie->vht_cap_info); + own_cap = sband->vht_cap; + /* + * If user has specified capability overrides, take care + * of that if the station we're setting up is the AP that + * we advertised a restricted capability set to. Override + * our own capabilities and then use those below. 
+ */ + if (sdata->vif.type == NL80211_IFTYPE_STATION && + !test_sta_flag(sta, WLAN_STA_TDLS_PEER)) + ieee80211_apply_vhtcap_overrides(sdata, &own_cap); + + /* take some capabilities as-is */ + cap_info = le32_to_cpu(vht_cap_ie->vht_cap_info); + vht_cap->cap = cap_info; + vht_cap->cap &= IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895 | + IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991 | + IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454 | + IEEE80211_VHT_CAP_RXLDPC | + IEEE80211_VHT_CAP_VHT_TXOP_PS | + IEEE80211_VHT_CAP_HTC_VHT | + IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK | + IEEE80211_VHT_CAP_VHT_LINK_ADAPTATION_VHT_UNSOL_MFB | + IEEE80211_VHT_CAP_VHT_LINK_ADAPTATION_VHT_MRQ_MFB | + IEEE80211_VHT_CAP_RX_ANTENNA_PATTERN | + IEEE80211_VHT_CAP_TX_ANTENNA_PATTERN; + + /* and some based on our own capabilities */ + switch (own_cap.cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) { + case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ: + vht_cap->cap |= cap_info & + IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ; + break; + case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ: + vht_cap->cap |= cap_info & + IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK; + break; + default: + /* nothing */ + break; + } + + /* symmetric capabilities */ + vht_cap->cap |= cap_info & own_cap.cap & + (IEEE80211_VHT_CAP_SHORT_GI_80 | + IEEE80211_VHT_CAP_SHORT_GI_160); + + /* remaining ones */ + if (own_cap.cap & IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE) { + vht_cap->cap |= cap_info & + (IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE | + IEEE80211_VHT_CAP_BEAMFORMER_ANTENNAS_MAX | + IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_MAX); + } + + if (own_cap.cap & IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE) + vht_cap->cap |= cap_info & + IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE; + + if (own_cap.cap & IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE) + vht_cap->cap |= cap_info & + IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE; + + if (own_cap.cap & IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE) + vht_cap->cap |= cap_info & + IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE; + + if (own_cap.cap & IEEE80211_VHT_CAP_TXSTBC) + vht_cap->cap |= cap_info & IEEE80211_VHT_CAP_RXSTBC_MASK; + + if (own_cap.cap & IEEE80211_VHT_CAP_RXSTBC_MASK) + vht_cap->cap |= cap_info & IEEE80211_VHT_CAP_TXSTBC; /* Copy peer MCS info, the driver might need them. 
*/ memcpy(&vht_cap->vht_mcs, &vht_cap_ie->supp_mcs, sizeof(struct ieee80211_vht_mcs_info)); + /* but also restrict MCSes */ + for (i = 0; i < 8; i++) { + u16 own_rx, own_tx, peer_rx, peer_tx; + + own_rx = le16_to_cpu(own_cap.vht_mcs.rx_mcs_map); + own_rx = (own_rx >> i * 2) & IEEE80211_VHT_MCS_NOT_SUPPORTED; + + own_tx = le16_to_cpu(own_cap.vht_mcs.tx_mcs_map); + own_tx = (own_tx >> i * 2) & IEEE80211_VHT_MCS_NOT_SUPPORTED; + + peer_rx = le16_to_cpu(vht_cap->vht_mcs.rx_mcs_map); + peer_rx = (peer_rx >> i * 2) & IEEE80211_VHT_MCS_NOT_SUPPORTED; + + peer_tx = le16_to_cpu(vht_cap->vht_mcs.tx_mcs_map); + peer_tx = (peer_tx >> i * 2) & IEEE80211_VHT_MCS_NOT_SUPPORTED; + + if (peer_tx != IEEE80211_VHT_MCS_NOT_SUPPORTED) { + if (own_rx == IEEE80211_VHT_MCS_NOT_SUPPORTED) + peer_tx = IEEE80211_VHT_MCS_NOT_SUPPORTED; + else if (own_rx < peer_tx) + peer_tx = own_rx; + } + + if (peer_rx != IEEE80211_VHT_MCS_NOT_SUPPORTED) { + if (own_tx == IEEE80211_VHT_MCS_NOT_SUPPORTED) + peer_rx = IEEE80211_VHT_MCS_NOT_SUPPORTED; + else if (own_tx < peer_rx) + peer_rx = own_tx; + } + + vht_cap->vht_mcs.rx_mcs_map &= + ~cpu_to_le16(IEEE80211_VHT_MCS_NOT_SUPPORTED << i * 2); + vht_cap->vht_mcs.rx_mcs_map |= cpu_to_le16(peer_rx << i * 2); + + vht_cap->vht_mcs.tx_mcs_map &= + ~cpu_to_le16(IEEE80211_VHT_MCS_NOT_SUPPORTED << i * 2); + vht_cap->vht_mcs.tx_mcs_map |= cpu_to_le16(peer_tx << i * 2); + } + + /* finally set up the bandwidth */ switch (vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) { case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ: case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ: -- cgit From acbba0d0f88e2577b9d92b61b136d13f65831a52 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 6 Mar 2013 01:31:12 +0000 Subject: team: introduce two default team_modeop functions and use them in modes No need to duplicate code for this. Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/team/team.c | 19 ++++++++++++++++--- drivers/net/team/team_mode_broadcast.c | 14 ++------------ drivers/net/team/team_mode_roundrobin.c | 14 ++------------ include/linux/if_team.h | 5 ++++- 4 files changed, 24 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 05c5efe84591..ece70a4abbb1 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -73,11 +73,24 @@ static int team_port_set_orig_dev_addr(struct team_port *port) return __set_port_dev_addr(port->dev, port->orig.dev_addr); } -int team_port_set_team_dev_addr(struct team_port *port) +static int team_port_set_team_dev_addr(struct team *team, + struct team_port *port) +{ + return __set_port_dev_addr(port->dev, team->dev->dev_addr); +} + +int team_modeop_port_enter(struct team *team, struct team_port *port) +{ + return team_port_set_team_dev_addr(team, port); +} +EXPORT_SYMBOL(team_modeop_port_enter); + +void team_modeop_port_change_dev_addr(struct team *team, + struct team_port *port) { - return __set_port_dev_addr(port->dev, port->team->dev->dev_addr); + team_port_set_team_dev_addr(team, port); } -EXPORT_SYMBOL(team_port_set_team_dev_addr); +EXPORT_SYMBOL(team_modeop_port_change_dev_addr); static void team_refresh_port_linkup(struct team_port *port) { diff --git a/drivers/net/team/team_mode_broadcast.c b/drivers/net/team/team_mode_broadcast.c index c5db428e73fa..c366cd299c06 100644 --- a/drivers/net/team/team_mode_broadcast.c +++ b/drivers/net/team/team_mode_broadcast.c @@ -46,20 +46,10 @@ static bool bc_transmit(struct team *team, struct sk_buff *skb) return sum_ret; } -static int bc_port_enter(struct team *team, struct team_port *port) -{ - return team_port_set_team_dev_addr(port); -} - -static void bc_port_change_dev_addr(struct team *team, struct team_port *port) -{ - team_port_set_team_dev_addr(port); -} - static const struct team_mode_ops bc_mode_ops = { .transmit = bc_transmit, - .port_enter = bc_port_enter, - .port_change_dev_addr = bc_port_change_dev_addr, + .port_enter = team_modeop_port_enter, + .port_change_dev_addr = team_modeop_port_change_dev_addr, }; static const struct team_mode bc_mode = { diff --git a/drivers/net/team/team_mode_roundrobin.c b/drivers/net/team/team_mode_roundrobin.c index 105135aa8f05..ed63a6bc66ce 100644 --- a/drivers/net/team/team_mode_roundrobin.c +++ b/drivers/net/team/team_mode_roundrobin.c @@ -64,20 +64,10 @@ drop: return false; } -static int rr_port_enter(struct team *team, struct team_port *port) -{ - return team_port_set_team_dev_addr(port); -} - -static void rr_port_change_dev_addr(struct team *team, struct team_port *port) -{ - team_port_set_team_dev_addr(port); -} - static const struct team_mode_ops rr_mode_ops = { .transmit = rr_transmit, - .port_enter = rr_port_enter, - .port_change_dev_addr = rr_port_change_dev_addr, + .port_enter = team_modeop_port_enter, + .port_change_dev_addr = team_modeop_port_change_dev_addr, }; static const struct team_mode rr_mode = { diff --git a/include/linux/if_team.h b/include/linux/if_team.h index cfd21e3d5506..3283def74483 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -112,6 +112,10 @@ struct team_mode_ops { void (*port_disabled)(struct team *team, struct team_port *port); }; +extern int team_modeop_port_enter(struct team *team, struct team_port *port); +extern void team_modeop_port_change_dev_addr(struct team *team, + struct team_port *port); + enum team_option_type { TEAM_OPTION_TYPE_U32, TEAM_OPTION_TYPE_STRING, @@ 
-236,7 +240,6 @@ static inline struct team_port *team_get_port_by_index_rcu(struct team *team, return NULL; } -extern int team_port_set_team_dev_addr(struct team_port *port); extern int team_options_register(struct team *team, const struct team_option *option, size_t option_count); -- cgit From 753f993911b32e479b4fab5d228dc07c11d1e7e7 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 6 Mar 2013 01:31:13 +0000 Subject: team: introduce random mode As suggested by Eric Dumazet, allow user to select mode which chooses TX port randomly. Functionality should be more or less similar to round-robin mode with even lower overhead. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/team/Kconfig | 12 ++++++ drivers/net/team/Makefile | 1 + drivers/net/team/team_mode_random.c | 71 +++++++++++++++++++++++++++++++++ drivers/net/team/team_mode_roundrobin.c | 22 +--------- include/linux/if_team.h | 20 ++++++++++ 5 files changed, 105 insertions(+), 21 deletions(-) create mode 100644 drivers/net/team/team_mode_random.c (limited to 'include/linux') diff --git a/drivers/net/team/Kconfig b/drivers/net/team/Kconfig index c3011af68e91..c853d84fd99f 100644 --- a/drivers/net/team/Kconfig +++ b/drivers/net/team/Kconfig @@ -37,6 +37,18 @@ config NET_TEAM_MODE_ROUNDROBIN To compile this team mode as a module, choose M here: the module will be called team_mode_roundrobin. +config NET_TEAM_MODE_RANDOM + tristate "Random mode support" + depends on NET_TEAM + ---help--- + Basic mode where port used for transmitting packets is selected + randomly. + + All added ports are setup to have team's device address. + + To compile this team mode as a module, choose M here: the module + will be called team_mode_random. + config NET_TEAM_MODE_ACTIVEBACKUP tristate "Active-backup mode support" depends on NET_TEAM diff --git a/drivers/net/team/Makefile b/drivers/net/team/Makefile index 975763014e5a..c57e85889751 100644 --- a/drivers/net/team/Makefile +++ b/drivers/net/team/Makefile @@ -5,5 +5,6 @@ obj-$(CONFIG_NET_TEAM) += team.o obj-$(CONFIG_NET_TEAM_MODE_BROADCAST) += team_mode_broadcast.o obj-$(CONFIG_NET_TEAM_MODE_ROUNDROBIN) += team_mode_roundrobin.o +obj-$(CONFIG_NET_TEAM_MODE_RANDOM) += team_mode_random.o obj-$(CONFIG_NET_TEAM_MODE_ACTIVEBACKUP) += team_mode_activebackup.o obj-$(CONFIG_NET_TEAM_MODE_LOADBALANCE) += team_mode_loadbalance.o diff --git a/drivers/net/team/team_mode_random.c b/drivers/net/team/team_mode_random.c new file mode 100644 index 000000000000..9eabfaa22f3e --- /dev/null +++ b/drivers/net/team/team_mode_random.c @@ -0,0 +1,71 @@ +/* + * drivers/net/team/team_mode_random.c - Random mode for team + * Copyright (c) 2013 Jiri Pirko + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version.
+ */ + +#include +#include +#include +#include +#include +#include +#include + +static u32 random_N(unsigned int N) +{ + return reciprocal_divide(random32(), N); +} + +static bool rnd_transmit(struct team *team, struct sk_buff *skb) +{ + struct team_port *port; + int port_index; + + port_index = random_N(team->en_port_count); + port = team_get_port_by_index_rcu(team, port_index); + port = team_get_first_port_txable_rcu(team, port); + if (unlikely(!port)) + goto drop; + if (team_dev_queue_xmit(team, port, skb)) + return false; + return true; + +drop: + dev_kfree_skb_any(skb); + return false; +} + +static const struct team_mode_ops rnd_mode_ops = { + .transmit = rnd_transmit, + .port_enter = team_modeop_port_enter, + .port_change_dev_addr = team_modeop_port_change_dev_addr, +}; + +static const struct team_mode rnd_mode = { + .kind = "random", + .owner = THIS_MODULE, + .ops = &rnd_mode_ops, +}; + +static int __init rnd_init_module(void) +{ + return team_mode_register(&rnd_mode); +} + +static void __exit rnd_cleanup_module(void) +{ + team_mode_unregister(&rnd_mode); +} + +module_init(rnd_init_module); +module_exit(rnd_cleanup_module); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Jiri Pirko "); +MODULE_DESCRIPTION("Random mode for team"); +MODULE_ALIAS("team-mode-random"); diff --git a/drivers/net/team/team_mode_roundrobin.c b/drivers/net/team/team_mode_roundrobin.c index ed63a6bc66ce..d268e4de781b 100644 --- a/drivers/net/team/team_mode_roundrobin.c +++ b/drivers/net/team/team_mode_roundrobin.c @@ -25,26 +25,6 @@ static struct rr_priv *rr_priv(struct team *team) return (struct rr_priv *) &team->mode_priv; } -static struct team_port *__get_first_port_up(struct team *team, - struct team_port *port) -{ - struct team_port *cur; - - if (team_port_txable(port)) - return port; - cur = port; - list_for_each_entry_continue_rcu(cur, &team->port_list, list) - if (team_port_txable(port)) - return cur; - list_for_each_entry_rcu(cur, &team->port_list, list) { - if (cur == port) - break; - if (team_port_txable(port)) - return cur; - } - return NULL; -} - static bool rr_transmit(struct team *team, struct sk_buff *skb) { struct team_port *port; @@ -52,7 +32,7 @@ static bool rr_transmit(struct team *team, struct sk_buff *skb) port_index = rr_priv(team)->sent_packets++ % team->en_port_count; port = team_get_port_by_index_rcu(team, port_index); - port = __get_first_port_up(team, port); + port = team_get_first_port_txable_rcu(team, port); if (unlikely(!port)) goto drop; if (team_dev_queue_xmit(team, port, skb)) diff --git a/include/linux/if_team.h b/include/linux/if_team.h index 3283def74483..4474557904f6 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -240,6 +240,26 @@ static inline struct team_port *team_get_port_by_index_rcu(struct team *team, return NULL; } +static inline struct team_port * +team_get_first_port_txable_rcu(struct team *team, struct team_port *port) +{ + struct team_port *cur; + + if (likely(team_port_txable(port))) + return port; + cur = port; + list_for_each_entry_continue_rcu(cur, &team->port_list, list) + if (team_port_txable(port)) + return cur; + list_for_each_entry_rcu(cur, &team->port_list, list) { + if (cur == port) + break; + if (team_port_txable(port)) + return cur; + } + return NULL; +} + extern int team_options_register(struct team *team, const struct team_option *option, size_t option_count); -- cgit From 8524982847ff00b66ffb89314c342c51f4138ee7 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Mon, 18 Feb 2013 21:47:45 +0100 Subject: ssb: fix unaligned 
access to mac address The mac address should be aligned to u16 to prevent an unaligned access in drivers/ssb/pci.c where it is cast to __be16. Signed-off-by: Hauke Mehrtens Signed-off-by: John W. Linville --- include/linux/ssb/ssb.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h index 22958d68ecfe..8b1322296fed 100644 --- a/include/linux/ssb/ssb.h +++ b/include/linux/ssb/ssb.h @@ -26,9 +26,9 @@ struct ssb_sprom_core_pwr_info { struct ssb_sprom { u8 revision; - u8 il0mac[6]; /* MAC address for 802.11b/g */ - u8 et0mac[6]; /* MAC address for Ethernet */ - u8 et1mac[6]; /* MAC address for 802.11a */ + u8 il0mac[6] __aligned(sizeof(u16)); /* MAC address for 802.11b/g */ + u8 et0mac[6] __aligned(sizeof(u16)); /* MAC address for Ethernet */ + u8 et1mac[6] __aligned(sizeof(u16)); /* MAC address for 802.11a */ u8 et0phyaddr; /* MII address for enet0 */ u8 et1phyaddr; /* MII address for enet1 */ u8 et0mdcport; /* MDIO for enet0 */ -- cgit From f04a9d8adf766c480353c0f2427e641251c9b059 Mon Sep 17 00:00:00 2001 From: Rajkumar Kasirajan Date: Wed, 30 May 2012 16:32:37 +0530 Subject: mfd: ab8500-sysctrl: Update correct turn on status In L9540, turn_on_status register is not updated correctly if the device is rebooted with AC/USB charger connected. Due to this, the device boots android instead of entering into charge only mode. Read the AC/USB status register to detect the charger presence and update the turn on status manually. Signed-off-by: Rajkumar Kasirajan Signed-off-by: Per Forlin Signed-off-by: Lee Jones Reviewed-by: Rupesh KUMAR Reviewed-by: Philippe LANGLAIS Tested-by: Rupesh KUMAR Tested-by: Philippe LANGLAIS Acked-by: Samuel Ortiz --- drivers/mfd/ab8500-core.c | 39 +++++++++++++++++++++++++++++++++++++++ drivers/mfd/ab8500-sysctrl.c | 2 +- include/linux/mfd/abx500/ab8500.h | 2 ++ 3 files changed, 42 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/mfd/ab8500-core.c b/drivers/mfd/ab8500-core.c index 7c84ced2e01b..50e6f1e29727 100644 --- a/drivers/mfd/ab8500-core.c +++ b/drivers/mfd/ab8500-core.c @@ -111,6 +111,13 @@ #define AB8500_TURN_ON_STATUS 0x00 +#define AB8500_CH_USBCH_STAT1_REG 0x02 +#define VBUS_DET_DBNC100 0x02 +#define VBUS_DET_DBNC1 0x01 + +static DEFINE_SPINLOCK(on_stat_lock); +static u8 turn_on_stat_mask = 0xFF; +static u8 turn_on_stat_set; static bool no_bm; /* No battery management */ module_param(no_bm, bool, S_IRUGO); @@ -1171,6 +1178,15 @@ static ssize_t show_switch_off_status(struct device *dev, return sprintf(buf, "%#x\n", value); } +/* use mask and set to override the register turn_on_stat value */ +void ab8500_override_turn_on_stat(u8 mask, u8 set) +{ + spin_lock(&on_stat_lock); + turn_on_stat_mask = mask; + turn_on_stat_set = set; + spin_unlock(&on_stat_lock); +} + /* * ab8500 has turned on due to (TURN_ON_STATUS): * 0x01 PORnVbat @@ -1194,6 +1210,20 @@ static ssize_t show_turn_on_status(struct device *dev, AB8500_TURN_ON_STATUS, &value); if (ret < 0) return ret; + + /* + * In L9540, turn_on_status register is not updated correctly if + * the device is rebooted with AC/USB charger connected. Due to + * this, the device boots android instead of entering into charge + * only mode. Read the AC/USB status register to detect the charger + * presence and update the turn on status manually.
+ */ + if (is_ab9540(ab8500)) { + spin_lock(&on_stat_lock); + value = (value & turn_on_stat_mask) | turn_on_stat_set; + spin_unlock(&on_stat_lock); + } + return sprintf(buf, "%#x\n", value); } @@ -1399,6 +1429,15 @@ static int ab8500_probe(struct platform_device *pdev) if (plat && plat->init) plat->init(ab8500); + if (is_ab9540(ab8500)) { + ret = get_register_interruptible(ab8500, AB8500_CHARGER, + AB8500_CH_USBCH_STAT1_REG, &value); + if (ret < 0) + return ret; + if ((value & VBUS_DET_DBNC1) && (value & VBUS_DET_DBNC100)) + ab8500_override_turn_on_stat(~AB8500_POW_KEY_1_ON, + AB8500_VBUS_DET); + } /* Clear and mask all interrupts */ for (i = 0; i < ab8500->mask_size; i++) { diff --git a/drivers/mfd/ab8500-sysctrl.c b/drivers/mfd/ab8500-sysctrl.c index 108fd86552f0..7c773797d267 100644 --- a/drivers/mfd/ab8500-sysctrl.c +++ b/drivers/mfd/ab8500-sysctrl.c @@ -21,7 +21,7 @@ void ab8500_power_off(void) { sigset_t old; sigset_t all; - static char *pss[] = {"ab8500_ac", "ab8500_usb"}; + static char *pss[] = {"ab8500_ac", "pm2301", "ab8500_usb"}; int i; bool charger_present = false; union power_supply_propval val; diff --git a/include/linux/mfd/abx500/ab8500.h b/include/linux/mfd/abx500/ab8500.h index 9db0bda446a0..fdd8be64feeb 100644 --- a/include/linux/mfd/abx500/ab8500.h +++ b/include/linux/mfd/abx500/ab8500.h @@ -512,6 +512,8 @@ static inline int is_ab9540_2p0_or_earlier(struct ab8500 *ab) return (is_ab9540(ab) && (ab->chip_id < AB8500_CUT2P0)); } +void ab8500_override_turn_on_stat(u8 mask, u8 set); + #ifdef CONFIG_AB8500_DEBUG void ab8500_dump_all_banks(struct device *dev); void ab8500_debug_register_interrupt(int line); -- cgit From 734823462590335cbf5c6a1fa5cae84a881dcb43 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Tue, 26 Feb 2013 10:06:55 +0000 Subject: mfd: ab8500-gpadc: Add gpadc hw conversion Add support for gpadc hw conversion and make the number of samples configurable.
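For reference, a hypothetical in-kernel caller of the extended API; the signatures, constants and device name all come from this patch (see the ab8500-gpadc.h hunk and the debugfs code below), but the function itself is only a usage sketch with error handling kept minimal:

#include <linux/mfd/abx500/ab8500-gpadc.h>

static int example_read_main_bat(void)
{
	struct ab8500_gpadc *gpadc = ab8500_gpadc_get("ab8500-gpadc.0");
	int sw_val, hw_val;

	/* SW conversion, averaging 16 samples; trigger fields are unused */
	sw_val = ab8500_gpadc_sw_hw_convert(gpadc, MAIN_BAT_V,
					    SAMPLE_16, 0, 0, ADC_SW);
	if (sw_val < 0)
		return sw_val;

	/* HW-triggered conversion: 4 samples, rising edge, delay timer 10 */
	hw_val = ab8500_gpadc_sw_hw_convert(gpadc, MAIN_BAT_V,
					    SAMPLE_4, RISING_EDGE, 10, ADC_HW);
	return hw_val;
}

Existing two-argument callers keep working, since ab8500_gpadc_convert() becomes a static inline wrapper that passes SAMPLE_16, 0, 0 and ADC_SW.
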
Signed-off-by: M'boumba Cedric Madianga Signed-off-by: Lee Jones Reviewed-by: Mattias WALLIN Tested-by: Michel JAOUEN Acked-by: Samuel Ortiz --- drivers/mfd/ab8500-debugfs.c | 292 +++++++++++++++++++++++++++++--- drivers/mfd/ab8500-gpadc.c | 282 ++++++++++++++++++++++-------- include/linux/mfd/abx500/ab8500-gpadc.h | 30 +++- 3 files changed, 499 insertions(+), 105 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/ab8500-debugfs.c b/drivers/mfd/ab8500-debugfs.c index 45fe3c50eb03..59ecd8c680ed 100644 --- a/drivers/mfd/ab8500-debugfs.c +++ b/drivers/mfd/ab8500-debugfs.c @@ -101,6 +101,11 @@ static int num_irqs; static struct device_attribute **dev_attr; static char **event_name; +static u8 avg_sample = SAMPLE_16; +static u8 trig_edge = RISING_EDGE; +static u8 conv_type = ADC_SW; +static u8 trig_timer; + /** * struct ab8500_reg_range * @first: the first address of the range @@ -808,9 +813,10 @@ static int ab8500_gpadc_bat_ctrl_print(struct seq_file *s, void *p) struct ab8500_gpadc *gpadc; gpadc = ab8500_gpadc_get("ab8500-gpadc.0"); - bat_ctrl_raw = ab8500_gpadc_read_raw(gpadc, BAT_CTRL); + bat_ctrl_raw = ab8500_gpadc_read_raw(gpadc, BAT_CTRL, + avg_sample, trig_edge, trig_timer, conv_type); bat_ctrl_convert = ab8500_gpadc_ad_to_voltage(gpadc, - BAT_CTRL, bat_ctrl_raw); + BAT_CTRL, bat_ctrl_raw); return seq_printf(s, "%d,0x%X\n", bat_ctrl_convert, bat_ctrl_raw); @@ -836,9 +842,10 @@ static int ab8500_gpadc_btemp_ball_print(struct seq_file *s, void *p) struct ab8500_gpadc *gpadc; gpadc = ab8500_gpadc_get("ab8500-gpadc.0"); - btemp_ball_raw = ab8500_gpadc_read_raw(gpadc, BTEMP_BALL); + btemp_ball_raw = ab8500_gpadc_read_raw(gpadc, BTEMP_BALL, + avg_sample, trig_edge, trig_timer, conv_type); btemp_ball_convert = ab8500_gpadc_ad_to_voltage(gpadc, BTEMP_BALL, - btemp_ball_raw); + btemp_ball_raw); return seq_printf(s, "%d,0x%X\n", btemp_ball_convert, btemp_ball_raw); @@ -865,9 +872,10 @@ static int ab8500_gpadc_main_charger_v_print(struct seq_file *s, void *p) struct ab8500_gpadc *gpadc; gpadc = ab8500_gpadc_get("ab8500-gpadc.0"); - main_charger_v_raw = ab8500_gpadc_read_raw(gpadc, MAIN_CHARGER_V); + main_charger_v_raw = ab8500_gpadc_read_raw(gpadc, MAIN_CHARGER_V, + avg_sample, trig_edge, trig_timer, conv_type); main_charger_v_convert = ab8500_gpadc_ad_to_voltage(gpadc, - MAIN_CHARGER_V, main_charger_v_raw); + MAIN_CHARGER_V, main_charger_v_raw); return seq_printf(s, "%d,0x%X\n", main_charger_v_convert, main_charger_v_raw); @@ -895,9 +903,10 @@ static int ab8500_gpadc_acc_detect1_print(struct seq_file *s, void *p) struct ab8500_gpadc *gpadc; gpadc = ab8500_gpadc_get("ab8500-gpadc.0"); - acc_detect1_raw = ab8500_gpadc_read_raw(gpadc, ACC_DETECT1); + acc_detect1_raw = ab8500_gpadc_read_raw(gpadc, ACC_DETECT1, + avg_sample, trig_edge, trig_timer, conv_type); acc_detect1_convert = ab8500_gpadc_ad_to_voltage(gpadc, ACC_DETECT1, - acc_detect1_raw); + acc_detect1_raw); return seq_printf(s, "%d,0x%X\n", acc_detect1_convert, acc_detect1_raw); @@ -925,9 +934,10 @@ static int ab8500_gpadc_acc_detect2_print(struct seq_file *s, void *p) struct ab8500_gpadc *gpadc; gpadc = ab8500_gpadc_get("ab8500-gpadc.0"); - acc_detect2_raw = ab8500_gpadc_read_raw(gpadc, ACC_DETECT2); + acc_detect2_raw = ab8500_gpadc_read_raw(gpadc, ACC_DETECT2, + avg_sample, trig_edge, trig_timer, conv_type); acc_detect2_convert = ab8500_gpadc_ad_to_voltage(gpadc, - ACC_DETECT2, acc_detect2_raw); + ACC_DETECT2, acc_detect2_raw); return seq_printf(s, "%d,0x%X\n", acc_detect2_convert, acc_detect2_raw); @@ -955,9 +965,10 @@ 
static int ab8500_gpadc_aux1_print(struct seq_file *s, void *p) struct ab8500_gpadc *gpadc; gpadc = ab8500_gpadc_get("ab8500-gpadc.0"); - aux1_raw = ab8500_gpadc_read_raw(gpadc, ADC_AUX1); + aux1_raw = ab8500_gpadc_read_raw(gpadc, ADC_AUX1, + avg_sample, trig_edge, trig_timer, conv_type); aux1_convert = ab8500_gpadc_ad_to_voltage(gpadc, ADC_AUX1, - aux1_raw); + aux1_raw); return seq_printf(s, "%d,0x%X\n", aux1_convert, aux1_raw); @@ -983,9 +994,10 @@ static int ab8500_gpadc_aux2_print(struct seq_file *s, void *p) struct ab8500_gpadc *gpadc; gpadc = ab8500_gpadc_get("ab8500-gpadc.0"); - aux2_raw = ab8500_gpadc_read_raw(gpadc, ADC_AUX2); + aux2_raw = ab8500_gpadc_read_raw(gpadc, ADC_AUX2, + avg_sample, trig_edge, trig_timer, conv_type); aux2_convert = ab8500_gpadc_ad_to_voltage(gpadc, ADC_AUX2, - aux2_raw); + aux2_raw); return seq_printf(s, "%d,0x%X\n", aux2_convert, aux2_raw); @@ -1011,9 +1023,10 @@ static int ab8500_gpadc_main_bat_v_print(struct seq_file *s, void *p) struct ab8500_gpadc *gpadc; gpadc = ab8500_gpadc_get("ab8500-gpadc.0"); - main_bat_v_raw = ab8500_gpadc_read_raw(gpadc, MAIN_BAT_V); + main_bat_v_raw = ab8500_gpadc_read_raw(gpadc, MAIN_BAT_V, + avg_sample, trig_edge, trig_timer, conv_type); main_bat_v_convert = ab8500_gpadc_ad_to_voltage(gpadc, MAIN_BAT_V, - main_bat_v_raw); + main_bat_v_raw); return seq_printf(s, "%d,0x%X\n", main_bat_v_convert, main_bat_v_raw); @@ -1040,9 +1053,10 @@ static int ab8500_gpadc_vbus_v_print(struct seq_file *s, void *p) struct ab8500_gpadc *gpadc; gpadc = ab8500_gpadc_get("ab8500-gpadc.0"); - vbus_v_raw = ab8500_gpadc_read_raw(gpadc, VBUS_V); + vbus_v_raw = ab8500_gpadc_read_raw(gpadc, VBUS_V, + avg_sample, trig_edge, trig_timer, conv_type); vbus_v_convert = ab8500_gpadc_ad_to_voltage(gpadc, VBUS_V, - vbus_v_raw); + vbus_v_raw); return seq_printf(s, "%d,0x%X\n", vbus_v_convert, vbus_v_raw); @@ -1068,9 +1082,10 @@ static int ab8500_gpadc_main_charger_c_print(struct seq_file *s, void *p) struct ab8500_gpadc *gpadc; gpadc = ab8500_gpadc_get("ab8500-gpadc.0"); - main_charger_c_raw = ab8500_gpadc_read_raw(gpadc, MAIN_CHARGER_C); + main_charger_c_raw = ab8500_gpadc_read_raw(gpadc, MAIN_CHARGER_C, + avg_sample, trig_edge, trig_timer, conv_type); main_charger_c_convert = ab8500_gpadc_ad_to_voltage(gpadc, - MAIN_CHARGER_C, main_charger_c_raw); + MAIN_CHARGER_C, main_charger_c_raw); return seq_printf(s, "%d,0x%X\n", main_charger_c_convert, main_charger_c_raw); @@ -1098,9 +1113,10 @@ static int ab8500_gpadc_usb_charger_c_print(struct seq_file *s, void *p) struct ab8500_gpadc *gpadc; gpadc = ab8500_gpadc_get("ab8500-gpadc.0"); - usb_charger_c_raw = ab8500_gpadc_read_raw(gpadc, USB_CHARGER_C); + usb_charger_c_raw = ab8500_gpadc_read_raw(gpadc, USB_CHARGER_C, + avg_sample, trig_edge, trig_timer, conv_type); usb_charger_c_convert = ab8500_gpadc_ad_to_voltage(gpadc, - USB_CHARGER_C, usb_charger_c_raw); + USB_CHARGER_C, usb_charger_c_raw); return seq_printf(s, "%d,0x%X\n", usb_charger_c_convert, usb_charger_c_raw); @@ -1128,9 +1144,10 @@ static int ab8500_gpadc_bk_bat_v_print(struct seq_file *s, void *p) struct ab8500_gpadc *gpadc; gpadc = ab8500_gpadc_get("ab8500-gpadc.0"); - bk_bat_v_raw = ab8500_gpadc_read_raw(gpadc, BK_BAT_V); + bk_bat_v_raw = ab8500_gpadc_read_raw(gpadc, BK_BAT_V, + avg_sample, trig_edge, trig_timer, conv_type); bk_bat_v_convert = ab8500_gpadc_ad_to_voltage(gpadc, - BK_BAT_V, bk_bat_v_raw); + BK_BAT_V, bk_bat_v_raw); return seq_printf(s, "%d,0x%X\n", bk_bat_v_convert, bk_bat_v_raw); @@ -1156,9 +1173,10 @@ static int 
ab8500_gpadc_die_temp_print(struct seq_file *s, void *p) struct ab8500_gpadc *gpadc; gpadc = ab8500_gpadc_get("ab8500-gpadc.0"); - die_temp_raw = ab8500_gpadc_read_raw(gpadc, DIE_TEMP); + die_temp_raw = ab8500_gpadc_read_raw(gpadc, DIE_TEMP, + avg_sample, trig_edge, trig_timer, conv_type); die_temp_convert = ab8500_gpadc_ad_to_voltage(gpadc, DIE_TEMP, - die_temp_raw); + die_temp_raw); return seq_printf(s, "%d,0x%X\n", die_temp_convert, die_temp_raw); @@ -1177,6 +1195,208 @@ static const struct file_operations ab8500_gpadc_die_temp_fops = { .owner = THIS_MODULE, }; +static int ab8500_gpadc_avg_sample_print(struct seq_file *s, void *p) +{ + return seq_printf(s, "%d\n", avg_sample); +} + +static int ab8500_gpadc_avg_sample_open(struct inode *inode, struct file *file) +{ + return single_open(file, ab8500_gpadc_avg_sample_print, + inode->i_private); +} + +static ssize_t ab8500_gpadc_avg_sample_write(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct device *dev = ((struct seq_file *)(file->private_data))->private; + char buf[32]; + int buf_size; + unsigned long user_avg_sample; + int err; + + /* Get userspace string and assure termination */ + buf_size = min(count, (sizeof(buf) - 1)); + if (copy_from_user(buf, user_buf, buf_size)) + return -EFAULT; + buf[buf_size] = 0; + + err = strict_strtoul(buf, 0, &user_avg_sample); + if (err) + return -EINVAL; + if ((user_avg_sample == SAMPLE_1) || (user_avg_sample == SAMPLE_4) + || (user_avg_sample == SAMPLE_8) + || (user_avg_sample == SAMPLE_16)) { + avg_sample = (u8) user_avg_sample; + } else { + dev_err(dev, "debugfs error input: " + "should be egal to 1, 4, 8 or 16\n"); + return -EINVAL; + } + return buf_size; +} + +static const struct file_operations ab8500_gpadc_avg_sample_fops = { + .open = ab8500_gpadc_avg_sample_open, + .read = seq_read, + .write = ab8500_gpadc_avg_sample_write, + .llseek = seq_lseek, + .release = single_release, + .owner = THIS_MODULE, +}; + +static int ab8500_gpadc_trig_edge_print(struct seq_file *s, void *p) +{ + return seq_printf(s, "%d\n", trig_edge); +} + +static int ab8500_gpadc_trig_edge_open(struct inode *inode, struct file *file) +{ + return single_open(file, ab8500_gpadc_trig_edge_print, + inode->i_private); +} + +static ssize_t ab8500_gpadc_trig_edge_write(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct device *dev = ((struct seq_file *)(file->private_data))->private; + char buf[32]; + int buf_size; + unsigned long user_trig_edge; + int err; + + /* Get userspace string and assure termination */ + buf_size = min(count, (sizeof(buf) - 1)); + if (copy_from_user(buf, user_buf, buf_size)) + return -EFAULT; + buf[buf_size] = 0; + + err = strict_strtoul(buf, 0, &user_trig_edge); + if (err) + return -EINVAL; + if ((user_trig_edge == RISING_EDGE) + || (user_trig_edge == FALLING_EDGE)) { + trig_edge = (u8) user_trig_edge; + } else { + dev_err(dev, "Wrong input:\n" + "Enter 0. Rising edge\n" + "Enter 1. 
Falling edge\n"); + return -EINVAL; + } + return buf_size; +} + +static const struct file_operations ab8500_gpadc_trig_edge_fops = { + .open = ab8500_gpadc_trig_edge_open, + .read = seq_read, + .write = ab8500_gpadc_trig_edge_write, + .llseek = seq_lseek, + .release = single_release, + .owner = THIS_MODULE, +}; + +static int ab8500_gpadc_trig_timer_print(struct seq_file *s, void *p) +{ + return seq_printf(s, "%d\n", trig_timer); +} + +static int ab8500_gpadc_trig_timer_open(struct inode *inode, struct file *file) +{ + return single_open(file, ab8500_gpadc_trig_timer_print, + inode->i_private); +} + +static ssize_t ab8500_gpadc_trig_timer_write(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct device *dev = ((struct seq_file *)(file->private_data))->private; + char buf[32]; + int buf_size; + unsigned long user_trig_timer; + int err; + + /* Get userspace string and assure termination */ + buf_size = min(count, (sizeof(buf) - 1)); + if (copy_from_user(buf, user_buf, buf_size)) + return -EFAULT; + buf[buf_size] = 0; + + err = strict_strtoul(buf, 0, &user_trig_timer); + if (err) + return -EINVAL; + if ((user_trig_timer >= 0) && (user_trig_timer <= 255)) { + trig_timer = (u8) user_trig_timer; + } else { + dev_err(dev, "debugfs error input: " + "should be beetween 0 to 255\n"); + return -EINVAL; + } + return buf_size; +} + +static const struct file_operations ab8500_gpadc_trig_timer_fops = { + .open = ab8500_gpadc_trig_timer_open, + .read = seq_read, + .write = ab8500_gpadc_trig_timer_write, + .llseek = seq_lseek, + .release = single_release, + .owner = THIS_MODULE, +}; + +static int ab8500_gpadc_conv_type_print(struct seq_file *s, void *p) +{ + return seq_printf(s, "%d\n", conv_type); +} + +static int ab8500_gpadc_conv_type_open(struct inode *inode, struct file *file) +{ + return single_open(file, ab8500_gpadc_conv_type_print, + inode->i_private); +} + +static ssize_t ab8500_gpadc_conv_type_write(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct device *dev = ((struct seq_file *)(file->private_data))->private; + char buf[32]; + int buf_size; + unsigned long user_conv_type; + int err; + + /* Get userspace string and assure termination */ + buf_size = min(count, (sizeof(buf) - 1)); + if (copy_from_user(buf, user_buf, buf_size)) + return -EFAULT; + buf[buf_size] = 0; + + err = strict_strtoul(buf, 0, &user_conv_type); + if (err) + return -EINVAL; + if ((user_conv_type == ADC_SW) + || (user_conv_type == ADC_HW)) { + conv_type = (u8) user_conv_type; + } else { + dev_err(dev, "Wrong input:\n" + "Enter 0. ADC SW conversion\n" + "Enter 1. ADC HW conversion\n"); + return -EINVAL; + } + return buf_size; +} + +static const struct file_operations ab8500_gpadc_conv_type_fops = { + .open = ab8500_gpadc_conv_type_open, + .read = seq_read, + .write = ab8500_gpadc_conv_type_write, + .llseek = seq_lseek, + .release = single_release, + .owner = THIS_MODULE, +}; + /* * return length of an ASCII numerical value, 0 is string is not a * numerical value. 
@@ -1722,6 +1942,26 @@ static int ab8500_debug_probe(struct platform_device *plf) if (!file) goto err; + file = debugfs_create_file("avg_sample", (S_IRUGO | S_IWUGO), + ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_avg_sample_fops); + if (!file) + goto err; + + file = debugfs_create_file("trig_edge", (S_IRUGO | S_IWUGO), + ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_trig_edge_fops); + if (!file) + goto err; + + file = debugfs_create_file("trig_timer", (S_IRUGO | S_IWUGO), + ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_trig_timer_fops); + if (!file) + goto err; + + file = debugfs_create_file("conv_type", (S_IRUGO | S_IWUGO), + ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_conv_type_fops); + if (!file) + goto err; + return 0; err: diff --git a/drivers/mfd/ab8500-gpadc.c b/drivers/mfd/ab8500-gpadc.c index 7f39479c1afc..8673bf66f7d7 100644 --- a/drivers/mfd/ab8500-gpadc.c +++ b/drivers/mfd/ab8500-gpadc.c @@ -55,13 +55,18 @@ #define EN_VTVOUT 0x02 #define EN_GPADC 0x01 #define DIS_GPADC 0x00 -#define SW_AVG_16 0x60 +#define AVG_1 0x00 +#define AVG_4 0x20 +#define AVG_8 0x40 +#define AVG_16 0x60 #define ADC_SW_CONV 0x04 #define EN_ICHAR 0x80 #define BTEMP_PULL_UP 0x08 #define EN_BUF 0x40 #define DIS_ZERO 0x00 #define GPADC_BUSY 0x01 +#define EN_FALLING 0x10 +#define EN_TRIG_EDGE 0x02 /* GPADC constants from AB8500 spec, UM0836 */ #define ADC_RESOLUTION 1024 @@ -116,7 +121,10 @@ struct adc_cal_data { * the completion of gpadc conversion * @ab8500_gpadc_lock: structure of type mutex * @regu: pointer to the struct regulator - * @irq: interrupt number that is used by gpadc + * @irq_sw: interrupt number that is used by gpadc for Sw + * conversion + * @irq_hw: interrupt number that is used by gpadc for Hw + * conversion * @cal_data array of ADC calibration data structs */ struct ab8500_gpadc { @@ -126,7 +134,8 @@ struct ab8500_gpadc { struct completion ab8500_gpadc_complete; struct mutex ab8500_gpadc_lock; struct regulator *regu; - int irq; + int irq_sw; + int irq_hw; struct adc_cal_data cal_data[NBR_CAL_INPUTS]; }; @@ -244,30 +253,35 @@ int ab8500_gpadc_ad_to_voltage(struct ab8500_gpadc *gpadc, u8 channel, EXPORT_SYMBOL(ab8500_gpadc_ad_to_voltage); /** - * ab8500_gpadc_convert() - gpadc conversion + * ab8500_gpadc_sw_hw_convert() - gpadc conversion * @channel: analog channel to be converted to digital data + * @avg_sample: number of ADC sample to average + * @trig_egde: selected ADC trig edge + * @trig_timer: selected ADC trigger delay timer + * @conv_type: selected conversion type (HW or SW conversion) * * This function converts the selected analog i/p to digital * data. 
*/ -int ab8500_gpadc_convert(struct ab8500_gpadc *gpadc, u8 channel) +int ab8500_gpadc_sw_hw_convert(struct ab8500_gpadc *gpadc, u8 channel, + u8 avg_sample, u8 trig_edge, u8 trig_timer, u8 conv_type) { int ad_value; int voltage; - ad_value = ab8500_gpadc_read_raw(gpadc, channel); - - /* On failure retry a second time */ + ad_value = ab8500_gpadc_read_raw(gpadc, channel, avg_sample, + trig_edge, trig_timer, conv_type); +/* On failure retry a second time */ if (ad_value < 0) - ad_value = ab8500_gpadc_read_raw(gpadc, channel); - - if (ad_value < 0) { - dev_err(gpadc->dev, "GPADC raw value failed ch: %d\n", channel); + ad_value = ab8500_gpadc_read_raw(gpadc, channel, avg_sample, + trig_edge, trig_timer, conv_type); +if (ad_value < 0) { + dev_err(gpadc->dev, "GPADC raw value failed ch: %d\n", + channel); return ad_value; } voltage = ab8500_gpadc_ad_to_voltage(gpadc, channel, ad_value); - if (voltage < 0) dev_err(gpadc->dev, "GPADC to voltage conversion failed ch:" " %d AD: 0x%x\n", channel, ad_value); @@ -279,11 +293,16 @@ EXPORT_SYMBOL(ab8500_gpadc_convert); /** * ab8500_gpadc_read_raw() - gpadc read * @channel: analog channel to be read + * @avg_sample: number of ADC sample to average + * @trig_edge: selected trig edge + * @trig_timer: selected ADC trigger delay timer + * @conv_type: selected conversion type (HW or SW conversion) * - * This function obtains the raw ADC value, this then needs - * to be converted by calling ab8500_gpadc_ad_to_voltage() + * This function obtains the raw ADC value for an hardware conversion, + * this then needs to be converted by calling ab8500_gpadc_ad_to_voltage() */ -int ab8500_gpadc_read_raw(struct ab8500_gpadc *gpadc, u8 channel) +int ab8500_gpadc_read_raw(struct ab8500_gpadc *gpadc, u8 channel, + u8 avg_sample, u8 trig_edge, u8 trig_timer, u8 conv_type) { int ret; int looplimit = 0; @@ -293,7 +312,6 @@ int ab8500_gpadc_read_raw(struct ab8500_gpadc *gpadc, u8 channel) return -ENODEV; mutex_lock(&gpadc->ab8500_gpadc_lock); - /* Enable VTVout LDO this is required for GPADC */ pm_runtime_get_sync(gpadc->dev); @@ -321,9 +339,29 @@ int ab8500_gpadc_read_raw(struct ab8500_gpadc *gpadc, u8 channel) goto out; } - /* Select the channel source and set average samples to 16 */ - ret = abx500_set_register_interruptible(gpadc->dev, AB8500_GPADC, - AB8500_GPADC_CTRL2_REG, (channel | SW_AVG_16)); + /* Select the channel source and set average samples */ + switch (avg_sample) { + case SAMPLE_1: + val = channel | AVG_1; + break; + case SAMPLE_4: + val = channel | AVG_4; + break; + case SAMPLE_8: + val = channel | AVG_8; + break; + default: + val = channel | AVG_16; + break; + + } + + if (conv_type == ADC_HW) + ret = abx500_set_register_interruptible(gpadc->dev, + AB8500_GPADC, AB8500_GPADC_CTRL3_REG, val); + else + ret = abx500_set_register_interruptible(gpadc->dev, + AB8500_GPADC, AB8500_GPADC_CTRL2_REG, val); if (ret < 0) { dev_err(gpadc->dev, "gpadc_conversion: set avg samples failed\n"); @@ -335,22 +373,43 @@ int ab8500_gpadc_read_raw(struct ab8500_gpadc *gpadc, u8 channel) * charging current sense if it needed, ABB 3.0 needs some special * treatment too. 
*/ + if ((conv_type == ADC_HW) && (trig_edge)) { + ret = abx500_mask_and_set_register_interruptible(gpadc->dev, + AB8500_GPADC, AB8500_GPADC_CTRL1_REG, + EN_FALLING, EN_FALLING); + + } switch (channel) { case MAIN_CHARGER_C: case USB_CHARGER_C: - ret = abx500_mask_and_set_register_interruptible(gpadc->dev, - AB8500_GPADC, AB8500_GPADC_CTRL1_REG, - EN_BUF | EN_ICHAR, - EN_BUF | EN_ICHAR); - break; - case BTEMP_BALL: - if (!is_ab8500_2p0_or_earlier(gpadc->parent)) { - /* Turn on btemp pull-up on ABB 3.0 */ + if (conv_type == ADC_HW) ret = abx500_mask_and_set_register_interruptible( gpadc->dev, AB8500_GPADC, AB8500_GPADC_CTRL1_REG, - EN_BUF | BTEMP_PULL_UP, - EN_BUF | BTEMP_PULL_UP); + EN_BUF | EN_ICHAR | EN_TRIG_EDGE, + EN_BUF | EN_ICHAR | EN_TRIG_EDGE); + else + ret = abx500_mask_and_set_register_interruptible( + gpadc->dev, + AB8500_GPADC, AB8500_GPADC_CTRL1_REG, + EN_BUF | EN_ICHAR, + EN_BUF | EN_ICHAR); + break; + case BTEMP_BALL: + if (!is_ab8500_2p0_or_earlier(gpadc->parent)) { + if (conv_type == ADC_HW) + /* Turn on btemp pull-up on ABB 3.0 */ + ret = abx500_mask_and_set_register_interruptible + (gpadc->dev, + AB8500_GPADC, AB8500_GPADC_CTRL1_REG, + EN_BUF | BTEMP_PULL_UP | EN_TRIG_EDGE, + EN_BUF | BTEMP_PULL_UP | EN_TRIG_EDGE); + else + ret = abx500_mask_and_set_register_interruptible + (gpadc->dev, + AB8500_GPADC, AB8500_GPADC_CTRL1_REG, + EN_BUF | BTEMP_PULL_UP, + EN_BUF | BTEMP_PULL_UP); /* * Delay might be needed for ABB8500 cut 3.0, if not, remove @@ -361,8 +420,17 @@ int ab8500_gpadc_read_raw(struct ab8500_gpadc *gpadc, u8 channel) } /* Intentional fallthrough */ default: - ret = abx500_mask_and_set_register_interruptible(gpadc->dev, - AB8500_GPADC, AB8500_GPADC_CTRL1_REG, EN_BUF, EN_BUF); + if (conv_type == ADC_HW) + ret = abx500_mask_and_set_register_interruptible( + gpadc->dev, + AB8500_GPADC, AB8500_GPADC_CTRL1_REG, + EN_BUF | EN_TRIG_EDGE, + EN_BUF | EN_TRIG_EDGE); + else + ret = abx500_mask_and_set_register_interruptible( + gpadc->dev, + AB8500_GPADC, + AB8500_GPADC_CTRL1_REG, EN_BUF, EN_BUF); break; } if (ret < 0) { @@ -371,36 +439,83 @@ int ab8500_gpadc_read_raw(struct ab8500_gpadc *gpadc, u8 channel) goto out; } - ret = abx500_mask_and_set_register_interruptible(gpadc->dev, - AB8500_GPADC, AB8500_GPADC_CTRL1_REG, ADC_SW_CONV, ADC_SW_CONV); - if (ret < 0) { - dev_err(gpadc->dev, - "gpadc_conversion: start s/w conversion failed\n"); - goto out; + /* Set trigger delay timer */ + if (conv_type == ADC_HW) { + ret = abx500_set_register_interruptible(gpadc->dev, + AB8500_GPADC, AB8500_GPADC_AUTO_TIMER_REG, trig_timer); + if (ret < 0) { + dev_err(gpadc->dev, + "gpadc_conversion: trig timer failed\n"); + goto out; + } + } + + /* Start SW conversion */ + if (conv_type == ADC_SW) { + ret = abx500_mask_and_set_register_interruptible(gpadc->dev, + AB8500_GPADC, AB8500_GPADC_CTRL1_REG, + ADC_SW_CONV, ADC_SW_CONV); + if (ret < 0) { + dev_err(gpadc->dev, + "gpadc_conversion: start s/w conv failed\n"); + goto out; + } } + /* wait for completion of conversion */ - if (!wait_for_completion_timeout(&gpadc->ab8500_gpadc_complete, - msecs_to_jiffies(CONVERSION_TIME))) { - dev_err(gpadc->dev, - "timeout: didn't receive GPADC conversion interrupt\n"); - ret = -EINVAL; - goto out; + if (conv_type == ADC_HW) { + if (!wait_for_completion_timeout(&gpadc->ab8500_gpadc_complete, + 2*HZ)) { + dev_err(gpadc->dev, + "timeout didn't receive" + " hw GPADC conv interrupt\n"); + ret = -EINVAL; + goto out; + } + } else { + if (!wait_for_completion_timeout(&gpadc->ab8500_gpadc_complete, + 
msecs_to_jiffies(CONVERSION_TIME))) { + dev_err(gpadc->dev, + "timeout didn't receive" + " sw GPADC conv interrupt\n"); + ret = -EINVAL; + goto out; + } } /* Read the converted RAW data */ - ret = abx500_get_register_interruptible(gpadc->dev, AB8500_GPADC, - AB8500_GPADC_MANDATAL_REG, &low_data); - if (ret < 0) { - dev_err(gpadc->dev, "gpadc_conversion: read low data failed\n"); - goto out; - } + if (conv_type == ADC_HW) { + ret = abx500_get_register_interruptible(gpadc->dev, + AB8500_GPADC, AB8500_GPADC_AUTODATAL_REG, &low_data); + if (ret < 0) { + dev_err(gpadc->dev, + "gpadc_conversion: read hw low data failed\n"); + goto out; + } - ret = abx500_get_register_interruptible(gpadc->dev, AB8500_GPADC, - AB8500_GPADC_MANDATAH_REG, &high_data); - if (ret < 0) { - dev_err(gpadc->dev, - "gpadc_conversion: read high data failed\n"); - goto out; + ret = abx500_get_register_interruptible(gpadc->dev, + AB8500_GPADC, AB8500_GPADC_AUTODATAH_REG, &high_data); + if (ret < 0) { + dev_err(gpadc->dev, + "gpadc_conversion: read hw high data failed\n"); + goto out; + } + } else { + ret = abx500_get_register_interruptible(gpadc->dev, + AB8500_GPADC, AB8500_GPADC_MANDATAL_REG, &low_data); + if (ret < 0) { + dev_err(gpadc->dev, + "gpadc_conversion: read sw low data failed\n"); + goto out; + } + + ret = abx500_get_register_interruptible(gpadc->dev, + AB8500_GPADC, AB8500_GPADC_MANDATAH_REG, &high_data); + if (ret < 0) { + dev_err(gpadc->dev, + "gpadc_conversion: read sw high data failed\n"); + goto out; + } } /* Disable GPADC */ @@ -411,6 +526,7 @@ int ab8500_gpadc_read_raw(struct ab8500_gpadc *gpadc, u8 channel) goto out; } + /* Disable VTVout LDO this is required for GPADC */ pm_runtime_mark_last_busy(gpadc->dev); pm_runtime_put_autosuspend(gpadc->dev); @@ -427,9 +543,7 @@ out: */ (void) abx500_set_register_interruptible(gpadc->dev, AB8500_GPADC, AB8500_GPADC_CTRL1_REG, DIS_GPADC); - pm_runtime_put(gpadc->dev); - mutex_unlock(&gpadc->ab8500_gpadc_lock); dev_err(gpadc->dev, "gpadc_conversion: Failed to AD convert channel %d\n", channel); @@ -438,16 +552,16 @@ out: EXPORT_SYMBOL(ab8500_gpadc_read_raw); /** - * ab8500_bm_gpswadcconvend_handler() - isr for s/w gpadc conversion completion + * ab8500_bm_gpadcconvend_handler() - isr for gpadc conversion completion * @irq: irq number * @data: pointer to the data passed during request irq * - * This is a interrupt service routine for s/w gpadc conversion completion. + * This is a interrupt service routine for gpadc conversion completion. * Notifies the gpadc completion is completed and the converted raw value * can be read from the registers. 
* Returns IRQ status(IRQ_HANDLED) */ -static irqreturn_t ab8500_bm_gpswadcconvend_handler(int irq, void *_gpadc) +static irqreturn_t ab8500_bm_gpadcconvend_handler(int irq, void *_gpadc) { struct ab8500_gpadc *gpadc = _gpadc; @@ -646,11 +760,19 @@ static int ab8500_gpadc_probe(struct platform_device *pdev) return -ENOMEM; } - gpadc->irq = platform_get_irq_byname(pdev, "SW_CONV_END"); - if (gpadc->irq < 0) { - dev_err(&pdev->dev, "failed to get platform irq-%d\n", - gpadc->irq); - ret = gpadc->irq; + gpadc->irq_sw = platform_get_irq_byname(pdev, "SW_CONV_END"); + if (gpadc->irq_sw < 0) { + dev_err(gpadc->dev, "failed to get platform irq-%d\n", + gpadc->irq_sw); + ret = gpadc->irq_sw; + goto fail; + } + + gpadc->irq_hw = platform_get_irq_byname(pdev, "HW_CONV_END"); + if (gpadc->irq_hw < 0) { + dev_err(gpadc->dev, "failed to get platform irq-%d\n", + gpadc->irq_hw); + ret = gpadc->irq_hw; goto fail; } @@ -661,14 +783,21 @@ static int ab8500_gpadc_probe(struct platform_device *pdev) /* Initialize completion used to notify completion of conversion */ init_completion(&gpadc->ab8500_gpadc_complete); - /* Register interrupt - SwAdcComplete */ - ret = request_threaded_irq(gpadc->irq, NULL, - ab8500_bm_gpswadcconvend_handler, - IRQF_ONESHOT | IRQF_NO_SUSPEND | IRQF_SHARED, - "ab8500-gpadc", gpadc); + /* Register interrupts */ + ret = request_threaded_irq(gpadc->irq_sw, NULL, + ab8500_bm_gpadcconvend_handler, + IRQF_NO_SUSPEND | IRQF_SHARED, "ab8500-gpadc-sw", gpadc); + if (ret < 0) { + dev_err(gpadc->dev, "Failed to register interrupt, irq: %d\n", + gpadc->irq_sw); + goto fail; + } + ret = request_threaded_irq(gpadc->irq_hw, NULL, + ab8500_bm_gpadcconvend_handler, + IRQF_NO_SUSPEND | IRQF_SHARED, "ab8500-gpadc-hw", gpadc); if (ret < 0) { dev_err(gpadc->dev, "Failed to register interrupt, irq: %d\n", - gpadc->irq); + gpadc->irq_hw); goto fail; } @@ -694,7 +823,8 @@ static int ab8500_gpadc_probe(struct platform_device *pdev) dev_dbg(gpadc->dev, "probe success\n"); return 0; fail_irq: - free_irq(gpadc->irq, gpadc); + free_irq(gpadc->irq_sw, gpadc); + free_irq(gpadc->irq_hw, gpadc); fail: kfree(gpadc); gpadc = NULL; @@ -708,7 +838,8 @@ static int ab8500_gpadc_remove(struct platform_device *pdev) /* remove this gpadc entry from the list */ list_del(&gpadc->node); /* remove interrupt - completion of Sw ADC conversion */ - free_irq(gpadc->irq, gpadc); + free_irq(gpadc->irq_sw, gpadc); + free_irq(gpadc->irq_hw, gpadc); pm_runtime_get_sync(gpadc->dev); pm_runtime_disable(gpadc->dev); @@ -757,6 +888,7 @@ subsys_initcall_sync(ab8500_gpadc_init); module_exit(ab8500_gpadc_exit); MODULE_LICENSE("GPL v2"); -MODULE_AUTHOR("Arun R Murthy, Daniel Willerud, Johan Palsson"); +MODULE_AUTHOR("Arun R Murthy, Daniel Willerud, Johan Palsson," + "M'boumba Cedric Madianga"); MODULE_ALIAS("platform:ab8500_gpadc"); MODULE_DESCRIPTION("AB8500 GPADC driver"); diff --git a/include/linux/mfd/abx500/ab8500-gpadc.h b/include/linux/mfd/abx500/ab8500-gpadc.h index 252966769d93..7694e7ab1880 100644 --- a/include/linux/mfd/abx500/ab8500-gpadc.h +++ b/include/linux/mfd/abx500/ab8500-gpadc.h @@ -4,12 +4,14 @@ * * Author: Arun R Murthy * Author: Daniel Willerud + * Author: M'boumba Cedric Madianga */ #ifndef _AB8500_GPADC_H #define _AB8500_GPADC_H -/* GPADC source: From datasheet(ADCSwSel[4:0] in GPADCCtrl2) */ +/* GPADC source: From datasheet(ADCSwSel[4:0] in GPADCCtrl2 + * and ADCHwSel[4:0] in GPADCCtrl3 ) */ #define BAT_CTRL 0x01 #define BTEMP_BALL 0x02 #define MAIN_CHARGER_V 0x03 @@ -24,12 +26,32 @@ #define BK_BAT_V 0x0C #define 
DIE_TEMP 0x0D +#define SAMPLE_1 1 +#define SAMPLE_4 4 +#define SAMPLE_8 8 +#define SAMPLE_16 16 +#define RISING_EDGE 0 +#define FALLING_EDGE 1 + +/* Arbitrary ADC conversion type constants */ +#define ADC_SW 0 +#define ADC_HW 1 + + struct ab8500_gpadc; struct ab8500_gpadc *ab8500_gpadc_get(char *name); -int ab8500_gpadc_convert(struct ab8500_gpadc *gpadc, u8 channel); -int ab8500_gpadc_read_raw(struct ab8500_gpadc *gpadc, u8 channel); +int ab8500_gpadc_sw_hw_convert(struct ab8500_gpadc *gpadc, u8 channel, + u8 avg_sample, u8 trig_edge, u8 trig_timer, u8 conv_type); +static inline int ab8500_gpadc_convert(struct ab8500_gpadc *gpadc, u8 channel) +{ + return ab8500_gpadc_sw_hw_convert(gpadc, channel, + SAMPLE_16, 0, 0, ADC_SW); +} + +int ab8500_gpadc_read_raw(struct ab8500_gpadc *gpadc, u8 channel, + u8 avg_sample, u8 trig_edge, u8 trig_timer, u8 conv_type); int ab8500_gpadc_ad_to_voltage(struct ab8500_gpadc *gpadc, - u8 channel, int ad_value); + u8 channel, int ad_value); #endif /* _AB8500_GPADC_H */ -- cgit From 022ab148d28e8466e45d28552224e3029f1cccd8 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Sat, 16 Feb 2013 10:25:07 +0100 Subject: pinctrl: Declare operation structures as const The pinconf, pinctrl and pinmux operation structures hold function pointers that are never modified. Declare them as const. Signed-off-by: Laurent Pinchart Signed-off-by: Linus Walleij --- drivers/pinctrl/devicetree.c | 4 ++-- drivers/pinctrl/mvebu/pinctrl-mvebu.c | 6 +++--- drivers/pinctrl/pinconf.c | 2 +- drivers/pinctrl/pinctrl-abx500.c | 6 +++--- drivers/pinctrl/pinctrl-at91.c | 6 +++--- drivers/pinctrl/pinctrl-bcm2835.c | 6 +++--- drivers/pinctrl/pinctrl-exynos5440.c | 6 +++--- drivers/pinctrl/pinctrl-falcon.c | 2 +- drivers/pinctrl/pinctrl-imx.c | 6 +++--- drivers/pinctrl/pinctrl-lantiq.c | 4 ++-- drivers/pinctrl/pinctrl-mxs.c | 6 +++--- drivers/pinctrl/pinctrl-nomadik.c | 6 +++--- drivers/pinctrl/pinctrl-pxa3xx.c | 4 ++-- drivers/pinctrl/pinctrl-samsung.c | 6 +++--- drivers/pinctrl/pinctrl-single.c | 6 +++--- drivers/pinctrl/pinctrl-sirf.c | 4 ++-- drivers/pinctrl/pinctrl-sunxi.c | 6 +++--- drivers/pinctrl/pinctrl-tegra.c | 6 +++--- drivers/pinctrl/pinctrl-u300.c | 6 +++--- drivers/pinctrl/pinctrl-xway.c | 2 +- drivers/pinctrl/spear/pinctrl-spear.c | 4 ++-- include/linux/pinctrl/pinctrl.h | 6 +++--- 22 files changed, 55 insertions(+), 55 deletions(-) (limited to 'include/linux') diff --git a/drivers/pinctrl/devicetree.c b/drivers/pinctrl/devicetree.c index fd40a11ad645..c7b7cb477129 100644 --- a/drivers/pinctrl/devicetree.c +++ b/drivers/pinctrl/devicetree.c @@ -41,7 +41,7 @@ static void dt_free_map(struct pinctrl_dev *pctldev, struct pinctrl_map *map, unsigned num_maps) { if (pctldev) { - struct pinctrl_ops *ops = pctldev->desc->pctlops; + const struct pinctrl_ops *ops = pctldev->desc->pctlops; ops->dt_free_map(pctldev, map, num_maps); } else { /* There is no pctldev for PIN_MAP_TYPE_DUMMY_STATE */ @@ -122,7 +122,7 @@ static int dt_to_map_one_config(struct pinctrl *p, const char *statename, { struct device_node *np_pctldev; struct pinctrl_dev *pctldev; - struct pinctrl_ops *ops; + const struct pinctrl_ops *ops; int ret; struct pinctrl_map *map; unsigned num_maps; diff --git a/drivers/pinctrl/mvebu/pinctrl-mvebu.c b/drivers/pinctrl/mvebu/pinctrl-mvebu.c index c689c04a4f52..61149914882d 100644 --- a/drivers/pinctrl/mvebu/pinctrl-mvebu.c +++ b/drivers/pinctrl/mvebu/pinctrl-mvebu.c @@ -263,7 +263,7 @@ static void mvebu_pinconf_group_dbg_show(struct pinctrl_dev *pctldev, return; } -static struct 
pinconf_ops mvebu_pinconf_ops = { +static const struct pinconf_ops mvebu_pinconf_ops = { .pin_config_group_get = mvebu_pinconf_group_get, .pin_config_group_set = mvebu_pinconf_group_set, .pin_config_group_dbg_show = mvebu_pinconf_group_dbg_show, @@ -369,7 +369,7 @@ static int mvebu_pinmux_gpio_set_direction(struct pinctrl_dev *pctldev, return -ENOTSUPP; } -static struct pinmux_ops mvebu_pinmux_ops = { +static const struct pinmux_ops mvebu_pinmux_ops = { .get_functions_count = mvebu_pinmux_get_funcs_count, .get_function_name = mvebu_pinmux_get_func_name, .get_function_groups = mvebu_pinmux_get_groups, @@ -470,7 +470,7 @@ static void mvebu_pinctrl_dt_free_map(struct pinctrl_dev *pctldev, kfree(map); } -static struct pinctrl_ops mvebu_pinctrl_ops = { +static const struct pinctrl_ops mvebu_pinctrl_ops = { .get_groups_count = mvebu_pinctrl_get_groups_count, .get_group_name = mvebu_pinctrl_get_group_name, .get_group_pins = mvebu_pinctrl_get_group_pins, diff --git a/drivers/pinctrl/pinconf.c b/drivers/pinctrl/pinconf.c index ac8d382a79bb..8aefd28c797e 100644 --- a/drivers/pinctrl/pinconf.c +++ b/drivers/pinctrl/pinconf.c @@ -670,7 +670,7 @@ static int pinconf_dbg_config_print(struct seq_file *s, void *d) struct pinctrl_maps *maps_node; struct pinctrl_map const *map; struct pinctrl_dev *pctldev = NULL; - struct pinconf_ops *confops = NULL; + const struct pinconf_ops *confops = NULL; int i, j; bool found = false; diff --git a/drivers/pinctrl/pinctrl-abx500.c b/drivers/pinctrl/pinctrl-abx500.c index caecdd373061..169d72c59a7b 100644 --- a/drivers/pinctrl/pinctrl-abx500.c +++ b/drivers/pinctrl/pinctrl-abx500.c @@ -656,7 +656,7 @@ static void abx500_gpio_disable_free(struct pinctrl_dev *pctldev, { } -static struct pinmux_ops abx500_pinmux_ops = { +static const struct pinmux_ops abx500_pinmux_ops = { .get_functions_count = abx500_pmx_get_funcs_cnt, .get_function_name = abx500_pmx_get_func_name, .get_function_groups = abx500_pmx_get_func_groups, @@ -704,7 +704,7 @@ static void abx500_pin_dbg_show(struct pinctrl_dev *pctldev, chip->base + offset - 1); } -static struct pinctrl_ops abx500_pinctrl_ops = { +static const struct pinctrl_ops abx500_pinctrl_ops = { .get_groups_count = abx500_get_groups_cnt, .get_group_name = abx500_get_group_name, .get_group_pins = abx500_get_group_pins, @@ -778,7 +778,7 @@ int abx500_pin_config_set(struct pinctrl_dev *pctldev, return ret; } -static struct pinconf_ops abx500_pinconf_ops = { +static const struct pinconf_ops abx500_pinconf_ops = { .pin_config_get = abx500_pin_config_get, .pin_config_set = abx500_pin_config_set, }; diff --git a/drivers/pinctrl/pinctrl-at91.c b/drivers/pinctrl/pinctrl-at91.c index 75933a6aa828..e50fa5f863e1 100644 --- a/drivers/pinctrl/pinctrl-at91.c +++ b/drivers/pinctrl/pinctrl-at91.c @@ -294,7 +294,7 @@ static void at91_dt_free_map(struct pinctrl_dev *pctldev, { } -static struct pinctrl_ops at91_pctrl_ops = { +static const struct pinctrl_ops at91_pctrl_ops = { .get_groups_count = at91_get_groups_count, .get_group_name = at91_get_group_name, .get_group_pins = at91_get_group_pins, @@ -696,7 +696,7 @@ static void at91_gpio_disable_free(struct pinctrl_dev *pctldev, /* Set the pin to some default state, GPIO is usually default */ } -static struct pinmux_ops at91_pmx_ops = { +static const struct pinmux_ops at91_pmx_ops = { .get_functions_count = at91_pmx_get_funcs_count, .get_function_name = at91_pmx_get_func_name, .get_function_groups = at91_pmx_get_groups, @@ -776,7 +776,7 @@ static void at91_pinconf_group_dbg_show(struct pinctrl_dev *pctldev, { } 
-static struct pinconf_ops at91_pinconf_ops = { +static const struct pinconf_ops at91_pinconf_ops = { .pin_config_get = at91_pinconf_get, .pin_config_set = at91_pinconf_set, .pin_config_dbg_show = at91_pinconf_dbg_show, diff --git a/drivers/pinctrl/pinctrl-bcm2835.c b/drivers/pinctrl/pinctrl-bcm2835.c index 4eb6d2c4e4df..f28d4b08771a 100644 --- a/drivers/pinctrl/pinctrl-bcm2835.c +++ b/drivers/pinctrl/pinctrl-bcm2835.c @@ -795,7 +795,7 @@ out: return err; } -static struct pinctrl_ops bcm2835_pctl_ops = { +static const struct pinctrl_ops bcm2835_pctl_ops = { .get_groups_count = bcm2835_pctl_get_groups_count, .get_group_name = bcm2835_pctl_get_group_name, .get_group_pins = bcm2835_pctl_get_group_pins, @@ -872,7 +872,7 @@ static int bcm2835_pmx_gpio_set_direction(struct pinctrl_dev *pctldev, return 0; } -static struct pinmux_ops bcm2835_pmx_ops = { +static const struct pinmux_ops bcm2835_pmx_ops = { .get_functions_count = bcm2835_pmx_get_functions_count, .get_function_name = bcm2835_pmx_get_function_name, .get_function_groups = bcm2835_pmx_get_function_groups, @@ -916,7 +916,7 @@ static int bcm2835_pinconf_set(struct pinctrl_dev *pctldev, return 0; } -static struct pinconf_ops bcm2835_pinconf_ops = { +static const struct pinconf_ops bcm2835_pinconf_ops = { .pin_config_get = bcm2835_pinconf_get, .pin_config_set = bcm2835_pinconf_set, }; diff --git a/drivers/pinctrl/pinctrl-exynos5440.c b/drivers/pinctrl/pinctrl-exynos5440.c index 1376eb7305db..169ea3e5f777 100644 --- a/drivers/pinctrl/pinctrl-exynos5440.c +++ b/drivers/pinctrl/pinctrl-exynos5440.c @@ -286,7 +286,7 @@ static void exynos5440_dt_free_map(struct pinctrl_dev *pctldev, } /* list of pinctrl callbacks for the pinctrl core */ -static struct pinctrl_ops exynos5440_pctrl_ops = { +static const struct pinctrl_ops exynos5440_pctrl_ops = { .get_groups_count = exynos5440_get_group_count, .get_group_name = exynos5440_get_group_name, .get_group_pins = exynos5440_get_group_pins, @@ -374,7 +374,7 @@ static int exynos5440_pinmux_gpio_set_direction(struct pinctrl_dev *pctldev, } /* list of pinmux callbacks for the pinmux vertical in pinctrl core */ -static struct pinmux_ops exynos5440_pinmux_ops = { +static const struct pinmux_ops exynos5440_pinmux_ops = { .get_functions_count = exynos5440_get_functions_count, .get_function_name = exynos5440_pinmux_get_fname, .get_function_groups = exynos5440_pinmux_get_groups, @@ -523,7 +523,7 @@ static int exynos5440_pinconf_group_get(struct pinctrl_dev *pctldev, } /* list of pinconfig callbacks for pinconfig vertical in the pinctrl code */ -static struct pinconf_ops exynos5440_pinconf_ops = { +static const struct pinconf_ops exynos5440_pinconf_ops = { .pin_config_get = exynos5440_pinconf_get, .pin_config_set = exynos5440_pinconf_set, .pin_config_group_get = exynos5440_pinconf_group_get, diff --git a/drivers/pinctrl/pinctrl-falcon.c b/drivers/pinctrl/pinctrl-falcon.c index af97a1f90007..f9b2a1d4854f 100644 --- a/drivers/pinctrl/pinctrl-falcon.c +++ b/drivers/pinctrl/pinctrl-falcon.c @@ -353,7 +353,7 @@ static void falcon_pinconf_group_dbg_show(struct pinctrl_dev *pctrldev, { } -static struct pinconf_ops falcon_pinconf_ops = { +static const struct pinconf_ops falcon_pinconf_ops = { .pin_config_get = falcon_pinconf_get, .pin_config_set = falcon_pinconf_set, .pin_config_group_get = falcon_pinconf_group_get, diff --git a/drivers/pinctrl/pinctrl-imx.c b/drivers/pinctrl/pinctrl-imx.c index 4cebb9c6c5c5..0ef190449eab 100644 --- a/drivers/pinctrl/pinctrl-imx.c +++ b/drivers/pinctrl/pinctrl-imx.c @@ -207,7 +207,7 @@ 
static void imx_dt_free_map(struct pinctrl_dev *pctldev, kfree(map); } -static struct pinctrl_ops imx_pctrl_ops = { +static const struct pinctrl_ops imx_pctrl_ops = { .get_groups_count = imx_get_groups_count, .get_group_name = imx_get_group_name, .get_group_pins = imx_get_group_pins, @@ -299,7 +299,7 @@ static int imx_pmx_get_groups(struct pinctrl_dev *pctldev, unsigned selector, return 0; } -static struct pinmux_ops imx_pmx_ops = { +static const struct pinmux_ops imx_pmx_ops = { .get_functions_count = imx_pmx_get_funcs_count, .get_function_name = imx_pmx_get_func_name, .get_function_groups = imx_pmx_get_groups, @@ -397,7 +397,7 @@ static void imx_pinconf_group_dbg_show(struct pinctrl_dev *pctldev, } } -static struct pinconf_ops imx_pinconf_ops = { +static const struct pinconf_ops imx_pinconf_ops = { .pin_config_get = imx_pinconf_get, .pin_config_set = imx_pinconf_set, .pin_config_dbg_show = imx_pinconf_dbg_show, diff --git a/drivers/pinctrl/pinctrl-lantiq.c b/drivers/pinctrl/pinctrl-lantiq.c index a70384611351..615c5002b757 100644 --- a/drivers/pinctrl/pinctrl-lantiq.c +++ b/drivers/pinctrl/pinctrl-lantiq.c @@ -169,7 +169,7 @@ static int ltq_pinctrl_dt_node_to_map(struct pinctrl_dev *pctldev, return 0; } -static struct pinctrl_ops ltq_pctrl_ops = { +static const struct pinctrl_ops ltq_pctrl_ops = { .get_groups_count = ltq_get_group_count, .get_group_name = ltq_get_group_name, .get_group_pins = ltq_get_group_pins, @@ -311,7 +311,7 @@ static int ltq_pmx_gpio_request_enable(struct pinctrl_dev *pctrldev, return info->apply_mux(pctrldev, mfp, pin_func); } -static struct pinmux_ops ltq_pmx_ops = { +static const struct pinmux_ops ltq_pmx_ops = { .get_functions_count = ltq_pmx_func_count, .get_function_name = ltq_pmx_func_name, .get_function_groups = ltq_pmx_get_groups, diff --git a/drivers/pinctrl/pinctrl-mxs.c b/drivers/pinctrl/pinctrl-mxs.c index 23af9f1f9c35..b45c4eb35798 100644 --- a/drivers/pinctrl/pinctrl-mxs.c +++ b/drivers/pinctrl/pinctrl-mxs.c @@ -158,7 +158,7 @@ static void mxs_dt_free_map(struct pinctrl_dev *pctldev, kfree(map); } -static struct pinctrl_ops mxs_pinctrl_ops = { +static const struct pinctrl_ops mxs_pinctrl_ops = { .get_groups_count = mxs_get_groups_count, .get_group_name = mxs_get_group_name, .get_group_pins = mxs_get_group_pins, @@ -219,7 +219,7 @@ static int mxs_pinctrl_enable(struct pinctrl_dev *pctldev, unsigned selector, return 0; } -static struct pinmux_ops mxs_pinmux_ops = { +static const struct pinmux_ops mxs_pinmux_ops = { .get_functions_count = mxs_pinctrl_get_funcs_count, .get_function_name = mxs_pinctrl_get_func_name, .get_function_groups = mxs_pinctrl_get_func_groups, @@ -319,7 +319,7 @@ static void mxs_pinconf_group_dbg_show(struct pinctrl_dev *pctldev, seq_printf(s, "0x%lx", config); } -static struct pinconf_ops mxs_pinconf_ops = { +static const struct pinconf_ops mxs_pinconf_ops = { .pin_config_get = mxs_pinconf_get, .pin_config_set = mxs_pinconf_set, .pin_config_group_get = mxs_pinconf_group_get, diff --git a/drivers/pinctrl/pinctrl-nomadik.c b/drivers/pinctrl/pinctrl-nomadik.c index 36d20293de5c..2328baaa86bf 100644 --- a/drivers/pinctrl/pinctrl-nomadik.c +++ b/drivers/pinctrl/pinctrl-nomadik.c @@ -1764,7 +1764,7 @@ int nmk_pinctrl_dt_node_to_map(struct pinctrl_dev *pctldev, return 0; } -static struct pinctrl_ops nmk_pinctrl_ops = { +static const struct pinctrl_ops nmk_pinctrl_ops = { .get_groups_count = nmk_get_groups_cnt, .get_group_name = nmk_get_group_name, .get_group_pins = nmk_get_group_pins, @@ -1975,7 +1975,7 @@ static void 
nmk_gpio_disable_free(struct pinctrl_dev *pctldev, /* Set the pin to some default state, GPIO is usually default */ } -static struct pinmux_ops nmk_pinmux_ops = { +static const struct pinmux_ops nmk_pinmux_ops = { .get_functions_count = nmk_pmx_get_funcs_cnt, .get_function_name = nmk_pmx_get_func_name, .get_function_groups = nmk_pmx_get_func_groups, @@ -2089,7 +2089,7 @@ static int nmk_pin_config_set(struct pinctrl_dev *pctldev, unsigned pin, return 0; } -static struct pinconf_ops nmk_pinconf_ops = { +static const struct pinconf_ops nmk_pinconf_ops = { .pin_config_get = nmk_pin_config_get, .pin_config_set = nmk_pin_config_set, }; diff --git a/drivers/pinctrl/pinctrl-pxa3xx.c b/drivers/pinctrl/pinctrl-pxa3xx.c index 1f49bb02a6af..05e11de1d144 100644 --- a/drivers/pinctrl/pinctrl-pxa3xx.c +++ b/drivers/pinctrl/pinctrl-pxa3xx.c @@ -53,7 +53,7 @@ static int pxa3xx_get_group_pins(struct pinctrl_dev *pctrldev, return 0; } -static struct pinctrl_ops pxa3xx_pctrl_ops = { +static const struct pinctrl_ops pxa3xx_pctrl_ops = { .get_groups_count = pxa3xx_get_groups_count, .get_group_name = pxa3xx_get_group_name, .get_group_pins = pxa3xx_get_group_pins, @@ -161,7 +161,7 @@ static int pxa3xx_pmx_request_gpio(struct pinctrl_dev *pctrldev, return 0; } -static struct pinmux_ops pxa3xx_pmx_ops = { +static const struct pinmux_ops pxa3xx_pmx_ops = { .get_functions_count = pxa3xx_pmx_get_funcs_count, .get_function_name = pxa3xx_pmx_get_func_name, .get_function_groups = pxa3xx_pmx_get_groups, diff --git a/drivers/pinctrl/pinctrl-samsung.c b/drivers/pinctrl/pinctrl-samsung.c index f206df175656..3475b92b24a4 100644 --- a/drivers/pinctrl/pinctrl-samsung.c +++ b/drivers/pinctrl/pinctrl-samsung.c @@ -214,7 +214,7 @@ static void samsung_dt_free_map(struct pinctrl_dev *pctldev, } /* list of pinctrl callbacks for the pinctrl core */ -static struct pinctrl_ops samsung_pctrl_ops = { +static const struct pinctrl_ops samsung_pctrl_ops = { .get_groups_count = samsung_get_group_count, .get_group_name = samsung_get_group_name, .get_group_pins = samsung_get_group_pins, @@ -357,7 +357,7 @@ static int samsung_pinmux_gpio_set_direction(struct pinctrl_dev *pctldev, } /* list of pinmux callbacks for the pinmux vertical in pinctrl core */ -static struct pinmux_ops samsung_pinmux_ops = { +static const struct pinmux_ops samsung_pinmux_ops = { .get_functions_count = samsung_get_functions_count, .get_function_name = samsung_pinmux_get_fname, .get_function_groups = samsung_pinmux_get_groups, @@ -468,7 +468,7 @@ static int samsung_pinconf_group_get(struct pinctrl_dev *pctldev, } /* list of pinconfig callbacks for pinconfig vertical in the pinctrl code */ -static struct pinconf_ops samsung_pinconf_ops = { +static const struct pinconf_ops samsung_pinconf_ops = { .pin_config_get = samsung_pinconf_get, .pin_config_set = samsung_pinconf_set, .pin_config_group_get = samsung_pinconf_group_get, diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c index 5c32e880bcb2..0c0e2da9d880 100644 --- a/drivers/pinctrl/pinctrl-single.c +++ b/drivers/pinctrl/pinctrl-single.c @@ -270,7 +270,7 @@ static int pcs_dt_node_to_map(struct pinctrl_dev *pctldev, struct device_node *np_config, struct pinctrl_map **map, unsigned *num_maps); -static struct pinctrl_ops pcs_pinctrl_ops = { +static const struct pinctrl_ops pcs_pinctrl_ops = { .get_groups_count = pcs_get_groups_count, .get_group_name = pcs_get_group_name, .get_group_pins = pcs_get_group_pins, @@ -408,7 +408,7 @@ static int pcs_request_gpio(struct pinctrl_dev *pctldev, return 
-ENOTSUPP; } -static struct pinmux_ops pcs_pinmux_ops = { +static const struct pinmux_ops pcs_pinmux_ops = { .get_functions_count = pcs_get_functions_count, .get_function_name = pcs_get_function_name, .get_function_groups = pcs_get_function_groups, @@ -451,7 +451,7 @@ static void pcs_pinconf_group_dbg_show(struct pinctrl_dev *pctldev, { } -static struct pinconf_ops pcs_pinconf_ops = { +static const struct pinconf_ops pcs_pinconf_ops = { .pin_config_get = pcs_pinconf_get, .pin_config_set = pcs_pinconf_set, .pin_config_group_get = pcs_pinconf_group_get, diff --git a/drivers/pinctrl/pinctrl-sirf.c b/drivers/pinctrl/pinctrl-sirf.c index d02498b30c6e..0990a721758e 100644 --- a/drivers/pinctrl/pinctrl-sirf.c +++ b/drivers/pinctrl/pinctrl-sirf.c @@ -979,7 +979,7 @@ static void sirfsoc_dt_free_map(struct pinctrl_dev *pctldev, kfree(map); } -static struct pinctrl_ops sirfsoc_pctrl_ops = { +static const struct pinctrl_ops sirfsoc_pctrl_ops = { .get_groups_count = sirfsoc_get_groups_count, .get_group_name = sirfsoc_get_group_name, .get_group_pins = sirfsoc_get_group_pins, @@ -1181,7 +1181,7 @@ static int sirfsoc_pinmux_request_gpio(struct pinctrl_dev *pmxdev, return 0; } -static struct pinmux_ops sirfsoc_pinmux_ops = { +static const struct pinmux_ops sirfsoc_pinmux_ops = { .enable = sirfsoc_pinmux_enable, .disable = sirfsoc_pinmux_disable, .get_functions_count = sirfsoc_pinmux_get_funcs_count, diff --git a/drivers/pinctrl/pinctrl-sunxi.c b/drivers/pinctrl/pinctrl-sunxi.c index 80b11e3415bc..46b8f2d4f0a5 100644 --- a/drivers/pinctrl/pinctrl-sunxi.c +++ b/drivers/pinctrl/pinctrl-sunxi.c @@ -1029,7 +1029,7 @@ static void sunxi_pctrl_dt_free_map(struct pinctrl_dev *pctldev, kfree(map); } -static struct pinctrl_ops sunxi_pctrl_ops = { +static const struct pinctrl_ops sunxi_pctrl_ops = { .dt_node_to_map = sunxi_pctrl_dt_node_to_map, .dt_free_map = sunxi_pctrl_dt_free_map, .get_groups_count = sunxi_pctrl_get_groups_count, @@ -1098,7 +1098,7 @@ static int sunxi_pconf_group_set(struct pinctrl_dev *pctldev, return 0; } -static struct pinconf_ops sunxi_pconf_ops = { +static const struct pinconf_ops sunxi_pconf_ops = { .pin_config_group_get = sunxi_pconf_group_get, .pin_config_group_set = sunxi_pconf_group_set, }; @@ -1204,7 +1204,7 @@ error: return ret; } -static struct pinmux_ops sunxi_pmx_ops = { +static const struct pinmux_ops sunxi_pmx_ops = { .get_functions_count = sunxi_pmx_get_funcs_cnt, .get_function_name = sunxi_pmx_get_func_name, .get_function_groups = sunxi_pmx_get_func_groups, diff --git a/drivers/pinctrl/pinctrl-tegra.c b/drivers/pinctrl/pinctrl-tegra.c index f195d77a3572..2fa9bc6cd7ab 100644 --- a/drivers/pinctrl/pinctrl-tegra.c +++ b/drivers/pinctrl/pinctrl-tegra.c @@ -316,7 +316,7 @@ static int tegra_pinctrl_dt_node_to_map(struct pinctrl_dev *pctldev, return 0; } -static struct pinctrl_ops tegra_pinctrl_ops = { +static const struct pinctrl_ops tegra_pinctrl_ops = { .get_groups_count = tegra_pinctrl_get_groups_count, .get_group_name = tegra_pinctrl_get_group_name, .get_group_pins = tegra_pinctrl_get_group_pins, @@ -401,7 +401,7 @@ static void tegra_pinctrl_disable(struct pinctrl_dev *pctldev, pmx_writel(pmx, val, g->mux_bank, g->mux_reg); } -static struct pinmux_ops tegra_pinmux_ops = { +static const struct pinmux_ops tegra_pinmux_ops = { .get_functions_count = tegra_pinctrl_get_funcs_count, .get_function_name = tegra_pinctrl_get_func_name, .get_function_groups = tegra_pinctrl_get_func_groups, @@ -676,7 +676,7 @@ static void tegra_pinconf_config_dbg_show(struct pinctrl_dev *pctldev, } #endif 
-static struct pinconf_ops tegra_pinconf_ops = { +static const struct pinconf_ops tegra_pinconf_ops = { .pin_config_get = tegra_pinconf_get, .pin_config_set = tegra_pinconf_set, .pin_config_group_get = tegra_pinconf_group_get, diff --git a/drivers/pinctrl/pinctrl-u300.c b/drivers/pinctrl/pinctrl-u300.c index 2b5772550836..6a3a7503e6a0 100644 --- a/drivers/pinctrl/pinctrl-u300.c +++ b/drivers/pinctrl/pinctrl-u300.c @@ -860,7 +860,7 @@ static void u300_pin_dbg_show(struct pinctrl_dev *pctldev, struct seq_file *s, seq_printf(s, " " DRIVER_NAME); } -static struct pinctrl_ops u300_pctrl_ops = { +static const struct pinctrl_ops u300_pctrl_ops = { .get_groups_count = u300_get_groups_count, .get_group_name = u300_get_group_name, .get_group_pins = u300_get_group_pins, @@ -1003,7 +1003,7 @@ static int u300_pmx_get_groups(struct pinctrl_dev *pctldev, unsigned selector, return 0; } -static struct pinmux_ops u300_pmx_ops = { +static const struct pinmux_ops u300_pmx_ops = { .get_functions_count = u300_pmx_get_funcs_count, .get_function_name = u300_pmx_get_func_name, .get_function_groups = u300_pmx_get_groups, @@ -1046,7 +1046,7 @@ static int u300_pin_config_set(struct pinctrl_dev *pctldev, unsigned pin, return 0; } -static struct pinconf_ops u300_pconf_ops = { +static const struct pinconf_ops u300_pconf_ops = { .is_generic = true, .pin_config_get = u300_pin_config_get, .pin_config_set = u300_pin_config_set, diff --git a/drivers/pinctrl/pinctrl-xway.c b/drivers/pinctrl/pinctrl-xway.c index 068224efa6fa..f2977cff8366 100644 --- a/drivers/pinctrl/pinctrl-xway.c +++ b/drivers/pinctrl/pinctrl-xway.c @@ -553,7 +553,7 @@ int xway_pinconf_group_set(struct pinctrl_dev *pctldev, return ret; } -static struct pinconf_ops xway_pinconf_ops = { +static const struct pinconf_ops xway_pinconf_ops = { .pin_config_get = xway_pinconf_get, .pin_config_set = xway_pinconf_set, .pin_config_group_set = xway_pinconf_group_set, diff --git a/drivers/pinctrl/spear/pinctrl-spear.c b/drivers/pinctrl/spear/pinctrl-spear.c index 6a7dae70db08..116da0412c4b 100644 --- a/drivers/pinctrl/spear/pinctrl-spear.c +++ b/drivers/pinctrl/spear/pinctrl-spear.c @@ -198,7 +198,7 @@ static void spear_pinctrl_dt_free_map(struct pinctrl_dev *pctldev, kfree(map); } -static struct pinctrl_ops spear_pinctrl_ops = { +static const struct pinctrl_ops spear_pinctrl_ops = { .get_groups_count = spear_pinctrl_get_groups_cnt, .get_group_name = spear_pinctrl_get_group_name, .get_group_pins = spear_pinctrl_get_group_pins, @@ -340,7 +340,7 @@ static void gpio_disable_free(struct pinctrl_dev *pctldev, gpio_request_endisable(pctldev, range, offset, false); } -static struct pinmux_ops spear_pinmux_ops = { +static const struct pinmux_ops spear_pinmux_ops = { .get_functions_count = spear_pinctrl_get_funcs_count, .get_function_name = spear_pinctrl_get_func_name, .get_function_groups = spear_pinctrl_get_func_groups, diff --git a/include/linux/pinctrl/pinctrl.h b/include/linux/pinctrl/pinctrl.h index 778804df293f..2c2a9e8d8578 100644 --- a/include/linux/pinctrl/pinctrl.h +++ b/include/linux/pinctrl/pinctrl.h @@ -118,9 +118,9 @@ struct pinctrl_desc { const char *name; struct pinctrl_pin_desc const *pins; unsigned int npins; - struct pinctrl_ops *pctlops; - struct pinmux_ops *pmxops; - struct pinconf_ops *confops; + const struct pinctrl_ops *pctlops; + const struct pinmux_ops *pmxops; + const struct pinconf_ops *confops; struct module *owner; }; -- cgit From 3e1a498f2728476535571d270081a17fdfceaf26 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Mon, 25 Feb 2013 14:57:35 +0000 
Subject: mfd: ab8500-core: Add Interrupt support for ab8540 ITSource/ITLatch 7, 8, 9 and 10 don't exist on AB8540. This patch replaces them with '-1' in the interrupt list, and handles the '-1' in the code accordingly. Signed-off-by: Lee Jones Acked-by: Samuel Ortiz --- drivers/mfd/ab8500-core.c | 50 +++++++++++++++++++++++++++++++++------ include/linux/mfd/abx500/ab8500.h | 1 + 2 files changed, 44 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/ab8500-core.c b/drivers/mfd/ab8500-core.c index 23db4fcea496..0ba08a26cf73 100644 --- a/drivers/mfd/ab8500-core.c +++ b/drivers/mfd/ab8500-core.c @@ -103,8 +103,10 @@ #define AB8500_IT_LATCHHIER1_REG 0x60 #define AB8500_IT_LATCHHIER2_REG 0x61 #define AB8500_IT_LATCHHIER3_REG 0x62 +#define AB8540_IT_LATCHHIER4_REG 0x63 #define AB8500_IT_LATCHHIER_NUM 3 +#define AB8540_IT_LATCHHIER_NUM 4 #define AB8500_REV_REG 0x80 #define AB8500_IC_NAME_REG 0x82 @@ -143,6 +145,12 @@ static const int ab9540_irq_regoffset[AB9540_NUM_IRQ_REGS] = { 0, 1, 2, 3, 4, 6, 7, 8, 9, 11, 18, 19, 20, 21, 12, 13, 24, 5, 22, 23 }; +/* AB8540 support */ +static const int ab8540_irq_regoffset[AB8540_NUM_IRQ_REGS] = { + 0, 1, 2, 3, 4, -1, -1, -1, -1, 11, 18, 19, 20, 21, 12, 13, 24, 5, 22, 23, + 25, 26, 27, 28, 29, 30, 31, +}; + static const char ab8500_version_str[][7] = { [AB8500_VERSION_AB8500] = "AB8500", [AB8500_VERSION_AB8505] = "AB8505", @@ -360,6 +368,9 @@ static void ab8500_irq_sync_unlock(struct irq_data *data) is_ab8500_1p1_or_earlier(ab8500)) continue; + if (ab8500->irq_reg_offset[i] < 0) + continue; + ab8500->oldmask[i] = new; reg = AB8500_IT_MASK1_REG + ab8500->irq_reg_offset[i]; @@ -431,6 +442,18 @@ static struct irq_chip ab8500_irq_chip = { .irq_set_type = ab8500_irq_set_type, }; +static void update_latch_offset(u8 *offset, int i) +{ + /* Fix inconsistent ITFromLatch25 bit mapping... */ + if (unlikely(*offset == 17)) + *offset = 24; + /* Fix inconsistent ab8540 bit mapping... */ + if (unlikely(*offset == 16)) + *offset = 25; + if ((i==3) && (*offset >= 24)) + *offset += 2; +} + static int ab8500_handle_hierarchical_line(struct ab8500 *ab8500, int latch_offset, u8 latch_val) { @@ -482,9 +505,7 @@ static int ab8500_handle_hierarchical_latch(struct ab8500 *ab8500, latch_bit = __ffs(hier_val); latch_offset = (hier_offset << 3) + latch_bit; - /* Fix inconsistent ITFromLatch25 bit mapping... 
*/ - if (unlikely(latch_offset == 17)) - latch_offset = 24; + update_latch_offset(&latch_offset, hier_offset); status = get_register_interruptible(ab8500, AB8500_INTERRUPT, @@ -512,7 +533,7 @@ static irqreturn_t ab8500_hierarchical_irq(int irq, void *dev) dev_vdbg(ab8500->dev, "interrupt\n"); /* Hierarchical interrupt version */ - for (i = 0; i < AB8500_IT_LATCHHIER_NUM; i++) { + for (i = 0; i < (ab8500->it_latchhier_num); i++) { int status; u8 hier_val; @@ -565,6 +586,9 @@ static irqreturn_t ab8500_irq(int irq, void *dev) if (regoffset == 11 && is_ab8500_1p1_or_earlier(ab8500)) continue; + if (regoffset < 0) + continue; + status = get_register_interruptible(ab8500, AB8500_INTERRUPT, AB8500_IT_LATCH1_REG + regoffset, &value); if (status < 0 || value == 0) @@ -615,7 +639,9 @@ static int ab8500_irq_init(struct ab8500 *ab8500, struct device_node *np) { int num_irqs; - if (is_ab9540(ab8500)) + if (is_ab8540(ab8500)) + num_irqs = AB8540_NR_IRQS; + else if (is_ab9540(ab8500)) num_irqs = AB9540_NR_IRQS; else if (is_ab8505(ab8500)) num_irqs = AB8505_NR_IRQS; @@ -1552,13 +1578,20 @@ static int ab8500_probe(struct platform_device *pdev) ab8500->chip_id >> 4, ab8500->chip_id & 0x0F); - /* Configure AB8500 or AB9540 IRQ */ - if (is_ab9540(ab8500) || is_ab8505(ab8500)) { + /* Configure AB8540 */ + if (is_ab8540(ab8500)) { + ab8500->mask_size = AB8540_NUM_IRQ_REGS; + ab8500->irq_reg_offset = ab8540_irq_regoffset; + ab8500->it_latchhier_num = AB8540_IT_LATCHHIER_NUM; + }/* Configure AB8500 or AB9540 IRQ */ + else if (is_ab9540(ab8500) || is_ab8505(ab8500)) { ab8500->mask_size = AB9540_NUM_IRQ_REGS; ab8500->irq_reg_offset = ab9540_irq_regoffset; + ab8500->it_latchhier_num = AB8500_IT_LATCHHIER_NUM; } else { ab8500->mask_size = AB8500_NUM_IRQ_REGS; ab8500->irq_reg_offset = ab8500_irq_regoffset; + ab8500->it_latchhier_num = AB8500_IT_LATCHHIER_NUM; } ab8500->mask = devm_kzalloc(&pdev->dev, ab8500->mask_size, GFP_KERNEL); if (!ab8500->mask) @@ -1620,6 +1653,9 @@ static int ab8500_probe(struct platform_device *pdev) is_ab8500_1p1_or_earlier(ab8500)) continue; + if (ab8500->irq_reg_offset[i] < 0) + continue; + get_register_interruptible(ab8500, AB8500_INTERRUPT, AB8500_IT_LATCH1_REG + ab8500->irq_reg_offset[i], &value); diff --git a/include/linux/mfd/abx500/ab8500.h b/include/linux/mfd/abx500/ab8500.h index fdd8be64feeb..b5780fd40fe4 100644 --- a/include/linux/mfd/abx500/ab8500.h +++ b/include/linux/mfd/abx500/ab8500.h @@ -362,6 +362,7 @@ struct ab8500 { u8 *oldmask; int mask_size; const int *irq_reg_offset; + int it_latchhier_num; }; struct regulator_reg_init; -- cgit From 75932094601b404fc9ef28f7b6c0aa83dd619af0 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Tue, 12 Feb 2013 15:11:19 +0000 Subject: mfd: ab8500-sysctrl: Add new reset function Add a new reset function which uses the AB WD with 0 timeout. 
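Here the "AB WD" is the AB8500 main watchdog: ab8500_restart() first disables the RTC alarm, stashes a 16-bit reboot-reason code in the spare RTC alarm minute registers, then programs the watchdog with a zero timeout and kicks it so it expires immediately and hard-resets the platform. The sketch below shows how board-support code might wire this up. Only ab8500_restart() and the reboot_reason_code callback come from this patch; the board_* names, the reason-code values, and the arm_pm_restart assignment (the ARM restart hook of this kernel generation) are assumptions added for illustration.

#include <linux/init.h>
#include <linux/string.h>
#include <linux/mfd/abx500/ab8500-sysctrl.h>
#include <asm/system_misc.h>	/* arm_pm_restart; assumed ARM-only board code */

/* Hypothetical mapping from reboot command strings to reason codes. */
static u16 board_reboot_reason_code(const char *cmd)
{
	if (cmd && !strcmp(cmd, "recovery"))
		return 0x0001;	/* made-up code, read back from the RTC regs after restart */
	if (cmd && !strcmp(cmd, "charging"))
		return 0x0002;	/* made-up code */
	return 0;		/* plain reboot; matches the driver's default reason */
}

/* Reaches the sysctrl driver through ab8500_platform_data.sysctrl. */
static struct ab8500_sysctrl_platform_data board_sysctrl_pdata = {
	.reboot_reason_code = board_reboot_reason_code,
};

static void __init board_restart_init(void)
{
	/* ab8500_restart(char mode, const char *cmd) matches this hook. */
	arm_pm_restart = ab8500_restart;
}
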
Signed-off-by: Lee Jones Acked-by: Samuel Ortiz --- drivers/mfd/ab8500-sysctrl.c | 63 +++++++++++++++++++++++++++++++ include/linux/mfd/abx500/ab8500-sysctrl.h | 6 +++ 2 files changed, 69 insertions(+) (limited to 'include/linux') diff --git a/drivers/mfd/ab8500-sysctrl.c b/drivers/mfd/ab8500-sysctrl.c index 6ac63a05893c..f43c42b9f32c 100644 --- a/drivers/mfd/ab8500-sysctrl.c +++ b/drivers/mfd/ab8500-sysctrl.c @@ -15,6 +15,12 @@ #include #include +/* RtcCtrl bits */ +#define AB8500_ALARM_MIN_LOW 0x08 +#define AB8500_ALARM_MIN_MID 0x09 +#define RTC_CTRL 0x0B +#define RTC_ALARM_ENABLE 0x4 + static struct device *sysctrl_dev; void ab8500_power_off(void) @@ -79,6 +85,63 @@ shutdown: } } +/* + * Use the AB WD to reset the platform. It will perform a hard + * reset instead of a soft reset. Write the reset reason to + * the AB before reset, which can be read upon restart. + */ +void ab8500_restart(char mode, const char *cmd) +{ + struct ab8500_platform_data *plat; + struct ab8500_sysctrl_platform_data *pdata; + u16 reason = 0; + u8 val; + + if (sysctrl_dev == NULL) { + pr_err("%s: sysctrl not initialized\n", __func__); + return; + } + + plat = dev_get_platdata(sysctrl_dev->parent); + pdata = plat->sysctrl; + if (pdata->reboot_reason_code) + reason = pdata->reboot_reason_code(cmd); + else + pr_warn("[%s] No reboot reason set. Default reason %d\n", + __func__, reason); + + /* + * Disable RTC alarm, just a precaution so that no alarm + * is running when WD reset is executed. + */ + abx500_get_register_interruptible(sysctrl_dev, AB8500_RTC, + RTC_CTRL , &val); + abx500_set_register_interruptible(sysctrl_dev, AB8500_RTC, + RTC_CTRL , (val & ~RTC_ALARM_ENABLE)); + + /* + * Android is not using the RTC alarm registers during reboot + * so we borrow them for writing the reason of reset + */ + + /* reason[8 LSB] */ + val = reason & 0xFF; + abx500_set_register_interruptible(sysctrl_dev, AB8500_RTC, + AB8500_ALARM_MIN_LOW , val); + + /* reason[8 MSB] */ + val = (reason>>8) & 0xFF; + abx500_set_register_interruptible(sysctrl_dev, AB8500_RTC, + AB8500_ALARM_MIN_MID , val); + + /* Setting WD timeout to 0 */ + ab8500_sysctrl_write(AB8500_MAINWDOGTIMER, 0xFF, 0x0); + + /* Setting the parameters to AB8500 WD*/ + ab8500_sysctrl_write(AB8500_MAINWDOGCTRL, 0xFF, (AB8500_ENABLE_WD | + AB8500_WD_RESTART_ON_EXPIRE | AB8500_KICK_WD)); +} + static inline bool valid_bank(u8 bank) { return ((bank == AB8500_SYS_CTRL1_BLOCK) || diff --git a/include/linux/mfd/abx500/ab8500-sysctrl.h b/include/linux/mfd/abx500/ab8500-sysctrl.h index ebf12e793db9..990bc93f46e1 100644 --- a/include/linux/mfd/abx500/ab8500-sysctrl.h +++ b/include/linux/mfd/abx500/ab8500-sysctrl.h @@ -12,6 +12,7 @@ int ab8500_sysctrl_read(u16 reg, u8 *value); int ab8500_sysctrl_write(u16 reg, u8 mask, u8 value); +void ab8500_restart(char mode, const char *cmd); #else @@ -40,6 +41,7 @@ static inline int ab8500_sysctrl_clear(u16 reg, u8 bits) /* Configuration data for SysClkReq1RfClkBuf - SysClkReq8RfClkBuf */ struct ab8500_sysctrl_platform_data { u8 initial_req_buf_config[8]; + u16 (*reboot_reason_code)(const char *cmd); }; /* Registers */ @@ -299,4 +301,8 @@ struct ab8500_sysctrl_platform_data { #define AB9540_SYSCLK12BUF4VALID_SYSCLK12BUF4VALID_MASK 0xFF #define AB9540_SYSCLK12BUF4VALID_SYSCLK12BUF4VALID_SHIFT 0 +#define AB8500_ENABLE_WD 0x1 +#define AB8500_KICK_WD 0x2 +#define AB8500_WD_RESTART_ON_EXPIRE 0x10 + #endif /* __AB8500_SYSCTRL_H */ -- cgit From e4bffe8d8ad9856143b6e941a17870aee37413d7 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Mon, 11 Feb 2013 
10:38:00 +0000 Subject: mfd: ab8500-gpadc: Add support for the AB8540 This patch enables the GPADC to work on AB8540 based platforms. Signed-off-by: Lee Jones Acked-by: Samuel Ortiz --- drivers/mfd/ab8500-gpadc.c | 316 ++++++++++++++++++++++++++++---- include/linux/mfd/abx500/ab8500-gpadc.h | 43 +++-- 2 files changed, 305 insertions(+), 54 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/ab8500-gpadc.c b/drivers/mfd/ab8500-gpadc.c index fc8da4496e84..c985b90577f6 100644 --- a/drivers/mfd/ab8500-gpadc.c +++ b/drivers/mfd/ab8500-gpadc.c @@ -37,6 +37,13 @@ #define AB8500_GPADC_AUTODATAL_REG 0x07 #define AB8500_GPADC_AUTODATAH_REG 0x08 #define AB8500_GPADC_MUX_CTRL_REG 0x09 +#define AB8540_GPADC_MANDATA2L_REG 0x09 +#define AB8540_GPADC_MANDATA2H_REG 0x0A +#define AB8540_GPADC_APEAAX_REG 0x10 +#define AB8540_GPADC_APEAAT_REG 0x11 +#define AB8540_GPADC_APEAAM_REG 0x12 +#define AB8540_GPADC_APEAAH_REG 0x13 +#define AB8540_GPADC_APEAAL_REG 0x14 /* * OTP register offsets @@ -49,6 +56,10 @@ #define AB8500_GPADC_CAL_5 0x13 #define AB8500_GPADC_CAL_6 0x14 #define AB8500_GPADC_CAL_7 0x15 +/* New calibration for 8540 */ +#define AB8540_GPADC_OTP4_REG_7 0x38 +#define AB8540_GPADC_OTP4_REG_6 0x39 +#define AB8540_GPADC_OTP4_REG_5 0x3A /* gpadc constants */ #define EN_VINTCORE12 0x04 @@ -67,6 +78,7 @@ #define GPADC_BUSY 0x01 #define EN_FALLING 0x10 #define EN_TRIG_EDGE 0x02 +#define EN_VBIAS_XTAL_TEMP 0x02 /* GPADC constants from AB8500 spec, UM0836 */ #define ADC_RESOLUTION 1024 @@ -85,8 +97,21 @@ #define ADC_CH_BKBAT_MIN 0 #define ADC_CH_BKBAT_MAX 3200 +/* GPADC constants from AB8540 spec */ +#define ADC_CH_IBAT_MIN (-6000) /* mA range measured by ADC for ibat*/ +#define ADC_CH_IBAT_MAX 6000 +#define ADC_CH_IBAT_MIN_V (-60) /* mV range measured by ADC for ibat*/ +#define ADC_CH_IBAT_MAX_V 60 +#define IBAT_VDROP_L (-56) /* mV */ +#define IBAT_VDROP_H 56 + /* This is used to not lose precision when dividing to get gain and offset */ -#define CALIB_SCALE 1000 +#define CALIB_SCALE 1000 +/* + * Number of bits shift used to not lose precision + * when dividing to get ibat gain. 
+ */ +#define CALIB_SHIFT_IBAT 20 /* Time in ms before disabling regulator */ #define GPADC_AUDOSUSPEND_DELAY 1 @@ -97,6 +122,7 @@ enum cal_channels { ADC_INPUT_VMAIN = 0, ADC_INPUT_BTEMP, ADC_INPUT_VBAT, + ADC_INPUT_IBAT, NBR_CAL_INPUTS, }; @@ -107,8 +133,8 @@ enum cal_channels { * @offset: Offset of the ADC channel */ struct adc_cal_data { - u64 gain; - u64 offset; + s64 gain; + s64 offset; }; /** @@ -180,6 +206,7 @@ int ab8500_gpadc_ad_to_voltage(struct ab8500_gpadc *gpadc, u8 channel, gpadc->cal_data[ADC_INPUT_VMAIN].offset) / CALIB_SCALE; break; + case XTAL_TEMP: case BAT_CTRL: case BTEMP_BALL: case ACC_DETECT1: @@ -198,6 +225,7 @@ int ab8500_gpadc_ad_to_voltage(struct ab8500_gpadc *gpadc, u8 channel, break; case MAIN_BAT_V: + case VBAT_TRUE_MEAS: /* For some reason we don't have calibrated data */ if (!gpadc->cal_data[ADC_INPUT_VBAT].gain) { res = ADC_CH_VBAT_MIN + (ADC_CH_VBAT_MAX - @@ -241,6 +269,20 @@ int ab8500_gpadc_ad_to_voltage(struct ab8500_gpadc *gpadc, u8 channel, ADC_RESOLUTION; break; + case IBAT_VIRTUAL_CHANNEL: + /* For some reason we don't have calibrated data */ + if (!gpadc->cal_data[ADC_INPUT_IBAT].gain) { + res = ADC_CH_IBAT_MIN + (ADC_CH_IBAT_MAX - + ADC_CH_IBAT_MIN) * ad_value / + ADC_RESOLUTION; + break; + } + /* Here we can use the calibrated data */ + res = (int) (ad_value * gpadc->cal_data[ADC_INPUT_IBAT].gain + + gpadc->cal_data[ADC_INPUT_IBAT].offset) + >> CALIB_SHIFT_IBAT; + break; + default: dev_err(gpadc->dev, "unknown channel, not possible to convert\n"); @@ -303,10 +345,20 @@ EXPORT_SYMBOL(ab8500_gpadc_convert); */ int ab8500_gpadc_read_raw(struct ab8500_gpadc *gpadc, u8 channel, u8 avg_sample, u8 trig_edge, u8 trig_timer, u8 conv_type) +{ + int raw_data; + raw_data = ab8500_gpadc_double_read_raw(gpadc, channel, + avg_sample, trig_edge, trig_timer, conv_type, NULL); + return raw_data; +} + +int ab8500_gpadc_double_read_raw(struct ab8500_gpadc *gpadc, u8 channel, + u8 avg_sample, u8 trig_edge, u8 trig_timer, u8 conv_type, + int *ibat) { int ret; int looplimit = 0; - u8 val, low_data, high_data; + u8 val, low_data, high_data, low_data2, high_data2; if (!gpadc) return -ENODEV; @@ -359,7 +411,6 @@ int ab8500_gpadc_read_raw(struct ab8500_gpadc *gpadc, u8 channel, default: val = channel | AVG_16; break; - } if (conv_type == ADC_HW) @@ -383,8 +434,8 @@ int ab8500_gpadc_read_raw(struct ab8500_gpadc *gpadc, u8 channel, ret = abx500_mask_and_set_register_interruptible(gpadc->dev, AB8500_GPADC, AB8500_GPADC_CTRL1_REG, EN_FALLING, EN_FALLING); - } + switch (channel) { case MAIN_CHARGER_C: case USB_CHARGER_C: @@ -401,6 +452,55 @@ int ab8500_gpadc_read_raw(struct ab8500_gpadc *gpadc, u8 channel, EN_BUF | EN_ICHAR, EN_BUF | EN_ICHAR); break; + + case XTAL_TEMP: + if (conv_type == ADC_HW) + ret = abx500_mask_and_set_register_interruptible( + gpadc->dev, + AB8500_GPADC, AB8500_GPADC_CTRL1_REG, + EN_BUF | EN_TRIG_EDGE, + EN_BUF | EN_TRIG_EDGE); + else + ret = abx500_mask_and_set_register_interruptible( + gpadc->dev, + AB8500_GPADC, AB8500_GPADC_CTRL1_REG, + EN_BUF , + EN_BUF); + break; + + case VBAT_TRUE_MEAS: + if (conv_type == ADC_HW) + ret = abx500_mask_and_set_register_interruptible( + gpadc->dev, + AB8500_GPADC, AB8500_GPADC_CTRL1_REG, + EN_BUF | EN_TRIG_EDGE, + EN_BUF | EN_TRIG_EDGE); + else + ret = abx500_mask_and_set_register_interruptible( + gpadc->dev, + AB8500_GPADC, AB8500_GPADC_CTRL1_REG, + EN_BUF , + EN_BUF); + break; + + case BAT_CTRL_AND_IBAT: + case VBAT_MEAS_AND_IBAT: + case VBAT_TRUE_MEAS_AND_IBAT: + case BAT_TEMP_AND_IBAT: + if (conv_type == ADC_HW) 
+ ret = abx500_mask_and_set_register_interruptible( + gpadc->dev, + AB8500_GPADC, AB8500_GPADC_CTRL1_REG, + EN_TRIG_EDGE, + EN_TRIG_EDGE); + else + ret = abx500_mask_and_set_register_interruptible( + gpadc->dev, + AB8500_GPADC, AB8500_GPADC_CTRL1_REG, + EN_BUF, + 0); + break; + case BTEMP_BALL: if (!is_ab8500_2p0_or_earlier(gpadc->parent)) { if (conv_type == ADC_HW) @@ -471,21 +571,19 @@ int ab8500_gpadc_read_raw(struct ab8500_gpadc *gpadc, u8 channel, /* wait for completion of conversion */ if (conv_type == ADC_HW) { if (!wait_for_completion_timeout(&gpadc->ab8500_gpadc_complete, - 2*HZ)) { - dev_err(gpadc->dev, - "timeout didn't receive" - " hw GPADC conv interrupt\n"); - ret = -EINVAL; - goto out; + 2 * HZ)) { + dev_err(gpadc->dev, + "timeout didn't receive hw GPADC conv interrupt\n"); + ret = -EINVAL; + goto out; } } else { if (!wait_for_completion_timeout(&gpadc->ab8500_gpadc_complete, - msecs_to_jiffies(CONVERSION_TIME))) { - dev_err(gpadc->dev, - "timeout didn't receive" - " sw GPADC conv interrupt\n"); - ret = -EINVAL; - goto out; + msecs_to_jiffies(CONVERSION_TIME))) { + dev_err(gpadc->dev, + "timeout didn't receive sw GPADC conv interrupt\n"); + ret = -EINVAL; + goto out; } } @@ -523,6 +621,46 @@ int ab8500_gpadc_read_raw(struct ab8500_gpadc *gpadc, u8 channel, goto out; } } + /* Check if double convertion is required */ + if ((channel == BAT_CTRL_AND_IBAT) || + (channel == VBAT_MEAS_AND_IBAT) || + (channel == VBAT_TRUE_MEAS_AND_IBAT) || + (channel == BAT_TEMP_AND_IBAT)) { + + if (conv_type == ADC_HW) { + /* not supported */ + ret = -ENOTSUPP; + dev_err(gpadc->dev, + "gpadc_conversion: only SW double conversion supported\n"); + goto out; + } else { + /* Read the converted RAW data 2 */ + ret = abx500_get_register_interruptible(gpadc->dev, + AB8500_GPADC, AB8540_GPADC_MANDATA2L_REG, + &low_data2); + if (ret < 0) { + dev_err(gpadc->dev, + "gpadc_conversion: read sw low data 2 failed\n"); + goto out; + } + + ret = abx500_get_register_interruptible(gpadc->dev, + AB8500_GPADC, AB8540_GPADC_MANDATA2H_REG, + &high_data2); + if (ret < 0) { + dev_err(gpadc->dev, + "gpadc_conversion: read sw high data 2 failed\n"); + goto out; + } + if (ibat != NULL) { + *ibat = (high_data2 << 8) | low_data2; + } else { + dev_warn(gpadc->dev, + "gpadc_conversion: ibat not stored\n"); + } + + } + } /* Disable GPADC */ ret = abx500_set_register_interruptible(gpadc->dev, AB8500_GPADC, @@ -586,15 +724,27 @@ static int otp_cal_regs[] = { AB8500_GPADC_CAL_7, }; +static int otp4_cal_regs[] = { + AB8540_GPADC_OTP4_REG_7, + AB8540_GPADC_OTP4_REG_6, + AB8540_GPADC_OTP4_REG_5, +}; + static void ab8500_gpadc_read_calibration_data(struct ab8500_gpadc *gpadc) { int i; int ret[ARRAY_SIZE(otp_cal_regs)]; u8 gpadc_cal[ARRAY_SIZE(otp_cal_regs)]; - + int ret_otp4[ARRAY_SIZE(otp4_cal_regs)]; + u8 gpadc_otp4[ARRAY_SIZE(otp4_cal_regs)]; int vmain_high, vmain_low; int btemp_high, btemp_low; int vbat_high, vbat_low; + int ibat_high, ibat_low; + s64 V_gain, V_offset, V2A_gain, V2A_offset; + struct ab8500 *ab8500; + + ab8500 = gpadc->parent; /* First we read all OTP registers and store the error code */ for (i = 0; i < ARRAY_SIZE(otp_cal_regs); i++) { @@ -614,7 +764,7 @@ static void ab8500_gpadc_read_calibration_data(struct ab8500_gpadc *gpadc) * bt_h/l = btemp_high/low * vb_h/l = vbat_high/low * - * Data bits: + * Data bits 8500/9540: * | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 * |.......|.......|.......|.......|.......|.......|.......|....... 
* | | vm_h9 | vm_h8 @@ -632,6 +782,35 @@ static void ab8500_gpadc_read_calibration_data(struct ab8500_gpadc *gpadc) * | vb_l5 | vb_l4 | vb_l3 | vb_l2 | vb_l1 | vb_l0 | * |.......|.......|.......|.......|.......|.......|.......|....... * + * Data bits 8540: + * OTP2 + * | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 + * |.......|.......|.......|.......|.......|.......|.......|....... + * | + * |.......|.......|.......|.......|.......|.......|.......|....... + * | vm_h9 | vm_h8 | vm_h7 | vm_h6 | vm_h5 | vm_h4 | vm_h3 | vm_h2 + * |.......|.......|.......|.......|.......|.......|.......|....... + * | vm_h1 | vm_h0 | vm_l4 | vm_l3 | vm_l2 | vm_l1 | vm_l0 | bt_h9 + * |.......|.......|.......|.......|.......|.......|.......|....... + * | bt_h8 | bt_h7 | bt_h6 | bt_h5 | bt_h4 | bt_h3 | bt_h2 | bt_h1 + * |.......|.......|.......|.......|.......|.......|.......|....... + * | bt_h0 | bt_l4 | bt_l3 | bt_l2 | bt_l1 | bt_l0 | vb_h9 | vb_h8 + * |.......|.......|.......|.......|.......|.......|.......|....... + * | vb_h7 | vb_h6 | vb_h5 | vb_h4 | vb_h3 | vb_h2 | vb_h1 | vb_h0 + * |.......|.......|.......|.......|.......|.......|.......|....... + * | vb_l5 | vb_l4 | vb_l3 | vb_l2 | vb_l1 | vb_l0 | + * |.......|.......|.......|.......|.......|.......|.......|....... + * + * Data bits 8540: + * OTP4 + * | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 + * |.......|.......|.......|.......|.......|.......|.......|....... + * | | ib_h9 | ib_h8 | ib_h7 + * |.......|.......|.......|.......|.......|.......|.......|....... + * | ib_h6 | ib_h5 | ib_h4 | ib_h3 | ib_h2 | ib_h1 | ib_h0 | ib_l5 + * |.......|.......|.......|.......|.......|.......|.......|....... + * | ib_l4 | ib_l3 | ib_l2 | ib_l1 | ib_l0 | + * * * Ideal output ADC codes corresponding to injected input voltages * during manufacturing is: @@ -644,38 +823,96 @@ static void ab8500_gpadc_read_calibration_data(struct ab8500_gpadc *gpadc) * vbat_low: Vin = 2380mV / ADC ideal code = 33 */ - /* Calculate gain and offset for VMAIN if all reads succeeded */ - if (!(ret[0] < 0 || ret[1] < 0 || ret[2] < 0)) { - vmain_high = (((gpadc_cal[0] & 0x03) << 8) | - ((gpadc_cal[1] & 0x3F) << 2) | - ((gpadc_cal[2] & 0xC0) >> 6)); + if (is_ab8540(ab8500)) { + /* Calculate gain and offset for VMAIN if all reads succeeded*/ + if (!(ret[1] < 0 || ret[2] < 0)) { + vmain_high = (((gpadc_cal[1] & 0xFF) << 2) | + ((gpadc_cal[2] & 0xC0) >> 6)); + vmain_low = ((gpadc_cal[2] & 0x3E) >> 1); + gpadc->cal_data[ADC_INPUT_VMAIN].gain = CALIB_SCALE * + (19500 - 315) / (vmain_high - vmain_low); + gpadc->cal_data[ADC_INPUT_VMAIN].offset = CALIB_SCALE * + 19500 - (CALIB_SCALE * (19500 - 315) / + (vmain_high - vmain_low)) * vmain_high; + } else { + gpadc->cal_data[ADC_INPUT_VMAIN].gain = 0; + } - vmain_low = ((gpadc_cal[2] & 0x3E) >> 1); + /* Read IBAT calibration Data */ + for (i = 0; i < ARRAY_SIZE(otp4_cal_regs); i++) { + ret_otp4[i] = abx500_get_register_interruptible( + gpadc->dev, AB8500_OTP_EMUL, + otp4_cal_regs[i], &gpadc_otp4[i]); + if (ret_otp4[i] < 0) + dev_err(gpadc->dev, + "%s: read otp4 reg 0x%02x failed\n", + __func__, otp4_cal_regs[i]); + } - gpadc->cal_data[ADC_INPUT_VMAIN].gain = CALIB_SCALE * - (19500 - 315) / (vmain_high - vmain_low); + /* Calculate gain and offset for IBAT if all reads succeeded */ + if (!(ret_otp4[0] < 0 || ret_otp4[1] < 0 || ret_otp4[2] < 0)) { + ibat_high = (((gpadc_otp4[0] & 0x07) << 7) | + ((gpadc_otp4[1] & 0xFE) >> 1)); + ibat_low = (((gpadc_otp4[1] & 0x01) << 5) | + ((gpadc_otp4[2] & 0xF8) >> 3)); + + V_gain = ((IBAT_VDROP_H - IBAT_VDROP_L) + << CALIB_SHIFT_IBAT) / (ibat_high - 
ibat_low); + + V_offset = (IBAT_VDROP_H << CALIB_SHIFT_IBAT) - + (((IBAT_VDROP_H - IBAT_VDROP_L) << + CALIB_SHIFT_IBAT) / (ibat_high - ibat_low)) + * ibat_high; + /* + * Result obtained is in mV (at a scale factor), + * we need to calculate gain and offset to get mA + */ + V2A_gain = (ADC_CH_IBAT_MAX - ADC_CH_IBAT_MIN)/ + (ADC_CH_IBAT_MAX_V - ADC_CH_IBAT_MIN_V); + V2A_offset = ((ADC_CH_IBAT_MAX_V * ADC_CH_IBAT_MIN - + ADC_CH_IBAT_MAX * ADC_CH_IBAT_MIN_V) + << CALIB_SHIFT_IBAT) + / (ADC_CH_IBAT_MAX_V - ADC_CH_IBAT_MIN_V); + + gpadc->cal_data[ADC_INPUT_IBAT].gain = V_gain * V2A_gain; + gpadc->cal_data[ADC_INPUT_IBAT].offset = V_offset * + V2A_gain + V2A_offset; + } else { + gpadc->cal_data[ADC_INPUT_IBAT].gain = 0; + } - gpadc->cal_data[ADC_INPUT_VMAIN].offset = CALIB_SCALE * 19500 - - (CALIB_SCALE * (19500 - 315) / - (vmain_high - vmain_low)) * vmain_high; + dev_dbg(gpadc->dev, "IBAT gain %llu offset %llu\n", + gpadc->cal_data[ADC_INPUT_IBAT].gain, + gpadc->cal_data[ADC_INPUT_IBAT].offset); } else { - gpadc->cal_data[ADC_INPUT_VMAIN].gain = 0; + /* Calculate gain and offset for VMAIN if all reads succeeded */ + if (!(ret[0] < 0 || ret[1] < 0 || ret[2] < 0)) { + vmain_high = (((gpadc_cal[0] & 0x03) << 8) | + ((gpadc_cal[1] & 0x3F) << 2) | + ((gpadc_cal[2] & 0xC0) >> 6)); + vmain_low = ((gpadc_cal[2] & 0x3E) >> 1); + + gpadc->cal_data[ADC_INPUT_VMAIN].gain = CALIB_SCALE * + (19500 - 315) / (vmain_high - vmain_low); + + gpadc->cal_data[ADC_INPUT_VMAIN].offset = CALIB_SCALE * + 19500 - (CALIB_SCALE * (19500 - 315) / + (vmain_high - vmain_low)) * vmain_high; + } else { + gpadc->cal_data[ADC_INPUT_VMAIN].gain = 0; + } } - /* Calculate gain and offset for BTEMP if all reads succeeded */ if (!(ret[2] < 0 || ret[3] < 0 || ret[4] < 0)) { btemp_high = (((gpadc_cal[2] & 0x01) << 9) | - (gpadc_cal[3] << 1) | - ((gpadc_cal[4] & 0x80) >> 7)); - + (gpadc_cal[3] << 1) | ((gpadc_cal[4] & 0x80) >> 7)); btemp_low = ((gpadc_cal[4] & 0x7C) >> 2); gpadc->cal_data[ADC_INPUT_BTEMP].gain = CALIB_SCALE * (1300 - 21) / (btemp_high - btemp_low); - gpadc->cal_data[ADC_INPUT_BTEMP].offset = CALIB_SCALE * 1300 - - (CALIB_SCALE * (1300 - 21) / - (btemp_high - btemp_low)) * btemp_high; + (CALIB_SCALE * (1300 - 21) / (btemp_high - btemp_low)) + * btemp_high; } else { gpadc->cal_data[ADC_INPUT_BTEMP].gain = 0; } @@ -687,7 +924,6 @@ static void ab8500_gpadc_read_calibration_data(struct ab8500_gpadc *gpadc) gpadc->cal_data[ADC_INPUT_VBAT].gain = CALIB_SCALE * (4700 - 2380) / (vbat_high - vbat_low); - gpadc->cal_data[ADC_INPUT_VBAT].offset = CALIB_SCALE * 4700 - (CALIB_SCALE * (4700 - 2380) / (vbat_high - vbat_low)) * vbat_high; diff --git a/include/linux/mfd/abx500/ab8500-gpadc.h b/include/linux/mfd/abx500/ab8500-gpadc.h index 7694e7ab1880..4131437ace4b 100644 --- a/include/linux/mfd/abx500/ab8500-gpadc.h +++ b/include/linux/mfd/abx500/ab8500-gpadc.h @@ -12,19 +12,32 @@ /* GPADC source: From datasheet(ADCSwSel[4:0] in GPADCCtrl2 * and ADCHwSel[4:0] in GPADCCtrl3 ) */ -#define BAT_CTRL 0x01 -#define BTEMP_BALL 0x02 -#define MAIN_CHARGER_V 0x03 -#define ACC_DETECT1 0x04 -#define ACC_DETECT2 0x05 -#define ADC_AUX1 0x06 -#define ADC_AUX2 0x07 -#define MAIN_BAT_V 0x08 -#define VBUS_V 0x09 -#define MAIN_CHARGER_C 0x0A -#define USB_CHARGER_C 0x0B -#define BK_BAT_V 0x0C -#define DIE_TEMP 0x0D +#define BAT_CTRL 0x01 +#define BTEMP_BALL 0x02 +#define MAIN_CHARGER_V 0x03 +#define ACC_DETECT1 0x04 +#define ACC_DETECT2 0x05 +#define ADC_AUX1 0x06 +#define ADC_AUX2 0x07 +#define MAIN_BAT_V 0x08 +#define VBUS_V 0x09 +#define MAIN_CHARGER_C 
0x0A +#define USB_CHARGER_C 0x0B +#define BK_BAT_V 0x0C +#define DIE_TEMP 0x0D +#define USB_ID 0x0E +#define XTAL_TEMP 0x12 +#define VBAT_TRUE_MEAS 0x13 +#define BAT_CTRL_AND_IBAT 0x1C +#define VBAT_MEAS_AND_IBAT 0x1D +#define VBAT_TRUE_MEAS_AND_IBAT 0x1E +#define BAT_TEMP_AND_IBAT 0x1F + +/* Virtual channel used only for ibat convertion to ampere + * Battery current conversion (ibat) cannot be requested as a single conversion + * but it is always in combination with other input requests + */ +#define IBAT_VIRTUAL_CHANNEL 0xFF #define SAMPLE_1 1 #define SAMPLE_4 4 @@ -37,7 +50,6 @@ #define ADC_SW 0 #define ADC_HW 1 - struct ab8500_gpadc; struct ab8500_gpadc *ab8500_gpadc_get(char *name); @@ -51,6 +63,9 @@ static inline int ab8500_gpadc_convert(struct ab8500_gpadc *gpadc, u8 channel) int ab8500_gpadc_read_raw(struct ab8500_gpadc *gpadc, u8 channel, u8 avg_sample, u8 trig_edge, u8 trig_timer, u8 conv_type); +int ab8500_gpadc_double_read_raw(struct ab8500_gpadc *gpadc, u8 channel, + u8 avg_sample, u8 trig_edge, u8 trig_timer, u8 conv_type, + int *ibat); int ab8500_gpadc_ad_to_voltage(struct ab8500_gpadc *gpadc, u8 channel, int ad_value); -- cgit From bc6b4132bcae4b8e59766ba2dae8f377009b26d0 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Tue, 26 Feb 2013 14:02:31 +0000 Subject: mfd: ab8500-debug: Add support for the AB8540 Allow GPADC debug information to be shown when executing on an AB8540 based platform. Signed-off-by: Alexandre Bourdiol Reviewed-by: Marcus COOPER Reviewed-by: Philippe LANGLAIS Acked-by: Samuel Ortiz --- drivers/mfd/ab8500-debugfs.c | 286 +++++++++++++++++++++++++++++++- drivers/mfd/ab8500-gpadc.c | 44 +++++ include/linux/mfd/abx500/ab8500-gpadc.h | 3 + 3 files changed, 332 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/mfd/ab8500-debugfs.c b/drivers/mfd/ab8500-debugfs.c index 074eea9e4bfd..1e44d65e1771 100644 --- a/drivers/mfd/ab8500-debugfs.c +++ b/drivers/mfd/ab8500-debugfs.c @@ -1633,6 +1633,254 @@ static const struct file_operations ab8500_gpadc_die_temp_fops = { .owner = THIS_MODULE, }; +static int ab8540_gpadc_xtal_temp_print(struct seq_file *s, void *p) +{ + int xtal_temp_raw; + int xtal_temp_convert; + struct ab8500_gpadc *gpadc; + + gpadc = ab8500_gpadc_get("ab8500-gpadc.0"); + xtal_temp_raw = ab8500_gpadc_read_raw(gpadc, XTAL_TEMP, + avg_sample, trig_edge, trig_timer, conv_type); + xtal_temp_convert = ab8500_gpadc_ad_to_voltage(gpadc, XTAL_TEMP, + xtal_temp_raw); + + return seq_printf(s, "%d,0x%X\n", + xtal_temp_convert, xtal_temp_raw); +} + +static int ab8540_gpadc_xtal_temp_open(struct inode *inode, struct file *file) +{ + return single_open(file, ab8540_gpadc_xtal_temp_print, + inode->i_private); +} + +static const struct file_operations ab8540_gpadc_xtal_temp_fops = { + .open = ab8540_gpadc_xtal_temp_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .owner = THIS_MODULE, +}; + +static int ab8540_gpadc_vbat_true_meas_print(struct seq_file *s, void *p) +{ + int vbat_true_meas_raw; + int vbat_true_meas_convert; + struct ab8500_gpadc *gpadc; + + gpadc = ab8500_gpadc_get("ab8500-gpadc.0"); + vbat_true_meas_raw = ab8500_gpadc_read_raw(gpadc, VBAT_TRUE_MEAS, + avg_sample, trig_edge, trig_timer, conv_type); + vbat_true_meas_convert = ab8500_gpadc_ad_to_voltage(gpadc, VBAT_TRUE_MEAS, + vbat_true_meas_raw); + + return seq_printf(s, "%d,0x%X\n", + vbat_true_meas_convert, vbat_true_meas_raw); +} + +static int ab8540_gpadc_vbat_true_meas_open(struct inode *inode, + struct file *file) +{ + return 
single_open(file, ab8540_gpadc_vbat_true_meas_print, + inode->i_private); +} + +static const struct file_operations ab8540_gpadc_vbat_true_meas_fops = { + .open = ab8540_gpadc_vbat_true_meas_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .owner = THIS_MODULE, +}; + +static int ab8540_gpadc_bat_ctrl_and_ibat_print(struct seq_file *s, void *p) +{ + int bat_ctrl_raw; + int bat_ctrl_convert; + int ibat_raw; + int ibat_convert; + struct ab8500_gpadc *gpadc; + + gpadc = ab8500_gpadc_get("ab8500-gpadc.0"); + bat_ctrl_raw = ab8500_gpadc_double_read_raw(gpadc, BAT_CTRL_AND_IBAT, + avg_sample, trig_edge, trig_timer, conv_type, &ibat_raw); + + bat_ctrl_convert = ab8500_gpadc_ad_to_voltage(gpadc, BAT_CTRL, + bat_ctrl_raw); + ibat_convert = ab8500_gpadc_ad_to_voltage(gpadc, IBAT_VIRTUAL_CHANNEL, + ibat_raw); + + return seq_printf(s, "%d,0x%X\n" "%d,0x%X\n", + bat_ctrl_convert, bat_ctrl_raw, + ibat_convert, ibat_raw); +} + +static int ab8540_gpadc_bat_ctrl_and_ibat_open(struct inode *inode, + struct file *file) +{ + return single_open(file, ab8540_gpadc_bat_ctrl_and_ibat_print, + inode->i_private); +} + +static const struct file_operations ab8540_gpadc_bat_ctrl_and_ibat_fops = { + .open = ab8540_gpadc_bat_ctrl_and_ibat_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .owner = THIS_MODULE, +}; + +static int ab8540_gpadc_vbat_meas_and_ibat_print(struct seq_file *s, void *p) +{ + int vbat_meas_raw; + int vbat_meas_convert; + int ibat_raw; + int ibat_convert; + struct ab8500_gpadc *gpadc; + + gpadc = ab8500_gpadc_get("ab8500-gpadc.0"); + vbat_meas_raw = ab8500_gpadc_double_read_raw(gpadc, VBAT_MEAS_AND_IBAT, + avg_sample, trig_edge, trig_timer, conv_type, &ibat_raw); + vbat_meas_convert = ab8500_gpadc_ad_to_voltage(gpadc, MAIN_BAT_V, + vbat_meas_raw); + ibat_convert = ab8500_gpadc_ad_to_voltage(gpadc, IBAT_VIRTUAL_CHANNEL, + ibat_raw); + + return seq_printf(s, "%d,0x%X\n" "%d,0x%X\n", + vbat_meas_convert, vbat_meas_raw, + ibat_convert, ibat_raw); +} + +static int ab8540_gpadc_vbat_meas_and_ibat_open(struct inode *inode, + struct file *file) +{ + return single_open(file, ab8540_gpadc_vbat_meas_and_ibat_print, + inode->i_private); +} + +static const struct file_operations ab8540_gpadc_vbat_meas_and_ibat_fops = { + .open = ab8540_gpadc_vbat_meas_and_ibat_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .owner = THIS_MODULE, +}; + +static int ab8540_gpadc_vbat_true_meas_and_ibat_print(struct seq_file *s, void *p) +{ + int vbat_true_meas_raw; + int vbat_true_meas_convert; + int ibat_raw; + int ibat_convert; + struct ab8500_gpadc *gpadc; + + gpadc = ab8500_gpadc_get("ab8500-gpadc.0"); + vbat_true_meas_raw = ab8500_gpadc_double_read_raw(gpadc, + VBAT_TRUE_MEAS_AND_IBAT, avg_sample, trig_edge, + trig_timer, conv_type, &ibat_raw); + vbat_true_meas_convert = ab8500_gpadc_ad_to_voltage(gpadc, + VBAT_TRUE_MEAS, vbat_true_meas_raw); + ibat_convert = ab8500_gpadc_ad_to_voltage(gpadc, IBAT_VIRTUAL_CHANNEL, + ibat_raw); + + return seq_printf(s, "%d,0x%X\n" "%d,0x%X\n", + vbat_true_meas_convert, vbat_true_meas_raw, + ibat_convert, ibat_raw); +} + +static int ab8540_gpadc_vbat_true_meas_and_ibat_open(struct inode *inode, + struct file *file) +{ + return single_open(file, ab8540_gpadc_vbat_true_meas_and_ibat_print, + inode->i_private); +} + +static const struct file_operations ab8540_gpadc_vbat_true_meas_and_ibat_fops = { + .open = ab8540_gpadc_vbat_true_meas_and_ibat_open, + .read = seq_read, + .llseek = seq_lseek, + .release = 
single_release, + .owner = THIS_MODULE, +}; + +static int ab8540_gpadc_bat_temp_and_ibat_print(struct seq_file *s, void *p) +{ + int bat_temp_raw; + int bat_temp_convert; + int ibat_raw; + int ibat_convert; + struct ab8500_gpadc *gpadc; + + gpadc = ab8500_gpadc_get("ab8500-gpadc.0"); + bat_temp_raw = ab8500_gpadc_double_read_raw(gpadc, BAT_TEMP_AND_IBAT, + avg_sample, trig_edge, trig_timer, conv_type, &ibat_raw); + bat_temp_convert = ab8500_gpadc_ad_to_voltage(gpadc, BTEMP_BALL, + bat_temp_raw); + ibat_convert = ab8500_gpadc_ad_to_voltage(gpadc, IBAT_VIRTUAL_CHANNEL, + ibat_raw); + + return seq_printf(s, "%d,0x%X\n" "%d,0x%X\n", + bat_temp_convert, bat_temp_raw, + ibat_convert, ibat_raw); +} + +static int ab8540_gpadc_bat_temp_and_ibat_open(struct inode *inode, + struct file *file) +{ + return single_open(file, ab8540_gpadc_bat_temp_and_ibat_print, + inode->i_private); +} + +static const struct file_operations ab8540_gpadc_bat_temp_and_ibat_fops = { + .open = ab8540_gpadc_bat_temp_and_ibat_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .owner = THIS_MODULE, +}; + +static int ab8540_gpadc_otp_cal_print(struct seq_file *s, void *p) +{ + struct ab8500_gpadc *gpadc; + u16 vmain_l, vmain_h, btemp_l, btemp_h; + u16 vbat_l, vbat_h, ibat_l, ibat_h; + + gpadc = ab8500_gpadc_get("ab8500-gpadc.0"); + ab8540_gpadc_get_otp(gpadc, &vmain_l, &vmain_h, &btemp_l, &btemp_h, + &vbat_l, &vbat_h, &ibat_l, &ibat_h); + return seq_printf(s, "VMAIN_L:0x%X\n" + "VMAIN_H:0x%X\n" + "BTEMP_L:0x%X\n" + "BTEMP_H:0x%X\n" + "VBAT_L:0x%X\n" + "VBAT_H:0x%X\n" + "IBAT_L:0x%X\n" + "IBAT_H:0x%X\n" + , + vmain_l, + vmain_h, + btemp_l, + btemp_h, + vbat_l, + vbat_h, + ibat_l, + ibat_h); +} + +static int ab8540_gpadc_otp_cal_open(struct inode *inode, struct file *file) +{ + return single_open(file, ab8540_gpadc_otp_cal_print, inode->i_private); +} + +static const struct file_operations ab8540_gpadc_otp_calib_fops = { + .open = ab8540_gpadc_otp_cal_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .owner = THIS_MODULE, +}; + static int ab8500_gpadc_avg_sample_print(struct seq_file *s, void *p) { return seq_printf(s, "%d\n", avg_sample); @@ -2386,7 +2634,43 @@ static int ab8500_debug_probe(struct platform_device *plf) ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_die_temp_fops); if (!file) goto err; - + if (is_ab8540(ab8500)) { + file = debugfs_create_file("xtal_temp", (S_IRUGO | S_IWUGO), + ab8500_gpadc_dir, &plf->dev, &ab8540_gpadc_xtal_temp_fops); + if (!file) + goto err; + file = debugfs_create_file("vbattruemeas", (S_IRUGO | S_IWUGO), + ab8500_gpadc_dir, &plf->dev, + &ab8540_gpadc_vbat_true_meas_fops); + if (!file) + goto err; + file = debugfs_create_file("batctrl_and_ibat", + (S_IRUGO | S_IWUGO), ab8500_gpadc_dir, + &plf->dev, &ab8540_gpadc_bat_ctrl_and_ibat_fops); + if (!file) + goto err; + file = debugfs_create_file("vbatmeas_and_ibat", + (S_IRUGO | S_IWUGO), ab8500_gpadc_dir, + &plf->dev, + &ab8540_gpadc_vbat_meas_and_ibat_fops); + if (!file) + goto err; + file = debugfs_create_file("vbattruemeas_and_ibat", + (S_IRUGO | S_IWUGO), ab8500_gpadc_dir, + &plf->dev, + &ab8540_gpadc_vbat_true_meas_and_ibat_fops); + if (!file) + goto err; + file = debugfs_create_file("battemp_and_ibat", + (S_IRUGO | S_IWUGO), ab8500_gpadc_dir, + &plf->dev, &ab8540_gpadc_bat_temp_and_ibat_fops); + if (!file) + goto err; + file = debugfs_create_file("otp_calib", (S_IRUGO | S_IWUGO), + ab8500_gpadc_dir, &plf->dev, &ab8540_gpadc_otp_calib_fops); + if (!file) + goto err; + } file = 
debugfs_create_file("avg_sample", (S_IRUGO | S_IWUGO), ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_avg_sample_fops); if (!file) diff --git a/drivers/mfd/ab8500-gpadc.c b/drivers/mfd/ab8500-gpadc.c index c985b90577f6..e3535c74d5fe 100644 --- a/drivers/mfd/ab8500-gpadc.c +++ b/drivers/mfd/ab8500-gpadc.c @@ -135,6 +135,8 @@ enum cal_channels { struct adc_cal_data { s64 gain; s64 offset; + u16 otp_calib_hi; + u16 otp_calib_lo; }; /** @@ -829,6 +831,12 @@ static void ab8500_gpadc_read_calibration_data(struct ab8500_gpadc *gpadc) vmain_high = (((gpadc_cal[1] & 0xFF) << 2) | ((gpadc_cal[2] & 0xC0) >> 6)); vmain_low = ((gpadc_cal[2] & 0x3E) >> 1); + + gpadc->cal_data[ADC_INPUT_VMAIN].otp_calib_hi = + (u16)vmain_high; + gpadc->cal_data[ADC_INPUT_VMAIN].otp_calib_lo = + (u16)vmain_low; + gpadc->cal_data[ADC_INPUT_VMAIN].gain = CALIB_SCALE * (19500 - 315) / (vmain_high - vmain_low); gpadc->cal_data[ADC_INPUT_VMAIN].offset = CALIB_SCALE * @@ -856,6 +864,11 @@ static void ab8500_gpadc_read_calibration_data(struct ab8500_gpadc *gpadc) ibat_low = (((gpadc_otp4[1] & 0x01) << 5) | ((gpadc_otp4[2] & 0xF8) >> 3)); + gpadc->cal_data[ADC_INPUT_IBAT].otp_calib_hi = + (u16)ibat_high; + gpadc->cal_data[ADC_INPUT_IBAT].otp_calib_lo = + (u16)ibat_low; + V_gain = ((IBAT_VDROP_H - IBAT_VDROP_L) << CALIB_SHIFT_IBAT) / (ibat_high - ibat_low); @@ -892,6 +905,11 @@ static void ab8500_gpadc_read_calibration_data(struct ab8500_gpadc *gpadc) ((gpadc_cal[2] & 0xC0) >> 6)); vmain_low = ((gpadc_cal[2] & 0x3E) >> 1); + gpadc->cal_data[ADC_INPUT_VMAIN].otp_calib_hi = + (u16)vmain_high; + gpadc->cal_data[ADC_INPUT_VMAIN].otp_calib_lo = + (u16)vmain_low; + gpadc->cal_data[ADC_INPUT_VMAIN].gain = CALIB_SCALE * (19500 - 315) / (vmain_high - vmain_low); @@ -902,12 +920,16 @@ static void ab8500_gpadc_read_calibration_data(struct ab8500_gpadc *gpadc) gpadc->cal_data[ADC_INPUT_VMAIN].gain = 0; } } + /* Calculate gain and offset for BTEMP if all reads succeeded */ if (!(ret[2] < 0 || ret[3] < 0 || ret[4] < 0)) { btemp_high = (((gpadc_cal[2] & 0x01) << 9) | (gpadc_cal[3] << 1) | ((gpadc_cal[4] & 0x80) >> 7)); btemp_low = ((gpadc_cal[4] & 0x7C) >> 2); + gpadc->cal_data[ADC_INPUT_BTEMP].otp_calib_hi = (u16)btemp_high; + gpadc->cal_data[ADC_INPUT_BTEMP].otp_calib_lo = (u16)btemp_low; + gpadc->cal_data[ADC_INPUT_BTEMP].gain = CALIB_SCALE * (1300 - 21) / (btemp_high - btemp_low); gpadc->cal_data[ADC_INPUT_BTEMP].offset = CALIB_SCALE * 1300 - @@ -922,6 +944,9 @@ static void ab8500_gpadc_read_calibration_data(struct ab8500_gpadc *gpadc) vbat_high = (((gpadc_cal[4] & 0x03) << 8) | gpadc_cal[5]); vbat_low = ((gpadc_cal[6] & 0xFC) >> 2); + gpadc->cal_data[ADC_INPUT_VBAT].otp_calib_hi = (u16)vbat_high; + gpadc->cal_data[ADC_INPUT_VBAT].otp_calib_lo = (u16)vbat_low; + gpadc->cal_data[ADC_INPUT_VBAT].gain = CALIB_SCALE * (4700 - 2380) / (vbat_high - vbat_low); gpadc->cal_data[ADC_INPUT_VBAT].offset = CALIB_SCALE * 4700 - @@ -1131,6 +1156,25 @@ static void __exit ab8500_gpadc_exit(void) platform_driver_unregister(&ab8500_gpadc_driver); } +/** + * ab8540_gpadc_get_otp() - returns OTP values + * + */ +void ab8540_gpadc_get_otp(struct ab8500_gpadc *gpadc, + u16 *vmain_l, u16 *vmain_h, u16 *btemp_l, u16 *btemp_h, + u16 *vbat_l, u16 *vbat_h, u16 *ibat_l, u16 *ibat_h) +{ + *vmain_l = gpadc->cal_data[ADC_INPUT_VMAIN].otp_calib_lo; + *vmain_h = gpadc->cal_data[ADC_INPUT_VMAIN].otp_calib_hi; + *btemp_l = gpadc->cal_data[ADC_INPUT_BTEMP].otp_calib_lo; + *btemp_h = gpadc->cal_data[ADC_INPUT_BTEMP].otp_calib_hi; + *vbat_l = 
gpadc->cal_data[ADC_INPUT_VBAT].otp_calib_lo; + *vbat_h = gpadc->cal_data[ADC_INPUT_VBAT].otp_calib_hi; + *ibat_l = gpadc->cal_data[ADC_INPUT_IBAT].otp_calib_lo; + *ibat_h = gpadc->cal_data[ADC_INPUT_IBAT].otp_calib_hi; + return ; +} + subsys_initcall_sync(ab8500_gpadc_init); module_exit(ab8500_gpadc_exit); diff --git a/include/linux/mfd/abx500/ab8500-gpadc.h b/include/linux/mfd/abx500/ab8500-gpadc.h index 4131437ace4b..49ded001049b 100644 --- a/include/linux/mfd/abx500/ab8500-gpadc.h +++ b/include/linux/mfd/abx500/ab8500-gpadc.h @@ -68,5 +68,8 @@ int ab8500_gpadc_double_read_raw(struct ab8500_gpadc *gpadc, u8 channel, int *ibat); int ab8500_gpadc_ad_to_voltage(struct ab8500_gpadc *gpadc, u8 channel, int ad_value); +void ab8540_gpadc_get_otp(struct ab8500_gpadc *gpadc, + u16 *vmain_l, u16 *vmain_h, u16 *btemp_l, u16 *btemp_h, + u16 *vbat_l, u16 *vbat_h, u16 *ibat_l, u16 *ibat_h); #endif /* _AB8500_GPADC_H */ -- cgit From 93ff722e88530b9719cbf53be4f3197722461394 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 31 May 2012 16:16:36 +0200 Subject: ab8500-fg: Add power cut feature for ab8505 and ab8540 Add support for a power cut feature which allows user to configure when ab8505 and ab8540 based platforms should shut down system due to low battery. Signed-off-by: Lee Jones --- drivers/mfd/ab8500-core.c | 36 +++ drivers/power/ab8500_bmdata.c | 5 + drivers/power/ab8500_fg.c | 474 +++++++++++++++++++++++++++++++++++ include/linux/mfd/abx500.h | 10 + include/linux/mfd/abx500/ab8500-bm.h | 18 ++ 5 files changed, 543 insertions(+) (limited to 'include/linux') diff --git a/drivers/mfd/ab8500-core.c b/drivers/mfd/ab8500-core.c index c7ff55753a8f..f276352cc9ef 100644 --- a/drivers/mfd/ab8500-core.c +++ b/drivers/mfd/ab8500-core.c @@ -113,6 +113,7 @@ #define AB8500_SWITCH_OFF_STATUS 0x00 #define AB8500_TURN_ON_STATUS 0x00 +#define AB8505_TURN_ON_STATUS_2 0x04 #define AB8500_CH_USBCH_STAT1_REG 0x02 #define VBUS_DET_DBNC100 0x02 @@ -1401,6 +1402,21 @@ static ssize_t show_turn_on_status(struct device *dev, return sprintf(buf, "%#x\n", value); } +static ssize_t show_turn_on_status_2(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int ret; + u8 value; + struct ab8500 *ab8500; + + ab8500 = dev_get_drvdata(dev); + ret = get_register_interruptible(ab8500, AB8500_SYS_CTRL1_BLOCK, + AB8505_TURN_ON_STATUS_2, &value); + if (ret < 0) + return ret; + return sprintf(buf, "%#x\n", (value & 0x1)); +} + static ssize_t show_ab9540_dbbrstn(struct device *dev, struct device_attribute *attr, char *buf) { @@ -1457,6 +1473,7 @@ exit: static DEVICE_ATTR(chip_id, S_IRUGO, show_chip_id, NULL); static DEVICE_ATTR(switch_off_status, S_IRUGO, show_switch_off_status, NULL); static DEVICE_ATTR(turn_on_status, S_IRUGO, show_turn_on_status, NULL); +static DEVICE_ATTR(turn_on_status_2, S_IRUGO, show_turn_on_status_2, NULL); static DEVICE_ATTR(dbbrstn, S_IRUGO | S_IWUSR, show_ab9540_dbbrstn, store_ab9540_dbbrstn); @@ -1467,6 +1484,11 @@ static struct attribute *ab8500_sysfs_entries[] = { NULL, }; +static struct attribute *ab8505_sysfs_entries[] = { + &dev_attr_turn_on_status_2.attr, + NULL, +}; + static struct attribute *ab9540_sysfs_entries[] = { &dev_attr_chip_id.attr, &dev_attr_switch_off_status.attr, @@ -1479,6 +1501,10 @@ static struct attribute_group ab8500_attr_group = { .attrs = ab8500_sysfs_entries, }; +static struct attribute_group ab8505_attr_group = { + .attrs = ab8505_sysfs_entries, +}; + static struct attribute_group ab9540_attr_group = { .attrs = ab9540_sysfs_entries, }; @@ -1719,6 +1745,12 @@ 
static int ab8500_probe(struct platform_device *pdev) else ret = sysfs_create_group(&ab8500->dev->kobj, &ab8500_attr_group); + + if ((is_ab8505(ab8500) || is_ab9540(ab8500)) && + ab8500->chip_id >= AB8500_CUT2P0) + ret = sysfs_create_group(&ab8500->dev->kobj, + &ab8505_attr_group); + if (ret) dev_err(ab8500->dev, "error creating sysfs entries\n"); @@ -1735,6 +1767,10 @@ static int ab8500_remove(struct platform_device *pdev) else sysfs_remove_group(&ab8500->dev->kobj, &ab8500_attr_group); + if ((is_ab8505(ab8500) || is_ab9540(ab8500)) && + ab8500->chip_id >= AB8500_CUT2P0) + sysfs_remove_group(&ab8500->dev->kobj, &ab8505_attr_group); + mfd_remove_devices(ab8500->dev); return 0; diff --git a/drivers/power/ab8500_bmdata.c b/drivers/power/ab8500_bmdata.c index 7a96c0650fbb..e8759763fbe0 100644 --- a/drivers/power/ab8500_bmdata.c +++ b/drivers/power/ab8500_bmdata.c @@ -407,6 +407,11 @@ static const struct abx500_fg_parameters fg = { .battok_raising_th_sel1 = 2860, .maint_thres = 95, .user_cap_limit = 15, + .pcut_enable = 1, + .pcut_max_time = 127, + .pcut_flag_time = 112, + .pcut_max_restart = 15, + .pcut_debounce_time = 2, }; static const struct abx500_maxim_parameters maxi_params = { diff --git a/drivers/power/ab8500_fg.c b/drivers/power/ab8500_fg.c index 25dae4c4b0ef..92f342bcf188 100644 --- a/drivers/power/ab8500_fg.c +++ b/drivers/power/ab8500_fg.c @@ -2344,6 +2344,50 @@ static int ab8500_fg_init_hw_registers(struct ab8500_fg *di) dev_err(di->dev, "BattOk init write failed.\n"); goto out; } + + if (((is_ab8505(di->parent) || is_ab9540(di->parent)) && + abx500_get_chip_id(di->dev) >= AB8500_CUT2P0) + || is_ab8540(di->parent)) { + ret = abx500_set_register_interruptible(di->dev, AB8500_RTC, + AB8505_RTC_PCUT_MAX_TIME_REG, di->bm->fg_params->pcut_max_time); + + if (ret) { + dev_err(di->dev, "%s write failed AB8505_RTC_PCUT_MAX_TIME_REG\n", __func__); + goto out; + } + + ret = abx500_set_register_interruptible(di->dev, AB8500_RTC, + AB8505_RTC_PCUT_FLAG_TIME_REG, di->bm->fg_params->pcut_flag_time); + + if (ret) { + dev_err(di->dev, "%s write failed AB8505_RTC_PCUT_FLAG_TIME_REG\n", __func__); + goto out; + } + + ret = abx500_set_register_interruptible(di->dev, AB8500_RTC, + AB8505_RTC_PCUT_RESTART_REG, di->bm->fg_params->pcut_max_restart); + + if (ret) { + dev_err(di->dev, "%s write failed AB8505_RTC_PCUT_RESTART_REG\n", __func__); + goto out; + } + + ret = abx500_set_register_interruptible(di->dev, AB8500_RTC, + AB8505_RTC_PCUT_DEBOUNCE_REG, di->bm->fg_params->pcut_debounce_time); + + if (ret) { + dev_err(di->dev, "%s write failed AB8505_RTC_PCUT_DEBOUNCE_REG\n", __func__); + goto out; + } + + ret = abx500_set_register_interruptible(di->dev, AB8500_RTC, + AB8505_RTC_PCUT_CTL_STATUS_REG, di->bm->fg_params->pcut_enable); + + if (ret) { + dev_err(di->dev, "%s write failed AB8505_RTC_PCUT_CTL_STATUS_REG\n", __func__); + goto out; + } + } out: return ret; } @@ -2546,6 +2590,428 @@ static int ab8500_fg_sysfs_init(struct ab8500_fg *di) return ret; } + +static ssize_t ab8505_powercut_flagtime_read(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + int ret; + u8 reg_value; + struct power_supply *psy = dev_get_drvdata(dev); + struct ab8500_fg *di; + + di = to_ab8500_fg_device_info(psy); + + ret = abx500_get_register_interruptible(di->dev, AB8500_RTC, + AB8505_RTC_PCUT_FLAG_TIME_REG, &reg_value); + + if (ret < 0) { + dev_err(dev, "Failed to read AB8505_RTC_PCUT_FLAG_TIME_REG\n"); + goto fail; + } + + return scnprintf(buf, PAGE_SIZE, "%d\n", (reg_value & 0x7F)); + +fail: + return
ret; +} + +static ssize_t ab8505_powercut_flagtime_write(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + int ret; + unsigned long reg_value; + struct power_supply *psy = dev_get_drvdata(dev); + struct ab8500_fg *di; + + di = to_ab8500_fg_device_info(psy); + + reg_value = simple_strtoul(buf, NULL, 10); + + if (reg_value > 0x7F) { + dev_err(dev, "Incorrect parameter, echo 0 (1.98s) - 127 (15.625ms) for flagtime\n"); + goto fail; + } + + ret = abx500_set_register_interruptible(di->dev, AB8500_RTC, + AB8505_RTC_PCUT_FLAG_TIME_REG, (u8)reg_value); + + if (ret < 0) + dev_err(dev, "Failed to set AB8505_RTC_PCUT_FLAG_TIME_REG\n"); + +fail: + return count; +} + +static ssize_t ab8505_powercut_maxtime_read(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + int ret; + u8 reg_value; + struct power_supply *psy = dev_get_drvdata(dev); + struct ab8500_fg *di; + + di = to_ab8500_fg_device_info(psy); + + ret = abx500_get_register_interruptible(di->dev, AB8500_RTC, + AB8505_RTC_PCUT_MAX_TIME_REG, &reg_value); + + if (ret < 0) { + dev_err(dev, "Failed to read AB8505_RTC_PCUT_MAX_TIME_REG\n"); + goto fail; + } + + return scnprintf(buf, PAGE_SIZE, "%d\n", (reg_value & 0x7F)); + +fail: + return ret; + +} + +static ssize_t ab8505_powercut_maxtime_write(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + int ret; + int reg_value; + struct power_supply *psy = dev_get_drvdata(dev); + struct ab8500_fg *di; + + di = to_ab8500_fg_device_info(psy); + + reg_value = simple_strtoul(buf, NULL, 10); + if (reg_value > 0x7F) { + dev_err(dev, "Incorrect parameter, echo 0 (0.0s) - 127 (1.98s) for maxtime\n"); + goto fail; + } + + ret = abx500_set_register_interruptible(di->dev, AB8500_RTC, + AB8505_RTC_PCUT_MAX_TIME_REG, (u8)reg_value); + + if (ret < 0) + dev_err(dev, "Failed to set AB8505_RTC_PCUT_MAX_TIME_REG\n"); + +fail: + return count; +} + +static ssize_t ab8505_powercut_restart_read(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + int ret; + u8 reg_value; + struct power_supply *psy = dev_get_drvdata(dev); + struct ab8500_fg *di; + + di = to_ab8500_fg_device_info(psy); + + ret = abx500_get_register_interruptible(di->dev, AB8500_RTC, + AB8505_RTC_PCUT_RESTART_REG, &reg_value); + + if (ret < 0) { + dev_err(dev, "Failed to read AB8505_RTC_PCUT_RESTART_REG\n"); + goto fail; + } + + return scnprintf(buf, PAGE_SIZE, "%d\n", (reg_value & 0xF)); + +fail: + return ret; +} + +static ssize_t ab8505_powercut_restart_write(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + int ret; + int reg_value; + struct power_supply *psy = dev_get_drvdata(dev); + struct ab8500_fg *di; + + di = to_ab8500_fg_device_info(psy); + + reg_value = simple_strtoul(buf, NULL, 10); + if (reg_value > 0xF) { + dev_err(dev, "Incorrect parameter, echo 0 - 15 for number of restart\n"); + goto fail; + } + + ret = abx500_set_register_interruptible(di->dev, AB8500_RTC, + AB8505_RTC_PCUT_RESTART_REG, (u8)reg_value); + + if (ret < 0) + dev_err(dev, "Failed to set AB8505_RTC_PCUT_RESTART_REG\n"); + +fail: + return count; + +} + +static ssize_t ab8505_powercut_timer_read(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + int ret; + u8 reg_value; + struct power_supply *psy = dev_get_drvdata(dev); + struct ab8500_fg *di; + + di = to_ab8500_fg_device_info(psy); + + ret = abx500_get_register_interruptible(di->dev, AB8500_RTC, + AB8505_RTC_PCUT_TIME_REG, &reg_value); + + if (ret < 0) { + dev_err(dev,
"Failed to read AB8505_RTC_PCUT_TIME_REG\n"); + goto fail; + } + + return scnprintf(buf, PAGE_SIZE, "%d\n", (reg_value & 0x7F)); + +fail: + return ret; +} + +static ssize_t ab8505_powercut_restart_counter_read(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + int ret; + u8 reg_value; + struct power_supply *psy = dev_get_drvdata(dev); + struct ab8500_fg *di; + + di = to_ab8500_fg_device_info(psy); + + ret = abx500_get_register_interruptible(di->dev, AB8500_RTC, + AB8505_RTC_PCUT_RESTART_REG, ®_value); + + if (ret < 0) { + dev_err(dev, "Failed to read AB8505_RTC_PCUT_RESTART_REG\n"); + goto fail; + } + + return scnprintf(buf, PAGE_SIZE, "%d\n", (reg_value & 0xF0) >> 4); + +fail: + return ret; +} + +static ssize_t ab8505_powercut_read(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + int ret; + u8 reg_value; + struct power_supply *psy = dev_get_drvdata(dev); + struct ab8500_fg *di; + + di = to_ab8500_fg_device_info(psy); + + ret = abx500_get_register_interruptible(di->dev, AB8500_RTC, + AB8505_RTC_PCUT_CTL_STATUS_REG, ®_value); + + if (ret < 0) + goto fail; + + return scnprintf(buf, PAGE_SIZE, "%d\n", (reg_value & 0x1)); + +fail: + return ret; +} + +static ssize_t ab8505_powercut_write(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + int ret; + int reg_value; + struct power_supply *psy = dev_get_drvdata(dev); + struct ab8500_fg *di; + + di = to_ab8500_fg_device_info(psy); + + reg_value = simple_strtoul(buf, NULL, 10); + if (reg_value > 0x1) { + dev_err(dev, "Incorrect parameter, echo 0/1 to disable/enable Pcut feature\n"); + goto fail; + } + + ret = abx500_set_register_interruptible(di->dev, AB8500_RTC, + AB8505_RTC_PCUT_CTL_STATUS_REG, (u8)reg_value); + + if (ret < 0) + dev_err(dev, "Failed to set AB8505_RTC_PCUT_CTL_STATUS_REG\n"); + +fail: + return count; +} + +static ssize_t ab8505_powercut_flag_read(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + + int ret; + u8 reg_value; + struct power_supply *psy = dev_get_drvdata(dev); + struct ab8500_fg *di; + + di = to_ab8500_fg_device_info(psy); + + ret = abx500_get_register_interruptible(di->dev, AB8500_RTC, + AB8505_RTC_PCUT_CTL_STATUS_REG, ®_value); + + if (ret < 0) { + dev_err(dev, "Failed to read AB8505_RTC_PCUT_CTL_STATUS_REG\n"); + goto fail; + } + + return scnprintf(buf, PAGE_SIZE, "%d\n", ((reg_value & 0x10) >> 4)); + +fail: + return ret; +} + +static ssize_t ab8505_powercut_debounce_read(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + int ret; + u8 reg_value; + struct power_supply *psy = dev_get_drvdata(dev); + struct ab8500_fg *di; + + di = to_ab8500_fg_device_info(psy); + + ret = abx500_get_register_interruptible(di->dev, AB8500_RTC, + AB8505_RTC_PCUT_DEBOUNCE_REG, ®_value); + + if (ret < 0) { + dev_err(dev, "Failed to read AB8505_RTC_PCUT_DEBOUNCE_REG\n"); + goto fail; + } + + return scnprintf(buf, PAGE_SIZE, "%d\n", (reg_value & 0x7)); + +fail: + return ret; +} + +static ssize_t ab8505_powercut_debounce_write(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + int ret; + int reg_value; + struct power_supply *psy = dev_get_drvdata(dev); + struct ab8500_fg *di; + + di = to_ab8500_fg_device_info(psy); + + reg_value = simple_strtoul(buf, NULL, 10); + if (reg_value > 0x7) { + dev_err(dev, "Incorrect parameter, echo 0 to 7 for debounce setting\n"); + goto fail; + } + + ret = abx500_set_register_interruptible(di->dev, AB8500_RTC, + AB8505_RTC_PCUT_DEBOUNCE_REG, 
(u8)reg_value); + + if (ret < 0) + dev_err(dev, "Failed to set AB8505_RTC_PCUT_DEBOUNCE_REG\n"); + +fail: + return count; +} + +static ssize_t ab8505_powercut_enable_status_read(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + int ret; + u8 reg_value; + struct power_supply *psy = dev_get_drvdata(dev); + struct ab8500_fg *di; + + di = to_ab8500_fg_device_info(psy); + + ret = abx500_get_register_interruptible(di->dev, AB8500_RTC, + AB8505_RTC_PCUT_CTL_STATUS_REG, ®_value); + + if (ret < 0) { + dev_err(dev, "Failed to read AB8505_RTC_PCUT_CTL_STATUS_REG\n"); + goto fail; + } + + return scnprintf(buf, PAGE_SIZE, "%d\n", ((reg_value & 0x20) >> 5)); + +fail: + return ret; +} + +static struct device_attribute ab8505_fg_sysfs_psy_attrs[] = { + __ATTR(powercut_flagtime, (S_IRUGO | S_IWUSR | S_IWGRP), + ab8505_powercut_flagtime_read, ab8505_powercut_flagtime_write), + __ATTR(powercut_maxtime, (S_IRUGO | S_IWUSR | S_IWGRP), + ab8505_powercut_maxtime_read, ab8505_powercut_maxtime_write), + __ATTR(powercut_restart_max, (S_IRUGO | S_IWUSR | S_IWGRP), + ab8505_powercut_restart_read, ab8505_powercut_restart_write), + __ATTR(powercut_timer, S_IRUGO, ab8505_powercut_timer_read, NULL), + __ATTR(powercut_restart_counter, S_IRUGO, + ab8505_powercut_restart_counter_read, NULL), + __ATTR(powercut_enable, (S_IRUGO | S_IWUSR | S_IWGRP), + ab8505_powercut_read, ab8505_powercut_write), + __ATTR(powercut_flag, S_IRUGO, ab8505_powercut_flag_read, NULL), + __ATTR(powercut_debounce_time, (S_IRUGO | S_IWUSR | S_IWGRP), + ab8505_powercut_debounce_read, ab8505_powercut_debounce_write), + __ATTR(powercut_enable_status, S_IRUGO, + ab8505_powercut_enable_status_read, NULL), +}; + +static int ab8500_fg_sysfs_psy_create_attrs(struct device *dev) +{ + unsigned int i, j; + struct power_supply *psy = dev_get_drvdata(dev); + struct ab8500_fg *di; + + di = to_ab8500_fg_device_info(psy); + + if (((is_ab8505(di->parent) || is_ab9540(di->parent)) && + abx500_get_chip_id(dev->parent) >= AB8500_CUT2P0) + || is_ab8540(di->parent)) { + for (j = 0; j < ARRAY_SIZE(ab8505_fg_sysfs_psy_attrs); j++) + if (device_create_file(dev, &ab8505_fg_sysfs_psy_attrs[j])) + goto sysfs_psy_create_attrs_failed_ab8505; + } + return 0; +sysfs_psy_create_attrs_failed_ab8505: + dev_err(dev, "Failed creating sysfs psy attrs for ab8505.\n"); + while (j--) + device_remove_file(dev, &ab8505_fg_sysfs_psy_attrs[i]); + + return -EIO; +} + +static void ab8500_fg_sysfs_psy_remove_attrs(struct device *dev) +{ + unsigned int i; + struct power_supply *psy = dev_get_drvdata(dev); + struct ab8500_fg *di; + + di = to_ab8500_fg_device_info(psy); + + if (((is_ab8505(di->parent) || is_ab9540(di->parent)) && + abx500_get_chip_id(dev->parent) >= AB8500_CUT2P0) + || is_ab8540(di->parent)) { + for (i = 0; i < ARRAY_SIZE(ab8505_fg_sysfs_psy_attrs); i++) + (void)device_remove_file(dev, &ab8505_fg_sysfs_psy_attrs[i]); + } +} + /* Exposure to the sysfs interface <> */ #if defined(CONFIG_PM) @@ -2607,6 +3073,7 @@ static int ab8500_fg_remove(struct platform_device *pdev) ab8500_fg_sysfs_exit(di); flush_scheduled_work(); + ab8500_fg_sysfs_psy_remove_attrs(di->fg_psy.dev); power_supply_unregister(&di->fg_psy); platform_set_drvdata(pdev, NULL); return ret; @@ -2772,6 +3239,13 @@ static int ab8500_fg_probe(struct platform_device *pdev) goto free_irq; } + ret = ab8500_fg_sysfs_psy_create_attrs(di->fg_psy.dev); + if (ret) { + dev_err(di->dev, "failed to create FG psy\n"); + ab8500_fg_sysfs_exit(di); + goto free_irq; + } + /* Calibrate the fg first time */ 
di->flags.calibrate = true; di->calib_state = AB8500_FG_CALIB_INIT; diff --git a/include/linux/mfd/abx500.h b/include/linux/mfd/abx500.h index 9ead60bc66b7..188aedc322c2 100644 --- a/include/linux/mfd/abx500.h +++ b/include/linux/mfd/abx500.h @@ -89,6 +89,11 @@ struct abx500_fg; * points. * @maint_thres This is the threshold where we stop reporting * battery full while in maintenance, in per cent + * @pcut_enable: Enable power cut feature in ab8505 + * @pcut_max_time: Max time threshold + * @pcut_flag_time: Flagtime threshold + * @pcut_max_restart: Max number of restarts + * @pcut_debounce_time: Sets battery debounce time */ struct abx500_fg_parameters { int recovery_sleep_timer; @@ -106,6 +111,11 @@ struct abx500_fg_parameters { int battok_raising_th_sel1; int user_cap_limit; int maint_thres; + bool pcut_enable; + u8 pcut_max_time; + u8 pcut_flag_time; + u8 pcut_max_restart; + u8 pcut_debounce_time; }; /** diff --git a/include/linux/mfd/abx500/ab8500-bm.h b/include/linux/mfd/abx500/ab8500-bm.h index 8d35bfe164c8..0efbe0efee7f 100644 --- a/include/linux/mfd/abx500/ab8500-bm.h +++ b/include/linux/mfd/abx500/ab8500-bm.h @@ -235,6 +235,14 @@ /* Battery type */ #define BATTERY_UNKNOWN 00 +/* Registers for pcut feature in ab8505 and ab9540 */ +#define AB8505_RTC_PCUT_CTL_STATUS_REG 0x12 +#define AB8505_RTC_PCUT_TIME_REG 0x13 +#define AB8505_RTC_PCUT_MAX_TIME_REG 0x14 +#define AB8505_RTC_PCUT_FLAG_TIME_REG 0x15 +#define AB8505_RTC_PCUT_RESTART_REG 0x16 +#define AB8505_RTC_PCUT_DEBOUNCE_REG 0x17 + /** * struct res_to_temp - defines one point in a temp to res curve. To * be used in battery packs that combines the identification resistor with a @@ -283,6 +291,11 @@ struct ab8500_fg; * points. * @maint_thres This is the threshold where we stop reporting * battery full while in maintenance, in per cent + * @pcut_enable: Enable power cut feature in ab8505 + * @pcut_max_time: Max time threshold + * @pcut_flag_time: Flagtime threshold + * @pcut_max_restart: Max number of restarts + * @pcut_debunce_time: Sets battery debounce time */ struct ab8500_fg_parameters { int recovery_sleep_timer; @@ -299,6 +312,11 @@ struct ab8500_fg_parameters { int battok_raising_th_sel1; int user_cap_limit; int maint_thres; + bool pcut_enable; + u8 pcut_max_time; + u8 pcut_flag_time; + u8 pcut_max_restart; + u8 pcut_debunce_time; }; /** -- cgit From 0f4aa401853e07885707aedfc68c608051b0d6e4 Mon Sep 17 00:00:00 2001 From: Yang QU Date: Tue, 26 Jun 2012 19:25:52 +0800 Subject: ab8500-charger: Add backup battery charge voltages on the ab8540 Add 2.7v, 2.9v, 3.0v, 3.2v and 3.3v charging voltages for backup battery. Before that only 2.5v, 2.6v, 2.8v, 3.1v were available. 
Signed-off-by: Yang QU Signed-off-by: Lee Jones Reviewed-by: Maxime COQUELIN Reviewed-by: Marcus COOPER Tested-by: Xiao Mei ZHANG --- drivers/power/ab8500_charger.c | 19 +++++++++++++++++-- include/linux/mfd/abx500/ab8500-bm.h | 24 ++++++++++++++++++++---- 2 files changed, 37 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/power/ab8500_charger.c b/drivers/power/ab8500_charger.c index 24b30b7ea5ca..fd3fa2beca2b 100644 --- a/drivers/power/ab8500_charger.c +++ b/drivers/power/ab8500_charger.c @@ -2836,6 +2836,7 @@ static int ab8500_charger_usb_get_property(struct power_supply *psy, static int ab8500_charger_init_hw_registers(struct ab8500_charger *di) { int ret = 0; + u8 bup_vch_range = 0, vbup33_vrtcn = 0; /* Setup maximum charger current and voltage for ABB cut2.0 */ if (!is_ab8500_1p1_or_earlier(di->parent)) { @@ -2945,15 +2946,29 @@ static int ab8500_charger_init_hw_registers(struct ab8500_charger *di) } /* Backup battery voltage and current */ + if (di->bm->bkup_bat_v > BUP_VCH_SEL_3P1V) + bup_vch_range = BUP_VCH_RANGE; + if (di->bm->bkup_bat_v == BUP_VCH_SEL_3P3V) + vbup33_vrtcn = VBUP33_VRTCN; + ret = abx500_set_register_interruptible(di->dev, AB8500_RTC, AB8500_RTC_BACKUP_CHG_REG, - di->bm->bkup_bat_v | - di->bm->bkup_bat_i); + (di->bm->bkup_bat_v & 0x3) | di->bm->bkup_bat_i); if (ret) { dev_err(di->dev, "failed to setup backup battery charging\n"); goto out; } + if (is_ab8540(di->parent)) { + ret = abx500_set_register_interruptible(di->dev, + AB8500_RTC, + AB8500_RTC_CTRL1_REG, + bup_vch_range | vbup33_vrtcn); + if (ret) { + dev_err(di->dev, "failed to setup backup battery charging\n"); + goto out; + } + } /* Enable backup battery charging */ abx500_mask_and_set_register_interruptible(di->dev, diff --git a/include/linux/mfd/abx500/ab8500-bm.h b/include/linux/mfd/abx500/ab8500-bm.h index 0efbe0efee7f..a73e05a0441b 100644 --- a/include/linux/mfd/abx500/ab8500-bm.h +++ b/include/linux/mfd/abx500/ab8500-bm.h @@ -105,6 +105,7 @@ #define AB8500_RTC_BACKUP_CHG_REG 0x0C #define AB8500_RTC_CC_CONF_REG 0x01 #define AB8500_RTC_CTRL_REG 0x0B +#define AB8500_RTC_CTRL1_REG 0x11 /* * OTP register offsets @@ -179,10 +180,25 @@ #define BUP_ICH_SEL_300UA 0x08 #define BUP_ICH_SEL_700UA 0x0C -#define BUP_VCH_SEL_2P5V 0x00 -#define BUP_VCH_SEL_2P6V 0x01 -#define BUP_VCH_SEL_2P8V 0x02 -#define BUP_VCH_SEL_3P1V 0x03 +enum bup_vch_sel { + BUP_VCH_SEL_2P5V, + BUP_VCH_SEL_2P6V, + BUP_VCH_SEL_2P8V, + BUP_VCH_SEL_3P1V, + /* + * Note that the following 5 values 2.7v, 2.9v, 3.0v, 3.2v, 3.3v + * are only available on ab8540. You can't choose these 5 + * voltages on ab8500/ab8505/ab9540. + */ + BUP_VCH_SEL_2P7V, + BUP_VCH_SEL_2P9V, + BUP_VCH_SEL_3P0V, + BUP_VCH_SEL_3P2V, + BUP_VCH_SEL_3P3V, +}; + +#define BUP_VCH_RANGE 0x02 +#define VBUP33_VRTCN 0x01 /* Battery OVV constants */ #define BATT_OVV_ENA 0x02 -- cgit From 4dcdf57773fd45b483fc7613b9e51b89a57d655c Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 14 Feb 2013 09:24:10 +0000 Subject: ab8500-bm: Quick re-attach charging behaviour Due to a bug in some AB8500 ASICs charger removal cannot always be detected if the removal and reinsertion is done too close in time. This patch detects the above described case and handles the situation so that charging will be kept turned on.
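[Editorial outline, condensed from ab8500_charger_usb_check_enable() in the diff below; error handling and the analogous AC variant are omitted. If software still sees a connected charger but the hardware enable bit has silently dropped, the drop counter is reset and charging is switched back on:]

	if (di->usb.charger_connected) {
		abx500_get_register_interruptible(di->dev, AB8500_CHARGER,
						  AB8500_USBCH_CTRL1_REG,
						  &usbch_ctrl1);
		if (!(usbch_ctrl1 & USB_CH_ENA)) {
			/* reset the input current drop counter ... */
			abx500_mask_and_set_register_interruptible(di->dev,
				AB8500_CHARGER, AB8500_CHARGER_CTRL,
				DROP_COUNT_RESET, DROP_COUNT_RESET);
			/* ... and re-enable the VBUS charger */
			ab8500_charger_usb_en(&di->usb_chg, true, vset, iset);
		}
	}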
Signed-off-by: Lee Jones --- drivers/power/ab8500_charger.c | 105 +++++++++++++++++++++++++++++- drivers/power/abx500_chargalg.c | 33 ++++++++++ include/linux/mfd/abx500/ux500_chargalg.h | 1 + 3 files changed, 137 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/power/ab8500_charger.c b/drivers/power/ab8500_charger.c index dcd3c6feca97..3eb23cf9ff47 100644 --- a/drivers/power/ab8500_charger.c +++ b/drivers/power/ab8500_charger.c @@ -52,6 +52,7 @@ #define VBUS_DET_DBNC100 0x02 #define VBUS_DET_DBNC1 0x01 #define OTP_ENABLE_WD 0x01 +#define DROP_COUNT_RESET 0x01 #define MAIN_CH_INPUT_CURR_SHIFT 4 #define VBUS_IN_CURR_LIM_SHIFT 4 @@ -1677,6 +1678,105 @@ static int ab8500_charger_usb_en(struct ux500_charger *charger, return ret; } +/** + * ab8500_charger_usb_check_enable() - enable usb charging + * @charger: pointer to the ux500_charger structure + * @vset: charging voltage + * @iset: charger output current + * + * Check if the VBUS charger has been disconnected and reconnected without + * AB8500 raising an interrupt. Returns 0 on success. + */ +static int ab8500_charger_usb_check_enable(struct ux500_charger *charger, + int vset, int iset) +{ + u8 usbch_ctrl1 = 0; + int ret = 0; + + struct ab8500_charger *di = to_ab8500_charger_usb_device_info(charger); + + if (!di->usb.charger_connected) + return ret; + + ret = abx500_get_register_interruptible(di->dev, AB8500_CHARGER, + AB8500_USBCH_CTRL1_REG, &usbch_ctrl1); + if (ret < 0) { + dev_err(di->dev, "ab8500 read failed %d\n", __LINE__); + return ret; + } + dev_dbg(di->dev, "USB charger ctrl: 0x%02x\n", usbch_ctrl1); + + if (!(usbch_ctrl1 & USB_CH_ENA)) { + dev_info(di->dev, "Charging has been disabled abnormally and will be re-enabled\n"); + + ret = abx500_mask_and_set_register_interruptible(di->dev, + AB8500_CHARGER, AB8500_CHARGER_CTRL, + DROP_COUNT_RESET, DROP_COUNT_RESET); + if (ret < 0) { + dev_err(di->dev, "ab8500 write failed %d\n", __LINE__); + return ret; + } + + ret = ab8500_charger_usb_en(&di->usb_chg, true, vset, iset); + if (ret < 0) { + dev_err(di->dev, "Failed to enable VBUS charger %d\n", + __LINE__); + return ret; + } + } + return ret; +} + +/** + * ab8500_charger_ac_check_enable() - enable ac charging + * @charger: pointer to the ux500_charger structure + * @vset: charging voltage + * @iset: charger output current + * + * Check if the AC charger has been disconnected and reconnected without + * AB8500 raising an interrupt. Returns 0 on success.
+ */ +static int ab8500_charger_ac_check_enable(struct ux500_charger *charger, + int vset, int iset) +{ + u8 mainch_ctrl1 = 0; + int ret = 0; + + struct ab8500_charger *di = to_ab8500_charger_ac_device_info(charger); + + if (!di->ac.charger_connected) + return ret; + + ret = abx500_get_register_interruptible(di->dev, AB8500_CHARGER, + AB8500_MCH_CTRL1, &mainch_ctrl1); + if (ret < 0) { + dev_err(di->dev, "ab8500 read failed %d\n", __LINE__); + return ret; + } + dev_dbg(di->dev, "AC charger ctrl: 0x%02x\n", mainch_ctrl1); + + if (!(mainch_ctrl1 & MAIN_CH_ENA)) { + dev_info(di->dev, "Charging has been disabled abnormally and will be re-enabled\n"); + + ret = abx500_mask_and_set_register_interruptible(di->dev, + AB8500_CHARGER, AB8500_CHARGER_CTRL, + DROP_COUNT_RESET, DROP_COUNT_RESET); + + if (ret < 0) { + dev_err(di->dev, "ab8500 write failed %d\n", __LINE__); + return ret; + } + + ret = ab8500_charger_ac_en(&di->ac_chg, true, vset, iset); + if (ret < 0) { + dev_err(di->dev, "failed to enable AC charger %d\n", + __LINE__); + return ret; + } + } + return ret; +} + /** * ab8500_charger_watchdog_kick() - kick charger watchdog * @di: pointer to the ab8500_charger structure @@ -1734,8 +1834,7 @@ static int ab8500_charger_update_charger_current(struct ux500_charger *charger, /* Reset the main and usb drop input current measurement counter */ ret = abx500_set_register_interruptible(di->dev, AB8500_CHARGER, - AB8500_CHARGER_CTRL, - 0x1); + AB8500_CHARGER_CTRL, DROP_COUNT_RESET); if (ret) { dev_err(di->dev, "%s write failed\n", __func__); return ret; } @@ -3221,6 +3320,7 @@ static int ab8500_charger_probe(struct platform_device *pdev) di->ac_chg.psy.num_supplicants = ARRAY_SIZE(supply_interface), /* ux500_charger sub-class */ di->ac_chg.ops.enable = &ab8500_charger_ac_en; + di->ac_chg.ops.check_enable = &ab8500_charger_ac_check_enable; di->ac_chg.ops.kick_wd = &ab8500_charger_watchdog_kick; di->ac_chg.ops.update_curr = &ab8500_charger_update_charger_current; di->ac_chg.max_out_volt = ab8500_charger_voltage_map[ @@ -3242,6 +3342,7 @@ static int ab8500_charger_probe(struct platform_device *pdev) di->usb_chg.psy.num_supplicants = ARRAY_SIZE(supply_interface), /* ux500_charger sub-class */ di->usb_chg.ops.enable = &ab8500_charger_usb_en; + di->usb_chg.ops.check_enable = &ab8500_charger_usb_check_enable; di->usb_chg.ops.kick_wd = &ab8500_charger_watchdog_kick; di->usb_chg.ops.update_curr = &ab8500_charger_update_charger_current; di->usb_chg.max_out_volt = ab8500_charger_voltage_map[ diff --git a/drivers/power/abx500_chargalg.c b/drivers/power/abx500_chargalg.c index 31507bfe549c..8ab65a3a8190 100644 --- a/drivers/power/abx500_chargalg.c +++ b/drivers/power/abx500_chargalg.c @@ -305,6 +305,30 @@ static void abx500_chargalg_state_to(struct abx500_chargalg *di, di->charge_state = state; } +static int abx500_chargalg_check_charger_enable(struct abx500_chargalg *di) +{ + switch (di->charge_state) { + case STATE_NORMAL: + case STATE_MAINTENANCE_A: + case STATE_MAINTENANCE_B: + break; + default: + return 0; + } + + if (di->chg_info.charger_type & USB_CHG) { + return di->usb_chg->ops.check_enable(di->usb_chg, + di->bm->bat_type[di->bm->batt_id].normal_vol_lvl, + di->bm->bat_type[di->bm->batt_id].normal_cur_lvl); + } else if ((di->chg_info.charger_type & AC_CHG) && + !(di->ac_chg->external)) { + return di->ac_chg->ops.check_enable(di->ac_chg, + di->bm->bat_type[di->bm->batt_id].normal_vol_lvl, + di->bm->bat_type[di->bm->batt_id].normal_cur_lvl); + } + return 0; +} + /** * abx500_chargalg_check_charger_connection()
- Check charger connection change * @di: pointer to the abx500_chargalg structure @@ -1219,6 +1243,7 @@ static void abx500_chargalg_external_power_changed(struct power_supply *psy) static void abx500_chargalg_algorithm(struct abx500_chargalg *di) { int charger_status; + int ret; /* Collect data from all power_supply class devices */ class_for_each_device(power_supply_class, NULL, @@ -1229,6 +1254,14 @@ static void abx500_chargalg_algorithm(struct abx500_chargalg *di) abx500_chargalg_check_charger_voltage(di); charger_status = abx500_chargalg_check_charger_connection(di); + + if (is_ab8500(di->parent)) { + ret = abx500_chargalg_check_charger_enable(di); + if (ret < 0) + dev_err(di->dev, "Checking charger is enabled error" + ": Returned Value %d\n", ret); + } + /* * First check if we have a charger connected. * Also we don't allow charging of unknown batteries if configured diff --git a/include/linux/mfd/abx500/ux500_chargalg.h b/include/linux/mfd/abx500/ux500_chargalg.h index d43ac0f35526..110d12f09548 100644 --- a/include/linux/mfd/abx500/ux500_chargalg.h +++ b/include/linux/mfd/abx500/ux500_chargalg.h @@ -17,6 +17,7 @@ struct ux500_charger; struct ux500_charger_ops { int (*enable) (struct ux500_charger *, int, int, int); + int (*check_enable) (struct ux500_charger *, int, int); int (*kick_wd) (struct ux500_charger *); int (*update_curr) (struct ux500_charger *, int); }; -- cgit From 8891716e24d7b0f4b1c3b4fdff641bcb1fb282c4 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 13 Feb 2013 11:39:19 +0000 Subject: ab8500-bm: Charge only mode fixes for the ab9540 Fix for charging not getting enabled in charge only mode by external charger. Signed-off-by: Lee Jones --- drivers/power/ab8500_charger.c | 42 +++++++++++++++++++++++++++++++ drivers/power/abx500_chargalg.c | 14 +++++++++++ drivers/power/pm2301_charger.c | 7 ++++++ include/linux/mfd/abx500/ux500_chargalg.h | 2 ++ 4 files changed, 65 insertions(+) (limited to 'include/linux') diff --git a/drivers/power/ab8500_charger.c b/drivers/power/ab8500_charger.c index 3eb23cf9ff47..f1d712308b02 100644 --- a/drivers/power/ab8500_charger.c +++ b/drivers/power/ab8500_charger.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -97,6 +98,10 @@ #define AB8500_SW_CONTROL_FALLBACK 0x03 /* Wait for enumeration before charging in us */ #define WAIT_ACA_RID_ENUMERATION (5 * 1000) +/* External charger control */ +#define AB8500_SYS_CHARGER_CONTROL_REG 0x52 +#define EXTERNAL_CHARGER_DISABLE_REG_VAL 0x03 +#define EXTERNAL_CHARGER_ENABLE_REG_VAL 0x07 /* UsbLineStatus register - usb types */ enum ab8500_charger_link_status { @@ -1678,6 +1683,29 @@ static int ab8500_charger_usb_en(struct ux500_charger *charger, return ret; } +static int ab8500_external_charger_prepare(struct notifier_block *charger_nb, + unsigned long event, void *data) +{ + int ret; + struct device *dev = data; + /* Toggle external charger control pin */ + ret = abx500_set_register_interruptible(dev, AB8500_SYS_CTRL1_BLOCK, + AB8500_SYS_CHARGER_CONTROL_REG, + EXTERNAL_CHARGER_DISABLE_REG_VAL); + if (ret < 0) { + dev_err(dev, "write reg failed %d\n", ret); + goto out; + } + ret = abx500_set_register_interruptible(dev, AB8500_SYS_CTRL1_BLOCK, + AB8500_SYS_CHARGER_CONTROL_REG, + EXTERNAL_CHARGER_ENABLE_REG_VAL); + if (ret < 0) + dev_err(dev, "Write reg failed %d\n", ret); + +out: + return ret; +} + /** * ab8500_charger_usb_check_enable() - enable usb charging * @charger: pointer to the ux500_charger structure @@ -3221,6 +3249,10 @@ static int
ab8500_charger_suspend(struct platform_device *pdev, #define ab8500_charger_resume NULL #endif +static struct notifier_block charger_nb = { + .notifier_call = ab8500_external_charger_prepare, +}; + static int ab8500_charger_remove(struct platform_device *pdev) { struct ab8500_charger *di = platform_get_drvdata(pdev); @@ -3250,6 +3282,11 @@ static int ab8500_charger_remove(struct platform_device *pdev) /* Delete the work queue */ destroy_workqueue(di->charger_wq); + /* Unregister external charger enable notifier */ + if (!di->ac_chg.enabled) + blocking_notifier_chain_unregister( + &charger_notifier_list, &charger_nb); + flush_scheduled_work(); if (di->usb_chg.enabled) power_supply_unregister(&di->usb_chg.psy); @@ -3331,6 +3368,11 @@ static int ab8500_charger_probe(struct platform_device *pdev) di->ac_chg.enabled = di->bm->ac_enabled; di->ac_chg.external = false; + /*notifier for external charger enabling*/ + if (!di->ac_chg.enabled) + blocking_notifier_chain_register( + &charger_notifier_list, &charger_nb); + /* USB supply */ /* power_supply base class */ di->usb_chg.psy.name = "ab8500_usb"; diff --git a/drivers/power/abx500_chargalg.c b/drivers/power/abx500_chargalg.c index 8ab65a3a8190..a876976678ab 100644 --- a/drivers/power/abx500_chargalg.c +++ b/drivers/power/abx500_chargalg.c @@ -26,6 +26,7 @@ #include #include #include +#include /* Watchdog kick interval */ #define CHG_WD_INTERVAL (6 * HZ) @@ -243,6 +244,9 @@ struct abx500_chargalg { struct kobject chargalg_kobject; }; +/*External charger prepare notifier*/ +BLOCKING_NOTIFIER_HEAD(charger_notifier_list); + /* Main battery properties */ static enum power_supply_property abx500_chargalg_props[] = { POWER_SUPPLY_PROP_STATUS, @@ -503,6 +507,8 @@ static int abx500_chargalg_kick_watchdog(struct abx500_chargalg *di) static int abx500_chargalg_ac_en(struct abx500_chargalg *di, int enable, int vset, int iset) { + static int abx500_chargalg_ex_ac_enable_toggle; + if (!di->ac_chg || !di->ac_chg->ops.enable) return -ENXIO; @@ -515,6 +521,14 @@ static int abx500_chargalg_ac_en(struct abx500_chargalg *di, int enable, di->chg_info.ac_iset = iset; di->chg_info.ac_vset = vset; + /* Enable external charger */ + if (enable && di->ac_chg->external && + !abx500_chargalg_ex_ac_enable_toggle) { + blocking_notifier_call_chain(&charger_notifier_list, + 0, di->dev); + abx500_chargalg_ex_ac_enable_toggle++; + } + return di->ac_chg->ops.enable(di->ac_chg, enable, vset, iset); } diff --git a/drivers/power/pm2301_charger.c b/drivers/power/pm2301_charger.c index b560fa5ac4e7..45ef3b9de6b9 100644 --- a/drivers/power/pm2301_charger.c +++ b/drivers/power/pm2301_charger.c @@ -1059,6 +1059,13 @@ static int pm2xxx_wall_charger_probe(struct i2c_client *i2c_client, ret = pm2xxx_charger_detection(pm2, &val); if ((ret == 0) && val) { + /* + * When boot is due to AC charger plug-in, + * read interrupt registers + */ + pm2xxx_reg_read(pm2, PM2XXX_REG_INT1, &val); + pm2xxx_reg_read(pm2, PM2XXX_REG_INT2, &val); + pm2xxx_reg_read(pm2, PM2XXX_REG_INT4, &val); pm2->ac.charger_connected = 1; pm2->ac_conn = true; power_supply_changed(&pm2->ac_chg.psy); diff --git a/include/linux/mfd/abx500/ux500_chargalg.h b/include/linux/mfd/abx500/ux500_chargalg.h index 110d12f09548..fa831f1e8cf8 100644 --- a/include/linux/mfd/abx500/ux500_chargalg.h +++ b/include/linux/mfd/abx500/ux500_chargalg.h @@ -41,4 +41,6 @@ struct ux500_charger { bool external; }; +extern struct blocking_notifier_head charger_notifier_list; + #endif -- cgit From db43e6c473b57d4e7a55c4bd6edef71f40f13eae Mon Sep 17 00:00:00 
2001 From: Lee Jones Date: Thu, 14 Feb 2013 12:39:15 +0000 Subject: ab8500-bm: Add usb power path support AB8540 supports a power path function in USB charging mode for fast power-up with a dead or weak battery, and it can extend battery life. When USB charging starts, if Vbattrue is below the SW cut-off voltage, power path and pre-charge should be enabled. If Vbattrue is higher than the SW cut-off voltage, power path and pre-charge should be disabled; this makes sure the full current goes to charging the battery. At the end of charge, the power path should be enabled again to avoid charging the battery again. Signed-off-by: Lee Jones --- drivers/power/ab8500_charger.c | 81 +++++++++++++++++++++++++++++++ drivers/power/abx500_chargalg.c | 62 +++++++++++++++++++++++ include/linux/mfd/abx500.h | 1 + include/linux/mfd/abx500/ab8500-bm.h | 12 +++++ include/linux/mfd/abx500/ux500_chargalg.h | 4 ++ 5 files changed, 160 insertions(+) (limited to 'include/linux') diff --git a/drivers/power/ab8500_charger.c b/drivers/power/ab8500_charger.c index 6fea4fdf8701..f249a65b02e1 100644 --- a/drivers/power/ab8500_charger.c +++ b/drivers/power/ab8500_charger.c @@ -1925,6 +1925,67 @@ static int ab8500_charger_update_charger_current(struct ux500_charger *charger, return ret; } +/** + * ab8540_charger_power_path_enable() - enable usb power path mode + * @charger: pointer to the ux500_charger structure + * @enable: enable/disable flag + * + * Enable or disable the power path for usb mode + * Returns error code in case of failure else 0 (on success) + */ +static int ab8540_charger_power_path_enable(struct ux500_charger *charger, + bool enable) +{ + int ret; + struct ab8500_charger *di; + + if (charger->psy.type == POWER_SUPPLY_TYPE_USB) + di = to_ab8500_charger_usb_device_info(charger); + else + return -ENXIO; + + ret = abx500_mask_and_set_register_interruptible(di->dev, + AB8500_CHARGER, AB8540_USB_PP_MODE_REG, + BUS_POWER_PATH_MODE_ENA, enable); + if (ret) { + dev_err(di->dev, "%s write failed\n", __func__); + return ret; + } + + return ret; +} + +/** + * ab8540_charger_usb_pre_chg_enable() - enable usb pre-charge + * @charger: pointer to the ux500_charger structure + * @enable: enable/disable flag + * + * Enable or disable the pre-charge for usb mode + * Returns error code in case of failure else 0 (on success) + */ +static int ab8540_charger_usb_pre_chg_enable(struct ux500_charger *charger, + bool enable) +{ + int ret; + struct ab8500_charger *di; + + if (charger->psy.type == POWER_SUPPLY_TYPE_USB) + di = to_ab8500_charger_usb_device_info(charger); + else + return -ENXIO; + + ret = abx500_mask_and_set_register_interruptible(di->dev, + AB8500_CHARGER, AB8540_USB_PP_CHR_REG, + BUS_POWER_PATH_PRECHG_ENA, enable); + if (ret) { + dev_err(di->dev, "%s write failed\n", __func__); + return ret; + } + + return ret; +} + static int ab8500_charger_get_ext_psy_data(struct device *dev, void *data) { struct power_supply *psy; @@ -3201,6 +3262,23 @@ static int ab8500_charger_init_hw_registers(struct ab8500_charger *di) if (ret < 0) dev_err(di->dev, "%s mask and set failed\n", __func__); + if (is_ab8540(di->parent)) { + ret = abx500_mask_and_set_register_interruptible(di->dev, + AB8500_CHARGER, AB8540_USB_PP_MODE_REG, + BUS_VSYS_VOL_SELECT_MASK, BUS_VSYS_VOL_SELECT_3P6V); + if (ret) { + dev_err(di->dev, "failed to setup usb power path vsys voltage\n"); + goto out; + } + ret = abx500_mask_and_set_register_interruptible(di->dev, + AB8500_CHARGER, AB8540_USB_PP_CHR_REG, + BUS_PP_PRECHG_CURRENT_MASK, 0); + if (ret) { + dev_err(di->dev, "failed to setup usb power path precharge current\n"); + goto out; + } + } + out: return ret; } @@ -3484,6 +3562,8 @@ static int ab8500_charger_probe(struct platform_device *pdev) di->usb_chg.ops.check_enable = &ab8500_charger_usb_check_enable; di->usb_chg.ops.kick_wd = &ab8500_charger_watchdog_kick; di->usb_chg.ops.update_curr = &ab8500_charger_update_charger_current; + di->usb_chg.ops.pp_enable = &ab8540_charger_power_path_enable; + di->usb_chg.ops.pre_chg_enable = &ab8540_charger_usb_pre_chg_enable; di->usb_chg.max_out_volt = ab8500_charger_voltage_map[ ARRAY_SIZE(ab8500_charger_voltage_map) - 1]; di->usb_chg.max_out_curr = ab8500_charger_current_map[ @@ -3491,6 +3571,7 @@ static int ab8500_charger_probe(struct platform_device *pdev) di->usb_chg.wdt_refresh = CHG_WD_INTERVAL; di->usb_chg.enabled = di->bm->usb_enabled; di->usb_chg.external = false; + di->usb_chg.power_path = di->bm->usb_power_path; di->usb_state.usb_current = -1; /* Create a work queue for the charger */ diff --git a/drivers/power/abx500_chargalg.c b/drivers/power/abx500_chargalg.c index a876976678ab..a9b8efdafb8f 100644 --- a/drivers/power/abx500_chargalg.c +++ b/drivers/power/abx500_chargalg.c @@ -34,6 +34,9 @@ /* End-of-charge criteria counter */ #define EOC_COND_CNT 10 +/* Plus margin for the low battery threshold */ +#define BAT_PLUS_MARGIN (100) + #define to_abx500_chargalg_device_info(x) container_of((x), \ struct abx500_chargalg, chargalg_psy); @@ -83,6 +86,7 @@ enum abx500_chargalg_states { STATE_HW_TEMP_PROTECT_INIT, STATE_HW_TEMP_PROTECT, STATE_NORMAL_INIT, + STATE_USB_PP_PRE_CHARGE, STATE_NORMAL, STATE_WAIT_FOR_RECHARGE_INIT, STATE_WAIT_FOR_RECHARGE, @@ -114,6 +118,7 @@ static const char *states[] = { "HW_TEMP_PROTECT_INIT", "HW_TEMP_PROTECT", "NORMAL_INIT", + "USB_PP_PRE_CHARGE", "NORMAL", "WAIT_FOR_RECHARGE_INIT", "WAIT_FOR_RECHARGE", @@ -560,6 +565,37 @@ static int abx500_chargalg_usb_en(struct abx500_chargalg *di, int enable, return di->usb_chg->ops.enable(di->usb_chg, enable, vset, iset); } +/** + * ab8540_chargalg_usb_pp_en() - Enable/disable USB power path + * @di: pointer to the abx500_chargalg structure + * @enable: power path enable/disable + * + * The USB power path will be enabled/disabled + */ +static int ab8540_chargalg_usb_pp_en(struct abx500_chargalg *di, bool enable) +{ + if (!di->usb_chg || !di->usb_chg->ops.pp_enable) + return -ENXIO; + + return di->usb_chg->ops.pp_enable(di->usb_chg, enable); +} + +/** + * ab8540_chargalg_usb_pre_chg_en() - Enable/disable USB pre-charge + * @di: pointer to the abx500_chargalg structure + * @enable: USB pre-charge enable/disable + * + * The USB pre-charge will be enabled/disabled + */ +static int ab8540_chargalg_usb_pre_chg_en(struct abx500_chargalg *di, + bool enable) +{ + if (!di->usb_chg || !di->usb_chg->ops.pre_chg_enable) + return -ENXIO; + + return di->usb_chg->ops.pre_chg_enable(di->usb_chg, enable); +} + /** * abx500_chargalg_update_chg_curr() - Update charger current * @di: pointer to the abx500_chargalg structure @@ -765,6 +801,9 @@ static void abx500_chargalg_end_of_charge(struct abx500_chargalg *di) di->batt_data.avg_curr > 0) { if (++di->eoc_cnt >= EOC_COND_CNT) { di->eoc_cnt = 0; + if ((di->chg_info.charger_type & USB_CHG) && + (di->usb_chg->power_path)) + ab8540_chargalg_usb_pp_en(di, true); di->charge_status = POWER_SUPPLY_STATUS_FULL; di->maintenance_chg = true; dev_dbg(di->dev, "EOC reached!\n"); @@ -1465,6 +1504,22 @@ static void abx500_chargalg_algorithm(struct abx500_chargalg *di) break; case STATE_NORMAL_INIT: + if
((di->chg_info.charger_type & USB_CHG) && + di->usb_chg->power_path) { + if (di->batt_data.volt > + (di->bm->fg_params->lowbat_threshold + + BAT_PLUS_MARGIN)) { + ab8540_chargalg_usb_pre_chg_en(di, false); + ab8540_chargalg_usb_pp_en(di, false); + } else { + ab8540_chargalg_usb_pp_en(di, true); + ab8540_chargalg_usb_pre_chg_en(di, true); + abx500_chargalg_state_to(di, + STATE_USB_PP_PRE_CHARGE); + break; + } + } + abx500_chargalg_start_charging(di, di->bm->bat_type[di->bm->batt_id].normal_vol_lvl, di->bm->bat_type[di->bm->batt_id].normal_cur_lvl); @@ -1479,6 +1534,13 @@ static void abx500_chargalg_algorithm(struct abx500_chargalg *di) break; + case STATE_USB_PP_PRE_CHARGE: + if (di->batt_data.volt > + (di->bm->fg_params->lowbat_threshold + + BAT_PLUS_MARGIN)) + abx500_chargalg_state_to(di, STATE_NORMAL_INIT); + break; + case STATE_NORMAL: handle_maxim_chg_curr(di); if (di->charge_status == POWER_SUPPLY_STATUS_FULL && diff --git a/include/linux/mfd/abx500.h b/include/linux/mfd/abx500.h index 188aedc322c2..cd71d8eadf50 100644 --- a/include/linux/mfd/abx500.h +++ b/include/linux/mfd/abx500.h @@ -267,6 +267,7 @@ struct abx500_bm_data { bool autopower_cfg; bool ac_enabled; bool usb_enabled; + bool usb_power_path; bool no_maintenance; bool capacity_scaling; bool chg_unknown_bat; diff --git a/include/linux/mfd/abx500/ab8500-bm.h b/include/linux/mfd/abx500/ab8500-bm.h index a73e05a0441b..0ebf0c5d1f88 100644 --- a/include/linux/mfd/abx500/ab8500-bm.h +++ b/include/linux/mfd/abx500/ab8500-bm.h @@ -69,6 +69,8 @@ #define AB8500_USBCH_CTRL1_REG 0xC0 #define AB8500_USBCH_CTRL2_REG 0xC1 #define AB8500_USBCH_IPT_CRNTLVL_REG 0xC2 +#define AB8540_USB_PP_MODE_REG 0xC5 +#define AB8540_USB_PP_CHR_REG 0xC6 /* * Gas Gauge register offsets @@ -259,6 +261,16 @@ enum bup_vch_sel { #define AB8505_RTC_PCUT_RESTART_REG 0x16 #define AB8505_RTC_PCUT_DEBOUNCE_REG 0x17 +/* USB Power Path constants for ab8540 */ +#define BUS_VSYS_VOL_SELECT_MASK 0x06 +#define BUS_VSYS_VOL_SELECT_3P6V 0x00 +#define BUS_VSYS_VOL_SELECT_3P325V 0x02 +#define BUS_VSYS_VOL_SELECT_3P9V 0x04 +#define BUS_VSYS_VOL_SELECT_4P3V 0x06 +#define BUS_POWER_PATH_MODE_ENA 0x01 +#define BUS_PP_PRECHG_CURRENT_MASK 0x0E +#define BUS_POWER_PATH_PRECHG_ENA 0x01 + /** * struct res_to_temp - defines one point in a temp to res curve. 
To * be used in battery packs that combine the identification resistor with a diff --git a/include/linux/mfd/abx500/ux500_chargalg.h b/include/linux/mfd/abx500/ux500_chargalg.h index fa831f1e8cf8..234c99143bf7 100644 --- a/include/linux/mfd/abx500/ux500_chargalg.h +++ b/include/linux/mfd/abx500/ux500_chargalg.h @@ -20,6 +20,8 @@ struct ux500_charger_ops { int (*check_enable) (struct ux500_charger *, int, int); int (*kick_wd) (struct ux500_charger *); int (*update_curr) (struct ux500_charger *, int); + int (*pp_enable) (struct ux500_charger *, bool); + int (*pre_chg_enable) (struct ux500_charger *, bool); }; /** @@ -30,6 +32,7 @@ struct ux500_charger_ops { * @max_out_curr maximum output charger current in mA * @enabled indicates if this charger is used or not * @external external charger unit (pm2xxx) + * @power_path USB power path support */ struct ux500_charger { struct power_supply psy; @@ -39,6 +42,7 @@ struct ux500_charger { int wdt_refresh; bool enabled; bool external; + bool power_path; }; extern struct blocking_notifier_head charger_notifier_list; -- cgit From 861a30da53e2c5b9823b5390c1757baaf8f6e356 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 29 Aug 2012 20:36:51 +0800 Subject: ab8500-bm: Add support for the new ab8540 platform Provide AB8540 platform specific information required to run the Battery Management subsystem on AB8540 based devices. For this to happen we see the introduction of separate platform specific data structures and a means by which to process them. Signed-off-by: Lee Jones --- drivers/power/ab8500_bmdata.c | 91 +++++++++++- drivers/power/ab8500_btemp.c | 42 +++++- drivers/power/ab8500_charger.c | 270 +++++++++++++++-------------------- include/linux/mfd/abx500.h | 10 +- include/linux/mfd/abx500/ab8500-bm.h | 5 +- 5 files changed, 248 insertions(+), 170 deletions(-) (limited to 'include/linux') diff --git a/drivers/power/ab8500_bmdata.c b/drivers/power/ab8500_bmdata.c index e8759763fbe0..85742a6d29ff 100644 --- a/drivers/power/ab8500_bmdata.c +++ b/drivers/power/ab8500_bmdata.c @@ -414,13 +414,20 @@ static const struct abx500_fg_parameters fg = { .pcut_debounce_time = 2, }; -static const struct abx500_maxim_parameters maxi_params = { +static const struct abx500_maxim_parameters ab8500_maxi_params = { .ena_maxi = true, .chg_curr = 910, .wait_cycles = 10, .charger_curr_step = 100, }; +static const struct abx500_maxim_parameters abx540_maxi_params = { + .ena_maxi = true, + .chg_curr = 3000, + .wait_cycles = 10, + .charger_curr_step = 200, +}; + static const struct abx500_bm_charger_parameters chg = { .usb_volt_max = 5500, .usb_curr_max = 1500, @@ -428,6 +435,46 @@ static const struct abx500_bm_charger_parameters chg = { .ac_curr_max = 1500, }; +/* + * This array maps the raw hex value to charger output current used by the + * AB8500 values + */ +static int ab8500_charge_output_curr_map[] = { + 100, 200, 300, 400, 500, 600, 700, 800, + 900, 1000, 1100, 1200, 1300, 1400, 1500, 1500, +}; + +static int ab8540_charge_output_curr_map[] = { + 0, 0, 0, 75, 100, 125, 150, 175, + 200, 225, 250, 275, 300, 325, 350, 375, + 400, 425, 450, 475, 500, 525, 550, 575, + 600, 625, 650, 675, 700, 725, 750, 775, + 800, 825, 850, 875, 900, 925, 950, 975, + 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, + 1200, 1225, 1250, 1275, 1300, 1325, 1350, 1375, + 1400, 1425, 1450, 1500, 1600, 1700, 1900, 2000, +}; + +/* + * This array maps the raw hex value to charger input current used by the + * AB8500 values + */ +static int ab8500_charge_input_curr_map[] = { + 50, 98, 193, 290, 380,
450, 500, 600, + 700, 800, 900, 1000, 1100, 1300, 1400, 1500, +}; + +static int ab8540_charge_input_curr_map[] = { + 25, 50, 75, 100, 125, 150, 175, 200, + 225, 250, 275, 300, 325, 350, 375, 400, + 425, 450, 475, 500, 525, 550, 575, 600, + 625, 650, 675, 700, 725, 750, 775, 800, + 825, 850, 875, 900, 925, 950, 975, 1000, + 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1200, + 1225, 1250, 1275, 1300, 1325, 1350, 1375, 1400, + 1425, 1450, 1475, 1500, 1500, 1500, 1500, 1500, +}; + struct abx500_bm_data ab8500_bm_data = { .temp_under = 3, .temp_low = 8, @@ -447,15 +494,53 @@ struct abx500_bm_data ab8500_bm_data = { .fg_res = 100, .cap_levels = &cap_levels, .bat_type = bat_type_thermistor, - .n_btypes = 3, + .n_btypes = ARRAY_SIZE(bat_type_thermistor), .batt_id = 0, .interval_charging = 5, .interval_not_charging = 120, .temp_hysteresis = 3, .gnd_lift_resistance = 34, - .maxi = &maxi_params, + .chg_output_curr = ab8500_charge_output_curr_map, + .n_chg_out_curr = ARRAY_SIZE(ab8500_charge_output_curr_map), + .maxi = &ab8500_maxi_params, .chg_params = &chg, .fg_params = &fg, + .chg_input_curr = ab8500_charge_input_curr_map, + .n_chg_in_curr = ARRAY_SIZE(ab8500_charge_input_curr_map), +}; + +struct abx500_bm_data ab8540_bm_data = { + .temp_under = 3, + .temp_low = 8, + .temp_high = 43, + .temp_over = 48, + .main_safety_tmr_h = 4, + .temp_interval_chg = 20, + .temp_interval_nochg = 120, + .usb_safety_tmr_h = 4, + .bkup_bat_v = BUP_VCH_SEL_2P6V, + .bkup_bat_i = BUP_ICH_SEL_150UA, + .no_maintenance = false, + .capacity_scaling = false, + .adc_therm = ABx500_ADC_THERM_BATCTRL, + .chg_unknown_bat = false, + .enable_overshoot = false, + .fg_res = 100, + .cap_levels = &cap_levels, + .bat_type = bat_type_thermistor, + .n_btypes = ARRAY_SIZE(bat_type_thermistor), + .batt_id = 0, + .interval_charging = 5, + .interval_not_charging = 120, + .temp_hysteresis = 3, + .gnd_lift_resistance = 0, + .maxi = &abx540_maxi_params, + .chg_params = &chg, + .fg_params = &fg, + .chg_output_curr = ab8540_charge_output_curr_map, + .n_chg_out_curr = ARRAY_SIZE(ab8540_charge_output_curr_map), + .chg_input_curr = ab8540_charge_input_curr_map, + .n_chg_in_curr = ARRAY_SIZE(ab8540_charge_input_curr_map), }; int ab8500_bm_of_probe(struct device *dev, diff --git a/drivers/power/ab8500_btemp.c b/drivers/power/ab8500_btemp.c index 91ad3edf6197..7336dcf45f7e 100644 --- a/drivers/power/ab8500_btemp.c +++ b/drivers/power/ab8500_btemp.c @@ -42,6 +42,9 @@ #define BTEMP_BATCTRL_CURR_SRC_16UA 16 #define BTEMP_BATCTRL_CURR_SRC_18UA 18 +#define BTEMP_BATCTRL_CURR_SRC_60UA 60 +#define BTEMP_BATCTRL_CURR_SRC_120UA 120 + #define to_ab8500_btemp_device_info(x) container_of((x), \ struct ab8500_btemp, btemp_psy); @@ -216,7 +219,12 @@ static int ab8500_btemp_curr_source_enable(struct ab8500_btemp *di, /* Only do this for batteries with internal NTC */ if (di->bm->adc_therm == ABx500_ADC_THERM_BATCTRL && enable) { - if (is_ab9540(di->parent) || is_ab8505(di->parent)) { + if (is_ab8540(di->parent)) { + if (di->curr_source == BTEMP_BATCTRL_CURR_SRC_60UA) + curr = BAT_CTRL_60U_ENA; + else + curr = BAT_CTRL_120U_ENA; + } else if (is_ab9540(di->parent) || is_ab8505(di->parent)) { if (di->curr_source == BTEMP_BATCTRL_CURR_SRC_16UA) curr = BAT_CTRL_16U_ENA; else @@ -257,7 +265,14 @@ static int ab8500_btemp_curr_source_enable(struct ab8500_btemp *di, } else if (di->bm->adc_therm == ABx500_ADC_THERM_BATCTRL && !enable) { dev_dbg(di->dev, "Disable BATCTRL curr source\n"); - if (is_ab9540(di->parent) || is_ab8505(di->parent)) { + if (is_ab8540(di->parent)) { + /* 
Write 0 to the curr bits */ + ret = abx500_mask_and_set_register_interruptible( + di->dev, + AB8500_CHARGER, AB8500_BAT_CTRL_CURRENT_SOURCE, + BAT_CTRL_60U_ENA | BAT_CTRL_120U_ENA, + ~(BAT_CTRL_60U_ENA | BAT_CTRL_120U_ENA)); + } else if (is_ab9540(di->parent) || is_ab8505(di->parent)) { /* Write 0 to the curr bits */ ret = abx500_mask_and_set_register_interruptible( di->dev, @@ -314,7 +329,13 @@ static int ab8500_btemp_curr_source_enable(struct ab8500_btemp *di, * if we got an error above */ disable_curr_source: - if (is_ab9540(di->parent) || is_ab8505(di->parent)) { + if (is_ab8540(di->parent)) { + /* Write 0 to the curr bits */ + ret = abx500_mask_and_set_register_interruptible(di->dev, + AB8500_CHARGER, AB8500_BAT_CTRL_CURRENT_SOURCE, + BAT_CTRL_60U_ENA | BAT_CTRL_120U_ENA, + ~(BAT_CTRL_60U_ENA | BAT_CTRL_120U_ENA)); + } else if (is_ab9540(di->parent) || is_ab8505(di->parent)) { /* Write 0 to the curr bits */ ret = abx500_mask_and_set_register_interruptible(di->dev, AB8500_CHARGER, AB8500_BAT_CTRL_CURRENT_SOURCE, @@ -541,7 +562,9 @@ static int ab8500_btemp_id(struct ab8500_btemp *di) { int res; u8 i; - if (is_ab9540(di->parent) || is_ab8505(di->parent)) + if (is_ab8540(di->parent)) + di->curr_source = BTEMP_BATCTRL_CURR_SRC_60UA; + else if (is_ab9540(di->parent) || is_ab8505(di->parent)) di->curr_source = BTEMP_BATCTRL_CURR_SRC_16UA; else di->curr_source = BTEMP_BATCTRL_CURR_SRC_7UA; @@ -582,9 +605,14 @@ static int ab8500_btemp_id(struct ab8500_btemp *di) * detected type is Type 1, else we use the 7uA source */ if (di->bm->adc_therm == ABx500_ADC_THERM_BATCTRL && - di->bm->batt_id == 1) { - if (is_ab9540(di->parent) || is_ab8505(di->parent)) { - dev_dbg(di->dev, "Set BATCTRL current source to 16uA\n"); + di->bm->batt_id == 1) { + if (is_ab8540(di->parent)) { + dev_dbg(di->dev, + "Set BATCTRL current source to 60uA\n"); + di->curr_source = BTEMP_BATCTRL_CURR_SRC_60UA; + } else if (is_ab9540(di->parent) || is_ab8505(di->parent)) { + dev_dbg(di->dev, + "Set BATCTRL current source to 16uA\n"); di->curr_source = BTEMP_BATCTRL_CURR_SRC_16UA; } else { dev_dbg(di->dev, "Set BATCTRL current source to 20uA\n"); diff --git a/drivers/power/ab8500_charger.c b/drivers/power/ab8500_charger.c index f249a65b02e1..6089ee7bc609 100644 --- a/drivers/power/ab8500_charger.c +++ b/drivers/power/ab8500_charger.c @@ -57,7 +57,9 @@ #define MAIN_CH_INPUT_CURR_SHIFT 4 #define VBUS_IN_CURR_LIM_SHIFT 4 +#define AB8540_VBUS_IN_CURR_LIM_SHIFT 2 #define AUTO_VBUS_IN_CURR_LIM_SHIFT 4 +#define AB8540_AUTO_VBUS_IN_CURR_MASK 0x3F #define VBUS_IN_CURR_LIM_RETRY_SET_TIME 30 /* seconds */ #define LED_INDICATOR_PWM_ENA 0x01 @@ -82,6 +84,7 @@ #define AB8500_USB_LINK_STATUS 0x78 #define AB8505_USB_LINK_STATUS 0xF8 #define AB8500_STD_HOST_SUSP 0x18 +#define USB_LINK_STATUS_SHIFT 3 /* Watchdog timeout constant */ #define WD_TIMER 0x30 /* 4min */ @@ -751,8 +754,7 @@ static int ab8500_charger_max_usb_curr(struct ab8500_charger *di, "VBUS has collapsed\n"); ret = -ENXIO; break; - } - if (is_ab9540(di->parent) || is_ab8505(di->parent)) { + } else { dev_dbg(di->dev, "USB Type - Charging not allowed\n"); di->max_usb_in_curr.usb_type_max = USB_CH_IP_CUR_LVL_0P05; @@ -807,30 +809,22 @@ static int ab8500_charger_read_usb_type(struct ab8500_charger *di) dev_err(di->dev, "%s ab8500 read failed\n", __func__); return ret; } - if (is_ab8500(di->parent)) { + if (is_ab8500(di->parent)) ret = abx500_get_register_interruptible(di->dev, AB8500_USB, - AB8500_USB_LINE_STAT_REG, &val); - } else if (is_ab9540(di->parent) || is_ab8505(di->parent)) { - 
ret = abx500_get_register_interruptible(di->dev, - AB8500_USB, AB8500_USB_LINK1_STAT_REG, &val); - } else { - dev_err(di->dev, "%s unsupported analog baseband\n", __func__); - return -ENXIO; - } + AB8500_USB_LINE_STAT_REG, &val); + else + ret = abx500_get_register_interruptible(di->dev, + AB8500_USB, AB8500_USB_LINK1_STAT_REG, &val); if (ret < 0) { dev_err(di->dev, "%s ab8500 read failed\n", __func__); return ret; } /* get the USB type */ - if (is_ab8500(di->parent)) { - val = (val & AB8500_USB_LINK_STATUS) >> 3; - } else if (is_ab9540(di->parent) || is_ab8505(di->parent)) { - val = (val & AB8505_USB_LINK_STATUS) >> 3; - } else { - dev_err(di->dev, "%s unsupported analog baseband\n", __func__); - return -ENXIO; - } + if (is_ab8500(di->parent)) + val = (val & AB8500_USB_LINK_STATUS) >> USB_LINK_STATUS_SHIFT; + else + val = (val & AB8505_USB_LINK_STATUS) >> USB_LINK_STATUS_SHIFT; ret = ab8500_charger_max_usb_curr(di, (enum ab8500_charger_link_status) val); @@ -866,16 +860,12 @@ static int ab8500_charger_detect_usb_type(struct ab8500_charger *di) return ret; } - if (is_ab8500(di->parent)) { + if (is_ab8500(di->parent)) ret = abx500_get_register_interruptible(di->dev, AB8500_USB, AB8500_USB_LINE_STAT_REG, &val); - } else if (is_ab9540(di->parent) || is_ab8505(di->parent)) { + else ret = abx500_get_register_interruptible(di->dev, AB8500_USB, AB8500_USB_LINK1_STAT_REG, &val); - } else { - dev_err(di->dev, "%s unsupported analog baseband\n", __func__); - return -ENXIO; - } if (ret < 0) { dev_err(di->dev, "%s ab8500 read failed\n", __func__); return ret; @@ -889,14 +879,12 @@ static int ab8500_charger_detect_usb_type(struct ab8500_charger *di) */ /* get the USB type */ - if (is_ab8500(di->parent)) { - val = (val & AB8500_USB_LINK_STATUS) >> 3; - } else if (is_ab9540(di->parent) || is_ab8505(di->parent)) { - val = (val & AB8505_USB_LINK_STATUS) >> 3; - } else { - dev_err(di->dev, "%s unsupported analog baseband\n", __func__); - return -ENXIO; - } + if (is_ab8500(di->parent)) + val = (val & AB8500_USB_LINK_STATUS) >> + USB_LINK_STATUS_SHIFT; + else + val = (val & AB8505_USB_LINK_STATUS) >> + USB_LINK_STATUS_SHIFT; if (val) break; } @@ -991,51 +979,6 @@ static int ab8500_charger_voltage_map[] = { 4600 , }; -/* - * This array maps the raw hex value to charger current used by the AB8500 - * Values taken from the UM0836 - */ -static int ab8500_charger_current_map[] = { - 100 , - 200 , - 300 , - 400 , - 500 , - 600 , - 700 , - 800 , - 900 , - 1000 , - 1100 , - 1200 , - 1300 , - 1400 , - 1500 , -}; - -/* - * This array maps the raw hex value to VBUS input current used by the AB8500 - * Values taken from the UM0836 - */ -static int ab8500_charger_vbus_in_curr_map[] = { - USB_CH_IP_CUR_LVL_0P05, - USB_CH_IP_CUR_LVL_0P09, - USB_CH_IP_CUR_LVL_0P19, - USB_CH_IP_CUR_LVL_0P29, - USB_CH_IP_CUR_LVL_0P38, - USB_CH_IP_CUR_LVL_0P45, - USB_CH_IP_CUR_LVL_0P5, - USB_CH_IP_CUR_LVL_0P6, - USB_CH_IP_CUR_LVL_0P7, - USB_CH_IP_CUR_LVL_0P8, - USB_CH_IP_CUR_LVL_0P9, - USB_CH_IP_CUR_LVL_1P0, - USB_CH_IP_CUR_LVL_1P1, - USB_CH_IP_CUR_LVL_1P3, - USB_CH_IP_CUR_LVL_1P4, - USB_CH_IP_CUR_LVL_1P5, -}; - static int ab8500_voltage_to_regval(int voltage) { int i; @@ -1057,41 +1000,41 @@ static int ab8500_voltage_to_regval(int voltage) return -1; } -static int ab8500_current_to_regval(int curr) +static int ab8500_current_to_regval(struct ab8500_charger *di, int curr) { int i; - if (curr < ab8500_charger_current_map[0]) + if (curr < di->bm->chg_output_curr[0]) return 0; - for (i = 0; i < ARRAY_SIZE(ab8500_charger_current_map); i++) { - if 
(curr < ab8500_charger_current_map[i]) + for (i = 0; i < di->bm->n_chg_out_curr; i++) { + if (curr < di->bm->chg_output_curr[i]) return i - 1; } /* If not last element, return error */ - i = ARRAY_SIZE(ab8500_charger_current_map) - 1; - if (curr == ab8500_charger_current_map[i]) + i = di->bm->n_chg_out_curr - 1; + if (curr == di->bm->chg_output_curr[i]) return i; else return -1; } -static int ab8500_vbus_in_curr_to_regval(int curr) +static int ab8500_vbus_in_curr_to_regval(struct ab8500_charger *di, int curr) { int i; - if (curr < ab8500_charger_vbus_in_curr_map[0]) + if (curr < di->bm->chg_input_curr[0]) return 0; - for (i = 0; i < ARRAY_SIZE(ab8500_charger_vbus_in_curr_map); i++) { - if (curr < ab8500_charger_vbus_in_curr_map[i]) + for (i = 0; i < di->bm->n_chg_in_curr; i++) { + if (curr < di->bm->chg_input_curr[i]) return i - 1; } /* If not last element, return error */ - i = ARRAY_SIZE(ab8500_charger_vbus_in_curr_map) - 1; - if (curr == ab8500_charger_vbus_in_curr_map[i]) + i = di->bm->n_chg_in_curr - 1; + if (curr == di->bm->chg_input_curr[i]) return i; else return -1; @@ -1169,7 +1112,7 @@ static int ab8500_charger_set_current(struct ab8500_charger *di, int ich, int reg) { int ret = 0; - int auto_curr_index, curr_index, prev_curr_index, shift_value, i; + int curr_index, prev_curr_index, shift_value, i; u8 reg_value; u32 step_udelay; bool no_stepping = false; @@ -1187,39 +1130,27 @@ static int ab8500_charger_set_current(struct ab8500_charger *di, case AB8500_MCH_IPT_CURLVL_REG: shift_value = MAIN_CH_INPUT_CURR_SHIFT; prev_curr_index = (reg_value >> shift_value); - curr_index = ab8500_current_to_regval(ich); + curr_index = ab8500_current_to_regval(di, ich); step_udelay = STEP_UDELAY; if (!di->ac.charger_connected) no_stepping = true; break; case AB8500_USBCH_IPT_CRNTLVL_REG: - shift_value = VBUS_IN_CURR_LIM_SHIFT; + if (is_ab8540(di->parent)) + shift_value = AB8540_VBUS_IN_CURR_LIM_SHIFT; + else + shift_value = VBUS_IN_CURR_LIM_SHIFT; prev_curr_index = (reg_value >> shift_value); - curr_index = ab8500_vbus_in_curr_to_regval(ich); + curr_index = ab8500_vbus_in_curr_to_regval(di, ich); step_udelay = STEP_UDELAY * 100; - ret = abx500_get_register_interruptible(di->dev, AB8500_CHARGER, - AB8500_CH_USBCH_STAT2_REG, ®_value); - if (ret < 0) { - dev_err(di->dev, "%s read failed\n", __func__); - goto exit_set_current; - } - auto_curr_index = - reg_value >> AUTO_VBUS_IN_CURR_LIM_SHIFT; - - dev_dbg(di->dev, "%s Auto VBUS curr is %d mA\n", - __func__, - ab8500_charger_vbus_in_curr_map[auto_curr_index]); - - prev_curr_index = min(prev_curr_index, auto_curr_index); - if (!di->usb.charger_connected) no_stepping = true; break; case AB8500_CH_OPT_CRNTLVL_REG: shift_value = 0; prev_curr_index = (reg_value >> shift_value); - curr_index = ab8500_current_to_regval(ich); + curr_index = ab8500_current_to_regval(di, ich); step_udelay = STEP_UDELAY; if (curr_index && (curr_index - prev_curr_index) > 1) step_udelay *= 100; @@ -1459,8 +1390,8 @@ static int ab8500_charger_ac_en(struct ux500_charger *charger, /* Check if the requested voltage or current is valid */ volt_index = ab8500_voltage_to_regval(vset); - curr_index = ab8500_current_to_regval(iset); - input_curr_index = ab8500_current_to_regval( + curr_index = ab8500_current_to_regval(di, iset); + input_curr_index = ab8500_current_to_regval(di, di->bm->chg_params->ac_curr_max); if (volt_index < 0 || curr_index < 0 || input_curr_index < 0) { dev_err(di->dev, @@ -1631,7 +1562,7 @@ static int ab8500_charger_usb_en(struct ux500_charger *charger, /* Check if the 
requested voltage or current is valid */ volt_index = ab8500_voltage_to_regval(vset); - curr_index = ab8500_current_to_regval(ich_out); + curr_index = ab8500_current_to_regval(di, ich_out); if (volt_index < 0 || curr_index < 0) { dev_err(di->dev, "Charger voltage or current too high, " @@ -2396,18 +2327,21 @@ static void ab8500_charger_usb_link_status_work(struct work_struct *work) else dev_dbg(di->dev, "Error reading USB link status\n"); - if (is_ab9540(di->parent) || is_ab8505(di->parent)) - link_status = AB8505_USB_LINK_STATUS; - else + if (is_ab8500(di->parent)) link_status = AB8500_USB_LINK_STATUS; + else + link_status = AB8505_USB_LINK_STATUS; if (detected_chargers & USB_PW_CONN) { - if (((val & link_status) >> 3) == USB_STAT_NOT_VALID_LINK && + if (((val & link_status) >> USB_LINK_STATUS_SHIFT) == + USB_STAT_NOT_VALID_LINK && di->invalid_charger_detect_state == 0) { - dev_dbg(di->dev, "Invalid charger detected, state= 0\n"); + dev_dbg(di->dev, + "Invalid charger detected, state= 0\n"); /*Enable charger*/ abx500_mask_and_set_register_interruptible(di->dev, - AB8500_CHARGER, AB8500_USBCH_CTRL1_REG, 0x01, 0x01); + AB8500_CHARGER, AB8500_USBCH_CTRL1_REG, + USB_CH_ENA, USB_CH_ENA); /*Enable charger detection*/ abx500_mask_and_set_register_interruptible(di->dev, AB8500_USB, AB8500_MCH_IPT_CURLVL_REG, 0x01, 0x01); @@ -2417,15 +2351,17 @@ static void ab8500_charger_usb_link_status_work(struct work_struct *work) } if (di->invalid_charger_detect_state == 1) { - dev_dbg(di->dev, "Invalid charger detected, state= 1\n"); + dev_dbg(di->dev, + "Invalid charger detected, state= 1\n"); /*Stop charger detection*/ abx500_mask_and_set_register_interruptible(di->dev, AB8500_USB, AB8500_MCH_IPT_CURLVL_REG, 0x01, 0x00); /*Check link status*/ - ret = abx500_get_register_interruptible(di->dev, AB8500_USB, + ret = abx500_get_register_interruptible(di->dev, + AB8500_USB, AB8500_USB_LINE_STAT_REG, &val); dev_dbg(di->dev, "USB link status= 0x%02x\n", - (val & link_status) >> 3); + (val & link_status) >> USB_LINK_STATUS_SHIFT); di->invalid_charger_detect_state = 2; } } else { @@ -2741,7 +2677,7 @@ static void ab8500_charger_vbus_drop_end_work(struct work_struct *work) { struct ab8500_charger *di = container_of(work, struct ab8500_charger, vbus_drop_end_work.work); - int ret; + int ret, curr; u8 reg_value; di->flags.vbus_drop_end = false; @@ -2749,32 +2685,41 @@ static void ab8500_charger_vbus_drop_end_work(struct work_struct *work) /* Reset the drop counter */ abx500_set_register_interruptible(di->dev, AB8500_CHARGER, AB8500_CHARGER_CTRL, 0x01); - ret = abx500_get_register_interruptible(di->dev, AB8500_CHARGER, - AB8500_CH_USBCH_STAT2_REG, - ®_value); + + if (is_ab8540(di->parent)) + ret = abx500_get_register_interruptible(di->dev, AB8500_CHARGER, + AB8540_CH_USBCH_STAT3_REG, ®_value); + else + ret = abx500_get_register_interruptible(di->dev, AB8500_CHARGER, + AB8500_CH_USBCH_STAT2_REG, ®_value); if (ret < 0) { - dev_err(di->dev, "%s ab8500 read failed\n", __func__); - } else { - int curr = ab8500_charger_vbus_in_curr_map[ + dev_err(di->dev, "%s read failed\n", __func__); + return; + } + + if (is_ab8540(di->parent)) + curr = di->bm->chg_input_curr[ + reg_value & AB8540_AUTO_VBUS_IN_CURR_MASK]; + else + curr = di->bm->chg_input_curr[ reg_value >> AUTO_VBUS_IN_CURR_LIM_SHIFT]; - if (di->max_usb_in_curr.calculated_max != curr) { - /* USB source is collapsing */ - di->max_usb_in_curr.calculated_max = curr; - dev_dbg(di->dev, - "VBUS input current limiting to %d mA\n", - di->max_usb_in_curr.calculated_max); - } else 
{ - /* - * USB source can not give more than this amount. - * Taking more will collapse the source. - */ - di->max_usb_in_curr.set_max = - di->max_usb_in_curr.calculated_max; - dev_dbg(di->dev, - "VBUS input current limited to %d mA\n", - di->max_usb_in_curr.set_max); - return; - } + + if (di->max_usb_in_curr.calculated_max != curr) { + /* USB source is collapsing */ + di->max_usb_in_curr.calculated_max = curr; + dev_dbg(di->dev, + "VBUS input current limiting to %d mA\n", + di->max_usb_in_curr.calculated_max); + } else { + /* + * USB source can not give more than this amount. + * Taking more will collapse the source. + */ + di->max_usb_in_curr.set_max = + di->max_usb_in_curr.calculated_max; + dev_dbg(di->dev, + "VBUS input current limited to %d mA\n", + di->max_usb_in_curr.set_max); } if (di->usb.charger_connected) @@ -3134,9 +3079,14 @@ static int ab8500_charger_init_hw_registers(struct ab8500_charger *di) goto out; } - ret = abx500_set_register_interruptible(di->dev, - AB8500_CHARGER, - AB8500_CH_OPT_CRNTLVL_MAX_REG, CH_OP_CUR_LVL_1P6); + if (is_ab8540(di->parent)) + ret = abx500_set_register_interruptible(di->dev, + AB8500_CHARGER, AB8500_CH_OPT_CRNTLVL_MAX_REG, + CH_OP_CUR_LVL_2P); + else + ret = abx500_set_register_interruptible(di->dev, + AB8500_CHARGER, AB8500_CH_OPT_CRNTLVL_MAX_REG, + CH_OP_CUR_LVL_1P6); if (ret) { dev_err(di->dev, "failed to set CH_OPT_CRNTLVL_MAX_REG\n"); @@ -3144,7 +3094,8 @@ static int ab8500_charger_init_hw_registers(struct ab8500_charger *di) } } - if (is_ab9540_2p0(di->parent) || is_ab8505_2p0(di->parent)) + if (is_ab9540_2p0(di->parent) || is_ab9540_3p0(di->parent) + || is_ab8505_2p0(di->parent) || is_ab8540(di->parent)) ret = abx500_mask_and_set_register_interruptible(di->dev, AB8500_CHARGER, AB8500_USBCH_CTRL2_REG, @@ -3250,7 +3201,8 @@ static int ab8500_charger_init_hw_registers(struct ab8500_charger *di) AB8500_RTC_CTRL1_REG, bup_vch_range | vbup33_vrtcn); if (ret) { - dev_err(di->dev, "failed to setup backup battery charging\n"); + dev_err(di->dev, + "failed to setup backup battery charging\n"); goto out; } } @@ -3267,14 +3219,16 @@ static int ab8500_charger_init_hw_registers(struct ab8500_charger *di) AB8500_CHARGER, AB8540_USB_PP_MODE_REG, BUS_VSYS_VOL_SELECT_MASK, BUS_VSYS_VOL_SELECT_3P6V); if (ret) { - dev_err(di->dev, "failed to setup usb power path vsys voltage\n"); + dev_err(di->dev, + "failed to setup usb power path vsys voltage\n"); goto out; } ret = abx500_mask_and_set_register_interruptible(di->dev, AB8500_CHARGER, AB8540_USB_PP_CHR_REG, BUS_PP_PRECHG_CURRENT_MASK, 0); if (ret) { - dev_err(di->dev, "failed to setup usb power path prechage current\n"); + dev_err(di->dev, + "failed to setup usb power path prechage current\n"); goto out; } } @@ -3537,8 +3491,8 @@ static int ab8500_charger_probe(struct platform_device *pdev) di->ac_chg.ops.update_curr = &ab8500_charger_update_charger_current; di->ac_chg.max_out_volt = ab8500_charger_voltage_map[ ARRAY_SIZE(ab8500_charger_voltage_map) - 1]; - di->ac_chg.max_out_curr = ab8500_charger_current_map[ - ARRAY_SIZE(ab8500_charger_current_map) - 1]; + di->ac_chg.max_out_curr = + di->bm->chg_output_curr[di->bm->n_chg_out_curr - 1]; di->ac_chg.wdt_refresh = CHG_WD_INTERVAL; di->ac_chg.enabled = di->bm->ac_enabled; di->ac_chg.external = false; @@ -3566,8 +3520,8 @@ static int ab8500_charger_probe(struct platform_device *pdev) di->usb_chg.ops.pre_chg_enable = &ab8540_charger_usb_pre_chg_enable; di->usb_chg.max_out_volt = ab8500_charger_voltage_map[ ARRAY_SIZE(ab8500_charger_voltage_map) - 1]; - 
di->usb_chg.max_out_curr = ab8500_charger_current_map[ - ARRAY_SIZE(ab8500_charger_current_map) - 1]; + di->usb_chg.max_out_curr = + di->bm->chg_output_curr[di->bm->n_chg_out_curr - 1]; di->usb_chg.wdt_refresh = CHG_WD_INTERVAL; di->usb_chg.enabled = di->bm->usb_enabled; di->usb_chg.external = false; diff --git a/include/linux/mfd/abx500.h b/include/linux/mfd/abx500.h index cd71d8eadf50..33b0253569a3 100644 --- a/include/linux/mfd/abx500.h +++ b/include/linux/mfd/abx500.h @@ -246,7 +246,11 @@ struct abx500_bm_charger_parameters { * @interval_not_charging charge alg cycle period time when not charging (sec) * @temp_hysteresis temperature hysteresis * @gnd_lift_resistance Battery ground to phone ground resistance (mOhm) - * @maxi: maximization parameters + * @n_chg_out_curr number of elements in array chg_output_curr + * @n_chg_in_curr number of elements in array chg_input_curr + * @chg_output_curr charger output current level map + * @chg_input_curr charger input current level map + * @maxi maximization parameters * @cap_levels capacity in percent for the different capacity levels * @bat_type table of supported battery types * @chg_params charger parameters @@ -281,6 +285,10 @@ struct abx500_bm_data { int interval_not_charging; int temp_hysteresis; int gnd_lift_resistance; + int n_chg_out_curr; + int n_chg_in_curr; + int *chg_output_curr; + int *chg_input_curr; const struct abx500_maxim_parameters *maxi; const struct abx500_bm_capacity_levels *cap_levels; struct abx500_battery_type *bat_type; diff --git a/include/linux/mfd/abx500/ab8500-bm.h b/include/linux/mfd/abx500/ab8500-bm.h index 0ebf0c5d1f88..ee1c1626c886 100644 --- a/include/linux/mfd/abx500/ab8500-bm.h +++ b/include/linux/mfd/abx500/ab8500-bm.h @@ -33,7 +33,7 @@ #define AB8500_CH_STATUS2_REG 0x01 #define AB8500_CH_USBCH_STAT1_REG 0x02 #define AB8500_CH_USBCH_STAT2_REG 0x03 -#define AB8500_CH_FSM_STAT_REG 0x04 +#define AB8540_CH_USBCH_STAT3_REG 0x04 #define AB8500_CH_STAT_REG 0x05 /* @@ -157,6 +157,7 @@ #define CH_OP_CUR_LVL_1P4 0x0D #define CH_OP_CUR_LVL_1P5 0x0E #define CH_OP_CUR_LVL_1P6 0x0F +#define CH_OP_CUR_LVL_2P 0x3F /* BTEMP High thermal limits */ #define BTEMP_HIGH_TH_57_0 0x00 @@ -246,6 +247,8 @@ enum bup_vch_sel { #define BAT_CTRL_20U_ENA 0x02 #define BAT_CTRL_18U_ENA 0x01 #define BAT_CTRL_16U_ENA 0x02 +#define BAT_CTRL_60U_ENA 0x01 +#define BAT_CTRL_120U_ENA 0x02 #define BAT_CTRL_CMP_ENA 0x04 #define FORCE_BAT_CTRL_CMP_HIGH 0x08 #define BAT_CTRL_PULL_UP_ENA 0x10 -- cgit From b3ea5f451e4e435b650e34142f8552002dc21297 Mon Sep 17 00:00:00 2001 From: Marcus Cooper Date: Wed, 29 Aug 2012 17:56:19 +0200 Subject: ab8500-charger: Add UsbLineCtrl2 reference When the state of USB charge detection is changed, the calls use a define that belongs to a different register in another bank. This change creates a new define for the correct register and removes the magic numbers that are present. 
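As an aside for readers of the driver, the named bit turns the register update into self-documenting code. A minimal sketch, assuming the helper clears the bits selected by the mask and then sets them to the supplied value (the wrapper function is hypothetical; the register and bit names are the ones introduced below):

    /* Hypothetical helper showing both call sites of this change,
     * inside the ab8500_charger driver where *di is in scope. */
    static void example_toggle_usb_ch_det(struct ab8500_charger *di, bool on)
    {
            /* Read-modify-write: only the USB_CH_DET bit is touched. */
            abx500_mask_and_set_register_interruptible(di->dev,
                            AB8500_USB, AB8500_USB_LINE_CTRL2_REG,
                            USB_CH_DET, on ? USB_CH_DET : 0x00);
    }

Compared with the bare 0x01 used before, both the mask and the value now read as what they actually control.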
Signed-off-by: Marcus Cooper Signed-off-by: Lee Jones Reviewed-by: Hakan BERG Reviewed-by: Jonas ABERG --- drivers/power/ab8500_charger.c | 11 +++++++---- include/linux/mfd/abx500/ab8500-bm.h | 1 + 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/power/ab8500_charger.c b/drivers/power/ab8500_charger.c index bf8b479914cd..64accb235d2c 100644 --- a/drivers/power/ab8500_charger.c +++ b/drivers/power/ab8500_charger.c @@ -54,6 +54,7 @@ #define VBUS_DET_DBNC1 0x01 #define OTP_ENABLE_WD 0x01 #define DROP_COUNT_RESET 0x01 +#define USB_CH_DET 0x01 #define MAIN_CH_INPUT_CURR_SHIFT 4 #define VBUS_IN_CURR_LIM_SHIFT 4 @@ -2348,8 +2349,9 @@ static void ab8500_charger_usb_link_status_work(struct work_struct *work) AB8500_CHARGER, AB8500_USBCH_CTRL1_REG, USB_CH_ENA, USB_CH_ENA); /*Enable charger detection*/ - abx500_mask_and_set_register_interruptible(di->dev, AB8500_USB, - AB8500_MCH_IPT_CURLVL_REG, 0x01, 0x01); + abx500_mask_and_set_register_interruptible(di->dev, + AB8500_USB, AB8500_USB_LINE_CTRL2_REG, + USB_CH_DET, USB_CH_DET); di->invalid_charger_detect_state = 1; /*exit and wait for new link status interrupt.*/ return; @@ -2359,8 +2361,9 @@ static void ab8500_charger_usb_link_status_work(struct work_struct *work) dev_dbg(di->dev, "Invalid charger detected, state= 1\n"); /*Stop charger detection*/ - abx500_mask_and_set_register_interruptible(di->dev, AB8500_USB, - AB8500_MCH_IPT_CURLVL_REG, 0x01, 0x00); + abx500_mask_and_set_register_interruptible(di->dev, + AB8500_USB, AB8500_USB_LINE_CTRL2_REG, + USB_CH_DET, 0x00); /*Check link status*/ if (is_ab8500(di->parent)) ret = abx500_get_register_interruptible(di->dev, diff --git a/include/linux/mfd/abx500/ab8500-bm.h b/include/linux/mfd/abx500/ab8500-bm.h index ee1c1626c886..f5214dc651f9 100644 --- a/include/linux/mfd/abx500/ab8500-bm.h +++ b/include/linux/mfd/abx500/ab8500-bm.h @@ -23,6 +23,7 @@ * Bank : 0x5 */ #define AB8500_USB_LINE_STAT_REG 0x80 +#define AB8500_USB_LINE_CTRL2_REG 0x82 #define AB8500_USB_LINK1_STAT_REG 0x94 /* -- cgit From f4095a0f06476e5914f2c58b4e96258b2e2ba6b7 Mon Sep 17 00:00:00 2001 From: M BenZoubeir Date: Thu, 13 Sep 2012 10:34:18 +0200 Subject: pm2301-charger: Adjust interrupt handler behavior Signed-off-by: M BenZoubeir Signed-off-by: Lee Jones Reviewed-by: Philippe LANGLAIS --- drivers/power/pm2301_charger.c | 45 ++++++++++++++++++++++-------------------- include/linux/pm2301_charger.h | 2 +- 2 files changed, 25 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/drivers/power/pm2301_charger.c b/drivers/power/pm2301_charger.c index b8eafc3850d0..eed8a89ba4f0 100644 --- a/drivers/power/pm2301_charger.c +++ b/drivers/power/pm2301_charger.c @@ -493,14 +493,16 @@ static irqreturn_t pm2xxx_irq_int(int irq, void *data) struct pm2xxx_interrupts *interrupt = pm2->pm2_int; int i; - for (i = 0; i < PM2XXX_NUM_INT_REG; i++) { - pm2xxx_reg_read(pm2, + do { + for (i = 0; i < PM2XXX_NUM_INT_REG; i++) { + pm2xxx_reg_read(pm2, pm2xxx_interrupt_registers[i], &(interrupt->reg[i])); - if (interrupt->reg[i] > 0) - interrupt->handler[i](pm2, interrupt->reg[i]); - } + if (interrupt->reg[i] > 0) + interrupt->handler[i](pm2, interrupt->reg[i]); + } + } while (gpio_get_value(pm2->pdata->gpio_irq_number) == 0); return IRQ_HANDLED; } @@ -951,6 +953,7 @@ static int pm2xxx_wall_charger_probe(struct i2c_client *i2c_client, struct pm2xxx_charger *pm2; int ret = 0; u8 val; + int i; pm2 = kzalloc(sizeof(struct pm2xxx_charger), GFP_KERNEL); if (!pm2) { @@ -1062,24 +1065,25 @@ static int 
pm2xxx_wall_charger_probe(struct i2c_client *i2c_client, } /* Register interrupts */ - ret = request_threaded_irq(pm2->pdata->irq_number, NULL, + ret = request_threaded_irq(gpio_to_irq(pm2->pdata->gpio_irq_number), + NULL, pm2xxx_charger_irq[0].isr, pm2->pdata->irq_type, pm2xxx_charger_irq[0].name, pm2); if (ret != 0) { dev_err(pm2->dev, "failed to request %s IRQ %d: %d\n", - pm2xxx_charger_irq[0].name, pm2->pdata->irq_number, ret); + pm2xxx_charger_irq[0].name, + gpio_to_irq(pm2->pdata->gpio_irq_number), ret); goto unregister_pm2xxx_charger; } /* pm interrupt can wake up system */ - ret = enable_irq_wake(pm2->pdata->irq_number); + ret = enable_irq_wake(gpio_to_irq(pm2->pdata->gpio_irq_number)); if (ret) { dev_err(pm2->dev, "failed to set irq wake\n"); goto unregister_pm2xxx_interrupt; } - /*Initialize lock*/ mutex_init(&pm2->lock); /* @@ -1099,16 +1103,16 @@ static int pm2xxx_wall_charger_probe(struct i2c_client *i2c_client, } set_lpn_pin(pm2); + + /* read interrupt registers */ + for (i = 0; i < PM2XXX_NUM_INT_REG; i++) + pm2xxx_reg_read(pm2, + pm2xxx_interrupt_registers[i], + &val); + ret = pm2xxx_charger_detection(pm2, &val); if ((ret == 0) && val) { - /* - * When boot is due to AC charger plug-in, - * read interrupt registers - */ - pm2xxx_reg_read(pm2, PM2XXX_REG_INT1, &val); - pm2xxx_reg_read(pm2, PM2XXX_REG_INT2, &val); - pm2xxx_reg_read(pm2, PM2XXX_REG_INT4, &val); pm2->ac.charger_connected = 1; ab8500_override_turn_on_stat(~AB8500_POW_KEY_1_ON, AB8500_MAIN_CH_DET); @@ -1122,10 +1126,10 @@ static int pm2xxx_wall_charger_probe(struct i2c_client *i2c_client, free_gpio: gpio_free(pm2->lpn_pin); disable_pm2_irq_wake: - disable_irq_wake(pm2->pdata->irq_number); + disable_irq_wake(gpio_to_irq(pm2->pdata->gpio_irq_number)); unregister_pm2xxx_interrupt: /* disable interrupt */ - free_irq(pm2->pdata->irq_number, pm2); + free_irq(gpio_to_irq(pm2->pdata->gpio_irq_number), pm2); unregister_pm2xxx_charger: /* unregister power supply */ power_supply_unregister(&pm2->ac_chg.psy); @@ -1148,10 +1152,10 @@ static int pm2xxx_wall_charger_remove(struct i2c_client *i2c_client) pm2xxx_charger_ac_en(&pm2->ac_chg, false, 0, 0); /* Disable wake by pm interrupt */ - disable_irq_wake(pm2->pdata->irq_number); + disable_irq_wake(gpio_to_irq(pm2->pdata->gpio_irq_number)); /* Disable interrupts */ - free_irq(pm2->pdata->irq_number, pm2); + free_irq(gpio_to_irq(pm2->pdata->gpio_irq_number), pm2); /* Delete the work queue */ destroy_workqueue(pm2->charger_wq); @@ -1163,7 +1167,6 @@ static int pm2xxx_wall_charger_remove(struct i2c_client *i2c_client) power_supply_unregister(&pm2->ac_chg.psy); - /*Free GPIO60*/ gpio_free(pm2->lpn_pin); kfree(pm2); diff --git a/include/linux/pm2301_charger.h b/include/linux/pm2301_charger.h index fc3f026922ae..85c16defe11a 100644 --- a/include/linux/pm2301_charger.h +++ b/include/linux/pm2301_charger.h @@ -48,7 +48,7 @@ struct pm2xxx_charger_platform_data { size_t num_supplicants; int i2c_bus; const char *label; - int irq_number; + int gpio_irq_number; unsigned int lpn_gpio; int irq_type; }; -- cgit From 9684819b5a29e62acd8265a92d8f3454de9bb71e Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 27 Feb 2013 16:38:17 +0100 Subject: HID: ll_driver: Extend the interface with idle requests Some drivers send the idle command directly to the underlying device, creating an unwanted dependency on the underlying transport layer. This patch adds hid_hw_idle() to the interface, thereby removing usbhid from the lion's share of the drivers. 
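As a usage sketch only (the driver and its probe function here are hypothetical, not part of this patch), a transport-agnostic driver can now issue the request through the new hook; per the interface below, transports that do not implement ->idle simply return 0:

    static int example_hid_probe(struct hid_device *hdev,
                                 const struct hid_device_id *id)
    {
            int ret;

            /* Ask the transport to disable idle reports for all report
             * IDs, without knowing whether the device sits on USB. */
            ret = hid_hw_idle(hdev, 0, 0, HID_REQ_SET_IDLE);
            if (ret < 0)
                    return ret;

            return 0;
    }

The caller no longer needs to include usbhid headers or check the transport type; the decision is delegated to the ll_driver.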
Signed-off-by: Benjamin Tissoires Reviewed-by: David Herrmann Signed-off-by: Jiri Kosina --- drivers/hid/usbhid/hid-core.c | 15 +++++++++++++++ include/linux/hid.h | 19 +++++++++++++++++++ 2 files changed, 34 insertions(+) (limited to 'include/linux') diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c index 420466bc481a..effcd3d6f5cf 100644 --- a/drivers/hid/usbhid/hid-core.c +++ b/drivers/hid/usbhid/hid-core.c @@ -1253,6 +1253,20 @@ static void usbhid_request(struct hid_device *hid, struct hid_report *rep, int r } } +static int usbhid_idle(struct hid_device *hid, int report, int idle, + int reqtype) +{ + struct usb_device *dev = hid_to_usb_dev(hid); + struct usb_interface *intf = to_usb_interface(hid->dev.parent); + struct usb_host_interface *interface = intf->cur_altsetting; + int ifnum = interface->desc.bInterfaceNumber; + + if (reqtype != HID_REQ_SET_IDLE) + return -EINVAL; + + return hid_set_idle(dev, ifnum, report, idle); +} + static struct hid_ll_driver usb_hid_driver = { .parse = usbhid_parse, .start = usbhid_start, @@ -1263,6 +1277,7 @@ static struct hid_ll_driver usb_hid_driver = { .hidinput_input_event = usb_hidinput_input_event, .request = usbhid_request, .wait = usbhid_wait_io, + .idle = usbhid_idle, }; static int usbhid_probe(struct usb_interface *intf, const struct usb_device_id *id) diff --git a/include/linux/hid.h b/include/linux/hid.h index 7071eb3d36c7..863744c38ddc 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -664,6 +664,7 @@ struct hid_driver { * shouldn't allocate anything to not leak memory * @request: send report request to device (e.g. feature report) * @wait: wait for buffered io to complete (send/recv reports) + * @idle: send idle request to device */ struct hid_ll_driver { int (*start)(struct hid_device *hdev); @@ -683,6 +684,7 @@ struct hid_ll_driver { struct hid_report *report, int reqtype); int (*wait)(struct hid_device *hdev); + int (*idle)(struct hid_device *hdev, int report, int idle, int reqtype); }; @@ -906,6 +908,23 @@ static inline void hid_hw_request(struct hid_device *hdev, hdev->ll_driver->request(hdev, report, reqtype); } +/** + * hid_hw_idle - send idle request to device + * + * @hdev: hid device + * @report: report to control + * @idle: idle state + * @reqtype: hid request type + */ +static inline int hid_hw_idle(struct hid_device *hdev, int report, int idle, + int reqtype) +{ + if (hdev->ll_driver->idle) + return hdev->ll_driver->idle(hdev, report, idle, reqtype); + + return 0; +} + /** * hid_hw_wait - wait for buffered io to complete * -- cgit From d2348fb6fdc6d671ad45b62db237f76c8c115603 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Sat, 2 Mar 2013 11:10:11 +0100 Subject: tick: Dynamically set broadcast irq affinity When a cpu goes to a deep idle state where its local timer is shut down, it notifies the time framework to use the broadcast timer instead. Unfortunately, the broadcast device could wake up any CPU, including an idle one which is not concerned by the wakeup at all. So in the worst case an idle CPU will wake up to send an IPI to the CPU whose timer expired. Provide an opt-in feature CLOCK_EVT_FEAT_DYNIRQ which tells the core that it should set the interrupt affinity of the broadcast interrupt to the cpu which has the earliest expiry time. This avoids unnecessary spurious wakeups and IPIs. 
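For a driver, opting in is a one-flag change plus a valid interrupt number; a hypothetical broadcast clockevent might be declared like this (all names and the IRQ value are illustrative, not from this patch):

    #define EXAMPLE_BC_IRQ 42	/* hypothetical, affinity-capable IRQ */

    static struct clock_event_device example_bc_dev = {
            .name		= "example-bc",
            /* DYNIRQ lets the core retarget .irq to the CPU with the
             * earliest expiry instead of waking an arbitrary CPU. */
            .features	= CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_DYNIRQ,
            .irq		= EXAMPLE_BC_IRQ,
            /* .rating, .set_mode, .set_next_event etc. omitted */
    };

Drivers that do not set the flag keep the old behavior, so nothing changes for existing hardware.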
[ tglx: Adopted to cpumask rework, silenced an uninitialized warning, massaged changelog ] Signed-off-by: Daniel Lezcano Cc: viresh.kumar@linaro.org Cc: jacob.jun.pan@linux.intel.com Cc: linux-arm-kernel@lists.infradead.org Cc: santosh.shilimkar@ti.com Cc: linaro-kernel@lists.linaro.org Cc: patches@linaro.org Cc: rickard.andersson@stericsson.com Cc: vincent.guittot@linaro.org Cc: linus.walleij@stericsson.com Cc: john.stultz@linaro.org Link: http://lkml.kernel.org/r/1362219013-18173-3-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Thomas Gleixner --- include/linux/clockchips.h | 5 +++++ kernel/time/tick-broadcast.c | 39 +++++++++++++++++++++++++++++++-------- 2 files changed, 36 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 66346521cb65..494d33ea78f8 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -55,6 +55,11 @@ enum clock_event_nofitiers { #define CLOCK_EVT_FEAT_C3STOP 0x000008 #define CLOCK_EVT_FEAT_DUMMY 0x000010 +/* + * Core shall set the interrupt affinity dynamically in broadcast mode + */ +#define CLOCK_EVT_FEAT_DYNIRQ 0x000020 + /** * struct clock_event_device - clock event device descriptor * @event_handler: Assigned by the framework to be called by the low diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 70dd98ce18d7..380910db7157 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -401,13 +401,34 @@ struct cpumask *tick_get_broadcast_oneshot_mask(void) return tick_broadcast_oneshot_mask; } -static int tick_broadcast_set_event(struct clock_event_device *bc, +/* + * Set broadcast interrupt affinity + */ +static void tick_broadcast_set_affinity(struct clock_event_device *bc, + const struct cpumask *cpumask) +{ + if (!(bc->features & CLOCK_EVT_FEAT_DYNIRQ)) + return; + + if (cpumask_equal(bc->cpumask, cpumask)) + return; + + bc->cpumask = cpumask; + irq_set_affinity(bc->irq, bc->cpumask); +} + +static int tick_broadcast_set_event(struct clock_event_device *bc, int cpu, ktime_t expires, int force) { + int ret; + if (bc->mode != CLOCK_EVT_MODE_ONESHOT) clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); - return clockevents_program_event(bc, expires, force); + ret = clockevents_program_event(bc, expires, force); + if (!ret) + tick_broadcast_set_affinity(bc, cpumask_of(cpu)); + return ret; } int tick_resume_broadcast_oneshot(struct clock_event_device *bc) @@ -436,7 +457,7 @@ static void tick_handle_oneshot_broadcast(struct clock_event_device *dev) { struct tick_device *td; ktime_t now, next_event; - int cpu; + int cpu, next_cpu = 0; raw_spin_lock(&tick_broadcast_lock); again: @@ -447,10 +468,12 @@ again: /* Find all expired events */ for_each_cpu(cpu, tick_broadcast_oneshot_mask) { td = &per_cpu(tick_cpu_device, cpu); - if (td->evtdev->next_event.tv64 <= now.tv64) + if (td->evtdev->next_event.tv64 <= now.tv64) { cpumask_set_cpu(cpu, tmpmask); - else if (td->evtdev->next_event.tv64 < next_event.tv64) + } else if (td->evtdev->next_event.tv64 < next_event.tv64) { next_event.tv64 = td->evtdev->next_event.tv64; + next_cpu = cpu; + } } /* @@ -473,7 +496,7 @@ again: * Rearm the broadcast device. 
If event expired, * repeat the above */ - if (tick_broadcast_set_event(dev, next_event, 0)) + if (tick_broadcast_set_event(dev, next_cpu, next_event, 0)) goto again; } raw_spin_unlock(&tick_broadcast_lock); } @@ -515,7 +538,7 @@ void tick_broadcast_oneshot_control(unsigned long reason) if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) { clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN); if (dev->next_event.tv64 < bc->next_event.tv64) - tick_broadcast_set_event(bc, dev->next_event, 1); + tick_broadcast_set_event(bc, cpu, dev->next_event, 1); } } else { if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) { @@ -581,7 +604,7 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc) clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); tick_broadcast_init_next_event(tmpmask, tick_next_period); - tick_broadcast_set_event(bc, tick_next_period, 1); + tick_broadcast_set_event(bc, cpu, tick_next_period, 1); } else bc->next_event.tv64 = KTIME_MAX; } else { -- cgit From 56dd9470d7c8734f055da2a6bac553caf4a468eb Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 24 Feb 2013 00:23:25 +0100 Subject: context_tracking: Move exception handling to generic code Exception handling on context tracking should share common treatment: on entry we exit user mode if the exception triggered in that context. Then on exception exit we return to that previous context. Generalize this to avoid duplication across archs. Signed-off-by: Frederic Weisbecker Cc: Li Zhong Cc: Kevin Hilman Cc: Mats Liljegren Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Steven Rostedt Cc: Namhyung Kim Cc: Andrew Morton Cc: Thomas Gleixner Cc: Paul E. McKenney --- arch/x86/include/asm/context_tracking.h | 21 --------------------- arch/x86/kernel/kvm.c | 2 +- arch/x86/kernel/traps.c | 3 +-- arch/x86/mm/fault.c | 2 +- include/linux/context_tracking.h | 17 ++++++++++++++++- 5 files changed, 19 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/context_tracking.h b/arch/x86/include/asm/context_tracking.h index 1616562683e9..1fe49704b146 100644 --- a/arch/x86/include/asm/context_tracking.h +++ b/arch/x86/include/asm/context_tracking.h @@ -1,31 +1,10 @@ #ifndef _ASM_X86_CONTEXT_TRACKING_H #define _ASM_X86_CONTEXT_TRACKING_H -#ifndef __ASSEMBLY__ -#include -#include - -static inline void exception_enter(struct pt_regs *regs) -{ - user_exit(); -} - -static inline void exception_exit(struct pt_regs *regs) -{ -#ifdef CONFIG_CONTEXT_TRACKING - if (user_mode(regs)) - user_enter(); -#endif -} - -#else /* __ASSEMBLY__ */ - #ifdef CONFIG_CONTEXT_TRACKING # define SCHEDULE_USER call schedule_user #else # define SCHEDULE_USER call schedule #endif -#endif /* !__ASSEMBLY__ */ - #endif diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index b686a904d7c3..e8bb0d61ecdc 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -20,6 +20,7 @@ * Authors: Anthony Liguori */ +#include #include #include #include @@ -43,7 +44,6 @@ #include #include #include -#include static int kvmapf = 1; diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 68bda7a84159..ecc4ccbdd0cf 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -12,6 +12,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include #include #include @@ -55,8 +56,6 @@ #include #include #include -#include - #include #ifdef CONFIG_X86_64 diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 2b97525246d4..f946e6ce3315 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c 
@@ -13,12 +13,12 @@ #include /* perf_sw_event */ #include /* hstate_index_to_shift */ #include /* prefetchw */ +#include /* exception_enter(), ... */ #include /* dotraplinkage, ... */ #include /* pgd_*(), ... */ #include /* kmemcheck_*(), ... */ #include /* VSYSCALL_START */ -#include /* exception_enter(), ... */ /* * Page fault error code bits: diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index b28d161c1091..5a69273e93e6 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -1,10 +1,11 @@ #ifndef _LINUX_CONTEXT_TRACKING_H #define _LINUX_CONTEXT_TRACKING_H -#ifdef CONFIG_CONTEXT_TRACKING #include #include +#include +#ifdef CONFIG_CONTEXT_TRACKING struct context_tracking { /* * When active is false, probes are unset in order @@ -33,12 +34,26 @@ static inline bool context_tracking_active(void) extern void user_enter(void); extern void user_exit(void); + +static inline void exception_enter(struct pt_regs *regs) +{ + user_exit(); +} + +static inline void exception_exit(struct pt_regs *regs) +{ + if (user_mode(regs)) + user_enter(); +} + extern void context_tracking_task_switch(struct task_struct *prev, struct task_struct *next); #else static inline bool context_tracking_in_user(void) { return false; } static inline void user_enter(void) { } static inline void user_exit(void) { } +static inline void exception_enter(struct pt_regs *regs) { } +static inline void exception_exit(struct pt_regs *regs) { } static inline void context_tracking_task_switch(struct task_struct *prev, struct task_struct *next) { } #endif /* !CONFIG_CONTEXT_TRACKING */ -- cgit From 6c1e0256fad84a843d915414e4b5973b7443d48d Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 24 Feb 2013 01:19:14 +0100 Subject: context_tracking: Restore correct previous context state on exception exit On exception exit, we restore the previous context tracking state based on the regs of the interrupted frame. Iff that frame is in user mode as stated by the user_mode() helper, we restore the context tracking user mode. However, there is a tiny chunk of low-level arch code after we pass through user_enter() and until the CPU eventually resumes userspace. If an exception happens in this tiny area, exception_enter() correctly exits the context tracking user mode but exception_exit() won't restore it because of the value returned by user_mode(regs). As a result we may return to userspace with the wrong context tracking state. To fix this, change exception_enter() to return the context tracking state prior to its call and pass this saved state to exception_exit(). This restores the real context tracking state of the interrupted frame. (Maybe this patch was suggested to me, I don't recall exactly. If so, sorry for the missing credit.) Signed-off-by: Frederic Weisbecker Cc: Li Zhong Cc: Kevin Hilman Cc: Mats Liljegren Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Steven Rostedt Cc: Namhyung Kim Cc: Andrew Morton Cc: Thomas Gleixner Cc: Paul E. 
McKenney --- arch/x86/kernel/kvm.c | 6 ++-- arch/x86/kernel/traps.c | 65 +++++++++++++++++++++++++--------------- arch/x86/mm/fault.c | 6 ++-- include/linux/context_tracking.h | 19 +++++++----- 4 files changed, 61 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index e8bb0d61ecdc..cd6d9a5a42f6 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -254,16 +254,18 @@ EXPORT_SYMBOL_GPL(kvm_read_and_reset_pf_reason); dotraplinkage void __kprobes do_async_page_fault(struct pt_regs *regs, unsigned long error_code) { + enum ctx_state prev_state; + switch (kvm_read_and_reset_pf_reason()) { default: do_page_fault(regs, error_code); break; case KVM_PV_REASON_PAGE_NOT_PRESENT: /* page is swapped out by the host. */ - exception_enter(regs); + prev_state = exception_enter(); exit_idle(); kvm_async_pf_task_wait((u32)read_cr2()); - exception_exit(regs); + exception_exit(prev_state); break; case KVM_PV_REASON_PAGE_READY: rcu_irq_enter(); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index ecc4ccbdd0cf..ff6d2271cbe2 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -175,34 +175,38 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, #define DO_ERROR(trapnr, signr, str, name) \ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ { \ - exception_enter(regs); \ + enum ctx_state prev_state; \ + \ + prev_state = exception_enter(); \ if (notify_die(DIE_TRAP, str, regs, error_code, \ trapnr, signr) == NOTIFY_STOP) { \ - exception_exit(regs); \ + exception_exit(prev_state); \ return; \ } \ conditional_sti(regs); \ do_trap(trapnr, signr, str, regs, error_code, NULL); \ - exception_exit(regs); \ + exception_exit(prev_state); \ } #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ { \ siginfo_t info; \ + enum ctx_state prev_state; \ + \ info.si_signo = signr; \ info.si_errno = 0; \ info.si_code = sicode; \ info.si_addr = (void __user *)siaddr; \ - exception_enter(regs); \ + prev_state = exception_enter(); \ if (notify_die(DIE_TRAP, str, regs, error_code, \ trapnr, signr) == NOTIFY_STOP) { \ - exception_exit(regs); \ + exception_exit(prev_state); \ return; \ } \ conditional_sti(regs); \ do_trap(trapnr, signr, str, regs, error_code, &info); \ - exception_exit(regs); \ + exception_exit(prev_state); \ } DO_ERROR_INFO(X86_TRAP_DE, SIGFPE, "divide error", divide_error, FPE_INTDIV, @@ -225,14 +229,16 @@ DO_ERROR_INFO(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check, /* Runs on IST stack */ dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code) { - exception_enter(regs); + enum ctx_state prev_state; + + prev_state = exception_enter(); if (notify_die(DIE_TRAP, "stack segment", regs, error_code, X86_TRAP_SS, SIGBUS) != NOTIFY_STOP) { preempt_conditional_sti(regs); do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL); preempt_conditional_cli(regs); } - exception_exit(regs); + exception_exit(prev_state); } dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) @@ -240,7 +246,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) static const char str[] = "double fault"; struct task_struct *tsk = current; - exception_enter(regs); + exception_enter(); /* Return not checked because double check cannot be ignored */ notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV); @@ -260,8 +266,9 @@ dotraplinkage 
void __kprobes do_general_protection(struct pt_regs *regs, long error_code) { struct task_struct *tsk; + enum ctx_state prev_state; - exception_enter(regs); + prev_state = exception_enter(); conditional_sti(regs); #ifdef CONFIG_X86_32 @@ -299,12 +306,14 @@ do_general_protection(struct pt_regs *regs, long error_code) force_sig(SIGSEGV, tsk); exit: - exception_exit(regs); + exception_exit(prev_state); } /* May run on IST stack. */ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_code) { + enum ctx_state prev_state; + #ifdef CONFIG_DYNAMIC_FTRACE /* * ftrace must be first, everything else may cause a recursive crash. @@ -314,7 +323,7 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co ftrace_int3_handler(regs)) return; #endif - exception_enter(regs); + prev_state = exception_enter(); #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, SIGTRAP) == NOTIFY_STOP) @@ -335,7 +344,7 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co preempt_conditional_cli(regs); debug_stack_usage_dec(); exit: - exception_exit(regs); + exception_exit(prev_state); } #ifdef CONFIG_X86_64 @@ -392,11 +401,12 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) { struct task_struct *tsk = current; + enum ctx_state prev_state; int user_icebp = 0; unsigned long dr6; int si_code; - exception_enter(regs); + prev_state = exception_enter(); get_debugreg(dr6, 6); @@ -466,7 +476,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) debug_stack_usage_dec(); exit: - exception_exit(regs); + exception_exit(prev_state); } /* @@ -560,17 +570,21 @@ void math_error(struct pt_regs *regs, int error_code, int trapnr) dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code) { - exception_enter(regs); + enum ctx_state prev_state; + + prev_state = exception_enter(); math_error(regs, error_code, X86_TRAP_MF); - exception_exit(regs); + exception_exit(prev_state); } dotraplinkage void do_simd_coprocessor_error(struct pt_regs *regs, long error_code) { - exception_enter(regs); + enum ctx_state prev_state; + + prev_state = exception_enter(); math_error(regs, error_code, X86_TRAP_XF); - exception_exit(regs); + exception_exit(prev_state); } dotraplinkage void @@ -638,7 +652,9 @@ EXPORT_SYMBOL_GPL(math_state_restore); dotraplinkage void __kprobes do_device_not_available(struct pt_regs *regs, long error_code) { - exception_enter(regs); + enum ctx_state prev_state; + + prev_state = exception_enter(); BUG_ON(use_eager_fpu()); #ifdef CONFIG_MATH_EMULATION @@ -649,7 +665,7 @@ do_device_not_available(struct pt_regs *regs, long error_code) info.regs = regs; math_emulate(&info); - exception_exit(regs); + exception_exit(prev_state); return; } #endif @@ -657,15 +673,16 @@ do_device_not_available(struct pt_regs *regs, long error_code) #ifdef CONFIG_X86_32 conditional_sti(regs); #endif - exception_exit(regs); + exception_exit(prev_state); } #ifdef CONFIG_X86_32 dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) { siginfo_t info; + enum ctx_state prev_state; - exception_enter(regs); + prev_state = exception_enter(); local_irq_enable(); info.si_signo = SIGILL; @@ -677,7 +694,7 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code, &info); } - exception_exit(regs); + 
exception_exit(prev_state); } #endif diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index f946e6ce3315..fa8c02de0d25 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1222,7 +1222,9 @@ good_area: dotraplinkage void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) { - exception_enter(regs); + enum ctx_state prev_state; + + prev_state = exception_enter(); __do_page_fault(regs, error_code); - exception_exit(regs); + exception_exit(prev_state); } diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index 5a69273e93e6..365f4a61bf04 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -5,7 +5,6 @@ #include #include -#ifdef CONFIG_CONTEXT_TRACKING struct context_tracking { /* * When active is false, probes are unset in order @@ -14,12 +13,13 @@ struct context_tracking { * may be further optimized using static keys. */ bool active; - enum { + enum ctx_state { IN_KERNEL = 0, IN_USER, } state; }; +#ifdef CONFIG_CONTEXT_TRACKING DECLARE_PER_CPU(struct context_tracking, context_tracking); static inline bool context_tracking_in_user(void) @@ -35,14 +35,19 @@ static inline bool context_tracking_active(void) extern void user_enter(void); extern void user_exit(void); -static inline void exception_enter(struct pt_regs *regs) +static inline enum ctx_state exception_enter(void) { + enum ctx_state prev_ctx; + + prev_ctx = this_cpu_read(context_tracking.state); user_exit(); + + return prev_ctx; } -static inline void exception_exit(struct pt_regs *regs) +static inline void exception_exit(enum ctx_state prev_ctx) { - if (user_mode(regs)) + if (prev_ctx == IN_USER) user_enter(); } @@ -52,8 +57,8 @@ extern void context_tracking_task_switch(struct task_struct *prev, static inline bool context_tracking_in_user(void) { return false; } static inline void user_enter(void) { } static inline void user_exit(void) { } -static inline void exception_enter(struct pt_regs *regs) { } -static inline void exception_exit(struct pt_regs *regs) { } +static inline enum ctx_state exception_enter(void) { return 0; } +static inline void exception_exit(enum ctx_state prev_ctx) { } static inline void context_tracking_task_switch(struct task_struct *prev, struct task_struct *next) { } #endif /* !CONFIG_CONTEXT_TRACKING */ -- cgit From 9fbc42eac1f6917081dc3b39922b2f1c57fdff28 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 25 Feb 2013 17:25:39 +0100 Subject: cputime: Dynamically scale cputime for full dynticks accounting The full dynticks cputime accounting is able to account either using the tick or the context tracking subsystem. This way the housekeeping CPU can keep the low overhead tick based solution. This latter mode has a low jiffies resolution granularity and needs to be scaled against the CFS precise runtime accounting to improve its result. We already do this for CONFIG_TICK_CPU_ACCOUNTING; now we need to expand it to the full dynticks accounting dynamic off-case as well. Signed-off-by: Frederic Weisbecker Cc: Li Zhong Cc: Kevin Hilman Cc: Mats Liljegren Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Steven Rostedt Cc: Namhyung Kim Cc: Andrew Morton Cc: Thomas Gleixner Cc: Paul E. 
McKenney --- include/linux/sched.h | 4 +- kernel/fork.c | 2 +- kernel/sched/cputime.c | 154 +++++++++++++++++++++++++------------------------ 3 files changed, 83 insertions(+), 77 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index d35d2b6ddbfb..8d1b6034d80b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -570,7 +570,7 @@ struct signal_struct { cputime_t utime, stime, cutime, cstime; cputime_t gtime; cputime_t cgtime; -#ifndef CONFIG_VIRT_CPU_ACCOUNTING +#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE struct cputime prev_cputime; #endif unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; @@ -1327,7 +1327,7 @@ struct task_struct { cputime_t utime, stime, utimescaled, stimescaled; cputime_t gtime; -#ifndef CONFIG_VIRT_CPU_ACCOUNTING +#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE struct cputime prev_cputime; #endif #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN diff --git a/kernel/fork.c b/kernel/fork.c index 8d932b1c9056..f3146ed49074 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1230,7 +1230,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->utime = p->stime = p->gtime = 0; p->utimescaled = p->stimescaled = 0; -#ifndef CONFIG_VIRT_CPU_ACCOUNTING +#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE p->prev_cputime.utime = p->prev_cputime.stime = 0; #endif #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index ed12cbb135f4..024fe1998ad5 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -388,82 +388,10 @@ static inline void irqtime_account_process_tick(struct task_struct *p, int user_ struct rq *rq) {} #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ -#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE -/* - * Account a single tick of cpu time. - * @p: the process that the cpu time gets accounted to - * @user_tick: indicates if the tick is a user or a system tick - */ -void account_process_tick(struct task_struct *p, int user_tick) -{ - cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); - struct rq *rq = this_rq(); - - if (vtime_accounting_enabled()) - return; - - if (sched_clock_irqtime) { - irqtime_account_process_tick(p, user_tick, rq); - return; - } - - if (steal_account_process_tick()) - return; - - if (user_tick) - account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); - else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET)) - account_system_time(p, HARDIRQ_OFFSET, cputime_one_jiffy, - one_jiffy_scaled); - else - account_idle_time(cputime_one_jiffy); -} - -/* - * Account multiple ticks of steal time. - * @p: the process from which the cpu time has been stolen - * @ticks: number of stolen ticks - */ -void account_steal_ticks(unsigned long ticks) -{ - account_steal_time(jiffies_to_cputime(ticks)); -} - -/* - * Account multiple ticks of idle time. 
- * @ticks: number of stolen ticks - */ -void account_idle_ticks(unsigned long ticks) -{ - - if (sched_clock_irqtime) { - irqtime_account_idle_ticks(ticks); - return; - } - - account_idle_time(jiffies_to_cputime(ticks)); -} -#endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ - /* * Use precise platform statistics if available: */ #ifdef CONFIG_VIRT_CPU_ACCOUNTING -void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st) -{ - *ut = p->utime; - *st = p->stime; -} - -void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st) -{ - struct task_cputime cputime; - - thread_group_cputime(p, &cputime); - - *ut = cputime.utime; - *st = cputime.stime; -} #ifndef __ARCH_HAS_VTIME_TASK_SWITCH void vtime_task_switch(struct task_struct *prev) @@ -518,8 +446,80 @@ void vtime_account_irq_enter(struct task_struct *tsk) } EXPORT_SYMBOL_GPL(vtime_account_irq_enter); #endif /* __ARCH_HAS_VTIME_ACCOUNT */ +#endif /* CONFIG_VIRT_CPU_ACCOUNTING */ + + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE +void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st) +{ + *ut = p->utime; + *st = p->stime; +} -#else /* !CONFIG_VIRT_CPU_ACCOUNTING */ +void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st) +{ + struct task_cputime cputime; + + thread_group_cputime(p, &cputime); + + *ut = cputime.utime; + *st = cputime.stime; +} +#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ +/* + * Account a single tick of cpu time. + * @p: the process that the cpu time gets accounted to + * @user_tick: indicates if the tick is a user or a system tick + */ +void account_process_tick(struct task_struct *p, int user_tick) +{ + cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); + struct rq *rq = this_rq(); + + if (vtime_accounting_enabled()) + return; + + if (sched_clock_irqtime) { + irqtime_account_process_tick(p, user_tick, rq); + return; + } + + if (steal_account_process_tick()) + return; + + if (user_tick) + account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); + else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET)) + account_system_time(p, HARDIRQ_OFFSET, cputime_one_jiffy, + one_jiffy_scaled); + else + account_idle_time(cputime_one_jiffy); +} + +/* + * Account multiple ticks of steal time. + * @p: the process from which the cpu time has been stolen + * @ticks: number of stolen ticks + */ +void account_steal_ticks(unsigned long ticks) +{ + account_steal_time(jiffies_to_cputime(ticks)); +} + +/* + * Account multiple ticks of idle time. 
+ * @ticks: number of stolen ticks + */ +void account_idle_ticks(unsigned long ticks) +{ + + if (sched_clock_irqtime) { + irqtime_account_idle_ticks(ticks); + return; + } + + account_idle_time(jiffies_to_cputime(ticks)); +} static cputime_t scale_stime(cputime_t stime, cputime_t rtime, cputime_t total) { @@ -545,6 +545,12 @@ static void cputime_adjust(struct task_cputime *curr, { cputime_t rtime, stime, total; + if (vtime_accounting_enabled()) { + *ut = curr->utime; + *st = curr->stime; + return; + } + stime = curr->stime; total = stime + curr->utime; @@ -597,7 +603,7 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime thread_group_cputime(p, &cputime); cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st); } -#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */ +#endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN static unsigned long long vtime_delta(struct task_struct *tsk) -- cgit From 090096bf3db1c281ddd034573260045888a68fea Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 6 Mar 2013 15:39:42 +0000 Subject: net: generic fdb support for drivers without ndo_fdb_ If the driver does not support the ndo op, use the generic handler for it. This should work in the majority of cases. Eventually the fdb_dflt_add call gets translated into a __dev_set_rx_mode() call which should handle hardware support for filtering via the IFF_UNICAST_FLT flag. Namely, IFF_UNICAST_FLT indicates if the hardware can do unicast address filtering. If no support is available, the device is put into promisc mode. Signed-off-by: Vlad Yasevich Signed-off-by: John Fastabend Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 9 ++++++ net/core/rtnetlink.c | 81 +++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 84 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 489dd7bb28ec..f28544b2f9af 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -69,6 +69,15 @@ extern int ndo_dflt_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, int idx); +extern int ndo_dflt_fdb_add(struct ndmsg *ndm, + struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr, + u16 flags); +extern int ndo_dflt_fdb_del(struct ndmsg *ndm, + struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr); extern int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, u16 mode); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index b376410ff259..f95b6fbc29e9 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2048,6 +2048,38 @@ errout: rtnl_set_sk_err(net, RTNLGRP_NEIGH, err); } +/** + * ndo_dflt_fdb_add - default netdevice operation to add an FDB entry + */ +int ndo_dflt_fdb_add(struct ndmsg *ndm, + struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr, + u16 flags) +{ + int err = -EINVAL; + + /* If aging addresses are supported device will need to 
+ */ + if (ndm->ndm_state && !(ndm->ndm_state & NUD_PERMANENT)) { + pr_info("%s: FDB only supports static addresses\n", dev->name); + return err; + } + + if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) + err = dev_uc_add_excl(dev, addr); + else if (is_multicast_ether_addr(addr)) + err = dev_mc_add_excl(dev, addr); + + /* Only return duplicate errors if NLM_F_EXCL is set */ + if (err == -EEXIST && !(flags & NLM_F_EXCL)) + err = 0; + + return err; +} +EXPORT_SYMBOL(ndo_dflt_fdb_add); + static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { struct net *net = sock_net(skb->sk); @@ -2100,10 +2132,13 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) } /* Embedded bridge, macvlan, and any other device support */ - if ((ndm->ndm_flags & NTF_SELF) && dev->netdev_ops->ndo_fdb_add) { - err = dev->netdev_ops->ndo_fdb_add(ndm, tb, - dev, addr, - nlh->nlmsg_flags); + if ((ndm->ndm_flags & NTF_SELF)) { + if (dev->netdev_ops->ndo_fdb_add) + err = dev->netdev_ops->ndo_fdb_add(ndm, tb, dev, addr, + nlh->nlmsg_flags); + else + err = ndo_dflt_fdb_add(ndm, tb, dev, addr, + nlh->nlmsg_flags); if (!err) { rtnl_fdb_notify(dev, addr, RTM_NEWNEIGH); @@ -2114,6 +2149,35 @@ out: return err; } +/** + * ndo_dflt_fdb_del - default netdevice operation to delete an FDB entry + */ +int ndo_dflt_fdb_del(struct ndmsg *ndm, + struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr) +{ + int err = -EOPNOTSUPP; + + /* If aging addresses are supported device will need to + * implement its own handler for this. + */ + if (ndm->ndm_state & NUD_PERMANENT) { + pr_info("%s: FDB only supports static addresses\n", dev->name); + return -EINVAL; + } + + if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) + err = dev_uc_del(dev, addr); + else if (is_multicast_ether_addr(addr)) + err = dev_mc_del(dev, addr); + else + err = -EINVAL; + + return err; +} +EXPORT_SYMBOL(ndo_dflt_fdb_del); + static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { struct net *net = sock_net(skb->sk); @@ -2171,8 +2235,11 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) } /* Embedded bridge, macvlan, and any other device support */ - if ((ndm->ndm_flags & NTF_SELF) && dev->netdev_ops->ndo_fdb_del) { - err = dev->netdev_ops->ndo_fdb_del(ndm, tb, dev, addr); + if (ndm->ndm_flags & NTF_SELF) { + if (dev->netdev_ops->ndo_fdb_del) + err = dev->netdev_ops->ndo_fdb_del(ndm, tb, dev, addr); + else + err = ndo_dflt_fdb_del(ndm, tb, dev, addr); if (!err) { rtnl_fdb_notify(dev, addr, RTM_DELNEIGH); @@ -2257,6 +2324,8 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) if (dev->netdev_ops->ndo_fdb_dump) idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, idx); + else + ndo_dflt_fdb_dump(skb, cb, dev, idx); } rcu_read_unlock(); -- cgit From c031e234ee304b507b79f76a7677ea0a7a8890e8 Mon Sep 17 00:00:00 2001 From: Andy King Date: Thu, 7 Mar 2013 05:26:13 +0000 Subject: VSOCK: Split vm_sockets.h into kernel/uapi Split the vSockets header into kernel and UAPI parts. The former gets the bits that used to be in __KERNEL__ guards, while the latter gets everything that is user-visible. Tested by compiling vsock (+transport) and a simple user-mode vSockets application. Reported-by: David Howells Acked-by: Dmitry Torokhov Signed-off-by: Andy King Acked-by: David Howells Signed-off-by: David S. 
Miller --- include/linux/vm_sockets.h | 23 +++++++++++++++++++++++ include/uapi/linux/vm_sockets.h | 23 ++++++++--------------- 2 files changed, 31 insertions(+), 15 deletions(-) create mode 100644 include/linux/vm_sockets.h (limited to 'include/linux') diff --git a/include/linux/vm_sockets.h b/include/linux/vm_sockets.h new file mode 100644 index 000000000000..0805eecba8f7 --- /dev/null +++ b/include/linux/vm_sockets.h @@ -0,0 +1,23 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2007-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef _VM_SOCKETS_H +#define _VM_SOCKETS_H + +#include + +int vm_sockets_get_local_cid(void); + +#endif /* _VM_SOCKETS_H */ diff --git a/include/uapi/linux/vm_sockets.h b/include/uapi/linux/vm_sockets.h index df91301847ec..b4ed5d895699 100644 --- a/include/uapi/linux/vm_sockets.h +++ b/include/uapi/linux/vm_sockets.h @@ -13,12 +13,10 @@ * more details. */ -#ifndef _VM_SOCKETS_H_ -#define _VM_SOCKETS_H_ +#ifndef _UAPI_VM_SOCKETS_H +#define _UAPI_VM_SOCKETS_H -#if !defined(__KERNEL__) -#include -#endif +#include /* Option name for STREAM socket buffer size. Use as the option name in * setsockopt(3) or getsockopt(3) to set or get an unsigned long long that @@ -137,14 +135,13 @@ #define VM_SOCKETS_VERSION_MINOR(_v) (((_v) & 0x0000FFFF)) /* Address structure for vSockets. The address family should be set to - * whatever vmci_sock_get_af_value_fd() returns. The structure members should - * all align on their natural boundaries without resorting to compiler packing - * directives. The total size of this structure should be exactly the same as - * that of struct sockaddr. + * AF_VSOCK. The structure members should all align on their natural + * boundaries without resorting to compiler packing directives. The total size + * of this structure should be exactly the same as that of struct sockaddr. */ struct sockaddr_vm { - sa_family_t svm_family; + __kernel_sa_family_t svm_family; unsigned short svm_reserved1; unsigned int svm_port; unsigned int svm_cid; @@ -156,8 +153,4 @@ struct sockaddr_vm { #define IOCTL_VM_SOCKETS_GET_LOCAL_CID _IO(7, 0xb9) -#if defined(__KERNEL__) -int vm_sockets_get_local_cid(void); -#endif - -#endif +#endif /* _UAPI_VM_SOCKETS_H */ -- cgit From ec5f061564238892005257c83565a0b58ec79295 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Thu, 7 Mar 2013 09:28:01 +0000 Subject: net: Kill link between CSUM and SG features. Earlier, SG was unset if CSUM was not available for a given device, forcing an skb copy to avoid sending an inconsistent csum. Commit c9af6db4c11c (net: Fix possible wrong checksum generation) added an explicit flag to force the copy to fix this issue. Therefore there is no need to link SG and CSUM; the following patch kills this link between these two features. This patch is also required by the following patch in the series. Signed-off-by: Pravin B Shelar Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 13 ++++++++++ net/core/dev.c | 63 +++++++++++++++++++++++++---------------------- net/core/skbuff.c | 13 ++++++++++ net/ipv4/af_inet.c | 3 --- net/ipv6/ip6_offload.c | 3 --- 5 files changed, 59 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 896eb4985f97..e1ebeffa6b35 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2683,6 +2683,19 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, netdev_features_t features) { return __skb_gso_segment(skb, features, true); } +__be16 skb_network_protocol(struct sk_buff *skb); + +static inline bool can_checksum_protocol(netdev_features_t features, + __be16 protocol) +{ + return ((features & NETIF_F_GEN_CSUM) || + ((features & NETIF_F_V4_CSUM) && + protocol == htons(ETH_P_IP)) || + ((features & NETIF_F_V6_CSUM) && + protocol == htons(ETH_P_IPV6)) || + ((features & NETIF_F_FCOE_CRC) && + protocol == htons(ETH_P_FCOE))); +} #ifdef CONFIG_BUG extern void netdev_rx_csum_fault(struct net_device *dev); diff --git a/net/core/dev.c b/net/core/dev.c index 96103894ad69..bb999931729f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2208,16 +2208,8 @@ out: } EXPORT_SYMBOL(skb_checksum_help); -/** - * skb_mac_gso_segment - mac layer segmentation handler. - * @skb: buffer to segment - * @features: features for the output path (see dev->features) - */ -struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, - netdev_features_t features) +__be16 skb_network_protocol(struct sk_buff *skb) { - struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); - struct packet_offload *ptype; __be16 type = skb->protocol; while (type == htons(ETH_P_8021Q)) { @@ -2225,13 +2217,31 @@ struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, struct vlan_hdr *vh; if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN))) - return ERR_PTR(-EINVAL); + return 0; vh = (struct vlan_hdr *)(skb->data + vlan_depth); type = vh->h_vlan_encapsulated_proto; vlan_depth += VLAN_HLEN; } + return type; +} + +/** + * skb_mac_gso_segment - mac layer segmentation handler. + * @skb: buffer to segment + * @features: features for the output path (see dev->features) + */ +struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, + netdev_features_t features) +{ + struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); + struct packet_offload *ptype; + __be16 type = skb_network_protocol(skb); + + if (unlikely(!type)) + return ERR_PTR(-EINVAL); + __skb_pull(skb, skb->mac_len); rcu_read_lock(); @@ -2398,24 +2408,12 @@ static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features) return 0; } -static bool can_checksum_protocol(netdev_features_t features, __be16 protocol) -{ - return ((features & NETIF_F_GEN_CSUM) || - ((features & NETIF_F_V4_CSUM) && - protocol == htons(ETH_P_IP)) || - ((features & NETIF_F_V6_CSUM) && - protocol == htons(ETH_P_IPV6)) || - ((features & NETIF_F_FCOE_CRC) && - protocol == htons(ETH_P_FCOE))); -} - static netdev_features_t harmonize_features(struct sk_buff *skb, __be16 protocol, netdev_features_t features) { if (skb->ip_summed != CHECKSUM_NONE && !can_checksum_protocol(features, protocol)) { features &= ~NETIF_F_ALL_CSUM; - features &= ~NETIF_F_SG; } else if (illegal_highdma(skb->dev, skb)) { features &= ~NETIF_F_SG; } @@ -4921,20 +4919,25 @@ static netdev_features_t netdev_fix_features(struct net_device *dev, features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); } - /* Fix illegal SG+CSUM combinations. 
*/ - if ((features & NETIF_F_SG) && - !(features & NETIF_F_ALL_CSUM)) { - netdev_dbg(dev, - "Dropping NETIF_F_SG since no checksum feature.\n"); - features &= ~NETIF_F_SG; - } - /* TSO requires that SG is present as well. */ if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) { netdev_dbg(dev, "Dropping TSO features since no SG feature.\n"); features &= ~NETIF_F_ALL_TSO; } + if ((features & NETIF_F_TSO) && !(features & NETIF_F_HW_CSUM) && + !(features & NETIF_F_IP_CSUM)) { + netdev_dbg(dev, "Dropping TSO features since no CSUM feature.\n"); + features &= ~NETIF_F_TSO; + features &= ~NETIF_F_TSO_ECN; + } + + if ((features & NETIF_F_TSO6) && !(features & NETIF_F_HW_CSUM) && + !(features & NETIF_F_IPV6_CSUM)) { + netdev_dbg(dev, "Dropping TSO6 features since no CSUM feature.\n"); + features &= ~NETIF_F_TSO6; + } + /* TSO ECN requires that TSO is present as well. */ if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN) features &= ~NETIF_F_TSO_ECN; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 33245ef54c3b..0a48ae20c903 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2741,12 +2741,19 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) unsigned int tnl_hlen = skb_tnl_header_len(skb); unsigned int headroom; unsigned int len; + __be16 proto; + bool csum; int sg = !!(features & NETIF_F_SG); int nfrags = skb_shinfo(skb)->nr_frags; int err = -ENOMEM; int i = 0; int pos; + proto = skb_network_protocol(skb); + if (unlikely(!proto)) + return ERR_PTR(-EINVAL); + + csum = !!can_checksum_protocol(features, proto); __skb_push(skb, doffset); headroom = skb_headroom(skb); pos = skb_headlen(skb); @@ -2884,6 +2891,12 @@ skip_fraglist: nskb->data_len = len - hsize; nskb->len += nskb->data_len; nskb->truesize += nskb->data_len; + + if (!csum) { + nskb->csum = skb_checksum(nskb, doffset, + nskb->len - doffset, 0); + nskb->ip_summed = CHECKSUM_NONE; + } } while ((offset += len) < skb->len); return segs; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 68f6a94f7661..dc3f677360a5 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1284,9 +1284,6 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int id; unsigned int offset = 0; - if (!(features & NETIF_F_V4_CSUM)) - features &= ~NETIF_F_SG; - if (unlikely(skb_shinfo(skb)->gso_type & ~(SKB_GSO_TCPV4 | SKB_GSO_UDP | diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 8234c1dcdf72..7a0d25a5479c 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -92,9 +92,6 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, u8 *prevhdr; int offset = 0; - if (!(features & NETIF_F_V6_CSUM)) - features &= ~NETIF_F_SG; - if (unlikely(skb_shinfo(skb)->gso_type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | -- cgit From aefbd2b3c2a9c657605e4337f9919d6c6273e8e6 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Thu, 7 Mar 2013 13:21:46 +0000 Subject: tunneling: Capture inner mac header during encapsulation. This patch adds inner mac header. This will be used in next patch to find tunner header length. Header len is required to copy tunnel header to each gso segment. This patch does not change any functionality. Signed-off-by: Pravin B Shelar Acked-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 34 ++++++++++++++++++++++++++++++++++ net/core/skbuff.c | 2 ++ 2 files changed, 36 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 821c7f45d2a7..d7f96ff68f77 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -387,6 +387,7 @@ typedef unsigned char *sk_buff_data_t; * @vlan_tci: vlan tag control information * @inner_transport_header: Inner transport layer header (encapsulation) * @inner_network_header: Network layer header (encapsulation) + * @inner_mac_header: Link layer header (encapsulation) * @transport_header: Transport layer header * @network_header: Network layer header * @mac_header: Link layer header @@ -505,6 +506,7 @@ struct sk_buff { sk_buff_data_t inner_transport_header; sk_buff_data_t inner_network_header; + sk_buff_data_t inner_mac_header; sk_buff_data_t transport_header; sk_buff_data_t network_header; sk_buff_data_t mac_header; @@ -1466,6 +1468,7 @@ static inline void skb_reserve(struct sk_buff *skb, int len) static inline void skb_reset_inner_headers(struct sk_buff *skb) { + skb->inner_mac_header = skb->mac_header; skb->inner_network_header = skb->network_header; skb->inner_transport_header = skb->transport_header; } @@ -1511,6 +1514,22 @@ static inline void skb_set_inner_network_header(struct sk_buff *skb, skb->inner_network_header += offset; } +static inline unsigned char *skb_inner_mac_header(const struct sk_buff *skb) +{ + return skb->head + skb->inner_mac_header; +} + +static inline void skb_reset_inner_mac_header(struct sk_buff *skb) +{ + skb->inner_mac_header = skb->data - skb->head; +} + +static inline void skb_set_inner_mac_header(struct sk_buff *skb, + const int offset) +{ + skb_reset_inner_mac_header(skb); + skb->inner_mac_header += offset; +} static inline bool skb_transport_header_was_set(const struct sk_buff *skb) { return skb->transport_header != ~0U; @@ -1604,6 +1623,21 @@ static inline void skb_set_inner_network_header(struct sk_buff *skb, skb->inner_network_header = skb->data + offset; } +static inline unsigned char *skb_inner_mac_header(const struct sk_buff *skb) +{ + return skb->inner_mac_header; +} + +static inline void skb_reset_inner_mac_header(struct sk_buff *skb) +{ + skb->inner_mac_header = skb->data; +} + +static inline void skb_set_inner_mac_header(struct sk_buff *skb, + const int offset) +{ + skb->inner_mac_header = skb->data + offset; +} static inline bool skb_transport_header_was_set(const struct sk_buff *skb) { return skb->transport_header != NULL; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 0278c7f787bf..31c6737d3189 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -673,6 +673,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->mac_header = old->mac_header; new->inner_transport_header = old->inner_transport_header; new->inner_network_header = old->inner_network_header; + new->inner_mac_header = old->inner_mac_header; skb_dst_copy(new, old); new->rxhash = old->rxhash; new->ooo_okay = old->ooo_okay; @@ -876,6 +877,7 @@ static void skb_headers_offset_update(struct sk_buff *skb, int off) skb->mac_header += off; skb->inner_transport_header += off; skb->inner_network_header += off; + skb->inner_mac_header += off; } static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) -- cgit From 731362674580cb0c696cd1b1a03d8461a10cf90a Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Thu, 7 Mar 2013 13:21:51 +0000 Subject: tunneling: Add generic Tunnel segmentation. 
Adds generic tunneling offloading support for IPv4-UDP based tunnels. GSO type is added to request this offload for a skb. netdev feature NETIF_F_UDP_TUNNEL is added for hardware offloaded udp-tunnel support. Currently no device supports this feature, software offload is used. This can be used by tunneling protocols like VXLAN. CC: Jesse Gross Signed-off-by: Pravin B Shelar Acked-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 7 +-- include/linux/skbuff.h | 2 + net/core/ethtool.c | 1 + net/ipv4/af_inet.c | 6 ++- net/ipv4/tcp.c | 1 + net/ipv4/udp.c | 115 +++++++++++++++++++++++++++++++--------- net/ipv6/ip6_offload.c | 1 + net/ipv6/udp_offload.c | 8 ++- 8 files changed, 111 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 3dd39340430e..f5e797c0c2a4 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -42,9 +42,9 @@ enum { NETIF_F_TSO6_BIT, /* ... TCPv6 segmentation */ NETIF_F_FSO_BIT, /* ... FCoE segmentation */ NETIF_F_GSO_GRE_BIT, /* ... GRE with TSO */ - /**/NETIF_F_GSO_LAST, /* [can't be last bit, see GSO_MASK] */ - NETIF_F_GSO_RESERVED2 /* ... free (fill GSO_MASK to 8 bits) */ - = NETIF_F_GSO_LAST, + NETIF_F_GSO_UDP_TUNNEL_BIT, /* ... UDP TUNNEL with TSO */ + /**/NETIF_F_GSO_LAST = /* last bit, see GSO_MASK */ + NETIF_F_GSO_UDP_TUNNEL_BIT, NETIF_F_FCOE_CRC_BIT, /* FCoE CRC32 */ NETIF_F_SCTP_CSUM_BIT, /* SCTP checksum offload */ @@ -103,6 +103,7 @@ enum { #define NETIF_F_RXFCS __NETIF_F(RXFCS) #define NETIF_F_RXALL __NETIF_F(RXALL) #define NETIF_F_GRE_GSO __NETIF_F(GSO_GRE) +#define NETIF_F_UDP_TUNNEL __NETIF_F(UDP_TUNNEL) /* Features valid for ethtool to change */ /* = all defined minus driver/device-class-related */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d7f96ff68f77..eb2106fe3bb4 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -316,6 +316,8 @@ enum { SKB_GSO_FCOE = 1 << 5, SKB_GSO_GRE = 1 << 6, + + SKB_GSO_UDP_TUNNEL = 1 << 7, }; #if BITS_PER_LONG > 32 diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 3e9b2c3e30f0..adc1351e6873 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -78,6 +78,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_TSO6_BIT] = "tx-tcp6-segmentation", [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation", [NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation", + [NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation", [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc", [NETIF_F_SCTP_CSUM_BIT] = "tx-checksum-sctp", diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index dc3f677360a5..9e5882caf8a7 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1283,6 +1283,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int ihl; int id; unsigned int offset = 0; + bool tunnel; if (unlikely(skb_shinfo(skb)->gso_type & ~(SKB_GSO_TCPV4 | @@ -1290,6 +1291,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, SKB_GSO_DODGY | SKB_GSO_TCP_ECN | SKB_GSO_GRE | + SKB_GSO_UDP_TUNNEL | 0))) goto out; @@ -1304,6 +1306,8 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, if (unlikely(!pskb_may_pull(skb, ihl))) goto out; + tunnel = !!skb->encapsulation; + __skb_pull(skb, ihl); skb_reset_transport_header(skb); iph = ip_hdr(skb); @@ -1323,7 +1327,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, skb = segs; do { iph = ip_hdr(skb); - if (proto == IPPROTO_UDP) { + if 
(!tunnel && proto == IPPROTO_UDP) { iph->id = htons(id); iph->frag_off = htons(offset >> 3); if (skb->next != NULL) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 47e854fcae24..8d14573ade77 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3044,6 +3044,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, SKB_GSO_TCP_ECN | SKB_GSO_TCPV6 | SKB_GSO_GRE | + SKB_GSO_UDP_TUNNEL | 0) || !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) goto out; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 265c42cf963c..41760e043bf5 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2272,31 +2272,88 @@ void __init udp_init(void) int udp4_ufo_send_check(struct sk_buff *skb) { - const struct iphdr *iph; - struct udphdr *uh; - - if (!pskb_may_pull(skb, sizeof(*uh))) + if (!pskb_may_pull(skb, sizeof(struct udphdr))) return -EINVAL; - iph = ip_hdr(skb); - uh = udp_hdr(skb); + if (likely(!skb->encapsulation)) { + const struct iphdr *iph; + struct udphdr *uh; - uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, - IPPROTO_UDP, 0); - skb->csum_start = skb_transport_header(skb) - skb->head; - skb->csum_offset = offsetof(struct udphdr, check); - skb->ip_summed = CHECKSUM_PARTIAL; + iph = ip_hdr(skb); + uh = udp_hdr(skb); + + uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, + IPPROTO_UDP, 0); + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct udphdr, check); + skb->ip_summed = CHECKSUM_PARTIAL; + } return 0; } +static struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, + netdev_features_t features) +{ + struct sk_buff *segs = ERR_PTR(-EINVAL); + int mac_len = skb->mac_len; + int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); + int outer_hlen; + netdev_features_t enc_features; + + if (unlikely(!pskb_may_pull(skb, tnl_hlen))) + goto out; + + skb->encapsulation = 0; + __skb_pull(skb, tnl_hlen); + skb_reset_mac_header(skb); + skb_set_network_header(skb, skb_inner_network_offset(skb)); + skb->mac_len = skb_inner_network_offset(skb); + + /* segment inner packet. */ + enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); + segs = skb_mac_gso_segment(skb, enc_features); + if (!segs || IS_ERR(segs)) + goto out; + + outer_hlen = skb_tnl_header_len(skb); + skb = segs; + do { + struct udphdr *uh; + int udp_offset = outer_hlen - tnl_hlen; + + skb->mac_len = mac_len; + + skb_push(skb, outer_hlen); + skb_reset_mac_header(skb); + skb_set_network_header(skb, mac_len); + skb_set_transport_header(skb, udp_offset); + uh = udp_hdr(skb); + uh->len = htons(skb->len - udp_offset); + + /* csum segment if tunnel sets skb with csum. 
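+ * (Editor's note, not in the original patch: each segment is shorter
+ * than the original skb, so the outer UDP checksum cannot be reused
+ * and is recomputed in full here; this is the software path, since no
+ * hardware yet offloads the outer checksum of tunnel-segmented
+ * packets.)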
*/ + if (unlikely(uh->check)) { + struct iphdr *iph = ip_hdr(skb); + + uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, + skb->len - udp_offset, + IPPROTO_UDP, 0); + uh->check = csum_fold(skb_checksum(skb, udp_offset, + skb->len - udp_offset, 0)); + if (uh->check == 0) + uh->check = CSUM_MANGLED_0; + + } + skb->ip_summed = CHECKSUM_NONE; + } while ((skb = skb->next)); +out: + return segs; +} + struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); unsigned int mss; - int offset; - __wsum csum; - mss = skb_shinfo(skb)->gso_size; if (unlikely(skb->len <= mss)) goto out; @@ -2306,6 +2363,7 @@ struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, int type = skb_shinfo(skb)->gso_type; if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | + SKB_GSO_UDP_TUNNEL | SKB_GSO_GRE) || !(type & (SKB_GSO_UDP)))) goto out; @@ -2316,20 +2374,27 @@ struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, goto out; } - /* Do software UFO. Complete and fill in the UDP checksum as HW cannot - * do checksum of UDP packets sent as multiple IP fragments. - */ - offset = skb_checksum_start_offset(skb); - csum = skb_checksum(skb, offset, skb->len - offset, 0); - offset += skb->csum_offset; - *(__sum16 *)(skb->data + offset) = csum_fold(csum); - skb->ip_summed = CHECKSUM_NONE; - /* Fragment the skb. IP headers of the fragments are updated in * inet_gso_segment() */ - segs = skb_segment(skb, features); + if (skb->encapsulation && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) + segs = skb_udp_tunnel_segment(skb, features); + else { + int offset; + __wsum csum; + + /* Do software UFO. Complete and fill in the UDP checksum as + * HW cannot do checksum of UDP packets sent as multiple + * IP fragments. + */ + offset = skb_checksum_start_offset(skb); + csum = skb_checksum(skb, offset, skb->len - offset, 0); + offset += skb->csum_offset; + *(__sum16 *)(skb->data + offset) = csum_fold(csum); + skb->ip_summed = CHECKSUM_NONE; + + segs = skb_segment(skb, features); + } out: return segs; } - diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 7a0d25a5479c..71b766ee821d 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -97,6 +97,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, SKB_GSO_DODGY | SKB_GSO_TCP_ECN | SKB_GSO_GRE | + SKB_GSO_UDP_TUNNEL | SKB_GSO_TCPV6 | 0))) goto out; diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index cf05cf073c51..3bb3a891a424 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -21,6 +21,10 @@ static int udp6_ufo_send_check(struct sk_buff *skb) const struct ipv6hdr *ipv6h; struct udphdr *uh; + /* UDP Tunnel offload on ipv6 is not yet supported. */ + if (skb->encapsulation) + return -EINVAL; + if (!pskb_may_pull(skb, sizeof(*uh))) return -EINVAL; @@ -56,7 +60,9 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, /* Packet is from an untrusted source, reset gso_segs. 
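 * (Editor's note, not part of the quoted source: "untrusted" means the
 * skb was flagged SKB_GSO_DODGY, e.g. it came from a guest or raw
 * socket, so the advertised segment count is not trusted and gso_segs
 * is recomputed from gso_size.)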
*/ int type = skb_shinfo(skb)->gso_type; - if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | + if (unlikely(type & ~(SKB_GSO_UDP | + SKB_GSO_DODGY | + SKB_GSO_UDP_TUNNEL | SKB_GSO_GRE) || !(type & (SKB_GSO_UDP)))) goto out; -- cgit From 6150f3bc0b4f94f0eea3e32b4e7462025e4bd972 Mon Sep 17 00:00:00 2001 From: Nicolas Royer Date: Wed, 20 Feb 2013 17:10:23 +0100 Subject: ARM: AT91SAM9G45: same platform data structure for all crypto peripherals MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only AES use DMA in AT91SAM9G45 (TDES and SHA use PDC). However latest Atmel TDES and SHA IP releases use DMA instead of PDC. --> Atmel TDES and SHA drivers need DMA platform data for those IP releases. Goal of this patch is to use the same platform data structure for all Atmel crypto peripherals. This structure contains information about DMA interface. Signed-off-by: Nicolas Royer Acked-by: Nicolas Ferre Acked-by: Eric Bénard Tested-by: Eric Bénard Signed-off-by: Herbert Xu --- arch/arm/mach-at91/at91sam9g45_devices.c | 14 ++++++-------- include/linux/platform_data/atmel-aes.h | 22 ---------------------- include/linux/platform_data/crypto-atmel.h | 22 ++++++++++++++++++++++ 3 files changed, 28 insertions(+), 30 deletions(-) delete mode 100644 include/linux/platform_data/atmel-aes.h create mode 100644 include/linux/platform_data/crypto-atmel.h (limited to 'include/linux') diff --git a/arch/arm/mach-at91/at91sam9g45_devices.c b/arch/arm/mach-at91/at91sam9g45_devices.c index 827c9f2a70fb..f0bf68268ca2 100644 --- a/arch/arm/mach-at91/at91sam9g45_devices.c +++ b/arch/arm/mach-at91/at91sam9g45_devices.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include @@ -1900,7 +1900,8 @@ static void __init at91_add_device_tdes(void) {} * -------------------------------------------------------------------- */ #if defined(CONFIG_CRYPTO_DEV_ATMEL_AES) || defined(CONFIG_CRYPTO_DEV_ATMEL_AES_MODULE) -static struct aes_platform_data aes_data; +static struct crypto_platform_data aes_data; +static struct crypto_dma_data alt_atslave; static u64 aes_dmamask = DMA_BIT_MASK(32); static struct resource aes_resources[] = { @@ -1931,23 +1932,20 @@ static struct platform_device at91sam9g45_aes_device = { static void __init at91_add_device_aes(void) { struct at_dma_slave *atslave; - struct aes_dma_data *alt_atslave; - - alt_atslave = kzalloc(sizeof(struct aes_dma_data), GFP_KERNEL); /* DMA TX slave channel configuration */ - atslave = &alt_atslave->txdata; + atslave = &alt_atslave.txdata; atslave->dma_dev = &at_hdmac_device.dev; atslave->cfg = ATC_FIFOCFG_ENOUGHSPACE | ATC_SRC_H2SEL_HW | ATC_SRC_PER(AT_DMA_ID_AES_RX); /* DMA RX slave channel configuration */ - atslave = &alt_atslave->rxdata; + atslave = &alt_atslave.rxdata; atslave->dma_dev = &at_hdmac_device.dev; atslave->cfg = ATC_FIFOCFG_ENOUGHSPACE | ATC_DST_H2SEL_HW | ATC_DST_PER(AT_DMA_ID_AES_TX); - aes_data.dma_slave = alt_atslave; + aes_data.dma_slave = &alt_atslave; platform_device_register(&at91sam9g45_aes_device); } #else diff --git a/include/linux/platform_data/atmel-aes.h b/include/linux/platform_data/atmel-aes.h deleted file mode 100644 index ab68082fbcb0..000000000000 --- a/include/linux/platform_data/atmel-aes.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef __LINUX_ATMEL_AES_H -#define __LINUX_ATMEL_AES_H - -#include - -/** - * struct aes_dma_data - DMA data for AES - */ -struct aes_dma_data { - struct at_dma_slave txdata; - struct at_dma_slave rxdata; -}; - -/** - * struct aes_platform_data - board-specific AES 
configuration - * @dma_slave: DMA slave interface to use in data transfers. - */ -struct aes_platform_data { - struct aes_dma_data *dma_slave; -}; - -#endif /* __LINUX_ATMEL_AES_H */ diff --git a/include/linux/platform_data/crypto-atmel.h b/include/linux/platform_data/crypto-atmel.h new file mode 100644 index 000000000000..b46e0d9062a0 --- /dev/null +++ b/include/linux/platform_data/crypto-atmel.h @@ -0,0 +1,22 @@ +#ifndef __LINUX_CRYPTO_ATMEL_H +#define __LINUX_CRYPTO_ATMEL_H + +#include + +/** + * struct crypto_dma_data - DMA data for AES/TDES/SHA + */ +struct crypto_dma_data { + struct at_dma_slave txdata; + struct at_dma_slave rxdata; +}; + +/** + * struct crypto_platform_data - board-specific AES/TDES/SHA configuration + * @dma_slave: DMA slave interface to use in data transfers. + */ +struct crypto_platform_data { + struct crypto_dma_data *dma_slave; +}; + +#endif /* __LINUX_CRYPTO_ATMEL_H */ -- cgit From e61667af2f77d481411f2ccd307fed2247d785a8 Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Sun, 10 Mar 2013 05:18:39 +0000 Subject: tcp: Remove unused tw_cookie_values from tcp_timewait_sock tw_cookie_values is never used in the TCP-stack. It was added by 435cf559f (TCPCT part 1d: define TCP cookie option, extend existing struct's), but already at that time it was not used at all, nor mentioned in the commit-message. Signed-off-by: Christoph Paasch Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index f28408c07dc2..515c3746b675 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -361,10 +361,6 @@ struct tcp_timewait_sock { #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *tw_md5_key; #endif - /* Few sockets in timewait have cookies; in that case, then this - * object holds a reference to them (tw_cookie_values->kref). - */ - struct tcp_cookie_values *tw_cookie_values; }; static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk) -- cgit From 26fd76cab2e61cedc5c25f7151fb31b57ddc53c7 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Fri, 22 Feb 2013 10:53:25 +0100 Subject: NFC: llcp: Implement socket options Some LLCP services (e.g. the validation ones) require some control over the LLCP link parameters like the receive window (RW) or the MIU extension (MIUX). This can only be done through socket options. 
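As an illustration only (not part of the patch), a user-space sketch of
setting the receive window; SOL_NFC and NFC_LLCP_RW are the values this
patch introduces, everything else is assumed boilerplate:

	#include <stdint.h>
	#include <stdio.h>
	#include <sys/socket.h>

	#define SOL_NFC		280	/* from include/linux/socket.h below */
	#define NFC_LLCP_RW	0	/* from include/uapi/linux/nfc.h below */

	/* sk is assumed to be an already-created AF_NFC LLCP socket. The
	 * option takes a u32 and is rejected with -EINVAL once the socket
	 * is bound, listening or connected, so set it before connect(). */
	static int set_llcp_rw(int sk, uint32_t rw)
	{
		if (setsockopt(sk, SOL_NFC, NFC_LLCP_RW, &rw, sizeof(rw)) < 0) {
			perror("setsockopt(NFC_LLCP_RW)");
			return -1;
		}
		return 0;
	}
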
Signed-off-by: Samuel Ortiz --- include/linux/socket.h | 1 + include/uapi/linux/nfc.h | 4 ++ net/nfc/llcp/llcp.h | 3 ++ net/nfc/llcp/sock.c | 119 ++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 125 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index 2b9f74b0ffea..428c37a1f95c 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -298,6 +298,7 @@ struct ucred { #define SOL_IUCV 277 #define SOL_CAIF 278 #define SOL_ALG 279 +#define SOL_NFC 280 /* IPX options */ #define IPX_TYPE 1 diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h index 7969f46f1bb3..855630fe731d 100644 --- a/include/uapi/linux/nfc.h +++ b/include/uapi/linux/nfc.h @@ -220,4 +220,8 @@ struct sockaddr_nfc_llcp { #define NFC_LLCP_DIRECTION_RX 0x00 #define NFC_LLCP_DIRECTION_TX 0x01 +/* socket option names */ +#define NFC_LLCP_RW 0 +#define NFC_LLCP_MIUX 1 + #endif /*__LINUX_NFC_H */ diff --git a/net/nfc/llcp/llcp.h b/net/nfc/llcp/llcp.h index 32cec81939e6..5f117adac2e5 100644 --- a/net/nfc/llcp/llcp.h +++ b/net/nfc/llcp/llcp.h @@ -104,6 +104,9 @@ struct nfc_llcp_sock { u8 dsap; char *service_name; size_t service_name_len; + u8 rw; + u16 miux; + /* Remote link parameters */ u8 remote_rw; diff --git a/net/nfc/llcp/sock.c b/net/nfc/llcp/sock.c index cc564992ba95..9357a756f7a9 100644 --- a/net/nfc/llcp/sock.c +++ b/net/nfc/llcp/sock.c @@ -223,6 +223,121 @@ error: return ret; } +static int nfc_llcp_setsockopt(struct socket *sock, int level, int optname, + char __user *optval, unsigned int optlen) +{ + struct sock *sk = sock->sk; + struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk); + u32 opt; + int err = 0; + + pr_debug("%p optname %d\n", sk, optname); + + if (level != SOL_NFC) + return -ENOPROTOOPT; + + lock_sock(sk); + + switch (optname) { + case NFC_LLCP_RW: + if (sk->sk_state == LLCP_CONNECTED || + sk->sk_state == LLCP_BOUND || + sk->sk_state == LLCP_LISTEN) { + err = -EINVAL; + break; + } + + if (get_user(opt, (u32 __user *) optval)) { + err = -EFAULT; + break; + } + + if (opt > LLCP_MAX_RW) { + err = -EINVAL; + break; + } + + llcp_sock->rw = (u8) opt; + + break; + + case NFC_LLCP_MIUX: + if (sk->sk_state == LLCP_CONNECTED || + sk->sk_state == LLCP_BOUND || + sk->sk_state == LLCP_LISTEN) { + err = -EINVAL; + break; + } + + if (get_user(opt, (u32 __user *) optval)) { + err = -EFAULT; + break; + } + + if (opt > LLCP_MAX_MIUX) { + err = -EINVAL; + break; + } + + llcp_sock->miux = (u16) opt; + + break; + + default: + err = -ENOPROTOOPT; + break; + } + + release_sock(sk); + + return err; +} + +static int nfc_llcp_getsockopt(struct socket *sock, int level, int optname, + char __user *optval, int __user *optlen) +{ + struct sock *sk = sock->sk; + struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk); + int len, err = 0; + + pr_debug("%p optname %d\n", sk, optname); + + if (level != SOL_NFC) + return -ENOPROTOOPT; + + if (get_user(len, optlen)) + return -EFAULT; + + len = min_t(u32, len, sizeof(u32)); + + lock_sock(sk); + + switch (optname) { + case NFC_LLCP_RW: + if (put_user(llcp_sock->rw, (u32 __user *) optval)) + err = -EFAULT; + + break; + + case NFC_LLCP_MIUX: + if (put_user(llcp_sock->miux, (u32 __user *) optval)) + err = -EFAULT; + + break; + + default: + err = -ENOPROTOOPT; + break; + } + + release_sock(sk); + + if (put_user(len, optlen)) + return -EFAULT; + + return err; +} + void nfc_llcp_accept_unlink(struct sock *sk) { struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk); @@ -735,8 +850,8 @@ static const 
struct proto_ops llcp_sock_ops = { .ioctl = sock_no_ioctl, .listen = llcp_sock_listen, .shutdown = sock_no_shutdown, - .setsockopt = sock_no_setsockopt, - .getsockopt = sock_no_getsockopt, + .setsockopt = nfc_llcp_setsockopt, + .getsockopt = nfc_llcp_getsockopt, .sendmsg = llcp_sock_sendmsg, .recvmsg = llcp_sock_recvmsg, .mmap = sock_no_mmap, -- cgit From 3a08a8f9f0936e182d387afd85fdc5d303381521 Mon Sep 17 00:00:00 2001 From: Raghavendra K T Date: Mon, 4 Mar 2013 23:32:07 +0530 Subject: kvm: Record the preemption status of vcpus using preempt notifiers Note that we mark as preempted only when vcpu's task state was Running during preemption. Thanks Jiannan, Avi for preemption notifier ideas. Thanks Gleb, PeterZ for their precious suggestions. Thanks Srikar for an idea on avoiding rcu lock while checking task state that improved overcommit numbers. Reviewed-by: Chegu Vinod Reviewed-by: Marcelo Tosatti Signed-off-by: Raghavendra K T Signed-off-by: Gleb Natapov --- include/linux/kvm_host.h | 1 + virt/kvm/kvm_main.c | 5 +++++ 2 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9fa13ebc3381..0f4941a9c9c8 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -253,6 +253,7 @@ struct kvm_vcpu { bool dy_eligible; } spin_loop; #endif + bool preempted; struct kvm_vcpu_arch arch; }; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index faf05bddd131..470f2bc8205a 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -244,6 +244,7 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) kvm_vcpu_set_in_spin_loop(vcpu, false); kvm_vcpu_set_dy_eligible(vcpu, false); + vcpu->preempted = false; r = kvm_arch_vcpu_init(vcpu); if (r < 0) @@ -2880,6 +2881,8 @@ struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn) static void kvm_sched_in(struct preempt_notifier *pn, int cpu) { struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); + if (vcpu->preempted) + vcpu->preempted = false; kvm_arch_vcpu_load(vcpu, cpu); } @@ -2889,6 +2892,8 @@ static void kvm_sched_out(struct preempt_notifier *pn, { struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); + if (current->state == TASK_RUNNING) + vcpu->preempted = true; kvm_arch_vcpu_put(vcpu); } -- cgit From e0c25362384f4be9c755c98560cd4b1cdb2ec79c Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Sun, 10 Mar 2013 21:52:53 -0500 Subject: clocksource: add empty version of clocksource_of_init Add an empty clocksource_of_init when !CLKSRC_OF. This is needed for builds where no timer has selected CLKSRC_OF. Signed-off-by: Rob Herring Cc: John Stultz Cc: Thomas Gleixner --- include/linux/clocksource.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 27cfda427dd9..08ed5e19d8c6 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -340,6 +340,7 @@ extern void clocksource_of_init(void); __used __section(__clksrc_of_table) \ = { .compatible = compat, .data = fn }; #else +static inline void clocksource_of_init(void) {} #define CLOCKSOURCE_OF_DECLARE(name, compat, fn) #endif -- cgit From 44f507163d9e51238458ee6904b4d71fb0723723 Mon Sep 17 00:00:00 2001 From: Vijay Mohan Pandarathil Date: Mon, 11 Mar 2013 09:28:44 -0600 Subject: VFIO: Wrapper for getting reference to vfio_device - Added vfio_device_get_from_dev() as wrapper to get reference to vfio_device from struct device. 
- Added vfio_device_data() as a wrapper to get device_data from vfio_device. Signed-off-by: Vijay Mohan Pandarathil Signed-off-by: Alex Williamson --- drivers/vfio/vfio.c | 30 +++++++++++++++++++++++++++++- include/linux/vfio.h | 3 +++ 2 files changed, 32 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index fcc12f3e60a3..21eddd9e0f26 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -392,12 +392,13 @@ static void vfio_device_release(struct kref *kref) } /* Device reference always implies a group reference */ -static void vfio_device_put(struct vfio_device *device) +void vfio_device_put(struct vfio_device *device) { struct vfio_group *group = device->group; kref_put_mutex(&device->kref, vfio_device_release, &group->device_lock); vfio_group_put(group); } +EXPORT_SYMBOL_GPL(vfio_device_put); static void vfio_device_get(struct vfio_device *device) { @@ -627,6 +628,33 @@ int vfio_add_group_dev(struct device *dev, } EXPORT_SYMBOL_GPL(vfio_add_group_dev); +/** + * Get a reference to the vfio_device for a device that is known to + * be bound to a vfio driver. The driver implicitly holds a + * vfio_device reference between vfio_add_group_dev and + * vfio_del_group_dev. We can therefore use drvdata to increment + * that reference from the struct device. This additional + * reference must be released by calling vfio_device_put. + */ +struct vfio_device *vfio_device_get_from_dev(struct device *dev) +{ + struct vfio_device *device = dev_get_drvdata(dev); + + vfio_device_get(device); + + return device; +} +EXPORT_SYMBOL_GPL(vfio_device_get_from_dev); + +/* + * Caller must hold a reference to the vfio_device + */ +void *vfio_device_data(struct vfio_device *device) +{ + return device->device_data; +} +EXPORT_SYMBOL_GPL(vfio_device_data); + /* Given a referenced group, check if it contains the device */ static bool vfio_dev_present(struct vfio_group *group, struct device *dev) { diff --git a/include/linux/vfio.h b/include/linux/vfio.h index ab9e86224c54..ac8d488e4372 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -45,6 +45,9 @@ extern int vfio_add_group_dev(struct device *dev, void *device_data); extern void *vfio_del_group_dev(struct device *dev); +extern struct vfio_device *vfio_device_get_from_dev(struct device *dev); +extern void vfio_device_put(struct vfio_device *device); +extern void *vfio_device_data(struct vfio_device *device); /** * struct vfio_iommu_driver_ops - VFIO IOMMU driver callbacks -- cgit From b818d1a7f72575eef17e00dc4085512c9cc8897d Mon Sep 17 00:00:00 2001 From: Hector Palacios Date: Sun, 10 Mar 2013 22:50:02 +0000 Subject: phy/micrel: Add support for KSZ8031 Micrel PHY KSZ8031 is similar to KSZ8021 and also requires the special initialization of "Operation Mode Strap Override" in reg 0x16 introduced in 212ea99 (phy/micrel: Implement support for KSZ8021). Signed-off-by: Hector Palacios Reviewed-by: Marek Vasut Signed-off-by: David S. 
Miller --- drivers/net/phy/micrel.c | 14 ++++++++++++++ include/linux/micrel_phy.h | 1 + 2 files changed, 15 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index abf7b6153d00..018af1852fe1 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -191,6 +191,19 @@ static struct phy_driver ksphy_driver[] = { .ack_interrupt = kszphy_ack_interrupt, .config_intr = kszphy_config_intr, .driver = { .owner = THIS_MODULE,}, +}, { + .phy_id = PHY_ID_KSZ8031, + .phy_id_mask = 0x00ffffff, + .name = "Micrel KSZ8031", + .features = (PHY_BASIC_FEATURES | SUPPORTED_Pause | + SUPPORTED_Asym_Pause), + .flags = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT, + .config_init = ksz8021_config_init, + .config_aneg = genphy_config_aneg, + .read_status = genphy_read_status, + .ack_interrupt = kszphy_ack_interrupt, + .config_intr = kszphy_config_intr, + .driver = { .owner = THIS_MODULE,}, }, { .phy_id = PHY_ID_KSZ8041, .phy_id_mask = 0x00fffff0, @@ -325,6 +338,7 @@ static struct mdio_device_id __maybe_unused micrel_tbl[] = { { PHY_ID_KSZ8001, 0x00ffffff }, { PHY_ID_KS8737, 0x00fffff0 }, { PHY_ID_KSZ8021, 0x00ffffff }, + { PHY_ID_KSZ8031, 0x00ffffff }, { PHY_ID_KSZ8041, 0x00fffff0 }, { PHY_ID_KSZ8051, 0x00fffff0 }, { PHY_ID_KSZ8061, 0x00fffff0 }, diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h index 9dbb41a4e250..8752dbbc6135 100644 --- a/include/linux/micrel_phy.h +++ b/include/linux/micrel_phy.h @@ -19,6 +19,7 @@ #define PHY_ID_KSZ9021 0x00221610 #define PHY_ID_KS8737 0x00221720 #define PHY_ID_KSZ8021 0x00221555 +#define PHY_ID_KSZ8031 0x00221556 #define PHY_ID_KSZ8041 0x00221510 #define PHY_ID_KSZ8051 0x00221550 /* same id: ks8001 Rev. A/B, and ks8721 Rev 3. */ -- cgit From 6ba8a3b19e764b6a65e4030ab0999be50c291e6c Mon Sep 17 00:00:00 2001 From: Nandita Dukkipati Date: Mon, 11 Mar 2013 10:00:43 +0000 Subject: tcp: Tail loss probe (TLP) This patch series implement the Tail loss probe (TLP) algorithm described in http://tools.ietf.org/html/draft-dukkipati-tcpm-tcp-loss-probe-01. The first patch implements the basic algorithm. TLP's goal is to reduce tail latency of short transactions. It achieves this by converting retransmission timeouts (RTOs) occuring due to tail losses (losses at end of transactions) into fast recovery. TLP transmits one packet in two round-trips when a connection is in Open state and isn't receiving any ACKs. The transmitted packet, aka loss probe, can be either new or a retransmission. When there is tail loss, the ACK from a loss probe triggers FACK/early-retransmit based fast recovery, thus avoiding a costly RTO. In the absence of loss, there is no change in the connection state. PTO stands for probe timeout. It is a timer event indicating that an ACK is overdue and triggers a loss probe packet. The PTO value is set to max(2*SRTT, 10ms) and is adjusted to account for delayed ACK timer when there is only one oustanding packet. TLP Algorithm On transmission of new data in Open state: -> packets_out > 1: schedule PTO in max(2*SRTT, 10ms). -> packets_out == 1: schedule PTO in max(2*RTT, 1.5*RTT + 200ms) -> PTO = min(PTO, RTO) Conditions for scheduling PTO: -> Connection is in Open state. -> Connection is either cwnd limited or no new data to send. -> Number of probes per tail loss episode is limited to one. -> Connection is SACK enabled. When PTO fires: new_segment_exists: -> transmit new segment. -> packets_out++. cwnd remains same. no_new_packet: -> retransmit the last segment. 
Its ACK triggers FACK or early retransmit based recovery. ACK path: -> rearm RTO at start of ACK processing. -> reschedule PTO if need be. In addition, the patch includes a small variation to the Early Retransmit (ER) algorithm, such that ER and TLP together can in principle recover any N-degree of tail loss through fast recovery. TLP is controlled by the same sysctl as ER, tcp_early_retrans sysctl. tcp_early_retrans==0; disables TLP and ER. ==1; enables RFC5827 ER. ==2; delayed ER. ==3; TLP and delayed ER. [DEFAULT] ==4; TLP only. The TLP patch series have been extensively tested on Google Web servers. It is most effective for short Web trasactions, where it reduced RTOs by 15% and improved HTTP response time (average by 6%, 99th percentile by 10%). The transmitted probes account for <0.5% of the overall transmissions. Signed-off-by: Nandita Dukkipati Acked-by: Neal Cardwell Acked-by: Yuchung Cheng Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 8 ++- include/linux/tcp.h | 1 - include/net/inet_connection_sock.h | 5 +- include/net/tcp.h | 6 +- include/uapi/linux/snmp.h | 1 + net/ipv4/inet_diag.c | 4 +- net/ipv4/proc.c | 1 + net/ipv4/sysctl_net_ipv4.c | 4 +- net/ipv4/tcp_input.c | 24 ++++--- net/ipv4/tcp_ipv4.c | 4 +- net/ipv4/tcp_output.c | 128 +++++++++++++++++++++++++++++++-- net/ipv4/tcp_timer.c | 13 ++-- 12 files changed, 171 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index dc2dc87d2557..1cae6c383e1b 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -190,7 +190,9 @@ tcp_early_retrans - INTEGER Enable Early Retransmit (ER), per RFC 5827. ER lowers the threshold for triggering fast retransmit when the amount of outstanding data is small and when no previously unsent data can be transmitted (such - that limited transmit could be used). + that limited transmit could be used). Also controls the use of + Tail loss probe (TLP) that converts RTOs occuring due to tail + losses into fast recovery (draft-dukkipati-tcpm-tcp-loss-probe-01). Possible values: 0 disables ER 1 enables ER @@ -198,7 +200,9 @@ tcp_early_retrans - INTEGER by a fourth of RTT. This mitigates connection falsely recovers when network has a small degree of reordering (less than 3 packets). - Default: 2 + 3 enables delayed ER and TLP. + 4 enables TLP only. + Default: 3 tcp_ecn - INTEGER Control use of Explicit Congestion Notification (ECN) by TCP. 
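Editor's aside (not part of the patch): the five sysctl modes decompose
into two independent mechanisms. A standalone C sketch of the mapping,
with illustrative names, mirroring the gating in
tcp_enable_early_retrans(), tcp_pause_early_retransmit() and
tcp_schedule_loss_probe() in the hunks below:

	#include <stdbool.h>

	struct loss_recovery_modes {
		bool early_retrans;	/* RFC 5827 early retransmit */
		bool delayed_er;	/* ER delayed by a fourth of RTT */
		bool tlp;		/* tail loss probe */
	};

	static struct loss_recovery_modes decode(int tcp_early_retrans)
	{
		struct loss_recovery_modes m = { false, false, false };

		m.early_retrans = tcp_early_retrans >= 1 &&
				  tcp_early_retrans <= 3;	/* mode 4 is TLP only */
		m.delayed_er	= tcp_early_retrans == 2 ||
				  tcp_early_retrans == 3;
		m.tlp		= tcp_early_retrans >= 3;	/* modes 3 and 4 */
		return m;
	}
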
diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 515c3746b675..01860d74555c 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -201,7 +201,6 @@ struct tcp_sock { unused : 1; u8 repair_queue; u8 do_early_retrans:1,/* Enable RFC5827 early-retransmit */ - early_retrans_delayed:1, /* Delayed ER timer installed */ syn_data:1, /* SYN includes data */ syn_fastopen:1, /* SYN includes Fast Open option */ syn_data_acked:1;/* data in SYN is acked by SYN-ACK */ diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 183292722f6e..de2c78529afa 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -133,6 +133,8 @@ struct inet_connection_sock { #define ICSK_TIME_RETRANS 1 /* Retransmit timer */ #define ICSK_TIME_DACK 2 /* Delayed ack timer */ #define ICSK_TIME_PROBE0 3 /* Zero window probe timer */ +#define ICSK_TIME_EARLY_RETRANS 4 /* Early retransmit timer */ +#define ICSK_TIME_LOSS_PROBE 5 /* Tail loss probe timer */ static inline struct inet_connection_sock *inet_csk(const struct sock *sk) { @@ -222,7 +224,8 @@ static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what, when = max_when; } - if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) { + if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0 || + what == ICSK_TIME_EARLY_RETRANS || what == ICSK_TIME_LOSS_PROBE) { icsk->icsk_pending = what; icsk->icsk_timeout = jiffies + when; sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout); diff --git a/include/net/tcp.h b/include/net/tcp.h index a2baa5e4ba31..ab9f947b118b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -543,6 +543,8 @@ extern bool tcp_syn_flood_action(struct sock *sk, extern void tcp_push_one(struct sock *, unsigned int mss_now); extern void tcp_send_ack(struct sock *sk); extern void tcp_send_delayed_ack(struct sock *sk); +extern void tcp_send_loss_probe(struct sock *sk); +extern bool tcp_schedule_loss_probe(struct sock *sk); /* tcp_input.c */ extern void tcp_cwnd_application_limited(struct sock *sk); @@ -873,8 +875,8 @@ static inline void tcp_enable_fack(struct tcp_sock *tp) static inline void tcp_enable_early_retrans(struct tcp_sock *tp) { tp->do_early_retrans = sysctl_tcp_early_retrans && - !sysctl_tcp_thin_dupack && sysctl_tcp_reordering == 3; - tp->early_retrans_delayed = 0; + sysctl_tcp_early_retrans < 4 && !sysctl_tcp_thin_dupack && + sysctl_tcp_reordering == 3; } static inline void tcp_disable_early_retrans(struct tcp_sock *tp) diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index b49eab89c9fd..290bed6b085f 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -202,6 +202,7 @@ enum LINUX_MIB_TCPFORWARDRETRANS, /* TCPForwardRetrans */ LINUX_MIB_TCPSLOWSTARTRETRANS, /* TCPSlowStartRetrans */ LINUX_MIB_TCPTIMEOUTS, /* TCPTimeouts */ + LINUX_MIB_TCPLOSSPROBES, /* TCPLossProbes */ LINUX_MIB_TCPRENORECOVERYFAIL, /* TCPRenoRecoveryFail */ LINUX_MIB_TCPSACKRECOVERYFAIL, /* TCPSackRecoveryFail */ LINUX_MIB_TCPSCHEDULERFAILED, /* TCPSchedulerFailed */ diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 7afa2c3c788f..8620408af574 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -158,7 +158,9 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, #define EXPIRES_IN_MS(tmo) DIV_ROUND_UP((tmo - jiffies) * 1000, HZ) - if (icsk->icsk_pending == ICSK_TIME_RETRANS) { + if (icsk->icsk_pending == ICSK_TIME_RETRANS || + icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || 
+ icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { r->idiag_timer = 1; r->idiag_retrans = icsk->icsk_retransmits; r->idiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 32030a24e776..4c35911d935f 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -224,6 +224,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPForwardRetrans", LINUX_MIB_TCPFORWARDRETRANS), SNMP_MIB_ITEM("TCPSlowStartRetrans", LINUX_MIB_TCPSLOWSTARTRETRANS), SNMP_MIB_ITEM("TCPTimeouts", LINUX_MIB_TCPTIMEOUTS), + SNMP_MIB_ITEM("TCPLossProbes", LINUX_MIB_TCPLOSSPROBES), SNMP_MIB_ITEM("TCPRenoRecoveryFail", LINUX_MIB_TCPRENORECOVERYFAIL), SNMP_MIB_ITEM("TCPSackRecoveryFail", LINUX_MIB_TCPSACKRECOVERYFAIL), SNMP_MIB_ITEM("TCPSchedulerFailed", LINUX_MIB_TCPSCHEDULERFAILED), diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 960fd29d9b8e..cca4550f4082 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -28,7 +28,7 @@ static int zero; static int one = 1; -static int two = 2; +static int four = 4; static int tcp_retr1_max = 255; static int ip_local_port_range_min[] = { 1, 1 }; static int ip_local_port_range_max[] = { 65535, 65535 }; @@ -760,7 +760,7 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = &zero, - .extra2 = &two, + .extra2 = &four, }, { .procname = "udp_mem", diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 0d9bdacce99f..b794f89ac1f2 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -98,7 +98,7 @@ int sysctl_tcp_frto_response __read_mostly; int sysctl_tcp_thin_dupack __read_mostly; int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; -int sysctl_tcp_early_retrans __read_mostly = 2; +int sysctl_tcp_early_retrans __read_mostly = 3; #define FLAG_DATA 0x01 /* Incoming frame contained data. */ #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ @@ -2150,15 +2150,16 @@ static bool tcp_pause_early_retransmit(struct sock *sk, int flag) * max(RTT/4, 2msec) unless ack has ECE mark, no RTT samples * available, or RTO is scheduled to fire first. */ - if (sysctl_tcp_early_retrans < 2 || (flag & FLAG_ECE) || !tp->srtt) + if (sysctl_tcp_early_retrans < 2 || sysctl_tcp_early_retrans > 3 || + (flag & FLAG_ECE) || !tp->srtt) return false; delay = max_t(unsigned long, (tp->srtt >> 5), msecs_to_jiffies(2)); if (!time_after(inet_csk(sk)->icsk_timeout, (jiffies + delay))) return false; - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, delay, TCP_RTO_MAX); - tp->early_retrans_delayed = 1; + inet_csk_reset_xmit_timer(sk, ICSK_TIME_EARLY_RETRANS, delay, + TCP_RTO_MAX); return true; } @@ -2321,7 +2322,7 @@ static bool tcp_time_to_recover(struct sock *sk, int flag) * interval if appropriate. 
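 * (Editor's note, not part of the quoted source: the "==" to ">="
 * relaxation in this hunk is the ER variation mentioned in the
 * changelog; it lets early retransmit arm with several un-SACKed
 * segments outstanding, so that ER and TLP together can recover any
 * degree of tail loss.)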
*/ if (tp->do_early_retrans && !tp->retrans_out && tp->sacked_out && - (tp->packets_out == (tp->sacked_out + 1) && tp->packets_out < 4) && + (tp->packets_out >= (tp->sacked_out + 1) && tp->packets_out < 4) && !tcp_may_send_now(sk)) return !tcp_pause_early_retransmit(sk, flag); @@ -3081,6 +3082,7 @@ static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) */ void tcp_rearm_rto(struct sock *sk) { + const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); /* If the retrans timer is currently being used by Fast Open @@ -3094,12 +3096,13 @@ void tcp_rearm_rto(struct sock *sk) } else { u32 rto = inet_csk(sk)->icsk_rto; /* Offset the time elapsed after installing regular RTO */ - if (tp->early_retrans_delayed) { + if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || + icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { struct sk_buff *skb = tcp_write_queue_head(sk); const u32 rto_time_stamp = TCP_SKB_CB(skb)->when + rto; s32 delta = (s32)(rto_time_stamp - tcp_time_stamp); /* delta may not be positive if the socket is locked - * when the delayed ER timer fires and is rescheduled. + * when the retrans timer fires and is rescheduled. */ if (delta > 0) rto = delta; @@ -3107,7 +3110,6 @@ void tcp_rearm_rto(struct sock *sk) inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto, TCP_RTO_MAX); } - tp->early_retrans_delayed = 0; } /* This function is called when the delayed ER timer fires. TCP enters @@ -3601,7 +3603,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (after(ack, tp->snd_nxt)) goto invalid_ack; - if (tp->early_retrans_delayed) + if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || + icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) tcp_rearm_rto(sk); if (after(ack, prior_snd_una)) @@ -3678,6 +3681,9 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (dst) dst_confirm(dst); } + + if (icsk->icsk_pending == ICSK_TIME_RETRANS) + tcp_schedule_loss_probe(sk); return 1; no_queue: diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 8cdee120a50c..b7ab868c8284 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2703,7 +2703,9 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) __u16 srcp = ntohs(inet->inet_sport); int rx_queue; - if (icsk->icsk_pending == ICSK_TIME_RETRANS) { + if (icsk->icsk_pending == ICSK_TIME_RETRANS || + icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || + icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { timer_active = 1; timer_expires = icsk->icsk_timeout; } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e2b4461074da..beb63dbc85f5 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -74,6 +74,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, /* Account for new data that has been sent to the network. 
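 * (Editor's note, not part of the quoted source: the hunk below re-arms
 * the RTO whenever an early-retransmit or loss-probe timer was pending,
 * replacing the removed early_retrans_delayed flag.)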
*/ static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb) { + struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); unsigned int prior_packets = tp->packets_out; @@ -85,7 +86,8 @@ static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb) tp->frto_counter = 3; tp->packets_out += tcp_skb_pcount(skb); - if (!prior_packets || tp->early_retrans_delayed) + if (!prior_packets || icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || + icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) tcp_rearm_rto(sk); } @@ -1959,6 +1961,9 @@ static int tcp_mtu_probe(struct sock *sk) * snd_up-64k-mss .. snd_up cannot be large. However, taking into * account rare use of URG, this is not a big flaw. * + * Send at most one packet when push_one > 0. Temporarily ignore + * cwnd limit to force at most one packet out when push_one == 2. + * Returns true, if no segments are in flight and we have queued segments, * but cannot send anything now because of SWS or another problem. */ @@ -1994,8 +1999,13 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, goto repair; /* Skip network transmission */ cwnd_quota = tcp_cwnd_test(tp, skb); - if (!cwnd_quota) - break; + if (!cwnd_quota) { + if (push_one == 2) + /* Force out a loss probe pkt. */ + cwnd_quota = 1; + else + break; + } if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now))) break; @@ -2049,10 +2059,120 @@ repair: if (likely(sent_pkts)) { if (tcp_in_cwnd_reduction(sk)) tp->prr_out += sent_pkts; + + /* Send one loss probe per tail loss episode. */ + if (push_one != 2) + tcp_schedule_loss_probe(sk); tcp_cwnd_validate(sk); return false; } - return !tp->packets_out && tcp_send_head(sk); + return (push_one == 2) || (!tp->packets_out && tcp_send_head(sk)); +} + +bool tcp_schedule_loss_probe(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); + u32 timeout, tlp_time_stamp, rto_time_stamp; + u32 rtt = tp->srtt >> 3; + + if (WARN_ON(icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS)) + return false; + /* No consecutive loss probes. */ + if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) { + tcp_rearm_rto(sk); + return false; + } + /* Don't do any loss probe on a Fast Open connection before 3WHS + * finishes. + */ + if (sk->sk_state == TCP_SYN_RECV) + return false; + + /* TLP is only scheduled when next timer event is RTO. */ + if (icsk->icsk_pending != ICSK_TIME_RETRANS) + return false; + + /* Schedule a loss probe in 2*RTT for SACK capable connections + * in Open state, that are either limited by cwnd or application. + */ + if (sysctl_tcp_early_retrans < 3 || !rtt || !tp->packets_out || + !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open) + return false; + + if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) && + tcp_send_head(sk)) + return false; + + /* Probe timeout is at least 1.5*rtt + TCP_DELACK_MAX to account + * for delayed ack when there's one outstanding packet. + */ + timeout = rtt << 1; + if (tp->packets_out == 1) + timeout = max_t(u32, timeout, + (rtt + (rtt >> 1) + TCP_DELACK_MAX)); + timeout = max_t(u32, timeout, msecs_to_jiffies(10)); + + /* If RTO is shorter, just schedule TLP in its place. 
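+ * (Editor's note, not in the original patch: this clamp implements the
+ * PTO = min(PTO, RTO) step from the algorithm description above.)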
*/ + tlp_time_stamp = tcp_time_stamp + timeout; + rto_time_stamp = (u32)inet_csk(sk)->icsk_timeout; + if ((s32)(tlp_time_stamp - rto_time_stamp) > 0) { + s32 delta = rto_time_stamp - tcp_time_stamp; + if (delta > 0) + timeout = delta; + } + + inet_csk_reset_xmit_timer(sk, ICSK_TIME_LOSS_PROBE, timeout, + TCP_RTO_MAX); + return true; +} + +/* When probe timeout (PTO) fires, send a new segment if one exists, else + * retransmit the last segment. + */ +void tcp_send_loss_probe(struct sock *sk) +{ + struct sk_buff *skb; + int pcount; + int mss = tcp_current_mss(sk); + int err = -1; + + if (tcp_send_head(sk) != NULL) { + err = tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC); + goto rearm_timer; + } + + /* Retransmit last segment. */ + skb = tcp_write_queue_tail(sk); + if (WARN_ON(!skb)) + goto rearm_timer; + + pcount = tcp_skb_pcount(skb); + if (WARN_ON(!pcount)) + goto rearm_timer; + + if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) { + if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss))) + goto rearm_timer; + skb = tcp_write_queue_tail(sk); + } + + if (WARN_ON(!skb || !tcp_skb_pcount(skb))) + goto rearm_timer; + + /* Probe with zero data doesn't trigger fast recovery. */ + if (skb->len > 0) + err = __tcp_retransmit_skb(sk, skb); + +rearm_timer: + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + inet_csk(sk)->icsk_rto, + TCP_RTO_MAX); + + if (likely(!err)) + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPLOSSPROBES); + return; } /* Push out any pending frames which were held back due to diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index b78aac30c498..ecd61d54147f 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -342,10 +342,6 @@ void tcp_retransmit_timer(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); - if (tp->early_retrans_delayed) { - tcp_resume_early_retransmit(sk); - return; - } if (tp->fastopen_rsk) { WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV && sk->sk_state != TCP_FIN_WAIT1); @@ -495,13 +491,20 @@ void tcp_write_timer_handler(struct sock *sk) } event = icsk->icsk_pending; - icsk->icsk_pending = 0; switch (event) { + case ICSK_TIME_EARLY_RETRANS: + tcp_resume_early_retransmit(sk); + break; + case ICSK_TIME_LOSS_PROBE: + tcp_send_loss_probe(sk); + break; case ICSK_TIME_RETRANS: + icsk->icsk_pending = 0; tcp_retransmit_timer(sk); break; case ICSK_TIME_PROBE0: + icsk->icsk_pending = 0; tcp_probe_timer(sk); break; } -- cgit From 9b717a8d245075ffb8e95a2dfb4ee97ce4747457 Mon Sep 17 00:00:00 2001 From: Nandita Dukkipati Date: Mon, 11 Mar 2013 10:00:44 +0000 Subject: tcp: TLP loss detection. This is the second of the TLP patch series; it augments the basic TLP algorithm with a loss detection scheme. This patch implements a mechanism for loss detection when a Tail loss probe retransmission plugs a hole thereby masking packet loss from the sender. The loss detection algorithm relies on counting TLP dupacks as outlined in Sec. 3 of: http://tools.ietf.org/html/draft-dukkipati-tcpm-tcp-loss-probe-01 The basic idea is: Sender keeps track of TLP "episode" upon retransmission of a TLP packet. An episode ends when the sender receives an ACK above the SND.NXT (tracked by tlp_high_seq) at the time of the episode. We want to make sure that before the episode ends the sender receives a "TLP dupack", indicating that the TLP retransmission was unnecessary, so there was no loss/hole that needed plugging. 
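In the patch below this is the is_tlp_dupack test in
tcp_process_tlp_ack(); restated on its own (editor's framing, the
condition itself is verbatim from the hunk):

	bool is_tlp_dupack = (ack == tp->tlp_high_seq) &&
			     !(flag & (FLAG_SND_UNA_ADVANCED |
				       FLAG_NOT_DUP | FLAG_DATA_SACKED));

that is, an ACK exactly at tlp_high_seq that neither advances SND.UNA
nor carries new data or SACK information.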
If the sender gets no TLP dupack before the end of the episode, then it reduces ssthresh and the congestion window, because the TLP packet arriving at the receiver probably plugged a hole. Signed-off-by: Nandita Dukkipati Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 + include/uapi/linux/snmp.h | 1 + net/ipv4/proc.c | 1 + net/ipv4/tcp_input.c | 39 +++++++++++++++++++++++++++++++++++++++ net/ipv4/tcp_minisocks.c | 1 + net/ipv4/tcp_output.c | 9 +++++++++ net/ipv4/tcp_timer.c | 2 ++ 7 files changed, 54 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 01860d74555c..763c108ee03d 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -204,6 +204,7 @@ struct tcp_sock { syn_data:1, /* SYN includes data */ syn_fastopen:1, /* SYN includes Fast Open option */ syn_data_acked:1;/* data in SYN is acked by SYN-ACK */ + u32 tlp_high_seq; /* snd_nxt at the time of TLP retransmit. */ /* RTT measurement */ u32 srtt; /* smoothed round trip time << 3 */ diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index 290bed6b085f..e00013a1debc 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -203,6 +203,7 @@ enum LINUX_MIB_TCPSLOWSTARTRETRANS, /* TCPSlowStartRetrans */ LINUX_MIB_TCPTIMEOUTS, /* TCPTimeouts */ LINUX_MIB_TCPLOSSPROBES, /* TCPLossProbes */ + LINUX_MIB_TCPLOSSPROBERECOVERY, /* TCPLossProbeRecovery */ LINUX_MIB_TCPRENORECOVERYFAIL, /* TCPRenoRecoveryFail */ LINUX_MIB_TCPSACKRECOVERYFAIL, /* TCPSackRecoveryFail */ LINUX_MIB_TCPSCHEDULERFAILED, /* TCPSchedulerFailed */ diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 4c35911d935f..b6f2ea174898 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -225,6 +225,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPSlowStartRetrans", LINUX_MIB_TCPSLOWSTARTRETRANS), SNMP_MIB_ITEM("TCPTimeouts", LINUX_MIB_TCPTIMEOUTS), SNMP_MIB_ITEM("TCPLossProbes", LINUX_MIB_TCPLOSSPROBES), + SNMP_MIB_ITEM("TCPLossProbeRecovery", LINUX_MIB_TCPLOSSPROBERECOVERY), SNMP_MIB_ITEM("TCPRenoRecoveryFail", LINUX_MIB_TCPRENORECOVERYFAIL), SNMP_MIB_ITEM("TCPSackRecoveryFail", LINUX_MIB_TCPSACKRECOVERYFAIL), SNMP_MIB_ITEM("TCPSchedulerFailed", LINUX_MIB_TCPSCHEDULERFAILED), diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index b794f89ac1f2..836d74dd0187 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2682,6 +2682,7 @@ static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh) struct tcp_sock *tp = tcp_sk(sk); tp->high_seq = tp->snd_nxt; + tp->tlp_high_seq = 0; tp->snd_cwnd_cnt = 0; tp->prior_cwnd = tp->snd_cwnd; tp->prr_delivered = 0; @@ -3569,6 +3570,38 @@ static void tcp_send_challenge_ack(struct sock *sk) } } +/* This routine deals with acks during a TLP episode. + * Ref: loss detection algorithm in draft-dukkipati-tcpm-tcp-loss-probe. + */ +static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) +{ + struct tcp_sock *tp = tcp_sk(sk); + bool is_tlp_dupack = (ack == tp->tlp_high_seq) && + !(flag & (FLAG_SND_UNA_ADVANCED | + FLAG_NOT_DUP | FLAG_DATA_SACKED)); + + /* Mark the end of TLP episode on receiving TLP dupack or when + * ack is after tlp_high_seq. + */ + if (is_tlp_dupack) { + tp->tlp_high_seq = 0; + return; + } + + if (after(ack, tp->tlp_high_seq)) { + tp->tlp_high_seq = 0; + /* Don't reduce cwnd if DSACK arrives for TLP retrans. 
*/ + if (!(flag & FLAG_DSACKING_ACK)) { + tcp_init_cwnd_reduction(sk, true); + tcp_set_ca_state(sk, TCP_CA_CWR); + tcp_end_cwnd_reduction(sk); + tcp_set_ca_state(sk, TCP_CA_Open); + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPLOSSPROBERECOVERY); + } + } +} + /* This routine deals with incoming acks, but not outgoing ones. */ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) { @@ -3676,6 +3709,9 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) tcp_cong_avoid(sk, ack, prior_in_flight); } + if (tp->tlp_high_seq) + tcp_process_tlp_ack(sk, ack, flag); + if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) { struct dst_entry *dst = __sk_dst_get(sk); if (dst) @@ -3697,6 +3733,9 @@ no_queue: */ if (tcp_send_head(sk)) tcp_ack_probe(sk); + + if (tp->tlp_high_seq) + tcp_process_tlp_ack(sk, ack, flag); return 1; invalid_ack: diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index b83a49cc3816..4bdb09fca401 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -440,6 +440,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->fackets_out = 0; newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH; tcp_enable_early_retrans(newtp); + newtp->tlp_high_seq = 0; /* So many TCP implementations out there (incorrectly) count the * initial SYN frame in their delayed-ACK and congestion control diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index beb63dbc85f5..8e7742f0b5d2 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2132,6 +2132,7 @@ bool tcp_schedule_loss_probe(struct sock *sk) */ void tcp_send_loss_probe(struct sock *sk) { + struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; int pcount; int mss = tcp_current_mss(sk); @@ -2142,6 +2143,10 @@ void tcp_send_loss_probe(struct sock *sk) goto rearm_timer; } + /* At most one outstanding TLP retransmission. */ + if (tp->tlp_high_seq) + goto rearm_timer; + /* Retransmit last segment. */ skb = tcp_write_queue_tail(sk); if (WARN_ON(!skb)) @@ -2164,6 +2169,10 @@ void tcp_send_loss_probe(struct sock *sk) if (skb->len > 0) err = __tcp_retransmit_skb(sk, skb); + /* Record snd_nxt for loss detection. */ + if (likely(!err)) + tp->tlp_high_seq = tp->snd_nxt; + rearm_timer: inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto, diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index ecd61d54147f..eeccf795e917 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -356,6 +356,8 @@ void tcp_retransmit_timer(struct sock *sk) WARN_ON(tcp_write_queue_empty(sk)); + tp->tlp_high_seq = 0; + if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) && !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) { /* Receiver dastardly shrinks window. Our retransmits -- cgit From 42e836eb4527fb635cb799a701fe4c9fe741c03a Mon Sep 17 00:00:00 2001 From: Michael Stapelberg Date: Mon, 11 Mar 2013 13:56:44 +0000 Subject: phy: add set_wol/get_wol functions This allows ethernet drivers (such as the mv643xx_eth) to support Wake on LAN on platforms where PHY registers have to be configured for Wake on LAN (e.g. the Marvell Kirkwood based qnap TS-119P II). Signed-off-by: Michael Stapelberg Signed-off-by: David S. 
Miller --- drivers/net/phy/phy.c | 16 ++++++++++++++++ include/linux/phy.h | 10 ++++++++++ 2 files changed, 26 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index ef9ea9248223..298b4c201733 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -1188,3 +1188,19 @@ int phy_ethtool_set_eee(struct phy_device *phydev, struct ethtool_eee *data) return 0; } EXPORT_SYMBOL(phy_ethtool_set_eee); + +int phy_ethtool_set_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol) +{ + if (phydev->drv->set_wol) + return phydev->drv->set_wol(phydev, wol); + + return -EOPNOTSUPP; +} +EXPORT_SYMBOL(phy_ethtool_set_wol); + +void phy_ethtool_get_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol) +{ + if (phydev->drv->get_wol) + phydev->drv->get_wol(phydev, wol); +} +EXPORT_SYMBOL(phy_ethtool_get_wol); diff --git a/include/linux/phy.h b/include/linux/phy.h index 33999adbf8c8..9e11039dd7a3 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -455,6 +455,14 @@ struct phy_driver { */ void (*txtstamp)(struct phy_device *dev, struct sk_buff *skb, int type); + /* Some devices (e.g. qnap TS-119P II) require PHY register changes to + * enable Wake on LAN, so set_wol is provided to be called in the + * ethernet driver's set_wol function. */ + int (*set_wol)(struct phy_device *dev, struct ethtool_wolinfo *wol); + + /* See set_wol, but for checking whether Wake on LAN is enabled. */ + void (*get_wol)(struct phy_device *dev, struct ethtool_wolinfo *wol); + struct device_driver driver; }; #define to_phy_driver(d) container_of(d, struct phy_driver, driver) @@ -560,6 +568,8 @@ int phy_init_eee(struct phy_device *phydev, bool clk_stop_enable); int phy_get_eee_err(struct phy_device *phydev); int phy_ethtool_set_eee(struct phy_device *phydev, struct ethtool_eee *data); int phy_ethtool_get_eee(struct phy_device *phydev, struct ethtool_eee *data); +int phy_ethtool_set_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol); +void phy_ethtool_get_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol); int __init mdio_bus_init(void); void mdio_bus_exit(void); -- cgit From d84ff0512f1bfc0d8c864efadb4523fce68919cc Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Mar 2013 11:29:59 -0700 Subject: workqueue: consistently use int for @cpu variables Workqueue is mixing unsigned int and int for @cpu variables. There's no point in using unsigned int for cpus - many cpu-related APIs take int anyway. Consistently use int for @cpu variables so that we can use negative values to mark special ones. This patch doesn't introduce any visible behavior changes.
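For illustration of the motivation, a minimal sketch (the sentinel and helper below are hypothetical, not part of this patch): with a signed @cpu, a negative value can serve as a "no specific CPU" marker that unsigned int cannot carry.

    /* Illustrative only: a negative sentinel becomes representable with int. */
    #define EXAMPLE_CPU_NONE	-1	/* hypothetical "not bound to a CPU" marker */

    static void example_bind(int cpu)
    {
    	if (cpu == EXAMPLE_CPU_NONE)
    		return;			/* unbound; nothing to do */
    	/* ... bind to @cpu ... */
    }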
Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- include/linux/workqueue.h | 6 +++--- kernel/workqueue.c | 24 +++++++++++------------- kernel/workqueue_internal.h | 5 ++--- 3 files changed, 16 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 5bd030f630a9..899be6636d20 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -435,7 +435,7 @@ extern bool cancel_delayed_work_sync(struct delayed_work *dwork); extern void workqueue_set_max_active(struct workqueue_struct *wq, int max_active); -extern bool workqueue_congested(unsigned int cpu, struct workqueue_struct *wq); +extern bool workqueue_congested(int cpu, struct workqueue_struct *wq); extern unsigned int work_busy(struct work_struct *work); /* @@ -466,12 +466,12 @@ static inline bool __deprecated flush_delayed_work_sync(struct delayed_work *dwo } #ifndef CONFIG_SMP -static inline long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg) +static inline long work_on_cpu(int cpu, long (*fn)(void *), void *arg) { return fn(arg); } #else -long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg); +long work_on_cpu(int cpu, long (*fn)(void *), void *arg); #endif /* CONFIG_SMP */ #ifdef CONFIG_FREEZER diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 26c67c76b6c5..73c5f68065b5 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -124,7 +124,7 @@ enum { struct worker_pool { spinlock_t lock; /* the pool lock */ - unsigned int cpu; /* I: the associated cpu */ + int cpu; /* I: the associated cpu */ int id; /* I: pool ID */ unsigned int flags; /* X: flags */ @@ -467,8 +467,7 @@ static struct worker_pool *get_std_worker_pool(int cpu, bool highpri) return &pools[highpri]; } -static struct pool_workqueue *get_pwq(unsigned int cpu, - struct workqueue_struct *wq) +static struct pool_workqueue *get_pwq(int cpu, struct workqueue_struct *wq) { if (!(wq->flags & WQ_UNBOUND)) { if (likely(cpu < nr_cpu_ids)) @@ -730,7 +729,7 @@ static void wake_up_worker(struct worker_pool *pool) * CONTEXT: * spin_lock_irq(rq->lock) */ -void wq_worker_waking_up(struct task_struct *task, unsigned int cpu) +void wq_worker_waking_up(struct task_struct *task, int cpu) { struct worker *worker = kthread_data(task); @@ -755,8 +754,7 @@ void wq_worker_waking_up(struct task_struct *task, unsigned int cpu) * RETURNS: * Worker task on @cpu to wake up, %NULL if none. */ -struct task_struct *wq_worker_sleeping(struct task_struct *task, - unsigned int cpu) +struct task_struct *wq_worker_sleeping(struct task_struct *task, int cpu) { struct worker *worker = kthread_data(task), *to_wakeup = NULL; struct worker_pool *pool; @@ -1159,7 +1157,7 @@ static bool is_chained_work(struct workqueue_struct *wq) return worker && worker->current_pwq->wq == wq; } -static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, +static void __queue_work(int cpu, struct workqueue_struct *wq, struct work_struct *work) { struct pool_workqueue *pwq; @@ -1714,7 +1712,7 @@ static struct worker *create_worker(struct worker_pool *pool) if (pool->cpu != WORK_CPU_UNBOUND) worker->task = kthread_create_on_node(worker_thread, worker, cpu_to_node(pool->cpu), - "kworker/%u:%d%s", pool->cpu, id, pri); + "kworker/%d:%d%s", pool->cpu, id, pri); else worker->task = kthread_create(worker_thread, worker, "kworker/u:%d%s", id, pri); @@ -3345,7 +3343,7 @@ EXPORT_SYMBOL_GPL(workqueue_set_max_active); * RETURNS: * %true if congested, %false otherwise. 
*/ -bool workqueue_congested(unsigned int cpu, struct workqueue_struct *wq) +bool workqueue_congested(int cpu, struct workqueue_struct *wq) { struct pool_workqueue *pwq = get_pwq(cpu, wq); @@ -3461,7 +3459,7 @@ static int __cpuinit workqueue_cpu_up_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { - unsigned int cpu = (unsigned long)hcpu; + int cpu = (unsigned long)hcpu; struct worker_pool *pool; switch (action & ~CPU_TASKS_FROZEN) { @@ -3507,7 +3505,7 @@ static int __cpuinit workqueue_cpu_down_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { - unsigned int cpu = (unsigned long)hcpu; + int cpu = (unsigned long)hcpu; struct work_struct unbind_work; switch (action & ~CPU_TASKS_FROZEN) { @@ -3547,7 +3545,7 @@ static void work_for_cpu_fn(struct work_struct *work) * It is up to the caller to ensure that the cpu doesn't go offline. * The caller must not hold any locks which would prevent @fn from completing. */ -long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg) +long work_on_cpu(int cpu, long (*fn)(void *), void *arg) { struct work_for_cpu wfc = { .fn = fn, .arg = arg }; @@ -3705,7 +3703,7 @@ out_unlock: static int __init init_workqueues(void) { - unsigned int cpu; + int cpu; /* make sure we have enough bits for OFFQ pool ID */ BUILD_BUG_ON((1LU << (BITS_PER_LONG - WORK_OFFQ_POOL_SHIFT)) < diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h index f9c887731e2b..f116f071d919 100644 --- a/kernel/workqueue_internal.h +++ b/kernel/workqueue_internal.h @@ -59,8 +59,7 @@ static inline struct worker *current_wq_worker(void) * Scheduler hooks for concurrency managed workqueue. Only to be used from * sched.c and workqueue.c. */ -void wq_worker_waking_up(struct task_struct *task, unsigned int cpu); -struct task_struct *wq_worker_sleeping(struct task_struct *task, - unsigned int cpu); +void wq_worker_waking_up(struct task_struct *task, int cpu); +struct task_struct *wq_worker_sleeping(struct task_struct *task, int cpu); #endif /* _KERNEL_WORKQUEUE_INTERNAL_H */ -- cgit From 7a4e344c5675eefbde93ed9a98ef45e0e4957bc2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Mar 2013 11:30:00 -0700 Subject: workqueue: introduce workqueue_attrs Introduce struct workqueue_attrs which carries worker attributes - currently the nice level and allowed cpumask along with helper routines alloc_workqueue_attrs() and free_workqueue_attrs(). Each worker_pool now carries ->attrs describing the attributes of its workers. All functions dealing with cpumask and nice level of workers are updated to follow worker_pool->attrs instead of determining them from other characteristics of the worker_pool, and init_workqueues() is updated to set worker_pool->attrs appropriately for all standard pools. Note that create_worker() is updated to always perform set_user_nice() and use set_cpus_allowed_ptr() combined with manual assertion of PF_THREAD_BOUND instead of kthread_bind(). This simplifies handling random attributes without affecting the outcome. This patch doesn't introduce any behavior changes. v2: Missing cpumask_var_t definition caused build failure on some archs. linux/cpumask.h included. 
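As a usage sketch of the helpers added here (illustrative only; example_build_attrs() is hypothetical, and the nice value and cpumask are arbitrary examples):

    static int example_build_attrs(void)
    {
    	struct workqueue_attrs *attrs;

    	attrs = alloc_workqueue_attrs(GFP_KERNEL);	/* cpumask defaults to all CPUs */
    	if (!attrs)
    		return -ENOMEM;

    	attrs->nice = -20;				/* example: high-priority workers */
    	cpumask_copy(attrs->cpumask, cpumask_of(0));	/* example: restrict to CPU 0 */

    	/* ... hand @attrs to a consumer, then drop our copy ... */
    	free_workqueue_attrs(attrs);
    	return 0;
    }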
Signed-off-by: Tejun Heo Reported-by: kbuild test robot Reviewed-by: Lai Jiangshan --- include/linux/workqueue.h | 13 ++++++ kernel/workqueue.c | 103 ++++++++++++++++++++++++++++++++++++---------- 2 files changed, 94 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 899be6636d20..00c1b9ba8252 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -11,6 +11,7 @@ #include #include #include +#include struct workqueue_struct; @@ -115,6 +116,15 @@ struct delayed_work { int cpu; }; +/* + * A struct for workqueue attributes. This can be used to change + * attributes of an unbound workqueue. + */ +struct workqueue_attrs { + int nice; /* nice level */ + cpumask_var_t cpumask; /* allowed CPUs */ +}; + static inline struct delayed_work *to_delayed_work(struct work_struct *work) { return container_of(work, struct delayed_work, work); @@ -399,6 +409,9 @@ __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active, extern void destroy_workqueue(struct workqueue_struct *wq); +struct workqueue_attrs *alloc_workqueue_attrs(gfp_t gfp_mask); +void free_workqueue_attrs(struct workqueue_attrs *attrs); + extern bool queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work); extern bool queue_work(struct workqueue_struct *wq, struct work_struct *work); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 094f16668e1b..b0d3cbb83f63 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -148,6 +148,8 @@ struct worker_pool { struct mutex assoc_mutex; /* protect POOL_DISASSOCIATED */ struct ida worker_ida; /* L: for worker IDs */ + struct workqueue_attrs *attrs; /* I: worker attributes */ + /* * The current concurrency level. As it's likely to be accessed * from other CPUs during try_to_wake_up(), put it in a separate @@ -1566,14 +1568,13 @@ __acquires(&pool->lock) * against POOL_DISASSOCIATED. */ if (!(pool->flags & POOL_DISASSOCIATED)) - set_cpus_allowed_ptr(current, get_cpu_mask(pool->cpu)); + set_cpus_allowed_ptr(current, pool->attrs->cpumask); spin_lock_irq(&pool->lock); if (pool->flags & POOL_DISASSOCIATED) return false; if (task_cpu(current) == pool->cpu && - cpumask_equal(¤t->cpus_allowed, - get_cpu_mask(pool->cpu))) + cpumask_equal(¤t->cpus_allowed, pool->attrs->cpumask)) return true; spin_unlock_irq(&pool->lock); @@ -1679,7 +1680,7 @@ static void rebind_workers(struct worker_pool *pool) * wq doesn't really matter but let's keep @worker->pool * and @pwq->pool consistent for sanity. */ - if (std_worker_pool_pri(worker->pool)) + if (worker->pool->attrs->nice < 0) wq = system_highpri_wq; else wq = system_wq; @@ -1721,7 +1722,7 @@ static struct worker *alloc_worker(void) */ static struct worker *create_worker(struct worker_pool *pool) { - const char *pri = std_worker_pool_pri(pool) ? "H" : ""; + const char *pri = pool->attrs->nice < 0 ? "H" : ""; struct worker *worker = NULL; int id = -1; @@ -1751,24 +1752,23 @@ static struct worker *create_worker(struct worker_pool *pool) if (IS_ERR(worker->task)) goto fail; - if (std_worker_pool_pri(pool)) - set_user_nice(worker->task, HIGHPRI_NICE_LEVEL); + set_user_nice(worker->task, pool->attrs->nice); + set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask); /* - * Determine CPU binding of the new worker depending on - * %POOL_DISASSOCIATED. The caller is responsible for ensuring the - * flag remains stable across this function. See the comments - * above the flag definition for details. 
- * - * As an unbound worker may later become a regular one if CPU comes - * online, make sure every worker has %PF_THREAD_BOUND set. + * %PF_THREAD_BOUND is used to prevent userland from meddling with + * cpumask of workqueue workers. This is an abuse. We need + * %PF_NO_SETAFFINITY. */ - if (!(pool->flags & POOL_DISASSOCIATED)) { - kthread_bind(worker->task, pool->cpu); - } else { - worker->task->flags |= PF_THREAD_BOUND; + worker->task->flags |= PF_THREAD_BOUND; + + /* + * The caller is responsible for ensuring %POOL_DISASSOCIATED + * remains stable across this function. See the comments above the + * flag definition for details. + */ + if (pool->flags & POOL_DISASSOCIATED) worker->flags |= WORKER_UNBOUND; - } return worker; fail: @@ -3123,7 +3123,52 @@ int keventd_up(void) return system_wq != NULL; } -static void init_worker_pool(struct worker_pool *pool) +/** + * free_workqueue_attrs - free a workqueue_attrs + * @attrs: workqueue_attrs to free + * + * Undo alloc_workqueue_attrs(). + */ +void free_workqueue_attrs(struct workqueue_attrs *attrs) +{ + if (attrs) { + free_cpumask_var(attrs->cpumask); + kfree(attrs); + } +} + +/** + * alloc_workqueue_attrs - allocate a workqueue_attrs + * @gfp_mask: allocation mask to use + * + * Allocate a new workqueue_attrs, initialize with default settings and + * return it. Returns NULL on failure. + */ +struct workqueue_attrs *alloc_workqueue_attrs(gfp_t gfp_mask) +{ + struct workqueue_attrs *attrs; + + attrs = kzalloc(sizeof(*attrs), gfp_mask); + if (!attrs) + goto fail; + if (!alloc_cpumask_var(&attrs->cpumask, gfp_mask)) + goto fail; + + cpumask_setall(attrs->cpumask); + return attrs; +fail: + free_workqueue_attrs(attrs); + return NULL; +} + +/** + * init_worker_pool - initialize a newly zalloc'd worker_pool + * @pool: worker_pool to initialize + * + * Initialize a newly zalloc'd @pool. It also allocates @pool->attrs. + * Returns 0 on success, -errno on failure. + */ +static int init_worker_pool(struct worker_pool *pool) { spin_lock_init(&pool->lock); pool->flags |= POOL_DISASSOCIATED; @@ -3141,6 +3186,11 @@ static void init_worker_pool(struct worker_pool *pool) mutex_init(&pool->manager_arb); mutex_init(&pool->assoc_mutex); ida_init(&pool->worker_ida); + + pool->attrs = alloc_workqueue_attrs(GFP_KERNEL); + if (!pool->attrs) + return -ENOMEM; + return 0; } static int alloc_and_link_pwqs(struct workqueue_struct *wq) @@ -3792,7 +3842,8 @@ out_unlock: static int __init init_workqueues(void) { - int cpu; + int std_nice[NR_STD_WORKER_POOLS] = { 0, HIGHPRI_NICE_LEVEL }; + int i, cpu; /* make sure we have enough bits for OFFQ pool ID */ BUILD_BUG_ON((1LU << (BITS_PER_LONG - WORK_OFFQ_POOL_SHIFT)) < @@ -3809,10 +3860,18 @@ static int __init init_workqueues(void) for_each_wq_cpu(cpu) { struct worker_pool *pool; + i = 0; for_each_std_worker_pool(pool, cpu) { - init_worker_pool(pool); + BUG_ON(init_worker_pool(pool)); pool->cpu = cpu; + if (cpu != WORK_CPU_UNBOUND) + cpumask_copy(pool->attrs->cpumask, cpumask_of(cpu)); + else + cpumask_setall(pool->attrs->cpumask); + + pool->attrs->nice = std_nice[i++]; + /* alloc pool ID */ BUG_ON(worker_pool_assign_id(pool)); } -- cgit From 493008a8e475771a2126e0ce95a73e35b371d277 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Mar 2013 11:30:03 -0700 Subject: workqueue: drop WQ_RESCUER and test workqueue->rescuer for NULL instead WQ_RESCUER is superfluous. WQ_MEM_RECLAIM indicates that the user wants a rescuer and testing wq->rescuer for NULL can answer whether a given workqueue has a rescuer or not.
Drop WQ_RESCUER and test wq->rescuer directly. This will help simplifying __alloc_workqueue_key() failure path by allowing it to use destroy_workqueue() on a partially constructed workqueue, which in turn will help implementing dynamic management of pool_workqueues. While at it, clear wq->rescuer after freeing it in destroy_workqueue(). This is a precaution as scheduled changes will make destruction more complex. This patch doesn't introduce any functional changes. Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- include/linux/workqueue.h | 1 - kernel/workqueue.c | 22 ++++++++++------------ 2 files changed, 10 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 00c1b9ba8252..c270b4eedf16 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -295,7 +295,6 @@ enum { WQ_CPU_INTENSIVE = 1 << 5, /* cpu instensive workqueue */ WQ_DRAINING = 1 << 6, /* internal: workqueue is draining */ - WQ_RESCUER = 1 << 7, /* internal: workqueue has rescuer */ WQ_MAX_ACTIVE = 512, /* I like 512, better ideas? */ WQ_MAX_UNBOUND_PER_CPU = 4, /* 4 * #cpus for unbound wq */ diff --git a/kernel/workqueue.c b/kernel/workqueue.c index a8b86f7b6e34..7ff2b9c5cc3a 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1827,7 +1827,7 @@ static void send_mayday(struct work_struct *work) lockdep_assert_held(&workqueue_lock); - if (!(wq->flags & WQ_RESCUER)) + if (!wq->rescuer) return; /* mayday mayday mayday */ @@ -2285,7 +2285,7 @@ sleep: * @__rescuer: self * * Workqueue rescuer thread function. There's one rescuer for each - * workqueue which has WQ_RESCUER set. + * workqueue which has WQ_MEM_RECLAIM set. * * Regular work processing on a pool may block trying to create a new * worker which uses GFP_KERNEL allocation which has slight chance of @@ -2769,7 +2769,7 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr) * flusher is not running on the same workqueue by verifying write * access. */ - if (pwq->wq->saved_max_active == 1 || pwq->wq->flags & WQ_RESCUER) + if (pwq->wq->saved_max_active == 1 || pwq->wq->rescuer) lock_map_acquire(&pwq->wq->lockdep_map); else lock_map_acquire_read(&pwq->wq->lockdep_map); @@ -3412,13 +3412,6 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt, va_end(args); va_end(args1); - /* - * Workqueues which may be used during memory reclaim should - * have a rescuer to guarantee forward progress. - */ - if (flags & WQ_MEM_RECLAIM) - flags |= WQ_RESCUER; - max_active = max_active ?: WQ_DFL_ACTIVE; max_active = wq_clamp_max_active(max_active, flags, wq->name); @@ -3449,7 +3442,11 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt, } local_irq_enable(); - if (flags & WQ_RESCUER) { + /* + * Workqueues which may be used during memory reclaim should + * have a rescuer to guarantee forward progress. 
+ */ + if (flags & WQ_MEM_RECLAIM) { struct worker *rescuer; wq->rescuer = rescuer = alloc_worker(); @@ -3533,9 +3530,10 @@ void destroy_workqueue(struct workqueue_struct *wq) spin_unlock_irq(&workqueue_lock); - if (wq->flags & WQ_RESCUER) { + if (wq->rescuer) { kthread_stop(wq->rescuer->task); kfree(wq->rescuer); + wq->rescuer = NULL; } /* -- cgit From 9e8cd2f5898ab6710ad81f4583fada08bf8049a4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Mar 2013 11:30:04 -0700 Subject: workqueue: implement apply_workqueue_attrs() Implement apply_workqueue_attrs() which applies workqueue_attrs to the specified unbound workqueue by creating a new pwq (pool_workqueue) linked to worker_pool with the specified attributes. A new pwq is linked at the head of wq->pwqs instead of tail and __queue_work() verifies that the first unbound pwq has positive refcnt before choosing it for the actual queueing. This is to cover the case where creation of a new pwq races with queueing. As base ref on a pwq won't be dropped without making another pwq the first one, __queue_work() is guaranteed to make progress and not add work item to a dead pwq. init_and_link_pwq() is updated to return the last first pwq the new pwq replaced, which is put by apply_workqueue_attrs(). Note that apply_workqueue_attrs() is almost identical to unbound pwq part of alloc_and_link_pwqs(). The only difference is that there is no previous first pwq. apply_workqueue_attrs() is implemented to handle such cases and replaces unbound pwq handling in alloc_and_link_pwqs(). Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- include/linux/workqueue.h | 2 ++ kernel/workqueue.c | 91 ++++++++++++++++++++++++++++++++++++----------- 2 files changed, 73 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index c270b4eedf16..e152394fa7eb 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -410,6 +410,8 @@ extern void destroy_workqueue(struct workqueue_struct *wq); struct workqueue_attrs *alloc_workqueue_attrs(gfp_t gfp_mask); void free_workqueue_attrs(struct workqueue_attrs *attrs); +int apply_workqueue_attrs(struct workqueue_struct *wq, + const struct workqueue_attrs *attrs); extern bool queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 16fb6747276a..2a67fbbd192c 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1228,7 +1228,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, if (unlikely(wq->flags & WQ_DRAINING) && WARN_ON_ONCE(!is_chained_work(wq))) return; - +retry: /* pwq which will be used unless @work is executing elsewhere */ if (!(wq->flags & WQ_UNBOUND)) { if (cpu == WORK_CPU_UNBOUND) @@ -1262,6 +1262,25 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, spin_lock(&pwq->pool->lock); } + /* + * pwq is determined and locked. For unbound pools, we could have + * raced with pwq release and it could already be dead. If its + * refcnt is zero, repeat pwq selection. Note that pwqs never die + * without another pwq replacing it as the first pwq or while a + * work item is executing on it, so the retying is guaranteed to + * make forward-progress. 
+ */ + if (unlikely(!pwq->refcnt)) { + if (wq->flags & WQ_UNBOUND) { + spin_unlock(&pwq->pool->lock); + cpu_relax(); + goto retry; + } + /* oops */ + WARN_ONCE(true, "workqueue: per-cpu pwq for %s on cpu%d has 0 refcnt", + wq->name, cpu); + } + /* pwq determined, queue */ trace_workqueue_queue_work(req_cpu, pwq, work); @@ -3425,7 +3444,8 @@ static void pwq_unbound_release_workfn(struct work_struct *work) static void init_and_link_pwq(struct pool_workqueue *pwq, struct workqueue_struct *wq, - struct worker_pool *pool) + struct worker_pool *pool, + struct pool_workqueue **p_last_pwq) { BUG_ON((unsigned long)pwq & WORK_STRUCT_FLAG_MASK); @@ -3445,13 +3465,58 @@ static void init_and_link_pwq(struct pool_workqueue *pwq, mutex_lock(&wq->flush_mutex); spin_lock_irq(&workqueue_lock); + if (p_last_pwq) + *p_last_pwq = first_pwq(wq); pwq->work_color = wq->work_color; - list_add_tail_rcu(&pwq->pwqs_node, &wq->pwqs); + list_add_rcu(&pwq->pwqs_node, &wq->pwqs); spin_unlock_irq(&workqueue_lock); mutex_unlock(&wq->flush_mutex); } +/** + * apply_workqueue_attrs - apply new workqueue_attrs to an unbound workqueue + * @wq: the target workqueue + * @attrs: the workqueue_attrs to apply, allocated with alloc_workqueue_attrs() + * + * Apply @attrs to an unbound workqueue @wq. If @attrs doesn't match the + * current attributes, a new pwq is created and made the first pwq which + * will serve all new work items. Older pwqs are released as in-flight + * work items finish. Note that a work item which repeatedly requeues + * itself back-to-back will stay on its current pwq. + * + * Performs GFP_KERNEL allocations. Returns 0 on success and -errno on + * failure. + */ +int apply_workqueue_attrs(struct workqueue_struct *wq, + const struct workqueue_attrs *attrs) +{ + struct pool_workqueue *pwq, *last_pwq; + struct worker_pool *pool; + + if (WARN_ON(!(wq->flags & WQ_UNBOUND))) + return -EINVAL; + + pwq = kmem_cache_zalloc(pwq_cache, GFP_KERNEL); + if (!pwq) + return -ENOMEM; + + pool = get_unbound_pool(attrs); + if (!pool) { + kmem_cache_free(pwq_cache, pwq); + return -ENOMEM; + } + + init_and_link_pwq(pwq, wq, pool, &last_pwq); + if (last_pwq) { + spin_lock_irq(&last_pwq->pool->lock); + put_pwq(last_pwq); + spin_unlock_irq(&last_pwq->pool->lock); + } + + return 0; +} + static int alloc_and_link_pwqs(struct workqueue_struct *wq) { bool highpri = wq->flags & WQ_HIGHPRI; @@ -3468,26 +3533,12 @@ static int alloc_and_link_pwqs(struct workqueue_struct *wq) struct worker_pool *cpu_pools = per_cpu(cpu_worker_pools, cpu); - init_and_link_pwq(pwq, wq, &cpu_pools[highpri]); + init_and_link_pwq(pwq, wq, &cpu_pools[highpri], NULL); } + return 0; } else { - struct pool_workqueue *pwq; - struct worker_pool *pool; - - pwq = kmem_cache_zalloc(pwq_cache, GFP_KERNEL); - if (!pwq) - return -ENOMEM; - - pool = get_unbound_pool(unbound_std_wq_attrs[highpri]); - if (!pool) { - kmem_cache_free(pwq_cache, pwq); - return -ENOMEM; - } - - init_and_link_pwq(pwq, wq, pool); + return apply_workqueue_attrs(wq, unbound_std_wq_attrs[highpri]); } - - return 0; } static int wq_clamp_max_active(int max_active, unsigned int flags, -- cgit From 618b01eb426dd2d73a4b5e5ebc6379e4eee3b123 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Mar 2013 11:30:04 -0700 Subject: workqueue: make it clear that WQ_DRAINING is an internal flag We're gonna add another internal WQ flag. Let's make the distinction clear. Prefix WQ_DRAINING with __ and move it to bit 16. 
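The resulting convention, condensed for illustration (flag values as in the diff below): user-settable WQ_* flags keep the low bits, while internal flags take a double-underscore prefix and start at bit 16.

    enum {
    	WQ_UNBOUND	= 1 << 1,	/* user-settable: low bits */
    	WQ_MEM_RECLAIM	= 1 << 3,
    	__WQ_DRAINING	= 1 << 16,	/* internal: "__" prefix, high bits */
    };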
Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- include/linux/workqueue.h | 2 +- kernel/workqueue.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index e152394fa7eb..1751ec4c47c9 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -294,7 +294,7 @@ enum { WQ_HIGHPRI = 1 << 4, /* high priority */ WQ_CPU_INTENSIVE = 1 << 5, /* cpu instensive workqueue */ - WQ_DRAINING = 1 << 6, /* internal: workqueue is draining */ + __WQ_DRAINING = 1 << 16, /* internal: workqueue is draining */ WQ_MAX_ACTIVE = 512, /* I like 512, better ideas? */ WQ_MAX_UNBOUND_PER_CPU = 4, /* 4 * #cpus for unbound wq */ diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 2a67fbbd192c..590f4d048ec7 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1225,7 +1225,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, debug_work_activate(work); /* if dying, only works from the same workqueue are allowed */ - if (unlikely(wq->flags & WQ_DRAINING) && + if (unlikely(wq->flags & __WQ_DRAINING) && WARN_ON_ONCE(!is_chained_work(wq))) return; retry: @@ -2763,11 +2763,11 @@ void drain_workqueue(struct workqueue_struct *wq) /* * __queue_work() needs to test whether there are drainers, is much * hotter than drain_workqueue() and already looks at @wq->flags. - * Use WQ_DRAINING so that queue doesn't have to check nr_drainers. + * Use __WQ_DRAINING so that queue doesn't have to check nr_drainers. */ spin_lock_irq(&workqueue_lock); if (!wq->nr_drainers++) - wq->flags |= WQ_DRAINING; + wq->flags |= __WQ_DRAINING; spin_unlock_irq(&workqueue_lock); reflush: flush_workqueue(wq); @@ -2795,7 +2795,7 @@ reflush: spin_lock(&workqueue_lock); if (!--wq->nr_drainers) - wq->flags &= ~WQ_DRAINING; + wq->flags &= ~__WQ_DRAINING; spin_unlock(&workqueue_lock); local_irq_enable(); -- cgit From 8719dceae2f98a578507c0f6b49c93f320bd729c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Mar 2013 11:30:04 -0700 Subject: workqueue: reject adjusting max_active or applying attrs to ordered workqueues Adjusting max_active of or applying new workqueue_attrs to an ordered workqueue breaks its ordering guarantee. The former is obvious. The latter is because applying attrs creates a new pwq (pool_workqueue) and there is no ordering constraint between the old and new pwqs. Make apply_workqueue_attrs() and workqueue_set_max_active() trigger WARN_ON() if those operations are requested on an ordered workqueue and fail / ignore respectively. Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- include/linux/workqueue.h | 3 ++- kernel/workqueue.c | 9 +++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 1751ec4c47c9..5668ab249af5 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -295,6 +295,7 @@ enum { WQ_CPU_INTENSIVE = 1 << 5, /* cpu instensive workqueue */ __WQ_DRAINING = 1 << 16, /* internal: workqueue is draining */ + __WQ_ORDERED = 1 << 17, /* internal: workqueue is ordered */ WQ_MAX_ACTIVE = 512, /* I like 512, better ideas? */ WQ_MAX_UNBOUND_PER_CPU = 4, /* 4 * #cpus for unbound wq */ @@ -397,7 +398,7 @@ __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active, * Pointer to the allocated workqueue on success, %NULL on failure. */ #define alloc_ordered_workqueue(fmt, flags, args...) 
\ - alloc_workqueue(fmt, WQ_UNBOUND | (flags), 1, ##args) + alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, ##args) #define create_workqueue(name) \ alloc_workqueue((name), WQ_MEM_RECLAIM, 1) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 590f4d048ec7..cecd4ffe2c40 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3494,9 +3494,14 @@ int apply_workqueue_attrs(struct workqueue_struct *wq, struct pool_workqueue *pwq, *last_pwq; struct worker_pool *pool; + /* only unbound workqueues can change attributes */ if (WARN_ON(!(wq->flags & WQ_UNBOUND))) return -EINVAL; + /* creating multiple pwqs breaks ordering guarantee */ + if (WARN_ON((wq->flags & __WQ_ORDERED) && !list_empty(&wq->pwqs))) + return -EINVAL; + pwq = kmem_cache_zalloc(pwq_cache, GFP_KERNEL); if (!pwq) return -ENOMEM; @@ -3752,6 +3757,10 @@ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active) { struct pool_workqueue *pwq; + /* disallow meddling with max_active for ordered workqueues */ + if (WARN_ON(wq->flags & __WQ_ORDERED)) + return; + max_active = wq_clamp_max_active(max_active, wq->flags, wq->name); spin_lock_irq(&workqueue_lock); -- cgit From ba630e4940924ad1962883c207a62890778ced63 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Mar 2013 11:30:04 -0700 Subject: cpumask: implement cpumask_parse() We have cpulist_parse() but not cpumask_parse(). Implement it using bitmap_parse(). bitmap_parse() is weird in that it takes @len for a string in kernel memory, which is also inconsistent with bitmap_parselist(). Make cpumask_parse() calculate the length and don't expose the inconsistency to cpumask users. Maybe we can fix up bitmap_parse() later. This will be used to expose workqueue cpumask knobs to userland via sysfs. Signed-off-by: Tejun Heo Cc: Rusty Russell --- include/linux/cpumask.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 032560295fcb..d08e4d2a9b92 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -590,6 +590,21 @@ static inline int cpulist_scnprintf(char *buf, int len, nr_cpumask_bits); } +/** + * cpumask_parse - extract a cpumask from a string + * @buf: the buffer to extract from + * @dstp: the cpumask to set. + * + * Returns -errno, or 0 for success. + */ +static inline int cpumask_parse(const char *buf, struct cpumask *dstp) +{ + char *nl = strchr(buf, '\n'); + int len = nl ? nl - buf : strlen(buf); + + return bitmap_parse(buf, len, cpumask_bits(dstp), nr_cpumask_bits); +} + /** * cpulist_parse - extract a cpumask from a user string of ranges + @buf: the buffer to extract from -- cgit From d73ce004225a7b2ed75f4340bb63721d55552265 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Mar 2013 11:30:05 -0700 Subject: driver/base: implement subsys_virtual_register() Kay tells me the most appropriate place to expose workqueues to userland would be /sys/devices/virtual/workqueues/WQ_NAME which is symlinked to /sys/bus/workqueue/devices/WQ_NAME and that we're lacking a way to do that outside of driver core as virtual_device_parent() isn't exported and there's no interface to conveniently create a virtual subsystem. This patch implements subsys_virtual_register() by factoring out subsys_register() from subsys_system_register() and using it with virtual_device_parent() as the origin directory.
It's identical to subsys_system_register() other than the origin directory but we aren't gonna restrict the device names which should be used under it. This will be used to expose workqueue attributes to userland. Signed-off-by: Tejun Heo Acked-by: Greg Kroah-Hartman Cc: Kay Sievers --- drivers/base/base.h | 2 ++ drivers/base/bus.c | 73 +++++++++++++++++++++++++++++++++++--------------- drivers/base/core.c | 2 +- include/linux/device.h | 2 ++ 4 files changed, 57 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/base.h b/drivers/base/base.h index 6ee17bb391a9..b8bdfe61daa6 100644 --- a/drivers/base/base.h +++ b/drivers/base/base.h @@ -101,6 +101,8 @@ static inline int hypervisor_init(void) { return 0; } extern int platform_bus_init(void); extern void cpu_dev_init(void); +struct kobject *virtual_device_parent(struct device *dev); + extern int bus_add_device(struct device *dev); extern void bus_probe_device(struct device *dev); extern void bus_remove_device(struct device *dev); diff --git a/drivers/base/bus.c b/drivers/base/bus.c index 519865b53f76..2ae2d2f92b6b 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -1205,26 +1205,10 @@ static void system_root_device_release(struct device *dev) { kfree(dev); } -/** - * subsys_system_register - register a subsystem at /sys/devices/system/ - * @subsys: system subsystem - * @groups: default attributes for the root device - * - * All 'system' subsystems have a /sys/devices/system/ root device - * with the name of the subsystem. The root device can carry subsystem- - * wide attributes. All registered devices are below this single root - * device and are named after the subsystem with a simple enumeration - * number appended. The registered devices are not explicitely named; - * only 'id' in the device needs to be set. - * - * Do not use this interface for anything new, it exists for compatibility - * with bad ideas only. New subsystems should use plain subsystems; and - * add the subsystem-wide attributes should be added to the subsystem - * directory itself and not some create fake root-device placed in - * /sys/devices/system/. - */ -int subsys_system_register(struct bus_type *subsys, - const struct attribute_group **groups) + +static int subsys_register(struct bus_type *subsys, + const struct attribute_group **groups, + struct kobject *parent_of_root) { struct device *dev; int err; @@ -1243,7 +1227,7 @@ int subsys_system_register(struct bus_type *subsys, if (err < 0) goto err_name; - dev->kobj.parent = &system_kset->kobj; + dev->kobj.parent = parent_of_root; dev->groups = groups; dev->release = system_root_device_release; @@ -1263,8 +1247,55 @@ err_dev: bus_unregister(subsys); return err; } + +/** + * subsys_system_register - register a subsystem at /sys/devices/system/ + * @subsys: system subsystem + * @groups: default attributes for the root device + * + * All 'system' subsystems have a /sys/devices/system/ root device + * with the name of the subsystem. The root device can carry subsystem- + * wide attributes. All registered devices are below this single root + * device and are named after the subsystem with a simple enumeration + * number appended. The registered devices are not explicitely named; + * only 'id' in the device needs to be set. + * + * Do not use this interface for anything new, it exists for compatibility + * with bad ideas only. 
New subsystems should use plain subsystems; and + * add the subsystem-wide attributes should be added to the subsystem + * directory itself and not some create fake root-device placed in + * /sys/devices/system/. + */ +int subsys_system_register(struct bus_type *subsys, + const struct attribute_group **groups) +{ + return subsys_register(subsys, groups, &system_kset->kobj); +} EXPORT_SYMBOL_GPL(subsys_system_register); +/** + * subsys_virtual_register - register a subsystem at /sys/devices/virtual/ + * @subsys: virtual subsystem + * @groups: default attributes for the root device + * + * All 'virtual' subsystems have a /sys/devices/system/ root device + * with the name of the subystem. The root device can carry subsystem-wide + * attributes. All registered devices are below this single root device. + * There's no restriction on device naming. This is for kernel software + * constructs which need sysfs interface. + */ +int subsys_virtual_register(struct bus_type *subsys, + const struct attribute_group **groups) +{ + struct kobject *virtual_dir; + + virtual_dir = virtual_device_parent(NULL); + if (!virtual_dir) + return -ENOMEM; + + return subsys_register(subsys, groups, virtual_dir); +} + int __init buses_init(void) { bus_kset = kset_create_and_add("bus", &bus_uevent_ops, NULL); diff --git a/drivers/base/core.c b/drivers/base/core.c index 56536f4b0f6b..f58084a86e8c 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -690,7 +690,7 @@ void device_initialize(struct device *dev) set_dev_node(dev, -1); } -static struct kobject *virtual_device_parent(struct device *dev) +struct kobject *virtual_device_parent(struct device *dev) { static struct kobject *virtual_dir = NULL; diff --git a/include/linux/device.h b/include/linux/device.h index 9d6464ea99c6..ee10d4e7be1a 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -302,6 +302,8 @@ void subsys_interface_unregister(struct subsys_interface *sif); int subsys_system_register(struct bus_type *subsys, const struct attribute_group **groups); +int subsys_virtual_register(struct bus_type *subsys, + const struct attribute_group **groups); /** * struct class - device classes -- cgit From 226223ab3c4118ddd10688cc2c131135848371ab Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Mar 2013 11:30:05 -0700 Subject: workqueue: implement sysfs interface for workqueues There are cases where workqueue users want to expose control knobs to userland. e.g. Unbound workqueues with custom attributes are scheduled to be used for writeback workers and depending on configuration it can be useful to allow admins to tinker with the priority or allowed CPUs. This patch implements workqueue_sysfs_register(), which makes the workqueue visible under /sys/bus/workqueue/devices/WQ_NAME. There currently are two attributes common to both per-cpu and unbound pools and extra attributes for unbound pools including nice level and cpumask. If alloc_workqueue*() is called with WQ_SYSFS, workqueue_sysfs_register() is called automatically as part of workqueue creation. This is the preferred method unless the workqueue user wants to apply workqueue_attrs before making the workqueue visible to userland. v2: Disallow exposing ordered workqueues as ordered workqueues can't be tuned in any way. 
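A usage sketch of the manual path (illustrative only; example_setup() and the workqueue name are hypothetical): allocate without WQ_SYSFS, apply attributes, and only then register, so userland never sees half-configured knobs.

    static struct workqueue_struct *example_setup(const struct workqueue_attrs *attrs)
    {
    	struct workqueue_struct *wq;

    	wq = alloc_workqueue("example_wq", WQ_UNBOUND, 0);
    	if (!wq)
    		return NULL;

    	if (!apply_workqueue_attrs(wq, attrs) && !workqueue_sysfs_register(wq))
    		return wq;	/* now at /sys/bus/workqueue/devices/example_wq */

    	destroy_workqueue(wq);
    	return NULL;
    }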
Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 8 ++ kernel/workqueue.c | 288 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 296 insertions(+) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 5668ab249af5..7f6d29a417c0 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -293,6 +293,7 @@ enum { WQ_MEM_RECLAIM = 1 << 3, /* may be used for memory reclaim */ WQ_HIGHPRI = 1 << 4, /* high priority */ WQ_CPU_INTENSIVE = 1 << 5, /* cpu instensive workqueue */ + WQ_SYSFS = 1 << 6, /* visible in sysfs, see wq_sysfs_register() */ __WQ_DRAINING = 1 << 16, /* internal: workqueue is draining */ __WQ_ORDERED = 1 << 17, /* internal: workqueue is ordered */ @@ -495,4 +496,11 @@ extern bool freeze_workqueues_busy(void); extern void thaw_workqueues(void); #endif /* CONFIG_FREEZER */ +#ifdef CONFIG_SYSFS +int workqueue_sysfs_register(struct workqueue_struct *wq); +#else /* CONFIG_SYSFS */ +static inline int workqueue_sysfs_register(struct workqueue_struct *wq) +{ return 0; } +#endif /* CONFIG_SYSFS */ + #endif diff --git a/kernel/workqueue.c b/kernel/workqueue.c index cecd4ffe2c40..c82feac0a878 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -210,6 +210,8 @@ struct wq_flusher { struct completion done; /* flush completion */ }; +struct wq_device; + /* * The externally visible workqueue abstraction is an array of * per-CPU workqueues: @@ -233,6 +235,10 @@ struct workqueue_struct { int nr_drainers; /* W: drain in progress */ int saved_max_active; /* W: saved pwq max_active */ + +#ifdef CONFIG_SYSFS + struct wq_device *wq_dev; /* I: for sysfs interface */ +#endif #ifdef CONFIG_LOCKDEP struct lockdep_map lockdep_map; #endif @@ -442,6 +448,8 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS], static DEFINE_IDR(worker_pool_idr); static int worker_thread(void *__worker); +static void copy_workqueue_attrs(struct workqueue_attrs *to, + const struct workqueue_attrs *from); /* allocate ID and assign it to @pool */ static int worker_pool_assign_id(struct worker_pool *pool) @@ -3153,6 +3161,281 @@ int keventd_up(void) return system_wq != NULL; } +#ifdef CONFIG_SYSFS +/* + * Workqueues with WQ_SYSFS flag set is visible to userland via + * /sys/bus/workqueue/devices/WQ_NAME. All visible workqueues have the + * following attributes. + * + * per_cpu RO bool : whether the workqueue is per-cpu or unbound + * max_active RW int : maximum number of in-flight work items + * + * Unbound workqueues have the following extra attributes. 
+ * + * id RO int : the associated pool ID + * nice RW int : nice value of the workers + * cpumask RW mask : bitmask of allowed CPUs for the workers + */ +struct wq_device { + struct workqueue_struct *wq; + struct device dev; +}; + +static struct workqueue_struct *dev_to_wq(struct device *dev) +{ + struct wq_device *wq_dev = container_of(dev, struct wq_device, dev); + + return wq_dev->wq; +} + +static ssize_t wq_per_cpu_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct workqueue_struct *wq = dev_to_wq(dev); + + return scnprintf(buf, PAGE_SIZE, "%d\n", (bool)!(wq->flags & WQ_UNBOUND)); +} + +static ssize_t wq_max_active_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct workqueue_struct *wq = dev_to_wq(dev); + + return scnprintf(buf, PAGE_SIZE, "%d\n", wq->saved_max_active); +} + +static ssize_t wq_max_active_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct workqueue_struct *wq = dev_to_wq(dev); + int val; + + if (sscanf(buf, "%d", &val) != 1 || val <= 0) + return -EINVAL; + + workqueue_set_max_active(wq, val); + return count; +} + +static struct device_attribute wq_sysfs_attrs[] = { + __ATTR(per_cpu, 0444, wq_per_cpu_show, NULL), + __ATTR(max_active, 0644, wq_max_active_show, wq_max_active_store), + __ATTR_NULL, +}; + +static ssize_t wq_pool_id_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct workqueue_struct *wq = dev_to_wq(dev); + struct worker_pool *pool; + int written; + + rcu_read_lock_sched(); + pool = first_pwq(wq)->pool; + written = scnprintf(buf, PAGE_SIZE, "%d\n", pool->id); + rcu_read_unlock_sched(); + + return written; +} + +static ssize_t wq_nice_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct workqueue_struct *wq = dev_to_wq(dev); + int written; + + rcu_read_lock_sched(); + written = scnprintf(buf, PAGE_SIZE, "%d\n", + first_pwq(wq)->pool->attrs->nice); + rcu_read_unlock_sched(); + + return written; +} + +/* prepare workqueue_attrs for sysfs store operations */ +static struct workqueue_attrs *wq_sysfs_prep_attrs(struct workqueue_struct *wq) +{ + struct workqueue_attrs *attrs; + + attrs = alloc_workqueue_attrs(GFP_KERNEL); + if (!attrs) + return NULL; + + rcu_read_lock_sched(); + copy_workqueue_attrs(attrs, first_pwq(wq)->pool->attrs); + rcu_read_unlock_sched(); + return attrs; +} + +static ssize_t wq_nice_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct workqueue_struct *wq = dev_to_wq(dev); + struct workqueue_attrs *attrs; + int ret; + + attrs = wq_sysfs_prep_attrs(wq); + if (!attrs) + return -ENOMEM; + + if (sscanf(buf, "%d", &attrs->nice) == 1 && + attrs->nice >= -20 && attrs->nice <= 19) + ret = apply_workqueue_attrs(wq, attrs); + else + ret = -EINVAL; + + free_workqueue_attrs(attrs); + return ret ?: count; +} + +static ssize_t wq_cpumask_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct workqueue_struct *wq = dev_to_wq(dev); + int written; + + rcu_read_lock_sched(); + written = cpumask_scnprintf(buf, PAGE_SIZE, + first_pwq(wq)->pool->attrs->cpumask); + rcu_read_unlock_sched(); + + written += scnprintf(buf + written, PAGE_SIZE - written, "\n"); + return written; +} + +static ssize_t wq_cpumask_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct workqueue_struct *wq = dev_to_wq(dev); + struct workqueue_attrs *attrs; + int ret; + + attrs = 
wq_sysfs_prep_attrs(wq); + if (!attrs) + return -ENOMEM; + + ret = cpumask_parse(buf, attrs->cpumask); + if (!ret) + ret = apply_workqueue_attrs(wq, attrs); + + free_workqueue_attrs(attrs); + return ret ?: count; +} + +static struct device_attribute wq_sysfs_unbound_attrs[] = { + __ATTR(pool_id, 0444, wq_pool_id_show, NULL), + __ATTR(nice, 0644, wq_nice_show, wq_nice_store), + __ATTR(cpumask, 0644, wq_cpumask_show, wq_cpumask_store), + __ATTR_NULL, +}; + +static struct bus_type wq_subsys = { + .name = "workqueue", + .dev_attrs = wq_sysfs_attrs, +}; + +static int __init wq_sysfs_init(void) +{ + return subsys_virtual_register(&wq_subsys, NULL); +} +core_initcall(wq_sysfs_init); + +static void wq_device_release(struct device *dev) +{ + struct wq_device *wq_dev = container_of(dev, struct wq_device, dev); + + kfree(wq_dev); +} + +/** + * workqueue_sysfs_register - make a workqueue visible in sysfs + * @wq: the workqueue to register + * + * Expose @wq in sysfs under /sys/bus/workqueue/devices. + * alloc_workqueue*() automatically calls this function if WQ_SYSFS is set + * which is the preferred method. + * + * Workqueue user should use this function directly iff it wants to apply + * workqueue_attrs before making the workqueue visible in sysfs; otherwise, + * apply_workqueue_attrs() may race against userland updating the + * attributes. + * + * Returns 0 on success, -errno on failure. + */ +int workqueue_sysfs_register(struct workqueue_struct *wq) +{ + struct wq_device *wq_dev; + int ret; + + /* + * Adjusting max_active or creating new pwqs by applyting + * attributes breaks ordering guarantee. Disallow exposing ordered + * workqueues. + */ + if (WARN_ON(wq->flags & __WQ_ORDERED)) + return -EINVAL; + + wq->wq_dev = wq_dev = kzalloc(sizeof(*wq_dev), GFP_KERNEL); + if (!wq_dev) + return -ENOMEM; + + wq_dev->wq = wq; + wq_dev->dev.bus = &wq_subsys; + wq_dev->dev.init_name = wq->name; + wq_dev->dev.release = wq_device_release; + + /* + * unbound_attrs are created separately. Suppress uevent until + * everything is ready. + */ + dev_set_uevent_suppress(&wq_dev->dev, true); + + ret = device_register(&wq_dev->dev); + if (ret) { + kfree(wq_dev); + wq->wq_dev = NULL; + return ret; + } + + if (wq->flags & WQ_UNBOUND) { + struct device_attribute *attr; + + for (attr = wq_sysfs_unbound_attrs; attr->attr.name; attr++) { + ret = device_create_file(&wq_dev->dev, attr); + if (ret) { + device_unregister(&wq_dev->dev); + wq->wq_dev = NULL; + return ret; + } + } + } + + kobject_uevent(&wq_dev->dev.kobj, KOBJ_ADD); + return 0; +} + +/** + * workqueue_sysfs_unregister - undo workqueue_sysfs_register() + * @wq: the workqueue to unregister + * + * If @wq is registered to sysfs by workqueue_sysfs_register(), unregister. + */ +static void workqueue_sysfs_unregister(struct workqueue_struct *wq) +{ + struct wq_device *wq_dev = wq->wq_dev; + + if (!wq->wq_dev) + return; + + wq->wq_dev = NULL; + device_unregister(&wq_dev->dev); +} +#else /* CONFIG_SYSFS */ +static void workqueue_sysfs_unregister(struct workqueue_struct *wq) { } +#endif /* CONFIG_SYSFS */ + /** * free_workqueue_attrs - free a workqueue_attrs * @attrs: workqueue_attrs to free @@ -3625,6 +3908,9 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt, wake_up_process(rescuer->task); } + if ((wq->flags & WQ_SYSFS) && workqueue_sysfs_register(wq)) + goto err_destroy; + /* * workqueue_lock protects global freeze state and workqueues * list. 
Grab it, set max_active accordingly and add the new @@ -3693,6 +3979,8 @@ void destroy_workqueue(struct workqueue_struct *wq) spin_unlock_irq(&workqueue_lock); + workqueue_sysfs_unregister(wq); + if (wq->rescuer) { kthread_stop(wq->rescuer->task); kfree(wq->rescuer); -- cgit From cc2a8b1a5595a435191fb197d92d1f3e193c9a6d Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 12 Mar 2013 13:59:14 -0700 Subject: async: remove unused @node from struct async_domain The @node in struct async_domain is unused after we introduce async_global_pending; remove it. tj: Unnecessary whitespace adjustments dropped. Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo Cc: Arjan van de Ven --- include/linux/async.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/async.h b/include/linux/async.h index a2e3f18b2ad6..98ea0fef30d5 100644 --- a/include/linux/async.h +++ b/include/linux/async.h @@ -18,7 +18,6 @@ typedef u64 async_cookie_t; typedef void (async_func_ptr) (void *data, async_cookie_t cookie); struct async_domain { - struct list_head node; struct list_head pending; unsigned registered:1; }; @@ -27,8 +26,7 @@ struct async_domain { * domain participates in global async_synchronize_full */ #define ASYNC_DOMAIN(_name) \ - struct async_domain _name = { .node = LIST_HEAD_INIT(_name.node), \ - .pending = LIST_HEAD_INIT(_name.pending), \ + struct async_domain _name = { .pending = LIST_HEAD_INIT(_name.pending), \ .registered = 1 } /* @@ -36,8 +34,7 @@ struct async_domain { * complete, this domain does not participate in async_synchronize_full */ #define ASYNC_DOMAIN_EXCLUSIVE(_name) \ - struct async_domain _name = { .node = LIST_HEAD_INIT(_name.node), \ - .pending = LIST_HEAD_INIT(_name.pending), \ + struct async_domain _name = { .pending = LIST_HEAD_INIT(_name.pending), \ .registered = 0 } extern async_cookie_t async_schedule(async_func_ptr *ptr, void *data); -- cgit From 362f2b098b188ede9c4350cc20e58040dbfa515e Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 12 Mar 2013 13:59:14 -0700 Subject: async: rename and redefine async_func_ptr A function type is typically defined as typedef ret_type (*func)(args..) but async_func_ptr is not. Redefine it. Also rename async_func_ptr to async_func_t, because the _func_t suffix is more generic.
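The typedef distinction the patch draws, shown side by side for illustration (the variable names are hypothetical):

    /* Old: a function type; declarations need an explicit '*'. */
    typedef void (async_func_ptr) (void *data, async_cookie_t cookie);
    async_func_ptr *old_cb;

    /* New: a pointer-to-function type, matching the common idiom. */
    typedef void (*async_func_t) (void *data, async_cookie_t cookie);
    async_func_t new_cb;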
Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo Cc: Arjan van de Ven --- arch/sh/drivers/pci/pcie-sh7786.c | 2 +- include/linux/async.h | 6 +++--- kernel/async.c | 20 ++++++++++---------- 3 files changed, 14 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/arch/sh/drivers/pci/pcie-sh7786.c b/arch/sh/drivers/pci/pcie-sh7786.c index c2c85f6cd738..a162a7f86b2e 100644 --- a/arch/sh/drivers/pci/pcie-sh7786.c +++ b/arch/sh/drivers/pci/pcie-sh7786.c @@ -35,7 +35,7 @@ static unsigned int nr_ports; static struct sh7786_pcie_hwops { int (*core_init)(void); - async_func_ptr *port_init_hw; + async_func_t port_init_hw; } *sh7786_pcie_hwops; static struct resource sh7786_pci0_resources[] = { diff --git a/include/linux/async.h b/include/linux/async.h index 98ea0fef30d5..6b0226bdaadc 100644 --- a/include/linux/async.h +++ b/include/linux/async.h @@ -16,7 +16,7 @@ #include typedef u64 async_cookie_t; -typedef void (async_func_ptr) (void *data, async_cookie_t cookie); +typedef void (*async_func_t) (void *data, async_cookie_t cookie); struct async_domain { struct list_head pending; unsigned registered:1; @@ -37,8 +37,8 @@ struct async_domain { struct async_domain _name = { .pending = LIST_HEAD_INIT(_name.pending), \ .registered = 0 } -extern async_cookie_t async_schedule(async_func_ptr *ptr, void *data); -extern async_cookie_t async_schedule_domain(async_func_ptr *ptr, void *data, +extern async_cookie_t async_schedule(async_func_t func, void *data); +extern async_cookie_t async_schedule_domain(async_func_t func, void *data, struct async_domain *domain); void async_unregister_domain(struct async_domain *domain); extern void async_synchronize_full(void); diff --git a/kernel/async.c b/kernel/async.c index ab99c92f6b68..61f023ce0228 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -73,7 +73,7 @@ struct async_entry { struct list_head global_list; struct work_struct work; async_cookie_t cookie; - async_func_ptr *func; + async_func_t func; void *data; struct async_domain *domain; }; @@ -145,7 +145,7 @@ static void async_run_entry_fn(struct work_struct *work) wake_up(&async_done); } -static async_cookie_t __async_schedule(async_func_ptr *ptr, void *data, struct async_domain *domain) +static async_cookie_t __async_schedule(async_func_t func, void *data, struct async_domain *domain) { struct async_entry *entry; unsigned long flags; @@ -165,13 +165,13 @@ static async_cookie_t __async_schedule(async_func_ptr *ptr, void *data, struct a spin_unlock_irqrestore(&async_lock, flags); /* low on memory.. run synchronously */ - ptr(data, newcookie); + func(data, newcookie); return newcookie; } INIT_LIST_HEAD(&entry->domain_list); INIT_LIST_HEAD(&entry->global_list); INIT_WORK(&entry->work, async_run_entry_fn); - entry->func = ptr; + entry->func = func; entry->data = data; entry->domain = domain; @@ -198,21 +198,21 @@ static async_cookie_t __async_schedule(async_func_ptr *ptr, void *data, struct a /** * async_schedule - schedule a function for asynchronous execution - * @ptr: function to execute asynchronously + * @func: function to execute asynchronously * @data: data pointer to pass to the function * * Returns an async_cookie_t that may be used for checkpointing later. * Note: This function may be called from atomic or non-atomic contexts. 
*/ -async_cookie_t async_schedule(async_func_ptr *ptr, void *data) +async_cookie_t async_schedule(async_func_t func, void *data) { - return __async_schedule(ptr, data, &async_dfl_domain); + return __async_schedule(func, data, &async_dfl_domain); } EXPORT_SYMBOL_GPL(async_schedule); /** * async_schedule_domain - schedule a function for asynchronous execution within a certain domain - * @ptr: function to execute asynchronously + * @func: function to execute asynchronously * @data: data pointer to pass to the function * @domain: the domain * @@ -222,10 +222,10 @@ EXPORT_SYMBOL_GPL(async_schedule); * synchronization domain is specified via @domain. Note: This function * may be called from atomic or non-atomic contexts. */ -async_cookie_t async_schedule_domain(async_func_ptr *ptr, void *data, +async_cookie_t async_schedule_domain(async_func_t func, void *data, struct async_domain *domain) { - return __async_schedule(ptr, data, domain); + return __async_schedule(func, data, domain); } EXPORT_SYMBOL_GPL(async_schedule_domain); -- cgit From 49d0de082c31de34cc896c14eec5f1c2ade0415a Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Thu, 14 Feb 2013 16:42:34 -0800 Subject: rcu: Fix hlist_bl_set_first_rcu() annotation Abhi noticed that we were getting a complaint from the RCU subsystem about access to an RCU-protected list under the write side bit lock. This commit adds an annotation so that the check is satisfied by holding either the RCU read lock or the write side bit lock. Reported-by: Abhijith Das Signed-off-by: Steven Whitehouse Tested-by: Abhijith Das Signed-off-by: Paul E. McKenney --- include/linux/list_bl.h | 5 +++++ include/linux/rculist_bl.h | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/list_bl.h b/include/linux/list_bl.h index 31f9d75adc5b..2eb88556c5c5 100644 --- a/include/linux/list_bl.h +++ b/include/linux/list_bl.h @@ -125,6 +125,11 @@ static inline void hlist_bl_unlock(struct hlist_bl_head *b) __bit_spin_unlock(0, (unsigned long *)b); } +static inline bool hlist_bl_is_locked(struct hlist_bl_head *b) +{ + return bit_spin_is_locked(0, (unsigned long *)b); +} + /** * hlist_bl_for_each_entry - iterate over list of given type * @tpos: the type * to use as a loop cursor. diff --git a/include/linux/rculist_bl.h b/include/linux/rculist_bl.h index cf1244fbf3b6..4f216c59e7db 100644 --- a/include/linux/rculist_bl.h +++ b/include/linux/rculist_bl.h @@ -20,7 +20,7 @@ static inline void hlist_bl_set_first_rcu(struct hlist_bl_head *h, static inline struct hlist_bl_node *hlist_bl_first_rcu(struct hlist_bl_head *h) { return (struct hlist_bl_node *) - ((unsigned long)rcu_dereference(h->first) & ~LIST_BL_LOCKMASK); + ((unsigned long)rcu_dereference_check(h->first, hlist_bl_is_locked(h)) & ~LIST_BL_LOCKMASK); } /** -- cgit From e7b2dcc52b0e2d598a469f01cc460ccdde6869f2 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 12 Mar 2013 15:35:58 -0700 Subject: cgroup: remove cgroup_is_descendant() It was used only by the ns cgroup, which was removed long ago.
Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 3 --- kernel/cgroup.c | 28 ---------------------------- 2 files changed, 31 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 5f76829dd75e..7e818a3ef60a 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -448,9 +448,6 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen); int cgroup_task_count(const struct cgroup *cgrp); -/* Return true if cgrp is a descendant of the task's cgroup */ -int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task); - /* * Control Group taskset, used to pass around set of tasks to cgroup_subsys * methods. diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 7a6c4c72ca55..f51443fd5f71 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -5035,34 +5035,6 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks) put_css_set_taskexit(cg); } -/** - * cgroup_is_descendant - see if @cgrp is a descendant of @task's cgrp - * @cgrp: the cgroup in question - * @task: the task in question - * - * See if @cgrp is a descendant of @task's cgroup in the appropriate - * hierarchy. - * - * If we are sending in dummytop, then presumably we are creating - * the top cgroup in the subsystem. - * - * Called only by the ns (nsproxy) cgroup. - */ -int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task) -{ - int ret; - struct cgroup *target; - - if (cgrp == dummytop) - return 1; - - target = task_cgroup_from_root(task, cgrp->root); - while (cgrp != target && cgrp!= cgrp->top_cgroup) - cgrp = cgrp->parent; - ret = (cgrp == target); - return ret; -} - static void check_for_release(struct cgroup *cgrp) { /* All of these checks rely on RCU to keep the cgroup -- cgit From e62676169118bc2d42e5008b3f8872646313f077 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Mar 2013 17:41:37 -0700 Subject: workqueue: implement current_is_workqueue_rescuer() Implement a function which queries whether the current task is running off a workqueue rescuer. This will be used to convert writeback to workqueue. Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 1 + kernel/workqueue.c | 13 +++++++++++++ 2 files changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 7f6d29a417c0..df30763c8682 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -451,6 +451,7 @@ extern bool cancel_delayed_work_sync(struct delayed_work *dwork); extern void workqueue_set_max_active(struct workqueue_struct *wq, int max_active); +extern bool current_is_workqueue_rescuer(void); extern bool workqueue_congested(int cpu, struct workqueue_struct *wq); extern unsigned int work_busy(struct work_struct *work); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index c82feac0a878..f5c8bbb9ada3 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -4071,6 +4071,19 @@ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active) } EXPORT_SYMBOL_GPL(workqueue_set_max_active); +/** + * current_is_workqueue_rescuer - is %current a workqueue rescuer? + * + * Determine whether %current is a workqueue rescuer. Can be used from + * work functions to determine whether it's being run off the rescuer task.
+ */ +bool current_is_workqueue_rescuer(void) +{ + struct worker *worker = current_wq_worker(); + + return worker && worker == worker->current_pwq->wq->rescuer; +} + /** * workqueue_congested - test whether a workqueue is congested * @cpu: CPU in question -- cgit From e86ac13b031cf71d8f40ff513e627aac80e6b765 Mon Sep 17 00:00:00 2001 From: Mugunthan V N Date: Mon, 11 Mar 2013 23:16:35 +0000 Subject: drivers: net: ethernet: cpsw: change cpts_active_slave to active_slave Change cpts_active_slave to active_slave so that the same DT property can be used for ethtool and SIOCGMIIPHY. CC: Richard Cochran Signed-off-by: Mugunthan V N Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/cpsw.txt | 7 ++++--- arch/arm/boot/dts/am33xx.dtsi | 2 +- drivers/net/ethernet/ti/cpsw.c | 10 +++++----- include/linux/platform_data/cpsw.h | 2 +- 4 files changed, 11 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/net/cpsw.txt b/Documentation/devicetree/bindings/net/cpsw.txt index 8e49c4200928..4f2ca6b4a182 100644 --- a/Documentation/devicetree/bindings/net/cpsw.txt +++ b/Documentation/devicetree/bindings/net/cpsw.txt @@ -15,7 +15,8 @@ Required properties: - mac_control : Specifies Default MAC control register content for the specific platform - slaves : Specifies number for slaves -- cpts_active_slave : Specifies the slave to use for time stamping +- active_slave : Specifies the slave to use for time stamping, + ethtool and SIOCGMIIPHY - cpts_clock_mult : Numerator to convert input clock ticks into nanoseconds - cpts_clock_shift : Denominator to convert input clock ticks into nanoseconds @@ -52,7 +53,7 @@ Examples: rx_descs = <64>; mac_control = <0x20>; slaves = <2>; - cpts_active_slave = <0>; + active_slave = <0>; cpts_clock_mult = <0x80000000>; cpts_clock_shift = <29>; cpsw_emac0: slave@0 { @@ -78,7 +79,7 @@ Examples: rx_descs = <64>; mac_control = <0x20>; slaves = <2>; - cpts_active_slave = <0>; + active_slave = <0>; cpts_clock_mult = <0x80000000>; cpts_clock_shift = <29>; cpsw_emac0: slave@0 { diff --git a/arch/arm/boot/dts/am33xx.dtsi b/arch/arm/boot/dts/am33xx.dtsi index 0957645b73af..91fe4f148f80 100644 --- a/arch/arm/boot/dts/am33xx.dtsi +++ b/arch/arm/boot/dts/am33xx.dtsi @@ -349,7 +349,7 @@ rx_descs = <64>; mac_control = <0x20>; slaves = <2>; - cpts_active_slave = <0>; + active_slave = <0>; cpts_clock_mult = <0x80000000>; cpts_clock_shift = <29>; reg = <0x4a100000 0x800 diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 01ffbc486982..98aa17a9516a 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -942,7 +942,7 @@ static void cpsw_ndo_change_rx_flags(struct net_device *ndev, int flags) static void cpsw_hwtstamp_v1(struct cpsw_priv *priv) { - struct cpsw_slave *slave = &priv->slaves[priv->data.cpts_active_slave]; + struct cpsw_slave *slave = &priv->slaves[priv->data.active_slave]; u32 ts_en, seq_id; if (!priv->cpts->tx_enable && !priv->cpts->rx_enable) { @@ -971,7 +971,7 @@ static void cpsw_hwtstamp_v2(struct cpsw_priv *priv) if (priv->data.dual_emac) slave = &priv->slaves[priv->emac_port]; else - slave = &priv->slaves[priv->data.cpts_active_slave]; + slave = &priv->slaves[priv->data.active_slave]; ctrl = slave_read(slave, CPSW2_CONTROL); ctrl &= ~CTRL_ALL_TS_MASK; @@ -1282,12 +1282,12 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, } data->slaves = prop; - if (of_property_read_u32(node, "cpts_active_slave", &prop)) { - pr_err("Missing cpts_active_slave
property in the DT.\n"); + if (of_property_read_u32(node, "active_slave", &prop)) { + pr_err("Missing active_slave property in the DT.\n"); ret = -EINVAL; goto error_ret; } - data->cpts_active_slave = prop; + data->active_slave = prop; if (of_property_read_u32(node, "cpts_clock_mult", &prop)) { pr_err("Missing cpts_clock_mult property in the DT.\n"); diff --git a/include/linux/platform_data/cpsw.h b/include/linux/platform_data/cpsw.h index 798fb80b024b..bb3cd58d71e3 100644 --- a/include/linux/platform_data/cpsw.h +++ b/include/linux/platform_data/cpsw.h @@ -30,7 +30,7 @@ struct cpsw_platform_data { u32 channels; /* number of cpdma channels (symmetric) */ u32 slaves; /* number of slave cpgmac ports */ struct cpsw_slave_data *slave_data; - u32 cpts_active_slave; /* time stamping slave */ + u32 active_slave; /* time stamping, ethtool and SIOCGMIIPHY slave */ u32 cpts_clock_mult; /* convert input clock ticks to nanoseconds */ u32 cpts_clock_shift; /* convert input clock ticks to nanoseconds */ u32 ale_entries; /* ale table size */ -- cgit From eaa907c546f76222227dfc41784b22588af1e3d7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 6 Mar 2013 11:18:36 +0000 Subject: tick: Provide a check for a forced broadcast pending On the CPU which gets woken along with the target CPU of the broadcast the following happens: deep_idle() <-- spurious wakeup broadcast_exit() set forced bit enable interrupts <-- Nothing happens disable interrupts broadcast_enter() <-- Here we observe the forced bit is set deep_idle() Now after that the target CPU of the broadcast runs the broadcast handler and finds the other CPU in both the broadcast and the forced mask, sends the IPI and stuff gets back to normal. So it's not actually harmful, just more evidence for the theory that hardware designers have access to very special drug supplies. Now there is no point in going back to deep idle just to wake up again right away via an IPI. Provide a check which allows the idle code to avoid the deep idle transition. Signed-off-by: Thomas Gleixner Cc: LAK Cc: John Stultz Cc: Arjan van de Ven Cc: Lorenzo Pieralisi Tested-by: Santosh Shilimkar Cc: Jason Liu Link: http://lkml.kernel.org/r/20130306111537.565418308@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/clockchips.h | 6 ++++++ kernel/time/tick-broadcast.c | 12 ++++++++++++ 2 files changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 494d33ea78f8..646aac136eed 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -175,6 +175,12 @@ extern void tick_broadcast(const struct cpumask *mask); extern int tick_receive_broadcast(void); #endif +#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT) +extern int tick_check_broadcast_expired(void); +#else +static inline int tick_check_broadcast_expired(void) { return 0; } +#endif + #ifdef CONFIG_GENERIC_CLOCKEVENTS extern void clockevents_notify(unsigned long reason, void *arg); #else diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 2100aad6b5f2..d76d816afc5d 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -403,6 +403,18 @@ struct cpumask *tick_get_broadcast_oneshot_mask(void) return tick_broadcast_oneshot_mask; } +/* + * Called before going idle with interrupts disabled. Checks whether a + * broadcast event from the other core is about to happen. We detected + * that in tick_broadcast_oneshot_control().
The callsite can use this + * to avoid a deep idle transition as we are about to get the + * broadcast IPI right away. + */ +int tick_check_broadcast_expired(void) +{ + return cpumask_test_cpu(smp_processor_id(), tick_broadcast_force_mask); +} + /* * Set broadcast interrupt affinity */ -- cgit From be871b7e54711479d3b9d3617d49898770830db2 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Tue, 12 Mar 2013 17:21:19 +0100 Subject: device: separate all subsys mutexes ca22e56d (driver-core: implement 'sysdev' functionality for regular devices and buses) has introduced bus_register macro with a static key to distinguish different subsys mutex classes. This however doesn't work for different subsys which use a common registering function. One example is subsys_system_register (and mce_device and cpu_device). In the end this leads to the following lockdep splat: [ 207.271924] ====================================================== [ 207.271932] [ INFO: possible circular locking dependency detected ] [ 207.271942] 3.9.0-rc1-0.7-default+ #34 Not tainted [ 207.271948] ------------------------------------------------------- [ 207.271957] bash/10493 is trying to acquire lock: [ 207.271963] (subsys mutex){+.+.+.}, at: [] bus_remove_device+0x37/0x1c0 [ 207.271987] [ 207.271987] but task is already holding lock: [ 207.271995] (cpu_hotplug.lock){+.+.+.}, at: [] cpu_hotplug_begin+0x2f/0x60 [ 207.272012] [ 207.272012] which lock already depends on the new lock. [ 207.272012] [ 207.272023] [ 207.272023] the existing dependency chain (in reverse order) is: [ 207.272033] [ 207.272033] -> #4 (cpu_hotplug.lock){+.+.+.}: [ 207.272044] [] lock_acquire+0xe9/0x120 [ 207.272056] [] mutex_lock_nested+0x37/0x360 [ 207.272069] [] get_online_cpus+0x29/0x40 [ 207.272082] [] drain_all_stock+0x30/0x150 [ 207.272094] [] mem_cgroup_reclaim+0xaa/0xe0 [ 207.272104] [] __mem_cgroup_try_charge+0x51e/0xcf0 [ 207.272114] [] mem_cgroup_charge_common+0x36/0x60 [ 207.272125] [] mem_cgroup_newpage_charge+0x2a/0x30 [ 207.272135] [] do_wp_page+0x231/0x830 [ 207.272147] [] handle_pte_fault+0x19e/0x8d0 [ 207.272157] [] handle_mm_fault+0x158/0x1e0 [ 207.272166] [] do_page_fault+0x2a3/0x4e0 [ 207.272178] [] page_fault+0x28/0x30 [ 207.272189] [ 207.272189] -> #3 (&mm->mmap_sem){++++++}: [ 207.272199] [] lock_acquire+0xe9/0x120 [ 207.272208] [] might_fault+0x6d/0x90 [ 207.272218] [] filldir64+0xb3/0x120 [ 207.272229] [] call_filldir+0x89/0x130 [ext3] [ 207.272248] [] ext3_readdir+0x6b7/0x7e0 [ext3] [ 207.272263] [] vfs_readdir+0xa9/0xc0 [ 207.272273] [] sys_getdents64+0x9b/0x110 [ 207.272284] [] system_call_fastpath+0x16/0x1b [ 207.272296] [ 207.272296] -> #2 (&type->i_mutex_dir_key#3){+.+.+.}: [ 207.272309] [] lock_acquire+0xe9/0x120 [ 207.272319] [] mutex_lock_nested+0x37/0x360 [ 207.272329] [] link_path_walk+0x6f4/0x9a0 [ 207.272339] [] path_openat+0xba/0x470 [ 207.272349] [] do_filp_open+0x48/0xa0 [ 207.272358] [] file_open_name+0xdc/0x110 [ 207.272369] [] filp_open+0x35/0x40 [ 207.272378] [] _request_firmware+0x52e/0xb20 [ 207.272389] [] request_firmware+0x16/0x20 [ 207.272399] [] request_microcode_fw+0x61/0xd0 [microcode] [ 207.272416] [] microcode_init_cpu+0x104/0x150 [microcode] [ 207.272431] [] mc_device_add+0x7c/0xb0 [microcode] [ 207.272444] [] subsys_interface_register+0xc9/0x100 [ 207.272457] [] 0xffffffffa04fc0f4 [ 207.272472] [] do_one_initcall+0x42/0x180 [ 207.272485] [] load_module+0x19df/0x1b70 [ 207.272499] [] sys_init_module+0xe6/0x130 [ 207.272511] [] system_call_fastpath+0x16/0x1b [ 207.272523] [ 207.272523] -> #1 
(umhelper_sem){++++.+}: [ 207.272537] [] lock_acquire+0xe9/0x120 [ 207.272548] [] down_read+0x34/0x50 [ 207.272559] [] usermodehelper_read_trylock+0x4f/0x100 [ 207.272575] [] _request_firmware+0x59d/0xb20 [ 207.272587] [] request_firmware+0x16/0x20 [ 207.272599] [] request_microcode_fw+0x61/0xd0 [microcode] [ 207.272613] [] microcode_init_cpu+0x104/0x150 [microcode] [ 207.272627] [] mc_device_add+0x7c/0xb0 [microcode] [ 207.272641] [] subsys_interface_register+0xc9/0x100 [ 207.272654] [] 0xffffffffa04fc0f4 [ 207.272666] [] do_one_initcall+0x42/0x180 [ 207.272678] [] load_module+0x19df/0x1b70 [ 207.272690] [] sys_init_module+0xe6/0x130 [ 207.272702] [] system_call_fastpath+0x16/0x1b [ 207.272715] [ 207.272715] -> #0 (subsys mutex){+.+.+.}: [ 207.272729] [] __lock_acquire+0x13b2/0x15f0 [ 207.272740] [] lock_acquire+0xe9/0x120 [ 207.272751] [] mutex_lock_nested+0x37/0x360 [ 207.272763] [] bus_remove_device+0x37/0x1c0 [ 207.272775] [] device_del+0x134/0x1f0 [ 207.272786] [] device_unregister+0x22/0x60 [ 207.272798] [] mce_cpu_callback+0x15e/0x1ad [ 207.272812] [] notifier_call_chain+0x72/0x130 [ 207.272824] [] __raw_notifier_call_chain+0xe/0x10 [ 207.272839] [] _cpu_down+0x1d6/0x350 [ 207.272851] [] cpu_down+0x40/0x60 [ 207.272862] [] store_online+0x75/0xe0 [ 207.272874] [] dev_attr_store+0x20/0x30 [ 207.272886] [] sysfs_write_file+0xd9/0x150 [ 207.272900] [] vfs_write+0xcb/0x130 [ 207.272911] [] sys_write+0x64/0xa0 [ 207.272923] [] system_call_fastpath+0x16/0x1b [ 207.272936] [ 207.272936] other info that might help us debug this: [ 207.272936] [ 207.272952] Chain exists of: [ 207.272952] subsys mutex --> &mm->mmap_sem --> cpu_hotplug.lock [ 207.272952] [ 207.272973] Possible unsafe locking scenario: [ 207.272973] [ 207.272984] CPU0 CPU1 [ 207.272992] ---- ---- [ 207.273000] lock(cpu_hotplug.lock); [ 207.273009] lock(&mm->mmap_sem); [ 207.273020] lock(cpu_hotplug.lock); [ 207.273031] lock(subsys mutex); [ 207.273040] [ 207.273040] *** DEADLOCK *** [ 207.273040] [ 207.273055] 5 locks held by bash/10493: [ 207.273062] #0: (&buffer->mutex){+.+.+.}, at: [] sysfs_write_file+0x49/0x150 [ 207.273080] #1: (s_active#150){.+.+.+}, at: [] sysfs_write_file+0xc2/0x150 [ 207.273099] #2: (x86_cpu_hotplug_driver_mutex){+.+.+.}, at: [] cpu_hotplug_driver_lock+0x17/0x20 [ 207.273121] #3: (cpu_add_remove_lock){+.+.+.}, at: [] cpu_down+0x2c/0x60 [ 207.273140] #4: (cpu_hotplug.lock){+.+.+.}, at: [] cpu_hotplug_begin+0x2f/0x60 [ 207.273158] [ 207.273158] stack backtrace: [ 207.273170] Pid: 10493, comm: bash Not tainted 3.9.0-rc1-0.7-default+ #34 [ 207.273180] Call Trace: [ 207.273192] [] print_circular_bug+0x223/0x310 [ 207.273204] [] __lock_acquire+0x13b2/0x15f0 [ 207.273216] [] ? sysfs_hash_and_remove+0x60/0xc0 [ 207.273227] [] lock_acquire+0xe9/0x120 [ 207.273239] [] ? bus_remove_device+0x37/0x1c0 [ 207.273251] [] mutex_lock_nested+0x37/0x360 [ 207.273263] [] ? bus_remove_device+0x37/0x1c0 [ 207.273274] [] ? sysfs_hash_and_remove+0x60/0xc0 [ 207.273286] [] bus_remove_device+0x37/0x1c0 [ 207.273298] [] device_del+0x134/0x1f0 [ 207.273309] [] device_unregister+0x22/0x60 [ 207.273321] [] mce_cpu_callback+0x15e/0x1ad [ 207.273332] [] notifier_call_chain+0x72/0x130 [ 207.273344] [] __raw_notifier_call_chain+0xe/0x10 [ 207.273356] [] _cpu_down+0x1d6/0x350 [ 207.273368] [] ? 
cpu_hotplug_driver_lock+0x17/0x20 [ 207.273380] [] cpu_down+0x40/0x60 [ 207.273391] [] store_online+0x75/0xe0 [ 207.273402] [] dev_attr_store+0x20/0x30 [ 207.273413] [] sysfs_write_file+0xd9/0x150 [ 207.273425] [] vfs_write+0xcb/0x130 [ 207.273436] [] sys_write+0x64/0xa0 [ 207.273447] [] system_call_fastpath+0x16/0x1b This reports a false positive deadlock because lockdep sees: 1) load_module -> subsys_interface_register -> mc_device_add (*) -> subsys->p->mutex -> link_path_walk -> lookup_slow -> i_mutex 2) sys_write -> _cpu_down -> cpu_hotplug_begin -> cpu_hotplug.lock -> mce_cpu_callback -> mce_device_remove(**) -> device_unregister -> bus_remove_device -> subsys mutex 3) vfs_readdir -> i_mutex -> filldir64 -> might_fault -> might_lock_read(mmap_sem) -> page_fault -> mmap_sem -> drain_all_stock -> cpu_hotplug.lock but 1) takes the cpu_subsys subsys mutex (*) while 2) takes the mce_device subsys mutex (**), so the deadlock is not possible AFAICS. The fix is quite simple. We can pull the key inside the bus_type structure because each bus_type is defined separately, so the pointer will be unique as well. bus_register doesn't need to be a macro anymore, so change it into a regular function. We could get rid of __bus_register as there is no other caller, but maybe somebody will want to use a different key, so keep it around for now. Reported-by: Li Zefan Signed-off-by: Michal Hocko Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/base/bus.c | 8 ++++---- include/linux/device.h | 12 +++--------- 2 files changed, 7 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/bus.c b/drivers/base/bus.c index 519865b53f76..8a00dec574d6 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -898,18 +898,18 @@ static ssize_t bus_uevent_store(struct bus_type *bus, static BUS_ATTR(uevent, S_IWUSR, NULL, bus_uevent_store); /** - * __bus_register - register a driver-core subsystem + * bus_register - register a driver-core subsystem * @bus: bus to register - * @key: lockdep class key * * Once we have that, we register the bus with the kobject * infrastructure, then register the children subsystems it has: * the devices and drivers that belong to the subsystem.
*/ -int __bus_register(struct bus_type *bus, struct lock_class_key *key) +int bus_register(struct bus_type *bus) { int retval; struct subsys_private *priv; + struct lock_class_key *key = &bus->lock_key; priv = kzalloc(sizeof(struct subsys_private), GFP_KERNEL); if (!priv) @@ -981,7 +981,7 @@ out: bus->p = NULL; return retval; } -EXPORT_SYMBOL_GPL(__bus_register); +EXPORT_SYMBOL_GPL(bus_register); /** * bus_unregister - remove a bus from the system diff --git a/include/linux/device.h b/include/linux/device.h index 9d6464ea99c6..4a7c4a84afee 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -111,17 +111,11 @@ struct bus_type { struct iommu_ops *iommu_ops; struct subsys_private *p; + struct lock_class_key lock_key; }; -/* This is a #define to keep the compiler from merging different - * instances of the __key variable */ -#define bus_register(subsys) \ -({ \ - static struct lock_class_key __key; \ - __bus_register(subsys, &__key); \ -}) -extern int __must_check __bus_register(struct bus_type *bus, - struct lock_class_key *key); +extern int __must_check bus_register(struct bus_type *bus); + extern void bus_unregister(struct bus_type *bus); extern int __must_check bus_rescan_devices(struct bus_type *bus); -- cgit From f792685006274a850e6cc0ea9ade275ccdfc90bc Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 5 Mar 2013 18:05:46 +0100 Subject: math64: New div64_u64_rem helper Provide an extended version of div64_u64() that also returns the remainder of the division. We are going to need this to refine the cputime scaling code. Signed-off-by: Frederic Weisbecker Cc: Stanislaw Gruszka Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Andrew Morton --- include/linux/math64.h | 19 ++++++++++++++++++- lib/div64.c | 19 +++++++++++++------ 2 files changed, 31 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/math64.h b/include/linux/math64.h index b8ba85544721..931a619407bf 100644 --- a/include/linux/math64.h +++ b/include/linux/math64.h @@ -29,6 +29,15 @@ static inline s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder) return dividend / divisor; } +/** + * div64_u64_rem - unsigned 64bit divide with 64bit divisor + */ +static inline u64 div64_u64_rem(u64 dividend, u64 divisor, u64 *remainder) +{ + *remainder = dividend % divisor; + return dividend / divisor; +} + /** * div64_u64 - unsigned 64bit divide with 64bit divisor */ @@ -61,8 +70,16 @@ static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder) extern s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder); #endif +#ifndef div64_u64_rem +extern u64 div64_u64_rem(u64 dividend, u64 divisor, u64 *remainder); +#endif + #ifndef div64_u64 -extern u64 div64_u64(u64 dividend, u64 divisor); +static inline u64 div64_u64(u64 dividend, u64 divisor) +{ + u64 remainder; + return div64_u64_rem(dividend, divisor, &remainder); +} #endif #ifndef div64_s64 diff --git a/lib/div64.c b/lib/div64.c index a163b6caef73..3af5728d95fd 100644 --- a/lib/div64.c +++ b/lib/div64.c @@ -79,9 +79,10 @@ EXPORT_SYMBOL(div_s64_rem); #endif /** - * div64_u64 - unsigned 64bit divide with 64bit divisor + * div64_u64_rem - unsigned 64bit divide with 64bit divisor and 64bit remainder * @dividend: 64bit dividend * @divisor: 64bit divisor + * @remainder: 64bit remainder * * This implementation is a modified version of the algorithm proposed * by the book 'Hacker's Delight'. 
The original source and full proof @@ -89,27 +90,33 @@ EXPORT_SYMBOL(div_s64_rem); * * 'http://www.hackersdelight.org/HDcode/newCode/divDouble.c.txt' */ -#ifndef div64_u64 -u64 div64_u64(u64 dividend, u64 divisor) +#ifndef div64_u64_rem +u64 div64_u64_rem(u64 dividend, u64 divisor, u64 *remainder) { u32 high = divisor >> 32; u64 quot; if (high == 0) { - quot = div_u64(dividend, divisor); + u32 rem32; + quot = div_u64_rem(dividend, divisor, &rem32); + *remainder = rem32; } else { int n = 1 + fls(high); quot = div_u64(dividend >> n, divisor >> n); if (quot != 0) quot--; - if ((dividend - quot * divisor) >= divisor) + + *remainder = dividend - quot * divisor; + if (*remainder >= divisor) { quot++; + *remainder -= divisor; + } } return quot; } -EXPORT_SYMBOL(div64_u64); +EXPORT_SYMBOL(div64_u64_rem); #endif /** -- cgit From 8425e3d5bdbe8e741d2c73cf3189ed59b4038b84 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 13 Mar 2013 16:51:36 -0700 Subject: workqueue: inline trivial wrappers There's no reason to make these trivial wrappers full (exported) functions. Inline the following: queue_work() queue_delayed_work() mod_delayed_work() schedule_work_on() schedule_work() schedule_delayed_work_on() schedule_delayed_work() keventd_up() Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 123 +++++++++++++++++++++++++++++++++++++++++----- kernel/workqueue.c | 111 ----------------------------------------- 2 files changed, 111 insertions(+), 123 deletions(-) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index df30763c8682..835d12b76960 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -417,28 +417,16 @@ int apply_workqueue_attrs(struct workqueue_struct *wq, extern bool queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work); -extern bool queue_work(struct workqueue_struct *wq, struct work_struct *work); extern bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq, struct delayed_work *work, unsigned long delay); -extern bool queue_delayed_work(struct workqueue_struct *wq, - struct delayed_work *work, unsigned long delay); extern bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq, struct delayed_work *dwork, unsigned long delay); -extern bool mod_delayed_work(struct workqueue_struct *wq, - struct delayed_work *dwork, unsigned long delay); extern void flush_workqueue(struct workqueue_struct *wq); extern void drain_workqueue(struct workqueue_struct *wq); extern void flush_scheduled_work(void); -extern bool schedule_work_on(int cpu, struct work_struct *work); -extern bool schedule_work(struct work_struct *work); -extern bool schedule_delayed_work_on(int cpu, struct delayed_work *work, - unsigned long delay); -extern bool schedule_delayed_work(struct delayed_work *work, - unsigned long delay); extern int schedule_on_each_cpu(work_func_t func); -extern int keventd_up(void); int execute_in_process_context(work_func_t fn, struct execute_work *); @@ -455,6 +443,117 @@ extern bool current_is_workqueue_rescuer(void); extern bool workqueue_congested(int cpu, struct workqueue_struct *wq); extern unsigned int work_busy(struct work_struct *work); +/** + * queue_work - queue work on a workqueue + * @wq: workqueue to use + * @work: work to queue + * + * Returns %false if @work was already on a queue, %true otherwise. + * + * We queue the work to the CPU on which it was submitted, but if the CPU dies + * it can be processed by another CPU.
+ */ +static inline bool queue_work(struct workqueue_struct *wq, + struct work_struct *work) +{ + return queue_work_on(WORK_CPU_UNBOUND, wq, work); +} + +/** + * queue_delayed_work - queue work on a workqueue after delay + * @wq: workqueue to use + * @dwork: delayable work to queue + * @delay: number of jiffies to wait before queueing + * + * Equivalent to queue_delayed_work_on() but tries to use the local CPU. + */ +static inline bool queue_delayed_work(struct workqueue_struct *wq, + struct delayed_work *dwork, + unsigned long delay) +{ + return queue_delayed_work_on(WORK_CPU_UNBOUND, wq, dwork, delay); +} + +/** + * mod_delayed_work - modify delay of or queue a delayed work + * @wq: workqueue to use + * @dwork: work to queue + * @delay: number of jiffies to wait before queueing + * + * mod_delayed_work_on() on local CPU. + */ +static inline bool mod_delayed_work(struct workqueue_struct *wq, + struct delayed_work *dwork, + unsigned long delay) +{ + return mod_delayed_work_on(WORK_CPU_UNBOUND, wq, dwork, delay); +} + +/** + * schedule_work_on - put work task on a specific cpu + * @cpu: cpu to put the work task on + * @work: job to be done + * + * This puts a job on a specific cpu + */ +static inline bool schedule_work_on(int cpu, struct work_struct *work) +{ + return queue_work_on(cpu, system_wq, work); +} + +/** + * schedule_work - put work task in global workqueue + * @work: job to be done + * + * Returns %false if @work was already on the kernel-global workqueue and + * %true otherwise. + * + * This puts a job in the kernel-global workqueue if it was not already + * queued and leaves it in the same position on the kernel-global + * workqueue otherwise. + */ +static inline bool schedule_work(struct work_struct *work) +{ + return queue_work(system_wq, work); +} + +/** + * schedule_delayed_work_on - queue work in global workqueue on CPU after delay + * @cpu: cpu to use + * @dwork: job to be done + * @delay: number of jiffies to wait + * + * After waiting for a given time this puts a job in the kernel-global + * workqueue on the specified CPU. + */ +static inline bool schedule_delayed_work_on(int cpu, struct delayed_work *dwork, + unsigned long delay) +{ + return queue_delayed_work_on(cpu, system_wq, dwork, delay); +} + +/** + * schedule_delayed_work - put work task in global workqueue after delay + * @dwork: job to be done + * @delay: number of jiffies to wait or 0 for immediate execution + * + * After waiting for a given time this puts a job in the kernel-global + * workqueue. + */ +static inline bool schedule_delayed_work(struct delayed_work *dwork, + unsigned long delay) +{ + return queue_delayed_work(system_wq, dwork, delay); +} + +/** + * keventd_up - is workqueue initialized yet? + */ +static inline bool keventd_up(void) +{ + return system_wq != NULL; +} + /* * Like above, but uses del_timer() instead of del_timer_sync(). This means, * if it returns 0 the timer function may be running and the queueing is in diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 147fc5a784f0..f37421fb4f35 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1340,22 +1340,6 @@ bool queue_work_on(int cpu, struct workqueue_struct *wq, } EXPORT_SYMBOL_GPL(queue_work_on); -/** - * queue_work - queue work on a workqueue - * @wq: workqueue to use - * @work: work to queue - * - * Returns %false if @work was already on a queue, %true otherwise. - * - * We queue the work to the CPU on which it was submitted, but if the CPU dies - * it can be processed by another CPU. 
- */ -bool queue_work(struct workqueue_struct *wq, struct work_struct *work) -{ - return queue_work_on(WORK_CPU_UNBOUND, wq, work); -} -EXPORT_SYMBOL_GPL(queue_work); - void delayed_work_timer_fn(unsigned long __data) { struct delayed_work *dwork = (struct delayed_work *)__data; @@ -1430,21 +1414,6 @@ bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq, } EXPORT_SYMBOL_GPL(queue_delayed_work_on); -/** - * queue_delayed_work - queue work on a workqueue after delay - * @wq: workqueue to use - * @dwork: delayable work to queue - * @delay: number of jiffies to wait before queueing - * - * Equivalent to queue_delayed_work_on() but tries to use the local CPU. - */ -bool queue_delayed_work(struct workqueue_struct *wq, - struct delayed_work *dwork, unsigned long delay) -{ - return queue_delayed_work_on(WORK_CPU_UNBOUND, wq, dwork, delay); -} -EXPORT_SYMBOL_GPL(queue_delayed_work); - /** * mod_delayed_work_on - modify delay of or queue a delayed work on specific CPU * @cpu: CPU number to execute work on @@ -1483,21 +1452,6 @@ bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq, } EXPORT_SYMBOL_GPL(mod_delayed_work_on); -/** - * mod_delayed_work - modify delay of or queue a delayed work - * @wq: workqueue to use - * @dwork: work to queue - * @delay: number of jiffies to wait before queueing - * - * mod_delayed_work_on() on local CPU. - */ -bool mod_delayed_work(struct workqueue_struct *wq, struct delayed_work *dwork, - unsigned long delay) -{ - return mod_delayed_work_on(WORK_CPU_UNBOUND, wq, dwork, delay); -} -EXPORT_SYMBOL_GPL(mod_delayed_work); - /** * worker_enter_idle - enter idle state * @worker: worker which is entering idle state @@ -3001,66 +2955,6 @@ bool cancel_delayed_work_sync(struct delayed_work *dwork) } EXPORT_SYMBOL(cancel_delayed_work_sync); -/** - * schedule_work_on - put work task on a specific cpu - * @cpu: cpu to put the work task on - * @work: job to be done - * - * This puts a job on a specific cpu - */ -bool schedule_work_on(int cpu, struct work_struct *work) -{ - return queue_work_on(cpu, system_wq, work); -} -EXPORT_SYMBOL(schedule_work_on); - -/** - * schedule_work - put work task in global workqueue - * @work: job to be done - * - * Returns %false if @work was already on the kernel-global workqueue and - * %true otherwise. - * - * This puts a job in the kernel-global workqueue if it was not already - * queued and leaves it in the same position on the kernel-global - * workqueue otherwise. - */ -bool schedule_work(struct work_struct *work) -{ - return queue_work(system_wq, work); -} -EXPORT_SYMBOL(schedule_work); - -/** - * schedule_delayed_work_on - queue work in global workqueue on CPU after delay - * @cpu: cpu to use - * @dwork: job to be done - * @delay: number of jiffies to wait - * - * After waiting for a given time this puts a job in the kernel-global - * workqueue on the specified CPU. - */ -bool schedule_delayed_work_on(int cpu, struct delayed_work *dwork, - unsigned long delay) -{ - return queue_delayed_work_on(cpu, system_wq, dwork, delay); -} -EXPORT_SYMBOL(schedule_delayed_work_on); - -/** - * schedule_delayed_work - put work task in global workqueue after delay - * @dwork: job to be done - * @delay: number of jiffies to wait or 0 for immediate execution - * - * After waiting for a given time this puts a job in the kernel-global - * workqueue. 
- */ -bool schedule_delayed_work(struct delayed_work *dwork, unsigned long delay) -{ - return queue_delayed_work(system_wq, dwork, delay); -} -EXPORT_SYMBOL(schedule_delayed_work); - /** * schedule_on_each_cpu - execute a function synchronously on each online CPU * @func: the function to call @@ -3154,11 +3048,6 @@ int execute_in_process_context(work_func_t fn, struct execute_work *ew) } EXPORT_SYMBOL_GPL(execute_in_process_context); -int keventd_up(void) -{ - return system_wq != NULL; -} - #ifdef CONFIG_SYSFS /* * Workqueues with WQ_SYSFS flag set is visible to userland via -- cgit From ae63b31e4d0e2ec09c569306ea46f664508ef717 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 3 May 2012 23:09:03 -0400 Subject: tracing: Separate out trace events from global variables The trace events for ftrace are all defined via global variables. The arrays of events and event systems are linked to a global list. This prevents multiple users of the event system from independently choosing what to enable and what not to. Adding descriptors to represent the event/file relation, as well as the trace_array descriptor they are associated with, allows more than one set of events to be defined. Once the trace event files have a link between the trace event and the trace_array they are associated with, we can create multiple trace_arrays that can record separate events in separate buffers. Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 51 ++- include/trace/ftrace.h | 3 +- kernel/trace/trace.c | 8 + kernel/trace/trace.h | 39 +- kernel/trace/trace_events.c | 776 +++++++++++++++++++++++++------------ kernel/trace/trace_events_filter.c | 5 +- 6 files changed, 622 insertions(+), 260 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 13a54d0bdfa8..c7191d482f98 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -182,18 +182,20 @@ extern int ftrace_event_reg(struct ftrace_event_call *event, enum trace_reg type, void *data); enum { - TRACE_EVENT_FL_ENABLED_BIT, TRACE_EVENT_FL_FILTERED_BIT, - TRACE_EVENT_FL_RECORDED_CMD_BIT, TRACE_EVENT_FL_CAP_ANY_BIT, TRACE_EVENT_FL_NO_SET_FILTER_BIT, TRACE_EVENT_FL_IGNORE_ENABLE_BIT, }; +/* + * Event flags: + * FILTERED - The event has a filter attached + * CAP_ANY - Any user can enable for perf + * NO_SET_FILTER - Set when filter has error and is to be ignored + */ enum { - TRACE_EVENT_FL_ENABLED = (1 << TRACE_EVENT_FL_ENABLED_BIT), TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT), - TRACE_EVENT_FL_RECORDED_CMD = (1 << TRACE_EVENT_FL_RECORDED_CMD_BIT), TRACE_EVENT_FL_CAP_ANY = (1 << TRACE_EVENT_FL_CAP_ANY_BIT), TRACE_EVENT_FL_NO_SET_FILTER = (1 << TRACE_EVENT_FL_NO_SET_FILTER_BIT), TRACE_EVENT_FL_IGNORE_ENABLE = (1 << TRACE_EVENT_FL_IGNORE_ENABLE_BIT), @@ -203,12 +205,44 @@ struct ftrace_event_call { struct list_head list; struct ftrace_event_class *class; char *name; - struct dentry *dir; struct trace_event event; const char *print_fmt; struct event_filter *filter; + struct list_head *files; void *mod; void *data; + int flags; /* static flags of different events */ + +#ifdef CONFIG_PERF_EVENTS + int perf_refcount; + struct hlist_head __percpu *perf_events; +#endif +}; + +struct trace_array; +struct ftrace_subsystem_dir; + +enum { + FTRACE_EVENT_FL_ENABLED_BIT, + FTRACE_EVENT_FL_RECORDED_CMD_BIT, +}; + +/* + * Ftrace event file flags: + * ENABLED - The event is enabled + * RECORDED_CMD - The comms should be recorded at sched_switch + */ +enum { + 
FTRACE_EVENT_FL_ENABLED = (1 << FTRACE_EVENT_FL_ENABLED_BIT), + FTRACE_EVENT_FL_RECORDED_CMD = (1 << FTRACE_EVENT_FL_RECORDED_CMD_BIT), +}; + +struct ftrace_event_file { + struct list_head list; + struct ftrace_event_call *event_call; + struct dentry *dir; + struct trace_array *tr; + struct ftrace_subsystem_dir *system; /* * 32 bit flags: @@ -223,17 +257,12 @@ struct ftrace_event_call { * * Note: Reads of flags do not hold the event_mutex since * they occur in critical sections. But the way flags - * is currently used, these changes do no affect the code + * is currently used, these changes do not affect the code * except that when a change is made, it may have a slight * delay in propagating the changes to other CPUs due to * caching and such. */ unsigned int flags; - -#ifdef CONFIG_PERF_EVENTS - int perf_refcount; - struct hlist_head __percpu *perf_events; -#endif }; #define __TRACE_EVENT_FLAGS(name, value) \ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 40dc5e8fe340..191d9661e277 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -518,7 +518,8 @@ static inline notrace int ftrace_get_offsets_##call( \ static notrace void \ ftrace_raw_event_##call(void *__data, proto) \ { \ - struct ftrace_event_call *event_call = __data; \ + struct ftrace_event_file *ftrace_file = __data; \ + struct ftrace_event_call *event_call = ftrace_file->event_call; \ struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ struct ring_buffer_event *event; \ struct ftrace_raw_##call *entry; \ diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4f1dade56981..932931897b8d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -189,6 +189,8 @@ unsigned long long ns2usecs(cycle_t nsec) */ static struct trace_array global_trace; +LIST_HEAD(ftrace_trace_arrays); + static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu); int filter_current_check_discard(struct ring_buffer *buffer, @@ -5359,6 +5361,12 @@ __init static int tracer_alloc_buffers(void) register_die_notifier(&trace_die_notifier); + global_trace.flags = TRACE_ARRAY_FL_GLOBAL; + + INIT_LIST_HEAD(&global_trace.systems); + INIT_LIST_HEAD(&global_trace.events); + list_add(&global_trace.list, &ftrace_trace_arrays); + while (trace_boot_options) { char *option; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 2081971367ea..037f7eb03d69 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -158,13 +158,39 @@ struct trace_array_cpu { */ struct trace_array { struct ring_buffer *buffer; + struct list_head list; int cpu; int buffer_disabled; + unsigned int flags; cycle_t time_start; + struct dentry *dir; + struct dentry *event_dir; + struct list_head systems; + struct list_head events; struct task_struct *waiter; struct trace_array_cpu *data[NR_CPUS]; }; +enum { + TRACE_ARRAY_FL_GLOBAL = (1 << 0) +}; + +extern struct list_head ftrace_trace_arrays; + +/* + * The global tracer (top) should be the first trace array added, + * but we check the flag anyway. 
+ */ +static inline struct trace_array *top_trace_array(void) +{ + struct trace_array *tr; + + tr = list_entry(ftrace_trace_arrays.prev, + typeof(*tr), list); + WARN_ON(!(tr->flags & TRACE_ARRAY_FL_GLOBAL)); + return tr; +} + #define FTRACE_CMP_TYPE(var, type) \ __builtin_types_compatible_p(typeof(var), type *) @@ -851,12 +877,19 @@ struct event_filter { struct event_subsystem { struct list_head list; const char *name; - struct dentry *entry; struct event_filter *filter; - int nr_events; int ref_count; }; +struct ftrace_subsystem_dir { + struct list_head list; + struct event_subsystem *subsystem; + struct trace_array *tr; + struct dentry *entry; + int ref_count; + int nr_events; +}; + #define FILTER_PRED_INVALID ((unsigned short)-1) #define FILTER_PRED_IS_RIGHT (1 << 15) #define FILTER_PRED_FOLD (1 << 15) @@ -914,7 +947,7 @@ extern void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s); extern int apply_event_filter(struct ftrace_event_call *call, char *filter_string); -extern int apply_subsystem_event_filter(struct event_subsystem *system, +extern int apply_subsystem_event_filter(struct ftrace_subsystem_dir *dir, char *filter_string); extern void print_subsystem_event_filter(struct event_subsystem *system, struct trace_seq *s); diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 57e9b284250c..439955239bae 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -36,6 +36,19 @@ EXPORT_SYMBOL_GPL(event_storage); LIST_HEAD(ftrace_events); LIST_HEAD(ftrace_common_fields); +/* Double loops, do not use break, only goto's work */ +#define do_for_each_event_file(tr, file) \ + list_for_each_entry(tr, &ftrace_trace_arrays, list) { \ + list_for_each_entry(file, &tr->events, list) + +#define do_for_each_event_file_safe(tr, file) \ + list_for_each_entry(tr, &ftrace_trace_arrays, list) { \ + struct ftrace_event_file *___n; \ + list_for_each_entry_safe(file, ___n, &tr->events, list) + +#define while_for_each_event_file() \ + } + struct list_head * trace_get_fields(struct ftrace_event_call *event_call) { @@ -149,15 +162,17 @@ EXPORT_SYMBOL_GPL(trace_event_raw_init); int ftrace_event_reg(struct ftrace_event_call *call, enum trace_reg type, void *data) { + struct ftrace_event_file *file = data; + switch (type) { case TRACE_REG_REGISTER: return tracepoint_probe_register(call->name, call->class->probe, - call); + file); case TRACE_REG_UNREGISTER: tracepoint_probe_unregister(call->name, call->class->probe, - call); + file); return 0; #ifdef CONFIG_PERF_EVENTS @@ -183,54 +198,57 @@ EXPORT_SYMBOL_GPL(ftrace_event_reg); void trace_event_enable_cmd_record(bool enable) { - struct ftrace_event_call *call; + struct ftrace_event_file *file; + struct trace_array *tr; mutex_lock(&event_mutex); - list_for_each_entry(call, &ftrace_events, list) { - if (!(call->flags & TRACE_EVENT_FL_ENABLED)) + do_for_each_event_file(tr, file) { + + if (!(file->flags & FTRACE_EVENT_FL_ENABLED)) continue; if (enable) { tracing_start_cmdline_record(); - call->flags |= TRACE_EVENT_FL_RECORDED_CMD; + file->flags |= FTRACE_EVENT_FL_RECORDED_CMD; } else { tracing_stop_cmdline_record(); - call->flags &= ~TRACE_EVENT_FL_RECORDED_CMD; + file->flags &= ~FTRACE_EVENT_FL_RECORDED_CMD; } - } + } while_for_each_event_file(); mutex_unlock(&event_mutex); } -static int ftrace_event_enable_disable(struct ftrace_event_call *call, - int enable) +static int ftrace_event_enable_disable(struct ftrace_event_file *file, + int enable) { + struct ftrace_event_call *call = file->event_call; int ret 
= 0; switch (enable) { case 0: - if (call->flags & TRACE_EVENT_FL_ENABLED) { - call->flags &= ~TRACE_EVENT_FL_ENABLED; - if (call->flags & TRACE_EVENT_FL_RECORDED_CMD) { + if (file->flags & FTRACE_EVENT_FL_ENABLED) { + file->flags &= ~FTRACE_EVENT_FL_ENABLED; + if (file->flags & FTRACE_EVENT_FL_RECORDED_CMD) { tracing_stop_cmdline_record(); - call->flags &= ~TRACE_EVENT_FL_RECORDED_CMD; + file->flags &= ~FTRACE_EVENT_FL_RECORDED_CMD; } - call->class->reg(call, TRACE_REG_UNREGISTER, NULL); + call->class->reg(call, TRACE_REG_UNREGISTER, file); } break; case 1: - if (!(call->flags & TRACE_EVENT_FL_ENABLED)) { + if (!(file->flags & FTRACE_EVENT_FL_ENABLED)) { if (trace_flags & TRACE_ITER_RECORD_CMD) { tracing_start_cmdline_record(); - call->flags |= TRACE_EVENT_FL_RECORDED_CMD; + file->flags |= FTRACE_EVENT_FL_RECORDED_CMD; } - ret = call->class->reg(call, TRACE_REG_REGISTER, NULL); + ret = call->class->reg(call, TRACE_REG_REGISTER, file); if (ret) { tracing_stop_cmdline_record(); pr_info("event trace: Could not enable event " "%s\n", call->name); break; } - call->flags |= TRACE_EVENT_FL_ENABLED; + file->flags |= FTRACE_EVENT_FL_ENABLED; } break; } @@ -238,13 +256,13 @@ static int ftrace_event_enable_disable(struct ftrace_event_call *call, return ret; } -static void ftrace_clear_events(void) +static void ftrace_clear_events(struct trace_array *tr) { - struct ftrace_event_call *call; + struct ftrace_event_file *file; mutex_lock(&event_mutex); - list_for_each_entry(call, &ftrace_events, list) { - ftrace_event_enable_disable(call, 0); + list_for_each_entry(file, &tr->events, list) { + ftrace_event_enable_disable(file, 0); } mutex_unlock(&event_mutex); } @@ -257,6 +275,8 @@ static void __put_system(struct event_subsystem *system) if (--system->ref_count) return; + list_del(&system->list); + if (filter) { kfree(filter->filter_string); kfree(filter); @@ -271,24 +291,45 @@ static void __get_system(struct event_subsystem *system) system->ref_count++; } -static void put_system(struct event_subsystem *system) +static void __get_system_dir(struct ftrace_subsystem_dir *dir) +{ + WARN_ON_ONCE(dir->ref_count == 0); + dir->ref_count++; + __get_system(dir->subsystem); +} + +static void __put_system_dir(struct ftrace_subsystem_dir *dir) +{ + WARN_ON_ONCE(dir->ref_count == 0); + /* If the subsystem is about to be freed, the dir must be too */ + WARN_ON_ONCE(dir->subsystem->ref_count == 1 && dir->ref_count != 1); + + __put_system(dir->subsystem); + if (!--dir->ref_count) + kfree(dir); +} + +static void put_system(struct ftrace_subsystem_dir *dir) { mutex_lock(&event_mutex); - __put_system(system); + __put_system_dir(dir); mutex_unlock(&event_mutex); } /* * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events. 
*/ -static int __ftrace_set_clr_event(const char *match, const char *sub, - const char *event, int set) +static int __ftrace_set_clr_event(struct trace_array *tr, const char *match, + const char *sub, const char *event, int set) { + struct ftrace_event_file *file; struct ftrace_event_call *call; int ret = -EINVAL; mutex_lock(&event_mutex); - list_for_each_entry(call, &ftrace_events, list) { + list_for_each_entry(file, &tr->events, list) { + + call = file->event_call; if (!call->name || !call->class || !call->class->reg) continue; @@ -307,7 +348,7 @@ static int __ftrace_set_clr_event(const char *match, const char *sub, if (event && strcmp(event, call->name) != 0) continue; - ftrace_event_enable_disable(call, set); + ftrace_event_enable_disable(file, set); ret = 0; } @@ -316,7 +357,7 @@ static int __ftrace_set_clr_event(const char *match, const char *sub, return ret; } -static int ftrace_set_clr_event(char *buf, int set) +static int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set) { char *event = NULL, *sub = NULL, *match; @@ -344,7 +385,7 @@ static int ftrace_set_clr_event(char *buf, int set) event = NULL; } - return __ftrace_set_clr_event(match, sub, event, set); + return __ftrace_set_clr_event(tr, match, sub, event, set); } /** @@ -361,7 +402,9 @@ static int ftrace_set_clr_event(char *buf, int set) */ int trace_set_clr_event(const char *system, const char *event, int set) { - return __ftrace_set_clr_event(NULL, system, event, set); + struct trace_array *tr = top_trace_array(); + + return __ftrace_set_clr_event(tr, NULL, system, event, set); } EXPORT_SYMBOL_GPL(trace_set_clr_event); @@ -373,6 +416,8 @@ ftrace_event_write(struct file *file, const char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_parser parser; + struct seq_file *m = file->private_data; + struct trace_array *tr = m->private; ssize_t read, ret; if (!cnt) @@ -395,7 +440,7 @@ ftrace_event_write(struct file *file, const char __user *ubuf, parser.buffer[parser.idx] = 0; - ret = ftrace_set_clr_event(parser.buffer + !set, set); + ret = ftrace_set_clr_event(tr, parser.buffer + !set, set); if (ret) goto out_put; } @@ -411,17 +456,20 @@ ftrace_event_write(struct file *file, const char __user *ubuf, static void * t_next(struct seq_file *m, void *v, loff_t *pos) { - struct ftrace_event_call *call = v; + struct ftrace_event_file *file = v; + struct ftrace_event_call *call; + struct trace_array *tr = m->private; (*pos)++; - list_for_each_entry_continue(call, &ftrace_events, list) { + list_for_each_entry_continue(file, &tr->events, list) { + call = file->event_call; /* * The ftrace subsystem is for showing formats only. * They can not be enabled or disabled via the event files. 
*/ if (call->class && call->class->reg) - return call; + return file; } return NULL; @@ -429,30 +477,32 @@ t_next(struct seq_file *m, void *v, loff_t *pos) static void *t_start(struct seq_file *m, loff_t *pos) { - struct ftrace_event_call *call; + struct ftrace_event_file *file; + struct trace_array *tr = m->private; loff_t l; mutex_lock(&event_mutex); - call = list_entry(&ftrace_events, struct ftrace_event_call, list); + file = list_entry(&tr->events, struct ftrace_event_file, list); for (l = 0; l <= *pos; ) { - call = t_next(m, call, &l); - if (!call) + file = t_next(m, file, &l); + if (!file) break; } - return call; + return file; } static void * s_next(struct seq_file *m, void *v, loff_t *pos) { - struct ftrace_event_call *call = v; + struct ftrace_event_file *file = v; + struct trace_array *tr = m->private; (*pos)++; - list_for_each_entry_continue(call, &ftrace_events, list) { - if (call->flags & TRACE_EVENT_FL_ENABLED) - return call; + list_for_each_entry_continue(file, &tr->events, list) { + if (file->flags & FTRACE_EVENT_FL_ENABLED) + return file; } return NULL; @@ -460,23 +510,25 @@ s_next(struct seq_file *m, void *v, loff_t *pos) static void *s_start(struct seq_file *m, loff_t *pos) { - struct ftrace_event_call *call; + struct ftrace_event_file *file; + struct trace_array *tr = m->private; loff_t l; mutex_lock(&event_mutex); - call = list_entry(&ftrace_events, struct ftrace_event_call, list); + file = list_entry(&tr->events, struct ftrace_event_file, list); for (l = 0; l <= *pos; ) { - call = s_next(m, call, &l); - if (!call) + file = s_next(m, file, &l); + if (!file) break; } - return call; + return file; } static int t_show(struct seq_file *m, void *v) { - struct ftrace_event_call *call = v; + struct ftrace_event_file *file = v; + struct ftrace_event_call *call = file->event_call; if (strcmp(call->class->system, TRACE_SYSTEM) != 0) seq_printf(m, "%s:", call->class->system); @@ -494,10 +546,10 @@ static ssize_t event_enable_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { - struct ftrace_event_call *call = filp->private_data; + struct ftrace_event_file *file = filp->private_data; char *buf; - if (call->flags & TRACE_EVENT_FL_ENABLED) + if (file->flags & FTRACE_EVENT_FL_ENABLED) buf = "1\n"; else buf = "0\n"; @@ -509,10 +561,13 @@ static ssize_t event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { - struct ftrace_event_call *call = filp->private_data; + struct ftrace_event_file *file = filp->private_data; unsigned long val; int ret; + if (!file) + return -EINVAL; + ret = kstrtoul_from_user(ubuf, cnt, 10, &val); if (ret) return ret; @@ -525,7 +580,7 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, case 0: case 1: mutex_lock(&event_mutex); - ret = ftrace_event_enable_disable(call, val); + ret = ftrace_event_enable_disable(file, val); mutex_unlock(&event_mutex); break; @@ -543,14 +598,18 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { const char set_to_char[4] = { '?', '0', '1', 'X' }; - struct event_subsystem *system = filp->private_data; + struct ftrace_subsystem_dir *dir = filp->private_data; + struct event_subsystem *system = dir->subsystem; struct ftrace_event_call *call; + struct ftrace_event_file *file; + struct trace_array *tr = dir->tr; char buf[2]; int set = 0; int ret; mutex_lock(&event_mutex); - list_for_each_entry(call, &ftrace_events, list) { + list_for_each_entry(file, &tr->events, list) { + call = file->event_call; if (!call->name || 
!call->class || !call->class->reg) continue; @@ -562,7 +621,7 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt, * or if all events or cleared, or if we have * a mixture. */ - set |= (1 << !!(call->flags & TRACE_EVENT_FL_ENABLED)); + set |= (1 << !!(file->flags & FTRACE_EVENT_FL_ENABLED)); /* * If we have a mixture, no need to look further. @@ -584,7 +643,8 @@ static ssize_t system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { - struct event_subsystem *system = filp->private_data; + struct ftrace_subsystem_dir *dir = filp->private_data; + struct event_subsystem *system = dir->subsystem; const char *name = NULL; unsigned long val; ssize_t ret; @@ -607,7 +667,7 @@ system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, if (system) name = system->name; - ret = __ftrace_set_clr_event(NULL, name, NULL, val); + ret = __ftrace_set_clr_event(dir->tr, NULL, name, NULL, val); if (ret) goto out; @@ -845,43 +905,75 @@ static LIST_HEAD(event_subsystems); static int subsystem_open(struct inode *inode, struct file *filp) { struct event_subsystem *system = NULL; + struct ftrace_subsystem_dir *dir = NULL; /* Initialize for gcc */ + struct trace_array *tr; int ret; - if (!inode->i_private) - goto skip_search; - /* Make sure the system still exists */ mutex_lock(&event_mutex); - list_for_each_entry(system, &event_subsystems, list) { - if (system == inode->i_private) { - /* Don't open systems with no events */ - if (!system->nr_events) { - system = NULL; - break; + list_for_each_entry(tr, &ftrace_trace_arrays, list) { + list_for_each_entry(dir, &tr->systems, list) { + if (dir == inode->i_private) { + /* Don't open systems with no events */ + if (dir->nr_events) { + __get_system_dir(dir); + system = dir->subsystem; + } + goto exit_loop; } - __get_system(system); - break; } } + exit_loop: mutex_unlock(&event_mutex); - if (system != inode->i_private) + if (!system) return -ENODEV; - skip_search: + /* Some versions of gcc think dir can be uninitialized here */ + WARN_ON(!dir); + ret = tracing_open_generic(inode, filp); - if (ret < 0 && system) - put_system(system); + if (ret < 0) + put_system(dir); + + return ret; +} + +static int system_tr_open(struct inode *inode, struct file *filp) +{ + struct ftrace_subsystem_dir *dir; + struct trace_array *tr = inode->i_private; + int ret; + + /* Make a temporary dir that has no system but points to tr */ + dir = kzalloc(sizeof(*dir), GFP_KERNEL); + if (!dir) + return -ENOMEM; + + dir->tr = tr; + + ret = tracing_open_generic(inode, filp); + if (ret < 0) + kfree(dir); + + filp->private_data = dir; return ret; } static int subsystem_release(struct inode *inode, struct file *file) { - struct event_subsystem *system = inode->i_private; + struct ftrace_subsystem_dir *dir = file->private_data; - if (system) - put_system(system); + /* + * If dir->subsystem is NULL, then this is a temporary + * descriptor that was made for a trace_array to enable + * all subsystems. 
+ */ + if (dir->subsystem) + put_system(dir); + else + kfree(dir); return 0; } @@ -890,7 +982,8 @@ static ssize_t subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { - struct event_subsystem *system = filp->private_data; + struct ftrace_subsystem_dir *dir = filp->private_data; + struct event_subsystem *system = dir->subsystem; struct trace_seq *s; int r; @@ -915,7 +1008,7 @@ static ssize_t subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { - struct event_subsystem *system = filp->private_data; + struct ftrace_subsystem_dir *dir = filp->private_data; char *buf; int err; @@ -932,7 +1025,7 @@ subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, } buf[cnt] = '\0'; - err = apply_subsystem_event_filter(system, buf); + err = apply_subsystem_event_filter(dir, buf); free_page((unsigned long) buf); if (err < 0) return err; @@ -1041,30 +1134,35 @@ static const struct file_operations ftrace_system_enable_fops = { .release = subsystem_release, }; +static const struct file_operations ftrace_tr_enable_fops = { + .open = system_tr_open, + .read = system_enable_read, + .write = system_enable_write, + .llseek = default_llseek, + .release = subsystem_release, +}; + static const struct file_operations ftrace_show_header_fops = { .open = tracing_open_generic, .read = show_header, .llseek = default_llseek, }; -static struct dentry *event_trace_events_dir(void) +static int +ftrace_event_open(struct inode *inode, struct file *file, + const struct seq_operations *seq_ops) { - static struct dentry *d_tracer; - static struct dentry *d_events; - - if (d_events) - return d_events; - - d_tracer = tracing_init_dentry(); - if (!d_tracer) - return NULL; + struct seq_file *m; + int ret; - d_events = debugfs_create_dir("events", d_tracer); - if (!d_events) - pr_warning("Could not create debugfs " - "'events' directory\n"); + ret = seq_open(file, seq_ops); + if (ret < 0) + return ret; + m = file->private_data; + /* copy tr over to seq ops */ + m->private = inode->i_private; - return d_events; + return ret; } static int @@ -1072,117 +1170,169 @@ ftrace_event_avail_open(struct inode *inode, struct file *file) { const struct seq_operations *seq_ops = &show_event_seq_ops; - return seq_open(file, seq_ops); + return ftrace_event_open(inode, file, seq_ops); } static int ftrace_event_set_open(struct inode *inode, struct file *file) { const struct seq_operations *seq_ops = &show_set_event_seq_ops; + struct trace_array *tr = inode->i_private; if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) - ftrace_clear_events(); + ftrace_clear_events(tr); - return seq_open(file, seq_ops); + return ftrace_event_open(inode, file, seq_ops); +} + +static struct event_subsystem * +create_new_subsystem(const char *name) +{ + struct event_subsystem *system; + + /* need to create new entry */ + system = kmalloc(sizeof(*system), GFP_KERNEL); + if (!system) + return NULL; + + system->ref_count = 1; + system->name = kstrdup(name, GFP_KERNEL); + + if (!system->name) + goto out_free; + + system->filter = NULL; + + system->filter = kzalloc(sizeof(struct event_filter), GFP_KERNEL); + if (!system->filter) + goto out_free; + + list_add(&system->list, &event_subsystems); + + return system; + + out_free: + kfree(system->name); + kfree(system); + return NULL; } static struct dentry * -event_subsystem_dir(const char *name, struct dentry *d_events) +event_subsystem_dir(struct trace_array *tr, const char *name, + struct ftrace_event_file *file, struct 
dentry *parent) { + struct ftrace_subsystem_dir *dir; struct event_subsystem *system; struct dentry *entry; /* First see if we did not already create this dir */ - list_for_each_entry(system, &event_subsystems, list) { + list_for_each_entry(dir, &tr->systems, list) { + system = dir->subsystem; if (strcmp(system->name, name) == 0) { - system->nr_events++; - return system->entry; + dir->nr_events++; + file->system = dir; + return dir->entry; } } - /* need to create new entry */ - system = kmalloc(sizeof(*system), GFP_KERNEL); - if (!system) { - pr_warning("No memory to create event subsystem %s\n", - name); - return d_events; + /* Now see if the system itself exists. */ + list_for_each_entry(system, &event_subsystems, list) { + if (strcmp(system->name, name) == 0) + break; } + /* Reset system variable when not found */ + if (&system->list == &event_subsystems) + system = NULL; - system->entry = debugfs_create_dir(name, d_events); - if (!system->entry) { - pr_warning("Could not create event subsystem %s\n", - name); - kfree(system); - return d_events; - } + dir = kmalloc(sizeof(*dir), GFP_KERNEL); + if (!dir) + goto out_fail; - system->nr_events = 1; - system->ref_count = 1; - system->name = kstrdup(name, GFP_KERNEL); - if (!system->name) { - debugfs_remove(system->entry); - kfree(system); - return d_events; + if (!system) { + system = create_new_subsystem(name); + if (!system) + goto out_free; + } else + __get_system(system); + + dir->entry = debugfs_create_dir(name, parent); + if (!dir->entry) { + pr_warning("Failed to create system directory %s\n", name); + __put_system(system); + goto out_free; } - list_add(&system->list, &event_subsystems); - - system->filter = NULL; - - system->filter = kzalloc(sizeof(struct event_filter), GFP_KERNEL); - if (!system->filter) { - pr_warning("Could not allocate filter for subsystem " - "'%s'\n", name); - return system->entry; - } + dir->tr = tr; + dir->ref_count = 1; + dir->nr_events = 1; + dir->subsystem = system; + file->system = dir; - entry = debugfs_create_file("filter", 0644, system->entry, system, + entry = debugfs_create_file("filter", 0644, dir->entry, dir, &ftrace_subsystem_filter_fops); if (!entry) { kfree(system->filter); system->filter = NULL; - pr_warning("Could not create debugfs " - "'%s/filter' entry\n", name); + pr_warning("Could not create debugfs '%s/filter' entry\n", name); } - trace_create_file("enable", 0644, system->entry, system, + trace_create_file("enable", 0644, dir->entry, dir, &ftrace_system_enable_fops); - return system->entry; + list_add(&dir->list, &tr->systems); + + return dir->entry; + + out_free: + kfree(dir); + out_fail: + /* Only print this message if failed on memory allocation */ + if (!dir || !system) + pr_warning("No memory to create event subsystem %s\n", + name); + return NULL; } static int -event_create_dir(struct ftrace_event_call *call, struct dentry *d_events, +event_create_dir(struct dentry *parent, + struct ftrace_event_file *file, const struct file_operations *id, const struct file_operations *enable, const struct file_operations *filter, const struct file_operations *format) { + struct ftrace_event_call *call = file->event_call; + struct trace_array *tr = file->tr; struct list_head *head; + struct dentry *d_events; int ret; /* * If the trace point header did not define TRACE_SYSTEM * then the system would be called "TRACE_SYSTEM". 
*/ - if (strcmp(call->class->system, TRACE_SYSTEM) != 0) - d_events = event_subsystem_dir(call->class->system, d_events); - - call->dir = debugfs_create_dir(call->name, d_events); - if (!call->dir) { - pr_warning("Could not create debugfs " - "'%s' directory\n", call->name); + if (strcmp(call->class->system, TRACE_SYSTEM) != 0) { + d_events = event_subsystem_dir(tr, call->class->system, file, parent); + if (!d_events) + return -ENOMEM; + } else + d_events = parent; + + file->dir = debugfs_create_dir(call->name, d_events); + if (!file->dir) { + pr_warning("Could not create debugfs '%s' directory\n", + call->name); return -1; } if (call->class->reg && !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) - trace_create_file("enable", 0644, call->dir, call, + trace_create_file("enable", 0644, file->dir, file, enable); #ifdef CONFIG_PERF_EVENTS if (call->event.type && call->class->reg) - trace_create_file("id", 0444, call->dir, call, + trace_create_file("id", 0444, file->dir, call, id); #endif @@ -1196,23 +1346,76 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events, if (ret < 0) { pr_warning("Could not initialize trace point" " events/%s\n", call->name); - return ret; + return -1; } } - trace_create_file("filter", 0644, call->dir, call, + trace_create_file("filter", 0644, file->dir, call, filter); - trace_create_file("format", 0444, call->dir, call, + trace_create_file("format", 0444, file->dir, call, format); return 0; } +static void remove_subsystem(struct ftrace_subsystem_dir *dir) +{ + if (!dir) + return; + + if (!--dir->nr_events) { + debugfs_remove_recursive(dir->entry); + list_del(&dir->list); + __put_system_dir(dir); + } +} + +static void remove_event_from_tracers(struct ftrace_event_call *call) +{ + struct ftrace_event_file *file; + struct trace_array *tr; + + do_for_each_event_file_safe(tr, file) { + + if (file->event_call != call) + continue; + + list_del(&file->list); + debugfs_remove_recursive(file->dir); + remove_subsystem(file->system); + kfree(file); + + /* + * The do_for_each_event_file_safe() is + * a double loop. After finding the call for this + * trace_array, we use break to jump to the next + * trace_array. + */ + break; + } while_for_each_event_file(); +} + static void event_remove(struct ftrace_event_call *call) { - ftrace_event_enable_disable(call, 0); + struct trace_array *tr; + struct ftrace_event_file *file; + + do_for_each_event_file(tr, file) { + if (file->event_call != call) + continue; + ftrace_event_enable_disable(file, 0); + /* + * The do_for_each_event_file() is + * a double loop. After finding the call for this + * trace_array, we use break to jump to the next + * trace_array. 
+ */ + break; + } while_for_each_event_file(); + if (call->event.funcs) __unregister_ftrace_event(&call->event); + remove_event_from_tracers(call); list_del(&call->list); } @@ -1234,61 +1437,58 @@ static int event_init(struct ftrace_event_call *call) } static int -__trace_add_event_call(struct ftrace_event_call *call, struct module *mod, - const struct file_operations *id, - const struct file_operations *enable, - const struct file_operations *filter, - const struct file_operations *format) +__register_event(struct ftrace_event_call *call, struct module *mod) { - struct dentry *d_events; int ret; ret = event_init(call); if (ret < 0) return ret; - d_events = event_trace_events_dir(); - if (!d_events) - return -ENOENT; - - ret = event_create_dir(call, d_events, id, enable, filter, format); - if (!ret) - list_add(&call->list, &ftrace_events); + list_add(&call->list, &ftrace_events); call->mod = mod; - return ret; + return 0; } +/* Add an event to a trace directory */ +static int +__trace_add_new_event(struct ftrace_event_call *call, + struct trace_array *tr, + const struct file_operations *id, + const struct file_operations *enable, + const struct file_operations *filter, + const struct file_operations *format) +{ + struct ftrace_event_file *file; + + file = kzalloc(sizeof(*file), GFP_KERNEL); + if (!file) + return -ENOMEM; + + file->event_call = call; + file->tr = tr; + list_add(&file->list, &tr->events); + + return event_create_dir(tr->event_dir, file, id, enable, filter, format); +} + +struct ftrace_module_file_ops; +static void __add_event_to_tracers(struct ftrace_event_call *call, + struct ftrace_module_file_ops *file_ops); + /* Add an additional event_call dynamically */ int trace_add_event_call(struct ftrace_event_call *call) { int ret; mutex_lock(&event_mutex); - ret = __trace_add_event_call(call, NULL, &ftrace_event_id_fops, - &ftrace_enable_fops, - &ftrace_event_filter_fops, - &ftrace_event_format_fops); - mutex_unlock(&event_mutex); - return ret; -} -static void remove_subsystem_dir(const char *name) -{ - struct event_subsystem *system; + ret = __register_event(call, NULL); + if (ret >= 0) + __add_event_to_tracers(call, NULL); - if (strcmp(name, TRACE_SYSTEM) == 0) - return; - - list_for_each_entry(system, &event_subsystems, list) { - if (strcmp(system->name, name) == 0) { - if (!--system->nr_events) { - debugfs_remove_recursive(system->entry); - list_del(&system->list); - __put_system(system); - } - break; - } - } + mutex_unlock(&event_mutex); + return ret; } /* @@ -1299,8 +1499,6 @@ static void __trace_remove_event_call(struct ftrace_event_call *call) event_remove(call); trace_destroy_fields(call); destroy_preds(call); - debugfs_remove_recursive(call->dir); - remove_subsystem_dir(call->class->system); } /* Remove an event_call */ @@ -1335,6 +1533,17 @@ struct ftrace_module_file_ops { struct file_operations filter; }; +static struct ftrace_module_file_ops *find_ftrace_file_ops(struct module *mod) +{ + struct ftrace_module_file_ops *file_ops; + + list_for_each_entry(file_ops, &ftrace_module_file_list, list) { + if (file_ops->mod == mod) + return file_ops; + } + return NULL; +} + static struct ftrace_module_file_ops * trace_create_file_ops(struct module *mod) { @@ -1386,9 +1595,8 @@ static void trace_module_add_events(struct module *mod) return; for_each_event(call, start, end) { - __trace_add_event_call(*call, mod, - &file_ops->id, &file_ops->enable, - &file_ops->filter, &file_ops->format); + __register_event(*call, mod); + __add_event_to_tracers(*call, file_ops); } } @@ -1444,6 
+1652,10 @@ static int trace_module_notify(struct notifier_block *self, return 0; } #else +static struct ftrace_module_file_ops *find_ftrace_file_ops(struct module *mod) +{ + return NULL; +} static int trace_module_notify(struct notifier_block *self, unsigned long val, void *data) { @@ -1451,6 +1663,72 @@ static int trace_module_notify(struct notifier_block *self, } #endif /* CONFIG_MODULES */ +/* Create a new event directory structure for a trace directory. */ +static void +__trace_add_event_dirs(struct trace_array *tr) +{ + struct ftrace_module_file_ops *file_ops = NULL; + struct ftrace_event_call *call; + int ret; + + list_for_each_entry(call, &ftrace_events, list) { + if (call->mod) { + /* + * Directories for events by modules need to + * keep module ref counts when opened (as we don't + * want the module to disappear when reading one + * of these files). The file_ops keep account of + * the module ref count. + * + * As event_calls are added in groups by module, + * when we find one file_ops, we don't need to search for + * each call in that module, as the rest should be the + * same. Only search for a new one if the last one did + * not match. + */ + if (!file_ops || call->mod != file_ops->mod) + file_ops = find_ftrace_file_ops(call->mod); + if (!file_ops) + continue; /* Warn? */ + ret = __trace_add_new_event(call, tr, + &file_ops->id, &file_ops->enable, + &file_ops->filter, &file_ops->format); + if (ret < 0) + pr_warning("Could not create directory for event %s\n", + call->name); + continue; + } + ret = __trace_add_new_event(call, tr, + &ftrace_event_id_fops, + &ftrace_enable_fops, + &ftrace_event_filter_fops, + &ftrace_event_format_fops); + if (ret < 0) + pr_warning("Could not create directory for event %s\n", + call->name); + } +} + +static void +__add_event_to_tracers(struct ftrace_event_call *call, + struct ftrace_module_file_ops *file_ops) +{ + struct trace_array *tr; + + list_for_each_entry(tr, &ftrace_trace_arrays, list) { + if (file_ops) + __trace_add_new_event(call, tr, + &file_ops->id, &file_ops->enable, + &file_ops->filter, &file_ops->format); + else + __trace_add_new_event(call, tr, + &ftrace_event_id_fops, + &ftrace_enable_fops, + &ftrace_event_filter_fops, + &ftrace_event_format_fops); + } +} + static struct notifier_block trace_module_nb = { .notifier_call = trace_module_notify, .priority = 0, @@ -1471,8 +1749,43 @@ static __init int setup_trace_event(char *str) } __setup("trace_event=", setup_trace_event); +int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr) +{ + struct dentry *d_events; + struct dentry *entry; + + entry = debugfs_create_file("set_event", 0644, parent, + tr, &ftrace_set_event_fops); + if (!entry) { + pr_warning("Could not create debugfs 'set_event' entry\n"); + return -ENOMEM; + } + + d_events = debugfs_create_dir("events", parent); + if (!d_events) + pr_warning("Could not create debugfs 'events' directory\n"); + + /* ring buffer internal formats */ + trace_create_file("header_page", 0444, d_events, + ring_buffer_print_page_header, + &ftrace_show_header_fops); + + trace_create_file("header_event", 0444, d_events, + ring_buffer_print_entry_header, + &ftrace_show_header_fops); + + trace_create_file("enable", 0644, d_events, + tr, &ftrace_tr_enable_fops); + + tr->event_dir = d_events; + __trace_add_event_dirs(tr); + + return 0; +} + static __init int event_trace_enable(void) { + struct trace_array *tr = top_trace_array(); struct ftrace_event_call **iter, *call; char *buf = bootup_event_buf; char *token; @@ -1494,7 +1807,7 @@ static 
__init int event_trace_enable(void) if (!*token) continue; - ret = ftrace_set_clr_event(token, 1); + ret = ftrace_set_clr_event(tr, token, 1); if (ret) pr_warn("Failed to enable trace event: %s\n", token); } @@ -1506,61 +1819,29 @@ static __init int event_trace_enable(void) static __init int event_trace_init(void) { - struct ftrace_event_call *call; + struct trace_array *tr; struct dentry *d_tracer; struct dentry *entry; - struct dentry *d_events; int ret; + tr = top_trace_array(); + d_tracer = tracing_init_dentry(); if (!d_tracer) return 0; entry = debugfs_create_file("available_events", 0444, d_tracer, - NULL, &ftrace_avail_fops); + tr, &ftrace_avail_fops); if (!entry) pr_warning("Could not create debugfs " "'available_events' entry\n"); - entry = debugfs_create_file("set_event", 0644, d_tracer, - NULL, &ftrace_set_event_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'set_event' entry\n"); - - d_events = event_trace_events_dir(); - if (!d_events) - return 0; - - /* ring buffer internal formats */ - trace_create_file("header_page", 0444, d_events, - ring_buffer_print_page_header, - &ftrace_show_header_fops); - - trace_create_file("header_event", 0444, d_events, - ring_buffer_print_entry_header, - &ftrace_show_header_fops); - - trace_create_file("enable", 0644, d_events, - NULL, &ftrace_system_enable_fops); - if (trace_define_common_fields()) pr_warning("tracing: Failed to allocate common fields"); - /* - * Early initialization already enabled ftrace event. - * Now it's only necessary to create the event directory. - */ - list_for_each_entry(call, &ftrace_events, list) { - - ret = event_create_dir(call, d_events, - &ftrace_event_id_fops, - &ftrace_enable_fops, - &ftrace_event_filter_fops, - &ftrace_event_format_fops); - if (ret < 0) - event_remove(call); - } + ret = event_trace_add_tracer(d_tracer, tr); + if (ret) + return ret; ret = register_module_notifier(&trace_module_nb); if (ret) @@ -1627,13 +1908,20 @@ static __init void event_test_stuff(void) */ static __init void event_trace_self_tests(void) { + struct ftrace_subsystem_dir *dir; + struct ftrace_event_file *file; struct ftrace_event_call *call; struct event_subsystem *system; + struct trace_array *tr; int ret; + tr = top_trace_array(); + pr_info("Running tests on trace events:\n"); - list_for_each_entry(call, &ftrace_events, list) { + list_for_each_entry(file, &tr->events, list) { + + call = file->event_call; /* Only test those that have a probe */ if (!call->class || !call->class->probe) @@ -1657,15 +1945,15 @@ static __init void event_trace_self_tests(void) * If an event is already enabled, someone is using * it and the self test should not be on. 
 */
-		if (call->flags & TRACE_EVENT_FL_ENABLED) {
+		if (file->flags & FTRACE_EVENT_FL_ENABLED) {
 			pr_warning("Enabled event during self test!\n");
 			WARN_ON_ONCE(1);
 			continue;
 		}

-		ftrace_event_enable_disable(call, 1);
+		ftrace_event_enable_disable(file, 1);
 		event_test_stuff();
-		ftrace_event_enable_disable(call, 0);
+		ftrace_event_enable_disable(file, 0);

 		pr_cont("OK\n");
 	}
@@ -1674,7 +1962,9 @@ static __init void event_trace_self_tests(void)

 	pr_info("Running tests on trace event systems:\n");

-	list_for_each_entry(system, &event_subsystems, list) {
+	list_for_each_entry(dir, &tr->systems, list) {
+
+		system = dir->subsystem;

 		/* the ftrace system is special, skip it */
 		if (strcmp(system->name, "ftrace") == 0)
@@ -1682,7 +1972,7 @@ static __init void event_trace_self_tests(void)

 		pr_info("Testing event system %s: ", system->name);

-		ret = __ftrace_set_clr_event(NULL, system->name, NULL, 1);
+		ret = __ftrace_set_clr_event(tr, NULL, system->name, NULL, 1);
 		if (WARN_ON_ONCE(ret)) {
 			pr_warning("error enabling system %s\n", system->name);
@@ -1691,7 +1981,7 @@ static __init void event_trace_self_tests(void)

 		event_test_stuff();

-		ret = __ftrace_set_clr_event(NULL, system->name, NULL, 0);
+		ret = __ftrace_set_clr_event(tr, NULL, system->name, NULL, 0);
 		if (WARN_ON_ONCE(ret)) {
 			pr_warning("error disabling system %s\n", system->name);
@@ -1706,7 +1996,7 @@ static __init void event_trace_self_tests(void)
 	pr_info("Running tests on all trace events:\n");
 	pr_info("Testing all events: ");

-	ret = __ftrace_set_clr_event(NULL, NULL, NULL, 1);
+	ret = __ftrace_set_clr_event(tr, NULL, NULL, NULL, 1);
 	if (WARN_ON_ONCE(ret)) {
 		pr_warning("error enabling all events\n");
 		return;
@@ -1715,7 +2005,7 @@ static __init void event_trace_self_tests(void)

 	event_test_stuff();

 	/* reset sysname */
-	ret = __ftrace_set_clr_event(NULL, NULL, NULL, 0);
+	ret = __ftrace_set_clr_event(tr, NULL, NULL, NULL, 0);
 	if (WARN_ON_ONCE(ret)) {
 		pr_warning("error disabling all events\n");
 		return;
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index e5b0ca8b8d4d..2a22a177ab44 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -1907,16 +1907,17 @@ out_unlock:
 	return err;
 }

-int apply_subsystem_event_filter(struct event_subsystem *system,
+int apply_subsystem_event_filter(struct ftrace_subsystem_dir *dir,
 				 char *filter_string)
 {
+	struct event_subsystem *system = dir->subsystem;
 	struct event_filter *filter;
 	int err = 0;

 	mutex_lock(&event_mutex);

 	/* Make sure the system still has events */
-	if (!system->nr_events) {
+	if (!dir->nr_events) {
 		err = -ENODEV;
 		goto out_unlock;
 	}
-- cgit

From ccb469a198cffac94a7eea0b69f715f06e2ddf15 Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Thu, 2 Aug 2012 10:32:10 -0400
Subject: tracing: Pass the ftrace_file to the buffer lock reserve code

Pass the struct ftrace_event_file *ftrace_file to
trace_event_buffer_lock_reserve() (the new function that replaces
trace_current_buffer_lock_reserve()).

The ftrace_file holds a pointer to the trace_array that is in use.
In the case of multiple buffers with different trace_arrays, this
allows different events to be recorded into different buffers.
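To make the routing concrete, here is a minimal caller-side sketch. The
file_a and file_b pointers are hypothetical ftrace_event_file instances
belonging to two different trace_arrays; only the reserve call itself is
taken from this patch:

	struct ring_buffer *buffer;
	struct ring_buffer_event *event;

	/* file_a->tr and file_b->tr point at two different trace_arrays */
	event = trace_event_buffer_lock_reserve(&buffer, file_a, type,
						len, flags, pc);
	/* buffer now refers to the ring buffer of file_a->tr */

	event = trace_event_buffer_lock_reserve(&buffer, file_b, type,
						len, flags, pc);
	/* same event type, but reserved in the buffer of file_b->tr */

The function itself only has to dereference ftrace_file->tr to pick the
buffer, as the trace.c hunk below shows.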
Also fixed some of the stale comments in include/trace/ftrace.h Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 7 +++++++ include/trace/ftrace.h | 9 +++++---- kernel/trace/trace.c | 12 ++++++++++++ 3 files changed, 24 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index c7191d482f98..fd28c170c597 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -128,6 +128,13 @@ enum print_line_t { void tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, int pc); +struct ftrace_event_file; + +struct ring_buffer_event * +trace_event_buffer_lock_reserve(struct ring_buffer **current_buffer, + struct ftrace_event_file *ftrace_file, + int type, unsigned long len, + unsigned long flags, int pc); struct ring_buffer_event * trace_current_buffer_lock_reserve(struct ring_buffer **current_buffer, int type, unsigned long len, diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 191d9661e277..e5d140a91fd7 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -414,7 +414,8 @@ static inline notrace int ftrace_get_offsets_##call( \ * * static void ftrace_raw_event_(void *__data, proto) * { - * struct ftrace_event_call *event_call = __data; + * struct ftrace_event_file *ftrace_file = __data; + * struct ftrace_event_call *event_call = ftrace_file->event_call; * struct ftrace_data_offsets_ __maybe_unused __data_offsets; * struct ring_buffer_event *event; * struct ftrace_raw_ *entry; <-- defined in stage 1 @@ -428,7 +429,7 @@ static inline notrace int ftrace_get_offsets_##call( \ * * __data_size = ftrace_get_offsets_(&__data_offsets, args); * - * event = trace_current_buffer_lock_reserve(&buffer, + * event = trace_event_buffer_lock_reserve(&buffer, ftrace_file, * event_->event.type, * sizeof(*entry) + __data_size, * irq_flags, pc); @@ -440,7 +441,7 @@ static inline notrace int ftrace_get_offsets_##call( \ * __array macros. 
* * if (!filter_current_check_discard(buffer, event_call, entry, event)) - * trace_current_buffer_unlock_commit(buffer, + * trace_nowake_buffer_unlock_commit(buffer, * event, irq_flags, pc); * } * @@ -533,7 +534,7 @@ ftrace_raw_event_##call(void *__data, proto) \ \ __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \ \ - event = trace_current_buffer_lock_reserve(&buffer, \ + event = trace_event_buffer_lock_reserve(&buffer, ftrace_file, \ event_call->event.type, \ sizeof(*entry) + __data_size, \ irq_flags, pc); \ diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 91fe40905828..29bff72f97ef 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1293,6 +1293,18 @@ void trace_buffer_unlock_commit(struct ring_buffer *buffer, } EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit); +struct ring_buffer_event * +trace_event_buffer_lock_reserve(struct ring_buffer **current_rb, + struct ftrace_event_file *ftrace_file, + int type, unsigned long len, + unsigned long flags, int pc) +{ + *current_rb = ftrace_file->tr->buffer; + return trace_buffer_lock_reserve(*current_rb, + type, len, flags, pc); +} +EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve); + struct ring_buffer_event * trace_current_buffer_lock_reserve(struct ring_buffer **current_rb, int type, unsigned long len, -- cgit From 15693458c4bc0693fd63a50d60f35b628fcf4e29 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 28 Feb 2013 19:59:17 -0500 Subject: tracing/ring-buffer: Move poll wake ups into ring buffer code Move the logic to wake up on ring buffer data into the ring buffer code itself. This simplifies the tracing code a lot and also has the added benefit that waiters on one of the instance buffers can be woken only when data is added to that instance instead of data added to any instance. Signed-off-by: Steven Rostedt --- include/linux/ring_buffer.h | 6 ++ kernel/trace/ring_buffer.c | 146 ++++++++++++++++++++++++++++++++++++++++++++ kernel/trace/trace.c | 83 ++++--------------------- 3 files changed, 164 insertions(+), 71 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 1342e69542f3..d69cf637a15a 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -4,6 +4,7 @@ #include #include #include +#include struct ring_buffer; struct ring_buffer_iter; @@ -96,6 +97,11 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k __ring_buffer_alloc((size), (flags), &__key); \ }) +void ring_buffer_wait(struct ring_buffer *buffer, int cpu); +int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu, + struct file *filp, poll_table *poll_table); + + #define RING_BUFFER_ALL_CPUS -1 void ring_buffer_free(struct ring_buffer *buffer); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 7244acde77b0..56b6ea32d2e7 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -442,6 +443,12 @@ int ring_buffer_print_page_header(struct trace_seq *s) return ret; } +struct rb_irq_work { + struct irq_work work; + wait_queue_head_t waiters; + bool waiters_pending; +}; + /* * head_page == tail_page && head == tail then buffer is empty. 
*/ @@ -476,6 +483,8 @@ struct ring_buffer_per_cpu { struct list_head new_pages; /* new pages to add */ struct work_struct update_pages_work; struct completion update_done; + + struct rb_irq_work irq_work; }; struct ring_buffer { @@ -495,6 +504,8 @@ struct ring_buffer { struct notifier_block cpu_notify; #endif u64 (*clock)(void); + + struct rb_irq_work irq_work; }; struct ring_buffer_iter { @@ -506,6 +517,118 @@ struct ring_buffer_iter { u64 read_stamp; }; +/* + * rb_wake_up_waiters - wake up tasks waiting for ring buffer input + * + * Schedules a delayed work to wake up any task that is blocked on the + * ring buffer waiters queue. + */ +static void rb_wake_up_waiters(struct irq_work *work) +{ + struct rb_irq_work *rbwork = container_of(work, struct rb_irq_work, work); + + wake_up_all(&rbwork->waiters); +} + +/** + * ring_buffer_wait - wait for input to the ring buffer + * @buffer: buffer to wait on + * @cpu: the cpu buffer to wait on + * + * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon + * as data is added to any of the @buffer's cpu buffers. Otherwise + * it will wait for data to be added to a specific cpu buffer. + */ +void ring_buffer_wait(struct ring_buffer *buffer, int cpu) +{ + struct ring_buffer_per_cpu *cpu_buffer; + DEFINE_WAIT(wait); + struct rb_irq_work *work; + + /* + * Depending on what the caller is waiting for, either any + * data in any cpu buffer, or a specific buffer, put the + * caller on the appropriate wait queue. + */ + if (cpu == RING_BUFFER_ALL_CPUS) + work = &buffer->irq_work; + else { + cpu_buffer = buffer->buffers[cpu]; + work = &cpu_buffer->irq_work; + } + + + prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE); + + /* + * The events can happen in critical sections where + * checking a work queue can cause deadlocks. + * After adding a task to the queue, this flag is set + * only to notify events to try to wake up the queue + * using irq_work. + * + * We don't clear it even if the buffer is no longer + * empty. The flag only causes the next event to run + * irq_work to do the work queue wake up. The worse + * that can happen if we race with !trace_empty() is that + * an event will cause an irq_work to try to wake up + * an empty queue. + * + * There's no reason to protect this flag either, as + * the work queue and irq_work logic will do the necessary + * synchronization for the wake ups. The only thing + * that is necessary is that the wake up happens after + * a task has been queued. It's OK for spurious wake ups. + */ + work->waiters_pending = true; + + if ((cpu == RING_BUFFER_ALL_CPUS && ring_buffer_empty(buffer)) || + (cpu != RING_BUFFER_ALL_CPUS && ring_buffer_empty_cpu(buffer, cpu))) + schedule(); + + finish_wait(&work->waiters, &wait); +} + +/** + * ring_buffer_poll_wait - poll on buffer input + * @buffer: buffer to wait on + * @cpu: the cpu buffer to wait on + * @filp: the file descriptor + * @poll_table: The poll descriptor + * + * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon + * as data is added to any of the @buffer's cpu buffers. Otherwise + * it will wait for data to be added to a specific cpu buffer. + * + * Returns POLLIN | POLLRDNORM if data exists in the buffers, + * zero otherwise. 
+ */ +int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu, + struct file *filp, poll_table *poll_table) +{ + struct ring_buffer_per_cpu *cpu_buffer; + struct rb_irq_work *work; + + if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) || + (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu))) + return POLLIN | POLLRDNORM; + + if (cpu == RING_BUFFER_ALL_CPUS) + work = &buffer->irq_work; + else { + cpu_buffer = buffer->buffers[cpu]; + work = &cpu_buffer->irq_work; + } + + work->waiters_pending = true; + poll_wait(filp, &work->waiters, poll_table); + + if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) || + (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu))) + return POLLIN | POLLRDNORM; + return 0; +} + /* buffer may be either ring_buffer or ring_buffer_per_cpu */ #define RB_WARN_ON(b, cond) \ ({ \ @@ -1061,6 +1184,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int nr_pages, int cpu) cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; INIT_WORK(&cpu_buffer->update_pages_work, update_pages_handler); init_completion(&cpu_buffer->update_done); + init_irq_work(&cpu_buffer->irq_work.work, rb_wake_up_waiters); bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), GFP_KERNEL, cpu_to_node(cpu)); @@ -1156,6 +1280,8 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags, buffer->clock = trace_clock_local; buffer->reader_lock_key = key; + init_irq_work(&buffer->irq_work.work, rb_wake_up_waiters); + /* need at least two pages */ if (nr_pages < 2) nr_pages = 2; @@ -2610,6 +2736,22 @@ static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, rb_end_commit(cpu_buffer); } +static __always_inline void +rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer) +{ + if (buffer->irq_work.waiters_pending) { + buffer->irq_work.waiters_pending = false; + /* irq_work_queue() supplies it's own memory barriers */ + irq_work_queue(&buffer->irq_work.work); + } + + if (cpu_buffer->irq_work.waiters_pending) { + cpu_buffer->irq_work.waiters_pending = false; + /* irq_work_queue() supplies it's own memory barriers */ + irq_work_queue(&cpu_buffer->irq_work.work); + } +} + /** * ring_buffer_unlock_commit - commit a reserved * @buffer: The buffer to commit to @@ -2629,6 +2771,8 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer, rb_commit(cpu_buffer, event); + rb_wakeups(buffer, cpu_buffer); + trace_recursive_unlock(); preempt_enable_notrace(); @@ -2801,6 +2945,8 @@ int ring_buffer_write(struct ring_buffer *buffer, rb_commit(cpu_buffer, event); + rb_wakeups(buffer, cpu_buffer); + ret = 0; out: preempt_enable_notrace(); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 3ec146c96df4..b5b25b6575a9 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include @@ -86,14 +85,6 @@ static int dummy_set_flag(u32 old_flags, u32 bit, int set) */ static DEFINE_PER_CPU(bool, trace_cmdline_save); -/* - * When a reader is waiting for data, then this variable is - * set to true. - */ -static bool trace_wakeup_needed; - -static struct irq_work trace_work_wakeup; - /* * Kill all tracing for good (never come back). 
* It is initialized to 1 but will turn to zero if the initialization @@ -334,28 +325,12 @@ static inline void trace_access_lock_init(void) #endif -/* trace_wait is a waitqueue for tasks blocked on trace_poll */ -static DECLARE_WAIT_QUEUE_HEAD(trace_wait); - /* trace_flags holds trace_options default values */ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME | TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS; -/** - * trace_wake_up - wake up tasks waiting for trace input - * - * Schedules a delayed work to wake up any task that is blocked on the - * trace_wait queue. These is used with trace_poll for tasks polling the - * trace. - */ -static void trace_wake_up(struct irq_work *work) -{ - wake_up_all(&trace_wait); - -} - /** * tracing_on - enable tracing buffers * @@ -763,36 +738,11 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) static void default_wait_pipe(struct trace_iterator *iter) { - DEFINE_WAIT(wait); - - prepare_to_wait(&trace_wait, &wait, TASK_INTERRUPTIBLE); - - /* - * The events can happen in critical sections where - * checking a work queue can cause deadlocks. - * After adding a task to the queue, this flag is set - * only to notify events to try to wake up the queue - * using irq_work. - * - * We don't clear it even if the buffer is no longer - * empty. The flag only causes the next event to run - * irq_work to do the work queue wake up. The worse - * that can happen if we race with !trace_empty() is that - * an event will cause an irq_work to try to wake up - * an empty queue. - * - * There's no reason to protect this flag either, as - * the work queue and irq_work logic will do the necessary - * synchronization for the wake ups. The only thing - * that is necessary is that the wake up happens after - * a task has been queued. It's OK for spurious wake ups. 
-	 */
-	trace_wakeup_needed = true;
-
-	if (trace_empty(iter))
-		schedule();
+	/* Iterators are static, they should be filled or empty */
+	if (trace_buffer_iter(iter, iter->cpu_file))
+		return;

-	finish_wait(&trace_wait, &wait);
+	ring_buffer_wait(iter->tr->buffer, iter->cpu_file);
 }

 /**
@@ -1262,11 +1212,6 @@ void
 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
 {
 	__this_cpu_write(trace_cmdline_save, true);
-	if (trace_wakeup_needed) {
-		trace_wakeup_needed = false;
-		/* irq_work_queue() supplies it's own memory barriers */
-		irq_work_queue(&trace_work_wakeup);
-	}
 	ring_buffer_unlock_commit(buffer, event);
 }

@@ -3557,21 +3502,18 @@ static int tracing_release_pipe(struct inode *inode, struct file *file)
 static unsigned int
 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
 {
-	if (trace_flags & TRACE_ITER_BLOCK) {
+	/* Iterators are static, they should be filled or empty */
+	if (trace_buffer_iter(iter, iter->cpu_file))
+		return POLLIN | POLLRDNORM;
+
+	if (trace_flags & TRACE_ITER_BLOCK)
 		/*
 		 * Always select as readable when in blocking mode
 		 */
 		return POLLIN | POLLRDNORM;
-	} else {
-		if (!trace_empty(iter))
-			return POLLIN | POLLRDNORM;
-		trace_wakeup_needed = true;
-		poll_wait(filp, &trace_wait, poll_table);
-		if (!trace_empty(iter))
-			return POLLIN | POLLRDNORM;
-
-		return 0;
-	}
+	else
+		return ring_buffer_poll_wait(iter->tr->buffer, iter->cpu_file,
					     filp, poll_table);
 }

 static unsigned int
@@ -5701,7 +5643,6 @@ __init static int tracer_alloc_buffers(void)
 #endif

 	trace_init_cmdlines();
-	init_irq_work(&trace_work_wakeup, trace_wake_up);

 	register_tracer(&nop_trace);
-- cgit

From f71130de5c7fba92faf3901784714e37a234c08f Mon Sep 17 00:00:00 2001
From: Li Zefan
Date: Thu, 21 Feb 2013 10:32:38 +0800
Subject: tracing: Add a helper function for event print functions

Move duplicate code in event print functions to a helper function.
This shrinks the size of the kernel by ~13K.

      text     data       bss       dec      hex  filename
   6596137  1743966  10138672  18478775  119f6b7  vmlinux.o.old
   6583002  1743849  10138672  18465523  119c2f3  vmlinux.o.new

Link: http://lkml.kernel.org/r/51258746.2060304@huawei.com

Signed-off-by: Li Zefan
Signed-off-by: Steven Rostedt
---
 include/linux/ftrace_event.h |  8 ++++++--
 include/trace/ftrace.h       | 23 ++++++-----------------
 kernel/trace/trace_output.c  | 26 ++++++++++++++++++++++++++
 3 files changed, 38 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index fd28c170c597..4d79d2dc189c 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -38,6 +38,12 @@ const char *ftrace_print_symbols_seq_u64(struct trace_seq *p,
 const char *ftrace_print_hex_seq(struct trace_seq *p,
 				 const unsigned char *buf, int len);

+struct trace_iterator;
+struct trace_event;
+
+int ftrace_raw_output_prep(struct trace_iterator *iter,
+			   struct trace_event *event);
+
 /*
  * The trace entry - the most basic unit of tracing. This is what
 * is printed in the end as a single line in the trace output, such as:
@@ -95,8 +101,6 @@ enum trace_iter_flags {
 };


-struct trace_event;
-
 typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter,
 					      int flags, struct trace_event *event);

diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index e5d140a91fd7..17a77fcac2a2 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -227,29 +227,18 @@ static notrace enum print_line_t					\
 ftrace_raw_output_##call(struct trace_iterator *iter, int flags,	\
 			 struct trace_event *trace_event)		\
 {									\
-	struct ftrace_event_call *event;				\
 	struct trace_seq *s = &iter->seq;				\
+	struct trace_seq __maybe_unused *p = &iter->tmp_seq;		\
 	struct ftrace_raw_##call *field;				\
-	struct trace_entry *entry;					\
-	struct trace_seq *p = &iter->tmp_seq;				\
 	int ret;							\
 									\
-	event = container_of(trace_event, struct ftrace_event_call,	\
-			     event);					\
-									\
-	entry = iter->ent;						\
+	field = (typeof(field))iter->ent;				\
 									\
-	if (entry->type != event->event.type) {				\
-		WARN_ON_ONCE(1);					\
-		return TRACE_TYPE_UNHANDLED;				\
-	}								\
-									\
-	field = (typeof(field))entry;					\
-									\
-	trace_seq_init(p);						\
-	ret = trace_seq_printf(s, "%s: ", event->name);			\
+	ret = ftrace_raw_output_prep(iter, trace_event);		\
 	if (ret)							\
-		ret = trace_seq_printf(s, print);			\
+		return ret;						\
+									\
+	ret = trace_seq_printf(s, print);				\
 	if (!ret)							\
 		return TRACE_TYPE_PARTIAL_LINE;				\
 									\
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 194d79602dc7..aa92ac322ba2 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -397,6 +397,32 @@ ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len)
 }
 EXPORT_SYMBOL(ftrace_print_hex_seq);

+int ftrace_raw_output_prep(struct trace_iterator *iter,
+			   struct trace_event *trace_event)
+{
+	struct ftrace_event_call *event;
+	struct trace_seq *s = &iter->seq;
+	struct trace_seq *p = &iter->tmp_seq;
+	struct trace_entry *entry;
+	int ret;
+
+	event = container_of(trace_event, struct ftrace_event_call, event);
+	entry = iter->ent;
+
+	if (entry->type != event->event.type) {
+		WARN_ON_ONCE(1);
+		return TRACE_TYPE_UNHANDLED;
+	}
+
+	trace_seq_init(p);
+	ret = trace_seq_printf(s, "%s: ", event->name);
+	if (!ret)
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	return 0;
+}
+EXPORT_SYMBOL(ftrace_raw_output_prep);
+
 #ifdef CONFIG_KRETPROBES
 static inline const char *kretprobed(const char *name)
 {
-- cgit

From 2a30c11f6a037e2475f3c651bc57e697e79fa963 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)"
Date: Mon, 4 Mar 2013 22:27:04 -0500
Subject: tracing: Add comment for trace event flag IGNORE_ENABLE

All the trace event flags have comments except the IGNORE_ENABLE
flag, which is set for ftrace internal events that should not be
enabled via the debugfs "enable" file. That is, if the top level
enable file is set, it will enable all events.

It used to just check the ftrace event call descriptor "reg" field
and skip those without it, but now some ftrace internal events have
a reg field but still need to be skipped. The flag was created to
ignore those events.

Now document it.
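For reference, the flag is consumed by checks of this shape; the sketch
below simply mirrors the event_create_dir() hunk earlier in this log:

	/* Internal ftrace events get no "enable" control file. */
	if (call->class->reg && !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE))
		trace_create_file("enable", 0644, file->dir, file,
				  enable);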
Signed-off-by: Steven Rostedt
---
 include/linux/ftrace_event.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 4d79d2dc189c..0b0814d90164 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -204,6 +204,7 @@ enum {
  *  FILTERED	  - The event has a filter attached
  *  CAP_ANY	  - Any user can enable for perf
  *  NO_SET_FILTER - Set when filter has error and is to be ignored
+ *  IGNORE_ENABLE - For ftrace internal events, do not enable with debugfs file
  */
 enum {
 	TRACE_EVENT_FL_FILTERED		= (1 << TRACE_EVENT_FL_FILTERED_BIT),
-- cgit

From 575380da8b46969a2c6a7e14a51056a63b30fe2e Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)"
Date: Mon, 4 Mar 2013 23:05:12 -0500
Subject: tracing: Only clear trace buffer on module unload if event was traced

Currently, when a module with events is unloaded, the trace buffer is
cleared. This is just a safety net in case the module has some strange
callback that runs when its events are output. But there's no reason
to reset the buffer if the module didn't have any of its events traced.

Add a flag to the event "call" structure called WAS_ENABLED that gets
set when the event is ever enabled; this flag never gets cleared. When
a module gets unloaded, if any of its events have this flag set, then
the trace buffer will get cleared.

Signed-off-by: Steven Rostedt
---
 include/linux/ftrace_event.h |  5 +++++
 kernel/trace/trace_events.c  | 12 ++++++++----
 2 files changed, 13 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 0b0814d90164..d6964244e567 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -197,6 +197,7 @@ enum {
 	TRACE_EVENT_FL_CAP_ANY_BIT,
 	TRACE_EVENT_FL_NO_SET_FILTER_BIT,
 	TRACE_EVENT_FL_IGNORE_ENABLE_BIT,
+	TRACE_EVENT_FL_WAS_ENABLED_BIT,
 };

 /*
@@ -205,12 +206,16 @@ enum {
  *  CAP_ANY	  - Any user can enable for perf
  *  NO_SET_FILTER - Set when filter has error and is to be ignored
  *  IGNORE_ENABLE - For ftrace internal events, do not enable with debugfs file
+ *  WAS_ENABLED   - Set and stays set when an event was ever enabled
+ *		    (used for module unloading, if a module event is enabled,
+ *		     it is best to clear the buffers that used it).
  */
 enum {
 	TRACE_EVENT_FL_FILTERED		= (1 << TRACE_EVENT_FL_FILTERED_BIT),
 	TRACE_EVENT_FL_CAP_ANY		= (1 << TRACE_EVENT_FL_CAP_ANY_BIT),
 	TRACE_EVENT_FL_NO_SET_FILTER	= (1 << TRACE_EVENT_FL_NO_SET_FILTER_BIT),
 	TRACE_EVENT_FL_IGNORE_ENABLE	= (1 << TRACE_EVENT_FL_IGNORE_ENABLE_BIT),
+	TRACE_EVENT_FL_WAS_ENABLED	= (1 << TRACE_EVENT_FL_WAS_ENABLED_BIT),
 };

 struct ftrace_event_call {
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 0f1307a29fcf..9a7dc4bf1171 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -245,6 +245,9 @@ static int ftrace_event_enable_disable(struct ftrace_event_file *file,
 			break;
 		}
 		file->flags |= FTRACE_EVENT_FL_ENABLED;
+
+		/* WAS_ENABLED gets set but never cleared. */
+		call->flags |= TRACE_EVENT_FL_WAS_ENABLED;
 	}
 	break;
 }
@@ -1626,12 +1629,13 @@ static void trace_module_remove_events(struct module *mod)
 {
 	struct ftrace_module_file_ops *file_ops;
 	struct ftrace_event_call *call, *p;
-	bool found = false;
+	bool clear_trace = false;

 	down_write(&trace_event_mutex);
 	list_for_each_entry_safe(call, p, &ftrace_events, list) {
 		if (call->mod == mod) {
-			found = true;
+			if (call->flags & TRACE_EVENT_FL_WAS_ENABLED)
+				clear_trace = true;
 			__trace_remove_event_call(call);
 		}
 	}
@@ -1648,9 +1652,9 @@ static void trace_module_remove_events(struct module *mod)

 	/*
 	 * It is safest to reset the ring buffer if the module being unloaded
-	 * registered any events.
+	 * registered any events that were used.
 	 */
-	if (found)
+	if (clear_trace)
 		tracing_reset_current_online_cpus();
 	up_write(&trace_event_mutex);
 }
-- cgit

From 12883efb670c28dff57dcd7f4f995a1ffe153b2d Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)"
Date: Tue, 5 Mar 2013 09:24:35 -0500
Subject: tracing: Consolidate max_tr into main trace_array structure

Currently, the way the latency tracers and the snapshot feature work
is to have a separate trace_array called "max_tr" that holds the
snapshot buffer. For latency tracers, this snapshot buffer is used to
swap the running buffer with this buffer to save the current max
latency.

The only items needed for the max_tr are really just a copy of the
buffer itself, the per_cpu data pointers, the time_start timestamp
that states when the max latency was triggered, and the cpu that the
max latency was triggered on. All other fields in trace_array are
unused by the max_tr, making the max_tr mostly bloat.

This change removes the max_tr completely, and adds a new structure
called trace_buffer that holds the buffer pointer, the per_cpu data
pointers, the time_start timestamp, and the cpu where the latency
occurred.

The trace_array now has two trace_buffers, one for the normal trace
and one for the max trace or snapshot. By doing this, not only do we
remove the bloat from the max_tr, but trace instances can now use
their own snapshot feature, instead of only the top level global_trace
having the snapshot feature and latency tracers for itself.
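A sketch of the layout this implies, reconstructed from the description
above and the accesses visible in the diff below; member types and
ordering here are an assumption, not a quote of the final header:

	struct trace_buffer {
		struct trace_array		*tr;	/* assumed back pointer */
		struct ring_buffer		*buffer;
		struct trace_array_cpu __percpu	*data;	/* per-cpu trace data */
		cycle_t				time_start; /* when the max latency hit */
		int				cpu;	/* cpu that triggered it */
	};

	struct trace_array {
		/* ... */
		struct trace_buffer	trace_buffer;	/* live trace */
#ifdef CONFIG_TRACER_MAX_TRACE
		struct trace_buffer	max_buffer;	/* snapshot / max-latency copy */
#endif
		/* ... */
	};

With both buffers embedded in trace_array, update_max_tr() in the diff
reduces to swapping the two buffer pointers under ftrace_max_lock.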
Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 2 + kernel/trace/blktrace.c | 4 +- kernel/trace/trace.c | 486 +++++++++++++++++++---------------- kernel/trace/trace.h | 37 ++- kernel/trace/trace_functions.c | 8 +- kernel/trace/trace_functions_graph.c | 12 +- kernel/trace/trace_irqsoff.c | 10 +- kernel/trace/trace_kdb.c | 8 +- kernel/trace/trace_mmiotrace.c | 12 +- kernel/trace/trace_output.c | 2 +- kernel/trace/trace_sched_switch.c | 8 +- kernel/trace/trace_sched_wakeup.c | 16 +- kernel/trace/trace_selftest.c | 42 +-- kernel/trace/trace_syscalls.c | 4 +- 14 files changed, 365 insertions(+), 286 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index d6964244e567..d84c4a575514 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -8,6 +8,7 @@ #include struct trace_array; +struct trace_buffer; struct tracer; struct dentry; @@ -67,6 +68,7 @@ struct trace_entry { struct trace_iterator { struct trace_array *tr; struct tracer *trace; + struct trace_buffer *trace_buffer; void *private; int cpu_file; struct mutex mutex; diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 71259e2b6b61..90a55054744c 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -72,7 +72,7 @@ static void trace_note(struct blk_trace *bt, pid_t pid, int action, bool blk_tracer = blk_tracer_enabled; if (blk_tracer) { - buffer = blk_tr->buffer; + buffer = blk_tr->trace_buffer.buffer; pc = preempt_count(); event = trace_buffer_lock_reserve(buffer, TRACE_BLK, sizeof(*t) + len, @@ -218,7 +218,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, if (blk_tracer) { tracing_record_cmdline(current); - buffer = blk_tr->buffer; + buffer = blk_tr->trace_buffer.buffer; pc = preempt_count(); event = trace_buffer_lock_reserve(buffer, TRACE_BLK, sizeof(*t) + pdu_len, diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c8a852a55db4..a08c127db865 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -195,27 +195,15 @@ cycle_t ftrace_now(int cpu) u64 ts; /* Early boot up does not have a buffer yet */ - if (!global_trace.buffer) + if (!global_trace.trace_buffer.buffer) return trace_clock_local(); - ts = ring_buffer_time_stamp(global_trace.buffer, cpu); - ring_buffer_normalize_time_stamp(global_trace.buffer, cpu, &ts); + ts = ring_buffer_time_stamp(global_trace.trace_buffer.buffer, cpu); + ring_buffer_normalize_time_stamp(global_trace.trace_buffer.buffer, cpu, &ts); return ts; } -/* - * The max_tr is used to snapshot the global_trace when a maximum - * latency is reached. Some tracers will use this to store a maximum - * trace while it continues examining live traces. - * - * The buffers for the max_tr are set up the same as the global_trace. - * When a snapshot is taken, the link list of the max_tr is swapped - * with the link list of the global_trace and the buffers are reset for - * the global_trace so the tracing can continue. - */ -static struct trace_array max_tr; - int tracing_is_enabled(void) { return tracing_is_on(); @@ -339,8 +327,8 @@ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | */ void tracing_on(void) { - if (global_trace.buffer) - ring_buffer_record_on(global_trace.buffer); + if (global_trace.trace_buffer.buffer) + ring_buffer_record_on(global_trace.trace_buffer.buffer); /* * This flag is only looked at when buffers haven't been * allocated yet. 
We don't really care about the race @@ -361,8 +349,8 @@ EXPORT_SYMBOL_GPL(tracing_on); */ void tracing_off(void) { - if (global_trace.buffer) - ring_buffer_record_off(global_trace.buffer); + if (global_trace.trace_buffer.buffer) + ring_buffer_record_off(global_trace.trace_buffer.buffer); /* * This flag is only looked at when buffers haven't been * allocated yet. We don't really care about the race @@ -378,8 +366,8 @@ EXPORT_SYMBOL_GPL(tracing_off); */ int tracing_is_on(void) { - if (global_trace.buffer) - return ring_buffer_record_is_on(global_trace.buffer); + if (global_trace.trace_buffer.buffer) + return ring_buffer_record_is_on(global_trace.trace_buffer.buffer); return !global_trace.buffer_disabled; } EXPORT_SYMBOL_GPL(tracing_is_on); @@ -637,13 +625,14 @@ unsigned long __read_mostly tracing_max_latency; static void __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) { - struct trace_array_cpu *data = per_cpu_ptr(tr->data, cpu); - struct trace_array_cpu *max_data; + struct trace_buffer *trace_buf = &tr->trace_buffer; + struct trace_buffer *max_buf = &tr->max_buffer; + struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu); + struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu); - max_tr.cpu = cpu; - max_tr.time_start = data->preempt_timestamp; + max_buf->cpu = cpu; + max_buf->time_start = data->preempt_timestamp; - max_data = per_cpu_ptr(max_tr.data, cpu); max_data->saved_latency = tracing_max_latency; max_data->critical_start = data->critical_start; max_data->critical_end = data->critical_end; @@ -686,9 +675,9 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) arch_spin_lock(&ftrace_max_lock); - buf = tr->buffer; - tr->buffer = max_tr.buffer; - max_tr.buffer = buf; + buf = tr->trace_buffer.buffer; + tr->trace_buffer.buffer = tr->max_buffer.buffer; + tr->max_buffer.buffer = buf; __update_max_tr(tr, tsk, cpu); arch_spin_unlock(&ftrace_max_lock); @@ -716,7 +705,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) arch_spin_lock(&ftrace_max_lock); - ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu); + ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu); if (ret == -EBUSY) { /* @@ -725,7 +714,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) * the max trace buffer (no one writes directly to it) * and flag that it failed. */ - trace_array_printk(&max_tr, _THIS_IP_, + trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_, "Failed to swap buffers due to commit in progress\n"); } @@ -742,7 +731,7 @@ static void default_wait_pipe(struct trace_iterator *iter) if (trace_buffer_iter(iter, iter->cpu_file)) return; - ring_buffer_wait(iter->tr->buffer, iter->cpu_file); + ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file); } /** @@ -803,17 +792,19 @@ int register_tracer(struct tracer *type) * internal tracing to verify that everything is in order. * If we fail, we do not register this tracer. 
*/ - tracing_reset_online_cpus(tr); + tracing_reset_online_cpus(&tr->trace_buffer); tr->current_trace = type; +#ifdef CONFIG_TRACER_MAX_TRACE if (type->use_max_tr) { /* If we expanded the buffers, make sure the max is expanded too */ if (ring_buffer_expanded) - ring_buffer_resize(max_tr.buffer, trace_buf_size, + ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size, RING_BUFFER_ALL_CPUS); type->allocated_snapshot = true; } +#endif /* the test is responsible for initializing and enabling */ pr_info("Testing tracer %s: ", type->name); @@ -827,16 +818,18 @@ int register_tracer(struct tracer *type) goto out; } /* Only reset on passing, to avoid touching corrupted buffers */ - tracing_reset_online_cpus(tr); + tracing_reset_online_cpus(&tr->trace_buffer); +#ifdef CONFIG_TRACER_MAX_TRACE if (type->use_max_tr) { type->allocated_snapshot = false; /* Shrink the max buffer again */ if (ring_buffer_expanded) - ring_buffer_resize(max_tr.buffer, 1, + ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS); } +#endif printk(KERN_CONT "PASSED\n"); } @@ -870,9 +863,9 @@ int register_tracer(struct tracer *type) return ret; } -void tracing_reset(struct trace_array *tr, int cpu) +void tracing_reset(struct trace_buffer *buf, int cpu) { - struct ring_buffer *buffer = tr->buffer; + struct ring_buffer *buffer = buf->buffer; if (!buffer) return; @@ -886,9 +879,9 @@ void tracing_reset(struct trace_array *tr, int cpu) ring_buffer_record_enable(buffer); } -void tracing_reset_online_cpus(struct trace_array *tr) +void tracing_reset_online_cpus(struct trace_buffer *buf) { - struct ring_buffer *buffer = tr->buffer; + struct ring_buffer *buffer = buf->buffer; int cpu; if (!buffer) @@ -899,7 +892,7 @@ void tracing_reset_online_cpus(struct trace_array *tr) /* Make sure all commits have finished */ synchronize_sched(); - tr->time_start = ftrace_now(tr->cpu); + buf->time_start = ftrace_now(buf->cpu); for_each_online_cpu(cpu) ring_buffer_reset_cpu(buffer, cpu); @@ -909,7 +902,7 @@ void tracing_reset_online_cpus(struct trace_array *tr) void tracing_reset_current(int cpu) { - tracing_reset(&global_trace, cpu); + tracing_reset(&global_trace.trace_buffer, cpu); } void tracing_reset_all_online_cpus(void) @@ -918,7 +911,10 @@ void tracing_reset_all_online_cpus(void) mutex_lock(&trace_types_lock); list_for_each_entry(tr, &ftrace_trace_arrays, list) { - tracing_reset_online_cpus(tr); + tracing_reset_online_cpus(&tr->trace_buffer); +#ifdef CONFIG_TRACER_MAX_TRACE + tracing_reset_online_cpus(&tr->max_buffer); +#endif } mutex_unlock(&trace_types_lock); } @@ -988,13 +984,15 @@ void tracing_start(void) /* Prevent the buffers from switching */ arch_spin_lock(&ftrace_max_lock); - buffer = global_trace.buffer; + buffer = global_trace.trace_buffer.buffer; if (buffer) ring_buffer_record_enable(buffer); - buffer = max_tr.buffer; +#ifdef CONFIG_TRACER_MAX_TRACE + buffer = global_trace.max_buffer.buffer; if (buffer) ring_buffer_record_enable(buffer); +#endif arch_spin_unlock(&ftrace_max_lock); @@ -1026,7 +1024,7 @@ static void tracing_start_tr(struct trace_array *tr) goto out; } - buffer = tr->buffer; + buffer = tr->trace_buffer.buffer; if (buffer) ring_buffer_record_enable(buffer); @@ -1053,13 +1051,15 @@ void tracing_stop(void) /* Prevent the buffers from switching */ arch_spin_lock(&ftrace_max_lock); - buffer = global_trace.buffer; + buffer = global_trace.trace_buffer.buffer; if (buffer) ring_buffer_record_disable(buffer); - buffer = max_tr.buffer; +#ifdef CONFIG_TRACER_MAX_TRACE + buffer = global_trace.max_buffer.buffer; if 
(buffer) ring_buffer_record_disable(buffer); +#endif arch_spin_unlock(&ftrace_max_lock); @@ -1080,7 +1080,7 @@ static void tracing_stop_tr(struct trace_array *tr) if (tr->stop_count++) goto out; - buffer = tr->buffer; + buffer = tr->trace_buffer.buffer; if (buffer) ring_buffer_record_disable(buffer); @@ -1246,7 +1246,7 @@ trace_event_buffer_lock_reserve(struct ring_buffer **current_rb, int type, unsigned long len, unsigned long flags, int pc) { - *current_rb = ftrace_file->tr->buffer; + *current_rb = ftrace_file->tr->trace_buffer.buffer; return trace_buffer_lock_reserve(*current_rb, type, len, flags, pc); } @@ -1257,7 +1257,7 @@ trace_current_buffer_lock_reserve(struct ring_buffer **current_rb, int type, unsigned long len, unsigned long flags, int pc) { - *current_rb = global_trace.buffer; + *current_rb = global_trace.trace_buffer.buffer; return trace_buffer_lock_reserve(*current_rb, type, len, flags, pc); } @@ -1296,7 +1296,7 @@ trace_function(struct trace_array *tr, int pc) { struct ftrace_event_call *call = &event_function; - struct ring_buffer *buffer = tr->buffer; + struct ring_buffer *buffer = tr->trace_buffer.buffer; struct ring_buffer_event *event; struct ftrace_entry *entry; @@ -1437,7 +1437,7 @@ void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags, void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, int pc) { - __ftrace_trace_stack(tr->buffer, flags, skip, pc, NULL); + __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL); } /** @@ -1453,7 +1453,8 @@ void trace_dump_stack(void) local_save_flags(flags); /* skipping 3 traces, seems to get us at the caller of this function */ - __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count(), NULL); + __ftrace_trace_stack(global_trace.trace_buffer.buffer, flags, 3, + preempt_count(), NULL); } static DEFINE_PER_CPU(int, user_stack_count); @@ -1623,7 +1624,7 @@ void trace_printk_init_buffers(void) * directly here. If the global_trace.buffer is already * allocated here, then this was called by module code. */ - if (global_trace.buffer) + if (global_trace.trace_buffer.buffer) tracing_start_cmdline_record(); } @@ -1683,7 +1684,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) local_save_flags(flags); size = sizeof(*entry) + sizeof(u32) * len; - buffer = tr->buffer; + buffer = tr->trace_buffer.buffer; event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size, flags, pc); if (!event) @@ -1706,27 +1707,12 @@ out: } EXPORT_SYMBOL_GPL(trace_vbprintk); -int trace_array_printk(struct trace_array *tr, - unsigned long ip, const char *fmt, ...) 
-{ - int ret; - va_list ap; - - if (!(trace_flags & TRACE_ITER_PRINTK)) - return 0; - - va_start(ap, fmt); - ret = trace_array_vprintk(tr, ip, fmt, ap); - va_end(ap); - return ret; -} - -int trace_array_vprintk(struct trace_array *tr, - unsigned long ip, const char *fmt, va_list args) +static int +__trace_array_vprintk(struct ring_buffer *buffer, + unsigned long ip, const char *fmt, va_list args) { struct ftrace_event_call *call = &event_print; struct ring_buffer_event *event; - struct ring_buffer *buffer; int len = 0, size, pc; struct print_entry *entry; unsigned long flags; @@ -1754,7 +1740,6 @@ int trace_array_vprintk(struct trace_array *tr, local_save_flags(flags); size = sizeof(*entry) + len + 1; - buffer = tr->buffer; event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, flags, pc); if (!event) @@ -1775,6 +1760,42 @@ int trace_array_vprintk(struct trace_array *tr, return len; } +int trace_array_vprintk(struct trace_array *tr, + unsigned long ip, const char *fmt, va_list args) +{ + return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args); +} + +int trace_array_printk(struct trace_array *tr, + unsigned long ip, const char *fmt, ...) +{ + int ret; + va_list ap; + + if (!(trace_flags & TRACE_ITER_PRINTK)) + return 0; + + va_start(ap, fmt); + ret = trace_array_vprintk(tr, ip, fmt, ap); + va_end(ap); + return ret; +} + +int trace_array_printk_buf(struct ring_buffer *buffer, + unsigned long ip, const char *fmt, ...) +{ + int ret; + va_list ap; + + if (!(trace_flags & TRACE_ITER_PRINTK)) + return 0; + + va_start(ap, fmt); + ret = __trace_array_vprintk(buffer, ip, fmt, ap); + va_end(ap); + return ret; +} + int trace_vprintk(unsigned long ip, const char *fmt, va_list args) { return trace_array_vprintk(&global_trace, ip, fmt, args); @@ -1800,7 +1821,7 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts, if (buf_iter) event = ring_buffer_iter_peek(buf_iter, ts); else - event = ring_buffer_peek(iter->tr->buffer, cpu, ts, + event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts, lost_events); if (event) { @@ -1815,7 +1836,7 @@ static struct trace_entry * __find_next_entry(struct trace_iterator *iter, int *ent_cpu, unsigned long *missing_events, u64 *ent_ts) { - struct ring_buffer *buffer = iter->tr->buffer; + struct ring_buffer *buffer = iter->trace_buffer->buffer; struct trace_entry *ent, *next = NULL; unsigned long lost_events = 0, next_lost = 0; int cpu_file = iter->cpu_file; @@ -1892,7 +1913,7 @@ void *trace_find_next_entry_inc(struct trace_iterator *iter) static void trace_consume(struct trace_iterator *iter) { - ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts, + ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts, &iter->lost_events); } @@ -1925,13 +1946,12 @@ static void *s_next(struct seq_file *m, void *v, loff_t *pos) void tracing_iter_reset(struct trace_iterator *iter, int cpu) { - struct trace_array *tr = iter->tr; struct ring_buffer_event *event; struct ring_buffer_iter *buf_iter; unsigned long entries = 0; u64 ts; - per_cpu_ptr(tr->data, cpu)->skipped_entries = 0; + per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0; buf_iter = trace_buffer_iter(iter, cpu); if (!buf_iter) @@ -1945,13 +1965,13 @@ void tracing_iter_reset(struct trace_iterator *iter, int cpu) * by the timestamp being before the start of the buffer. 
*/ while ((event = ring_buffer_iter_peek(buf_iter, &ts))) { - if (ts >= iter->tr->time_start) + if (ts >= iter->trace_buffer->time_start) break; entries++; ring_buffer_read(buf_iter, NULL); } - per_cpu_ptr(tr->data, cpu)->skipped_entries = entries; + per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries; } /* @@ -1978,8 +1998,10 @@ static void *s_start(struct seq_file *m, loff_t *pos) *iter->trace = *tr->current_trace; mutex_unlock(&trace_types_lock); +#ifdef CONFIG_TRACER_MAX_TRACE if (iter->snapshot && iter->trace->use_max_tr) return ERR_PTR(-EBUSY); +#endif if (!iter->snapshot) atomic_inc(&trace_record_cmdline_disabled); @@ -2021,17 +2043,21 @@ static void s_stop(struct seq_file *m, void *p) { struct trace_iterator *iter = m->private; +#ifdef CONFIG_TRACER_MAX_TRACE if (iter->snapshot && iter->trace->use_max_tr) return; +#endif if (!iter->snapshot) atomic_dec(&trace_record_cmdline_disabled); + trace_access_unlock(iter->cpu_file); trace_event_read_unlock(); } static void -get_total_entries(struct trace_array *tr, unsigned long *total, unsigned long *entries) +get_total_entries(struct trace_buffer *buf, + unsigned long *total, unsigned long *entries) { unsigned long count; int cpu; @@ -2040,19 +2066,19 @@ get_total_entries(struct trace_array *tr, unsigned long *total, unsigned long *e *entries = 0; for_each_tracing_cpu(cpu) { - count = ring_buffer_entries_cpu(tr->buffer, cpu); + count = ring_buffer_entries_cpu(buf->buffer, cpu); /* * If this buffer has skipped entries, then we hold all * entries for the trace and we need to ignore the * ones before the time stamp. */ - if (per_cpu_ptr(tr->data, cpu)->skipped_entries) { - count -= per_cpu_ptr(tr->data, cpu)->skipped_entries; + if (per_cpu_ptr(buf->data, cpu)->skipped_entries) { + count -= per_cpu_ptr(buf->data, cpu)->skipped_entries; /* total is the same as the entries */ *total += count; } else *total += count + - ring_buffer_overrun_cpu(tr->buffer, cpu); + ring_buffer_overrun_cpu(buf->buffer, cpu); *entries += count; } } @@ -2069,27 +2095,27 @@ static void print_lat_help_header(struct seq_file *m) seq_puts(m, "# \\ / ||||| \\ | / \n"); } -static void print_event_info(struct trace_array *tr, struct seq_file *m) +static void print_event_info(struct trace_buffer *buf, struct seq_file *m) { unsigned long total; unsigned long entries; - get_total_entries(tr, &total, &entries); + get_total_entries(buf, &total, &entries); seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n", entries, total, num_online_cpus()); seq_puts(m, "#\n"); } -static void print_func_help_header(struct trace_array *tr, struct seq_file *m) +static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m) { - print_event_info(tr, m); + print_event_info(buf, m); seq_puts(m, "# TASK-PID CPU# TIMESTAMP FUNCTION\n"); seq_puts(m, "# | | | | |\n"); } -static void print_func_help_header_irq(struct trace_array *tr, struct seq_file *m) +static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m) { - print_event_info(tr, m); + print_event_info(buf, m); seq_puts(m, "# _-----=> irqs-off\n"); seq_puts(m, "# / _----=> need-resched\n"); seq_puts(m, "# | / _---=> hardirq/softirq\n"); @@ -2103,8 +2129,8 @@ void print_trace_header(struct seq_file *m, struct trace_iterator *iter) { unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); - struct trace_array *tr = iter->tr; - struct trace_array_cpu *data = per_cpu_ptr(tr->data, tr->cpu); + struct trace_buffer *buf = iter->trace_buffer; + struct trace_array_cpu *data 
= per_cpu_ptr(buf->data, buf->cpu); struct tracer *type = iter->trace; unsigned long entries; unsigned long total; @@ -2112,7 +2138,7 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter) name = type->name; - get_total_entries(tr, &total, &entries); + get_total_entries(buf, &total, &entries); seq_printf(m, "# %s latency trace v1.1.5 on %s\n", name, UTS_RELEASE); @@ -2123,7 +2149,7 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter) nsecs_to_usecs(data->saved_latency), entries, total, - tr->cpu, + buf->cpu, #if defined(CONFIG_PREEMPT_NONE) "server", #elif defined(CONFIG_PREEMPT_VOLUNTARY) @@ -2174,7 +2200,7 @@ static void test_cpu_buff_start(struct trace_iterator *iter) if (cpumask_test_cpu(iter->cpu, iter->started)) return; - if (per_cpu_ptr(iter->tr->data, iter->cpu)->skipped_entries) + if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries) return; cpumask_set_cpu(iter->cpu, iter->started); @@ -2304,7 +2330,7 @@ int trace_empty(struct trace_iterator *iter) if (!ring_buffer_iter_empty(buf_iter)) return 0; } else { - if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu)) + if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu)) return 0; } return 1; @@ -2316,7 +2342,7 @@ int trace_empty(struct trace_iterator *iter) if (!ring_buffer_iter_empty(buf_iter)) return 0; } else { - if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu)) + if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu)) return 0; } } @@ -2394,9 +2420,9 @@ void trace_default_header(struct seq_file *m) } else { if (!(trace_flags & TRACE_ITER_VERBOSE)) { if (trace_flags & TRACE_ITER_IRQ_INFO) - print_func_help_header_irq(iter->tr, m); + print_func_help_header_irq(iter->trace_buffer, m); else - print_func_help_header(iter->tr, m); + print_func_help_header(iter->trace_buffer, m); } } } @@ -2515,11 +2541,15 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot) if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL)) goto fail; + iter->tr = tr; + +#ifdef CONFIG_TRACER_MAX_TRACE /* Currently only the top directory has a snapshot */ if (tr->current_trace->print_max || snapshot) - iter->tr = &max_tr; + iter->trace_buffer = &tr->max_buffer; else - iter->tr = tr; +#endif + iter->trace_buffer = &tr->trace_buffer; iter->snapshot = snapshot; iter->pos = -1; mutex_init(&iter->mutex); @@ -2530,7 +2560,7 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot) iter->trace->open(iter); /* Annotate start of buffers if we had overruns */ - if (ring_buffer_overruns(iter->tr->buffer)) + if (ring_buffer_overruns(iter->trace_buffer->buffer)) iter->iter_flags |= TRACE_FILE_ANNOTATE; /* Output in nanoseconds only if we are using a clock in nanoseconds. 
*/ @@ -2544,7 +2574,7 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot) if (iter->cpu_file == RING_BUFFER_ALL_CPUS) { for_each_tracing_cpu(cpu) { iter->buffer_iter[cpu] = - ring_buffer_read_prepare(iter->tr->buffer, cpu); + ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu); } ring_buffer_read_prepare_sync(); for_each_tracing_cpu(cpu) { @@ -2554,7 +2584,7 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot) } else { cpu = iter->cpu_file; iter->buffer_iter[cpu] = - ring_buffer_read_prepare(iter->tr->buffer, cpu); + ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu); ring_buffer_read_prepare_sync(); ring_buffer_read_start(iter->buffer_iter[cpu]); tracing_iter_reset(iter, cpu); @@ -2593,12 +2623,7 @@ static int tracing_release(struct inode *inode, struct file *file) return 0; iter = m->private; - - /* Only the global tracer has a matching max_tr */ - if (iter->tr == &max_tr) - tr = &global_trace; - else - tr = iter->tr; + tr = iter->tr; mutex_lock(&trace_types_lock); for_each_tracing_cpu(cpu) { @@ -2634,9 +2659,9 @@ static int tracing_open(struct inode *inode, struct file *file) struct trace_array *tr = tc->tr; if (tc->cpu == RING_BUFFER_ALL_CPUS) - tracing_reset_online_cpus(tr); + tracing_reset_online_cpus(&tr->trace_buffer); else - tracing_reset(tr, tc->cpu); + tracing_reset(&tr->trace_buffer, tc->cpu); } if (file->f_mode & FMODE_READ) { @@ -2805,13 +2830,13 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, */ if (cpumask_test_cpu(cpu, tracing_cpumask) && !cpumask_test_cpu(cpu, tracing_cpumask_new)) { - atomic_inc(&per_cpu_ptr(tr->data, cpu)->disabled); - ring_buffer_record_disable_cpu(tr->buffer, cpu); + atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled); + ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu); } if (!cpumask_test_cpu(cpu, tracing_cpumask) && cpumask_test_cpu(cpu, tracing_cpumask_new)) { - atomic_dec(&per_cpu_ptr(tr->data, cpu)->disabled); - ring_buffer_record_enable_cpu(tr->buffer, cpu); + atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled); + ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu); } } arch_spin_unlock(&ftrace_max_lock); @@ -2930,9 +2955,9 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled) trace_event_enable_cmd_record(enabled); if (mask == TRACE_ITER_OVERWRITE) { - ring_buffer_change_overwrite(global_trace.buffer, enabled); + ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled); #ifdef CONFIG_TRACER_MAX_TRACE - ring_buffer_change_overwrite(max_tr.buffer, enabled); + ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled); #endif } @@ -3116,42 +3141,44 @@ tracing_set_trace_read(struct file *filp, char __user *ubuf, int tracer_init(struct tracer *t, struct trace_array *tr) { - tracing_reset_online_cpus(tr); + tracing_reset_online_cpus(&tr->trace_buffer); return t->init(tr); } -static void set_buffer_entries(struct trace_array *tr, unsigned long val) +static void set_buffer_entries(struct trace_buffer *buf, unsigned long val) { int cpu; for_each_tracing_cpu(cpu) - per_cpu_ptr(tr->data, cpu)->entries = val; + per_cpu_ptr(buf->data, cpu)->entries = val; } +#ifdef CONFIG_TRACER_MAX_TRACE /* resize @tr's buffer to the size of @size_tr's entries */ -static int resize_buffer_duplicate_size(struct trace_array *tr, - struct trace_array *size_tr, int cpu_id) +static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf, + struct trace_buffer *size_buf, int cpu_id) { int cpu, ret = 0; if (cpu_id == 
RING_BUFFER_ALL_CPUS) { for_each_tracing_cpu(cpu) { - ret = ring_buffer_resize(tr->buffer, - per_cpu_ptr(size_tr->data, cpu)->entries, cpu); + ret = ring_buffer_resize(trace_buf->buffer, + per_cpu_ptr(size_buf->data, cpu)->entries, cpu); if (ret < 0) break; - per_cpu_ptr(tr->data, cpu)->entries = - per_cpu_ptr(size_tr->data, cpu)->entries; + per_cpu_ptr(trace_buf->data, cpu)->entries = + per_cpu_ptr(size_buf->data, cpu)->entries; } } else { - ret = ring_buffer_resize(tr->buffer, - per_cpu_ptr(size_tr->data, cpu_id)->entries, cpu_id); + ret = ring_buffer_resize(trace_buf->buffer, + per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id); if (ret == 0) - per_cpu_ptr(tr->data, cpu_id)->entries = - per_cpu_ptr(size_tr->data, cpu_id)->entries; + per_cpu_ptr(trace_buf->data, cpu_id)->entries = + per_cpu_ptr(size_buf->data, cpu_id)->entries; } return ret; } +#endif /* CONFIG_TRACER_MAX_TRACE */ static int __tracing_resize_ring_buffer(struct trace_array *tr, unsigned long size, int cpu) @@ -3166,20 +3193,22 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr, ring_buffer_expanded = 1; /* May be called before buffers are initialized */ - if (!tr->buffer) + if (!tr->trace_buffer.buffer) return 0; - ret = ring_buffer_resize(tr->buffer, size, cpu); + ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu); if (ret < 0) return ret; +#ifdef CONFIG_TRACER_MAX_TRACE if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) || !tr->current_trace->use_max_tr) goto out; - ret = ring_buffer_resize(max_tr.buffer, size, cpu); + ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu); if (ret < 0) { - int r = resize_buffer_duplicate_size(tr, tr, cpu); + int r = resize_buffer_duplicate_size(&tr->trace_buffer, + &tr->trace_buffer, cpu); if (r < 0) { /* * AARGH! We are left with different @@ -3202,15 +3231,17 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr, } if (cpu == RING_BUFFER_ALL_CPUS) - set_buffer_entries(&max_tr, size); + set_buffer_entries(&tr->max_buffer, size); else - per_cpu_ptr(max_tr.data, cpu)->entries = size; + per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size; out: +#endif /* CONFIG_TRACER_MAX_TRACE */ + if (cpu == RING_BUFFER_ALL_CPUS) - set_buffer_entries(tr, size); + set_buffer_entries(&tr->trace_buffer, size); else - per_cpu_ptr(tr->data, cpu)->entries = size; + per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size; return ret; } @@ -3277,7 +3308,9 @@ static int tracing_set_tracer(const char *buf) static struct trace_option_dentry *topts; struct trace_array *tr = &global_trace; struct tracer *t; +#ifdef CONFIG_TRACER_MAX_TRACE bool had_max_tr; +#endif int ret = 0; mutex_lock(&trace_types_lock); @@ -3308,7 +3341,10 @@ static int tracing_set_tracer(const char *buf) if (tr->current_trace->reset) tr->current_trace->reset(tr); +#ifdef CONFIG_TRACER_MAX_TRACE had_max_tr = tr->current_trace->allocated_snapshot; + + /* Current trace needs to be nop_trace before synchronize_sched */ tr->current_trace = &nop_trace; if (had_max_tr && !t->use_max_tr) { @@ -3325,22 +3361,28 @@ static int tracing_set_tracer(const char *buf) * The max_tr ring buffer has some state (e.g. ring->clock) and * we want preserve it. 
*/ - ring_buffer_resize(max_tr.buffer, 1, RING_BUFFER_ALL_CPUS); - set_buffer_entries(&max_tr, 1); - tracing_reset_online_cpus(&max_tr); + ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS); + set_buffer_entries(&tr->max_buffer, 1); + tracing_reset_online_cpus(&tr->max_buffer); tr->current_trace->allocated_snapshot = false; } +#else + tr->current_trace = &nop_trace; +#endif destroy_trace_option_files(topts); topts = create_trace_option_files(tr, t); + +#ifdef CONFIG_TRACER_MAX_TRACE if (t->use_max_tr && !had_max_tr) { /* we need to make per cpu buffer sizes equivalent */ - ret = resize_buffer_duplicate_size(&max_tr, &global_trace, + ret = resize_buffer_duplicate_size(&tr->max_buffer, &tr->trace_buffer, RING_BUFFER_ALL_CPUS); if (ret < 0) goto out; t->allocated_snapshot = true; } +#endif if (t->init) { ret = tracer_init(t, tr); @@ -3468,6 +3510,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) iter->cpu_file = tc->cpu; iter->tr = tc->tr; + iter->trace_buffer = &tc->tr->trace_buffer; mutex_init(&iter->mutex); filp->private_data = iter; @@ -3518,7 +3561,7 @@ trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_tabl */ return POLLIN | POLLRDNORM; else - return ring_buffer_poll_wait(iter->tr->buffer, iter->cpu_file, + return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file, filp, poll_table); } @@ -3857,8 +3900,8 @@ tracing_entries_read(struct file *filp, char __user *ubuf, for_each_tracing_cpu(cpu) { /* fill in the size from first enabled cpu */ if (size == 0) - size = per_cpu_ptr(tr->data, cpu)->entries; - if (size != per_cpu_ptr(tr->data, cpu)->entries) { + size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries; + if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) { buf_size_same = 0; break; } @@ -3874,7 +3917,7 @@ tracing_entries_read(struct file *filp, char __user *ubuf, } else r = sprintf(buf, "X\n"); } else - r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->data, tc->cpu)->entries >> 10); + r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, tc->cpu)->entries >> 10); mutex_unlock(&trace_types_lock); @@ -3921,7 +3964,7 @@ tracing_total_entries_read(struct file *filp, char __user *ubuf, mutex_lock(&trace_types_lock); for_each_tracing_cpu(cpu) { - size += per_cpu_ptr(tr->data, cpu)->entries >> 10; + size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10; if (!ring_buffer_expanded) expanded_size += trace_buf_size >> 10; } @@ -4026,7 +4069,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, local_save_flags(irq_flags); size = sizeof(*entry) + cnt + 2; /* possible \n added */ - buffer = global_trace.buffer; + buffer = global_trace.trace_buffer.buffer; event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, irq_flags, preempt_count()); if (!event) { @@ -4111,16 +4154,19 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf, tr->clock_id = i; - ring_buffer_set_clock(tr->buffer, trace_clocks[i].func); - if (tr->flags & TRACE_ARRAY_FL_GLOBAL && max_tr.buffer) - ring_buffer_set_clock(max_tr.buffer, trace_clocks[i].func); + ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func); /* * New clock may not be consistent with the previous clock. * Reset the buffer so that it doesn't have incomparable timestamps. 
*/ - tracing_reset_online_cpus(&global_trace); - tracing_reset_online_cpus(&max_tr); + tracing_reset_online_cpus(&global_trace.trace_buffer); + +#ifdef CONFIG_TRACER_MAX_TRACE + if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer) + ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func); + tracing_reset_online_cpus(&global_trace.max_buffer); +#endif mutex_unlock(&trace_types_lock); @@ -4160,6 +4206,7 @@ static int tracing_snapshot_open(struct inode *inode, struct file *file) return -ENOMEM; } iter->tr = tc->tr; + iter->trace_buffer = &tc->tr->max_buffer; m->private = iter; file->private_data = m; } @@ -4196,18 +4243,18 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, case 0: if (tr->current_trace->allocated_snapshot) { /* free spare buffer */ - ring_buffer_resize(max_tr.buffer, 1, + ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS); - set_buffer_entries(&max_tr, 1); - tracing_reset_online_cpus(&max_tr); + set_buffer_entries(&tr->max_buffer, 1); + tracing_reset_online_cpus(&tr->max_buffer); tr->current_trace->allocated_snapshot = false; } break; case 1: if (!tr->current_trace->allocated_snapshot) { /* allocate spare buffer */ - ret = resize_buffer_duplicate_size(&max_tr, - &global_trace, RING_BUFFER_ALL_CPUS); + ret = resize_buffer_duplicate_size(&tr->max_buffer, + &tr->trace_buffer, RING_BUFFER_ALL_CPUS); if (ret < 0) break; tr->current_trace->allocated_snapshot = true; @@ -4220,7 +4267,7 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, break; default: if (tr->current_trace->allocated_snapshot) - tracing_reset_online_cpus(&max_tr); + tracing_reset_online_cpus(&tr->max_buffer); break; } @@ -4338,6 +4385,7 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp) info->iter.tr = tr; info->iter.cpu_file = tc->cpu; info->iter.trace = tr->current_trace; + info->iter.trace_buffer = &tr->trace_buffer; info->spare = NULL; /* Force reading ring buffer for first read */ info->read = (unsigned int)-1; @@ -4369,7 +4417,8 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, return 0; if (!info->spare) - info->spare = ring_buffer_alloc_read_page(iter->tr->buffer, iter->cpu_file); + info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer, + iter->cpu_file); if (!info->spare) return -ENOMEM; @@ -4379,7 +4428,7 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, again: trace_access_lock(iter->cpu_file); - ret = ring_buffer_read_page(iter->tr->buffer, + ret = ring_buffer_read_page(iter->trace_buffer->buffer, &info->spare, count, iter->cpu_file, 0); @@ -4421,7 +4470,7 @@ static int tracing_buffers_release(struct inode *inode, struct file *file) struct trace_iterator *iter = &info->iter; if (info->spare) - ring_buffer_free_read_page(iter->tr->buffer, info->spare); + ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare); kfree(info); return 0; @@ -4521,7 +4570,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, again: trace_access_lock(iter->cpu_file); - entries = ring_buffer_entries_cpu(iter->tr->buffer, iter->cpu_file); + entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file); for (i = 0; i < pipe->buffers && len && entries; i++, len -= PAGE_SIZE) { struct page *page; @@ -4532,7 +4581,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, break; ref->ref = 1; - ref->buffer = iter->tr->buffer; + ref->buffer = iter->trace_buffer->buffer; ref->page = ring_buffer_alloc_read_page(ref->buffer, 
iter->cpu_file); if (!ref->page) { kfree(ref); @@ -4564,7 +4613,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, spd.nr_pages++; *ppos += PAGE_SIZE; - entries = ring_buffer_entries_cpu(iter->tr->buffer, iter->cpu_file); + entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file); } trace_access_unlock(iter->cpu_file); @@ -4605,6 +4654,7 @@ tracing_stats_read(struct file *filp, char __user *ubuf, { struct trace_cpu *tc = filp->private_data; struct trace_array *tr = tc->tr; + struct trace_buffer *trace_buf = &tr->trace_buffer; struct trace_seq *s; unsigned long cnt; unsigned long long t; @@ -4617,41 +4667,41 @@ tracing_stats_read(struct file *filp, char __user *ubuf, trace_seq_init(s); - cnt = ring_buffer_entries_cpu(tr->buffer, cpu); + cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu); trace_seq_printf(s, "entries: %ld\n", cnt); - cnt = ring_buffer_overrun_cpu(tr->buffer, cpu); + cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu); trace_seq_printf(s, "overrun: %ld\n", cnt); - cnt = ring_buffer_commit_overrun_cpu(tr->buffer, cpu); + cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu); trace_seq_printf(s, "commit overrun: %ld\n", cnt); - cnt = ring_buffer_bytes_cpu(tr->buffer, cpu); + cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu); trace_seq_printf(s, "bytes: %ld\n", cnt); if (trace_clocks[trace_clock_id].in_ns) { /* local or global for trace_clock */ - t = ns2usecs(ring_buffer_oldest_event_ts(tr->buffer, cpu)); + t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu)); usec_rem = do_div(t, USEC_PER_SEC); trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n", t, usec_rem); - t = ns2usecs(ring_buffer_time_stamp(tr->buffer, cpu)); + t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu)); usec_rem = do_div(t, USEC_PER_SEC); trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem); } else { /* counter or tsc mode for trace_clock */ trace_seq_printf(s, "oldest event ts: %llu\n", - ring_buffer_oldest_event_ts(tr->buffer, cpu)); + ring_buffer_oldest_event_ts(trace_buf->buffer, cpu)); trace_seq_printf(s, "now ts: %llu\n", - ring_buffer_time_stamp(tr->buffer, cpu)); + ring_buffer_time_stamp(trace_buf->buffer, cpu)); } - cnt = ring_buffer_dropped_events_cpu(tr->buffer, cpu); + cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu); trace_seq_printf(s, "dropped events: %ld\n", cnt); - cnt = ring_buffer_read_events_cpu(tr->buffer, cpu); + cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu); trace_seq_printf(s, "read events: %ld\n", cnt); count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len); @@ -4754,7 +4804,7 @@ static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu) static void tracing_init_debugfs_percpu(struct trace_array *tr, long cpu) { - struct trace_array_cpu *data = per_cpu_ptr(tr->data, cpu); + struct trace_array_cpu *data = per_cpu_ptr(tr->trace_buffer.data, cpu); struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu); struct dentry *d_cpu; char cpu_dir[30]; /* 30 characters should be more than enough */ @@ -5038,7 +5088,7 @@ rb_simple_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_array *tr = filp->private_data; - struct ring_buffer *buffer = tr->buffer; + struct ring_buffer *buffer = tr->trace_buffer.buffer; char buf[64]; int r; @@ -5057,7 +5107,7 @@ rb_simple_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_array *tr = filp->private_data; - struct ring_buffer *buffer = tr->buffer; + struct 
ring_buffer *buffer = tr->trace_buffer.buffer; unsigned long val; int ret; @@ -5129,18 +5179,18 @@ static int new_instance_create(const char *name) rb_flags = trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0; - tr->buffer = ring_buffer_alloc(trace_buf_size, rb_flags); - if (!tr->buffer) + tr->trace_buffer.buffer = ring_buffer_alloc(trace_buf_size, rb_flags); + if (!tr->trace_buffer.buffer) goto out_free_tr; - tr->data = alloc_percpu(struct trace_array_cpu); - if (!tr->data) + tr->trace_buffer.data = alloc_percpu(struct trace_array_cpu); + if (!tr->trace_buffer.data) goto out_free_tr; for_each_tracing_cpu(i) { - memset(per_cpu_ptr(tr->data, i), 0, sizeof(struct trace_array_cpu)); - per_cpu_ptr(tr->data, i)->trace_cpu.cpu = i; - per_cpu_ptr(tr->data, i)->trace_cpu.tr = tr; + memset(per_cpu_ptr(tr->trace_buffer.data, i), 0, sizeof(struct trace_array_cpu)); + per_cpu_ptr(tr->trace_buffer.data, i)->trace_cpu.cpu = i; + per_cpu_ptr(tr->trace_buffer.data, i)->trace_cpu.tr = tr; } /* Holder for file callbacks */ @@ -5164,8 +5214,8 @@ static int new_instance_create(const char *name) return 0; out_free_tr: - if (tr->buffer) - ring_buffer_free(tr->buffer); + if (tr->trace_buffer.buffer) + ring_buffer_free(tr->trace_buffer.buffer); kfree(tr->name); kfree(tr); @@ -5198,8 +5248,8 @@ static int instance_delete(const char *name) event_trace_del_tracer(tr); debugfs_remove_recursive(tr->dir); - free_percpu(tr->data); - ring_buffer_free(tr->buffer); + free_percpu(tr->trace_buffer.data); + ring_buffer_free(tr->trace_buffer.buffer); kfree(tr->name); kfree(tr); @@ -5439,6 +5489,7 @@ void trace_init_global_iter(struct trace_iterator *iter) iter->tr = &global_trace; iter->trace = iter->tr->current_trace; iter->cpu_file = RING_BUFFER_ALL_CPUS; + iter->trace_buffer = &global_trace.trace_buffer; } static void @@ -5476,7 +5527,7 @@ __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode) trace_init_global_iter(&iter); for_each_tracing_cpu(cpu) { - atomic_inc(&per_cpu_ptr(iter.tr->data, cpu)->disabled); + atomic_inc(&per_cpu_ptr(iter.tr->trace_buffer.data, cpu)->disabled); } old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ; @@ -5544,7 +5595,7 @@ __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode) trace_flags |= old_userobj; for_each_tracing_cpu(cpu) { - atomic_dec(&per_cpu_ptr(iter.tr->data, cpu)->disabled); + atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled); } tracing_on(); } @@ -5594,58 +5645,59 @@ __init static int tracer_alloc_buffers(void) raw_spin_lock_init(&global_trace.start_lock); /* TODO: make the number of buffers hot pluggable with CPUS */ - global_trace.buffer = ring_buffer_alloc(ring_buf_size, rb_flags); - if (!global_trace.buffer) { + global_trace.trace_buffer.buffer = ring_buffer_alloc(ring_buf_size, rb_flags); + if (!global_trace.trace_buffer.buffer) { printk(KERN_ERR "tracer: failed to allocate ring buffer!\n"); WARN_ON(1); goto out_free_cpumask; } - global_trace.data = alloc_percpu(struct trace_array_cpu); + global_trace.trace_buffer.data = alloc_percpu(struct trace_array_cpu); - if (!global_trace.data) { + if (!global_trace.trace_buffer.data) { printk(KERN_ERR "tracer: failed to allocate percpu memory!\n"); WARN_ON(1); goto out_free_cpumask; } for_each_tracing_cpu(i) { - memset(per_cpu_ptr(global_trace.data, i), 0, sizeof(struct trace_array_cpu)); - per_cpu_ptr(global_trace.data, i)->trace_cpu.cpu = i; - per_cpu_ptr(global_trace.data, i)->trace_cpu.tr = &global_trace; + memset(per_cpu_ptr(global_trace.trace_buffer.data, i), 0, + 
sizeof(struct trace_array_cpu)); + per_cpu_ptr(global_trace.trace_buffer.data, i)->trace_cpu.cpu = i; + per_cpu_ptr(global_trace.trace_buffer.data, i)->trace_cpu.tr = &global_trace; } if (global_trace.buffer_disabled) tracing_off(); #ifdef CONFIG_TRACER_MAX_TRACE - max_tr.data = alloc_percpu(struct trace_array_cpu); - if (!max_tr.data) { + global_trace.max_buffer.data = alloc_percpu(struct trace_array_cpu); + if (!global_trace.max_buffer.data) { printk(KERN_ERR "tracer: failed to allocate percpu memory!\n"); WARN_ON(1); goto out_free_cpumask; } - max_tr.buffer = ring_buffer_alloc(1, rb_flags); - raw_spin_lock_init(&max_tr.start_lock); - if (!max_tr.buffer) { + global_trace.max_buffer.buffer = ring_buffer_alloc(1, rb_flags); + if (!global_trace.max_buffer.buffer) { printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n"); WARN_ON(1); - ring_buffer_free(global_trace.buffer); + ring_buffer_free(global_trace.trace_buffer.buffer); goto out_free_cpumask; } for_each_tracing_cpu(i) { - memset(per_cpu_ptr(max_tr.data, i), 0, sizeof(struct trace_array_cpu)); - per_cpu_ptr(max_tr.data, i)->trace_cpu.cpu = i; - per_cpu_ptr(max_tr.data, i)->trace_cpu.tr = &max_tr; + memset(per_cpu_ptr(global_trace.max_buffer.data, i), 0, + sizeof(struct trace_array_cpu)); + per_cpu_ptr(global_trace.max_buffer.data, i)->trace_cpu.cpu = i; + per_cpu_ptr(global_trace.max_buffer.data, i)->trace_cpu.tr = &global_trace; } #endif /* Allocate the first page for all buffers */ - set_buffer_entries(&global_trace, - ring_buffer_size(global_trace.buffer, 0)); + set_buffer_entries(&global_trace.trace_buffer, + ring_buffer_size(global_trace.trace_buffer.buffer, 0)); #ifdef CONFIG_TRACER_MAX_TRACE - set_buffer_entries(&max_tr, 1); + set_buffer_entries(&global_trace.max_buffer, 1); #endif trace_init_cmdlines(); @@ -5682,8 +5734,10 @@ __init static int tracer_alloc_buffers(void) return 0; out_free_cpumask: - free_percpu(global_trace.data); - free_percpu(max_tr.data); + free_percpu(global_trace.trace_buffer.data); +#ifdef CONFIG_TRACER_MAX_TRACE + free_percpu(global_trace.max_buffer.data); +#endif free_cpumask_var(tracing_cpumask); out_free_buffer_mask: free_cpumask_var(tracing_buffer_mask); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index fa60b2977524..986834f1f4dd 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -167,16 +167,37 @@ struct trace_array_cpu { struct tracer; +struct trace_buffer { + struct trace_array *tr; + struct ring_buffer *buffer; + struct trace_array_cpu __percpu *data; + cycle_t time_start; + int cpu; +}; + /* * The trace array - an array of per-CPU trace arrays. This is the * highest level data structure that individual tracers deal with. * They have on/off state as well: */ struct trace_array { - struct ring_buffer *buffer; struct list_head list; char *name; - int cpu; + struct trace_buffer trace_buffer; +#ifdef CONFIG_TRACER_MAX_TRACE + /* + * The max_buffer is used to snapshot the trace when a maximum + * latency is reached, or when the user initiates a snapshot. + * Some tracers will use this to store a maximum trace while + * it continues examining live traces. + * + * The buffers for the max_buffer are set up the same as the trace_buffer + * When a snapshot is taken, the buffer of the max_buffer is swapped + * with the buffer of the trace_buffer and the buffers are reset for + * the trace_buffer so the tracing can continue. 
+ */ + struct trace_buffer max_buffer; +#endif int buffer_disabled; struct trace_cpu trace_cpu; /* place holder */ #ifdef CONFIG_FTRACE_SYSCALLS @@ -189,7 +210,6 @@ struct trace_array { int clock_id; struct tracer *current_trace; unsigned int flags; - cycle_t time_start; raw_spinlock_t start_lock; struct dentry *dir; struct dentry *options; @@ -198,7 +218,6 @@ struct trace_array { struct list_head systems; struct list_head events; struct task_struct *waiter; - struct trace_array_cpu __percpu *data; }; enum { @@ -345,9 +364,11 @@ struct tracer { struct tracer *next; struct tracer_flags *flags; bool print_max; + bool enabled; +#ifdef CONFIG_TRACER_MAX_TRACE bool use_max_tr; bool allocated_snapshot; - bool enabled; +#endif }; @@ -493,8 +514,8 @@ trace_buffer_iter(struct trace_iterator *iter, int cpu) int tracer_init(struct tracer *t, struct trace_array *tr); int tracing_is_enabled(void); -void tracing_reset(struct trace_array *tr, int cpu); -void tracing_reset_online_cpus(struct trace_array *tr); +void tracing_reset(struct trace_buffer *buf, int cpu); +void tracing_reset_online_cpus(struct trace_buffer *buf); void tracing_reset_current(int cpu); void tracing_reset_all_online_cpus(void); int tracing_open_generic(struct inode *inode, struct file *filp); @@ -674,6 +695,8 @@ trace_array_vprintk(struct trace_array *tr, unsigned long ip, const char *fmt, va_list args); int trace_array_printk(struct trace_array *tr, unsigned long ip, const char *fmt, ...); +int trace_array_printk_buf(struct ring_buffer *buffer, + unsigned long ip, const char *fmt, ...); void trace_printk_seq(struct trace_seq *s); enum print_line_t print_trace_line(struct trace_iterator *iter); diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 9d73861efc6a..e467c0c7bdd5 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -28,7 +28,7 @@ static void tracing_stop_function_trace(void); static int function_trace_init(struct trace_array *tr) { func_trace = tr; - tr->cpu = get_cpu(); + tr->trace_buffer.cpu = get_cpu(); put_cpu(); tracing_start_cmdline_record(); @@ -44,7 +44,7 @@ static void function_trace_reset(struct trace_array *tr) static void function_trace_start(struct trace_array *tr) { - tracing_reset_online_cpus(tr); + tracing_reset_online_cpus(&tr->trace_buffer); } /* Our option */ @@ -76,7 +76,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip, goto out; cpu = smp_processor_id(); - data = per_cpu_ptr(tr->data, cpu); + data = per_cpu_ptr(tr->trace_buffer.data, cpu); if (!atomic_read(&data->disabled)) { local_save_flags(flags); trace_function(tr, ip, parent_ip, flags, pc); @@ -107,7 +107,7 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip, */ local_irq_save(flags); cpu = raw_smp_processor_id(); - data = per_cpu_ptr(tr->data, cpu); + data = per_cpu_ptr(tr->trace_buffer.data, cpu); disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) { diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index ca986d61a282..8388bc99f2ee 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -218,7 +218,7 @@ int __trace_graph_entry(struct trace_array *tr, { struct ftrace_event_call *call = &event_funcgraph_entry; struct ring_buffer_event *event; - struct ring_buffer *buffer = tr->buffer; + struct ring_buffer *buffer = tr->trace_buffer.buffer; struct ftrace_graph_ent_entry *entry; if (unlikely(__this_cpu_read(ftrace_cpu_disabled))) @@ -265,7 +265,7 @@ int 
trace_graph_entry(struct ftrace_graph_ent *trace) local_irq_save(flags); cpu = raw_smp_processor_id(); - data = per_cpu_ptr(tr->data, cpu); + data = per_cpu_ptr(tr->trace_buffer.data, cpu); disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) { pc = preempt_count(); @@ -323,7 +323,7 @@ void __trace_graph_return(struct trace_array *tr, { struct ftrace_event_call *call = &event_funcgraph_exit; struct ring_buffer_event *event; - struct ring_buffer *buffer = tr->buffer; + struct ring_buffer *buffer = tr->trace_buffer.buffer; struct ftrace_graph_ret_entry *entry; if (unlikely(__this_cpu_read(ftrace_cpu_disabled))) @@ -350,7 +350,7 @@ void trace_graph_return(struct ftrace_graph_ret *trace) local_irq_save(flags); cpu = raw_smp_processor_id(); - data = per_cpu_ptr(tr->data, cpu); + data = per_cpu_ptr(tr->trace_buffer.data, cpu); disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) { pc = preempt_count(); @@ -560,9 +560,9 @@ get_return_for_leaf(struct trace_iterator *iter, * We need to consume the current entry to see * the next one. */ - ring_buffer_consume(iter->tr->buffer, iter->cpu, + ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, NULL, NULL); - event = ring_buffer_peek(iter->tr->buffer, iter->cpu, + event = ring_buffer_peek(iter->trace_buffer->buffer, iter->cpu, NULL, NULL); } diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 9b52f9cf7a0d..5aa40ab72b57 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -121,7 +121,7 @@ static int func_prolog_dec(struct trace_array *tr, if (!irqs_disabled_flags(*flags)) return 0; - *data = per_cpu_ptr(tr->data, cpu); + *data = per_cpu_ptr(tr->trace_buffer.data, cpu); disabled = atomic_inc_return(&(*data)->disabled); if (likely(disabled == 1)) @@ -175,7 +175,7 @@ static int irqsoff_set_flag(u32 old_flags, u32 bit, int set) per_cpu(tracing_cpu, cpu) = 0; tracing_max_latency = 0; - tracing_reset_online_cpus(irqsoff_trace); + tracing_reset_online_cpus(&irqsoff_trace->trace_buffer); return start_irqsoff_tracer(irqsoff_trace, set); } @@ -380,7 +380,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip) if (per_cpu(tracing_cpu, cpu)) return; - data = per_cpu_ptr(tr->data, cpu); + data = per_cpu_ptr(tr->trace_buffer.data, cpu); if (unlikely(!data) || atomic_read(&data->disabled)) return; @@ -418,7 +418,7 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip) if (!tracer_enabled) return; - data = per_cpu_ptr(tr->data, cpu); + data = per_cpu_ptr(tr->trace_buffer.data, cpu); if (unlikely(!data) || !data->critical_start || atomic_read(&data->disabled)) @@ -568,7 +568,7 @@ static void __irqsoff_tracer_init(struct trace_array *tr) irqsoff_trace = tr; /* make sure that the tracer is visible */ smp_wmb(); - tracing_reset_online_cpus(tr); + tracing_reset_online_cpus(&tr->trace_buffer); if (start_irqsoff_tracer(tr, is_graph())) printk(KERN_ERR "failed to start irqsoff tracer\n"); diff --git a/kernel/trace/trace_kdb.c b/kernel/trace/trace_kdb.c index 349f6941e8f2..bd90e1b06088 100644 --- a/kernel/trace/trace_kdb.c +++ b/kernel/trace/trace_kdb.c @@ -26,7 +26,7 @@ static void ftrace_dump_buf(int skip_lines, long cpu_file) trace_init_global_iter(&iter); for_each_tracing_cpu(cpu) { - atomic_inc(&per_cpu_ptr(iter.tr->data, cpu)->disabled); + atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled); } old_userobj = trace_flags; @@ -46,14 +46,14 @@ static void ftrace_dump_buf(int skip_lines, long cpu_file) if (cpu_file == 
RING_BUFFER_ALL_CPUS) { for_each_tracing_cpu(cpu) { iter.buffer_iter[cpu] = - ring_buffer_read_prepare(iter.tr->buffer, cpu); + ring_buffer_read_prepare(iter.trace_buffer->buffer, cpu); ring_buffer_read_start(iter.buffer_iter[cpu]); tracing_iter_reset(&iter, cpu); } } else { iter.cpu_file = cpu_file; iter.buffer_iter[cpu_file] = - ring_buffer_read_prepare(iter.tr->buffer, cpu_file); + ring_buffer_read_prepare(iter.trace_buffer->buffer, cpu_file); ring_buffer_read_start(iter.buffer_iter[cpu_file]); tracing_iter_reset(&iter, cpu_file); } @@ -83,7 +83,7 @@ out: trace_flags = old_userobj; for_each_tracing_cpu(cpu) { - atomic_dec(&per_cpu_ptr(iter.tr->data, cpu)->disabled); + atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled); } for_each_tracing_cpu(cpu) diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index 2472f6f76b50..a5e8f4878bfa 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -31,7 +31,7 @@ static void mmio_reset_data(struct trace_array *tr) overrun_detected = false; prev_overruns = 0; - tracing_reset_online_cpus(tr); + tracing_reset_online_cpus(&tr->trace_buffer); } static int mmio_trace_init(struct trace_array *tr) @@ -128,7 +128,7 @@ static void mmio_close(struct trace_iterator *iter) static unsigned long count_overruns(struct trace_iterator *iter) { unsigned long cnt = atomic_xchg(&dropped_count, 0); - unsigned long over = ring_buffer_overruns(iter->tr->buffer); + unsigned long over = ring_buffer_overruns(iter->trace_buffer->buffer); if (over > prev_overruns) cnt += over - prev_overruns; @@ -309,7 +309,7 @@ static void __trace_mmiotrace_rw(struct trace_array *tr, struct mmiotrace_rw *rw) { struct ftrace_event_call *call = &event_mmiotrace_rw; - struct ring_buffer *buffer = tr->buffer; + struct ring_buffer *buffer = tr->trace_buffer.buffer; struct ring_buffer_event *event; struct trace_mmiotrace_rw *entry; int pc = preempt_count(); @@ -330,7 +330,7 @@ static void __trace_mmiotrace_rw(struct trace_array *tr, void mmio_trace_rw(struct mmiotrace_rw *rw) { struct trace_array *tr = mmio_trace_array; - struct trace_array_cpu *data = per_cpu_ptr(tr->data, smp_processor_id()); + struct trace_array_cpu *data = per_cpu_ptr(tr->trace_buffer.data, smp_processor_id()); __trace_mmiotrace_rw(tr, data, rw); } @@ -339,7 +339,7 @@ static void __trace_mmiotrace_map(struct trace_array *tr, struct mmiotrace_map *map) { struct ftrace_event_call *call = &event_mmiotrace_map; - struct ring_buffer *buffer = tr->buffer; + struct ring_buffer *buffer = tr->trace_buffer.buffer; struct ring_buffer_event *event; struct trace_mmiotrace_map *entry; int pc = preempt_count(); @@ -363,7 +363,7 @@ void mmio_trace_mapping(struct mmiotrace_map *map) struct trace_array_cpu *data; preempt_disable(); - data = per_cpu_ptr(tr->data, smp_processor_id()); + data = per_cpu_ptr(tr->trace_buffer.data, smp_processor_id()); __trace_mmiotrace_map(tr, data, map); preempt_enable(); } diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index aa92ac322ba2..2edc7220d017 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -643,7 +643,7 @@ lat_print_timestamp(struct trace_iterator *iter, u64 next_ts) { unsigned long verbose = trace_flags & TRACE_ITER_VERBOSE; unsigned long in_ns = iter->iter_flags & TRACE_FILE_TIME_IN_NS; - unsigned long long abs_ts = iter->ts - iter->tr->time_start; + unsigned long long abs_ts = iter->ts - iter->trace_buffer->time_start; unsigned long long rel_ts = next_ts - iter->ts; struct trace_seq *s 
= &iter->seq; diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 1ffe39abd6fc..4e98e3b257a3 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -28,7 +28,7 @@ tracing_sched_switch_trace(struct trace_array *tr, unsigned long flags, int pc) { struct ftrace_event_call *call = &event_context_switch; - struct ring_buffer *buffer = tr->buffer; + struct ring_buffer *buffer = tr->trace_buffer.buffer; struct ring_buffer_event *event; struct ctx_switch_entry *entry; @@ -69,7 +69,7 @@ probe_sched_switch(void *ignore, struct task_struct *prev, struct task_struct *n pc = preempt_count(); local_irq_save(flags); cpu = raw_smp_processor_id(); - data = per_cpu_ptr(ctx_trace->data, cpu); + data = per_cpu_ptr(ctx_trace->trace_buffer.data, cpu); if (likely(!atomic_read(&data->disabled))) tracing_sched_switch_trace(ctx_trace, prev, next, flags, pc); @@ -86,7 +86,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr, struct ftrace_event_call *call = &event_wakeup; struct ring_buffer_event *event; struct ctx_switch_entry *entry; - struct ring_buffer *buffer = tr->buffer; + struct ring_buffer *buffer = tr->trace_buffer.buffer; event = trace_buffer_lock_reserve(buffer, TRACE_WAKE, sizeof(*entry), flags, pc); @@ -123,7 +123,7 @@ probe_sched_wakeup(void *ignore, struct task_struct *wakee, int success) pc = preempt_count(); local_irq_save(flags); cpu = raw_smp_processor_id(); - data = per_cpu_ptr(ctx_trace->data, cpu); + data = per_cpu_ptr(ctx_trace->trace_buffer.data, cpu); if (likely(!atomic_read(&data->disabled))) tracing_sched_wakeup_trace(ctx_trace, wakee, current, diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index f9ceb75a95b7..c16f8cd63c3c 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -89,7 +89,7 @@ func_prolog_preempt_disable(struct trace_array *tr, if (cpu != wakeup_current_cpu) goto out_enable; - *data = per_cpu_ptr(tr->data, cpu); + *data = per_cpu_ptr(tr->trace_buffer.data, cpu); disabled = atomic_inc_return(&(*data)->disabled); if (unlikely(disabled != 1)) goto out; @@ -353,7 +353,7 @@ probe_wakeup_sched_switch(void *ignore, /* disable local data, not wakeup_cpu data */ cpu = raw_smp_processor_id(); - disabled = atomic_inc_return(&per_cpu_ptr(wakeup_trace->data, cpu)->disabled); + disabled = atomic_inc_return(&per_cpu_ptr(wakeup_trace->trace_buffer.data, cpu)->disabled); if (likely(disabled != 1)) goto out; @@ -365,7 +365,7 @@ probe_wakeup_sched_switch(void *ignore, goto out_unlock; /* The task we are waiting for is waking up */ - data = per_cpu_ptr(wakeup_trace->data, wakeup_cpu); + data = per_cpu_ptr(wakeup_trace->trace_buffer.data, wakeup_cpu); __trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc); tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc); @@ -387,7 +387,7 @@ out_unlock: arch_spin_unlock(&wakeup_lock); local_irq_restore(flags); out: - atomic_dec(&per_cpu_ptr(wakeup_trace->data, cpu)->disabled); + atomic_dec(&per_cpu_ptr(wakeup_trace->trace_buffer.data, cpu)->disabled); } static void __wakeup_reset(struct trace_array *tr) @@ -405,7 +405,7 @@ static void wakeup_reset(struct trace_array *tr) { unsigned long flags; - tracing_reset_online_cpus(tr); + tracing_reset_online_cpus(&tr->trace_buffer); local_irq_save(flags); arch_spin_lock(&wakeup_lock); @@ -435,7 +435,7 @@ probe_wakeup(void *ignore, struct task_struct *p, int success) return; pc = preempt_count(); - disabled = 
atomic_inc_return(&per_cpu_ptr(wakeup_trace->data, cpu)->disabled); + disabled = atomic_inc_return(&per_cpu_ptr(wakeup_trace->trace_buffer.data, cpu)->disabled); if (unlikely(disabled != 1)) goto out; @@ -458,7 +458,7 @@ probe_wakeup(void *ignore, struct task_struct *p, int success) local_save_flags(flags); - data = per_cpu_ptr(wakeup_trace->data, wakeup_cpu); + data = per_cpu_ptr(wakeup_trace->trace_buffer.data, wakeup_cpu); data->preempt_timestamp = ftrace_now(cpu); tracing_sched_wakeup_trace(wakeup_trace, p, current, flags, pc); @@ -472,7 +472,7 @@ probe_wakeup(void *ignore, struct task_struct *p, int success) out_locked: arch_spin_unlock(&wakeup_lock); out: - atomic_dec(&per_cpu_ptr(wakeup_trace->data, cpu)->disabled); + atomic_dec(&per_cpu_ptr(wakeup_trace->trace_buffer.data, cpu)->disabled); } static void start_wakeup_tracer(struct trace_array *tr) diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 51c819c12c29..8672c40cb153 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -21,13 +21,13 @@ static inline int trace_valid_entry(struct trace_entry *entry) return 0; } -static int trace_test_buffer_cpu(struct trace_array *tr, int cpu) +static int trace_test_buffer_cpu(struct trace_buffer *buf, int cpu) { struct ring_buffer_event *event; struct trace_entry *entry; unsigned int loops = 0; - while ((event = ring_buffer_consume(tr->buffer, cpu, NULL, NULL))) { + while ((event = ring_buffer_consume(buf->buffer, cpu, NULL, NULL))) { entry = ring_buffer_event_data(event); /* @@ -58,7 +58,7 @@ static int trace_test_buffer_cpu(struct trace_array *tr, int cpu) * Test the trace buffer to see if all the elements * are still sane. */ -static int trace_test_buffer(struct trace_array *tr, unsigned long *count) +static int trace_test_buffer(struct trace_buffer *buf, unsigned long *count) { unsigned long flags, cnt = 0; int cpu, ret = 0; @@ -67,7 +67,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count) local_irq_save(flags); arch_spin_lock(&ftrace_max_lock); - cnt = ring_buffer_entries(tr->buffer); + cnt = ring_buffer_entries(buf->buffer); /* * The trace_test_buffer_cpu runs a while loop to consume all data. 
@@ -78,7 +78,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count) */ tracing_off(); for_each_possible_cpu(cpu) { - ret = trace_test_buffer_cpu(tr, cpu); + ret = trace_test_buffer_cpu(buf, cpu); if (ret) break; } @@ -355,7 +355,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace, msleep(100); /* we should have nothing in the buffer */ - ret = trace_test_buffer(tr, &count); + ret = trace_test_buffer(&tr->trace_buffer, &count); if (ret) goto out; @@ -376,7 +376,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace, ftrace_enabled = 0; /* check the trace buffer */ - ret = trace_test_buffer(tr, &count); + ret = trace_test_buffer(&tr->trace_buffer, &count); tracing_start(); /* we should only have one item */ @@ -666,7 +666,7 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) ftrace_enabled = 0; /* check the trace buffer */ - ret = trace_test_buffer(tr, &count); + ret = trace_test_buffer(&tr->trace_buffer, &count); trace->reset(tr); tracing_start(); @@ -737,7 +737,7 @@ trace_selftest_startup_function_graph(struct tracer *trace, * Simulate the init() callback but we attach a watchdog callback * to detect and recover from possible hangs */ - tracing_reset_online_cpus(tr); + tracing_reset_online_cpus(&tr->trace_buffer); set_graph_array(tr); ret = register_ftrace_graph(&trace_graph_return, &trace_graph_entry_watchdog); @@ -760,7 +760,7 @@ trace_selftest_startup_function_graph(struct tracer *trace, tracing_stop(); /* check the trace buffer */ - ret = trace_test_buffer(tr, &count); + ret = trace_test_buffer(&tr->trace_buffer, &count); trace->reset(tr); tracing_start(); @@ -815,9 +815,9 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr) /* stop the tracing. */ tracing_stop(); /* check both trace buffers */ - ret = trace_test_buffer(tr, NULL); + ret = trace_test_buffer(&tr->trace_buffer, NULL); if (!ret) - ret = trace_test_buffer(&max_tr, &count); + ret = trace_test_buffer(&tr->max_buffer, &count); trace->reset(tr); tracing_start(); @@ -877,9 +877,9 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr) /* stop the tracing. */ tracing_stop(); /* check both trace buffers */ - ret = trace_test_buffer(tr, NULL); + ret = trace_test_buffer(&tr->trace_buffer, NULL); if (!ret) - ret = trace_test_buffer(&max_tr, &count); + ret = trace_test_buffer(&tr->max_buffer, &count); trace->reset(tr); tracing_start(); @@ -943,11 +943,11 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array * /* stop the tracing. */ tracing_stop(); /* check both trace buffers */ - ret = trace_test_buffer(tr, NULL); + ret = trace_test_buffer(&tr->trace_buffer, NULL); if (ret) goto out; - ret = trace_test_buffer(&max_tr, &count); + ret = trace_test_buffer(&tr->max_buffer, &count); if (ret) goto out; @@ -973,11 +973,11 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array * /* stop the tracing. */ tracing_stop(); /* check both trace buffers */ - ret = trace_test_buffer(tr, NULL); + ret = trace_test_buffer(&tr->trace_buffer, NULL); if (ret) goto out; - ret = trace_test_buffer(&max_tr, &count); + ret = trace_test_buffer(&tr->max_buffer, &count); if (!ret && !count) { printk(KERN_CONT ".. no entries found .."); @@ -1084,10 +1084,10 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr) /* stop the tracing. 
*/ tracing_stop(); /* check both trace buffers */ - ret = trace_test_buffer(tr, NULL); + ret = trace_test_buffer(&tr->trace_buffer, NULL); printk("ret = %d\n", ret); if (!ret) - ret = trace_test_buffer(&max_tr, &count); + ret = trace_test_buffer(&tr->max_buffer, &count); trace->reset(tr); @@ -1126,7 +1126,7 @@ trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr /* stop the tracing. */ tracing_stop(); /* check the trace buffer */ - ret = trace_test_buffer(tr, &count); + ret = trace_test_buffer(&tr->trace_buffer, &count); trace->reset(tr); tracing_start(); diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 1cd37ffb4093..68f3f344be65 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -321,7 +321,7 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; - buffer = tr->buffer; + buffer = tr->trace_buffer.buffer; event = trace_buffer_lock_reserve(buffer, sys_data->enter_event->event.type, size, 0, 0); if (!event) @@ -355,7 +355,7 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) if (!sys_data) return; - buffer = tr->buffer; + buffer = tr->trace_buffer.buffer; event = trace_buffer_lock_reserve(buffer, sys_data->exit_event->event.type, sizeof(*entry), 0, 0); if (!event) -- cgit From ad909e21bbe69f1d39055d346540abd827190eca Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 6 Mar 2013 21:45:37 -0500 Subject: tracing: Add internal tracing_snapshot() functions The new snapshot feature is quite handy. It's a way for the user to take advantage of the spare buffer that, until then, only the latency tracers used to "snapshot" the buffer when it hit a max latency. Now users can trigger a "snapshot" manually when some condition is hit in a program. But a snapshot currently can not be triggered by a condition inside the kernel. With the addition of tracing_snapshot() and tracing_snapshot_alloc(), snapshots can now be taken when a condition is hit and the developer wants to capture the case without stopping the trace. Note, any snapshot will overwrite the old one, so take care in how this is done. These new functions are to be used like tracing_on(), tracing_off() and trace_printk() are. That is, they should never be called in the mainline Linux kernel. They are solely for the purpose of debugging. The tracing_snapshot() will not allocate a buffer, and it is safe to call from any context (except NMIs). But if a snapshot buffer isn't allocated when it is called, it will write to the live buffer, complaining about the lack of a snapshot buffer, and then stop tracing (giving you the "permanent snapshot"). tracing_snapshot_alloc() will allocate the snapshot buffer if it was not already allocated and then take the snapshot. This routine *may sleep*, and must be called from a context that can sleep. The allocation is done with GFP_KERNEL and is not atomic. If you need a snapshot in an atomic context, say in early boot, it is best to call tracing_snapshot_alloc() beforehand to allocate the buffer; then you can use tracing_snapshot() anywhere you want and still get snapshots. 
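To make the intended usage concrete, a minimal sketch follows. It assumes only what the message above states; my_debug_init() and my_fast_path() are hypothetical names, not part of this patch.

#include <linux/init.h>
#include <linux/kernel.h>

/* Hypothetical init code, running in a context that may sleep. */
static int __init my_debug_init(void)
{
	/*
	 * Allocate the spare buffer up front (may sleep, GFP_KERNEL),
	 * so later snapshots can be taken from atomic context.
	 */
	tracing_snapshot_alloc();
	return 0;
}

/* Hypothetical fast path; may run with interrupts disabled (but not in NMI). */
static void my_fast_path(int status)
{
	if (unlikely(status < 0)) {
		trace_printk("bad status %d, snapshotting\n", status);
		/* Swap the live buffer with the snapshot buffer, keep tracing. */
		tracing_snapshot();
	}
}
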
Cc: Hiraku Toyooka Cc: Thomas Gleixner Cc: Peter Zijlstra Signed-off-by: Steven Rostedt --- include/linux/kernel.h | 4 +++ kernel/trace/trace.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index c566927efcbd..bc5392a326ab 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -483,6 +483,8 @@ enum ftrace_dump_mode { void tracing_on(void); void tracing_off(void); int tracing_is_on(void); +void tracing_snapshot(void); +void tracing_snapshot_alloc(void); extern void tracing_start(void); extern void tracing_stop(void); @@ -570,6 +572,8 @@ static inline void trace_dump_stack(void) { } static inline void tracing_on(void) { } static inline void tracing_off(void) { } static inline int tracing_is_on(void) { return 0; } +static inline void tracing_snapshot(void) { } +static inline void tracing_snapshot_alloc(void) { } static inline __printf(1, 2) int trace_printk(const char *fmt, ...) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 3a89496dc99b..307524d784ec 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -339,6 +339,90 @@ void tracing_on(void) } EXPORT_SYMBOL_GPL(tracing_on); +#ifdef CONFIG_TRACER_SNAPSHOT +/** + * trace_snapshot - take a snapshot of the current buffer. + * + * This causes a swap between the snapshot buffer and the current live + * tracing buffer. You can use this to take snapshots of the live + * trace when some condition is triggered, but continue to trace. + * + * Note, make sure to allocate the snapshot with either + * a tracing_snapshot_alloc(), or by doing it manually + * with: echo 1 > /sys/kernel/debug/tracing/snapshot + * + * If the snapshot buffer is not allocated, it will stop tracing. + * Basically making a permanent snapshot. + */ +void tracing_snapshot(void) +{ + struct trace_array *tr = &global_trace; + struct tracer *tracer = tr->current_trace; + unsigned long flags; + + if (!tr->allocated_snapshot) { + trace_printk("*** SNAPSHOT NOT ALLOCATED ***\n"); + trace_printk("*** stopping trace here! ***\n"); + tracing_off(); + return; + } + + /* Note, snapshot can not be used when the tracer uses it */ + if (tracer->use_max_tr) { + trace_printk("*** LATENCY TRACER ACTIVE ***\n"); + trace_printk("*** Can not use snapshot (sorry) ***\n"); + return; + } + + local_irq_save(flags); + update_max_tr(tr, current, smp_processor_id()); + local_irq_restore(flags); +} + +static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf, + struct trace_buffer *size_buf, int cpu_id); + +/** + * trace_snapshot_alloc - allocate and take a snapshot of the current buffer. + * + * This is similar to trace_snapshot(), but it will allocate the + * snapshot buffer if it isn't already allocated. Use this only + * where it is safe to sleep, as the allocation may sleep. + * + * This causes a swap between the snapshot buffer and the current live + * tracing buffer. You can use this to take snapshots of the live + * trace when some condition is triggered, but continue to trace. 
+ */ +void tracing_snapshot_alloc(void) +{ + struct trace_array *tr = &global_trace; + int ret; + + if (!tr->allocated_snapshot) { + + /* allocate spare buffer */ + ret = resize_buffer_duplicate_size(&tr->max_buffer, + &tr->trace_buffer, RING_BUFFER_ALL_CPUS); + if (WARN_ON(ret < 0)) + return; + + tr->allocated_snapshot = true; + } + + tracing_snapshot(); +} +#else +void tracing_snapshot(void) +{ + WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used"); +} +void tracing_snapshot_alloc(void) +{ + /* Give warning */ + tracing_snapshot(); +} +#endif /* CONFIG_TRACER_SNAPSHOT */ + /** * tracing_off - turn off tracing buffers * -- cgit From 09ae72348eccb60e304cf8ce94653f4a78fcd407 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 8 Mar 2013 21:02:34 -0500 Subject: tracing: Add trace_puts() for even faster trace_printk() tracing The trace_printk() is extremely fast and is very handy as it can be used in any context (including NMIs!). But it still requires scanning the fmt string for parsing the args. Even the trace_bprintk() requires a scan to know what args will be saved, although it doesn't copy the format string itself. Several times trace_printk() has no args, and wastes cpu cycles scanning the fmt string. Adding trace_puts() allows the developer to use an even faster tracing method that only saves the pointer to the string in the ring buffer without doing any format parsing at all. This will help remove even more of the "Heisenbug" effect, when debugging. Also fixed up the F_printk()s for the ftrace internal bprint and print events. Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Frederic Weisbecker Signed-off-by: Steven Rostedt --- include/linux/kernel.h | 41 +++++++++++++++++++++++- kernel/trace/trace.c | 76 ++++++++++++++++++++++++++++++++++++++++++++ kernel/trace/trace.h | 2 ++ kernel/trace/trace_entries.h | 23 +++++++++++--- kernel/trace/trace_output.c | 75 +++++++++++++++++++++++++++++++++++++++++++ kernel/trace/trace_output.h | 2 ++ 6 files changed, 214 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index bc5392a326ab..a3a5574a61fc 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -514,7 +514,8 @@ do { \ * * This is intended as a debugging tool for the developer only. * Please refrain from leaving trace_printks scattered around in - * your code. + * your code. (Extra memory is used for special buffers that are + * allocated when trace_printk() is used) */ #define trace_printk(fmt, args...) \ @@ -537,6 +538,44 @@ int __trace_bprintk(unsigned long ip, const char *fmt, ...); extern __printf(2, 3) int __trace_printk(unsigned long ip, const char *fmt, ...); +/** + * trace_puts - write a string into the ftrace buffer + * @str: the string to record + * + * Note: __trace_bputs is an internal function for trace_puts and + * the @ip is passed in via the trace_puts macro. + * + * This is similar to trace_printk() but is made for those really fast + * paths that a developer wants the least amount of "Heisenbug" affects, + * where the processing of the print format is still too much. + * + * This function allows a kernel developer to debug fast path sections + * that printk is not appropriate for. By scattering in various + * printk like tracing in the code, a developer can quickly see + * where problems are occurring. + * + * This is intended as a debugging tool for the developer only. + * Please refrain from leaving trace_puts scattered around in + * your code. 
(Extra memory is used for special buffers that are + * allocated when trace_puts() is used) + * + * Returns: 0 if nothing was written, positive # if string was. + * (1 when __trace_bputs is used, strlen(str) when __trace_puts is used) + */ + +extern int __trace_bputs(unsigned long ip, const char *str); +extern int __trace_puts(unsigned long ip, const char *str, int size); +#define trace_puts(str) ({ \ + static const char *trace_printk_fmt \ + __attribute__((section("__trace_printk_fmt"))) = \ + __builtin_constant_p(str) ? str : NULL; \ + \ + if (__builtin_constant_p(str)) \ + __trace_bputs(_THIS_IP_, trace_printk_fmt); \ + else \ + __trace_puts(_THIS_IP_, str, strlen(str)); \ +}) + extern void trace_dump_stack(void); /* diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4021a5e66412..5043a0c4dde0 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -350,6 +350,77 @@ void tracing_on(void) } EXPORT_SYMBOL_GPL(tracing_on); +/** + * __trace_puts - write a constant string into the trace buffer. + * @ip: The address of the caller + * @str: The constant string to write + * @size: The size of the string. + */ +int __trace_puts(unsigned long ip, const char *str, int size) +{ + struct ring_buffer_event *event; + struct ring_buffer *buffer; + struct print_entry *entry; + unsigned long irq_flags; + int alloc; + + alloc = sizeof(*entry) + size + 2; /* possible \n added */ + + local_save_flags(irq_flags); + buffer = global_trace.trace_buffer.buffer; + event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, + irq_flags, preempt_count()); + if (!event) + return 0; + + entry = ring_buffer_event_data(event); + entry->ip = ip; + + memcpy(&entry->buf, str, size); + + /* Add a newline if necessary */ + if (entry->buf[size - 1] != '\n') { + entry->buf[size] = '\n'; + entry->buf[size + 1] = '\0'; + } else + entry->buf[size] = '\0'; + + __buffer_unlock_commit(buffer, event); + + return size; +} +EXPORT_SYMBOL_GPL(__trace_puts); + +/** + * __trace_bputs - write the pointer to a constant string into trace buffer + * @ip: The address of the caller + * @str: The constant string to write to the buffer to + */ +int __trace_bputs(unsigned long ip, const char *str) +{ + struct ring_buffer_event *event; + struct ring_buffer *buffer; + struct bputs_entry *entry; + unsigned long irq_flags; + int size = sizeof(struct bputs_entry); + + local_save_flags(irq_flags); + buffer = global_trace.trace_buffer.buffer; + event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size, + irq_flags, preempt_count()); + if (!event) + return 0; + + entry = ring_buffer_event_data(event); + entry->ip = ip; + entry->str = str; + + __buffer_unlock_commit(buffer, event); + + return 1; +} +EXPORT_SYMBOL_GPL(__trace_bputs); + #ifdef CONFIG_TRACER_SNAPSHOT /** * trace_snapshot - take a snapshot of the current buffer. 
@@ -2475,6 +2546,11 @@ enum print_line_t print_trace_line(struct trace_iterator *iter) return ret; } + if (iter->ent->type == TRACE_BPUTS && + trace_flags & TRACE_ITER_PRINTK && + trace_flags & TRACE_ITER_PRINTK_MSGONLY) + return trace_print_bputs_msg_only(iter); + if (iter->ent->type == TRACE_BPRINT && trace_flags & TRACE_ITER_PRINTK && trace_flags & TRACE_ITER_PRINTK_MSGONLY) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 26bc71834041..d5764a8532e2 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -34,6 +34,7 @@ enum trace_type { TRACE_GRAPH_ENT, TRACE_USER_STACK, TRACE_BLK, + TRACE_BPUTS, __TRACE_LAST_TYPE, }; @@ -277,6 +278,7 @@ extern void __ftrace_bad_type(void); IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\ IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \ IF_ASSIGN(var, ent, struct bprint_entry, TRACE_BPRINT); \ + IF_ASSIGN(var, ent, struct bputs_entry, TRACE_BPUTS); \ IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \ TRACE_MMIO_RW); \ IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \ diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index 4108e1250ca2..e2d027ac66a2 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h @@ -223,8 +223,8 @@ FTRACE_ENTRY(bprint, bprint_entry, __dynamic_array( u32, buf ) ), - F_printk("%08lx fmt:%p", - __entry->ip, __entry->fmt), + F_printk("%pf: %s", + (void *)__entry->ip, __entry->fmt), FILTER_OTHER ); @@ -238,8 +238,23 @@ FTRACE_ENTRY(print, print_entry, __dynamic_array( char, buf ) ), - F_printk("%08lx %s", - __entry->ip, __entry->buf), + F_printk("%pf: %s", + (void *)__entry->ip, __entry->buf), + + FILTER_OTHER +); + +FTRACE_ENTRY(bputs, bputs_entry, + + TRACE_BPUTS, + + F_STRUCT( + __field( unsigned long, ip ) + __field( const char *, str ) + ), + + F_printk("%pf: %s", + (void *)__entry->ip, __entry->str), FILTER_OTHER ); diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 2edc7220d017..19f48e7edc39 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -37,6 +37,22 @@ int trace_print_seq(struct seq_file *m, struct trace_seq *s) return ret; } +enum print_line_t trace_print_bputs_msg_only(struct trace_iterator *iter) +{ + struct trace_seq *s = &iter->seq; + struct trace_entry *entry = iter->ent; + struct bputs_entry *field; + int ret; + + trace_assign_type(field, entry); + + ret = trace_seq_puts(s, field->str); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + return TRACE_TYPE_HANDLED; +} + enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; @@ -1244,6 +1260,64 @@ static struct trace_event trace_user_stack_event = { .funcs = &trace_user_stack_funcs, }; +/* TRACE_BPUTS */ +static enum print_line_t +trace_bputs_print(struct trace_iterator *iter, int flags, + struct trace_event *event) +{ + struct trace_entry *entry = iter->ent; + struct trace_seq *s = &iter->seq; + struct bputs_entry *field; + + trace_assign_type(field, entry); + + if (!seq_print_ip_sym(s, field->ip, flags)) + goto partial; + + if (!trace_seq_puts(s, ": ")) + goto partial; + + if (!trace_seq_puts(s, field->str)) + goto partial; + + return TRACE_TYPE_HANDLED; + + partial: + return TRACE_TYPE_PARTIAL_LINE; +} + + +static enum print_line_t +trace_bputs_raw(struct trace_iterator *iter, int flags, + struct trace_event *event) +{ + struct bputs_entry *field; + struct trace_seq *s = &iter->seq; + + trace_assign_type(field, iter->ent); + + if (!trace_seq_printf(s, ": %lx : ", 
field->ip)) + goto partial; + + if (!trace_seq_puts(s, field->str)) + goto partial; + + return TRACE_TYPE_HANDLED; + + partial: + return TRACE_TYPE_PARTIAL_LINE; +} + +static struct trace_event_functions trace_bputs_funcs = { + .trace = trace_bputs_print, + .raw = trace_bputs_raw, +}; + +static struct trace_event trace_bputs_event = { + .type = TRACE_BPUTS, + .funcs = &trace_bputs_funcs, +}; + /* TRACE_BPRINT */ static enum print_line_t trace_bprint_print(struct trace_iterator *iter, int flags, @@ -1356,6 +1430,7 @@ static struct trace_event *events[] __initdata = { &trace_wake_event, &trace_stack_event, &trace_user_stack_event, + &trace_bputs_event, &trace_bprint_event, &trace_print_event, NULL diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h index c038eba0492b..af77870de278 100644 --- a/kernel/trace/trace_output.h +++ b/kernel/trace/trace_output.h @@ -4,6 +4,8 @@ #include #include "trace.h" +extern enum print_line_t +trace_print_bputs_msg_only(struct trace_iterator *iter); extern enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter); extern enum print_line_t -- cgit From 9d3c752c062e3266f1051ba0825276ea1e2777da Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 8 Mar 2013 22:11:57 -0500 Subject: tracing: Optimize trace_printk() with one arg to use trace_puts() Although trace_printk() is extremely fast, especially when it uses trace_bprintk() (writes args straight to the buffer instead of inserting them into a string), it still has the overhead of calling one of the printf sprintf() functions, which need to scan the fmt string to determine what args, if any, it has. This is a waste of precious CPU cycles if the printk format has no args but a single constant string. It is better to use trace_puts(), which does not have the overhead of the fmt scanning. But wouldn't it be nice if the developer didn't have to think about such things, and the compiler would just do it for them? trace_printk("this string has no args\n"); [...] trace_printk("this string does %p %d\n", foo, bar); As tracing is critical to have the least amount of overhead, especially when dealing with race conditions, and you want to eliminate any "Heisenbugs", you want trace_printk() to use the fastest possible means of tracing. Currently the macro magic determines whether it will use trace_bprintk(), or, if the fmt is a dynamic string (a variable), fall back to the slow trace_printk() method that does a full snprintf() before copying it into the buffer, whereas trace_bprintk() only copies the pointer to the fmt and the args into the buffer. Well, now there's a way to spend some more Hogwarts cash and come up with new fancy macro magic. #define trace_printk(fmt, ...) \ do { \ char _______STR[] = __stringify((__VA_ARGS__)); \ if (sizeof(_______STR) > 3) \ do_trace_printk(fmt, ##__VA_ARGS__); \ else \ trace_puts(fmt); \ } while (0) The above needs a bit of explaining (both here and in the comments). By stringifying the __VA_ARGS__, we can, at compile time, determine whether any args are being passed to trace_printk(). The extra parentheses are required, otherwise the compiler complains about too many parameters for __stringify if there is more than one arg. When there are no args, the __stringify((__VA_ARGS__)) converts into "()\0", a string of 3 characters. Anything else will be a string containing more than 3 characters. Now we assign that string to a dynamic char array and then take the sizeof() of that array, as the small userspace demonstration below shows. 
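The stringify step can be checked with a small standalone userspace program (a hedged demonstration, not kernel code: __stringify is defined here the same way the kernel's include/linux/stringify.h defines it, and the empty-__VA_ARGS__ case relies on the same gcc behavior the kernel macro relies on).

#include <stdio.h>

#define __stringify_1(x...)	#x
#define __stringify(x...)	__stringify_1(x)

/* Mimics only the detection step of the trace_printk() macro above. */
#define check_args(...) \
do { \
	char _______STR[] = __stringify((__VA_ARGS__)); \
	printf("%-10s sizeof=%zu -> %s\n", _______STR, \
	       sizeof(_______STR), \
	       sizeof(_______STR) > 3 ? "has args" : "no args"); \
} while (0)

int main(void)
{
	check_args();		/* "()"     sizeof=3 -> no args  */
	check_args(1);		/* "(1)"    sizeof=4 -> has args */
	check_args(1, 2);	/* "(1, 2)" sizeof=7 -> has args */
	return 0;
}
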
If it is greater than 3 characters, we know trace_printk() has args and we need to do the full "do_trace_printk()" on them, otherwise it was only passed a single arg and we can optimize to use trace_puts(). Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Frederic Weisbecker Signed-off-by: Steven "The King of Nasty Macros!" Rostedt --- include/linux/kernel.h | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index a3a5574a61fc..d0a16fe03fef 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -516,9 +516,30 @@ do { \ * Please refrain from leaving trace_printks scattered around in * your code. (Extra memory is used for special buffers that are * allocated when trace_printk() is used) + * + * A little optization trick is done here. If there's only one + * argument, there's no need to scan the string for printf formats. + * The trace_puts() will suffice. But how can we take advantage of + * using trace_puts() when trace_printk() has only one argument? + * By stringifying the args and checking the size we can tell + * whether or not there are args. __stringify((__VA_ARGS__)) will + * turn into "()\0" with a size of 3 when there are no args, anything + * else will be bigger. All we need to do is define a string to this, + * and then take its size and compare to 3. If it's bigger, use + * do_trace_printk() otherwise, optimize it to trace_puts(). Then just + * let gcc optimize the rest. */ -#define trace_printk(fmt, args...) \ +#define trace_printk(fmt, ...) \ +do { \ + char _______STR[] = __stringify((__VA_ARGS__)); \ + if (sizeof(_______STR) > 3) \ + do_trace_printk(fmt, ##__VA_ARGS__); \ + else \ + trace_puts(fmt); \ +} while (0) + +#define do_trace_printk(fmt, args...) \ do { \ static const char *trace_printk_fmt \ __attribute__((section("__trace_printk_fmt"))) = \ -- cgit From 57d01ad09721fb7719c4c8c72b434398186f35a0 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 12 Mar 2013 12:38:06 -0400 Subject: tracing: Fix comments for ftrace_event_file/call flags Most of the flags for the struct ftrace_event_file were moved over to the flags of the struct ftrace_event_call, but the comments were never updated. 
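As background for the bit comments being fixed here (and for the soft-disable patch further below), the bit-index/mask convention works as in this small standalone sketch; the MY_FL_* names are made up for illustration:

#include <stdio.h>

/* One enum of bit indices, one enum of masks derived from them. */
enum {
	MY_FL_ENABLED_BIT,		/* bit 0 */
	MY_FL_RECORDED_CMD_BIT,		/* bit 1 */
};

enum {
	MY_FL_ENABLED		= (1 << MY_FL_ENABLED_BIT),
	MY_FL_RECORDED_CMD	= (1 << MY_FL_RECORDED_CMD_BIT),
};

int main(void)
{
	unsigned long flags = 0;

	/* Kernel code sets this atomically: set_bit(MY_FL_ENABLED_BIT, &flags). */
	flags |= MY_FL_ENABLED;
	printf("enabled=%d\n", !!(flags & MY_FL_ENABLED));

	/*
	 * Mixing the two enums up is exactly the bug smatch caught in the
	 * soft-disable patch below: (flags & MY_FL_ENABLED_BIT) tests
	 * against the index (0 here), not the mask.
	 */
	return 0;
}
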
Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index d84c4a575514..4cb6cd8338a4 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -230,6 +230,13 @@ struct ftrace_event_call { struct list_head *files; void *mod; void *data; + /* + * bit 0: filter_active + * bit 1: allow trace by non root (cap any) + * bit 2: failed to apply filter + * bit 3: ftrace internal event (do not enable) + * bit 4: Event was enabled by module + */ int flags; /* static flags of different events */ #ifdef CONFIG_PERF_EVENTS @@ -248,7 +255,7 @@ enum { /* * Ftrace event file flags: - * ENABELD - The event is enabled + * ENABLED - The event is enabled * RECORDED_CMD - The comms should be recorded at sched_switch */ enum { @@ -265,12 +272,8 @@ struct ftrace_event_file { /* * 32 bit flags: - * bit 1: enabled - * bit 2: filter_active - * bit 3: enabled cmd record - * bit 4: allow trace by non root (cap any) - * bit 5: failed to apply filter - * bit 6: ftrace internal event (do not enable) + * bit 0: enabled + * bit 1: enabled cmd record * * Changes to flags must hold the event_mutex. * -- cgit From e67efb93f0e9130174293ffaa5975f87b301b531 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 12 Mar 2013 15:07:59 -0400 Subject: ftrace: Clean up function probe methods When a function probe is created, each function that the probe is attached to, a "callback" method is called. On release of the probe, each function entry calls the "free" method. First, "callback" is a confusing name and does not really match what it does. Callback sounds like it will be called when the probe triggers. But that's not the case. This is really an "init" function, so lets rename it as such. Secondly, both "init" and "free" do not pass enough information back to the handlers. Pass back the ops, ip and data for each time the method is called. We have the information, might as well use it. Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 6 ++++-- kernel/trace/ftrace.c | 6 +++--- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index e5ca8ef50e9b..832422d706f4 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -259,8 +259,10 @@ struct ftrace_probe_ops { void (*func)(unsigned long ip, unsigned long parent_ip, void **data); - int (*callback)(unsigned long ip, void **data); - void (*free)(void **data); + int (*init)(struct ftrace_probe_ops *ops, + unsigned long ip, void **data); + void (*free)(struct ftrace_probe_ops *ops, + unsigned long ip, void **data); int (*print)(struct seq_file *m, unsigned long ip, struct ftrace_probe_ops *ops, diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index dab031fec85b..ff0ef41c6d93 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2984,7 +2984,7 @@ static void ftrace_free_entry_rcu(struct rcu_head *rhp) container_of(rhp, struct ftrace_func_probe, rcu); if (entry->ops->free) - entry->ops->free(&entry->data); + entry->ops->free(entry->ops, entry->ip, &entry->data); kfree(entry); } @@ -3045,8 +3045,8 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, * for each function we find. We call the callback * to give the caller an opportunity to do so. 
*/ - if (ops->callback) { - if (ops->callback(rec->ip, &entry->data) < 0) { + if (ops->init) { + if (ops->init(ops, rec->ip, &entry->data) < 0) { /* caller does not like this func */ kfree(entry); continue; -- cgit From 417944c4c7a0f657158d0515f3b8e8c043fd788f Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 12 Mar 2013 13:26:18 -0400 Subject: tracing: Add a way to soft disable trace events In order to let triggers enable or disable events, we need a 'soft' method for doing so. For example, if a function probe is added that lets a user enable or disable events when a function is called, that change must be done without taking locks or a mutex, and it definitely can't sleep. But the full enabling of a tracepoint is expensive. By adding a 'SOFT_DISABLE' flag, and converting the flags to be updated without the protection of a mutex (using set/clear_bit()), this soft disable flag can be used to allow critical sections to enable or disable events from being traced (after the event has been placed into "SOFT_MODE"). Some caveats though: The comm recorder (to map pids with a comm) can not be soft disabled (yet). If you disable an event with a "soft" disable and wait a while before reading the trace, the comm cache may be replaced and you'll get a bunch of <...> for comms in the trace. Reading the "enable" file for an event that is soft disabled will now give you "0*" where the '*' denotes that the tracepoint is still active but the event itself is "disabled". [ fixed _BIT used in & operation : thanks to Dan Carpenter and smatch ] Cc: Dan Carpenter Cc: Tom Zanussi Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 20 ++++++++---- include/trace/ftrace.h | 8 +++++ kernel/trace/trace_events.c | 75 ++++++++++++++++++++++++++++++++++++-------- 3 files changed, 84 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 4cb6cd8338a4..4e28b011e63b 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -251,16 +251,23 @@ struct ftrace_subsystem_dir; enum { FTRACE_EVENT_FL_ENABLED_BIT, FTRACE_EVENT_FL_RECORDED_CMD_BIT, + FTRACE_EVENT_FL_SOFT_MODE_BIT, + FTRACE_EVENT_FL_SOFT_DISABLED_BIT, }; /* * Ftrace event file flags: * ENABLED - The event is enabled * RECORDED_CMD - The comms should be recorded at sched_switch * SOFT_MODE - The event is enabled/disabled by SOFT_DISABLED + * SOFT_DISABLED - When set, do not trace the event (even though its + * tracepoint may be enabled) */ enum { FTRACE_EVENT_FL_ENABLED = (1 << FTRACE_EVENT_FL_ENABLED_BIT), FTRACE_EVENT_FL_RECORDED_CMD = (1 << FTRACE_EVENT_FL_RECORDED_CMD_BIT), + FTRACE_EVENT_FL_SOFT_MODE = (1 << FTRACE_EVENT_FL_SOFT_MODE_BIT), + FTRACE_EVENT_FL_SOFT_DISABLED = (1 << FTRACE_EVENT_FL_SOFT_DISABLED_BIT), }; struct ftrace_event_file { @@ -274,17 +281,18 @@ struct ftrace_event_file { * 32 bit flags: * bit 0: enabled * bit 1: enabled cmd record + * bit 2: enable/disable with the soft disable bit + * bit 3: soft disabled * - * Changes to flags must hold the event_mutex. - * - * Note: Reads of flags do not hold the event_mutex since - * they occur in critical sections. But the way flags + * Note: The bits must be set atomically to prevent races + * from other writers. Reads of flags do not need to be in + * sync as they occur in critical sections. 
But the way flags * is currently used, these changes do not affect the code * except that when a change is made, it may have a slight * delay in propagating the changes to other CPUs due to - * caching and such. + * caching and such. Which is mostly OK ;-) */ - unsigned int flags; + unsigned long flags; }; #define __TRACE_EVENT_FLAGS(name, value) \ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index bbf09c2021b9..4bda044e6c77 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -413,6 +413,10 @@ static inline notrace int ftrace_get_offsets_##call( \ * int __data_size; * int pc; * + * if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, + * &ftrace_file->flags)) + * return; + * * local_save_flags(irq_flags); * pc = preempt_count(); * @@ -518,6 +522,10 @@ ftrace_raw_event_##call(void *__data, proto) \ int __data_size; \ int pc; \ \ + if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, \ + &ftrace_file->flags)) \ + return; \ + \ local_save_flags(irq_flags); \ pc = preempt_count(); \ \ diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 38b54c5edeb9..106640b0df4a 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -205,37 +205,77 @@ void trace_event_enable_cmd_record(bool enable) if (enable) { tracing_start_cmdline_record(); - file->flags |= FTRACE_EVENT_FL_RECORDED_CMD; + set_bit(FTRACE_EVENT_FL_RECORDED_CMD_BIT, &file->flags); } else { tracing_stop_cmdline_record(); - file->flags &= ~FTRACE_EVENT_FL_RECORDED_CMD; + clear_bit(FTRACE_EVENT_FL_RECORDED_CMD_BIT, &file->flags); } } while_for_each_event_file(); mutex_unlock(&event_mutex); } -static int ftrace_event_enable_disable(struct ftrace_event_file *file, - int enable) +static int __ftrace_event_enable_disable(struct ftrace_event_file *file, + int enable, int soft_disable) { struct ftrace_event_call *call = file->event_call; int ret = 0; + int disable; switch (enable) { case 0: - if (file->flags & FTRACE_EVENT_FL_ENABLED) { - file->flags &= ~FTRACE_EVENT_FL_ENABLED; + /* + * When soft_disable is set and enable is cleared, we want + * to clear the SOFT_DISABLED flag but leave the event in the + * state that it was. That is, if the event was enabled and + * SOFT_DISABLED isn't set, then do nothing. But if SOFT_DISABLED + * is set we do not want the event to be enabled before we + * clear the bit. + * + * When soft_disable is not set but the SOFT_MODE flag is, + * we do nothing. Do not disable the tracepoint, otherwise + * "soft enable"s (clearing the SOFT_DISABLED bit) wont work. + */ + if (soft_disable) { + disable = file->flags & FTRACE_EVENT_FL_SOFT_DISABLED; + clear_bit(FTRACE_EVENT_FL_SOFT_MODE_BIT, &file->flags); + } else + disable = !(file->flags & FTRACE_EVENT_FL_SOFT_MODE); + + if (disable && (file->flags & FTRACE_EVENT_FL_ENABLED)) { + clear_bit(FTRACE_EVENT_FL_ENABLED_BIT, &file->flags); if (file->flags & FTRACE_EVENT_FL_RECORDED_CMD) { tracing_stop_cmdline_record(); - file->flags &= ~FTRACE_EVENT_FL_RECORDED_CMD; + clear_bit(FTRACE_EVENT_FL_RECORDED_CMD_BIT, &file->flags); } call->class->reg(call, TRACE_REG_UNREGISTER, file); } + /* If in SOFT_MODE, just set the SOFT_DISABLE_BIT */ + if (file->flags & FTRACE_EVENT_FL_SOFT_MODE) + set_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags); break; case 1: + /* + * When soft_disable is set and enable is set, we want to + * register the tracepoint for the event, but leave the event + * as is. That means, if the event was already enabled, we do + * nothing (but set SOFT_MODE). 
If the event is disabled, we + * set SOFT_DISABLED before enabling the event tracepoint, so + * it still seems to be disabled. + */ + if (!soft_disable) + clear_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags); + else + set_bit(FTRACE_EVENT_FL_SOFT_MODE_BIT, &file->flags); + if (!(file->flags & FTRACE_EVENT_FL_ENABLED)) { + + /* Keep the event disabled, when going to SOFT_MODE. */ + if (soft_disable) + set_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags); + if (trace_flags & TRACE_ITER_RECORD_CMD) { tracing_start_cmdline_record(); - file->flags |= FTRACE_EVENT_FL_RECORDED_CMD; + set_bit(FTRACE_EVENT_FL_RECORDED_CMD_BIT, &file->flags); } ret = call->class->reg(call, TRACE_REG_REGISTER, file); if (ret) { @@ -244,7 +284,7 @@ static int ftrace_event_enable_disable(struct ftrace_event_file *file, "%s\n", call->name); break; } - file->flags |= FTRACE_EVENT_FL_ENABLED; + set_bit(FTRACE_EVENT_FL_ENABLED_BIT, &file->flags); /* WAS_ENABLED gets set but never cleared. */ call->flags |= TRACE_EVENT_FL_WAS_ENABLED; @@ -255,6 +295,12 @@ static int ftrace_event_enable_disable(struct ftrace_event_file *file, return ret; } +static int ftrace_event_enable_disable(struct ftrace_event_file *file, + int enable) +{ + return __ftrace_event_enable_disable(file, enable, 0); +} + static void ftrace_clear_events(struct trace_array *tr) { struct ftrace_event_file *file; @@ -547,12 +593,15 @@ event_enable_read(struct file *filp, char __user *ubuf, size_t cnt, struct ftrace_event_file *file = filp->private_data; char *buf; - if (file->flags & FTRACE_EVENT_FL_ENABLED) - buf = "1\n"; - else + if (file->flags & FTRACE_EVENT_FL_ENABLED) { + if (file->flags & FTRACE_EVENT_FL_SOFT_DISABLED) + buf = "0*\n"; + else + buf = "1\n"; + } else buf = "0\n"; - return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2); + return simple_read_from_buffer(ubuf, cnt, ppos, buf, strlen(buf)); } static ssize_t -- cgit From c142be8ebe0b7bf73c8a0063925623f3e4b980c0 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 13 Mar 2013 09:55:57 -0400 Subject: tracing: Add skip argument to trace_dump_stack() Although trace_dump_stack() already skips three functions in the call to the stack trace, which gets the stack trace to start at the caller of the function, the caller may want to skip some more too (as it may have helper functions). Add a skip argument to trace_dump_stack() that lets the caller skip functions in the backtrace that it doesn't care about. 
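A short sketch of why the argument matters; my_debug_checkpoint() is a hypothetical helper, not part of the patch.

#include <linux/kernel.h>

/* Hypothetical helper wrapping the dump; callers want their own frame first. */
static void my_debug_checkpoint(const char *why)
{
	trace_printk("checkpoint: %s\n", why);
	/*
	 * Skip one extra frame so the recorded stack starts at this
	 * helper's caller rather than at the helper itself.
	 */
	trace_dump_stack(1);
}
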
Signed-off-by: Steven Rostedt --- include/linux/kernel.h | 2 +- kernel/trace/trace.c | 13 +++++++++---- 2 files changed, 10 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index d0a16fe03fef..239dbb9627ca 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -597,7 +597,7 @@ extern int __trace_puts(unsigned long ip, const char *str, int size); __trace_puts(_THIS_IP_, str, strlen(str)); \ }) -extern void trace_dump_stack(void); +extern void trace_dump_stack(int skip); /* * The double __builtin_constant_p is because gcc will give us an error diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c5b844621562..8aa53213201f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1657,8 +1657,9 @@ void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, /** * trace_dump_stack - record a stack back trace in the trace buffer + * @skip: Number of functions to skip (helper handlers) */ -void trace_dump_stack(void) +void trace_dump_stack(int skip) { unsigned long flags; @@ -1667,9 +1668,13 @@ void trace_dump_stack(void) local_save_flags(flags); - /* skipping 3 traces, seems to get us at the caller of this function */ - __ftrace_trace_stack(global_trace.trace_buffer.buffer, flags, 3, - preempt_count(), NULL); + /* + * Skip 3 more, seems to get us at the caller of + * this function. + */ + skip += 3; + __ftrace_trace_stack(global_trace.trace_buffer.buffer, + flags, skip, preempt_count(), NULL); } static DEFINE_PER_CPU(int, user_stack_count); -- cgit From 8aacf017b065a805d27467843490c976835eb4a5 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 14 Mar 2013 13:13:45 -0400 Subject: tracing: Add "uptime" trace clock that uses jiffies Add a simple trace clock called "uptime" for those that are interested in the uptime of the trace. It uses jiffies as that's the safest method, as other uptime clocks grab seq locks, which could cause a deadlock if taken from an event or function tracer. Requested-by: Mauro Carvalho Chehab Cc: Thomas Gleixner Cc: Frederic Weisbecker Signed-off-by: Steven Rostedt --- include/linux/trace_clock.h | 1 + kernel/trace/trace.c | 1 + kernel/trace/trace_clock.c | 10 ++++++++++ 3 files changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/trace_clock.h b/include/linux/trace_clock.h index d563f37e1a1d..1d7ca2739272 100644 --- a/include/linux/trace_clock.h +++ b/include/linux/trace_clock.h @@ -16,6 +16,7 @@ extern u64 notrace trace_clock_local(void); extern u64 notrace trace_clock(void); +extern u64 notrace trace_clock_jiffies(void); extern u64 notrace trace_clock_global(void); extern u64 notrace trace_clock_counter(void); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f90ca16afcf2..8eabfbb8003e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -647,6 +647,7 @@ static struct { { trace_clock_local, "local", 1 }, { trace_clock_global, "global", 1 }, { trace_clock_counter, "counter", 0 }, + { trace_clock_jiffies, "uptime", 1 }, ARCH_TRACE_CLOCKS }; diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c index aa8f5f48dae6..26dc348332b7 100644 --- a/kernel/trace/trace_clock.c +++ b/kernel/trace/trace_clock.c @@ -57,6 +57,16 @@ u64 notrace trace_clock(void) return local_clock(); } +/* + * trace_jiffy_clock(): Simply use jiffies as a clock counter. 
+ */ +u64 notrace trace_clock_jiffies(void) +{ + u64 jiffy = jiffies - INITIAL_JIFFIES; + + /* Return nsecs */ + return (u64)jiffies_to_usecs(jiffy) * 1000ULL; +} /* * trace_clock_global(): special globally coherent trace clock -- cgit From b92021b09df70c1609e3547f3d6128dd560be97f Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 15 Mar 2013 15:04:17 +1030 Subject: CONFIG_SYMBOL_PREFIX: cleanup. We have CONFIG_SYMBOL_PREFIX, which three archs define to the string "_". But Al Viro broke this in "consolidate cond_syscall and SYSCALL_ALIAS declarations" (in linux-next), and he's not the first to do so. Using CONFIG_SYMBOL_PREFIX is awkward, since we usually just want to prefix it to something. So various places define helpers which are defined to nothing if CONFIG_SYMBOL_PREFIX isn't set: 1) include/asm-generic/unistd.h defines __SYMBOL_PREFIX. 2) include/asm-generic/vmlinux.lds.h defines VMLINUX_SYMBOL(sym) 3) include/linux/export.h defines MODULE_SYMBOL_PREFIX. 4) include/linux/kernel.h defines SYMBOL_PREFIX (which differs from #7) 5) kernel/modsign_certificate.S defines ASM_SYMBOL(sym) 6) scripts/modpost.c defines MODULE_SYMBOL_PREFIX 7) scripts/Makefile.lib defines SYMBOL_PREFIX on the commandline if CONFIG_SYMBOL_PREFIX is set, so that we have a non-string version for pasting. (arch/h8300/include/asm/linkage.h defines SYMBOL_NAME(), too). Let's solve this properly: 1) No more generic prefix, just CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX. 2) Make linux/export.h usable from asm. 3) Define VMLINUX_SYMBOL() and VMLINUX_SYMBOL_STR(). 4) Make everyone use them. Signed-off-by: Rusty Russell Reviewed-by: James Hogan Tested-by: James Hogan (metag) --- Makefile | 2 +- arch/Kconfig | 6 ++++++ arch/blackfin/Kconfig | 5 +---- arch/h8300/Kconfig | 5 +---- arch/metag/Kconfig | 5 +---- drivers/mtd/chips/gen_probe.c | 8 +++++--- include/asm-generic/unistd.h | 12 ++++-------- include/asm-generic/vmlinux.lds.h | 8 +------- include/linux/export.h | 20 ++++++++++++++------ include/linux/kernel.h | 7 ------- include/linux/module.h | 4 ++-- kernel/modsign_certificate.S | 13 +++---------- kernel/module.c | 2 +- scripts/Makefile.lib | 7 ------- scripts/link-vmlinux.sh | 5 ++--- scripts/mod/modpost.c | 36 +++++++++++++++--------------------- 16 files changed, 57 insertions(+), 88 deletions(-) (limited to 'include/linux') diff --git a/Makefile b/Makefile index a05ea42c5f18..0b09ba5e492a 100644 --- a/Makefile +++ b/Makefile @@ -1398,7 +1398,7 @@ quiet_cmd_rmfiles = $(if $(wildcard $(rm-files)),CLEAN $(wildcard $(rm-files)) # Run depmod only if we have System.map and depmod is executable quiet_cmd_depmod = DEPMOD $(KERNELRELEASE) cmd_depmod = $(CONFIG_SHELL) $(srctree)/scripts/depmod.sh $(DEPMOD) \ - $(KERNELRELEASE) "$(patsubst "%",%,$(CONFIG_SYMBOL_PREFIX))" + $(KERNELRELEASE) "$(patsubst y,_,$(CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX))" # Create temporary dir for module support files # clean it up only when building all modules diff --git a/arch/Kconfig b/arch/Kconfig index 1455579791ec..7b433a4bcc28 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -384,6 +384,12 @@ config MODULES_USE_ELF_REL Modules only use ELF REL relocations. Modules with ELF RELA relocations will give an error. +config HAVE_UNDERSCORE_SYMBOL_PREFIX + bool + help + Some architectures generate an _ in front of C symbols; things like + module loading and assembly files need to know about this. 
+ # # ABI hall of shame # diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig index c3f2e0bc644a..453ebe46b065 100644 --- a/arch/blackfin/Kconfig +++ b/arch/blackfin/Kconfig @@ -1,7 +1,3 @@ -config SYMBOL_PREFIX - string - default "_" - config MMU def_bool n @@ -33,6 +29,7 @@ config BLACKFIN select ARCH_HAVE_CUSTOM_GPIO_H select ARCH_WANT_OPTIONAL_GPIOLIB select HAVE_UID16 + select HAVE_UNDERSCORE_SYMBOL_PREFIX select VIRT_TO_BUS select ARCH_WANT_IPC_PARSE_VERSION select HAVE_GENERIC_HARDIRQS diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig index 79250de1b12a..303e4f9a79d1 100644 --- a/arch/h8300/Kconfig +++ b/arch/h8300/Kconfig @@ -12,10 +12,7 @@ config H8300 select MODULES_USE_ELF_RELA select OLD_SIGSUSPEND3 select OLD_SIGACTION - -config SYMBOL_PREFIX - string - default "_" + select HAVE_UNDERSCORE_SYMBOL_PREFIX config MMU bool diff --git a/arch/metag/Kconfig b/arch/metag/Kconfig index afc8973d1488..2099617e3ec0 100644 --- a/arch/metag/Kconfig +++ b/arch/metag/Kconfig @@ -1,7 +1,3 @@ -config SYMBOL_PREFIX - string - default "_" - config METAG def_bool y select EMBEDDED @@ -27,6 +23,7 @@ config METAG select HAVE_MOD_ARCH_SPECIFIC select HAVE_PERF_EVENTS select HAVE_SYSCALL_TRACEPOINTS + select HAVE_UNDERSCORE_SYMBOL_PREFIX select IRQ_DOMAIN select MODULES_USE_ELF_RELA select OF diff --git a/drivers/mtd/chips/gen_probe.c b/drivers/mtd/chips/gen_probe.c index 3b9a2843c5f8..74dbb6bcf488 100644 --- a/drivers/mtd/chips/gen_probe.c +++ b/drivers/mtd/chips/gen_probe.c @@ -204,14 +204,16 @@ static inline struct mtd_info *cfi_cmdset_unknown(struct map_info *map, struct cfi_private *cfi = map->fldrv_priv; __u16 type = primary?cfi->cfiq->P_ID:cfi->cfiq->A_ID; #ifdef CONFIG_MODULES - char probename[16+sizeof(MODULE_SYMBOL_PREFIX)]; + char probename[sizeof(VMLINUX_SYMBOL_STR(cfi_cmdset_%4.4X))]; cfi_cmdset_fn_t *probe_function; - sprintf(probename, MODULE_SYMBOL_PREFIX "cfi_cmdset_%4.4X", type); + sprintf(probename, VMLINUX_SYMBOL_STR(cfi_cmdset_%4.4X), type); probe_function = __symbol_get(probename); if (!probe_function) { - request_module(probename + sizeof(MODULE_SYMBOL_PREFIX) - 1); + char modname[sizeof("cfi_cmdset_%4.4X")]; + sprintf(modname, "cfi_cmdset_%4.4X", type); + request_module(modname); probe_function = __symbol_get(probename); } diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h index 4077b5d9ff81..15c0598e1109 100644 --- a/include/asm-generic/unistd.h +++ b/include/asm-generic/unistd.h @@ -1,4 +1,5 @@ #include +#include /* * These are required system calls, we should @@ -17,12 +18,7 @@ * but it doesn't work on all toolchains, so we just do it by hand */ #ifndef cond_syscall -#ifdef CONFIG_SYMBOL_PREFIX -#define __SYMBOL_PREFIX CONFIG_SYMBOL_PREFIX -#else -#define __SYMBOL_PREFIX -#endif -#define cond_syscall(x) asm(".weak\t" __SYMBOL_PREFIX #x "\n\t" \ - ".set\t" __SYMBOL_PREFIX #x "," \ - __SYMBOL_PREFIX "sys_ni_syscall") +#define cond_syscall(x) asm(".weak\t" VMLINUX_SYMBOL_STR(x) "\n\t" \ + ".set\t" VMLINUX_SYMBOL_STR(x) "," \ + VMLINUX_SYMBOL_STR(sys_ni_syscall)) #endif diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index afa12c7a025c..eb58d2d7d971 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -52,13 +52,7 @@ #define LOAD_OFFSET 0 #endif -#ifndef SYMBOL_PREFIX -#define VMLINUX_SYMBOL(sym) sym -#else -#define PASTE2(x,y) x##y -#define PASTE(x,y) PASTE2(x,y) -#define VMLINUX_SYMBOL(sym) PASTE(SYMBOL_PREFIX, sym) -#endif +#include /* Align . 
to a 8 byte boundary equals to maximum function alignment. */ #define ALIGN_FUNCTION() . = ALIGN(8) diff --git a/include/linux/export.h b/include/linux/export.h index 696c0f48afc7..412cd509effe 100644 --- a/include/linux/export.h +++ b/include/linux/export.h @@ -5,17 +5,24 @@ * to reduce the amount of pointless cruft we feed to gcc when only * exporting a simple symbol or two. * - * If you feel the need to add #include to this file - * then you are doing something wrong and should go away silently. + * Try not to add #includes here. It slows compilation and makes kernel + * hackers place grumpy comments in header files. */ /* Some toolchains use a `_' prefix for all user symbols. */ -#ifdef CONFIG_SYMBOL_PREFIX -#define MODULE_SYMBOL_PREFIX CONFIG_SYMBOL_PREFIX +#ifdef CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX +#define __VMLINUX_SYMBOL(x) _##x +#define __VMLINUX_SYMBOL_STR(x) "_" #x #else -#define MODULE_SYMBOL_PREFIX "" +#define __VMLINUX_SYMBOL(x) x +#define __VMLINUX_SYMBOL_STR(x) #x #endif +/* Indirect, so macros are expanded before pasting. */ +#define VMLINUX_SYMBOL(x) __VMLINUX_SYMBOL(x) +#define VMLINUX_SYMBOL_STR(x) __VMLINUX_SYMBOL_STR(x) + +#ifndef __ASSEMBLY__ struct kernel_symbol { unsigned long value; @@ -51,7 +58,7 @@ extern struct module __this_module; __CRC_SYMBOL(sym, sec) \ static const char __kstrtab_##sym[] \ __attribute__((section("__ksymtab_strings"), aligned(1))) \ - = MODULE_SYMBOL_PREFIX #sym; \ + = VMLINUX_SYMBOL_STR(sym); \ static const struct kernel_symbol __ksymtab_##sym \ __used \ __attribute__((section("___ksymtab" sec "+" #sym), unused)) \ @@ -85,5 +92,6 @@ extern struct module __this_module; #define EXPORT_UNUSED_SYMBOL_GPL(sym) #endif /* CONFIG_MODULES */ +#endif /* !__ASSEMBLY__ */ #endif /* _LINUX_EXPORT_H */ diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 80d36874689b..e13e992eae8a 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -723,13 +723,6 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } /* Trap pasters of __FUNCTION__ at compile-time */ #define __FUNCTION__ (__func__) -/* This helps us to avoid #ifdef CONFIG_SYMBOL_PREFIX */ -#ifdef CONFIG_SYMBOL_PREFIX -#define SYMBOL_PREFIX CONFIG_SYMBOL_PREFIX -#else -#define SYMBOL_PREFIX "" -#endif - /* Rebuild everything on CONFIG_FTRACE_MCOUNT_RECORD */ #ifdef CONFIG_FTRACE_MCOUNT_RECORD # define REBUILD_DUE_TO_FTRACE_MCOUNT_RECORD diff --git a/include/linux/module.h b/include/linux/module.h index ead1b5719a12..46f1ea01e6f6 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -190,7 +190,7 @@ extern int modules_disabled; /* for sysctl */ /* Get/put a kernel symbol (calls must be symmetric) */ void *__symbol_get(const char *symbol); void *__symbol_get_gpl(const char *symbol); -#define symbol_get(x) ((typeof(&x))(__symbol_get(MODULE_SYMBOL_PREFIX #x))) +#define symbol_get(x) ((typeof(&x))(__symbol_get(VMLINUX_SYMBOL_STR(x)))) /* modules using other modules: kdb wants to see this. 
*/ struct module_use { @@ -453,7 +453,7 @@ extern void __module_put_and_exit(struct module *mod, long code) #ifdef CONFIG_MODULE_UNLOAD unsigned long module_refcount(struct module *mod); void __symbol_put(const char *symbol); -#define symbol_put(x) __symbol_put(MODULE_SYMBOL_PREFIX #x) +#define symbol_put(x) __symbol_put(VMLINUX_SYMBOL_STR(x)) void symbol_put_addr(void *addr); /* Sometimes we know we already have a refcount, and it's easier not diff --git a/kernel/modsign_certificate.S b/kernel/modsign_certificate.S index 246b4c6e6135..4a9a86d12c8b 100644 --- a/kernel/modsign_certificate.S +++ b/kernel/modsign_certificate.S @@ -1,15 +1,8 @@ -/* SYMBOL_PREFIX defined on commandline from CONFIG_SYMBOL_PREFIX */ -#ifndef SYMBOL_PREFIX -#define ASM_SYMBOL(sym) sym -#else -#define PASTE2(x,y) x##y -#define PASTE(x,y) PASTE2(x,y) -#define ASM_SYMBOL(sym) PASTE(SYMBOL_PREFIX, sym) -#endif +#include #define GLOBAL(name) \ - .globl ASM_SYMBOL(name); \ - ASM_SYMBOL(name): + .globl VMLINUX_SYMBOL(name); \ + VMLINUX_SYMBOL(name): .section ".init.data","aw" diff --git a/kernel/module.c b/kernel/module.c index 0925c9a71975..cfd4a3f68d7d 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1209,7 +1209,7 @@ static inline int check_modstruct_version(Elf_Shdr *sechdrs, /* Since this should be found in kernel (which can't be removed), * no locking is necessary. */ - if (!find_symbol(MODULE_SYMBOL_PREFIX "module_layout", NULL, + if (!find_symbol(VMLINUX_SYMBOL_STR(module_layout), NULL, &crc, true, false)) BUG(); return check_version(sechdrs, versindex, "module_layout", mod, crc, diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 07125e697d7a..a373a1f66023 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -119,13 +119,6 @@ _c_flags += $(if $(patsubst n%,, \ $(CFLAGS_GCOV)) endif -ifdef CONFIG_SYMBOL_PREFIX -_sym_flags = -DSYMBOL_PREFIX=$(patsubst "%",%,$(CONFIG_SYMBOL_PREFIX)) -_cpp_flags += $(_sym_flags) -_a_flags += $(_sym_flags) -endif - - # If building the kernel in a separate objtree expand all occurrences # of -Idir to -I$(srctree)/dir except for absolute paths (starting with '/'). diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index 3d569d6022c2..014994936b1c 100644 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -74,9 +74,8 @@ kallsyms() info KSYM ${2} local kallsymopt; - if [ -n "${CONFIG_SYMBOL_PREFIX}" ]; then - kallsymopt="${kallsymopt} \ - --symbol-prefix=${CONFIG_SYMBOL_PREFIX}" + if [ -n "${CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX}" ]; then + kallsymopt="${kallsymopt} --symbol-prefix=_" fi if [ -n "${CONFIG_KALLSYMS_ALL}" ]; then diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 78b30c1548e9..282decfa29ae 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -18,14 +18,7 @@ #include "modpost.h" #include "../../include/generated/autoconf.h" #include "../../include/linux/license.h" - -/* Some toolchains use a `_' prefix for all user symbols. */ -#ifdef CONFIG_SYMBOL_PREFIX -#define MODULE_SYMBOL_PREFIX CONFIG_SYMBOL_PREFIX -#else -#define MODULE_SYMBOL_PREFIX "" -#endif - +#include "../../include/linux/export.h" /* Are we using CONFIG_MODVERSIONS? 
*/ int modversions = 0; @@ -562,7 +555,7 @@ static void parse_elf_finish(struct elf_info *info) static int ignore_undef_symbol(struct elf_info *info, const char *symname) { /* ignore __this_module, it will be resolved shortly */ - if (strcmp(symname, MODULE_SYMBOL_PREFIX "__this_module") == 0) + if (strcmp(symname, VMLINUX_SYMBOL_STR(__this_module)) == 0) return 1; /* ignore global offset table */ if (strcmp(symname, "_GLOBAL_OFFSET_TABLE_") == 0) @@ -583,8 +576,8 @@ static int ignore_undef_symbol(struct elf_info *info, const char *symname) return 0; } -#define CRC_PFX MODULE_SYMBOL_PREFIX "__crc_" -#define KSYMTAB_PFX MODULE_SYMBOL_PREFIX "__ksymtab_" +#define CRC_PFX VMLINUX_SYMBOL_STR(__crc_) +#define KSYMTAB_PFX VMLINUX_SYMBOL_STR(__ksymtab_) static void handle_modversions(struct module *mod, struct elf_info *info, Elf_Sym *sym, const char *symname) @@ -637,14 +630,15 @@ static void handle_modversions(struct module *mod, struct elf_info *info, } #endif - if (memcmp(symname, MODULE_SYMBOL_PREFIX, - strlen(MODULE_SYMBOL_PREFIX)) == 0) { - mod->unres = - alloc_symbol(symname + - strlen(MODULE_SYMBOL_PREFIX), - ELF_ST_BIND(sym->st_info) == STB_WEAK, - mod->unres); - } +#ifdef CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX + if (symname[0] != '_') + break; + else + symname++; +#endif + mod->unres = alloc_symbol(symname, + ELF_ST_BIND(sym->st_info) == STB_WEAK, + mod->unres); break; default: /* All exported symbols */ @@ -652,9 +646,9 @@ static void handle_modversions(struct module *mod, struct elf_info *info, sym_add_exported(symname + strlen(KSYMTAB_PFX), mod, export); } - if (strcmp(symname, MODULE_SYMBOL_PREFIX "init_module") == 0) + if (strcmp(symname, VMLINUX_SYMBOL_STR(init_module)) == 0) mod->has_init = 1; - if (strcmp(symname, MODULE_SYMBOL_PREFIX "cleanup_module") == 0) + if (strcmp(symname, VMLINUX_SYMBOL_STR(cleanup_module)) == 0) mod->has_cleanup = 1; break; } -- cgit From 8a7fbfab4be39b8690543f3d29b26860d2f6c576 Mon Sep 17 00:00:00 2001 From: "nikolay@redhat.com" Date: Tue, 12 Mar 2013 02:49:01 +0000 Subject: netxen: write IP address to firmware when using bonding This patch allows LRO aggregation on bonded devices that contain an NX3031 device. It also adds a for_each_netdev_in_bond_rcu(bond, slave) macro which executes for each slave that has bond as master. V3: After testing and discussing this with Rajesh, I decided to keep the vlan ip cache and just rename it to ip_cache since it will store bond ip addresses too. A new master flag has been added to the ip cache to denote that the address has been added because of a master device. I've taken care of the enslave/release cases by checking for various combinations of events and flags (e.g. netxen has a master, it's a bond master and it's not marked as a slave means it is being enslaved and is dev_open()ed in bond_enslave). I've changed netxen_free_ip_list() to have a "master" parameter which causes all IP addresses marked as master to be deleted (used when a netxen is being released). I've made the patch use the new upper device API as well. The following cases were tested: - bond -> netxen - vlan -> netxen - vlan -> bond -> netxen V2: Remove local ip caching, retrieve addresses dynamically and restore them if necessary. Note: Tested with NX3031 adapter. Tested-by: Rajesh Borundia Signed-off-by: Andy Gospodarek Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- drivers/net/ethernet/qlogic/netxen/netxen_nic.h | 5 +- .../net/ethernet/qlogic/netxen/netxen_nic_main.c | 220 ++++++++++++++------- include/linux/netdevice.h | 8 + 3 files changed, 155 insertions(+), 78 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h index eb3dfdbb642b..322a36b76727 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h @@ -955,9 +955,10 @@ typedef struct nx_mac_list_s { uint8_t mac_addr[ETH_ALEN+2]; } nx_mac_list_t; -struct nx_vlan_ip_list { +struct nx_ip_list { struct list_head list; __be32 ip_addr; + bool master; }; /* @@ -1605,7 +1606,7 @@ struct netxen_adapter { struct net_device *netdev; struct pci_dev *pdev; struct list_head mac_list; - struct list_head vlan_ip_list; + struct list_head ip_list; spinlock_t tx_clean_lock; diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c index 501f49207da5..7867aebc05f2 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c @@ -90,7 +90,7 @@ static irqreturn_t netxen_intr(int irq, void *data); static irqreturn_t netxen_msi_intr(int irq, void *data); static irqreturn_t netxen_msix_intr(int irq, void *data); -static void netxen_free_vlan_ip_list(struct netxen_adapter *); +static void netxen_free_ip_list(struct netxen_adapter *, bool); static void netxen_restore_indev_addr(struct net_device *dev, unsigned long); static struct rtnl_link_stats64 *netxen_nic_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats); @@ -1450,7 +1450,7 @@ netxen_nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) spin_lock_init(&adapter->tx_clean_lock); INIT_LIST_HEAD(&adapter->mac_list); - INIT_LIST_HEAD(&adapter->vlan_ip_list); + INIT_LIST_HEAD(&adapter->ip_list); err = netxen_setup_pci_map(adapter); if (err) @@ -1585,7 +1585,7 @@ static void netxen_nic_remove(struct pci_dev *pdev) cancel_work_sync(&adapter->tx_timeout_task); - netxen_free_vlan_ip_list(adapter); + netxen_free_ip_list(adapter, false); netxen_nic_detach(adapter); nx_decr_dev_ref_cnt(adapter); @@ -3137,62 +3137,77 @@ netxen_destip_supported(struct netxen_adapter *adapter) } static void -netxen_free_vlan_ip_list(struct netxen_adapter *adapter) +netxen_free_ip_list(struct netxen_adapter *adapter, bool master) { - struct nx_vlan_ip_list *cur; - struct list_head *head = &adapter->vlan_ip_list; + struct nx_ip_list *cur, *tmp_cur; - while (!list_empty(head)) { - cur = list_entry(head->next, struct nx_vlan_ip_list, list); - netxen_config_ipaddr(adapter, cur->ip_addr, NX_IP_DOWN); - list_del(&cur->list); - kfree(cur); + list_for_each_entry_safe(cur, tmp_cur, &adapter->ip_list, list) { + if (master) { + if (cur->master) { + netxen_config_ipaddr(adapter, cur->ip_addr, + NX_IP_DOWN); + list_del(&cur->list); + kfree(cur); + } + } else { + netxen_config_ipaddr(adapter, cur->ip_addr, NX_IP_DOWN); + list_del(&cur->list); + kfree(cur); + } } - } -static void -netxen_list_config_vlan_ip(struct netxen_adapter *adapter, + +static bool +netxen_list_config_ip(struct netxen_adapter *adapter, struct in_ifaddr *ifa, unsigned long event) { struct net_device *dev; - struct nx_vlan_ip_list *cur, *tmp_cur; + struct nx_ip_list *cur, *tmp_cur; struct list_head *head; + bool ret = false; dev = ifa->ifa_dev ? 
ifa->ifa_dev->dev : NULL; if (dev == NULL) - return; - - if (!is_vlan_dev(dev)) - return; + goto out; switch (event) { case NX_IP_UP: - list_for_each(head, &adapter->vlan_ip_list) { - cur = list_entry(head, struct nx_vlan_ip_list, list); + list_for_each(head, &adapter->ip_list) { + cur = list_entry(head, struct nx_ip_list, list); if (cur->ip_addr == ifa->ifa_address) - return; + goto out; } - cur = kzalloc(sizeof(struct nx_vlan_ip_list), GFP_ATOMIC); + cur = kzalloc(sizeof(struct nx_ip_list), GFP_ATOMIC); if (cur == NULL) - return; - + goto out; + if (dev->priv_flags & IFF_802_1Q_VLAN) + dev = vlan_dev_real_dev(dev); + cur->master = !!netif_is_bond_master(dev); cur->ip_addr = ifa->ifa_address; - list_add_tail(&cur->list, &adapter->vlan_ip_list); + list_add_tail(&cur->list, &adapter->ip_list); + netxen_config_ipaddr(adapter, ifa->ifa_address, NX_IP_UP); + ret = true; break; case NX_IP_DOWN: list_for_each_entry_safe(cur, tmp_cur, - &adapter->vlan_ip_list, list) { + &adapter->ip_list, list) { if (cur->ip_addr == ifa->ifa_address) { list_del(&cur->list); kfree(cur); + netxen_config_ipaddr(adapter, ifa->ifa_address, + NX_IP_DOWN); + ret = true; break; } } } +out: + return ret; } + static void netxen_config_indev_addr(struct netxen_adapter *adapter, struct net_device *dev, unsigned long event) @@ -3209,14 +3224,10 @@ netxen_config_indev_addr(struct netxen_adapter *adapter, for_ifa(indev) { switch (event) { case NETDEV_UP: - netxen_config_ipaddr(adapter, - ifa->ifa_address, NX_IP_UP); - netxen_list_config_vlan_ip(adapter, ifa, NX_IP_UP); + netxen_list_config_ip(adapter, ifa, NX_IP_UP); break; case NETDEV_DOWN: - netxen_config_ipaddr(adapter, - ifa->ifa_address, NX_IP_DOWN); - netxen_list_config_vlan_ip(adapter, ifa, NX_IP_DOWN); + netxen_list_config_ip(adapter, ifa, NX_IP_DOWN); break; default: break; @@ -3231,23 +3242,78 @@ netxen_restore_indev_addr(struct net_device *netdev, unsigned long event) { struct netxen_adapter *adapter = netdev_priv(netdev); - struct nx_vlan_ip_list *pos, *tmp_pos; + struct nx_ip_list *pos, *tmp_pos; unsigned long ip_event; ip_event = (event == NETDEV_UP) ? NX_IP_UP : NX_IP_DOWN; netxen_config_indev_addr(adapter, netdev, event); - list_for_each_entry_safe(pos, tmp_pos, &adapter->vlan_ip_list, list) { + list_for_each_entry_safe(pos, tmp_pos, &adapter->ip_list, list) { netxen_config_ipaddr(adapter, pos->ip_addr, ip_event); } } +static inline bool +netxen_config_checkdev(struct net_device *dev) +{ + struct netxen_adapter *adapter; + + if (!is_netxen_netdev(dev)) + return false; + adapter = netdev_priv(dev); + if (!adapter) + return false; + if (!netxen_destip_supported(adapter)) + return false; + if (adapter->is_up != NETXEN_ADAPTER_UP_MAGIC) + return false; + + return true; +} + +/** + * netxen_config_master - configure addresses based on master + * @dev: netxen device + * @event: netdev event + */ +static void netxen_config_master(struct net_device *dev, unsigned long event) +{ + struct net_device *master, *slave; + struct netxen_adapter *adapter = netdev_priv(dev); + + rcu_read_lock(); + master = netdev_master_upper_dev_get_rcu(dev); + /* + * This is the case where the netxen nic is being + * enslaved and is dev_open()ed in bond_enslave() + * Now we should program the bond's (and its vlans') + * addresses in the netxen NIC. 
+ */ + if (master && netif_is_bond_master(master) && + !netif_is_bond_slave(dev)) { + netxen_config_indev_addr(adapter, master, event); + for_each_netdev_rcu(&init_net, slave) + if (slave->priv_flags & IFF_802_1Q_VLAN && + vlan_dev_real_dev(slave) == master) + netxen_config_indev_addr(adapter, slave, event); + } + rcu_read_unlock(); + /* + * This is the case where the netxen nic is being + * released and is dev_close()ed in bond_release() + * just before IFF_BONDING is stripped. + */ + if (!master && dev->priv_flags & IFF_BONDING) + netxen_free_ip_list(adapter, true); +} + static int netxen_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct netxen_adapter *adapter; struct net_device *dev = (struct net_device *)ptr; struct net_device *orig_dev = dev; + struct net_device *slave; recheck: if (dev == NULL) @@ -3257,19 +3323,28 @@ recheck: dev = vlan_dev_real_dev(dev); goto recheck; } - - if (!is_netxen_netdev(dev)) - goto done; - - adapter = netdev_priv(dev); - - if (!adapter) - goto done; - - if (adapter->is_up != NETXEN_ADAPTER_UP_MAGIC) - goto done; - - netxen_config_indev_addr(adapter, orig_dev, event); + if (event == NETDEV_UP || event == NETDEV_DOWN) { + /* If this is a bonding device, look for netxen-based slaves*/ + if (netif_is_bond_master(dev)) { + rcu_read_lock(); + for_each_netdev_in_bond_rcu(dev, slave) { + if (!netxen_config_checkdev(slave)) + continue; + adapter = netdev_priv(slave); + netxen_config_indev_addr(adapter, + orig_dev, event); + } + rcu_read_unlock(); + } else { + if (!netxen_config_checkdev(dev)) + goto done; + adapter = netdev_priv(dev); + /* Act only if the actual netxen is the target */ + if (orig_dev == dev) + netxen_config_master(dev, event); + netxen_config_indev_addr(adapter, orig_dev, event); + } + } done: return NOTIFY_DONE; } @@ -3279,12 +3354,12 @@ netxen_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr) { struct netxen_adapter *adapter; - struct net_device *dev; - + struct net_device *dev, *slave; struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; + unsigned long ip_event; dev = ifa->ifa_dev ? ifa->ifa_dev->dev : NULL; - + ip_event = (event == NETDEV_UP) ? 
NX_IP_UP : NX_IP_DOWN; recheck: if (dev == NULL) goto done; @@ -3293,31 +3368,24 @@ recheck: dev = vlan_dev_real_dev(dev); goto recheck; } - - if (!is_netxen_netdev(dev)) - goto done; - - adapter = netdev_priv(dev); - - if (!adapter || !netxen_destip_supported(adapter)) - goto done; - - if (adapter->is_up != NETXEN_ADAPTER_UP_MAGIC) - goto done; - - switch (event) { - case NETDEV_UP: - netxen_config_ipaddr(adapter, ifa->ifa_address, NX_IP_UP); - netxen_list_config_vlan_ip(adapter, ifa, NX_IP_UP); - break; - case NETDEV_DOWN: - netxen_config_ipaddr(adapter, ifa->ifa_address, NX_IP_DOWN); - netxen_list_config_vlan_ip(adapter, ifa, NX_IP_DOWN); - break; - default: - break; + if (event == NETDEV_UP || event == NETDEV_DOWN) { + /* If this is a bonding device, look for netxen-based slaves*/ + if (netif_is_bond_master(dev)) { + rcu_read_lock(); + for_each_netdev_in_bond_rcu(dev, slave) { + if (!netxen_config_checkdev(slave)) + continue; + adapter = netdev_priv(slave); + netxen_list_config_ip(adapter, ifa, ip_event); + } + rcu_read_unlock(); + } else { + if (!netxen_config_checkdev(dev)) + goto done; + adapter = netdev_priv(dev); + netxen_list_config_ip(adapter, ifa, ip_event); + } } - done: return NOTIFY_DONE; } @@ -3334,7 +3402,7 @@ static void netxen_restore_indev_addr(struct net_device *dev, unsigned long event) { } static void -netxen_free_vlan_ip_list(struct netxen_adapter *adapter) +netxen_free_ip_list(struct netxen_adapter *adapter, bool master) { } #endif diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e1ebeffa6b35..9fc1ab0c8914 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1617,6 +1617,9 @@ extern seqcount_t devnet_rename_seq; /* Device rename seq */ list_for_each_entry_continue(d, &(net)->dev_base_head, dev_list) #define for_each_netdev_continue_rcu(net, d) \ list_for_each_entry_continue_rcu(d, &(net)->dev_base_head, dev_list) +#define for_each_netdev_in_bond_rcu(bond, slave) \ + for_each_netdev_rcu(&init_net, slave) \ + if (netdev_master_upper_dev_get_rcu(slave) == bond) #define net_device_entry(lh) list_entry(lh, struct net_device, dev_list) static inline struct net_device *next_net_device(struct net_device *dev) @@ -2774,6 +2777,11 @@ static inline void netif_set_gso_max_size(struct net_device *dev, dev->gso_max_size = size; } +static inline bool netif_is_bond_master(struct net_device *dev) +{ + return dev->flags & IFF_MASTER && dev->priv_flags & IFF_BONDING; +} + static inline bool netif_is_bond_slave(struct net_device *dev) { return dev->flags & IFF_SLAVE && dev->priv_flags & IFF_BONDING; -- cgit From 764444f5a324ad5a272773f078192819084388ce Mon Sep 17 00:00:00 2001 From: Fernando Luis Vazquez Cao Date: Wed, 13 Mar 2013 16:57:25 +0000 Subject: net: clean leftover of COMPAT_NET_DEV_OPS removal COMPAT_NET_DEV_OPS was removed a while back and with it the definition of netdev_resync_ops() went away. Let's finish the clean-up. Signed-off-by: Fernando Luis Vazquez Cao Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9fc1ab0c8914..56e3e0665272 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1692,7 +1692,6 @@ extern int netdev_refcnt_read(const struct net_device *dev); extern void free_netdev(struct net_device *dev); extern void synchronize_net(void); extern int init_dummy_netdev(struct net_device *dev); -extern void netdev_resync_ops(struct net_device *dev); extern struct net_device *dev_get_by_index(struct net *net, int ifindex); extern struct net_device *__dev_get_by_index(struct net *net, int ifindex); -- cgit From dad3cab3e063110b3ae3dc82a00e7aacd09b91ec Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Mon, 4 Mar 2013 16:52:49 -0600 Subject: USB: fix trivial usb_device kernel-doc errors Fix trivial kernel-doc warnings: Warning(include/linux/usb.h:574): No description found for parameter 'usb3_lpm_enabled' Warning(include/linux/usb.h:574): Excess struct/union/enum/typedef member 'usb_classdev' description in 'usb_device' Warning(include/linux/usb.h:574): Excess struct/union/enum/typedef member 'usbfs_dentry' description in 'usb_device' Cc: Felipe Balbi Cc: Greg Kroah-Hartman Cc: Jiri Kosina Cc: linux-usb@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Nishanth Menon Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 4d22d0f6167a..52464fb2389b 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -469,14 +469,12 @@ struct usb3_lpm_parameters { * @lpm_capable: device supports LPM * @usb2_hw_lpm_capable: device can perform USB2 hardware LPM * @usb2_hw_lpm_enabled: USB2 hardware LPM enabled + * @usb3_lpm_enabled: USB3 hardware LPM enabled * @string_langid: language ID for strings * @product: iProduct string, if present (static) * @manufacturer: iManufacturer string, if present (static) * @serial: iSerialNumber string, if present (static) * @filelist: usbfs files that are open to this device - * @usb_classdev: USB class device that was created for usbfs device - * access from userspace - * @usbfs_dentry: usbfs dentry entry for the device * @maxchild: number of ports if hub * @quirks: quirks of the whole device * @urbnum: number of URBs submitted for the whole device -- cgit From 96dd86fa588169b745a71aedf2070e80f4943623 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Fri, 15 Mar 2013 12:30:06 -0700 Subject: Drivers: hv: Add a new driver to support host initiated backup This driver supports host initiated backup of the guest. On Windows guests, the host can generate application consistent backups using the Windows VSS framework. On Linux, we ensure that the backup will be file system consistent. This driver allows the host to initiate a "Freeze" operation on all the mounted file systems in the guest. Once the mounted file systems in the guest are frozen, the host snapshots the guest's file systems. Once this is done, the guest's file systems are "thawed". This driver has a user-level component (daemon) that invokes the appropriate operation on all the mounted file systems in response to the requests from the host. The duration for which the guest is frozen is very short - a few seconds. During this interval, the diff disk is committed. In this version of the patch I have addressed the feedback from Olaf Hering.
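
[Editorial note: a minimal sketch of what the "Freeze" operation described above amounts to for a single mounted file system. This is not part of the patch; the daemon added below shells out to fsfreeze(8), which is a thin wrapper around the same FIFREEZE/FITHAW ioctls. The freeze_fs() helper and its mount_point parameter are illustrative only.]

#include <fcntl.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/fs.h>	/* FIFREEZE, FITHAW */

/* Freeze (freeze != 0) or thaw (freeze == 0) one mounted file system. */
static int freeze_fs(const char *mount_point, int freeze)
{
	int fd = open(mount_point, O_RDONLY);
	int ret;

	if (fd < 0)
		return -1;
	ret = ioctl(fd, freeze ? FIFREEZE : FITHAW, 0);
	close(fd);
	return ret;
}

While a file system is frozen, writes to it block, which is why the commit message stresses that the freeze window is only a few seconds.
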
Also, some of the connector related issues have been fixed. Signed-off-by: K. Y. Srinivasan Reviewed-by: Haiyang Zhang Cc: Evgeniy Polyakov Signed-off-by: Greg Kroah-Hartman --- drivers/hv/Makefile | 2 +- drivers/hv/hv_snapshot.c | 287 +++++++++++++++++++++++++++++++++++++++++ drivers/hv/hv_util.c | 10 ++ include/linux/hyperv.h | 69 ++++++++++ include/uapi/linux/connector.h | 5 +- tools/hv/hv_vss_daemon.c | 220 +++++++++++++++++++++++++++++++ 6 files changed, 591 insertions(+), 2 deletions(-) create mode 100644 drivers/hv/hv_snapshot.c create mode 100644 tools/hv/hv_vss_daemon.c (limited to 'include/linux') diff --git a/drivers/hv/Makefile b/drivers/hv/Makefile index e6abfa02d8b7..0a74b5661186 100644 --- a/drivers/hv/Makefile +++ b/drivers/hv/Makefile @@ -5,4 +5,4 @@ obj-$(CONFIG_HYPERV_BALLOON) += hv_balloon.o hv_vmbus-y := vmbus_drv.o \ hv.o connection.o channel.o \ channel_mgmt.o ring_buffer.o -hv_utils-y := hv_util.o hv_kvp.o +hv_utils-y := hv_util.o hv_kvp.o hv_snapshot.o diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c new file mode 100644 index 000000000000..8ad5653ce447 --- /dev/null +++ b/drivers/hv/hv_snapshot.c @@ -0,0 +1,287 @@ +/* + * An implementation of host initiated guest snapshot. + * + * + * Copyright (C) 2013, Microsoft, Inc. + * Author : K. Y. Srinivasan + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/net.h> +#include <linux/nls.h> +#include <linux/connector.h> +#include <linux/workqueue.h> +#include <linux/hyperv.h> + + + +/* + * Global state maintained for transaction that is being processed. + * Note that only one transaction can be active at any point in time. + * + * This state is set when we receive a request from the host; we + * cleanup this state when the transaction is completed - when we respond + * to the host with the key value. + */ + +static struct { + bool active; /* transaction status - active or not */ + int recv_len; /* number of bytes received. */ + struct vmbus_channel *recv_channel; /* chn we got the request */ + u64 recv_req_id; /* request ID. */ + struct hv_vss_msg *msg; /* current message */ +} vss_transaction; + + +static void vss_respond_to_host(int error); + +static struct cb_id vss_id = { CN_VSS_IDX, CN_VSS_VAL }; +static const char vss_name[] = "vss_kernel_module"; +static __u8 *recv_buffer; + +static void vss_send_op(struct work_struct *dummy); +static DECLARE_WORK(vss_send_op_work, vss_send_op); + +/* + * Callback when data is received from user mode.
+ */ + +static void +vss_cn_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp) +{ + struct hv_vss_msg *vss_msg; + + vss_msg = (struct hv_vss_msg *)msg->data; + + if (vss_msg->vss_hdr.operation == VSS_OP_REGISTER) { + pr_info("VSS daemon registered\n"); + vss_transaction.active = false; + if (vss_transaction.recv_channel != NULL) + hv_vss_onchannelcallback(vss_transaction.recv_channel); + return; + + } + vss_respond_to_host(vss_msg->error); +} + + +static void vss_send_op(struct work_struct *dummy) +{ + int op = vss_transaction.msg->vss_hdr.operation; + struct cn_msg *msg; + struct hv_vss_msg *vss_msg; + + msg = kzalloc(sizeof(*msg) + sizeof(*vss_msg), GFP_ATOMIC); + if (!msg) + return; + + vss_msg = (struct hv_vss_msg *)msg->data; + + msg->id.idx = CN_VSS_IDX; + msg->id.val = CN_VSS_VAL; + + vss_msg->vss_hdr.operation = op; + msg->len = sizeof(struct hv_vss_msg); + + cn_netlink_send(msg, 0, GFP_ATOMIC); + kfree(msg); + + return; +} + +/* + * Send a response back to the host. + */ + +static void +vss_respond_to_host(int error) +{ + struct icmsg_hdr *icmsghdrp; + u32 buf_len; + struct vmbus_channel *channel; + u64 req_id; + + /* + * If a transaction is not active; log and return. + */ + + if (!vss_transaction.active) { + /* + * This is a spurious call! + */ + pr_warn("VSS: Transaction not active\n"); + return; + } + /* + * Copy the global state for completing the transaction. Note that + * only one transaction can be active at a time. + */ + + buf_len = vss_transaction.recv_len; + channel = vss_transaction.recv_channel; + req_id = vss_transaction.recv_req_id; + vss_transaction.active = false; + + icmsghdrp = (struct icmsg_hdr *) + &recv_buffer[sizeof(struct vmbuspipe_hdr)]; + + if (channel->onchannel_callback == NULL) + /* + * We have raced with util driver being unloaded; + * silently return. + */ + return; + + icmsghdrp->status = error; + + icmsghdrp->icflags = ICMSGHDRFLAG_TRANSACTION | ICMSGHDRFLAG_RESPONSE; + + vmbus_sendpacket(channel, recv_buffer, buf_len, req_id, + VM_PKT_DATA_INBAND, 0); + +} + +/* + * This callback is invoked when we get a VSS message from the host. + * The host ensures that only one VSS transaction can be active at a time. + */ + +void hv_vss_onchannelcallback(void *context) +{ + struct vmbus_channel *channel = context; + u32 recvlen; + u64 requestid; + struct hv_vss_msg *vss_msg; + + + struct icmsg_hdr *icmsghdrp; + struct icmsg_negotiate *negop = NULL; + + if (vss_transaction.active) { + /* + * We will defer processing this callback until + * the current transaction is complete. + */ + vss_transaction.recv_channel = channel; + return; + } + + vmbus_recvpacket(channel, recv_buffer, PAGE_SIZE * 2, &recvlen, + &requestid); + + if (recvlen > 0) { + icmsghdrp = (struct icmsg_hdr *)&recv_buffer[ + sizeof(struct vmbuspipe_hdr)]; + + if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) { + vmbus_prep_negotiate_resp(icmsghdrp, negop, + recv_buffer, MAX_SRV_VER, MAX_SRV_VER); + /* + * We currently negotiate the highest number the + * host has presented. If this version is not + * at least 5.0, reject. + */ + negop = (struct icmsg_negotiate *)&recv_buffer[ + sizeof(struct vmbuspipe_hdr) + + sizeof(struct icmsg_hdr)]; + + if (negop->icversion_data[1].major < 5) + negop->icframe_vercnt = 0; + } else { + vss_msg = (struct hv_vss_msg *)&recv_buffer[ + sizeof(struct vmbuspipe_hdr) + + sizeof(struct icmsg_hdr)]; + + /* + * Stash away this global state for completing the + * transaction; note transactions are serialized.
+ */ + + vss_transaction.recv_len = recvlen; + vss_transaction.recv_channel = channel; + vss_transaction.recv_req_id = requestid; + vss_transaction.active = true; + vss_transaction.msg = (struct hv_vss_msg *)vss_msg; + + switch (vss_msg->vss_hdr.operation) { + /* + * Initiate a "freeze/thaw" + * operation in the guest. + * We respond to the host once + * the operation is complete. + * + * We send the message to the + * user space daemon and the + * operation is performed in + * the daemon. + */ + case VSS_OP_FREEZE: + case VSS_OP_THAW: + schedule_work(&vss_send_op_work); + return; + + case VSS_OP_HOT_BACKUP: + vss_msg->vss_cf.flags = + VSS_HBU_NO_AUTO_RECOVERY; + vss_respond_to_host(0); + return; + + case VSS_OP_GET_DM_INFO: + vss_msg->dm_info.flags = 0; + vss_respond_to_host(0); + return; + + default: + vss_respond_to_host(0); + return; + + } + + } + + icmsghdrp->icflags = ICMSGHDRFLAG_TRANSACTION + | ICMSGHDRFLAG_RESPONSE; + + vmbus_sendpacket(channel, recv_buffer, + recvlen, requestid, + VM_PKT_DATA_INBAND, 0); + } + +} + +int +hv_vss_init(struct hv_util_service *srv) +{ + int err; + + err = cn_add_callback(&vss_id, vss_name, vss_cn_callback); + if (err) + return err; + recv_buffer = srv->recv_buffer; + + /* + * When this driver loads, the user level daemon that + * processes the host requests may not yet be running. + * Defer processing channel callbacks until the daemon + * has registered. + */ + vss_transaction.active = true; + return 0; +} + +void hv_vss_deinit(void) +{ + cn_del_callback(&vss_id); + cancel_work_sync(&vss_send_op_work); +} diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c index 1d4cbd8e8261..2f561c5dfe24 100644 --- a/drivers/hv/hv_util.c +++ b/drivers/hv/hv_util.c @@ -49,6 +49,12 @@ static struct hv_util_service util_kvp = { .util_deinit = hv_kvp_deinit, }; +static struct hv_util_service util_vss = { + .util_cb = hv_vss_onchannelcallback, + .util_init = hv_vss_init, + .util_deinit = hv_vss_deinit, +}; + static void perform_shutdown(struct work_struct *dummy) { orderly_poweroff(true); @@ -339,6 +345,10 @@ static const struct hv_vmbus_device_id id_table[] = { { HV_KVP_GUID, .driver_data = (unsigned long)&util_kvp }, + /* VSS GUID */ + { HV_VSS_GUID, + .driver_data = (unsigned long)&util_vss + }, { }, }; diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index df77ba9a8166..95d0850584da 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -27,6 +27,63 @@ #include + +/* + * Implementation of host controlled snapshot of the guest. + */ + +#define VSS_OP_REGISTER 128 + +enum hv_vss_op { + VSS_OP_CREATE = 0, + VSS_OP_DELETE, + VSS_OP_HOT_BACKUP, + VSS_OP_GET_DM_INFO, + VSS_OP_BU_COMPLETE, + /* + * Following operations are only supported with IC version >= 5.0 + */ + VSS_OP_FREEZE, /* Freeze the file systems in the VM */ + VSS_OP_THAW, /* Unfreeze the file systems */ + VSS_OP_AUTO_RECOVER, + VSS_OP_COUNT /* Number of operations, must be last */ +}; + + +/* + * Header for all VSS messages. + */ +struct hv_vss_hdr { + __u8 operation; + __u8 reserved[7]; +} __attribute__((packed)); + + +/* + * Flag values for the hv_vss_check_feature. Linux supports only + * one value. 
+ */ +#define VSS_HBU_NO_AUTO_RECOVERY 0x00000005 + +struct hv_vss_check_feature { + __u32 flags; +} __attribute__((packed)); + +struct hv_vss_check_dm_info { + __u32 flags; +} __attribute__((packed)); + +struct hv_vss_msg { + union { + struct hv_vss_hdr vss_hdr; + int error; + }; + union { + struct hv_vss_check_feature vss_cf; + struct hv_vss_check_dm_info dm_info; + }; +} __attribute__((packed)); + /* * An implementation of HyperV key value pair (KVP) functionality for Linux. * @@ -1252,6 +1309,14 @@ void vmbus_driver_unregister(struct hv_driver *hv_driver); 0xb9, 0x8b, 0x8b, 0xa1, 0xa1, 0xf3, 0xf9, 0x5a \ } +/* + * VSS (Backup/Restore) GUID + */ +#define HV_VSS_GUID \ + .guid = { \ + 0x29, 0x2e, 0xfa, 0x35, 0x23, 0xea, 0x36, 0x42, \ + 0x96, 0xae, 0x3a, 0x6e, 0xba, 0xcb, 0xa4, 0x40 \ + } /* * Common header for Hyper-V ICs */ @@ -1356,6 +1421,10 @@ int hv_kvp_init(struct hv_util_service *); void hv_kvp_deinit(void); void hv_kvp_onchannelcallback(void *); +int hv_vss_init(struct hv_util_service *); +void hv_vss_deinit(void); +void hv_vss_onchannelcallback(void *); + /* * Negotiated version with the Host. */ diff --git a/include/uapi/linux/connector.h b/include/uapi/linux/connector.h index 8761a0349c74..4cb283505e45 100644 --- a/include/uapi/linux/connector.h +++ b/include/uapi/linux/connector.h @@ -44,8 +44,11 @@ #define CN_VAL_DRBD 0x1 #define CN_KVP_IDX 0x9 /* HyperV KVP */ #define CN_KVP_VAL 0x1 /* queries from the kernel */ +#define CN_VSS_IDX 0xA /* HyperV VSS */ +#define CN_VSS_VAL 0x1 /* queries from the kernel */ -#define CN_NETLINK_USERS 10 /* Highest index + 1 */ + +#define CN_NETLINK_USERS 11 /* Highest index + 1 */ /* * Maximum connector's message size. diff --git a/tools/hv/hv_vss_daemon.c b/tools/hv/hv_vss_daemon.c new file mode 100644 index 000000000000..95269952aa92 --- /dev/null +++ b/tools/hv/hv_vss_daemon.c @@ -0,0 +1,220 @@ +/* + * An implementation of the host initiated guest snapshot for Hyper-V. + * + * + * Copyright (C) 2013, Microsoft, Inc. + * Author : K. Y. Srinivasan + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. 
+ * + */ + + +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/poll.h> +#include <linux/types.h> +#include <linux/kdev_t.h> +#include <linux/connector.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <string.h> +#include <errno.h> +#include <arpa/inet.h> +#include <linux/hyperv.h> +#include <linux/netlink.h> +#include <syslog.h> + +static char vss_recv_buffer[4096]; +static char vss_send_buffer[4096]; +static struct sockaddr_nl addr; + +#ifndef SOL_NETLINK +#define SOL_NETLINK 270 +#endif + + +static int vss_operate(int operation) +{ + char *fs_op; + char cmd[512]; + char buf[512]; + FILE *file; + char *p; + char *x; + int error; + + switch (operation) { + case VSS_OP_FREEZE: + fs_op = "-f "; + break; + case VSS_OP_THAW: + fs_op = "-u "; + break; + } + + file = popen("mount | awk '/^\\/dev\\// { print $3}'", "r"); + if (file == NULL) + return -1; + + while ((p = fgets(buf, sizeof(buf), file)) != NULL) { + x = strchr(p, '\n'); + *x = '\0'; + if (!strncmp(p, "/", sizeof("/"))) + continue; + + sprintf(cmd, "%s %s %s", "fsfreeze ", fs_op, p); + syslog(LOG_INFO, "VSS cmd is %s\n", cmd); + error = system(cmd); + } + pclose(file); + + sprintf(cmd, "%s %s %s", "fsfreeze ", fs_op, "/"); + syslog(LOG_INFO, "VSS cmd is %s\n", cmd); + error = system(cmd); + + return error; +} + +static int netlink_send(int fd, struct cn_msg *msg) +{ + struct nlmsghdr *nlh; + unsigned int size; + struct msghdr message; + char buffer[64]; + struct iovec iov[2]; + + size = NLMSG_SPACE(sizeof(struct cn_msg) + msg->len); + + nlh = (struct nlmsghdr *)buffer; + nlh->nlmsg_seq = 0; + nlh->nlmsg_pid = getpid(); + nlh->nlmsg_type = NLMSG_DONE; + nlh->nlmsg_len = NLMSG_LENGTH(size - sizeof(*nlh)); + nlh->nlmsg_flags = 0; + + iov[0].iov_base = nlh; + iov[0].iov_len = sizeof(*nlh); + + iov[1].iov_base = msg; + iov[1].iov_len = size; + + memset(&message, 0, sizeof(message)); + message.msg_name = &addr; + message.msg_namelen = sizeof(addr); + message.msg_iov = iov; + message.msg_iovlen = 2; + + return sendmsg(fd, &message, 0); +} + +int main(void) +{ + int fd, len, nl_group; + int error; + struct cn_msg *message; + struct pollfd pfd; + struct nlmsghdr *incoming_msg; + struct cn_msg *incoming_cn_msg; + int op; + struct hv_vss_msg *vss_msg; + + daemon(1, 0); + openlog("Hyper-V VSS", 0, LOG_USER); + syslog(LOG_INFO, "VSS starting; pid is:%d", getpid()); + + fd = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR); + if (fd < 0) { + syslog(LOG_ERR, "netlink socket creation failed; error:%d", fd); + exit(EXIT_FAILURE); + } + addr.nl_family = AF_NETLINK; + addr.nl_pad = 0; + addr.nl_pid = 0; + addr.nl_groups = 0; + + + error = bind(fd, (struct sockaddr *)&addr, sizeof(addr)); + if (error < 0) { + syslog(LOG_ERR, "bind failed; error:%d", error); + close(fd); + exit(EXIT_FAILURE); + } + nl_group = CN_VSS_IDX; + setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &nl_group, sizeof(nl_group)); + /* + * Register ourselves with the kernel.
+ */ + message = (struct cn_msg *)vss_send_buffer; + message->id.idx = CN_VSS_IDX; + message->id.val = CN_VSS_VAL; + message->ack = 0; + vss_msg = (struct hv_vss_msg *)message->data; + vss_msg->vss_hdr.operation = VSS_OP_REGISTER; + + message->len = sizeof(struct hv_vss_msg); + + len = netlink_send(fd, message); + if (len < 0) { + syslog(LOG_ERR, "netlink_send failed; error:%d", len); + close(fd); + exit(EXIT_FAILURE); + } + + pfd.fd = fd; + + while (1) { + struct sockaddr *addr_p = (struct sockaddr *) &addr; + socklen_t addr_l = sizeof(addr); + pfd.events = POLLIN; + pfd.revents = 0; + poll(&pfd, 1, -1); + + len = recvfrom(fd, vss_recv_buffer, sizeof(vss_recv_buffer), 0, + addr_p, &addr_l); + + if (len < 0 || addr.nl_pid) { + syslog(LOG_ERR, "recvfrom failed; pid:%u error:%d %s", + addr.nl_pid, errno, strerror(errno)); + close(fd); + return -1; + } + + incoming_msg = (struct nlmsghdr *)vss_recv_buffer; + + if (incoming_msg->nlmsg_type != NLMSG_DONE) + continue; + + incoming_cn_msg = (struct cn_msg *)NLMSG_DATA(incoming_msg); + vss_msg = (struct hv_vss_msg *)incoming_cn_msg->data; + op = vss_msg->vss_hdr.operation; + error = HV_S_OK; + + switch (op) { + case VSS_OP_FREEZE: + case VSS_OP_THAW: + error = vss_operate(op); + if (error) + error = HV_E_FAIL; + break; + default: + syslog(LOG_ERR, "Illegal op:%d\n", op); + } + vss_msg->error = error; + len = netlink_send(fd, incoming_cn_msg); + if (len < 0) { + syslog(LOG_ERR, "net_link send failed; error:%d", len); + exit(EXIT_FAILURE); + } + } + +} -- cgit From fa882867ae5f8543eb304a1667563f1c99514475 Mon Sep 17 00:00:00 2001 From: Samuel Iglesias Gonsalvez Date: Fri, 8 Mar 2013 09:21:46 +0100 Subject: ipack: add ipack_get_device() ipack_put_device() Prepare everything for later use. Signed-off-by: Samuel Iglesias Gonsalvez Signed-off-by: Greg Kroah-Hartman --- drivers/ipack/ipack.c | 12 ++++++++++++ include/linux/ipack.h | 3 +++ 2 files changed, 15 insertions(+) (limited to 'include/linux') diff --git a/drivers/ipack/ipack.c b/drivers/ipack/ipack.c index 7ec6b208b1cb..4f913aa88971 100644 --- a/drivers/ipack/ipack.c +++ b/drivers/ipack/ipack.c @@ -461,6 +461,18 @@ void ipack_device_unregister(struct ipack_device *dev) } EXPORT_SYMBOL_GPL(ipack_device_unregister); +void ipack_get_device(struct ipack_device *dev) +{ + get_device(&dev->dev); +} +EXPORT_SYMBOL_GPL(ipack_get_device); + +void ipack_put_device(struct ipack_device *dev) +{ + put_device(&dev->dev); +} +EXPORT_SYMBOL_GPL(ipack_put_device); + static int __init ipack_init(void) { ida_init(&ipack_ida); diff --git a/include/linux/ipack.h b/include/linux/ipack.h index fea12cbb2aeb..def91fd996f4 100644 --- a/include/linux/ipack.h +++ b/include/linux/ipack.h @@ -221,6 +221,9 @@ void ipack_driver_unregister(struct ipack_driver *edrv); int ipack_device_register(struct ipack_device *dev); void ipack_device_unregister(struct ipack_device *dev); +void ipack_get_device(struct ipack_device *dev); +void ipack_put_device(struct ipack_device *dev); + /** * DEFINE_IPACK_DEVICE_TABLE - macro used to describe a IndustryPack table * @_table: device table name -- cgit From e926301b39a07f587ff8c66354a2e2ee4c29162c Mon Sep 17 00:00:00 2001 From: Samuel Iglesias Gonsalvez Date: Fri, 8 Mar 2013 09:21:47 +0100 Subject: ipack: split ipack_device_register() in several functions One function is ipack_device_init(). If it fails, the caller should execute ipack_put_device(). The second function is ipack_device_add that only adds the device. If it fails, the caller should execute ipack_put_device(). 
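
[Editorial note: a minimal sketch of the caller pattern this split enables, mirroring the tpci200_create_device() conversion in the diff below. The example_create_device() wrapper and its arguments are illustrative; field population is elided. Per the NOTEs this patch adds to ipack.h, once ipack_device_init() has run the device must be released with ipack_put_device(), never freed directly.]

#include <linux/slab.h>
#include <linux/ipack.h>

static int example_create_device(struct ipack_bus_device *bus, int slot)
{
	struct ipack_device *dev;
	int ret;

	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
	if (!dev)
		return -ENOMEM;
	dev->bus = bus;
	dev->slot = slot;
	/* populate dev->region[] here, as required by ipack_device_init() */

	ret = ipack_device_init(dev);
	if (ret < 0) {
		ipack_put_device(dev);	/* drops the initial reference; do not kfree() */
		return ret;
	}

	ret = ipack_device_add(dev);
	if (ret < 0)
		ipack_put_device(dev);
	return ret;
}
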
Then the device is removed with refcount = 0, as device_register() kernel documentation says. ipack_device_del() is added to remove the device. Signed-off-by: Samuel Iglesias Gonsalvez Signed-off-by: Greg Kroah-Hartman --- drivers/ipack/carriers/tpci200.c | 14 +++++++++++++- drivers/ipack/ipack.c | 24 ++++++++++++++---------- include/linux/ipack.h | 39 +++++++++++++++++++++++++++++---------- 3 files changed, 56 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/ipack/carriers/tpci200.c b/drivers/ipack/carriers/tpci200.c index 0246b1fddffe..c276fde318e5 100644 --- a/drivers/ipack/carriers/tpci200.c +++ b/drivers/ipack/carriers/tpci200.c @@ -480,6 +480,7 @@ static void tpci200_release_device(struct ipack_device *dev) static int tpci200_create_device(struct tpci200_board *tpci200, int i) { + int ret; enum ipack_space space; struct ipack_device *dev = kzalloc(sizeof(struct ipack_device), GFP_KERNEL); @@ -495,7 +496,18 @@ static int tpci200_create_device(struct tpci200_board *tpci200, int i) + tpci200_space_interval[space] * i; dev->region[space].size = tpci200_space_size[space]; } - return ipack_device_register(dev); + + ret = ipack_device_init(dev); + if (ret < 0) { + ipack_put_device(dev); + return ret; + } + + ret = ipack_device_add(dev); + if (ret < 0) + ipack_put_device(dev); + + return ret; } static int tpci200_pci_probe(struct pci_dev *pdev, diff --git a/drivers/ipack/ipack.c b/drivers/ipack/ipack.c index 4f913aa88971..6e066c53acce 100644 --- a/drivers/ipack/ipack.c +++ b/drivers/ipack/ipack.c @@ -227,7 +227,7 @@ static int ipack_unregister_bus_member(struct device *dev, void *data) struct ipack_bus_device *bus = data; if (idev->bus == bus) - ipack_device_unregister(idev); + ipack_device_del(idev); return 1; } @@ -419,7 +419,7 @@ out: return ret; } -int ipack_device_register(struct ipack_device *dev) +int ipack_device_init(struct ipack_device *dev) { int ret; @@ -428,6 +428,7 @@ int ipack_device_register(struct ipack_device *dev) dev->dev.parent = dev->bus->parent; dev_set_name(&dev->dev, "ipack-dev.%u.%u", dev->bus->bus_nr, dev->slot); + device_initialize(&dev->dev); if (dev->bus->ops->set_clockrate(dev, 8)) dev_warn(&dev->dev, "failed to switch to 8 MHz operation for reading of device ID.\n"); @@ -447,19 +448,22 @@ int ipack_device_register(struct ipack_device *dev) dev_err(&dev->dev, "failed to switch to 32 MHz operation.\n"); } - ret = device_register(&dev->dev); - if (ret < 0) - kfree(dev->id); + return 0; +} +EXPORT_SYMBOL_GPL(ipack_device_init); - return ret; +int ipack_device_add(struct ipack_device *dev) +{ + return device_add(&dev->dev); } -EXPORT_SYMBOL_GPL(ipack_device_register); +EXPORT_SYMBOL_GPL(ipack_device_add); -void ipack_device_unregister(struct ipack_device *dev) +void ipack_device_del(struct ipack_device *dev) { - device_unregister(&dev->dev); + device_del(&dev->dev); + ipack_put_device(dev); } -EXPORT_SYMBOL_GPL(ipack_device_unregister); +EXPORT_SYMBOL_GPL(ipack_device_del); void ipack_get_device(struct ipack_device *dev) { diff --git a/include/linux/ipack.h b/include/linux/ipack.h index def91fd996f4..1888e06ddf64 100644 --- a/include/linux/ipack.h +++ b/include/linux/ipack.h @@ -207,19 +207,38 @@ int ipack_driver_register(struct ipack_driver *edrv, struct module *owner, void ipack_driver_unregister(struct ipack_driver *edrv); /** - * ipack_device_register -- register an IPack device with the kernel - * @dev: the new device to register. + * ipack_device_init -- initialize an IPack device + * @dev: the new device to initialize. 
* - * Register a new IPack device ("module" in IndustryPack jargon). The call - * is done by the carrier driver. The carrier should populate the fields - * bus and slot as well as the region array of @dev prior to calling this - * function. The rest of the fields will be allocated and populated - * during registration. + * Initialize a new IPack device ("module" in IndustryPack jargon). The call + * is done by the carrier driver. The carrier should populate the fields + * bus and slot as well as the region array of @dev prior to calling this + * function. The rest of the fields will be allocated and populated + * during initialization. * - * Return zero on success or error code on failure. + * Return zero on success or error code on failure. + * + * NOTE: _Never_ directly free @dev after calling this function, even + * if it returned an error! Always use ipack_put_device() to give up the + * reference initialized in this function instead. + */ +int ipack_device_init(struct ipack_device *dev); + +/** + * ipack_device_add -- Add an IPack device + * @dev: the new device to add. + * + * Add a new IPack device. The call is done by the carrier driver + * after calling ipack_device_init(). + * + * Return zero on success or error code on failure. + * + * NOTE: _Never_ directly free @dev after calling this function, even + * if it returned an error! Always use ipack_put_device() to give up the + * reference initialized in this function instead. */ -int ipack_device_register(struct ipack_device *dev); -void ipack_device_unregister(struct ipack_device *dev); +int ipack_device_add(struct ipack_device *dev); +void ipack_device_del(struct ipack_device *dev); void ipack_get_device(struct ipack_device *dev); void ipack_put_device(struct ipack_device *dev); -- cgit From 5caf4636259ae3af0efbb9bfc4cd97874b547c7d Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Tue, 12 Mar 2013 11:56:46 +0800 Subject: clocksource: Add new feature flag CLOCK_SOURCE_SUSPEND_NONSTOP Some x86 processors have a TSC clocksource, which continues to run even when the system is suspended. Also most OMAP platforms have a 32 KHz timer which has similar capability. Add a feature flag so that it could be utilized. Signed-off-by: Feng Tang Signed-off-by: John Stultz --- include/linux/clocksource.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 27cfda427dd9..aa7032c7238f 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -206,6 +206,7 @@ struct clocksource { #define CLOCK_SOURCE_WATCHDOG 0x10 #define CLOCK_SOURCE_VALID_FOR_HRES 0x20 #define CLOCK_SOURCE_UNSTABLE 0x40 +#define CLOCK_SOURCE_SUSPEND_NONSTOP 0x80 /* simplify initialization of mask field */ #define CLOCKSOURCE_MASK(bits) (cycle_t)((bits) < 64 ? ((1ULL<<(bits))-1) : -1) -- cgit From a362db3d6c8a952cbde510b1fa35d0ee001b347e Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sat, 16 Mar 2013 04:47:55 +0000 Subject: net: fix some typos in netif features Cc: Pravin B Shelar Cc: "David S. Miller" Signed-off-by: Cong Wang Acked-by: Pravin B Shelar Signed-off-by: David S.
Miller --- include/linux/netdev_features.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index f5e797c0c2a4..d6ee2d008ee4 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -102,8 +102,8 @@ enum { #define NETIF_F_VLAN_CHALLENGED __NETIF_F(VLAN_CHALLENGED) #define NETIF_F_RXFCS __NETIF_F(RXFCS) #define NETIF_F_RXALL __NETIF_F(RXALL) -#define NETIF_F_GRE_GSO __NETIF_F(GSO_GRE) -#define NETIF_F_UDP_TUNNEL __NETIF_F(UDP_TUNNEL) +#define NETIF_F_GSO_GRE __NETIF_F(GSO_GRE) +#define NETIF_F_GSO_UDP_TUNNEL __NETIF_F(GSO_UDP_TUNNEL) /* Features valid for ethtool to change */ /* = all defined minus driver/device-class-related */ -- cgit From 1a2c6181c4a1922021b4d7df373bba612c3e5f04 Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Sun, 17 Mar 2013 08:23:34 +0000 Subject: tcp: Remove TCPCT TCPCT uses option-number 253, reserved for experimental use and should not be used in production environments. Further, TCPCT does not fully implement RFC 6013. As a nice side-effect, removing TCPCT increases TCP's performance for very short flows: Doing an apache-benchmark with -c 100 -n 100000, sending HTTP-requests for files of 1KB size. before this patch: average (among 7 runs) of 20845.5 Requests/Second after: average (among 7 runs) of 21403.6 Requests/Second Signed-off-by: Christoph Paasch Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 8 - drivers/infiniband/hw/cxgb4/cm.c | 2 +- include/linux/tcp.h | 10 -- include/net/request_sock.h | 8 +- include/net/tcp.h | 89 +---------- include/uapi/linux/tcp.h | 26 ---- net/dccp/ipv4.c | 5 +- net/dccp/ipv6.c | 5 +- net/ipv4/inet_connection_sock.c | 2 +- net/ipv4/syncookies.c | 3 +- net/ipv4/sysctl_net_ipv4.c | 7 - net/ipv4/tcp.c | 267 --------------------------------- net/ipv4/tcp_input.c | 69 +-------- net/ipv4/tcp_ipv4.c | 60 +------- net/ipv4/tcp_minisocks.c | 40 +---- net/ipv4/tcp_output.c | 219 +-------------------------- net/ipv6/syncookies.c | 3 +- net/ipv6/tcp_ipv6.c | 56 +------ 18 files changed, 38 insertions(+), 841 deletions(-) (limited to 'include/linux') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 18a24c405ac0..17953e2bc3e9 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -175,14 +175,6 @@ tcp_congestion_control - STRING is inherited. [see setsockopt(listenfd, SOL_TCP, TCP_CONGESTION, "name" ...) ] -tcp_cookie_size - INTEGER - Default size of TCP Cookie Transactions (TCPCT) option, that may be - overridden on a per socket basis by the TCPCT socket option. - Values greater than the maximum (16) are interpreted as the maximum. - Values greater than zero and less than the minimum (8) are interpreted - as the minimum. Odd values are interpreted as the next even value. - Default: 0 (off). - tcp_dsack - BOOLEAN Allows TCP to send "duplicate" SACKs. 
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 8dcc84fd9d30..54fd31fcc332 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -2915,7 +2915,7 @@ static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid , u8 tos) */ memset(&tmp_opt, 0, sizeof(tmp_opt)); tcp_clear_options(&tmp_opt); - tcp_parse_options(skb, &tmp_opt, NULL, 0, NULL); + tcp_parse_options(skb, &tmp_opt, 0, NULL); req = (struct cpl_pass_accept_req *)__skb_push(skb, sizeof(*req)); memset(req, 0, sizeof(*req)); diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 763c108ee03d..ed6a7456eecd 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -90,9 +90,6 @@ struct tcp_options_received { sack_ok : 4, /* SACK seen on SYN packet */ snd_wscale : 4, /* Window scaling received from sender */ rcv_wscale : 4; /* Window scaling to send to receiver */ - u8 cookie_plus:6, /* bytes in authenticator/cookie option */ - cookie_out_never:1, - cookie_in_always:1; u8 num_sacks; /* Number of SACK blocks */ u16 user_mss; /* mss requested by user in ioctl */ u16 mss_clamp; /* Maximal mss, negotiated at connection setup */ @@ -102,7 +99,6 @@ static inline void tcp_clear_options(struct tcp_options_received *rx_opt) { rx_opt->tstamp_ok = rx_opt->sack_ok = 0; rx_opt->wscale_ok = rx_opt->snd_wscale = 0; - rx_opt->cookie_plus = 0; } /* This is the max number of SACKS that we'll generate and process. It's safe @@ -320,12 +316,6 @@ struct tcp_sock { struct tcp_md5sig_info __rcu *md5sig_info; #endif - /* When the cookie options are generated and exchanged, then this - * object holds a reference to them (cookie_values->kref). Also - * contains related tcp_cookie_transactions fields. - */ - struct tcp_cookie_values *cookie_values; - /* TCP fastopen related information */ struct tcp_fastopen_request *fastopen_req; /* fastopen_rsk points to request_sock that resulted in this big diff --git a/include/net/request_sock.h b/include/net/request_sock.h index a51dbd17c2de..9069e65c1c56 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -27,19 +27,13 @@ struct sk_buff; struct dst_entry; struct proto; -/* empty to "strongly type" an otherwise void parameter. - */ -struct request_values { -}; - struct request_sock_ops { int family; int obj_size; struct kmem_cache *slab; char *slab_name; int (*rtx_syn_ack)(struct sock *sk, - struct request_sock *req, - struct request_values *rvp); + struct request_sock *req); void (*send_ack)(struct sock *sk, struct sk_buff *skb, struct request_sock *req); void (*send_reset)(struct sock *sk, diff --git a/include/net/tcp.h b/include/net/tcp.h index ab9f947b118b..7f2f17198d75 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -179,7 +179,6 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); #define TCPOPT_SACK 5 /* SACK Block */ #define TCPOPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */ #define TCPOPT_MD5SIG 19 /* MD5 Signature (RFC2385) */ -#define TCPOPT_COOKIE 253 /* Cookie extension (experimental) */ #define TCPOPT_EXP 254 /* Experimental */ /* Magic number to be after the option value for sharing TCP * experimental options. 
See draft-ietf-tcpm-experimental-options-00.txt @@ -454,7 +453,7 @@ extern void tcp_syn_ack_timeout(struct sock *sk, struct request_sock *req); extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len); extern void tcp_parse_options(const struct sk_buff *skb, - struct tcp_options_received *opt_rx, const u8 **hvpp, + struct tcp_options_received *opt_rx, int estab, struct tcp_fastopen_cookie *foc); extern const u8 *tcp_parse_md5sig_option(const struct tcphdr *th); @@ -476,7 +475,6 @@ extern int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, extern int tcp_connect(struct sock *sk); extern struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst, struct request_sock *req, - struct request_values *rvp, struct tcp_fastopen_cookie *foc); extern int tcp_disconnect(struct sock *sk, int flags); @@ -1589,91 +1587,6 @@ struct tcp_request_sock_ops { #endif }; -/* Using SHA1 for now, define some constants. - */ -#define COOKIE_DIGEST_WORDS (SHA_DIGEST_WORDS) -#define COOKIE_MESSAGE_WORDS (SHA_MESSAGE_BYTES / 4) -#define COOKIE_WORKSPACE_WORDS (COOKIE_DIGEST_WORDS + COOKIE_MESSAGE_WORDS) - -extern int tcp_cookie_generator(u32 *bakery); - -/** - * struct tcp_cookie_values - each socket needs extra space for the - * cookies, together with (optional) space for any SYN data. - * - * A tcp_sock contains a pointer to the current value, and this is - * cloned to the tcp_timewait_sock. - * - * @cookie_pair: variable data from the option exchange. - * - * @cookie_desired: user specified tcpct_cookie_desired. Zero - * indicates default (sysctl_tcp_cookie_size). - * After cookie sent, remembers size of cookie. - * Range 0, TCP_COOKIE_MIN to TCP_COOKIE_MAX. - * - * @s_data_desired: user specified tcpct_s_data_desired. When the - * constant payload is specified (@s_data_constant), - * holds its length instead. - * Range 0 to TCP_MSS_DESIRED. - * - * @s_data_payload: constant data that is to be included in the - * payload of SYN or SYNACK segments when the - * cookie option is present. - */ -struct tcp_cookie_values { - struct kref kref; - u8 cookie_pair[TCP_COOKIE_PAIR_SIZE]; - u8 cookie_pair_size; - u8 cookie_desired; - u16 s_data_desired:11, - s_data_constant:1, - s_data_in:1, - s_data_out:1, - s_data_unused:2; - u8 s_data_payload[0]; -}; - -static inline void tcp_cookie_values_release(struct kref *kref) -{ - kfree(container_of(kref, struct tcp_cookie_values, kref)); -} - -/* The length of constant payload data. Note that s_data_desired is - * overloaded, depending on s_data_constant: either the length of constant - * data (returned here) or the limit on variable data. - */ -static inline int tcp_s_data_size(const struct tcp_sock *tp) -{ - return (tp->cookie_values != NULL && tp->cookie_values->s_data_constant) - ? tp->cookie_values->s_data_desired - : 0; -} - -/** - * struct tcp_extend_values - tcp_ipv?.c to tcp_output.c workspace. - * - * As tcp_request_sock has already been extended in other places, the - * only remaining method is to pass stack values along as function - * parameters. These parameters are not needed after sending SYNACK. - * - * @cookie_bakery: cryptographic secret and message workspace. - * - * @cookie_plus: bytes in authenticator/cookie option, copied from - * struct tcp_options_received (above). 
- */ -struct tcp_extend_values { - struct request_values rv; - u32 cookie_bakery[COOKIE_WORKSPACE_WORDS]; - u8 cookie_plus:6, - cookie_out_never:1, - cookie_in_always:1; -}; - -static inline struct tcp_extend_values *tcp_xv(struct request_values *rvp) -{ - return (struct tcp_extend_values *)rvp; -} - extern void tcp_v4_init(void); extern void tcp_init(void); diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 6b1ead0b0c9d..8d776ebc4829 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -102,7 +102,6 @@ enum { #define TCP_QUICKACK 12 /* Block/reenable quick acks */ #define TCP_CONGESTION 13 /* Congestion control algorithm */ #define TCP_MD5SIG 14 /* TCP MD5 Signature (RFC2385) */ -#define TCP_COOKIE_TRANSACTIONS 15 /* TCP Cookie Transactions */ #define TCP_THIN_LINEAR_TIMEOUTS 16 /* Use linear timeouts for thin streams*/ #define TCP_THIN_DUPACK 17 /* Fast retrans. after 1 dupack */ #define TCP_USER_TIMEOUT 18 /* How long for loss retry before timeout */ @@ -199,29 +198,4 @@ struct tcp_md5sig { __u8 tcpm_key[TCP_MD5SIG_MAXKEYLEN]; /* key (binary) */ }; -/* for TCP_COOKIE_TRANSACTIONS (TCPCT) socket option */ -#define TCP_COOKIE_MIN 8 /* 64-bits */ -#define TCP_COOKIE_MAX 16 /* 128-bits */ -#define TCP_COOKIE_PAIR_SIZE (2*TCP_COOKIE_MAX) - -/* Flags for both getsockopt and setsockopt */ -#define TCP_COOKIE_IN_ALWAYS (1 << 0) /* Discard SYN without cookie */ -#define TCP_COOKIE_OUT_NEVER (1 << 1) /* Prohibit outgoing cookies, - * supercedes everything. */ - -/* Flags for getsockopt */ -#define TCP_S_DATA_IN (1 << 2) /* Was data received? */ -#define TCP_S_DATA_OUT (1 << 3) /* Was data sent? */ - -/* TCP_COOKIE_TRANSACTIONS data */ -struct tcp_cookie_transactions { - __u16 tcpct_flags; /* see above */ - __u8 __tcpct_pad1; /* zero */ - __u8 tcpct_cookie_desired; /* bytes */ - __u16 tcpct_s_data_desired; /* bytes of variable data */ - __u16 tcpct_used; /* bytes in value */ - __u8 tcpct_value[TCP_MSS_DEFAULT]; -}; - - #endif /* _UAPI_LINUX_TCP_H */ diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 4f9f5eb478f1..ebc54fef85a5 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -500,8 +500,7 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk, return &rt->dst; } -static int dccp_v4_send_response(struct sock *sk, struct request_sock *req, - struct request_values *rv_unused) +static int dccp_v4_send_response(struct sock *sk, struct request_sock *req) { int err = -1; struct sk_buff *skb; @@ -658,7 +657,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) dreq->dreq_gss = dreq->dreq_iss; dreq->dreq_service = service; - if (dccp_v4_send_response(sk, req, NULL)) + if (dccp_v4_send_response(sk, req)) goto drop_and_free; inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 6e05981f271e..9c61f9c02fdb 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -213,8 +213,7 @@ out: } -static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, - struct request_values *rv_unused) +static int dccp_v6_send_response(struct sock *sk, struct request_sock *req) { struct inet6_request_sock *ireq6 = inet6_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); @@ -428,7 +427,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) dreq->dreq_gss = dreq->dreq_iss; dreq->dreq_service = service; - if (dccp_v6_send_response(sk, req, NULL)) + if (dccp_v6_send_response(sk, req)) goto drop_and_free; inet6_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); diff 
--git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 786d97aee751..6acb541c9091 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -559,7 +559,7 @@ static inline void syn_ack_recalc(struct request_sock *req, const int thresh, int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req) { - int err = req->rsk_ops->rtx_syn_ack(parent, req, NULL); + int err = req->rsk_ops->rtx_syn_ack(parent, req); if (!err) req->num_retrans++; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index ef54377fb11c..7f4a5cb8f8d0 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -267,7 +267,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, struct ip_options *opt) { struct tcp_options_received tcp_opt; - const u8 *hash_location; struct inet_request_sock *ireq; struct tcp_request_sock *treq; struct tcp_sock *tp = tcp_sk(sk); @@ -294,7 +293,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, /* check for timestamp cookie support */ memset(&tcp_opt, 0, sizeof(tcp_opt)); - tcp_parse_options(skb, &tcp_opt, &hash_location, 0, NULL); + tcp_parse_options(skb, &tcp_opt, 0, NULL); if (!cookie_check_timestamp(&tcp_opt, sock_net(sk), &ecn_ok)) goto out; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index cca4550f4082..cb45062c8be0 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -732,13 +732,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, - { - .procname = "tcp_cookie_size", - .data = &sysctl_tcp_cookie_size, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "tcp_thin_linear_timeouts", .data = &sysctl_tcp_thin_linear_timeouts, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 8d14573ade77..17a6810af5c8 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -409,15 +409,6 @@ void tcp_init_sock(struct sock *sk) icsk->icsk_sync_mss = tcp_sync_mss; - /* TCP Cookie Transactions */ - if (sysctl_tcp_cookie_size > 0) { - /* Default, cookies without s_data_payload. */ - tp->cookie_values = - kzalloc(sizeof(*tp->cookie_values), - sk->sk_allocation); - if (tp->cookie_values != NULL) - kref_init(&tp->cookie_values->kref); - } /* Presumed zeroed, in order of appearance: * cookie_in_always, cookie_out_never, * s_data_constant, s_data_in, s_data_out @@ -2397,92 +2388,6 @@ static int do_tcp_setsockopt(struct sock *sk, int level, release_sock(sk); return err; } - case TCP_COOKIE_TRANSACTIONS: { - struct tcp_cookie_transactions ctd; - struct tcp_cookie_values *cvp = NULL; - - if (sizeof(ctd) > optlen) - return -EINVAL; - if (copy_from_user(&ctd, optval, sizeof(ctd))) - return -EFAULT; - - if (ctd.tcpct_used > sizeof(ctd.tcpct_value) || - ctd.tcpct_s_data_desired > TCP_MSS_DESIRED) - return -EINVAL; - - if (ctd.tcpct_cookie_desired == 0) { - /* default to global value */ - } else if ((0x1 & ctd.tcpct_cookie_desired) || - ctd.tcpct_cookie_desired > TCP_COOKIE_MAX || - ctd.tcpct_cookie_desired < TCP_COOKIE_MIN) { - return -EINVAL; - } - - if (TCP_COOKIE_OUT_NEVER & ctd.tcpct_flags) { - /* Supercedes all other values */ - lock_sock(sk); - if (tp->cookie_values != NULL) { - kref_put(&tp->cookie_values->kref, - tcp_cookie_values_release); - tp->cookie_values = NULL; - } - tp->rx_opt.cookie_in_always = 0; /* false */ - tp->rx_opt.cookie_out_never = 1; /* true */ - release_sock(sk); - return err; - } - - /* Allocate ancillary memory before locking. 
- */ - if (ctd.tcpct_used > 0 || - (tp->cookie_values == NULL && - (sysctl_tcp_cookie_size > 0 || - ctd.tcpct_cookie_desired > 0 || - ctd.tcpct_s_data_desired > 0))) { - cvp = kzalloc(sizeof(*cvp) + ctd.tcpct_used, - GFP_KERNEL); - if (cvp == NULL) - return -ENOMEM; - - kref_init(&cvp->kref); - } - lock_sock(sk); - tp->rx_opt.cookie_in_always = - (TCP_COOKIE_IN_ALWAYS & ctd.tcpct_flags); - tp->rx_opt.cookie_out_never = 0; /* false */ - - if (tp->cookie_values != NULL) { - if (cvp != NULL) { - /* Changed values are recorded by a changed - * pointer, ensuring the cookie will differ, - * without separately hashing each value later. - */ - kref_put(&tp->cookie_values->kref, - tcp_cookie_values_release); - } else { - cvp = tp->cookie_values; - } - } - - if (cvp != NULL) { - cvp->cookie_desired = ctd.tcpct_cookie_desired; - - if (ctd.tcpct_used > 0) { - memcpy(cvp->s_data_payload, ctd.tcpct_value, - ctd.tcpct_used); - cvp->s_data_desired = ctd.tcpct_used; - cvp->s_data_constant = 1; /* true */ - } else { - /* No constant payload data. */ - cvp->s_data_desired = ctd.tcpct_s_data_desired; - cvp->s_data_constant = 0; /* false */ - } - - tp->cookie_values = cvp; - } - release_sock(sk); - return err; - } default: /* fallthru */ break; @@ -2902,41 +2807,6 @@ static int do_tcp_getsockopt(struct sock *sk, int level, return -EFAULT; return 0; - case TCP_COOKIE_TRANSACTIONS: { - struct tcp_cookie_transactions ctd; - struct tcp_cookie_values *cvp = tp->cookie_values; - - if (get_user(len, optlen)) - return -EFAULT; - if (len < sizeof(ctd)) - return -EINVAL; - - memset(&ctd, 0, sizeof(ctd)); - ctd.tcpct_flags = (tp->rx_opt.cookie_in_always ? - TCP_COOKIE_IN_ALWAYS : 0) - | (tp->rx_opt.cookie_out_never ? - TCP_COOKIE_OUT_NEVER : 0); - - if (cvp != NULL) { - ctd.tcpct_flags |= (cvp->s_data_in ? - TCP_S_DATA_IN : 0) - | (cvp->s_data_out ? - TCP_S_DATA_OUT : 0); - - ctd.tcpct_cookie_desired = cvp->cookie_desired; - ctd.tcpct_s_data_desired = cvp->s_data_desired; - - memcpy(&ctd.tcpct_value[0], &cvp->cookie_pair[0], - cvp->cookie_pair_size); - ctd.tcpct_used = cvp->cookie_pair_size; - } - - if (put_user(sizeof(ctd), optlen)) - return -EFAULT; - if (copy_to_user(optval, &ctd, sizeof(ctd))) - return -EFAULT; - return 0; - } case TCP_THIN_LINEAR_TIMEOUTS: val = tp->thin_lto; break; @@ -3409,134 +3279,6 @@ EXPORT_SYMBOL(tcp_md5_hash_key); #endif -/* Each Responder maintains up to two secret values concurrently for - * efficient secret rollover. Each secret value has 4 states: - * - * Generating. (tcp_secret_generating != tcp_secret_primary) - * Generates new Responder-Cookies, but not yet used for primary - * verification. This is a short-term state, typically lasting only - * one round trip time (RTT). - * - * Primary. (tcp_secret_generating == tcp_secret_primary) - * Used both for generation and primary verification. - * - * Retiring. (tcp_secret_retiring != tcp_secret_secondary) - * Used for verification, until the first failure that can be - * verified by the newer Generating secret. At that time, this - * cookie's state is changed to Secondary, and the Generating - * cookie's state is changed to Primary. This is a short-term state, - * typically lasting only one round trip time (RTT). - * - * Secondary. (tcp_secret_retiring == tcp_secret_secondary) - * Used for secondary verification, after primary verification - * failures. This state lasts no more than twice the Maximum Segment - * Lifetime (2MSL). Then, the secret is discarded. - */ -struct tcp_cookie_secret { - /* The secret is divided into two parts. 
The digest part is the - * equivalent of previously hashing a secret and saving the state, - * and serves as an initialization vector (IV). The message part - * serves as the trailing secret. - */ - u32 secrets[COOKIE_WORKSPACE_WORDS]; - unsigned long expires; -}; - -#define TCP_SECRET_1MSL (HZ * TCP_PAWS_MSL) -#define TCP_SECRET_2MSL (HZ * TCP_PAWS_MSL * 2) -#define TCP_SECRET_LIFE (HZ * 600) - -static struct tcp_cookie_secret tcp_secret_one; -static struct tcp_cookie_secret tcp_secret_two; - -/* Essentially a circular list, without dynamic allocation. */ -static struct tcp_cookie_secret *tcp_secret_generating; -static struct tcp_cookie_secret *tcp_secret_primary; -static struct tcp_cookie_secret *tcp_secret_retiring; -static struct tcp_cookie_secret *tcp_secret_secondary; - -static DEFINE_SPINLOCK(tcp_secret_locker); - -/* Select a pseudo-random word in the cookie workspace. - */ -static inline u32 tcp_cookie_work(const u32 *ws, const int n) -{ - return ws[COOKIE_DIGEST_WORDS + ((COOKIE_MESSAGE_WORDS-1) & ws[n])]; -} - -/* Fill bakery[COOKIE_WORKSPACE_WORDS] with generator, updating as needed. - * Called in softirq context. - * Returns: 0 for success. - */ -int tcp_cookie_generator(u32 *bakery) -{ - unsigned long jiffy = jiffies; - - if (unlikely(time_after_eq(jiffy, tcp_secret_generating->expires))) { - spin_lock_bh(&tcp_secret_locker); - if (!time_after_eq(jiffy, tcp_secret_generating->expires)) { - /* refreshed by another */ - memcpy(bakery, - &tcp_secret_generating->secrets[0], - COOKIE_WORKSPACE_WORDS); - } else { - /* still needs refreshing */ - get_random_bytes(bakery, COOKIE_WORKSPACE_WORDS); - - /* The first time, paranoia assumes that the - * randomization function isn't as strong. But, - * this secret initialization is delayed until - * the last possible moment (packet arrival). - * Although that time is observable, it is - * unpredictably variable. Mash in the most - * volatile clock bits available, and expire the - * secret extra quickly. - */ - if (unlikely(tcp_secret_primary->expires == - tcp_secret_secondary->expires)) { - struct timespec tv; - - getnstimeofday(&tv); - bakery[COOKIE_DIGEST_WORDS+0] ^= - (u32)tv.tv_nsec; - - tcp_secret_secondary->expires = jiffy - + TCP_SECRET_1MSL - + (0x0f & tcp_cookie_work(bakery, 0)); - } else { - tcp_secret_secondary->expires = jiffy - + TCP_SECRET_LIFE - + (0xff & tcp_cookie_work(bakery, 1)); - tcp_secret_primary->expires = jiffy - + TCP_SECRET_2MSL - + (0x1f & tcp_cookie_work(bakery, 2)); - } - memcpy(&tcp_secret_secondary->secrets[0], - bakery, COOKIE_WORKSPACE_WORDS); - - rcu_assign_pointer(tcp_secret_generating, - tcp_secret_secondary); - rcu_assign_pointer(tcp_secret_retiring, - tcp_secret_primary); - /* - * Neither call_rcu() nor synchronize_rcu() needed. - * Retiring data is not freed. It is replaced after - * further (locked) pointer updates, and a quiet time - * (minimum 1MSL, maximum LIFE - 2MSL). 
- */ - } - spin_unlock_bh(&tcp_secret_locker); - } else { - rcu_read_lock_bh(); - memcpy(bakery, - &rcu_dereference(tcp_secret_generating)->secrets[0], - COOKIE_WORKSPACE_WORDS); - rcu_read_unlock_bh(); - } - return 0; -} -EXPORT_SYMBOL(tcp_cookie_generator); - void tcp_done(struct sock *sk) { struct request_sock *req = tcp_sk(sk)->fastopen_rsk; @@ -3591,7 +3333,6 @@ void __init tcp_init(void) unsigned long limit; int max_rshare, max_wshare, cnt; unsigned int i; - unsigned long jiffy = jiffies; BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); @@ -3667,13 +3408,5 @@ void __init tcp_init(void) tcp_register_congestion_control(&tcp_reno); - memset(&tcp_secret_one.secrets[0], 0, sizeof(tcp_secret_one.secrets)); - memset(&tcp_secret_two.secrets[0], 0, sizeof(tcp_secret_two.secrets)); - tcp_secret_one.expires = jiffy; /* past due */ - tcp_secret_two.expires = jiffy; /* past due */ - tcp_secret_generating = &tcp_secret_one; - tcp_secret_primary = &tcp_secret_one; - tcp_secret_retiring = &tcp_secret_two; - tcp_secret_secondary = &tcp_secret_two; tcp_tasklet_init(); } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 836d74dd0187..19f0149fb6a2 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3760,8 +3760,8 @@ old_ack: * But, this can also be called on packets in the established flow when * the fast version below fails. */ -void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *opt_rx, - const u8 **hvpp, int estab, +void tcp_parse_options(const struct sk_buff *skb, + struct tcp_options_received *opt_rx, int estab, struct tcp_fastopen_cookie *foc) { const unsigned char *ptr; @@ -3845,31 +3845,6 @@ void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *o */ break; #endif - case TCPOPT_COOKIE: - /* This option is variable length. - */ - switch (opsize) { - case TCPOLEN_COOKIE_BASE: - /* not yet implemented */ - break; - case TCPOLEN_COOKIE_PAIR: - /* not yet implemented */ - break; - case TCPOLEN_COOKIE_MIN+0: - case TCPOLEN_COOKIE_MIN+2: - case TCPOLEN_COOKIE_MIN+4: - case TCPOLEN_COOKIE_MIN+6: - case TCPOLEN_COOKIE_MAX: - /* 16-bit multiple */ - opt_rx->cookie_plus = opsize; - *hvpp = ptr; - break; - default: - /* ignore option */ - break; - } - break; - case TCPOPT_EXP: /* Fast Open option shares code 254 using a * 16 bits magic number. It's valid only in @@ -3915,8 +3890,7 @@ static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr * If it is wrong it falls back on tcp_parse_options(). */ static bool tcp_fast_parse_options(const struct sk_buff *skb, - const struct tcphdr *th, - struct tcp_sock *tp, const u8 **hvpp) + const struct tcphdr *th, struct tcp_sock *tp) { /* In the spirit of fast parsing, compare doff directly to constant * values. Because equality is used, short doff can be ignored here. @@ -3930,7 +3904,7 @@ static bool tcp_fast_parse_options(const struct sk_buff *skb, return true; } - tcp_parse_options(skb, &tp->rx_opt, hvpp, 1, NULL); + tcp_parse_options(skb, &tp->rx_opt, 1, NULL); if (tp->rx_opt.saw_tstamp) tp->rx_opt.rcv_tsecr -= tp->tsoffset; @@ -5311,12 +5285,10 @@ out: static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th, int syn_inerr) { - const u8 *hash_location; struct tcp_sock *tp = tcp_sk(sk); /* RFC1323: H1. Apply PAWS check first. 
*/ - if (tcp_fast_parse_options(skb, th, tp, &hash_location) && - tp->rx_opt.saw_tstamp && + if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && tcp_paws_discard(sk, skb)) { if (!th->rst) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); @@ -5670,12 +5642,11 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, if (mss == tp->rx_opt.user_mss) { struct tcp_options_received opt; - const u8 *hash_location; /* Get original SYNACK MSS value if user MSS sets mss_clamp */ tcp_clear_options(&opt); opt.user_mss = opt.mss_clamp = 0; - tcp_parse_options(synack, &opt, &hash_location, 0, NULL); + tcp_parse_options(synack, &opt, 0, NULL); mss = opt.mss_clamp; } @@ -5706,14 +5677,12 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th, unsigned int len) { - const u8 *hash_location; struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); - struct tcp_cookie_values *cvp = tp->cookie_values; struct tcp_fastopen_cookie foc = { .len = -1 }; int saved_clamp = tp->rx_opt.mss_clamp; - tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0, &foc); + tcp_parse_options(skb, &tp->rx_opt, 0, &foc); if (tp->rx_opt.saw_tstamp) tp->rx_opt.rcv_tsecr -= tp->tsoffset; @@ -5810,30 +5779,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, * is initialized. */ tp->copied_seq = tp->rcv_nxt; - if (cvp != NULL && - cvp->cookie_pair_size > 0 && - tp->rx_opt.cookie_plus > 0) { - int cookie_size = tp->rx_opt.cookie_plus - - TCPOLEN_COOKIE_BASE; - int cookie_pair_size = cookie_size - + cvp->cookie_desired; - - /* A cookie extension option was sent and returned. - * Note that each incoming SYNACK replaces the - * Responder cookie. The initial exchange is most - * fragile, as protection against spoofing relies - * entirely upon the sequence and timestamp (above). - * This replacement strategy allows the correct pair to - * pass through, while any others will be filtered via - * Responder verification later. 
- */ - if (sizeof(cvp->cookie_pair) >= cookie_pair_size) { - memcpy(&cvp->cookie_pair[cvp->cookie_desired], - hash_location, cookie_size); - cvp->cookie_pair_size = cookie_pair_size; - } - } - smp_mb(); tcp_finish_connect(sk, skb); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index b7ab868c8284..b27c758ca23f 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -838,7 +838,6 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, */ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, struct request_sock *req, - struct request_values *rvp, u16 queue_mapping, bool nocache) { @@ -851,7 +850,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) return -1; - skb = tcp_make_synack(sk, dst, req, rvp, NULL); + skb = tcp_make_synack(sk, dst, req, NULL); if (skb) { __tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr); @@ -868,10 +867,9 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, return err; } -static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req, - struct request_values *rvp) +static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req) { - int res = tcp_v4_send_synack(sk, NULL, req, rvp, 0, false); + int res = tcp_v4_send_synack(sk, NULL, req, 0, false); if (!res) TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); @@ -1371,8 +1369,7 @@ static bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb, static int tcp_v4_conn_req_fastopen(struct sock *sk, struct sk_buff *skb, struct sk_buff *skb_synack, - struct request_sock *req, - struct request_values *rvp) + struct request_sock *req) { struct tcp_sock *tp = tcp_sk(sk); struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; @@ -1467,9 +1464,7 @@ static int tcp_v4_conn_req_fastopen(struct sock *sk, int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) { - struct tcp_extend_values tmp_ext; struct tcp_options_received tmp_opt; - const u8 *hash_location; struct request_sock *req; struct inet_request_sock *ireq; struct tcp_sock *tp = tcp_sk(sk); @@ -1519,42 +1514,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) tcp_clear_options(&tmp_opt); tmp_opt.mss_clamp = TCP_MSS_DEFAULT; tmp_opt.user_mss = tp->rx_opt.user_mss; - tcp_parse_options(skb, &tmp_opt, &hash_location, 0, - want_cookie ? NULL : &foc); - - if (tmp_opt.cookie_plus > 0 && - tmp_opt.saw_tstamp && - !tp->rx_opt.cookie_out_never && - (sysctl_tcp_cookie_size > 0 || - (tp->cookie_values != NULL && - tp->cookie_values->cookie_desired > 0))) { - u8 *c; - u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS]; - int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE; - - if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0) - goto drop_and_release; - - /* Secret recipe starts with IP addresses */ - *mess++ ^= (__force u32)daddr; - *mess++ ^= (__force u32)saddr; - - /* plus variable length Initiator Cookie */ - c = (u8 *)mess; - while (l-- > 0) - *c++ ^= *hash_location++; - - want_cookie = false; /* not our kind of cookie */ - tmp_ext.cookie_out_never = 0; /* false */ - tmp_ext.cookie_plus = tmp_opt.cookie_plus; - } else if (!tp->rx_opt.cookie_in_always) { - /* redundant indications, but ensure initialization. */ - tmp_ext.cookie_out_never = 1; /* true */ - tmp_ext.cookie_plus = 0; - } else { - goto drop_and_release; - } - tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always; + tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? 
NULL : &foc); if (want_cookie && !tmp_opt.saw_tstamp) tcp_clear_options(&tmp_opt); @@ -1636,7 +1596,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) * of tcp_v4_send_synack()->tcp_select_initial_window(). */ skb_synack = tcp_make_synack(sk, dst, req, - (struct request_values *)&tmp_ext, fastopen_cookie_present(&valid_foc) ? &valid_foc : NULL); if (skb_synack) { @@ -1660,8 +1619,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (fastopen_cookie_present(&foc) && foc.len != 0) NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVEFAIL); - } else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req, - (struct request_values *)&tmp_ext)) + } else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req)) goto drop_and_free; return 0; @@ -2241,12 +2199,6 @@ void tcp_v4_destroy_sock(struct sock *sk) if (inet_csk(sk)->icsk_bind_hash) inet_put_port(sk); - /* TCP Cookie Transactions */ - if (tp->cookie_values != NULL) { - kref_put(&tp->cookie_values->kref, - tcp_cookie_values_release); - tp->cookie_values = NULL; - } BUG_ON(tp->fastopen_rsk != NULL); /* If socket is aborted during connect operation */ diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 4bdb09fca401..8f0234f8bb95 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -93,13 +93,12 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, const struct tcphdr *th) { struct tcp_options_received tmp_opt; - const u8 *hash_location; struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); bool paws_reject = false; tmp_opt.saw_tstamp = 0; if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) { - tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); + tcp_parse_options(skb, &tmp_opt, 0, NULL); if (tmp_opt.saw_tstamp) { tmp_opt.rcv_tsecr -= tcptw->tw_ts_offset; @@ -388,32 +387,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct tcp_request_sock *treq = tcp_rsk(req); struct inet_connection_sock *newicsk = inet_csk(newsk); struct tcp_sock *newtp = tcp_sk(newsk); - struct tcp_sock *oldtp = tcp_sk(sk); - struct tcp_cookie_values *oldcvp = oldtp->cookie_values; - - /* TCP Cookie Transactions require space for the cookie pair, - * as it differs for each connection. There is no need to - * copy any s_data_payload stored at the original socket. - * Failure will prevent resuming the connection. 
- * - * Presumed copied, in order of appearance: - * cookie_in_always, cookie_out_never - */ - if (oldcvp != NULL) { - struct tcp_cookie_values *newcvp = - kzalloc(sizeof(*newtp->cookie_values), - GFP_ATOMIC); - - if (newcvp != NULL) { - kref_init(&newcvp->kref); - newcvp->cookie_desired = - oldcvp->cookie_desired; - newtp->cookie_values = newcvp; - } else { - /* Not Yet Implemented */ - newtp->cookie_values = NULL; - } - } /* Now setup tcp_sock */ newtp->pred_flags = 0; @@ -422,8 +395,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->rcv_nxt = treq->rcv_isn + 1; newtp->snd_sml = newtp->snd_una = - newtp->snd_nxt = newtp->snd_up = - treq->snt_isn + 1 + tcp_s_data_size(oldtp); + newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1; tcp_prequeue_init(newtp); INIT_LIST_HEAD(&newtp->tsq_node); @@ -460,8 +432,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, tcp_set_ca_state(newsk, TCP_CA_Open); tcp_init_xmit_timers(newsk); skb_queue_head_init(&newtp->out_of_order_queue); - newtp->write_seq = newtp->pushed_seq = - treq->snt_isn + 1 + tcp_s_data_size(oldtp); + newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1; newtp->rx_opt.saw_tstamp = 0; @@ -538,7 +509,6 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, bool fastopen) { struct tcp_options_received tmp_opt; - const u8 *hash_location; struct sock *child; const struct tcphdr *th = tcp_hdr(skb); __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); @@ -548,7 +518,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, tmp_opt.saw_tstamp = 0; if (th->doff > (sizeof(struct tcphdr)>>2)) { - tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); + tcp_parse_options(skb, &tmp_opt, 0, NULL); if (tmp_opt.saw_tstamp) { tmp_opt.ts_recent = req->ts_recent; @@ -648,7 +618,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, */ if ((flg & TCP_FLAG_ACK) && !fastopen && (TCP_SKB_CB(skb)->ack_seq != - tcp_rsk(req)->snt_isn + 1 + tcp_s_data_size(tcp_sk(sk)))) + tcp_rsk(req)->snt_isn + 1)) return sk; /* Also, it would be not so bad idea to check rcv_tsecr, which diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 8e7742f0b5d2..ac5871ebe086 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -65,9 +65,6 @@ int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS; /* By default, RFC2861 behavior. */ int sysctl_tcp_slow_start_after_idle __read_mostly = 1; -int sysctl_tcp_cookie_size __read_mostly = 0; /* TCP_COOKIE_MAX */ -EXPORT_SYMBOL_GPL(sysctl_tcp_cookie_size); - static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, int push_one, gfp_t gfp); @@ -386,7 +383,6 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp) #define OPTION_TS (1 << 1) #define OPTION_MD5 (1 << 2) #define OPTION_WSCALE (1 << 3) -#define OPTION_COOKIE_EXTENSION (1 << 4) #define OPTION_FAST_OPEN_COOKIE (1 << 8) struct tcp_out_options { @@ -400,36 +396,6 @@ struct tcp_out_options { struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */ }; -/* The sysctl int routines are generic, so check consistency here. 
- */ -static u8 tcp_cookie_size_check(u8 desired) -{ - int cookie_size; - - if (desired > 0) - /* previously specified */ - return desired; - - cookie_size = ACCESS_ONCE(sysctl_tcp_cookie_size); - if (cookie_size <= 0) - /* no default specified */ - return 0; - - if (cookie_size <= TCP_COOKIE_MIN) - /* value too small, specify minimum */ - return TCP_COOKIE_MIN; - - if (cookie_size >= TCP_COOKIE_MAX) - /* value too large, specify maximum */ - return TCP_COOKIE_MAX; - - if (cookie_size & 1) - /* 8-bit multiple, illegal, fix it */ - cookie_size++; - - return (u8)cookie_size; -} - /* Write previously computed TCP options to the packet. * * Beware: Something in the Internet is very sensitive to the ordering of @@ -448,27 +414,9 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, { u16 options = opts->options; /* mungable copy */ - /* Having both authentication and cookies for security is redundant, - * and there's certainly not enough room. Instead, the cookie-less - * extension variant is proposed. - * - * Consider the pessimal case with authentication. The options - * could look like: - * COOKIE|MD5(20) + MSS(4) + SACK|TS(12) + WSCALE(4) == 40 - */ if (unlikely(OPTION_MD5 & options)) { - if (unlikely(OPTION_COOKIE_EXTENSION & options)) { - *ptr++ = htonl((TCPOPT_COOKIE << 24) | - (TCPOLEN_COOKIE_BASE << 16) | - (TCPOPT_MD5SIG << 8) | - TCPOLEN_MD5SIG); - } else { - *ptr++ = htonl((TCPOPT_NOP << 24) | - (TCPOPT_NOP << 16) | - (TCPOPT_MD5SIG << 8) | - TCPOLEN_MD5SIG); - } - options &= ~OPTION_COOKIE_EXTENSION; + *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | + (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG); /* overload cookie hash location */ opts->hash_location = (__u8 *)ptr; ptr += 4; @@ -497,44 +445,6 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, *ptr++ = htonl(opts->tsecr); } - /* Specification requires after timestamp, so do it now. - * - * Consider the pessimal case without authentication. The options - * could look like: - * MSS(4) + SACK|TS(12) + COOKIE(20) + WSCALE(4) == 40 - */ - if (unlikely(OPTION_COOKIE_EXTENSION & options)) { - __u8 *cookie_copy = opts->hash_location; - u8 cookie_size = opts->hash_size; - - /* 8-bit multiple handled in tcp_cookie_size_check() above, - * and elsewhere. - */ - if (0x2 & cookie_size) { - __u8 *p = (__u8 *)ptr; - - /* 16-bit multiple */ - *p++ = TCPOPT_COOKIE; - *p++ = TCPOLEN_COOKIE_BASE + cookie_size; - *p++ = *cookie_copy++; - *p++ = *cookie_copy++; - ptr++; - cookie_size -= 2; - } else { - /* 32-bit multiple */ - *ptr++ = htonl(((TCPOPT_NOP << 24) | - (TCPOPT_NOP << 16) | - (TCPOPT_COOKIE << 8) | - TCPOLEN_COOKIE_BASE) + - cookie_size); - } - - if (cookie_size > 0) { - memcpy(ptr, cookie_copy, cookie_size); - ptr += (cookie_size / 4); - } - } - if (unlikely(OPTION_SACK_ADVERTISE & options)) { *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | @@ -593,11 +503,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, struct tcp_md5sig_key **md5) { struct tcp_sock *tp = tcp_sk(sk); - struct tcp_cookie_values *cvp = tp->cookie_values; unsigned int remaining = MAX_TCP_OPTION_SPACE; - u8 cookie_size = (!tp->rx_opt.cookie_out_never && cvp != NULL) ? - tcp_cookie_size_check(cvp->cookie_desired) : - 0; struct tcp_fastopen_request *fastopen = tp->fastopen_req; #ifdef CONFIG_TCP_MD5SIG @@ -649,52 +555,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, tp->syn_fastopen = 1; } } - /* Note that timestamps are required by the specification. 
- * - * Odd numbers of bytes are prohibited by the specification, ensuring - * that the cookie is 16-bit aligned, and the resulting cookie pair is - * 32-bit aligned. - */ - if (*md5 == NULL && - (OPTION_TS & opts->options) && - cookie_size > 0) { - int need = TCPOLEN_COOKIE_BASE + cookie_size; - - if (0x2 & need) { - /* 32-bit multiple */ - need += 2; /* NOPs */ - - if (need > remaining) { - /* try shrinking cookie to fit */ - cookie_size -= 2; - need -= 4; - } - } - while (need > remaining && TCP_COOKIE_MIN <= cookie_size) { - cookie_size -= 4; - need -= 4; - } - if (TCP_COOKIE_MIN <= cookie_size) { - opts->options |= OPTION_COOKIE_EXTENSION; - opts->hash_location = (__u8 *)&cvp->cookie_pair[0]; - opts->hash_size = cookie_size; - - /* Remember for future incarnations. */ - cvp->cookie_desired = cookie_size; - - if (cvp->cookie_desired != cvp->cookie_pair_size) { - /* Currently use random bytes as a nonce, - * assuming these are completely unpredictable - * by hostile users of the same system. - */ - get_random_bytes(&cvp->cookie_pair[0], - cookie_size); - cvp->cookie_pair_size = cookie_size; - } - remaining -= need; - } - } return MAX_TCP_OPTION_SPACE - remaining; } @@ -704,14 +565,10 @@ static unsigned int tcp_synack_options(struct sock *sk, unsigned int mss, struct sk_buff *skb, struct tcp_out_options *opts, struct tcp_md5sig_key **md5, - struct tcp_extend_values *xvp, struct tcp_fastopen_cookie *foc) { struct inet_request_sock *ireq = inet_rsk(req); unsigned int remaining = MAX_TCP_OPTION_SPACE; - u8 cookie_plus = (xvp != NULL && !xvp->cookie_out_never) ? - xvp->cookie_plus : - 0; #ifdef CONFIG_TCP_MD5SIG *md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req); @@ -759,28 +616,7 @@ static unsigned int tcp_synack_options(struct sock *sk, remaining -= need; } } - /* Similar rationale to tcp_syn_options() applies here, too. - * If the options fit, the same options should fit now! - */ - if (*md5 == NULL && - ireq->tstamp_ok && - cookie_plus > TCPOLEN_COOKIE_BASE) { - int need = cookie_plus; /* has TCPOLEN_COOKIE_BASE */ - - if (0x2 & need) { - /* 32-bit multiple */ - need += 2; /* NOPs */ - } - if (need <= remaining) { - opts->options |= OPTION_COOKIE_EXTENSION; - opts->hash_size = cookie_plus - TCPOLEN_COOKIE_BASE; - remaining -= need; - } else { - /* There's no error return, so flag it. */ - xvp->cookie_out_never = 1; /* true */ - opts->hash_size = 0; - } - } + return MAX_TCP_OPTION_SPACE - remaining; } @@ -2802,32 +2638,24 @@ int tcp_send_synack(struct sock *sk) * sk: listener socket * dst: dst entry attached to the SYNACK * req: request_sock pointer - * rvp: request_values pointer * * Allocate one skb and build a SYNACK packet. * @dst is consumed : Caller should not use it again. 
*/ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, struct request_sock *req, - struct request_values *rvp, struct tcp_fastopen_cookie *foc) { struct tcp_out_options opts; - struct tcp_extend_values *xvp = tcp_xv(rvp); struct inet_request_sock *ireq = inet_rsk(req); struct tcp_sock *tp = tcp_sk(sk); - const struct tcp_cookie_values *cvp = tp->cookie_values; struct tcphdr *th; struct sk_buff *skb; struct tcp_md5sig_key *md5; int tcp_header_size; int mss; - int s_data_desired = 0; - if (cvp != NULL && cvp->s_data_constant && cvp->s_data_desired) - s_data_desired = cvp->s_data_desired; - skb = alloc_skb(MAX_TCP_HEADER + 15 + s_data_desired, - sk_gfp_atomic(sk, GFP_ATOMIC)); + skb = alloc_skb(MAX_TCP_HEADER + 15, sk_gfp_atomic(sk, GFP_ATOMIC)); if (unlikely(!skb)) { dst_release(dst); return NULL; @@ -2869,9 +2697,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, else #endif TCP_SKB_CB(skb)->when = tcp_time_stamp; - tcp_header_size = tcp_synack_options(sk, req, mss, - skb, &opts, &md5, xvp, foc) - + sizeof(*th); + tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, &md5, + foc) + sizeof(*th); skb_push(skb, tcp_header_size); skb_reset_transport_header(skb); @@ -2889,40 +2716,6 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn, TCPHDR_SYN | TCPHDR_ACK); - if (OPTION_COOKIE_EXTENSION & opts.options) { - if (s_data_desired) { - u8 *buf = skb_put(skb, s_data_desired); - - /* copy data directly from the listening socket. */ - memcpy(buf, cvp->s_data_payload, s_data_desired); - TCP_SKB_CB(skb)->end_seq += s_data_desired; - } - - if (opts.hash_size > 0) { - __u32 workspace[SHA_WORKSPACE_WORDS]; - u32 *mess = &xvp->cookie_bakery[COOKIE_DIGEST_WORDS]; - u32 *tail = &mess[COOKIE_MESSAGE_WORDS-1]; - - /* Secret recipe depends on the Timestamp, (future) - * Sequence and Acknowledgment Numbers, Initiator - * Cookie, and others handled by IP variant caller. - */ - *tail-- ^= opts.tsval; - *tail-- ^= tcp_rsk(req)->rcv_isn + 1; - *tail-- ^= TCP_SKB_CB(skb)->seq + 1; - - /* recommended */ - *tail-- ^= (((__force u32)th->dest << 16) | (__force u32)th->source); - *tail-- ^= (u32)(unsigned long)cvp; /* per sockopt */ - - sha_transform((__u32 *)&xvp->cookie_bakery[0], - (char *)mess, - &workspace[0]); - opts.hash_location = - (__u8 *)&xvp->cookie_bakery[0]; - } - } - th->seq = htonl(TCP_SKB_CB(skb)->seq); /* XXX data is queued and acked as is. 
No buffer/window check */ th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 8a0848b60b35..d5dda20bd717 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -149,7 +149,6 @@ static inline int cookie_check(const struct sk_buff *skb, __u32 cookie) struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) { struct tcp_options_received tcp_opt; - const u8 *hash_location; struct inet_request_sock *ireq; struct inet6_request_sock *ireq6; struct tcp_request_sock *treq; @@ -177,7 +176,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) /* check for timestamp cookie support */ memset(&tcp_opt, 0, sizeof(tcp_opt)); - tcp_parse_options(skb, &tcp_opt, &hash_location, 0, NULL); + tcp_parse_options(skb, &tcp_opt, 0, NULL); if (!cookie_check_timestamp(&tcp_opt, sock_net(sk), &ecn_ok)) goto out; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 9b6460055df5..0a97add2ab74 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -454,7 +454,6 @@ out: static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, struct flowi6 *fl6, struct request_sock *req, - struct request_values *rvp, u16 queue_mapping) { struct inet6_request_sock *treq = inet6_rsk(req); @@ -466,7 +465,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL) goto done; - skb = tcp_make_synack(sk, dst, req, rvp, NULL); + skb = tcp_make_synack(sk, dst, req, NULL); if (skb) { __tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr); @@ -481,13 +480,12 @@ done: return err; } -static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req, - struct request_values *rvp) +static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req) { struct flowi6 fl6; int res; - res = tcp_v6_send_synack(sk, NULL, &fl6, req, rvp, 0); + res = tcp_v6_send_synack(sk, NULL, &fl6, req, 0); if (!res) TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); return res; @@ -940,9 +938,7 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) */ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) { - struct tcp_extend_values tmp_ext; struct tcp_options_received tmp_opt; - const u8 *hash_location; struct request_sock *req; struct inet6_request_sock *treq; struct ipv6_pinfo *np = inet6_sk(sk); @@ -980,50 +976,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) tcp_clear_options(&tmp_opt); tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); tmp_opt.user_mss = tp->rx_opt.user_mss; - tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); - - if (tmp_opt.cookie_plus > 0 && - tmp_opt.saw_tstamp && - !tp->rx_opt.cookie_out_never && - (sysctl_tcp_cookie_size > 0 || - (tp->cookie_values != NULL && - tp->cookie_values->cookie_desired > 0))) { - u8 *c; - u32 *d; - u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS]; - int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE; - - if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0) - goto drop_and_free; - - /* Secret recipe starts with IP addresses */ - d = (__force u32 *)&ipv6_hdr(skb)->daddr.s6_addr32[0]; - *mess++ ^= *d++; - *mess++ ^= *d++; - *mess++ ^= *d++; - *mess++ ^= *d++; - d = (__force u32 *)&ipv6_hdr(skb)->saddr.s6_addr32[0]; - *mess++ ^= *d++; - *mess++ ^= *d++; - *mess++ ^= *d++; - *mess++ ^= *d++; - - /* plus variable length Initiator Cookie */ - c = (u8 *)mess; - while (l-- > 0) - *c++ ^= *hash_location++; - 
- want_cookie = false; /* not our kind of cookie */ - tmp_ext.cookie_out_never = 0; /* false */ - tmp_ext.cookie_plus = tmp_opt.cookie_plus; - } else if (!tp->rx_opt.cookie_in_always) { - /* redundant indications, but ensure initialization. */ - tmp_ext.cookie_out_never = 1; /* true */ - tmp_ext.cookie_plus = 0; - } else { - goto drop_and_free; - } - tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always; + tcp_parse_options(skb, &tmp_opt, 0, NULL); if (want_cookie && !tmp_opt.saw_tstamp) tcp_clear_options(&tmp_opt); @@ -1101,7 +1054,6 @@ have_isn: goto drop_and_release; if (tcp_v6_send_synack(sk, dst, &fl6, req, - (struct request_values *)&tmp_ext, skb_get_queue_mapping(skb)) || want_cookie) goto drop_and_free; -- cgit From 8655cc490e83f66476de8c1294411860325c3531 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Tue, 19 Feb 2013 21:10:30 +0000 Subject: iio: Add broken out info_mask fields for shared_by_type and separate This simplifies the code, removes an extensive layer of 'helper' macros and gives us twice as much room to play with in these masks before we have any need to be clever. Signed-off-by: Jonathan Cameron Acked-by: Lars-Peter Clausen --- drivers/iio/industrialio-core.c | 30 ++++++++++++++++++++++++++++++ include/linux/iio/iio.h | 10 +++++++++- 2 files changed, 39 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c index 6d8b02785647..f05289f7b512 100644 --- a/drivers/iio/industrialio-core.c +++ b/drivers/iio/industrialio-core.c @@ -708,6 +708,36 @@ static int iio_device_add_channel_sysfs(struct iio_dev *indio_dev, goto error_ret; attrcount++; } + for_each_set_bit(i, &chan->info_mask_separate, sizeof(long)*8) { + ret = __iio_add_chan_devattr(iio_chan_info_postfix[i], + chan, + &iio_read_channel_info, + &iio_write_channel_info, + i, + 0, + &indio_dev->dev, + &indio_dev->channel_attr_list); + if (ret < 0) + goto error_ret; + attrcount++; + } + for_each_set_bit(i, &chan->info_mask_shared_by_type, sizeof(long)*8) { + ret = __iio_add_chan_devattr(iio_chan_info_postfix[i], + chan, + &iio_read_channel_info, + &iio_write_channel_info, + i, + 1, + &indio_dev->dev, + &indio_dev->channel_attr_list); + if (ret == -EBUSY) { + ret = 0; + continue; + } else if (ret < 0) { + goto error_ret; + } + attrcount++; + } if (chan->ext_info) { unsigned int i = 0; diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index da8c776ba0bd..76976509d628 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -218,6 +218,10 @@ ssize_t iio_enum_write(struct iio_dev *indio_dev, * endianness: little or big endian * @info_mask: What information is to be exported about this channel. * This includes calibbias, scale etc. + * @info_mask_separate: What information is to be exported that is specific to + * this channel. + * @info_mask_shared_by_type: What information is to be exported that is shared +* by all channels of the same type. * @event_mask: What events can this channel produce. * @ext_info: Array of extended info attributes for this channel. 
* The array is NULL terminated, the last element should @@ -253,6 +257,8 @@ struct iio_chan_spec { enum iio_endian endianness; } scan_type; long info_mask; + long info_mask_separate; + long info_mask_shared_by_type; long event_mask; const struct iio_chan_spec_ext_info *ext_info; const char *extend_name; @@ -275,7 +281,9 @@ struct iio_chan_spec { static inline bool iio_channel_has_info(const struct iio_chan_spec *chan, enum iio_chan_info_enum type) { - return chan->info_mask & IIO_CHAN_INFO_BITS(type); + return (chan->info_mask & IIO_CHAN_INFO_BITS(type)) | + (chan->info_mask_separate & type) | + (chan->info_mask_shared_by_type & type); } #define IIO_ST(si, rb, sb, sh) \ -- cgit From 5ea864940e8ab63c2669902650b807d0507c390d Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Wed, 27 Feb 2013 19:41:59 +0000 Subject: iio:st_sensors move to info_mask_(shared_by_type/separate) The original info_mask is going away in favour of the broken out versions. Signed-off-by: Jonathan Cameron Acked-by: Denis Ciocca --- include/linux/iio/common/st_sensors.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h index 1f86a97ab2e2..7c0c0d3aef35 100644 --- a/include/linux/iio/common/st_sensors.h +++ b/include/linux/iio/common/st_sensors.h @@ -15,6 +15,7 @@ #include #include #include +#include #define ST_SENSORS_TX_MAX_LENGTH 2 #define ST_SENSORS_RX_MAX_LENGTH 6 @@ -45,8 +46,8 @@ { \ .type = device_type, \ .modified = 1, \ - .info_mask = IIO_CHAN_INFO_RAW_SEPARATE_BIT | \ - IIO_CHAN_INFO_SCALE_SEPARATE_BIT, \ + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | \ + BIT(IIO_CHAN_INFO_SCALE), \ .scan_index = index, \ .channel2 = mod, \ .address = addr, \ -- cgit From ea0c68006321eea78a3702a9d68ff9395e06da38 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Wed, 27 Feb 2013 19:42:39 +0000 Subject: iio:adc:ad_sigma_delta move to info_mask_(shared_by_type/separate) The original info_mask is going away in favour of the broken out versions. Signed-off-by: Jonathan Cameron Acked-by: Lars-Peter Clausen --- include/linux/iio/adc/ad_sigma_delta.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/adc/ad_sigma_delta.h b/include/linux/iio/adc/ad_sigma_delta.h index 2e4eab9868a3..e7fdec4db9da 100644 --- a/include/linux/iio/adc/ad_sigma_delta.h +++ b/include/linux/iio/adc/ad_sigma_delta.h @@ -133,9 +133,9 @@ int ad_sd_validate_trigger(struct iio_dev *indio_dev, struct iio_trigger *trig); .channel2 = (_channel2), \ .address = (_address), \ .extend_name = (_extend_name), \ - .info_mask = IIO_CHAN_INFO_RAW_SEPARATE_BIT | \ - IIO_CHAN_INFO_SCALE_SHARED_BIT | \ - IIO_CHAN_INFO_OFFSET_SEPARATE_BIT, \ + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | \ + BIT(IIO_CHAN_INFO_OFFSET), \ + .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE), \ .scan_index = (_si), \ .scan_type = { \ .sign = 'u', \ -- cgit From b841f8abc27466026ecf4e5590c6c737c2e86e7e Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Wed, 27 Feb 2013 19:35:55 +0000 Subject: staging:iio:accel:adis move to info_mask_(shared_by_type/separate) The original info_mask is going away in favour of the broken out versions. 
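As a worked illustration of the conversion (a hypothetical channel table; example_channels and the IIO_VOLTAGE choice are made up for this sketch, not taken from any driver touched here), each packed *_SEPARATE_BIT/*_SHARED_BIT helper collapses into a plain BIT() of the enum iio_chan_info_enum value, with the sharing expressed by which of the two new masks the bit is placed in:

	/* assumes <linux/iio/iio.h>, plus <linux/bitops.h> for BIT() */
	static const struct iio_chan_spec example_channels[] = {
		{
			.type = IIO_VOLTAGE,
			.indexed = 1,
			.channel = 0,
			/* was: .info_mask = IIO_CHAN_INFO_RAW_SEPARATE_BIT |
			 *                  IIO_CHAN_INFO_SCALE_SHARED_BIT,
			 */
			.info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
			.info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE),
		},
	};
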
Signed-off-by: Jonathan Cameron Acked-by: Lars-Peter Clausen --- drivers/staging/iio/accel/adis16201_core.c | 8 +++---- drivers/staging/iio/accel/adis16203_core.c | 2 +- drivers/staging/iio/accel/adis16204_core.c | 8 +++---- drivers/staging/iio/accel/adis16209_core.c | 4 ++-- drivers/staging/iio/accel/adis16240_core.c | 9 +++----- drivers/staging/iio/gyro/adis16260_core.c | 4 ++-- include/linux/iio/imu/adis.h | 34 +++++++++++++++--------------- 7 files changed, 32 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/drivers/staging/iio/accel/adis16201_core.c b/drivers/staging/iio/accel/adis16201_core.c index 9e5791ff2a04..ab8ec7af88b4 100644 --- a/drivers/staging/iio/accel/adis16201_core.c +++ b/drivers/staging/iio/accel/adis16201_core.c @@ -134,14 +134,14 @@ static const struct iio_chan_spec adis16201_channels[] = { ADIS_SUPPLY_CHAN(ADIS16201_SUPPLY_OUT, ADIS16201_SCAN_SUPPLY, 12), ADIS_TEMP_CHAN(ADIS16201_TEMP_OUT, ADIS16201_SCAN_TEMP, 12), ADIS_ACCEL_CHAN(X, ADIS16201_XACCL_OUT, ADIS16201_SCAN_ACC_X, - IIO_CHAN_INFO_CALIBBIAS_SEPARATE_BIT, 14), + BIT(IIO_CHAN_INFO_CALIBBIAS), 14), ADIS_ACCEL_CHAN(Y, ADIS16201_YACCL_OUT, ADIS16201_SCAN_ACC_Y, - IIO_CHAN_INFO_CALIBBIAS_SEPARATE_BIT, 14), + BIT(IIO_CHAN_INFO_CALIBBIAS), 14), ADIS_AUX_ADC_CHAN(ADIS16201_AUX_ADC, ADIS16201_SCAN_AUX_ADC, 12), ADIS_INCLI_CHAN(X, ADIS16201_XINCL_OUT, ADIS16201_SCAN_INCLI_X, - IIO_CHAN_INFO_CALIBBIAS_SEPARATE_BIT, 14), + BIT(IIO_CHAN_INFO_CALIBBIAS), 14), ADIS_INCLI_CHAN(X, ADIS16201_YINCL_OUT, ADIS16201_SCAN_INCLI_Y, - IIO_CHAN_INFO_CALIBBIAS_SEPARATE_BIT, 14), + BIT(IIO_CHAN_INFO_CALIBBIAS), 14), IIO_CHAN_SOFT_TIMESTAMP(7) }; diff --git a/drivers/staging/iio/accel/adis16203_core.c b/drivers/staging/iio/accel/adis16203_core.c index 8c235273ff13..b08ac8fdeee2 100644 --- a/drivers/staging/iio/accel/adis16203_core.c +++ b/drivers/staging/iio/accel/adis16203_core.c @@ -102,7 +102,7 @@ static const struct iio_chan_spec adis16203_channels[] = { ADIS_SUPPLY_CHAN(ADIS16203_SUPPLY_OUT, ADIS16203_SCAN_SUPPLY, 12), ADIS_AUX_ADC_CHAN(ADIS16203_AUX_ADC, ADIS16203_SCAN_AUX_ADC, 12), ADIS_INCLI_CHAN(X, ADIS16203_XINCL_OUT, ADIS16203_SCAN_INCLI_X, - IIO_CHAN_INFO_CALIBBIAS_SEPARATE_BIT, 14), + BIT(IIO_CHAN_INFO_CALIBBIAS), 14), /* Fixme: Not what it appears to be - see data sheet */ ADIS_INCLI_CHAN(Y, ADIS16203_YINCL_OUT, ADIS16203_SCAN_INCLI_Y, 0, 14), ADIS_TEMP_CHAN(ADIS16203_TEMP_OUT, ADIS16203_SCAN_TEMP, 12), diff --git a/drivers/staging/iio/accel/adis16204_core.c b/drivers/staging/iio/accel/adis16204_core.c index f3592668e066..792ec25a50dc 100644 --- a/drivers/staging/iio/accel/adis16204_core.c +++ b/drivers/staging/iio/accel/adis16204_core.c @@ -140,13 +140,11 @@ static const struct iio_chan_spec adis16204_channels[] = { ADIS_AUX_ADC_CHAN(ADIS16204_AUX_ADC, ADIS16204_SCAN_AUX_ADC, 12), ADIS_TEMP_CHAN(ADIS16204_TEMP_OUT, ADIS16204_SCAN_TEMP, 12), ADIS_ACCEL_CHAN(X, ADIS16204_XACCL_OUT, ADIS16204_SCAN_ACC_X, - IIO_CHAN_INFO_CALIBBIAS_SEPARATE_BIT | - IIO_CHAN_INFO_PEAK_SEPARATE_BIT, 14), + BIT(IIO_CHAN_INFO_CALIBBIAS) | BIT(IIO_CHAN_INFO_PEAK), 14), ADIS_ACCEL_CHAN(Y, ADIS16204_YACCL_OUT, ADIS16204_SCAN_ACC_Y, - IIO_CHAN_INFO_CALIBBIAS_SEPARATE_BIT | - IIO_CHAN_INFO_PEAK_SEPARATE_BIT, 14), + BIT(IIO_CHAN_INFO_CALIBBIAS) | BIT(IIO_CHAN_INFO_PEAK), 14), ADIS_ACCEL_CHAN(ROOT_SUM_SQUARED_X_Y, ADIS16204_XY_RSS_OUT, - ADIS16204_SCAN_ACC_XY, IIO_CHAN_INFO_PEAK_SEPARATE_BIT, 14), + ADIS16204_SCAN_ACC_XY, BIT(IIO_CHAN_INFO_PEAK), 14), IIO_CHAN_SOFT_TIMESTAMP(5), }; diff --git 
a/drivers/staging/iio/accel/adis16209_core.c b/drivers/staging/iio/accel/adis16209_core.c index 69c50ee44ce3..323c169d699c 100644 --- a/drivers/staging/iio/accel/adis16209_core.c +++ b/drivers/staging/iio/accel/adis16209_core.c @@ -133,9 +133,9 @@ static const struct iio_chan_spec adis16209_channels[] = { ADIS_SUPPLY_CHAN(ADIS16209_SUPPLY_OUT, ADIS16209_SCAN_SUPPLY, 14), ADIS_TEMP_CHAN(ADIS16209_TEMP_OUT, ADIS16209_SCAN_TEMP, 12), ADIS_ACCEL_CHAN(X, ADIS16209_XACCL_OUT, ADIS16209_SCAN_ACC_X, - IIO_CHAN_INFO_CALIBBIAS_SEPARATE_BIT, 14), + BIT(IIO_CHAN_INFO_CALIBBIAS), 14), ADIS_ACCEL_CHAN(Y, ADIS16209_YACCL_OUT, ADIS16209_SCAN_ACC_Y, - IIO_CHAN_INFO_CALIBBIAS_SEPARATE_BIT, 14), + BIT(IIO_CHAN_INFO_CALIBBIAS), 14), ADIS_AUX_ADC_CHAN(ADIS16209_AUX_ADC, ADIS16209_SCAN_AUX_ADC, 12), ADIS_INCLI_CHAN(X, ADIS16209_XINCL_OUT, ADIS16209_SCAN_INCLI_X, 0, 14), ADIS_INCLI_CHAN(Y, ADIS16209_YINCL_OUT, ADIS16209_SCAN_INCLI_Y, 0, 14), diff --git a/drivers/staging/iio/accel/adis16240_core.c b/drivers/staging/iio/accel/adis16240_core.c index e97fa0b0233d..fd1f0fd0fba8 100644 --- a/drivers/staging/iio/accel/adis16240_core.c +++ b/drivers/staging/iio/accel/adis16240_core.c @@ -176,14 +176,11 @@ static const struct iio_chan_spec adis16240_channels[] = { ADIS_SUPPLY_CHAN(ADIS16240_SUPPLY_OUT, ADIS16240_SCAN_SUPPLY, 10), ADIS_AUX_ADC_CHAN(ADIS16240_AUX_ADC, ADIS16240_SCAN_AUX_ADC, 10), ADIS_ACCEL_CHAN(X, ADIS16240_XACCL_OUT, ADIS16240_SCAN_ACC_X, - IIO_CHAN_INFO_CALIBBIAS_SEPARATE_BIT | - IIO_CHAN_INFO_PEAK_SEPARATE_BIT, 10), + BIT(IIO_CHAN_INFO_CALIBBIAS) | BIT(IIO_CHAN_INFO_PEAK), 10), ADIS_ACCEL_CHAN(Y, ADIS16240_YACCL_OUT, ADIS16240_SCAN_ACC_Y, - IIO_CHAN_INFO_CALIBBIAS_SEPARATE_BIT | - IIO_CHAN_INFO_PEAK_SEPARATE_BIT, 10), + BIT(IIO_CHAN_INFO_CALIBBIAS) | BIT(IIO_CHAN_INFO_PEAK), 10), ADIS_ACCEL_CHAN(Z, ADIS16240_ZACCL_OUT, ADIS16240_SCAN_ACC_Z, - IIO_CHAN_INFO_CALIBBIAS_SEPARATE_BIT | - IIO_CHAN_INFO_PEAK_SEPARATE_BIT, 10), + BIT(IIO_CHAN_INFO_CALIBBIAS) | BIT(IIO_CHAN_INFO_PEAK), 10), ADIS_TEMP_CHAN(ADIS16240_TEMP_OUT, ADIS16240_SCAN_TEMP, 10), IIO_CHAN_SOFT_TIMESTAMP(6) }; diff --git a/drivers/staging/iio/gyro/adis16260_core.c b/drivers/staging/iio/gyro/adis16260_core.c index 6e80b8c768ae..620d63fd099b 100644 --- a/drivers/staging/iio/gyro/adis16260_core.c +++ b/drivers/staging/iio/gyro/adis16260_core.c @@ -124,8 +124,8 @@ static IIO_DEVICE_ATTR(sampling_frequency_available, #define ADIS16260_GYRO_CHANNEL_SET(axis, mod) \ struct iio_chan_spec adis16260_channels_##axis[] = { \ ADIS_GYRO_CHAN(mod, ADIS16260_GYRO_OUT, ADIS16260_SCAN_GYRO, \ - IIO_CHAN_INFO_CALIBBIAS_SEPARATE_BIT | \ - IIO_CHAN_INFO_CALIBSCALE_SEPARATE_BIT, 14), \ + BIT(IIO_CHAN_INFO_CALIBBIAS) | \ + BIT(IIO_CHAN_INFO_CALIBSCALE), 14), \ ADIS_INCLI_CHAN(mod, ADIS16260_ANGL_OUT, ADIS16260_SCAN_ANGL, 0, 14), \ ADIS_TEMP_CHAN(ADIS16260_TEMP_OUT, ADIS16260_SCAN_TEMP, 12), \ ADIS_SUPPLY_CHAN(ADIS16260_SUPPLY_OUT, ADIS16260_SCAN_SUPPLY, 12), \ diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h index ff781dca2e9a..b665dc7f017b 100644 --- a/include/linux/iio/imu/adis.h +++ b/include/linux/iio/imu/adis.h @@ -162,8 +162,8 @@ int adis_single_conversion(struct iio_dev *indio_dev, .indexed = 1, \ .channel = (chan), \ .extend_name = name, \ - .info_mask = IIO_CHAN_INFO_RAW_SEPARATE_BIT | \ - IIO_CHAN_INFO_SCALE_SEPARATE_BIT, \ + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | \ + BIT(IIO_CHAN_INFO_SCALE), \ .address = (addr), \ .scan_index = (si), \ .scan_type = { \ @@ -184,9 +184,9 @@ int adis_single_conversion(struct iio_dev 
*indio_dev, .type = IIO_TEMP, \ .indexed = 1, \ .channel = 0, \ - .info_mask = IIO_CHAN_INFO_RAW_SEPARATE_BIT | \ - IIO_CHAN_INFO_SCALE_SEPARATE_BIT | \ - IIO_CHAN_INFO_OFFSET_SEPARATE_BIT, \ + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | \ + BIT(IIO_CHAN_INFO_SCALE) | \ + BIT(IIO_CHAN_INFO_OFFSET), \ .address = (addr), \ .scan_index = (si), \ .scan_type = { \ @@ -197,13 +197,13 @@ int adis_single_conversion(struct iio_dev *indio_dev, }, \ } -#define ADIS_MOD_CHAN(_type, mod, addr, si, info, bits) { \ +#define ADIS_MOD_CHAN(_type, mod, addr, si, info_sep, bits) { \ .type = (_type), \ .modified = 1, \ .channel2 = IIO_MOD_ ## mod, \ - .info_mask = IIO_CHAN_INFO_RAW_SEPARATE_BIT | \ - IIO_CHAN_INFO_SCALE_SHARED_BIT | \ - info, \ + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | \ + info_sep, \ + .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE), \ .address = (addr), \ .scan_index = (si), \ .scan_type = { \ @@ -214,17 +214,17 @@ int adis_single_conversion(struct iio_dev *indio_dev, }, \ } -#define ADIS_ACCEL_CHAN(mod, addr, si, info, bits) \ - ADIS_MOD_CHAN(IIO_ACCEL, mod, addr, si, info, bits) +#define ADIS_ACCEL_CHAN(mod, addr, si, info_sep, bits) \ + ADIS_MOD_CHAN(IIO_ACCEL, mod, addr, si, info_sep, bits) -#define ADIS_GYRO_CHAN(mod, addr, si, info, bits) \ - ADIS_MOD_CHAN(IIO_ANGL_VEL, mod, addr, si, info, bits) +#define ADIS_GYRO_CHAN(mod, addr, si, info_sep, bits) \ + ADIS_MOD_CHAN(IIO_ANGL_VEL, mod, addr, si, info_sep, bits) -#define ADIS_INCLI_CHAN(mod, addr, si, info, bits) \ - ADIS_MOD_CHAN(IIO_INCLI, mod, addr, si, info, bits) +#define ADIS_INCLI_CHAN(mod, addr, si, info_sep, bits) \ + ADIS_MOD_CHAN(IIO_INCLI, mod, addr, si, info_sep, bits) -#define ADIS_ROT_CHAN(mod, addr, si, info, bits) \ - ADIS_MOD_CHAN(IIO_ROT, mod, addr, si, info, bits) +#define ADIS_ROT_CHAN(mod, addr, si, info_sep, bits) \ + ADIS_MOD_CHAN(IIO_ROT, mod, addr, si, info_sep, bits) #ifdef CONFIG_IIO_ADIS_LIB_BUFFER -- cgit From b9606e2aa97d3d831d1236c0e789a33a2f867a8a Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Wed, 27 Feb 2013 19:43:52 +0000 Subject: iio:core drop info_mask from struct iio_info This has been replaced by the pair of masks info_mask_separate and info_mask_shared_by_type. Other variants may follow. 
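What is left behind after this removal is the pair of per-mask loops added earlier in this series. In outline (a simplified sketch only, with error handling dropped and the hypothetical add_attr() helper standing in for __iio_add_chan_devattr()), sysfs attribute creation no longer decodes shared/separate out of interleaved bit pairs via i/2 and !(i%2); each mask is walked on its own, and the mask a bit came from carries the sharing information:

	int i;

	/* one attribute per set bit; the source mask decides the
	 * shared-by-type flag passed to the attribute helper
	 */
	for_each_set_bit(i, &chan->info_mask_separate, sizeof(long)*8)
		add_attr(chan, i, 0);	/* separate */
	for_each_set_bit(i, &chan->info_mask_shared_by_type, sizeof(long)*8)
		add_attr(chan, i, 1);	/* shared by type */
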
Signed-off-by: Jonathan Cameron Acked-by: Lars-Peter Clausen --- drivers/iio/industrialio-core.c | 17 ---------- include/linux/iio/iio.h | 73 +---------------------------------------- 2 files changed, 1 insertion(+), 89 deletions(-) (limited to 'include/linux') diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c index f05289f7b512..e145931ef1b8 100644 --- a/drivers/iio/industrialio-core.c +++ b/drivers/iio/industrialio-core.c @@ -691,23 +691,6 @@ static int iio_device_add_channel_sysfs(struct iio_dev *indio_dev, if (chan->channel < 0) return 0; - for_each_set_bit(i, &chan->info_mask, sizeof(long)*8) { - ret = __iio_add_chan_devattr(iio_chan_info_postfix[i/2], - chan, - &iio_read_channel_info, - &iio_write_channel_info, - i/2, - !(i%2), - &indio_dev->dev, - &indio_dev->channel_attr_list); - if (ret == -EBUSY && (i%2 == 0)) { - ret = 0; - continue; - } - if (ret < 0) - goto error_ret; - attrcount++; - } for_each_set_bit(i, &chan->info_mask_separate, sizeof(long)*8) { ret = __iio_add_chan_devattr(iio_chan_info_postfix[i], chan, diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 76976509d628..8d171f427632 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -38,76 +38,6 @@ enum iio_chan_info_enum { IIO_CHAN_INFO_HYSTERESIS, }; -#define IIO_CHAN_INFO_SHARED_BIT(type) BIT(type*2) -#define IIO_CHAN_INFO_SEPARATE_BIT(type) BIT(type*2 + 1) -#define IIO_CHAN_INFO_BITS(type) (IIO_CHAN_INFO_SHARED_BIT(type) | \ - IIO_CHAN_INFO_SEPARATE_BIT(type)) - -#define IIO_CHAN_INFO_RAW_SEPARATE_BIT \ - IIO_CHAN_INFO_SEPARATE_BIT(IIO_CHAN_INFO_RAW) -#define IIO_CHAN_INFO_PROCESSED_SEPARATE_BIT \ - IIO_CHAN_INFO_SEPARATE_BIT(IIO_CHAN_INFO_PROCESSED) -#define IIO_CHAN_INFO_SCALE_SEPARATE_BIT \ - IIO_CHAN_INFO_SEPARATE_BIT(IIO_CHAN_INFO_SCALE) -#define IIO_CHAN_INFO_SCALE_SHARED_BIT \ - IIO_CHAN_INFO_SHARED_BIT(IIO_CHAN_INFO_SCALE) -#define IIO_CHAN_INFO_OFFSET_SEPARATE_BIT \ - IIO_CHAN_INFO_SEPARATE_BIT(IIO_CHAN_INFO_OFFSET) -#define IIO_CHAN_INFO_OFFSET_SHARED_BIT \ - IIO_CHAN_INFO_SHARED_BIT(IIO_CHAN_INFO_OFFSET) -#define IIO_CHAN_INFO_CALIBSCALE_SEPARATE_BIT \ - IIO_CHAN_INFO_SEPARATE_BIT(IIO_CHAN_INFO_CALIBSCALE) -#define IIO_CHAN_INFO_CALIBSCALE_SHARED_BIT \ - IIO_CHAN_INFO_SHARED_BIT(IIO_CHAN_INFO_CALIBSCALE) -#define IIO_CHAN_INFO_CALIBBIAS_SEPARATE_BIT \ - IIO_CHAN_INFO_SEPARATE_BIT(IIO_CHAN_INFO_CALIBBIAS) -#define IIO_CHAN_INFO_CALIBBIAS_SHARED_BIT \ - IIO_CHAN_INFO_SHARED_BIT(IIO_CHAN_INFO_CALIBBIAS) -#define IIO_CHAN_INFO_PEAK_SEPARATE_BIT \ - IIO_CHAN_INFO_SEPARATE_BIT(IIO_CHAN_INFO_PEAK) -#define IIO_CHAN_INFO_PEAK_SHARED_BIT \ - IIO_CHAN_INFO_SHARED_BIT(IIO_CHAN_INFO_PEAK) -#define IIO_CHAN_INFO_PEAKSCALE_SEPARATE_BIT \ - IIO_CHAN_INFO_SEPARATE_BIT(IIO_CHAN_INFO_PEAKSCALE) -#define IIO_CHAN_INFO_PEAKSCALE_SHARED_BIT \ - IIO_CHAN_INFO_SHARED_BIT(IIO_CHAN_INFO_PEAKSCALE) -#define IIO_CHAN_INFO_QUADRATURE_CORRECTION_RAW_SEPARATE_BIT \ - IIO_CHAN_INFO_SEPARATE_BIT( \ - IIO_CHAN_INFO_QUADRATURE_CORRECTION_RAW) -#define IIO_CHAN_INFO_QUADRATURE_CORRECTION_RAW_SHARED_BIT \ - IIO_CHAN_INFO_SHARED_BIT( \ - IIO_CHAN_INFO_QUADRATURE_CORRECTION_RAW) -#define IIO_CHAN_INFO_AVERAGE_RAW_SEPARATE_BIT \ - IIO_CHAN_INFO_SEPARATE_BIT(IIO_CHAN_INFO_AVERAGE_RAW) -#define IIO_CHAN_INFO_AVERAGE_RAW_SHARED_BIT \ - IIO_CHAN_INFO_SHARED_BIT(IIO_CHAN_INFO_AVERAGE_RAW) -#define IIO_CHAN_INFO_LOW_PASS_FILTER_3DB_FREQUENCY_SHARED_BIT \ - IIO_CHAN_INFO_SHARED_BIT( \ - IIO_CHAN_INFO_LOW_PASS_FILTER_3DB_FREQUENCY) -#define 
IIO_CHAN_INFO_LOW_PASS_FILTER_3DB_FREQUENCY_SEPARATE_BIT \ - IIO_CHAN_INFO_SEPARATE_BIT( \ - IIO_CHAN_INFO_LOW_PASS_FILTER_3DB_FREQUENCY) -#define IIO_CHAN_INFO_SAMP_FREQ_SEPARATE_BIT \ - IIO_CHAN_INFO_SEPARATE_BIT(IIO_CHAN_INFO_SAMP_FREQ) -#define IIO_CHAN_INFO_SAMP_FREQ_SHARED_BIT \ - IIO_CHAN_INFO_SHARED_BIT(IIO_CHAN_INFO_SAMP_FREQ) -#define IIO_CHAN_INFO_FREQUENCY_SEPARATE_BIT \ - IIO_CHAN_INFO_SEPARATE_BIT(IIO_CHAN_INFO_FREQUENCY) -#define IIO_CHAN_INFO_FREQUENCY_SHARED_BIT \ - IIO_CHAN_INFO_SHARED_BIT(IIO_CHAN_INFO_FREQUENCY) -#define IIO_CHAN_INFO_PHASE_SEPARATE_BIT \ - IIO_CHAN_INFO_SEPARATE_BIT(IIO_CHAN_INFO_PHASE) -#define IIO_CHAN_INFO_PHASE_SHARED_BIT \ - IIO_CHAN_INFO_SHARED_BIT(IIO_CHAN_INFO_PHASE) -#define IIO_CHAN_INFO_HARDWAREGAIN_SEPARATE_BIT \ - IIO_CHAN_INFO_SEPARATE_BIT(IIO_CHAN_INFO_HARDWAREGAIN) -#define IIO_CHAN_INFO_HARDWAREGAIN_SHARED_BIT \ - IIO_CHAN_INFO_SHARED_BIT(IIO_CHAN_INFO_HARDWAREGAIN) -#define IIO_CHAN_INFO_HYSTERESIS_SEPARATE_BIT \ - IIO_CHAN_INFO_SEPARATE_BIT(IIO_CHAN_INFO_HYSTERESIS) -#define IIO_CHAN_INFO_HYSTERESIS_SHARED_BIT \ - IIO_CHAN_INFO_SHARED_BIT(IIO_CHAN_INFO_HYSTERESIS) - enum iio_endian { IIO_CPU, IIO_BE, @@ -281,8 +211,7 @@ struct iio_chan_spec { static inline bool iio_channel_has_info(const struct iio_chan_spec *chan, enum iio_chan_info_enum type) { - return (chan->info_mask & IIO_CHAN_INFO_BITS(type)) | - (chan->info_mask_separate & type) | + return (chan->info_mask_separate & type) | (chan->info_mask_shared_by_type & type); } -- cgit From 717bfb5f46f0ee809f6ce04ebdf44521730fff05 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Sun, 17 Mar 2013 20:07:25 +0800 Subject: ALSA: snd-usb: handle raw data format of UAC2 devices UAC2 compliant audio devices may announce the capability to transport raw audio data on their endpoints. Catch this and handle it as 'special' stream on the ALSA side. Signed-off-by: Daniel Mack Reported-by: Andreas Koch Signed-off-by: Takashi Iwai --- include/linux/usb/audio-v2.h | 2 ++ sound/usb/format.c | 11 +++++++---- 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/audio-v2.h b/include/linux/usb/audio-v2.h index ed13053153f4..c5f2158ab00e 100644 --- a/include/linux/usb/audio-v2.h +++ b/include/linux/usb/audio-v2.h @@ -170,6 +170,8 @@ struct uac2_as_header_descriptor { __u8 iChannelNames; } __attribute__((packed)); +#define UAC2_FORMAT_TYPE_I_RAW_DATA (1 << 31) + /* 4.10.1.2 Class-Specific AS Isochronous Audio Data Endpoint Descriptor */ struct uac2_iso_endpoint_descriptor { diff --git a/sound/usb/format.c b/sound/usb/format.c index b30d6fb89b40..a695cafc0599 100644 --- a/sound/usb/format.c +++ b/sound/usb/format.c @@ -47,7 +47,7 @@ static u64 parse_audio_format_i_type(struct snd_usb_audio *chip, int protocol) { int sample_width, sample_bytes; - u64 pcm_formats; + u64 pcm_formats = 0; switch (protocol) { case UAC_VERSION_1: @@ -63,14 +63,17 @@ static u64 parse_audio_format_i_type(struct snd_usb_audio *chip, struct uac_format_type_i_ext_descriptor *fmt = _fmt; sample_width = fmt->bBitResolution; sample_bytes = fmt->bSubslotSize; + + if (format & UAC2_FORMAT_TYPE_I_RAW_DATA) + pcm_formats |= SNDRV_PCM_FMTBIT_SPECIAL; + format <<= 1; break; } } - pcm_formats = 0; - - if (format == 0 || format == (1 << UAC_FORMAT_TYPE_I_UNDEFINED)) { + if ((pcm_formats == 0) && + (format == 0 || format == (1 << UAC_FORMAT_TYPE_I_UNDEFINED))) { /* some devices don't define this correctly... 
*/ snd_printdd(KERN_INFO "%d:%u:%d : format type 0 is detected, processed as PCM\n", chip->dev->devnum, fp->iface, fp->altsetting); -- cgit From 1f0972f5b05a674d73e4eb314fa1b6c78e37aef1 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Tue, 12 Mar 2013 13:24:19 +0200 Subject: usb: phy: nop: Add some parameters to platform data Add clk_rate parameter to platform data. If supplied, the NOP phy driver will program the clock to that rate during probe. Also add 2 flags, needs_vcc and needs_reset. If the flag is set and the regulator couldn't be found then the driver will bail out with -EPROBE_DEFER. Signed-off-by: Roger Quadros Acked-by: Felipe Balbi Signed-off-by: Felipe Balbi --- include/linux/usb/nop-usb-xceiv.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/nop-usb-xceiv.h b/include/linux/usb/nop-usb-xceiv.h index 28884c717411..148d35171aac 100644 --- a/include/linux/usb/nop-usb-xceiv.h +++ b/include/linux/usb/nop-usb-xceiv.h @@ -5,6 +5,11 @@ struct nop_usb_xceiv_platform_data { enum usb_phy_type type; + unsigned long clk_rate; + + /* if set fails with -EPROBE_DEFER if can't get regulator */ + unsigned int needs_vcc:1; + unsigned int needs_reset:1; }; #if defined(CONFIG_NOP_USB_XCEIV) || (defined(CONFIG_NOP_USB_XCEIV_MODULE) && defined(MODULE)) -- cgit From 4495afcf713adb5bdb16504052952bdd0d11f90a Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Tue, 26 Feb 2013 20:03:28 +0530 Subject: usb: dwc3: omap: remove platform data associated with dwc3-omap omap5 is not going to have support for non-dt boot making the platform data associated with dwc3 useless. Removed it here. Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/dwc3-omap.c | 24 ++++++++++-------------- include/linux/platform_data/dwc3-omap.h | 4 ---- 2 files changed, 10 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/dwc3/dwc3-omap.c b/drivers/usb/dwc3/dwc3-omap.c index e1206b419932..43a248219aae 100644 --- a/drivers/usb/dwc3/dwc3-omap.c +++ b/drivers/usb/dwc3/dwc3-omap.c @@ -309,7 +309,6 @@ static int dwc3_omap_remove_core(struct device *dev, void *c) static int dwc3_omap_probe(struct platform_device *pdev) { - struct dwc3_omap_data *pdata = pdev->dev.platform_data; struct device_node *node = pdev->dev.of_node; struct dwc3_omap *omap; @@ -326,6 +325,11 @@ static int dwc3_omap_probe(struct platform_device *pdev) void __iomem *base; void *context; + if (!node) { + dev_err(dev, "device node not found\n"); + return -EINVAL; + } + omap = devm_kzalloc(dev, sizeof(*omap), GFP_KERNEL); if (!omap) { dev_err(dev, "not enough memory\n"); @@ -387,12 +391,7 @@ static int dwc3_omap_probe(struct platform_device *pdev) reg = dwc3_omap_readl(omap->base, USBOTGSS_UTMI_OTG_STATUS); - if (node) - of_property_read_u32(node, "utmi-mode", &utmi_mode); - else if (pdata) - utmi_mode = pdata->utmi_mode; - else - dev_dbg(dev, "missing platform data\n"); + of_property_read_u32(node, "utmi-mode", &utmi_mode); switch (utmi_mode) { case DWC3_OMAP_UTMI_MODE_SW: @@ -435,13 +434,10 @@ static int dwc3_omap_probe(struct platform_device *pdev) dwc3_omap_writel(omap->base, USBOTGSS_IRQENABLE_SET_1, reg); - if (node) { - ret = of_platform_populate(node, NULL, NULL, dev); - if (ret) { - dev_err(&pdev->dev, - "failed to add create dwc3 core\n"); - return ret; - } + ret = of_platform_populate(node, NULL, NULL, dev); + if (ret) { + dev_err(&pdev->dev, "failed to create dwc3 core\n"); + return ret; } return 0; diff --git 
a/include/linux/platform_data/dwc3-omap.h b/include/linux/platform_data/dwc3-omap.h index ada401244e0b..1d36ca874cc8 100644 --- a/include/linux/platform_data/dwc3-omap.h +++ b/include/linux/platform_data/dwc3-omap.h @@ -41,7 +41,3 @@ enum dwc3_omap_utmi_mode { DWC3_OMAP_UTMI_MODE_HW, DWC3_OMAP_UTMI_MODE_SW, }; - -struct dwc3_omap_data { - enum dwc3_omap_utmi_mode utmi_mode; -}; -- cgit From f07bd56bbdaa2340ebf46af9a37e7b2d1b4578e3 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Thu, 24 Jan 2013 14:52:24 +0200 Subject: usb: gadget: udc-core: allow udc class register gadget device Currently all UDC drivers are calling device_register() before calling usb_add_gadget_udc(). In order to avoid code duplication, we can allow udc-core.c register that device. However that would become a really large patch, so to cope with the meanwhile and allow us to write bite-sized patches, we're adding a flag which will be set by UDC driver once it removes the code for registering the gadget device. Once all are converted, the new flag will be removed. Reviewed-by: Tomasz Figa Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc-core.c | 23 +++++++++++++++++++---- include/linux/usb/gadget.h | 4 ++++ 2 files changed, 23 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/gadget/udc-core.c b/drivers/usb/gadget/udc-core.c index 2a9cd369f71c..919505426ec1 100644 --- a/drivers/usb/gadget/udc-core.c +++ b/drivers/usb/gadget/udc-core.c @@ -173,6 +173,14 @@ int usb_add_gadget_udc(struct device *parent, struct usb_gadget *gadget) if (!udc) goto err1; + if (gadget->register_my_device) { + dev_set_name(&gadget->dev, "gadget"); + + ret = device_register(&gadget->dev); + if (ret) + goto err2; + } + device_initialize(&udc->dev); udc->dev.release = usb_udc_release; udc->dev.class = udc_class; @@ -180,7 +188,7 @@ int usb_add_gadget_udc(struct device *parent, struct usb_gadget *gadget) udc->dev.parent = parent; ret = dev_set_name(&udc->dev, "%s", kobject_name(&parent->kobj)); if (ret) - goto err2; + goto err3; udc->gadget = gadget; @@ -189,18 +197,22 @@ int usb_add_gadget_udc(struct device *parent, struct usb_gadget *gadget) ret = device_add(&udc->dev); if (ret) - goto err3; + goto err4; mutex_unlock(&udc_lock); return 0; -err3: + +err4: list_del(&udc->list); mutex_unlock(&udc_lock); -err2: +err3: put_device(&udc->dev); +err2: + if (gadget->register_my_device) + put_device(&gadget->dev); err1: return ret; } @@ -254,6 +266,9 @@ found: kobject_uevent(&udc->dev.kobj, KOBJ_REMOVE); device_unregister(&udc->dev); + + if (gadget->register_my_device) + device_unregister(&gadget->dev); } EXPORT_SYMBOL_GPL(usb_del_gadget_udc); diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index 2e297e80d59a..fcd9ef8d3f70 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -494,6 +494,9 @@ struct usb_gadget_ops { * only supports HNP on a different root port. * @b_hnp_enable: OTG device feature flag, indicating that the A-Host * enabled HNP support. + * @register_my_device: Flag telling udc-core that UDC driver didn't + * register the gadget device to the driver model. Temporary until + * all UDC drivers are fixed up properly. * @name: Identifies the controller hardware type. Used in diagnostics * and sometimes configuration. * @dev: Driver model state for this abstract device. 
@@ -531,6 +534,7 @@ struct usb_gadget { unsigned b_hnp_enable:1; unsigned a_hnp_support:1; unsigned a_alt_hnp_support:1; + unsigned register_my_device:1; const char *name; struct device dev; unsigned out_epnum; -- cgit From 7bce401cc6db5508ef2517e45bd8caf7ce0a15ee Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Thu, 24 Jan 2013 17:41:00 +0200 Subject: usb: gadget: drop now unnecessary flag We don't need the ->register_my_device flag anymore because all UDC drivers have been properly converted. Let's remove every history of it. Signed-off-by: Felipe Balbi --- drivers/usb/chipidea/udc.c | 1 - drivers/usb/dwc3/gadget.c | 1 - drivers/usb/gadget/amd5536udc.c | 1 - drivers/usb/gadget/at91_udc.c | 1 - drivers/usb/gadget/atmel_usba_udc.c | 1 - drivers/usb/gadget/bcm63xx_udc.c | 1 - drivers/usb/gadget/dummy_hcd.c | 1 - drivers/usb/gadget/fsl_qe_udc.c | 1 - drivers/usb/gadget/fsl_udc_core.c | 1 - drivers/usb/gadget/fusb300_udc.c | 1 - drivers/usb/gadget/goku_udc.c | 1 - drivers/usb/gadget/imx_udc.c | 1 - drivers/usb/gadget/lpc32xx_udc.c | 1 - drivers/usb/gadget/m66592-udc.c | 1 - drivers/usb/gadget/mv_u3d_core.c | 1 - drivers/usb/gadget/mv_udc_core.c | 1 - drivers/usb/gadget/net2272.c | 1 - drivers/usb/gadget/net2280.c | 1 - drivers/usb/gadget/omap_udc.c | 1 - drivers/usb/gadget/pch_udc.c | 1 - drivers/usb/gadget/pxa25x_udc.c | 1 - drivers/usb/gadget/pxa27x_udc.c | 1 - drivers/usb/gadget/r8a66597-udc.c | 1 - drivers/usb/gadget/s3c-hsotg.c | 1 - drivers/usb/gadget/s3c-hsudc.c | 1 - drivers/usb/gadget/s3c2410_udc.c | 1 - drivers/usb/gadget/udc-core.c | 18 +++++++----------- drivers/usb/musb/musb_gadget.c | 1 - drivers/usb/renesas_usbhs/mod_gadget.c | 1 - include/linux/usb/gadget.h | 4 ---- 30 files changed, 7 insertions(+), 43 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c index e95e8bbde988..1b65ac8f3c9b 100644 --- a/drivers/usb/chipidea/udc.c +++ b/drivers/usb/chipidea/udc.c @@ -1721,7 +1721,6 @@ static int udc_start(struct ci13xxx *ci) ci->gadget.dev.coherent_dma_mask = dev->coherent_dma_mask; ci->gadget.dev.parent = dev; ci->gadget.dev.release = udc_release; - ci->gadget.register_my_device = true; /* alloc resources */ ci->qh_pool = dma_pool_create("ci13xxx_qh", dev, diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 10bb161eec88..65493b6cd5a6 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2499,7 +2499,6 @@ int dwc3_gadget_init(struct dwc3 *dwc) dwc->gadget.dev.dma_parms = dwc->dev->dma_parms; dwc->gadget.dev.dma_mask = dwc->dev->dma_mask; dwc->gadget.dev.release = dwc3_gadget_release; - dwc->gadget.register_my_device = true; dwc->gadget.name = "dwc3-gadget"; /* diff --git a/drivers/usb/gadget/amd5536udc.c b/drivers/usb/gadget/amd5536udc.c index eee01ea70f8c..eec4461fb45f 100644 --- a/drivers/usb/gadget/amd5536udc.c +++ b/drivers/usb/gadget/amd5536udc.c @@ -3275,7 +3275,6 @@ static int udc_probe(struct udc *dev) dev->gadget.dev.release = gadget_release; dev->gadget.name = name; dev->gadget.max_speed = USB_SPEED_HIGH; - dev->gadget.register_my_device = true; /* init registers, interrupts, ... 
*/ startup_registers(dev); diff --git a/drivers/usb/gadget/at91_udc.c b/drivers/usb/gadget/at91_udc.c index 47b7e58f8415..9936de9bbe50 100644 --- a/drivers/usb/gadget/at91_udc.c +++ b/drivers/usb/gadget/at91_udc.c @@ -1726,7 +1726,6 @@ static int at91udc_probe(struct platform_device *pdev) /* init software state */ udc = &controller; - udc->gadget.register_my_device = true; udc->gadget.dev.parent = dev; if (pdev->dev.of_node) at91udc_of_init(udc, pdev->dev.of_node); diff --git a/drivers/usb/gadget/atmel_usba_udc.c b/drivers/usb/gadget/atmel_usba_udc.c index 2404d0c25668..41518e612808 100644 --- a/drivers/usb/gadget/atmel_usba_udc.c +++ b/drivers/usb/gadget/atmel_usba_udc.c @@ -1902,7 +1902,6 @@ static int __init usba_udc_probe(struct platform_device *pdev) udc->gadget.dev.parent = &pdev->dev; udc->gadget.dev.dma_mask = pdev->dev.dma_mask; - udc->gadget.register_my_device = true; platform_set_drvdata(pdev, udc); diff --git a/drivers/usb/gadget/bcm63xx_udc.c b/drivers/usb/gadget/bcm63xx_udc.c index c020b877219d..d4f73e1b37e6 100644 --- a/drivers/usb/gadget/bcm63xx_udc.c +++ b/drivers/usb/gadget/bcm63xx_udc.c @@ -2374,7 +2374,6 @@ static int bcm63xx_udc_probe(struct platform_device *pdev) udc->gadget.dev.parent = dev; udc->gadget.dev.release = bcm63xx_udc_gadget_release; udc->gadget.dev.dma_mask = dev->dma_mask; - udc->gadget.register_my_device = true; if (!pd->use_fullspeed && !use_fullspeed) udc->gadget.max_speed = USB_SPEED_HIGH; diff --git a/drivers/usb/gadget/dummy_hcd.c b/drivers/usb/gadget/dummy_hcd.c index a6950aa8f3be..c4f27d5a2b9c 100644 --- a/drivers/usb/gadget/dummy_hcd.c +++ b/drivers/usb/gadget/dummy_hcd.c @@ -983,7 +983,6 @@ static int dummy_udc_probe(struct platform_device *pdev) dum->gadget.name = gadget_name; dum->gadget.ops = &dummy_ops; dum->gadget.max_speed = USB_SPEED_SUPER; - dum->gadget.register_my_device = true; dum->gadget.dev.parent = &pdev->dev; dum->gadget.dev.release = dummy_gadget_release; diff --git a/drivers/usb/gadget/fsl_qe_udc.c b/drivers/usb/gadget/fsl_qe_udc.c index 0f78cd859d68..0e7531bd33f4 100644 --- a/drivers/usb/gadget/fsl_qe_udc.c +++ b/drivers/usb/gadget/fsl_qe_udc.c @@ -2525,7 +2525,6 @@ static int qe_udc_probe(struct platform_device *ofdev) udc->gadget.name = driver_name; udc->gadget.dev.release = qe_udc_release; udc->gadget.dev.parent = &ofdev->dev; - udc->gadget.register_my_device = true; /* initialize qe_ep struct */ for (i = 0; i < USB_MAX_ENDPOINTS ; i++) { diff --git a/drivers/usb/gadget/fsl_udc_core.c b/drivers/usb/gadget/fsl_udc_core.c index 9140a2daad87..f33b9005eeac 100644 --- a/drivers/usb/gadget/fsl_udc_core.c +++ b/drivers/usb/gadget/fsl_udc_core.c @@ -2524,7 +2524,6 @@ static int __init fsl_udc_probe(struct platform_device *pdev) udc_controller->gadget.dev.release = fsl_udc_release; udc_controller->gadget.dev.parent = &pdev->dev; udc_controller->gadget.dev.of_node = pdev->dev.of_node; - udc_controller->gadget.register_my_device = true; if (!IS_ERR_OR_NULL(udc_controller->transceiver)) udc_controller->gadget.is_otg = 1; diff --git a/drivers/usb/gadget/fusb300_udc.c b/drivers/usb/gadget/fusb300_udc.c index d29017218b01..2d3c8b351f42 100644 --- a/drivers/usb/gadget/fusb300_udc.c +++ b/drivers/usb/gadget/fusb300_udc.c @@ -1427,7 +1427,6 @@ static int __init fusb300_probe(struct platform_device *pdev) fusb300->gadget.dev.dma_mask = pdev->dev.dma_mask; fusb300->gadget.dev.release = pdev->dev.release; fusb300->gadget.name = udc_name; - fusb300->gadget.register_my_device = true; fusb300->reg = reg; ret = request_irq(ires->start, 
fusb300_irq, IRQF_SHARED, diff --git a/drivers/usb/gadget/goku_udc.c b/drivers/usb/gadget/goku_udc.c index b4ea2cf465a6..8a6c66618bd3 100644 --- a/drivers/usb/gadget/goku_udc.c +++ b/drivers/usb/gadget/goku_udc.c @@ -1758,7 +1758,6 @@ static int goku_probe(struct pci_dev *pdev, const struct pci_device_id *id) dev->gadget.dev.dma_mask = pdev->dev.dma_mask; dev->gadget.dev.release = gadget_release; dev->gadget.name = driver_name; - dev->gadget.register_my_device = true; /* now all the pci goodies ... */ retval = pci_enable_device(pdev); diff --git a/drivers/usb/gadget/imx_udc.c b/drivers/usb/gadget/imx_udc.c index 435b20346ead..9c5b7451a7d1 100644 --- a/drivers/usb/gadget/imx_udc.c +++ b/drivers/usb/gadget/imx_udc.c @@ -1461,7 +1461,6 @@ static int __init imx_udc_probe(struct platform_device *pdev) imx_usb->clk = clk; imx_usb->dev = &pdev->dev; - imx_usb->gadget.register_my_device = true; imx_usb->gadget.dev.parent = &pdev->dev; imx_usb->gadget.dev.dma_mask = pdev->dev.dma_mask; diff --git a/drivers/usb/gadget/lpc32xx_udc.c b/drivers/usb/gadget/lpc32xx_udc.c index 329e1c5f0ef9..67c3ef9d9bed 100644 --- a/drivers/usb/gadget/lpc32xx_udc.c +++ b/drivers/usb/gadget/lpc32xx_udc.c @@ -3090,7 +3090,6 @@ static int __init lpc32xx_udc_probe(struct platform_device *pdev) /* init software state */ udc->gadget.dev.parent = dev; - udc->gadget.register_my_device = true; udc->pdev = pdev; udc->dev = &pdev->dev; udc->enabled = 0; diff --git a/drivers/usb/gadget/m66592-udc.c b/drivers/usb/gadget/m66592-udc.c index 43ad70dff74d..eb61d0b54f21 100644 --- a/drivers/usb/gadget/m66592-udc.c +++ b/drivers/usb/gadget/m66592-udc.c @@ -1612,7 +1612,6 @@ static int __init m66592_probe(struct platform_device *pdev) m66592->gadget.dev.dma_mask = pdev->dev.dma_mask; m66592->gadget.dev.release = pdev->dev.release; m66592->gadget.name = udc_name; - m66592->gadget.register_my_device = true; init_timer(&m66592->timer); m66592->timer.function = m66592_timer; diff --git a/drivers/usb/gadget/mv_u3d_core.c b/drivers/usb/gadget/mv_u3d_core.c index 734ade11505f..e5735fc610de 100644 --- a/drivers/usb/gadget/mv_u3d_core.c +++ b/drivers/usb/gadget/mv_u3d_core.c @@ -1959,7 +1959,6 @@ static int mv_u3d_probe(struct platform_device *dev) u3d->gadget.dev.dma_mask = dev->dev.dma_mask; u3d->gadget.dev.release = mv_u3d_gadget_release; u3d->gadget.name = driver_name; /* gadget name */ - u3d->gadget.register_my_device = true; mv_u3d_eps_init(u3d); diff --git a/drivers/usb/gadget/mv_udc_core.c b/drivers/usb/gadget/mv_udc_core.c index a7afdfb413b3..be35573f8703 100644 --- a/drivers/usb/gadget/mv_udc_core.c +++ b/drivers/usb/gadget/mv_udc_core.c @@ -2313,7 +2313,6 @@ static int mv_udc_probe(struct platform_device *pdev) udc->gadget.dev.dma_mask = pdev->dev.dma_mask; udc->gadget.dev.release = gadget_release; udc->gadget.name = driver_name; /* gadget name */ - udc->gadget.register_my_device = true; eps_init(udc); diff --git a/drivers/usb/gadget/net2272.c b/drivers/usb/gadget/net2272.c index 635248f42dcd..78c8bb538332 100644 --- a/drivers/usb/gadget/net2272.c +++ b/drivers/usb/gadget/net2272.c @@ -2239,7 +2239,6 @@ static struct net2272 *net2272_probe_init(struct device *dev, unsigned int irq) ret->gadget.dev.dma_mask = dev->dma_mask; ret->gadget.dev.release = net2272_gadget_release; ret->gadget.name = driver_name; - ret->gadget.register_my_device = true; return ret; } diff --git a/drivers/usb/gadget/net2280.c b/drivers/usb/gadget/net2280.c index c55af4293509..2089d9b0058c 100644 --- a/drivers/usb/gadget/net2280.c +++ b/drivers/usb/gadget/net2280.c 
@@ -2714,7 +2714,6 @@ static int net2280_probe (struct pci_dev *pdev, const struct pci_device_id *id) dev->gadget.dev.dma_mask = pdev->dev.dma_mask; dev->gadget.dev.release = gadget_release; dev->gadget.name = driver_name; - dev->gadget.register_my_device = true; /* now all the pci goodies ... */ if (pci_enable_device (pdev) < 0) { diff --git a/drivers/usb/gadget/omap_udc.c b/drivers/usb/gadget/omap_udc.c index c979272e7c86..b23c861e2a97 100644 --- a/drivers/usb/gadget/omap_udc.c +++ b/drivers/usb/gadget/omap_udc.c @@ -2634,7 +2634,6 @@ omap_udc_setup(struct platform_device *odev, struct usb_phy *xceiv) udc->gadget.dev.release = omap_udc_release; udc->gadget.dev.parent = &odev->dev; - udc->gadget.register_my_device = true; if (use_dma) udc->gadget.dev.dma_mask = odev->dev.dma_mask; diff --git a/drivers/usb/gadget/pch_udc.c b/drivers/usb/gadget/pch_udc.c index 703214543dd4..e8c9afd8fbf0 100644 --- a/drivers/usb/gadget/pch_udc.c +++ b/drivers/usb/gadget/pch_udc.c @@ -3198,7 +3198,6 @@ static int pch_udc_probe(struct pci_dev *pdev, dev->gadget.dev.release = gadget_release; dev->gadget.name = KBUILD_MODNAME; dev->gadget.max_speed = USB_SPEED_HIGH; - dev->gadget.register_my_device = true; /* Put the device in disconnected state till a driver is bound */ pch_udc_set_disconnect(dev); diff --git a/drivers/usb/gadget/pxa25x_udc.c b/drivers/usb/gadget/pxa25x_udc.c index 8996fcb053ef..e29bb878b2d7 100644 --- a/drivers/usb/gadget/pxa25x_udc.c +++ b/drivers/usb/gadget/pxa25x_udc.c @@ -2140,7 +2140,6 @@ static int __init pxa25x_udc_probe(struct platform_device *pdev) dev->gadget.dev.parent = &pdev->dev; dev->gadget.dev.dma_mask = pdev->dev.dma_mask; - dev->gadget.register_my_device = true; the_controller = dev; platform_set_drvdata(pdev, dev); diff --git a/drivers/usb/gadget/pxa27x_udc.c b/drivers/usb/gadget/pxa27x_udc.c index 1c5bfaafa6c8..07ce1477f911 100644 --- a/drivers/usb/gadget/pxa27x_udc.c +++ b/drivers/usb/gadget/pxa27x_udc.c @@ -2457,7 +2457,6 @@ static int __init pxa_udc_probe(struct platform_device *pdev) udc->gadget.dev.parent = &pdev->dev; udc->gadget.dev.dma_mask = NULL; - udc->gadget.register_my_device = true; udc->vbus_sensed = 0; the_controller = udc; diff --git a/drivers/usb/gadget/r8a66597-udc.c b/drivers/usb/gadget/r8a66597-udc.c index ae94c0eaf633..a67d47708b98 100644 --- a/drivers/usb/gadget/r8a66597-udc.c +++ b/drivers/usb/gadget/r8a66597-udc.c @@ -1919,7 +1919,6 @@ static int __init r8a66597_probe(struct platform_device *pdev) r8a66597->gadget.dev.dma_mask = pdev->dev.dma_mask; r8a66597->gadget.dev.release = pdev->dev.release; r8a66597->gadget.name = udc_name; - r8a66597->gadget.register_my_device = true; init_timer(&r8a66597->timer); r8a66597->timer.function = r8a66597_timer; diff --git a/drivers/usb/gadget/s3c-hsotg.c b/drivers/usb/gadget/s3c-hsotg.c index 5fbd233eb6a0..8ae0bd99ffde 100644 --- a/drivers/usb/gadget/s3c-hsotg.c +++ b/drivers/usb/gadget/s3c-hsotg.c @@ -3573,7 +3573,6 @@ static int s3c_hsotg_probe(struct platform_device *pdev) hsotg->gadget.dev.parent = dev; hsotg->gadget.dev.dma_mask = dev->dma_mask; hsotg->gadget.dev.release = s3c_hsotg_release; - hsotg->gadget.register_my_device = true; /* reset the system */ diff --git a/drivers/usb/gadget/s3c-hsudc.c b/drivers/usb/gadget/s3c-hsudc.c index c4ff747f53fc..7fc3de537c9a 100644 --- a/drivers/usb/gadget/s3c-hsudc.c +++ b/drivers/usb/gadget/s3c-hsudc.c @@ -1312,7 +1312,6 @@ static int s3c_hsudc_probe(struct platform_device *pdev) hsudc->gadget.is_otg = 0; hsudc->gadget.is_a_peripheral = 0; hsudc->gadget.speed = 
USB_SPEED_UNKNOWN; - hsudc->gadget.register_my_device = true; s3c_hsudc_setup_ep(hsudc); diff --git a/drivers/usb/gadget/s3c2410_udc.c b/drivers/usb/gadget/s3c2410_udc.c index c4134948dd9e..a669081bbb88 100644 --- a/drivers/usb/gadget/s3c2410_udc.c +++ b/drivers/usb/gadget/s3c2410_udc.c @@ -1826,7 +1826,6 @@ static int s3c2410_udc_probe(struct platform_device *pdev) udc->gadget.dev.parent = &pdev->dev; udc->gadget.dev.dma_mask = pdev->dev.dma_mask; - udc->gadget.register_my_device = true; the_controller = udc; platform_set_drvdata(pdev, udc); diff --git a/drivers/usb/gadget/udc-core.c b/drivers/usb/gadget/udc-core.c index 919505426ec1..40b1d888d5a1 100644 --- a/drivers/usb/gadget/udc-core.c +++ b/drivers/usb/gadget/udc-core.c @@ -173,13 +173,11 @@ int usb_add_gadget_udc(struct device *parent, struct usb_gadget *gadget) if (!udc) goto err1; - if (gadget->register_my_device) { - dev_set_name(&gadget->dev, "gadget"); + dev_set_name(&gadget->dev, "gadget"); - ret = device_register(&gadget->dev); - if (ret) - goto err2; - } + ret = device_register(&gadget->dev); + if (ret) + goto err2; device_initialize(&udc->dev); udc->dev.release = usb_udc_release; @@ -211,8 +209,8 @@ err3: put_device(&udc->dev); err2: - if (gadget->register_my_device) - put_device(&gadget->dev); + put_device(&gadget->dev); + err1: return ret; } @@ -266,9 +264,7 @@ found: kobject_uevent(&udc->dev.kobj, KOBJ_REMOVE); device_unregister(&udc->dev); - - if (gadget->register_my_device) - device_unregister(&gadget->dev); + device_unregister(&gadget->dev); } EXPORT_SYMBOL_GPL(usb_del_gadget_udc); diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c index cadb750921e9..e363033f6754 100644 --- a/drivers/usb/musb/musb_gadget.c +++ b/drivers/usb/musb/musb_gadget.c @@ -1891,7 +1891,6 @@ int musb_gadget_setup(struct musb *musb) musb->g.dev.dma_mask = musb->controller->dma_mask; musb->g.dev.release = musb_gadget_release; musb->g.name = musb_driver_name; - musb->g.register_my_device = true; musb->g.is_otg = 1; musb_g_init_endpoints(musb); diff --git a/drivers/usb/renesas_usbhs/mod_gadget.c b/drivers/usb/renesas_usbhs/mod_gadget.c index 5d5fab0ad0d1..6a3afa9b764c 100644 --- a/drivers/usb/renesas_usbhs/mod_gadget.c +++ b/drivers/usb/renesas_usbhs/mod_gadget.c @@ -981,7 +981,6 @@ int usbhs_mod_gadget_probe(struct usbhs_priv *priv) gpriv->gadget.name = "renesas_usbhs_udc"; gpriv->gadget.ops = &usbhsg_gadget_ops; gpriv->gadget.max_speed = USB_SPEED_HIGH; - gpriv->gadget.register_my_device = true; INIT_LIST_HEAD(&gpriv->gadget.ep_list); diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index fcd9ef8d3f70..2e297e80d59a 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -494,9 +494,6 @@ struct usb_gadget_ops { * only supports HNP on a different root port. * @b_hnp_enable: OTG device feature flag, indicating that the A-Host * enabled HNP support. - * @register_my_device: Flag telling udc-core that UDC driver didn't - * register the gadget device to the driver model. Temporary until - * all UDC drivers are fixed up properly. * @name: Identifies the controller hardware type. Used in diagnostics * and sometimes configuration. * @dev: Driver model state for this abstract device. 
@@ -534,7 +531,6 @@ struct usb_gadget { unsigned b_hnp_enable:1; unsigned a_hnp_support:1; unsigned a_alt_hnp_support:1; - unsigned register_my_device:1; const char *name; struct device dev; unsigned out_epnum; -- cgit From d1e3d757f7aa91f15db347fc05ffd7ef7f413091 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Thu, 24 Jan 2013 22:29:48 +0200 Subject: usb: common: introduce usb_state_string() this function will receive enum usb_device_state and return a human-readable string from it or, in case an unknown value is passed as argument, the string "UNKNOWN". Signed-off-by: Felipe Balbi --- drivers/usb/usb-common.c | 21 +++++++++++++++++++++ include/linux/usb/ch9.h | 9 +++++++++ 2 files changed, 30 insertions(+) (limited to 'include/linux') diff --git a/drivers/usb/usb-common.c b/drivers/usb/usb-common.c index d29503e954ab..070b681e5d17 100644 --- a/drivers/usb/usb-common.c +++ b/drivers/usb/usb-common.c @@ -32,4 +32,25 @@ const char *usb_speed_string(enum usb_device_speed speed) } EXPORT_SYMBOL_GPL(usb_speed_string); +const char *usb_state_string(enum usb_device_state state) +{ + static const char *const names[] = { + [USB_STATE_NOTATTACHED] = "not attached", + [USB_STATE_ATTACHED] = "attached", + [USB_STATE_POWERED] = "powered", + [USB_STATE_RECONNECTING] = "reconnecting", + [USB_STATE_UNAUTHENTICATED] = "unauthenticated", + [USB_STATE_DEFAULT] = "default", + [USB_STATE_ADDRESS] = "addresssed", + [USB_STATE_CONFIGURED] = "configured", + [USB_STATE_SUSPENDED] = "suspended", + }; + + if (state < 0 || state >= ARRAY_SIZE(names)) + return "UNKNOWN"; + + return names[state]; +} +EXPORT_SYMBOL_GPL(usb_state_string); + MODULE_LICENSE("GPL"); diff --git a/include/linux/usb/ch9.h b/include/linux/usb/ch9.h index 9c210f2283df..27603bcbb9b9 100644 --- a/include/linux/usb/ch9.h +++ b/include/linux/usb/ch9.h @@ -43,4 +43,13 @@ */ extern const char *usb_speed_string(enum usb_device_speed speed); + +/** + * usb_state_string - Returns human readable name for the state. + * @state: The state to return a human-readable name for. If it's not + * any of the states devices in usb_device_state_string enum, + * the string UNKNOWN will be returned. + */ +extern const char *usb_state_string(enum usb_device_state state); + #endif /* __LINUX_USB_CH9_H */ -- cgit From 49401f4169c0e5a1b38f1a676d6f12eecaf77485 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Mon, 19 Dec 2011 12:57:04 +0200 Subject: usb: gadget: introduce gadget state tracking that's useful information to expose to userland.
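As an illustration only (not part of this patch), a UDC driver would call the new usb_gadget_set_state() helper whenever its hardware reports a chapter 9 transition, which keeps the sysfs attribute added below current; every example_/EXAMPLE_ name in this sketch is invented:

/*
 * Hypothetical interrupt handler updating the new gadget->state field.
 * Only usb_gadget_set_state() and the USB_STATE_* values are real; the
 * driver structure, status accessor and bit names are assumptions.
 */
static irqreturn_t example_udc_irq(int irq, void *data)
{
	struct example_udc *udc = data;
	u32 status = example_udc_read_status(udc);

	if (status & EXAMPLE_UDC_IRQ_RESET)
		usb_gadget_set_state(&udc->gadget, USB_STATE_DEFAULT);
	else if (status & EXAMPLE_UDC_IRQ_ADDRESSED)
		usb_gadget_set_state(&udc->gadget, USB_STATE_ADDRESS);
	else if (status & EXAMPLE_UDC_IRQ_CONFIGURED)
		usb_gadget_set_state(&udc->gadget, USB_STATE_CONFIGURED);

	return IRQ_HANDLED;
}

Userland can then read the "state" attribute of the udc class device and gets back the string produced by usb_state_string().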
Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc-core.c | 23 +++++++++++++++++++++++ include/linux/usb/gadget.h | 9 +++++++++ 2 files changed, 32 insertions(+) (limited to 'include/linux') diff --git a/drivers/usb/gadget/udc-core.c b/drivers/usb/gadget/udc-core.c index 40b1d888d5a1..8a1eeb24ae6a 100644 --- a/drivers/usb/gadget/udc-core.c +++ b/drivers/usb/gadget/udc-core.c @@ -101,6 +101,16 @@ EXPORT_SYMBOL_GPL(usb_gadget_unmap_request); /* ------------------------------------------------------------------------- */ +void usb_gadget_set_state(struct usb_gadget *gadget, + enum usb_device_state state) +{ + gadget->state = state; + sysfs_notify(&gadget->dev.kobj, NULL, "status"); +} +EXPORT_SYMBOL_GPL(usb_gadget_set_state); + +/* ------------------------------------------------------------------------- */ + /** * usb_gadget_udc_start - tells usb device controller to start up * @gadget: The gadget we want to get started @@ -197,6 +207,8 @@ int usb_add_gadget_udc(struct device *parent, struct usb_gadget *gadget) if (ret) goto err4; + usb_gadget_set_state(gadget, USB_STATE_NOTATTACHED); + mutex_unlock(&udc_lock); return 0; @@ -406,6 +418,16 @@ static ssize_t usb_udc_softconn_store(struct device *dev, } static DEVICE_ATTR(soft_connect, S_IWUSR, NULL, usb_udc_softconn_store); +static ssize_t usb_gadget_state_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct usb_udc *udc = container_of(dev, struct usb_udc, dev); + struct usb_gadget *gadget = udc->gadget; + + return sprintf(buf, "%s\n", usb_state_string(gadget->state)); +} +static DEVICE_ATTR(state, S_IRUGO, usb_gadget_state_show, NULL); + #define USB_UDC_SPEED_ATTR(name, param) \ ssize_t usb_udc_##param##_show(struct device *dev, \ struct device_attribute *attr, char *buf) \ @@ -439,6 +461,7 @@ static USB_UDC_ATTR(a_alt_hnp_support); static struct attribute *usb_udc_attrs[] = { &dev_attr_srp.attr, &dev_attr_soft_connect.attr, + &dev_attr_state.attr, &dev_attr_current_speed.attr, &dev_attr_maximum_speed.attr, diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index 2e297e80d59a..32b734d88d6b 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -482,6 +482,7 @@ struct usb_gadget_ops { * @speed: Speed of current connection to USB host. * @max_speed: Maximal speed the UDC can handle. UDC must support this * and all slower speeds. + * @state: the state we are now (attached, suspended, configured, etc) * @sg_supported: true if we can handle scatter-gather * @is_otg: True if the USB device port uses a Mini-AB jack, so that the * gadget driver must provide a USB OTG descriptor. 
@@ -525,6 +526,7 @@ struct usb_gadget { struct list_head ep_list; /* of usb_ep */ enum usb_device_speed speed; enum usb_device_speed max_speed; + enum usb_device_state state; unsigned sg_supported:1; unsigned is_otg:1; unsigned is_a_peripheral:1; @@ -959,6 +961,13 @@ extern void usb_gadget_unmap_request(struct usb_gadget *gadget, /*-------------------------------------------------------------------------*/ +/* utility to set gadget state properly */ + +extern void usb_gadget_set_state(struct usb_gadget *gadget, + enum usb_device_state state); + +/*-------------------------------------------------------------------------*/ + /* utility wrapping a simple endpoint selection policy */ extern struct usb_ep *usb_ep_autoconfig(struct usb_gadget *, -- cgit From 792bfcf7a1cd7913fa5d55f2b3a40e3275e98f6f Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Tue, 26 Feb 2013 14:47:44 +0200 Subject: usb: gadget: udc-core: introduce usb_add_gadget_udc_release() not all UDC drivers need a proper release function; for those which don't need it, udc-core will provide a no-op release method so we can remove the "redefinition" of such methods in almost every UDC driver. Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc-core.c | 39 ++++++++++++++++++++++++++++++++++----- include/linux/usb/gadget.h | 2 ++ 2 files changed, 36 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/gadget/udc-core.c b/drivers/usb/gadget/udc-core.c index 2423d024654f..a50811e35bdb 100644 --- a/drivers/usb/gadget/udc-core.c +++ b/drivers/usb/gadget/udc-core.c @@ -166,15 +166,23 @@ static void usb_udc_release(struct device *dev) } static const struct attribute_group *usb_udc_attr_groups[]; + +static void usb_udc_nop_release(struct device *dev) +{ + dev_vdbg(dev, "%s\n", __func__); +} + /** - * usb_add_gadget_udc - adds a new gadget to the udc class driver list - * @parent: the parent device to this udc. Usually the controller - * driver's device. - * @gadget: the gadget to be added to the list + * usb_add_gadget_udc_release - adds a new gadget to the udc class driver list + * @parent: the parent device to this udc. Usually the controller driver's + * device. + * @gadget: the gadget to be added to the list. + * @release: a gadget release function. * * Returns zero on success, negative errno otherwise. */ -int usb_add_gadget_udc(struct device *parent, struct usb_gadget *gadget) +int usb_add_gadget_udc_release(struct device *parent, struct usb_gadget *gadget, + void (*release)(struct device *dev)) { struct usb_udc *udc; int ret = -ENOMEM; @@ -190,6 +198,13 @@ int usb_add_gadget_udc(struct device *parent, struct usb_gadget *gadget) gadget->dev.dma_parms = parent->dma_parms; gadget->dev.dma_mask = parent->dma_mask; + if (release) { + gadget->dev.release = release; + } else { + if (!gadget->dev.release) + gadget->dev.release = usb_udc_nop_release; + } + ret = device_register(&gadget->dev); if (ret) goto err2; @@ -231,6 +246,20 @@ err2: err1: return ret; } +EXPORT_SYMBOL_GPL(usb_add_gadget_udc_release); + +/** + * usb_add_gadget_udc - adds a new gadget to the udc class driver list + * @parent: the parent device to this udc. Usually the controller + * driver's device. + * @gadget: the gadget to be added to the list + * + * Returns zero on success, negative errno otherwise.
+ */ +int usb_add_gadget_udc(struct device *parent, struct usb_gadget *gadget) +{ + return usb_add_gadget_udc_release(parent, gadget, NULL); +} EXPORT_SYMBOL_GPL(usb_add_gadget_udc); static void usb_gadget_remove_driver(struct usb_udc *udc) diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index 32b734d88d6b..c454a88abf2e 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -874,6 +874,8 @@ int usb_gadget_probe_driver(struct usb_gadget_driver *driver); */ int usb_gadget_unregister_driver(struct usb_gadget_driver *driver); +extern int usb_add_gadget_udc_release(struct device *parent, + struct usb_gadget *gadget, void (*release)(struct device *dev)); extern int usb_add_gadget_udc(struct device *parent, struct usb_gadget *gadget); extern void usb_del_gadget_udc(struct usb_gadget *gadget); extern int udc_attach_driver(const char *name, -- cgit From 42c0bf1ce7c067bbc3e77d5626f102a16bc4fb6b Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Thu, 7 Mar 2013 10:39:57 +0200 Subject: usb: otg: prefix otg_state_string with usb_ all other functions under drivers/usb/ start with usb_, let's do the same thing. This patch is in preparation for moving otg_state_string to usb-common.c and deleting otg.c completely. Signed-off-by: Felipe Balbi --- drivers/usb/musb/am35x.c | 8 ++++---- drivers/usb/musb/blackfin.c | 6 +++--- drivers/usb/musb/da8xx.c | 8 ++++---- drivers/usb/musb/davinci.c | 4 ++-- drivers/usb/musb/musb_core.c | 39 ++++++++++++++++++++------------------- drivers/usb/musb/musb_dsps.c | 8 ++++---- drivers/usb/musb/musb_gadget.c | 8 ++++---- drivers/usb/musb/musb_host.c | 2 +- drivers/usb/musb/musb_virthub.c | 4 ++-- drivers/usb/musb/omap2430.c | 6 +++--- drivers/usb/musb/tusb6010.c | 14 +++++++------- drivers/usb/otg/fsl_otg.c | 2 +- drivers/usb/otg/isp1301_omap.c | 6 +++--- drivers/usb/otg/otg.c | 4 ++-- drivers/usb/otg/otg_fsm.c | 2 +- include/linux/usb/otg.h | 4 ++-- 16 files changed, 63 insertions(+), 62 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/musb/am35x.c b/drivers/usb/musb/am35x.c index 59eea219034a..2231850c0625 100644 --- a/drivers/usb/musb/am35x.c +++ b/drivers/usb/musb/am35x.c @@ -149,7 +149,7 @@ static void otg_timer(unsigned long _musb) */ devctl = musb_readb(mregs, MUSB_DEVCTL); dev_dbg(musb->controller, "Poll devctl %02x (%s)\n", devctl, - otg_state_string(musb->xceiv->state)); + usb_otg_state_string(musb->xceiv->state)); spin_lock_irqsave(&musb->lock, flags); switch (musb->xceiv->state) { @@ -195,7 +195,7 @@ static void am35x_musb_try_idle(struct musb *musb, unsigned long timeout) if (musb->is_active || (musb->a_wait_bcon == 0 && musb->xceiv->state == OTG_STATE_A_WAIT_BCON)) { dev_dbg(musb->controller, "%s active, deleting timer\n", - otg_state_string(musb->xceiv->state)); + usb_otg_state_string(musb->xceiv->state)); del_timer(&otg_workaround); last_timer = jiffies; return; @@ -208,7 +208,7 @@ static void am35x_musb_try_idle(struct musb *musb, unsigned long timeout) last_timer = timeout; dev_dbg(musb->controller, "%s inactive, starting idle timer for %u ms\n", - otg_state_string(musb->xceiv->state), + usb_otg_state_string(musb->xceiv->state), jiffies_to_msecs(timeout - jiffies)); mod_timer(&otg_workaround, timeout); } @@ -298,7 +298,7 @@ static irqreturn_t am35x_musb_interrupt(int irq, void *hci) /* NOTE: this must complete power-on within 100 ms. */ dev_dbg(musb->controller, "VBUS %s (%s)%s, devctl %02x\n", drvvbus ? 
"on" : "off", - otg_state_string(musb->xceiv->state), + usb_otg_state_string(musb->xceiv->state), err ? " ERROR" : "", devctl); ret = IRQ_HANDLED; diff --git a/drivers/usb/musb/blackfin.c b/drivers/usb/musb/blackfin.c index dbb31b30c7fa..5e63b160db0c 100644 --- a/drivers/usb/musb/blackfin.c +++ b/drivers/usb/musb/blackfin.c @@ -280,13 +280,13 @@ static void musb_conn_timer_handler(unsigned long _musb) break; default: dev_dbg(musb->controller, "%s state not handled\n", - otg_state_string(musb->xceiv->state)); + usb_otg_state_string(musb->xceiv->state)); break; } spin_unlock_irqrestore(&musb->lock, flags); dev_dbg(musb->controller, "state is %s\n", - otg_state_string(musb->xceiv->state)); + usb_otg_state_string(musb->xceiv->state)); } static void bfin_musb_enable(struct musb *musb) @@ -307,7 +307,7 @@ static void bfin_musb_set_vbus(struct musb *musb, int is_on) dev_dbg(musb->controller, "VBUS %s, devctl %02x " /* otg %3x conf %08x prcm %08x */ "\n", - otg_state_string(musb->xceiv->state), + usb_otg_state_string(musb->xceiv->state), musb_readb(musb->mregs, MUSB_DEVCTL)); } diff --git a/drivers/usb/musb/da8xx.c b/drivers/usb/musb/da8xx.c index 7c71769d71ff..ea7e591093ee 100644 --- a/drivers/usb/musb/da8xx.c +++ b/drivers/usb/musb/da8xx.c @@ -198,7 +198,7 @@ static void otg_timer(unsigned long _musb) */ devctl = musb_readb(mregs, MUSB_DEVCTL); dev_dbg(musb->controller, "Poll devctl %02x (%s)\n", devctl, - otg_state_string(musb->xceiv->state)); + usb_otg_state_string(musb->xceiv->state)); spin_lock_irqsave(&musb->lock, flags); switch (musb->xceiv->state) { @@ -267,7 +267,7 @@ static void da8xx_musb_try_idle(struct musb *musb, unsigned long timeout) if (musb->is_active || (musb->a_wait_bcon == 0 && musb->xceiv->state == OTG_STATE_A_WAIT_BCON)) { dev_dbg(musb->controller, "%s active, deleting timer\n", - otg_state_string(musb->xceiv->state)); + usb_otg_state_string(musb->xceiv->state)); del_timer(&otg_workaround); last_timer = jiffies; return; @@ -280,7 +280,7 @@ static void da8xx_musb_try_idle(struct musb *musb, unsigned long timeout) last_timer = timeout; dev_dbg(musb->controller, "%s inactive, starting idle timer for %u ms\n", - otg_state_string(musb->xceiv->state), + usb_otg_state_string(musb->xceiv->state), jiffies_to_msecs(timeout - jiffies)); mod_timer(&otg_workaround, timeout); } @@ -360,7 +360,7 @@ static irqreturn_t da8xx_musb_interrupt(int irq, void *hci) dev_dbg(musb->controller, "VBUS %s (%s)%s, devctl %02x\n", drvvbus ? "on" : "off", - otg_state_string(musb->xceiv->state), + usb_otg_state_string(musb->xceiv->state), err ? " ERROR" : "", devctl); ret = IRQ_HANDLED; diff --git a/drivers/usb/musb/davinci.c b/drivers/usb/musb/davinci.c index e040d9103735..bea6cc35471c 100644 --- a/drivers/usb/musb/davinci.c +++ b/drivers/usb/musb/davinci.c @@ -215,7 +215,7 @@ static void otg_timer(unsigned long _musb) */ devctl = musb_readb(mregs, MUSB_DEVCTL); dev_dbg(musb->controller, "poll devctl %02x (%s)\n", devctl, - otg_state_string(musb->xceiv->state)); + usb_otg_state_string(musb->xceiv->state)); spin_lock_irqsave(&musb->lock, flags); switch (musb->xceiv->state) { @@ -349,7 +349,7 @@ static irqreturn_t davinci_musb_interrupt(int irq, void *__hci) davinci_musb_source_power(musb, drvvbus, 0); dev_dbg(musb->controller, "VBUS %s (%s)%s, devctl %02x\n", drvvbus ? "on" : "off", - otg_state_string(musb->xceiv->state), + usb_otg_state_string(musb->xceiv->state), err ? 
" ERROR" : "", devctl); retval = IRQ_HANDLED; diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index fad8571ed433..6bd879257e4c 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -372,13 +372,13 @@ static void musb_otg_timer_func(unsigned long data) case OTG_STATE_A_SUSPEND: case OTG_STATE_A_WAIT_BCON: dev_dbg(musb->controller, "HNP: %s timeout\n", - otg_state_string(musb->xceiv->state)); + usb_otg_state_string(musb->xceiv->state)); musb_platform_set_vbus(musb, 0); musb->xceiv->state = OTG_STATE_A_WAIT_VFALL; break; default: dev_dbg(musb->controller, "HNP: Unhandled mode %s\n", - otg_state_string(musb->xceiv->state)); + usb_otg_state_string(musb->xceiv->state)); } musb->ignore_disconnect = 0; spin_unlock_irqrestore(&musb->lock, flags); @@ -393,13 +393,14 @@ void musb_hnp_stop(struct musb *musb) void __iomem *mbase = musb->mregs; u8 reg; - dev_dbg(musb->controller, "HNP: stop from %s\n", otg_state_string(musb->xceiv->state)); + dev_dbg(musb->controller, "HNP: stop from %s\n", + usb_otg_state_string(musb->xceiv->state)); switch (musb->xceiv->state) { case OTG_STATE_A_PERIPHERAL: musb_g_disconnect(musb); dev_dbg(musb->controller, "HNP: back to %s\n", - otg_state_string(musb->xceiv->state)); + usb_otg_state_string(musb->xceiv->state)); break; case OTG_STATE_B_HOST: dev_dbg(musb->controller, "HNP: Disabling HR\n"); @@ -413,7 +414,7 @@ void musb_hnp_stop(struct musb *musb) break; default: dev_dbg(musb->controller, "HNP: Stopping in unknown state %s\n", - otg_state_string(musb->xceiv->state)); + usb_otg_state_string(musb->xceiv->state)); } /* @@ -451,7 +452,7 @@ static irqreturn_t musb_stage0_irq(struct musb *musb, u8 int_usb, */ if (int_usb & MUSB_INTR_RESUME) { handled = IRQ_HANDLED; - dev_dbg(musb->controller, "RESUME (%s)\n", otg_state_string(musb->xceiv->state)); + dev_dbg(musb->controller, "RESUME (%s)\n", usb_otg_state_string(musb->xceiv->state)); if (devctl & MUSB_DEVCTL_HM) { void __iomem *mbase = musb->mregs; @@ -493,7 +494,7 @@ static irqreturn_t musb_stage0_irq(struct musb *musb, u8 int_usb, default: WARNING("bogus %s RESUME (%s)\n", "host", - otg_state_string(musb->xceiv->state)); + usb_otg_state_string(musb->xceiv->state)); } } else { switch (musb->xceiv->state) { @@ -522,7 +523,7 @@ static irqreturn_t musb_stage0_irq(struct musb *musb, u8 int_usb, default: WARNING("bogus %s RESUME (%s)\n", "peripheral", - otg_state_string(musb->xceiv->state)); + usb_otg_state_string(musb->xceiv->state)); } } } @@ -538,7 +539,7 @@ static irqreturn_t musb_stage0_irq(struct musb *musb, u8 int_usb, } dev_dbg(musb->controller, "SESSION_REQUEST (%s)\n", - otg_state_string(musb->xceiv->state)); + usb_otg_state_string(musb->xceiv->state)); /* IRQ arrives from ID pin sense or (later, if VBUS power * is removed) SRP. 
responses are time critical: @@ -603,7 +604,7 @@ static irqreturn_t musb_stage0_irq(struct musb *musb, u8 int_usb, } dev_dbg(musb->controller, "VBUS_ERROR in %s (%02x, %s), retry #%d, port1 %08x\n", - otg_state_string(musb->xceiv->state), + usb_otg_state_string(musb->xceiv->state), devctl, ({ char *s; switch (devctl & MUSB_DEVCTL_VBUS) { @@ -628,7 +629,7 @@ static irqreturn_t musb_stage0_irq(struct musb *musb, u8 int_usb, if (int_usb & MUSB_INTR_SUSPEND) { dev_dbg(musb->controller, "SUSPEND (%s) devctl %02x\n", - otg_state_string(musb->xceiv->state), devctl); + usb_otg_state_string(musb->xceiv->state), devctl); handled = IRQ_HANDLED; switch (musb->xceiv->state) { @@ -745,12 +746,12 @@ b_host: usb_hcd_resume_root_hub(hcd); dev_dbg(musb->controller, "CONNECT (%s) devctl %02x\n", - otg_state_string(musb->xceiv->state), devctl); + usb_otg_state_string(musb->xceiv->state), devctl); } if ((int_usb & MUSB_INTR_DISCONNECT) && !musb->ignore_disconnect) { dev_dbg(musb->controller, "DISCONNECT (%s) as %s, devctl %02x\n", - otg_state_string(musb->xceiv->state), + usb_otg_state_string(musb->xceiv->state), MUSB_MODE(musb), devctl); handled = IRQ_HANDLED; @@ -787,7 +788,7 @@ b_host: break; default: WARNING("unhandled DISCONNECT transition (%s)\n", - otg_state_string(musb->xceiv->state)); + usb_otg_state_string(musb->xceiv->state)); break; } } @@ -813,7 +814,7 @@ b_host: } } else { dev_dbg(musb->controller, "BUS RESET as %s\n", - otg_state_string(musb->xceiv->state)); + usb_otg_state_string(musb->xceiv->state)); switch (musb->xceiv->state) { case OTG_STATE_A_SUSPEND: /* We need to ignore disconnect on suspend @@ -826,7 +827,7 @@ b_host: case OTG_STATE_A_WAIT_BCON: /* OPT TD.4.7-900ms */ /* never use invalid T(a_wait_bcon) */ dev_dbg(musb->controller, "HNP: in %s, %d msec timeout\n", - otg_state_string(musb->xceiv->state), + usb_otg_state_string(musb->xceiv->state), TA_WAIT_BCON(musb)); mod_timer(&musb->otg_timer, jiffies + msecs_to_jiffies(TA_WAIT_BCON(musb))); @@ -838,7 +839,7 @@ b_host: break; case OTG_STATE_B_WAIT_ACON: dev_dbg(musb->controller, "HNP: RESET (%s), to b_peripheral\n", - otg_state_string(musb->xceiv->state)); + usb_otg_state_string(musb->xceiv->state)); musb->xceiv->state = OTG_STATE_B_PERIPHERAL; musb_g_reset(musb); break; @@ -850,7 +851,7 @@ b_host: break; default: dev_dbg(musb->controller, "Unhandled BUS RESET as %s\n", - otg_state_string(musb->xceiv->state)); + usb_otg_state_string(musb->xceiv->state)); } } } @@ -1632,7 +1633,7 @@ musb_mode_show(struct device *dev, struct device_attribute *attr, char *buf) int ret = -EINVAL; spin_lock_irqsave(&musb->lock, flags); - ret = sprintf(buf, "%s\n", otg_state_string(musb->xceiv->state)); + ret = sprintf(buf, "%s\n", usb_otg_state_string(musb->xceiv->state)); spin_unlock_irqrestore(&musb->lock, flags); return ret; diff --git a/drivers/usb/musb/musb_dsps.c b/drivers/usb/musb/musb_dsps.c index 4b4987461adb..1ea553d2b77f 100644 --- a/drivers/usb/musb/musb_dsps.c +++ b/drivers/usb/musb/musb_dsps.c @@ -225,7 +225,7 @@ static void otg_timer(unsigned long _musb) */ devctl = dsps_readb(mregs, MUSB_DEVCTL); dev_dbg(musb->controller, "Poll devctl %02x (%s)\n", devctl, - otg_state_string(musb->xceiv->state)); + usb_otg_state_string(musb->xceiv->state)); spin_lock_irqsave(&musb->lock, flags); switch (musb->xceiv->state) { @@ -274,7 +274,7 @@ static void dsps_musb_try_idle(struct musb *musb, unsigned long timeout) if (musb->is_active || (musb->a_wait_bcon == 0 && musb->xceiv->state == OTG_STATE_A_WAIT_BCON)) { dev_dbg(musb->controller, "%s active, deleting 
timer\n", - otg_state_string(musb->xceiv->state)); + usb_otg_state_string(musb->xceiv->state)); del_timer(&glue->timer[pdev->id]); glue->last_timer[pdev->id] = jiffies; return; @@ -289,7 +289,7 @@ static void dsps_musb_try_idle(struct musb *musb, unsigned long timeout) glue->last_timer[pdev->id] = timeout; dev_dbg(musb->controller, "%s inactive, starting idle timer for %u ms\n", - otg_state_string(musb->xceiv->state), + usb_otg_state_string(musb->xceiv->state), jiffies_to_msecs(timeout - jiffies)); mod_timer(&glue->timer[pdev->id], timeout); } @@ -378,7 +378,7 @@ static irqreturn_t dsps_interrupt(int irq, void *hci) /* NOTE: this must complete power-on within 100 ms. */ dev_dbg(musb->controller, "VBUS %s (%s)%s, devctl %02x\n", drvvbus ? "on" : "off", - otg_state_string(musb->xceiv->state), + usb_otg_state_string(musb->xceiv->state), err ? " ERROR" : "", devctl); ret = IRQ_HANDLED; diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c index 6101ebf803fd..e8408883ab0d 100644 --- a/drivers/usb/musb/musb_gadget.c +++ b/drivers/usb/musb/musb_gadget.c @@ -1571,7 +1571,7 @@ static int musb_gadget_wakeup(struct usb_gadget *gadget) goto done; default: dev_dbg(musb->controller, "Unhandled wake: %s\n", - otg_state_string(musb->xceiv->state)); + usb_otg_state_string(musb->xceiv->state)); goto done; } @@ -1970,7 +1970,7 @@ void musb_g_resume(struct musb *musb) break; default: WARNING("unhandled RESUME transition (%s)\n", - otg_state_string(musb->xceiv->state)); + usb_otg_state_string(musb->xceiv->state)); } } @@ -2000,7 +2000,7 @@ void musb_g_suspend(struct musb *musb) * A_PERIPHERAL may need care too */ WARNING("unhandled SUSPEND transition (%s)\n", - otg_state_string(musb->xceiv->state)); + usb_otg_state_string(musb->xceiv->state)); } } @@ -2034,7 +2034,7 @@ void musb_g_disconnect(struct musb *musb) switch (musb->xceiv->state) { default: dev_dbg(musb->controller, "Unhandled disconnect %s, setting a_idle\n", - otg_state_string(musb->xceiv->state)); + usb_otg_state_string(musb->xceiv->state)); musb->xceiv->state = OTG_STATE_A_IDLE; MUSB_HST_MODE(musb); break; diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c index 1ce1fcf3f3e7..51e9e8a38444 100644 --- a/drivers/usb/musb/musb_host.c +++ b/drivers/usb/musb/musb_host.c @@ -2453,7 +2453,7 @@ static int musb_bus_suspend(struct usb_hcd *hcd) if (musb->is_active) { WARNING("trying to suspend as %s while active\n", - otg_state_string(musb->xceiv->state)); + usb_otg_state_string(musb->xceiv->state)); return -EBUSY; } else return 0; diff --git a/drivers/usb/musb/musb_virthub.c b/drivers/usb/musb/musb_virthub.c index f70579154ded..ef7d11045f56 100644 --- a/drivers/usb/musb/musb_virthub.c +++ b/drivers/usb/musb/musb_virthub.c @@ -95,7 +95,7 @@ static void musb_port_suspend(struct musb *musb, bool do_suspend) break; default: dev_dbg(musb->controller, "bogus rh suspend? 
%s\n", - otg_state_string(musb->xceiv->state)); + usb_otg_state_string(musb->xceiv->state)); } } else if (power & MUSB_POWER_SUSPENDM) { power &= ~MUSB_POWER_SUSPENDM; @@ -203,7 +203,7 @@ void musb_root_disconnect(struct musb *musb) break; default: dev_dbg(musb->controller, "host disconnect (%s)\n", - otg_state_string(musb->xceiv->state)); + usb_otg_state_string(musb->xceiv->state)); } } diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c index 1a42a458f2c4..8ba9bb2a91a7 100644 --- a/drivers/usb/musb/omap2430.c +++ b/drivers/usb/musb/omap2430.c @@ -117,7 +117,7 @@ static void omap2430_musb_try_idle(struct musb *musb, unsigned long timeout) if (musb->is_active || ((musb->a_wait_bcon == 0) && (musb->xceiv->state == OTG_STATE_A_WAIT_BCON))) { dev_dbg(musb->controller, "%s active, deleting timer\n", - otg_state_string(musb->xceiv->state)); + usb_otg_state_string(musb->xceiv->state)); del_timer(&musb_idle_timer); last_timer = jiffies; return; @@ -134,7 +134,7 @@ static void omap2430_musb_try_idle(struct musb *musb, unsigned long timeout) last_timer = timeout; dev_dbg(musb->controller, "%s inactive, for idle timer for %lu ms\n", - otg_state_string(musb->xceiv->state), + usb_otg_state_string(musb->xceiv->state), (unsigned long)jiffies_to_msecs(timeout - jiffies)); mod_timer(&musb_idle_timer, timeout); } @@ -200,7 +200,7 @@ static void omap2430_musb_set_vbus(struct musb *musb, int is_on) dev_dbg(musb->controller, "VBUS %s, devctl %02x " /* otg %3x conf %08x prcm %08x */ "\n", - otg_state_string(musb->xceiv->state), + usb_otg_state_string(musb->xceiv->state), musb_readb(musb->mregs, MUSB_DEVCTL)); } diff --git a/drivers/usb/musb/tusb6010.c b/drivers/usb/musb/tusb6010.c index 464bd23cccda..7369ba33c94f 100644 --- a/drivers/usb/musb/tusb6010.c +++ b/drivers/usb/musb/tusb6010.c @@ -423,7 +423,7 @@ static void musb_do_idle(unsigned long _musb) && (musb->idle_timeout == 0 || time_after(jiffies, musb->idle_timeout))) { dev_dbg(musb->controller, "Nothing connected %s, turning off VBUS\n", - otg_state_string(musb->xceiv->state)); + usb_otg_state_string(musb->xceiv->state)); } /* FALLTHROUGH */ case OTG_STATE_A_IDLE: @@ -478,7 +478,7 @@ static void tusb_musb_try_idle(struct musb *musb, unsigned long timeout) if (musb->is_active || ((musb->a_wait_bcon == 0) && (musb->xceiv->state == OTG_STATE_A_WAIT_BCON))) { dev_dbg(musb->controller, "%s active, deleting timer\n", - otg_state_string(musb->xceiv->state)); + usb_otg_state_string(musb->xceiv->state)); del_timer(&musb_idle_timer); last_timer = jiffies; return; @@ -495,7 +495,7 @@ static void tusb_musb_try_idle(struct musb *musb, unsigned long timeout) last_timer = timeout; dev_dbg(musb->controller, "%s inactive, for idle timer for %lu ms\n", - otg_state_string(musb->xceiv->state), + usb_otg_state_string(musb->xceiv->state), (unsigned long)jiffies_to_msecs(timeout - jiffies)); mod_timer(&musb_idle_timer, timeout); } @@ -571,7 +571,7 @@ static void tusb_musb_set_vbus(struct musb *musb, int is_on) musb_writeb(musb->mregs, MUSB_DEVCTL, devctl); dev_dbg(musb->controller, "VBUS %s, devctl %02x otg %3x conf %08x prcm %08x\n", - otg_state_string(musb->xceiv->state), + usb_otg_state_string(musb->xceiv->state), musb_readb(musb->mregs, MUSB_DEVCTL), musb_readl(tbase, TUSB_DEV_OTG_STAT), conf, prcm); @@ -678,13 +678,13 @@ tusb_otg_ints(struct musb *musb, u32 int_src, void __iomem *tbase) musb->is_active = 0; } dev_dbg(musb->controller, "vbus change, %s, otg %03x\n", - otg_state_string(musb->xceiv->state), otg_stat); + 
usb_otg_state_string(musb->xceiv->state), otg_stat); idle_timeout = jiffies + (1 * HZ); schedule_work(&musb->irq_work); } else /* A-dev state machine */ { dev_dbg(musb->controller, "vbus change, %s, otg %03x\n", - otg_state_string(musb->xceiv->state), otg_stat); + usb_otg_state_string(musb->xceiv->state), otg_stat); switch (musb->xceiv->state) { case OTG_STATE_A_IDLE: @@ -733,7 +733,7 @@ tusb_otg_ints(struct musb *musb, u32 int_src, void __iomem *tbase) u8 devctl; dev_dbg(musb->controller, "%s timer, %03x\n", - otg_state_string(musb->xceiv->state), otg_stat); + usb_otg_state_string(musb->xceiv->state), otg_stat); switch (musb->xceiv->state) { case OTG_STATE_A_WAIT_VRISE: diff --git a/drivers/usb/otg/fsl_otg.c b/drivers/usb/otg/fsl_otg.c index 37e8e1578316..72a2a00c2487 100644 --- a/drivers/usb/otg/fsl_otg.c +++ b/drivers/usb/otg/fsl_otg.c @@ -992,7 +992,7 @@ static int show_fsl_usb2_otg_state(struct device *dev, /* State */ t = scnprintf(next, size, "OTG state: %s\n\n", - otg_state_string(fsl_otg_dev->phy.state)); + usb_otg_state_string(fsl_otg_dev->phy.state)); size -= t; next += t; diff --git a/drivers/usb/otg/isp1301_omap.c b/drivers/usb/otg/isp1301_omap.c index af9cb11626b2..8fe0c3b95261 100644 --- a/drivers/usb/otg/isp1301_omap.c +++ b/drivers/usb/otg/isp1301_omap.c @@ -236,7 +236,7 @@ isp1301_clear_bits(struct isp1301 *isp, u8 reg, u8 bits) static inline const char *state_name(struct isp1301 *isp) { - return otg_state_string(isp->phy.state); + return usb_otg_state_string(isp->phy.state); } /*-------------------------------------------------------------------------*/ @@ -481,7 +481,7 @@ static void check_state(struct isp1301 *isp, const char *tag) if (isp->phy.state == state && !extra) return; pr_debug("otg: %s FSM %s/%02x, %s, %06x\n", tag, - otg_state_string(state), fsm, state_name(isp), + usb_otg_state_string(state), fsm, state_name(isp), omap_readl(OTG_CTRL)); } @@ -1077,7 +1077,7 @@ static void isp_update_otg(struct isp1301 *isp, u8 stat) if (state != isp->phy.state) pr_debug(" isp, %s -> %s\n", - otg_state_string(state), state_name(isp)); + usb_otg_state_string(state), state_name(isp)); #ifdef CONFIG_USB_OTG /* update the OTG controller state to match the isp1301; may diff --git a/drivers/usb/otg/otg.c b/drivers/usb/otg/otg.c index 358cfd9bce89..fd9a4b7bebe7 100644 --- a/drivers/usb/otg/otg.c +++ b/drivers/usb/otg/otg.c @@ -11,7 +11,7 @@ #include #include -const char *otg_state_string(enum usb_otg_state state) +const char *usb_otg_state_string(enum usb_otg_state state) { switch (state) { case OTG_STATE_A_IDLE: @@ -44,4 +44,4 @@ const char *otg_state_string(enum usb_otg_state state) return "UNDEFINED"; } } -EXPORT_SYMBOL(otg_state_string); +EXPORT_SYMBOL(usb_otg_state_string); diff --git a/drivers/usb/otg/otg_fsm.c b/drivers/usb/otg/otg_fsm.c index ade131a8ae5e..1f729a15decb 100644 --- a/drivers/usb/otg/otg_fsm.c +++ b/drivers/usb/otg/otg_fsm.c @@ -119,7 +119,7 @@ int otg_set_state(struct otg_fsm *fsm, enum usb_otg_state new_state) state_changed = 1; if (fsm->otg->phy->state == new_state) return 0; - VDBG("Set state: %s\n", otg_state_string(new_state)); + VDBG("Set state: %s\n", usb_otg_state_string(new_state)); otg_leave_state(fsm, fsm->otg->phy->state); switch (new_state) { case OTG_STATE_B_IDLE: diff --git a/include/linux/usb/otg.h b/include/linux/usb/otg.h index e8a5fe87c6bd..9f9fb3927b0a 100644 --- a/include/linux/usb/otg.h +++ b/include/linux/usb/otg.h @@ -37,9 +37,9 @@ struct usb_otg { }; #ifdef CONFIG_USB_OTG_UTILS -extern const char *otg_state_string(enum usb_otg_state 
state); +extern const char *usb_otg_state_string(enum usb_otg_state state); #else -static inline const char *otg_state_string(enum usb_otg_state state) +static inline const char *usb_otg_state_string(enum usb_otg_state state) { return NULL; } -- cgit From 7009bdd7f31ed6e769af0f76e2368bb6033be572 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Thu, 7 Mar 2013 10:45:56 +0200 Subject: usb: otg: move usb_otg_state_string to usb-common.c otg.c only had a single function definition which might make more sense to be placed in usb-common.c. While doing that, we also delete otg.c since it's now empty. Signed-off-by: Felipe Balbi --- drivers/usb/otg/Makefile | 3 --- drivers/usb/otg/otg.c | 47 ----------------------------------------------- drivers/usb/usb-common.c | 26 ++++++++++++++++++++++++++ include/linux/usb/otg.h | 7 ------- 4 files changed, 26 insertions(+), 57 deletions(-) delete mode 100644 drivers/usb/otg/otg.c (limited to 'include/linux') diff --git a/drivers/usb/otg/Makefile b/drivers/usb/otg/Makefile index a844b8d35d14..6abc45388e24 100644 --- a/drivers/usb/otg/Makefile +++ b/drivers/usb/otg/Makefile @@ -5,9 +5,6 @@ ccflags-$(CONFIG_USB_DEBUG) := -DDEBUG ccflags-$(CONFIG_USB_GADGET_DEBUG) += -DDEBUG -# infrastructure -obj-$(CONFIG_USB_OTG_UTILS) += otg.o - # transceiver drivers obj-$(CONFIG_USB_GPIO_VBUS) += gpio_vbus.o obj-$(CONFIG_ISP1301_OMAP) += isp1301_omap.o diff --git a/drivers/usb/otg/otg.c b/drivers/usb/otg/otg.c deleted file mode 100644 index fd9a4b7bebe7..000000000000 --- a/drivers/usb/otg/otg.c +++ /dev/null @@ -1,47 +0,0 @@ -/* - * otg.c -- USB OTG utility code - * - * Copyright (C) 2004 Texas Instruments - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- */ -#include -#include - -const char *usb_otg_state_string(enum usb_otg_state state) -{ - switch (state) { - case OTG_STATE_A_IDLE: - return "a_idle"; - case OTG_STATE_A_WAIT_VRISE: - return "a_wait_vrise"; - case OTG_STATE_A_WAIT_BCON: - return "a_wait_bcon"; - case OTG_STATE_A_HOST: - return "a_host"; - case OTG_STATE_A_SUSPEND: - return "a_suspend"; - case OTG_STATE_A_PERIPHERAL: - return "a_peripheral"; - case OTG_STATE_A_WAIT_VFALL: - return "a_wait_vfall"; - case OTG_STATE_A_VBUS_ERR: - return "a_vbus_err"; - case OTG_STATE_B_IDLE: - return "b_idle"; - case OTG_STATE_B_SRP_INIT: - return "b_srp_init"; - case OTG_STATE_B_PERIPHERAL: - return "b_peripheral"; - case OTG_STATE_B_WAIT_ACON: - return "b_wait_acon"; - case OTG_STATE_B_HOST: - return "b_host"; - default: - return "UNDEFINED"; - } -} -EXPORT_SYMBOL(usb_otg_state_string); diff --git a/drivers/usb/usb-common.c b/drivers/usb/usb-common.c index 070b681e5d17..0db0a919d72b 100644 --- a/drivers/usb/usb-common.c +++ b/drivers/usb/usb-common.c @@ -14,6 +14,32 @@ #include #include #include +#include + +const char *usb_otg_state_string(enum usb_otg_state state) +{ + static const char *const names[] = { + [OTG_STATE_A_IDLE] = "a_idle", + [OTG_STATE_A_WAIT_VRISE] = "a_wait_vrise", + [OTG_STATE_A_WAIT_BCON] = "a_wait_bcon", + [OTG_STATE_A_HOST] = "a_host", + [OTG_STATE_A_SUSPEND] = "a_suspend", + [OTG_STATE_A_PERIPHERAL] = "a_peripheral", + [OTG_STATE_A_WAIT_VFALL] = "a_wait_vfall", + [OTG_STATE_A_VBUS_ERR] = "a_vbus_err", + [OTG_STATE_B_IDLE] = "b_idle", + [OTG_STATE_B_SRP_INIT] = "b_srp_init", + [OTG_STATE_B_PERIPHERAL] = "b_peripheral", + [OTG_STATE_B_WAIT_ACON] = "b_wait_acon", + [OTG_STATE_B_HOST] = "b_host", + }; + + if (state < 0 || state >= ARRAY_SIZE(names)) + return "UNDEFINED"; + + return names[state]; +} +EXPORT_SYMBOL_GPL(usb_otg_state_string); const char *usb_speed_string(enum usb_device_speed speed) { diff --git a/include/linux/usb/otg.h b/include/linux/usb/otg.h index 9f9fb3927b0a..291e01ba32e5 100644 --- a/include/linux/usb/otg.h +++ b/include/linux/usb/otg.h @@ -36,14 +36,7 @@ struct usb_otg { }; -#ifdef CONFIG_USB_OTG_UTILS extern const char *usb_otg_state_string(enum usb_otg_state state); -#else -static inline const char *usb_otg_state_string(enum usb_otg_state state) -{ - return NULL; -} -#endif /* Context: can sleep */ static inline int -- cgit From edc7cb2e955f222fe51cd44c1cf9c94d58017344 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Thu, 7 Mar 2013 11:13:43 +0200 Subject: usb: phy: make it a menuconfig We already have a considerable number of USB PHY drivers; making this a menuconfig prevents us from adding too much churn to USB's menuconfig. While at it, also select USB_OTG_UTILS from this new menuconfig just to keep backwards compatibility until we manage to remove that symbol.
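A side note, sketched here for illustration only: because USB_PHY is a tristate symbol, the include/linux/usb/phy.h hunk below also switches from a plain #ifdef CONFIG_USB_OTG_UTILS to IS_ENABLED(CONFIG_USB_PHY), which is true for both built-in (=y) and modular (=m) builds:

/* Illustrative fragment mirroring the phy.h change in this patch.
 * A bare #ifdef CONFIG_USB_PHY would be false for =m builds, where
 * only CONFIG_USB_PHY_MODULE is defined; IS_ENABLED() covers both. */
#if IS_ENABLED(CONFIG_USB_PHY)
extern struct usb_phy *usb_get_phy(enum usb_phy_type type);
#else
static inline struct usb_phy *usb_get_phy(enum usb_phy_type type)
{
	return NULL;	/* stub when the PHY layer is compiled out */
}
#endif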
Signed-off-by: Felipe Balbi --- drivers/Makefile | 2 +- drivers/usb/phy/Kconfig | 17 ++++++++++++----- drivers/usb/phy/Makefile | 2 +- include/linux/usb/phy.h | 2 +- 4 files changed, 15 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/Makefile b/drivers/Makefile index dce39a95fa71..3c200a243af0 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -79,7 +79,7 @@ obj-$(CONFIG_ATA_OVER_ETH) += block/aoe/ obj-$(CONFIG_PARIDE) += block/paride/ obj-$(CONFIG_TC) += tc/ obj-$(CONFIG_UWB) += uwb/ -obj-$(CONFIG_USB_OTG_UTILS) += usb/ +obj-$(CONFIG_USB_PHY) += usb/ obj-$(CONFIG_USB) += usb/ obj-$(CONFIG_PCI) += usb/ obj-$(CONFIG_USB_GADGET) += usb/ diff --git a/drivers/usb/phy/Kconfig b/drivers/usb/phy/Kconfig index 32ce740a9dd5..832cd694fb8b 100644 --- a/drivers/usb/phy/Kconfig +++ b/drivers/usb/phy/Kconfig @@ -1,8 +1,17 @@ # # Physical Layer USB driver configuration # -comment "USB Physical Layer drivers" - depends on USB || USB_GADGET +menuconfig USB_PHY + tristate "USB Physical Layer drivers" + select USB_OTG_UTILS + help + USB controllers (those which are host, device or DRD) need a + device to handle the physical layer signalling, commonly called + a PHY. + + The following drivers add support for such PHY devices. + +if USB_PHY config USB_OTG_UTILS bool @@ -10,8 +19,6 @@ config USB_OTG_UTILS Select this to make sure the build includes objects from the OTG infrastructure directory. -if USB || USB_GADGET - # # USB Transceiver Drivers # @@ -206,4 +213,4 @@ config USB_ULPI_VIEWPORT Provides read/write operations to the ULPI phy register set for controllers with a viewport register (e.g. Chipidea/ARC controllers). -endif # USB || OTG +endif # USB_PHY diff --git a/drivers/usb/phy/Makefile b/drivers/usb/phy/Makefile index 34488ceef491..d10a8b387ffe 100644 --- a/drivers/usb/phy/Makefile +++ b/drivers/usb/phy/Makefile @@ -4,7 +4,7 @@ ccflags-$(CONFIG_USB_DEBUG) := -DDEBUG -obj-$(CONFIG_USB_OTG_UTILS) += phy.o +obj-$(CONFIG_USB_PHY) += phy.o # transceiver drivers, keep the list sorted diff --git a/include/linux/usb/phy.h b/include/linux/usb/phy.h index 15847cbdb512..b001dc3d6354 100644 --- a/include/linux/usb/phy.h +++ b/include/linux/usb/phy.h @@ -161,7 +161,7 @@ usb_phy_shutdown(struct usb_phy *x) } /* for usb host and peripheral controller drivers */ -#ifdef CONFIG_USB_OTG_UTILS +#if IS_ENABLED(CONFIG_USB_PHY) extern struct usb_phy *usb_get_phy(enum usb_phy_type type); extern struct usb_phy *devm_usb_get_phy(struct device *dev, enum usb_phy_type type); -- cgit From b774212ea5f13911a5e0211a7088e42dad46b4c8 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Fri, 8 Mar 2013 13:22:58 +0200 Subject: usb: phy: introduce ->set_vbus() method this method will be used to enable or disable the charge pump. Whenever we have DRD devices, we need to be able to turn VBUS on or off whenever we want. Note that in the ideal case, this would be controlled by the ID-pin Interrupt, but not all devices have ID-pin properly routed since manufacturers can choose to save that trace if they're building a host-only product out of a DRD IP. This is also useful during debugging where we might not have the proper cable hanging around. 
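For illustration only (not part of this patch), a dual-role controller driver could then source or cut VBUS from its ID-pin handling or from a debug hook; only usb_phy_vbus_on()/usb_phy_vbus_off() below come from this patch, the example_drd names are assumptions:

/* Hypothetical DRD glue toggling the charge pump via the new helpers. */
static void example_drd_id_event(struct example_drd *drd, bool id_grounded)
{
	if (id_grounded)
		usb_phy_vbus_on(drd->phy);	/* A-device: source VBUS */
	else
		usb_phy_vbus_off(drd->phy);	/* B-device: stop sourcing */
}

Note that both helpers return 0 when a PHY implements no ->set_vbus() method, so callers don't need to check for it themselves.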
Signed-off-by: Felipe Balbi --- include/linux/usb/phy.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/phy.h b/include/linux/usb/phy.h index b001dc3d6354..b7c2217c585f 100644 --- a/include/linux/usb/phy.h +++ b/include/linux/usb/phy.h @@ -91,6 +91,9 @@ struct usb_phy { int (*init)(struct usb_phy *x); void (*shutdown)(struct usb_phy *x); + /* enable/disable VBUS */ + int (*set_vbus)(struct usb_phy *x, int on); + /* effective for B devices, ignored for A-peripheral */ int (*set_power)(struct usb_phy *x, unsigned mA); @@ -160,6 +163,24 @@ usb_phy_shutdown(struct usb_phy *x) x->shutdown(x); } +static inline int +usb_phy_vbus_on(struct usb_phy *x) +{ + if (!x->set_vbus) + return 0; + + return x->set_vbus(x, true); +} + +static inline int +usb_phy_vbus_off(struct usb_phy *x) +{ + if (!x->set_vbus) + return 0; + + return x->set_vbus(x, false); +} + /* for usb host and peripheral controller drivers */ #if IS_ENABLED(CONFIG_USB_PHY) extern struct usb_phy *usb_get_phy(enum usb_phy_type type); extern struct usb_phy *devm_usb_get_phy(struct device *dev, enum usb_phy_type type); -- cgit From 2ba7943af0f0cca5a069cd3aff807815bc76fff1 Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Thu, 7 Mar 2013 18:51:44 +0530 Subject: usb: dwc3: dwc3-omap: return -EPROBE_DEFER if probe has not yet executed return -EPROBE_DEFER from dwc3_omap_mailbox in dwc3-omap.c if the probe of dwc3-omap has not yet executed or has failed. Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/dwc3-omap.c | 7 +++++-- include/linux/usb/dwc3-omap.h | 6 +++--- 2 files changed, 8 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/dwc3/dwc3-omap.c b/drivers/usb/dwc3/dwc3-omap.c index 546f1fd84920..2fe9723ff1df 100644 --- a/drivers/usb/dwc3/dwc3-omap.c +++ b/drivers/usb/dwc3/dwc3-omap.c @@ -138,11 +138,14 @@ static inline void dwc3_omap_writel(void __iomem *base, u32 offset, u32 value) writel(value, base + offset); } -void dwc3_omap_mailbox(enum omap_dwc3_vbus_id_status status) +int dwc3_omap_mailbox(enum omap_dwc3_vbus_id_status status) { u32 val; struct dwc3_omap *omap = _omap; + if (!omap) + return -EPROBE_DEFER; + switch (status) { case OMAP_DWC3_ID_GROUND: dev_dbg(omap->dev, "ID GND\n"); @@ -185,7 +188,7 @@ static int dwc3_omap_probe(struct platform_device *pdev) dev_dbg(omap->dev, "ID float\n"); } - return; + return 0; } EXPORT_SYMBOL_GPL(dwc3_omap_mailbox); diff --git a/include/linux/usb/dwc3-omap.h b/include/linux/usb/dwc3-omap.h index 51eae14477f7..5615f4d82724 100644 --- a/include/linux/usb/dwc3-omap.h +++ b/include/linux/usb/dwc3-omap.h @@ -19,11 +19,11 @@ enum omap_dwc3_vbus_id_status { }; #if (defined(CONFIG_USB_DWC3) || defined(CONFIG_USB_DWC3_MODULE)) -extern void dwc3_omap_mailbox(enum omap_dwc3_vbus_id_status status); +extern int dwc3_omap_mailbox(enum omap_dwc3_vbus_id_status status); #else -static inline void dwc3_omap_mailbox(enum omap_dwc3_vbus_id_status status) +static inline int dwc3_omap_mailbox(enum omap_dwc3_vbus_id_status status) { - return; + return -ENODEV; } #endif -- cgit From b7fa5c2aec5be083eb2719b405089703608e9bc6 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Thu, 14 Mar 2013 17:59:06 +0200 Subject: usb: phy: return -ENXIO when PHY layer isn't enabled in cases where the PHY layer isn't enabled, we still want to return an error code (actually an error pointer) so that our users don't need to cope with either an error pointer or NULL. This will simplify users as below: - return IS_ERR(phy) ?
PTR_ERR(phy) : -ENODEV; + return PTR_ERR(phy); Acked-by: Kishon Vijay Abraham I Reported-by: Alan Stern Signed-off-by: Felipe Balbi --- include/linux/usb/phy.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/phy.h b/include/linux/usb/phy.h index b7c2217c585f..6b5978f57633 100644 --- a/include/linux/usb/phy.h +++ b/include/linux/usb/phy.h @@ -197,29 +197,29 @@ extern int usb_bind_phy(const char *dev_name, u8 index, #else static inline struct usb_phy *usb_get_phy(enum usb_phy_type type) { - return NULL; + return ERR_PTR(-ENXIO); } static inline struct usb_phy *devm_usb_get_phy(struct device *dev, enum usb_phy_type type) { - return NULL; + return ERR_PTR(-ENXIO); } static inline struct usb_phy *usb_get_phy_dev(struct device *dev, u8 index) { - return NULL; + return ERR_PTR(-ENXIO); } static inline struct usb_phy *devm_usb_get_phy_dev(struct device *dev, u8 index) { - return NULL; + return ERR_PTR(-ENXIO); } static inline struct usb_phy *devm_usb_get_phy_by_phandle(struct device *dev, const char *phandle, u8 index) { - return NULL; + return ERR_PTR(-ENXIO); } static inline void usb_put_phy(struct usb_phy *x) -- cgit From e6251fc244a18a53830f38de84e4fcaee2f58662 Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Fri, 15 Mar 2013 16:32:05 +0100 Subject: itg3200: fix incorrect ifdef comment Signed-off-by: Paul Bolle Signed-off-by: Jiri Kosina --- include/linux/iio/gyro/itg3200.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iio/gyro/itg3200.h b/include/linux/iio/gyro/itg3200.h index c53f16914b77..2a820850f284 100644 --- a/include/linux/iio/gyro/itg3200.h +++ b/include/linux/iio/gyro/itg3200.h @@ -149,6 +149,6 @@ static inline void itg3200_buffer_unconfigure(struct iio_dev *indio_dev) { } -#endif /* CONFIG_IIO_RING_BUFFER */ +#endif /* CONFIG_IIO_BUFFER */ #endif /* ITG3200_H_ */ -- cgit From 5a20d339c785d98d8b050b9afc098e4184a6098c Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sun, 3 Mar 2013 13:58:05 +0900 Subject: f2fs: align f2fs maximum name length to linux based filesystem The maximum filename length supported in linux is 255 characters. So let's follow that. 
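For illustration (editorial arithmetic based on the slot macros renamed in the diff below): with 8-byte name slots, a maximal 255-character name occupies

	GET_DENTRY_SLOTS(255) = (255 + F2FS_SLOT_LEN - 1) >> F2FS_SLOT_LEN_BITS
	                      = 262 >> 3
	                      = 32 slots

of the NR_DENTRY_IN_BLOCK (214) name slots in a dentry block, and the old limit of 256 also disagreed with the VFS NAME_MAX of 255.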
Signed-off-by: Namjae Jeon Signed-off-by: Amit Sahrawat Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 3 +++ fs/f2fs/namei.c | 2 +- fs/f2fs/super.c | 2 +- include/linux/f2fs_fs.h | 17 +++++++++-------- 4 files changed, 14 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index a1f38443ecee..2851ae6948a1 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -189,6 +189,9 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, unsigned int max_depth; unsigned int level; + if (namelen > F2FS_NAME_LEN) + return NULL; + if (npages == 0) return NULL; diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 1a49b881bac0..d4a171b1a68b 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -197,7 +197,7 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, struct f2fs_dir_entry *de; struct page *page; - if (dentry->d_name.len > F2FS_MAX_NAME_LEN) + if (dentry->d_name.len > F2FS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); de = f2fs_find_entry(dir, &dentry->d_name, &page); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 8c117649a035..1c7f595ca47c 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -180,7 +180,7 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_files = sbi->total_node_count; buf->f_ffree = sbi->total_node_count - valid_inode_count(sbi); - buf->f_namelen = F2FS_MAX_NAME_LEN; + buf->f_namelen = F2FS_NAME_LEN; buf->f_fsid.val[0] = (u32)id; buf->f_fsid.val[1] = (u32)(id >> 32); diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index f9a12f6243a5..df6fab82f87e 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -139,7 +139,7 @@ struct f2fs_extent { __le32 len; /* lengh of the extent */ } __packed; -#define F2FS_MAX_NAME_LEN 256 +#define F2FS_NAME_LEN 255 #define ADDRS_PER_INODE 923 /* Address Pointers in an Inode */ #define ADDRS_PER_BLOCK 1018 /* Address Pointers in a Direct Block */ #define NIDS_PER_BLOCK 1018 /* Node IDs in an Indirect Block */ @@ -165,7 +165,8 @@ struct f2fs_inode { __le32 i_flags; /* file attributes */ __le32 i_pino; /* parent inode number */ __le32 i_namelen; /* file name length */ - __u8 i_name[F2FS_MAX_NAME_LEN]; /* file name for SPOR */ + __u8 i_name[F2FS_NAME_LEN]; /* file name for SPOR */ + __u8 i_reserved2; /* for backward compatibility */ struct f2fs_extent i_ext; /* caching a largest extent */ @@ -362,10 +363,10 @@ struct f2fs_summary_block { typedef __le32 f2fs_hash_t; /* One directory entry slot covers 8bytes-long file name */ -#define F2FS_NAME_LEN 8 -#define F2FS_NAME_LEN_BITS 3 +#define F2FS_SLOT_LEN 8 +#define F2FS_SLOT_LEN_BITS 3 -#define GET_DENTRY_SLOTS(x) ((x + F2FS_NAME_LEN - 1) >> F2FS_NAME_LEN_BITS) +#define GET_DENTRY_SLOTS(x) ((x + F2FS_SLOT_LEN - 1) >> F2FS_SLOT_LEN_BITS) /* the number of dentry in a block */ #define NR_DENTRY_IN_BLOCK 214 @@ -377,10 +378,10 @@ typedef __le32 f2fs_hash_t; #define SIZE_OF_DENTRY_BITMAP ((NR_DENTRY_IN_BLOCK + BITS_PER_BYTE - 1) / \ BITS_PER_BYTE) #define SIZE_OF_RESERVED (PAGE_SIZE - ((SIZE_OF_DIR_ENTRY + \ - F2FS_NAME_LEN) * \ + F2FS_SLOT_LEN) * \ NR_DENTRY_IN_BLOCK + SIZE_OF_DENTRY_BITMAP)) -/* One directory entry slot representing F2FS_NAME_LEN-sized file name */ +/* One directory entry slot representing F2FS_SLOT_LEN-sized file name */ struct f2fs_dir_entry { __le32 hash_code; /* hash code of file name */ __le32 ino; /* inode number */ @@ -394,7 +395,7 @@ struct f2fs_dentry_block { __u8 dentry_bitmap[SIZE_OF_DENTRY_BITMAP]; __u8 reserved[SIZE_OF_RESERVED]; struct 
f2fs_dir_entry dentry[NR_DENTRY_IN_BLOCK]; - __u8 filename[NR_DENTRY_IN_BLOCK][F2FS_NAME_LEN]; + __u8 filename[NR_DENTRY_IN_BLOCK][F2FS_SLOT_LEN]; } __packed; /* file types used in inode_info->flags */ -- cgit From 443580486e3b96578928c1c91e8fbdcf0c9c9c7f Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Mon, 18 Feb 2013 23:28:34 +0900 Subject: irqchip: Renesas INTC External IRQ pin driver This patch adds a driver for external IRQ pins connected to the INTC block on recent SoCs from Renesas. The INTC hardware block usually covers a rather wide range of features, from external IRQ pin handling to legacy interrupt controller support. On older SoCs the INTC is used as a general purpose interrupt controller both for external IRQ pins and on-chip devices. On more recent ARM based SoCs with Cortex-A9 the main interrupt controller is the GIC, but IRQ trigger setup still needs to happen in the INTC hardware block. This driver implements the glue code needed to configure IRQ triggers and also to handle mask/unmask and demux of external IRQ pins hooked up from the INTC to the GIC. Tested on sh73a0 and r8a7779. The hardware varies quite a bit with SoC model; for instance, register and bitfield widths vary wildly. The driver requires one GIC SPI per external IRQ pin to operate. Each driver instance will handle up to 8 external IRQ pins. The SoCs using this driver are currently mainly used together with regular platform devices, so this driver allows configuration via platform data to support things such as a static interrupt base address. DT support will be added incrementally in the not so distant future. Signed-off-by: Magnus Damm Acked-by: Thomas Gleixner Signed-off-by: Simon Horman --- drivers/irqchip/Kconfig | 4 + drivers/irqchip/Makefile | 1 + drivers/irqchip/irq-renesas-intc-irqpin.c | 464 +++++++++++++++++++++ .../linux/platform_data/irq-renesas-intc-irqpin.h | 10 + 4 files changed, 479 insertions(+) create mode 100644 drivers/irqchip/irq-renesas-intc-irqpin.c create mode 100644 include/linux/platform_data/irq-renesas-intc-irqpin.h (limited to 'include/linux') diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index a350969e5efe..0f5f1c3825bc 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -25,6 +25,10 @@ config ARM_VIC_NR The maximum number of VICs available in the system, for power management.
+config RENESAS_INTC_IRQPIN + bool + select IRQ_DOMAIN + config VERSATILE_FPGA_IRQ bool select IRQ_DOMAIN diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile index 98e3b87bdf1b..1aaa4073ab60 100644 --- a/drivers/irqchip/Makefile +++ b/drivers/irqchip/Makefile @@ -8,4 +8,5 @@ obj-$(CONFIG_ARCH_SUNXI) += irq-sunxi.o obj-$(CONFIG_ARCH_SPEAR3XX) += spear-shirq.o obj-$(CONFIG_ARM_GIC) += irq-gic.o obj-$(CONFIG_ARM_VIC) += irq-vic.o +obj-$(CONFIG_RENESAS_INTC_IRQPIN) += irq-renesas-intc-irqpin.o obj-$(CONFIG_VERSATILE_FPGA_IRQ) += irq-versatile-fpga.o diff --git a/drivers/irqchip/irq-renesas-intc-irqpin.c b/drivers/irqchip/irq-renesas-intc-irqpin.c new file mode 100644 index 000000000000..1e5058a56517 --- /dev/null +++ b/drivers/irqchip/irq-renesas-intc-irqpin.c @@ -0,0 +1,464 @@ +/* + * Renesas INTC External IRQ Pin Driver + * + * Copyright (C) 2013 Magnus Damm + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define INTC_IRQPIN_MAX 8 /* maximum 8 interrupts per driver instance */ + +#define INTC_IRQPIN_REG_SENSE 0 /* ICRn */ +#define INTC_IRQPIN_REG_PRIO 1 /* INTPRInn */ +#define INTC_IRQPIN_REG_SOURCE 2 /* INTREQnn */ +#define INTC_IRQPIN_REG_MASK 3 /* INTMSKnn */ +#define INTC_IRQPIN_REG_CLEAR 4 /* INTMSKCLRnn */ +#define INTC_IRQPIN_REG_NR 5 + +/* INTC external IRQ PIN hardware register access: + * + * SENSE is read-write 32-bit with 2-bits or 4-bits per IRQ (*) + * PRIO is read-write 32-bit with 4-bits per IRQ (**) + * SOURCE is read-only 32-bit or 8-bit with 1-bit per IRQ (***) + * MASK is write-only 32-bit or 8-bit with 1-bit per IRQ (***) + * CLEAR is write-only 32-bit or 8-bit with 1-bit per IRQ (***) + * + * (*) May be accessed by more than one driver instance - lock needed + * (**) Read-modify-write access by one driver instance - lock needed + * (***) Accessed by one driver instance only - no locking needed + */ + +struct intc_irqpin_iomem { + void __iomem *iomem; + unsigned long (*read)(void __iomem *iomem); + void (*write)(void __iomem *iomem, unsigned long data); + int width; +}; + +struct intc_irqpin_irq { + int hw_irq; + int irq; + struct intc_irqpin_priv *p; +}; + +struct intc_irqpin_priv { + struct intc_irqpin_iomem iomem[INTC_IRQPIN_REG_NR]; + struct intc_irqpin_irq irq[INTC_IRQPIN_MAX]; + struct renesas_intc_irqpin_config config; + unsigned int number_of_irqs; + struct platform_device *pdev; + struct irq_chip irq_chip; + struct irq_domain *irq_domain; +}; + +static unsigned long intc_irqpin_read32(void __iomem *iomem) +{ + return ioread32(iomem); +} + +static unsigned long intc_irqpin_read8(void __iomem *iomem) +{ + return ioread8(iomem); +} + +static void intc_irqpin_write32(void __iomem *iomem, unsigned long data) +{ + iowrite32(data, iomem); +} + +static void intc_irqpin_write8(void __iomem *iomem, unsigned 
long data) +{ + iowrite8(data, iomem); +} + +static inline unsigned long intc_irqpin_read(struct intc_irqpin_priv *p, + int reg) +{ + struct intc_irqpin_iomem *i = &p->iomem[reg]; + return i->read(i->iomem); +} + +static inline void intc_irqpin_write(struct intc_irqpin_priv *p, + int reg, unsigned long data) +{ + struct intc_irqpin_iomem *i = &p->iomem[reg]; + i->write(i->iomem, data); +} + +static inline unsigned long intc_irqpin_hwirq_mask(struct intc_irqpin_priv *p, + int reg, int hw_irq) +{ + return BIT((p->iomem[reg].width - 1) - hw_irq); +} + +static inline void intc_irqpin_irq_write_hwirq(struct intc_irqpin_priv *p, + int reg, int hw_irq) +{ + intc_irqpin_write(p, reg, intc_irqpin_hwirq_mask(p, reg, hw_irq)); +} + +static DEFINE_RAW_SPINLOCK(intc_irqpin_lock); /* only used by slow path */ + +static void intc_irqpin_read_modify_write(struct intc_irqpin_priv *p, + int reg, int shift, + int width, int value) +{ + unsigned long flags; + unsigned long tmp; + + raw_spin_lock_irqsave(&intc_irqpin_lock, flags); + + tmp = intc_irqpin_read(p, reg); + tmp &= ~(((1 << width) - 1) << shift); + tmp |= value << shift; + intc_irqpin_write(p, reg, tmp); + + raw_spin_unlock_irqrestore(&intc_irqpin_lock, flags); +} + +static void intc_irqpin_mask_unmask_prio(struct intc_irqpin_priv *p, + int irq, int do_mask) +{ + int bitfield_width = 4; /* PRIO assumed to have fixed bitfield width */ + int shift = (7 - irq) * bitfield_width; /* PRIO assumed to be 32-bit */ + + intc_irqpin_read_modify_write(p, INTC_IRQPIN_REG_PRIO, + shift, bitfield_width, + do_mask ? 0 : (1 << bitfield_width) - 1); +} + +static int intc_irqpin_set_sense(struct intc_irqpin_priv *p, int irq, int value) +{ + int bitfield_width = p->config.sense_bitfield_width; + int shift = (7 - irq) * bitfield_width; /* SENSE assumed to be 32-bit */ + + dev_dbg(&p->pdev->dev, "sense irq = %d, mode = %d\n", irq, value); + + if (value >= (1 << bitfield_width)) + return -EINVAL; + + intc_irqpin_read_modify_write(p, INTC_IRQPIN_REG_SENSE, shift, + bitfield_width, value); + return 0; +} + +static void intc_irqpin_dbg(struct intc_irqpin_irq *i, char *str) +{ + dev_dbg(&i->p->pdev->dev, "%s (%d:%d:%d)\n", + str, i->irq, i->hw_irq, + irq_find_mapping(i->p->irq_domain, i->hw_irq)); +} + +static void intc_irqpin_irq_enable(struct irq_data *d) +{ + struct intc_irqpin_priv *p = irq_data_get_irq_chip_data(d); + int hw_irq = irqd_to_hwirq(d); + + intc_irqpin_dbg(&p->irq[hw_irq], "enable"); + intc_irqpin_irq_write_hwirq(p, INTC_IRQPIN_REG_CLEAR, hw_irq); +} + +static void intc_irqpin_irq_disable(struct irq_data *d) +{ + struct intc_irqpin_priv *p = irq_data_get_irq_chip_data(d); + int hw_irq = irqd_to_hwirq(d); + + intc_irqpin_dbg(&p->irq[hw_irq], "disable"); + intc_irqpin_irq_write_hwirq(p, INTC_IRQPIN_REG_MASK, hw_irq); +} + +static void intc_irqpin_irq_enable_force(struct irq_data *d) +{ + struct intc_irqpin_priv *p = irq_data_get_irq_chip_data(d); + int irq = p->irq[irqd_to_hwirq(d)].irq; + + intc_irqpin_irq_enable(d); + irq_get_chip(irq)->irq_unmask(irq_get_irq_data(irq)); +} + +static void intc_irqpin_irq_disable_force(struct irq_data *d) +{ + struct intc_irqpin_priv *p = irq_data_get_irq_chip_data(d); + int irq = p->irq[irqd_to_hwirq(d)].irq; + + irq_get_chip(irq)->irq_mask(irq_get_irq_data(irq)); + intc_irqpin_irq_disable(d); +} + +#define INTC_IRQ_SENSE_VALID 0x10 +#define INTC_IRQ_SENSE(x) (x + INTC_IRQ_SENSE_VALID) + +static unsigned char intc_irqpin_sense[IRQ_TYPE_SENSE_MASK + 1] = { + [IRQ_TYPE_EDGE_FALLING] = INTC_IRQ_SENSE(0x00), + 
[IRQ_TYPE_EDGE_RISING] = INTC_IRQ_SENSE(0x01), + [IRQ_TYPE_LEVEL_LOW] = INTC_IRQ_SENSE(0x02), + [IRQ_TYPE_LEVEL_HIGH] = INTC_IRQ_SENSE(0x03), + [IRQ_TYPE_EDGE_BOTH] = INTC_IRQ_SENSE(0x04), +}; + +static int intc_irqpin_irq_set_type(struct irq_data *d, unsigned int type) +{ + unsigned char value = intc_irqpin_sense[type & IRQ_TYPE_SENSE_MASK]; + struct intc_irqpin_priv *p = irq_data_get_irq_chip_data(d); + + if (!(value & INTC_IRQ_SENSE_VALID)) + return -EINVAL; + + return intc_irqpin_set_sense(p, irqd_to_hwirq(d), + value ^ INTC_IRQ_SENSE_VALID); +} + +static irqreturn_t intc_irqpin_irq_handler(int irq, void *dev_id) +{ + struct intc_irqpin_irq *i = dev_id; + struct intc_irqpin_priv *p = i->p; + unsigned long bit; + + intc_irqpin_dbg(i, "demux1"); + bit = intc_irqpin_hwirq_mask(p, INTC_IRQPIN_REG_SOURCE, i->hw_irq); + + if (intc_irqpin_read(p, INTC_IRQPIN_REG_SOURCE) & bit) { + intc_irqpin_write(p, INTC_IRQPIN_REG_SOURCE, ~bit); + intc_irqpin_dbg(i, "demux2"); + generic_handle_irq(irq_find_mapping(p->irq_domain, i->hw_irq)); + return IRQ_HANDLED; + } + return IRQ_NONE; +} + +static int intc_irqpin_irq_domain_map(struct irq_domain *h, unsigned int virq, + irq_hw_number_t hw) +{ + struct intc_irqpin_priv *p = h->host_data; + + intc_irqpin_dbg(&p->irq[hw], "map"); + irq_set_chip_data(virq, h->host_data); + irq_set_chip_and_handler(virq, &p->irq_chip, handle_level_irq); + set_irq_flags(virq, IRQF_VALID); /* kill me now */ + return 0; +} + +static struct irq_domain_ops intc_irqpin_irq_domain_ops = { + .map = intc_irqpin_irq_domain_map, +}; + +static int intc_irqpin_probe(struct platform_device *pdev) +{ + struct renesas_intc_irqpin_config *pdata = pdev->dev.platform_data; + struct intc_irqpin_priv *p; + struct intc_irqpin_iomem *i; + struct resource *io[INTC_IRQPIN_REG_NR]; + struct resource *irq; + struct irq_chip *irq_chip; + void (*enable_fn)(struct irq_data *d); + void (*disable_fn)(struct irq_data *d); + const char *name = dev_name(&pdev->dev); + int ret; + int k; + + p = kzalloc(sizeof(*p), GFP_KERNEL); + if (!p) { + dev_err(&pdev->dev, "failed to allocate driver data\n"); + ret = -ENOMEM; + goto err0; + } + + /* deal with driver instance configuration */ + if (pdata) + memcpy(&p->config, pdata, sizeof(*pdata)); + if (!p->config.sense_bitfield_width) + p->config.sense_bitfield_width = 4; /* default to 4 bits */ + + p->pdev = pdev; + platform_set_drvdata(pdev, p); + + /* get hold of manadatory IOMEM */ + for (k = 0; k < INTC_IRQPIN_REG_NR; k++) { + io[k] = platform_get_resource(pdev, IORESOURCE_MEM, k); + if (!io[k]) { + dev_err(&pdev->dev, "not enough IOMEM resources\n"); + ret = -EINVAL; + goto err1; + } + } + + /* allow any number of IRQs between 1 and INTC_IRQPIN_MAX */ + for (k = 0; k < INTC_IRQPIN_MAX; k++) { + irq = platform_get_resource(pdev, IORESOURCE_IRQ, k); + if (!irq) + break; + + p->irq[k].hw_irq = k; + p->irq[k].p = p; + p->irq[k].irq = irq->start; + } + + p->number_of_irqs = k; + if (p->number_of_irqs < 1) { + dev_err(&pdev->dev, "not enough IRQ resources\n"); + ret = -EINVAL; + goto err1; + } + + /* ioremap IOMEM and setup read/write callbacks */ + for (k = 0; k < INTC_IRQPIN_REG_NR; k++) { + i = &p->iomem[k]; + + switch (resource_size(io[k])) { + case 1: + i->width = 8; + i->read = intc_irqpin_read8; + i->write = intc_irqpin_write8; + break; + case 4: + i->width = 32; + i->read = intc_irqpin_read32; + i->write = intc_irqpin_write32; + break; + default: + dev_err(&pdev->dev, "IOMEM size mismatch\n"); + ret = -EINVAL; + goto err2; + } + + i->iomem = 
ioremap_nocache(io[k]->start, resource_size(io[k])); + if (!i->iomem) { + dev_err(&pdev->dev, "failed to remap IOMEM\n"); + ret = -ENXIO; + goto err2; + } + } + + /* mask all interrupts using priority */ + for (k = 0; k < p->number_of_irqs; k++) + intc_irqpin_mask_unmask_prio(p, k, 1); + + /* use more severe masking method if requested */ + if (p->config.control_parent) { + enable_fn = intc_irqpin_irq_enable_force; + disable_fn = intc_irqpin_irq_disable_force; + } else { + enable_fn = intc_irqpin_irq_enable; + disable_fn = intc_irqpin_irq_disable; + } + + irq_chip = &p->irq_chip; + irq_chip->name = name; + irq_chip->irq_mask = disable_fn; + irq_chip->irq_unmask = enable_fn; + irq_chip->irq_enable = enable_fn; + irq_chip->irq_disable = disable_fn; + irq_chip->irq_set_type = intc_irqpin_irq_set_type; + irq_chip->flags = IRQCHIP_SKIP_SET_WAKE; + + p->irq_domain = irq_domain_add_simple(pdev->dev.of_node, + p->number_of_irqs, + p->config.irq_base, + &intc_irqpin_irq_domain_ops, p); + if (!p->irq_domain) { + ret = -ENXIO; + dev_err(&pdev->dev, "cannot initialize irq domain\n"); + goto err2; + } + + /* request and set priority on interrupts one by one */ + for (k = 0; k < p->number_of_irqs; k++) { + if (request_irq(p->irq[k].irq, intc_irqpin_irq_handler, + 0, name, &p->irq[k])) { + dev_err(&pdev->dev, "failed to request low IRQ\n"); + ret = -ENOENT; + goto err3; + } + intc_irqpin_mask_unmask_prio(p, k, 0); + } + + dev_info(&pdev->dev, "driving %d irqs\n", p->number_of_irqs); + + /* warn in case of mismatch if irq base is specified */ + if (p->config.irq_base) { + k = irq_find_mapping(p->irq_domain, 0); + if (p->config.irq_base != k) + dev_warn(&pdev->dev, "irq base mismatch (%d/%d)\n", + p->config.irq_base, k); + } + + return 0; + +err3: + for (; k >= 0; k--) + free_irq(p->irq[k - 1].irq, &p->irq[k - 1]); + + irq_domain_remove(p->irq_domain); +err2: + for (k = 0; k < INTC_IRQPIN_REG_NR; k++) + iounmap(p->iomem[k].iomem); +err1: + kfree(p); +err0: + return ret; +} + +static int intc_irqpin_remove(struct platform_device *pdev) +{ + struct intc_irqpin_priv *p = platform_get_drvdata(pdev); + int k; + + for (k = 0; k < p->number_of_irqs; k++) + free_irq(p->irq[k].irq, &p->irq[k]); + + irq_domain_remove(p->irq_domain); + + for (k = 0; k < INTC_IRQPIN_REG_NR; k++) + iounmap(p->iomem[k].iomem); + + kfree(p); + return 0; +} + +static struct platform_driver intc_irqpin_device_driver = { + .probe = intc_irqpin_probe, + .remove = intc_irqpin_remove, + .driver = { + .name = "renesas_intc_irqpin", + } +}; + +static int __init intc_irqpin_init(void) +{ + return platform_driver_register(&intc_irqpin_device_driver); +} +postcore_initcall(intc_irqpin_init); + +static void __exit intc_irqpin_exit(void) +{ + platform_driver_unregister(&intc_irqpin_device_driver); +} +module_exit(intc_irqpin_exit); + +MODULE_AUTHOR("Magnus Damm"); +MODULE_DESCRIPTION("Renesas INTC External IRQ Pin Driver"); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/platform_data/irq-renesas-intc-irqpin.h b/include/linux/platform_data/irq-renesas-intc-irqpin.h new file mode 100644 index 000000000000..00ccac34dac8 --- /dev/null +++ b/include/linux/platform_data/irq-renesas-intc-irqpin.h @@ -0,0 +1,10 @@ +#ifndef __IRQ_RENESAS_INTC_IRQPIN_H__ +#define __IRQ_RENESAS_INTC_IRQPIN_H__ + +struct renesas_intc_irqpin_config { + unsigned int sense_bitfield_width; + unsigned int irq_base; + bool control_parent; +}; + +#endif /* __IRQ_RENESAS_INTC_IRQPIN_H__ */ -- cgit From 0ca8712285e9e762ce4f5faf9f803b52e48c6837 Mon Sep 17 00:00:00 2001 From: Magnus 
Damm Date: Tue, 26 Feb 2013 20:59:23 +0900 Subject: irqchip: intc-irqpin: GPL header for platform data Add GPL header to the platform data include file. Signed-off-by: Magnus Damm Reviewed-by: Thomas Gleixner Tested-by: Guennadi Liakhovetski Signed-off-by: Simon Horman --- include/linux/platform_data/irq-renesas-intc-irqpin.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/irq-renesas-intc-irqpin.h b/include/linux/platform_data/irq-renesas-intc-irqpin.h index 00ccac34dac8..e4cb911066a6 100644 --- a/include/linux/platform_data/irq-renesas-intc-irqpin.h +++ b/include/linux/platform_data/irq-renesas-intc-irqpin.h @@ -1,3 +1,22 @@ +/* + * Renesas INTC External IRQ Pin Driver + * + * Copyright (C) 2013 Magnus Damm + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + #ifndef __IRQ_RENESAS_INTC_IRQPIN_H__ #define __IRQ_RENESAS_INTC_IRQPIN_H__ -- cgit From fbc83b7f59dd8ed1154286b6de00b6d03c24a3c4 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 27 Feb 2013 17:15:01 +0900 Subject: irqchip: Renesas IRQC driver This patch adds a driver for external IRQ pins connected to the IRQC hardware block on recent SoCs from Renesas. The IRQC hardware block is used together with more recent ARM based SoCs using the GIC. As usual, the GIC requires external IRQ trigger setup somewhere else, which in this particular case happens to be the IRQC. This driver implements the glue code needed to configure IRQ triggers and also to handle mask/unmask and demux of external IRQ pins hooked up from the IRQC to the GIC. Tested on r8a73a4, but designed to work with a wide range of SoCs. The driver requires one GIC SPI per external IRQ pin to operate. Each driver instance will handle up to 32 external IRQ pins. The SoCs using this driver are currently mainly used together with regular platform devices, so this driver allows configuration via platform data to support things such as a static interrupt base address. DT support will be added incrementally in the not so distant future.
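For illustration, a minimal board-code sketch of the platform-data path described above; the register base, SPI numbers and the gic_spi() offset are assumptions in the style of shmobile board files, not taken from this patch:

#include <linux/init.h>
#include <linux/ioport.h>
#include <linux/kernel.h>
#include <linux/platform_device.h>
#include <linux/platform_data/irq-renesas-irqc.h>

#define gic_spi(nr) ((nr) + 32)	/* assumed GIC SPI offset helper */

static const struct renesas_irqc_config irqc0_data = {
	.irq_base = 0,			/* 0: let the irq domain pick virqs */
};

static const struct resource irqc0_resources[] = {
	DEFINE_RES_MEM(0xe61c0000, 0x200),	/* assumed register block */
	DEFINE_RES_IRQ(gic_spi(0)),		/* one GIC SPI per IRQ pin */
	DEFINE_RES_IRQ(gic_spi(1)),
};

static void __init board_add_irqc0(void)
{
	platform_device_register_resndata(&platform_bus, "renesas_irqc", 0,
					  irqc0_resources,
					  ARRAY_SIZE(irqc0_resources),
					  &irqc0_data, sizeof(irqc0_data));
}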
Signed-off-by: Magnus Damm Tested-by: Guennadi Liakhovetski Signed-off-by: Simon Horman --- drivers/irqchip/Kconfig | 4 + drivers/irqchip/Makefile | 1 + drivers/irqchip/irq-renesas-irqc.c | 298 +++++++++++++++++++++++++ include/linux/platform_data/irq-renesas-irqc.h | 27 +++ 4 files changed, 330 insertions(+) create mode 100644 drivers/irqchip/irq-renesas-irqc.c create mode 100644 include/linux/platform_data/irq-renesas-irqc.h (limited to 'include/linux') diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index 0f5f1c3825bc..4a33351c25dc 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -29,6 +29,10 @@ config RENESAS_INTC_IRQPIN bool select IRQ_DOMAIN +config RENESAS_IRQC + bool + select IRQ_DOMAIN + config VERSATILE_FPGA_IRQ bool select IRQ_DOMAIN diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile index 1aaa4073ab60..e41ceb9bec22 100644 --- a/drivers/irqchip/Makefile +++ b/drivers/irqchip/Makefile @@ -9,4 +9,5 @@ obj-$(CONFIG_ARCH_SPEAR3XX) += spear-shirq.o obj-$(CONFIG_ARM_GIC) += irq-gic.o obj-$(CONFIG_ARM_VIC) += irq-vic.o obj-$(CONFIG_RENESAS_INTC_IRQPIN) += irq-renesas-intc-irqpin.o +obj-$(CONFIG_RENESAS_IRQC) += irq-renesas-irqc.o obj-$(CONFIG_VERSATILE_FPGA_IRQ) += irq-versatile-fpga.o diff --git a/drivers/irqchip/irq-renesas-irqc.c b/drivers/irqchip/irq-renesas-irqc.c new file mode 100644 index 000000000000..95d69bfac982 --- /dev/null +++ b/drivers/irqchip/irq-renesas-irqc.c @@ -0,0 +1,298 @@ +/* + * Renesas IRQC Driver + * + * Copyright (C) 2013 Magnus Damm + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define IRQC_IRQ_MAX 32 /* maximum 32 interrupts per driver instance */ + +#define IRQC_REQ_STS 0x00 +#define IRQC_EN_STS 0x04 +#define IRQC_EN_SET 0x08 +#define IRQC_INT_CPU_BASE(n) (0x000 + ((n) * 0x10)) +#define DETECT_STATUS 0x100 +#define IRQC_CONFIG(n) (0x180 + ((n) * 0x04)) + +struct irqc_irq { + int hw_irq; + int requested_irq; + int domain_irq; + struct irqc_priv *p; +}; + +struct irqc_priv { + void __iomem *iomem; + void __iomem *cpu_int_base; + struct irqc_irq irq[IRQC_IRQ_MAX]; + struct renesas_irqc_config config; + unsigned int number_of_irqs; + struct platform_device *pdev; + struct irq_chip irq_chip; + struct irq_domain *irq_domain; +}; + +static void irqc_dbg(struct irqc_irq *i, char *str) +{ + dev_dbg(&i->p->pdev->dev, "%s (%d:%d:%d)\n", + str, i->requested_irq, i->hw_irq, i->domain_irq); +} + +static void irqc_irq_enable(struct irq_data *d) +{ + struct irqc_priv *p = irq_data_get_irq_chip_data(d); + int hw_irq = irqd_to_hwirq(d); + + irqc_dbg(&p->irq[hw_irq], "enable"); + iowrite32(BIT(hw_irq), p->cpu_int_base + IRQC_EN_SET); +} + +static void irqc_irq_disable(struct irq_data *d) +{ + struct irqc_priv *p = irq_data_get_irq_chip_data(d); + int hw_irq = irqd_to_hwirq(d); + + irqc_dbg(&p->irq[hw_irq], "disable"); + iowrite32(BIT(hw_irq), p->cpu_int_base + IRQC_EN_STS); +} + +#define INTC_IRQ_SENSE_VALID 0x10 +#define INTC_IRQ_SENSE(x) (x + INTC_IRQ_SENSE_VALID) + +static unsigned char irqc_sense[IRQ_TYPE_SENSE_MASK + 1] = { + [IRQ_TYPE_LEVEL_LOW] = INTC_IRQ_SENSE(0x01), + [IRQ_TYPE_LEVEL_HIGH] = INTC_IRQ_SENSE(0x02), + [IRQ_TYPE_EDGE_FALLING] = INTC_IRQ_SENSE(0x04), /* Synchronous */ + [IRQ_TYPE_EDGE_RISING] = INTC_IRQ_SENSE(0x08), /* Synchronous */ + [IRQ_TYPE_EDGE_BOTH] = INTC_IRQ_SENSE(0x0c), /* Synchronous */ +}; + +static int irqc_irq_set_type(struct irq_data *d, unsigned int type) +{ + struct irqc_priv *p = irq_data_get_irq_chip_data(d); + int hw_irq = irqd_to_hwirq(d); + unsigned char value = irqc_sense[type & IRQ_TYPE_SENSE_MASK]; + unsigned long tmp; + + irqc_dbg(&p->irq[hw_irq], "sense"); + + if (!(value & INTC_IRQ_SENSE_VALID)) + return -EINVAL; + + tmp = ioread32(p->iomem + IRQC_CONFIG(hw_irq)); + tmp &= ~0x3f; + tmp |= value ^ INTC_IRQ_SENSE_VALID; + iowrite32(tmp, p->iomem + IRQC_CONFIG(hw_irq)); + return 0; +} + +static irqreturn_t irqc_irq_handler(int irq, void *dev_id) +{ + struct irqc_irq *i = dev_id; + struct irqc_priv *p = i->p; + unsigned long bit = BIT(i->hw_irq); + + irqc_dbg(i, "demux1"); + + if (ioread32(p->iomem + DETECT_STATUS) & bit) { + iowrite32(bit, p->iomem + DETECT_STATUS); + irqc_dbg(i, "demux2"); + generic_handle_irq(i->domain_irq); + return IRQ_HANDLED; + } + return IRQ_NONE; +} + +static int irqc_irq_domain_map(struct irq_domain *h, unsigned int virq, + irq_hw_number_t hw) +{ + struct irqc_priv *p = h->host_data; + + p->irq[hw].domain_irq = virq; + p->irq[hw].hw_irq = hw; + + irqc_dbg(&p->irq[hw], "map"); + irq_set_chip_data(virq, h->host_data); + irq_set_chip_and_handler(virq, &p->irq_chip, handle_level_irq); + set_irq_flags(virq, IRQF_VALID); /* kill me now */ + return 0; +} + +static struct irq_domain_ops irqc_irq_domain_ops = { + .map = irqc_irq_domain_map, +}; + +static int irqc_probe(struct 
platform_device *pdev) +{ + struct renesas_irqc_config *pdata = pdev->dev.platform_data; + struct irqc_priv *p; + struct resource *io; + struct resource *irq; + struct irq_chip *irq_chip; + const char *name = dev_name(&pdev->dev); + int ret; + int k; + + p = kzalloc(sizeof(*p), GFP_KERNEL); + if (!p) { + dev_err(&pdev->dev, "failed to allocate driver data\n"); + ret = -ENOMEM; + goto err0; + } + + /* deal with driver instance configuration */ + if (pdata) + memcpy(&p->config, pdata, sizeof(*pdata)); + + p->pdev = pdev; + platform_set_drvdata(pdev, p); + + /* get hold of manadatory IOMEM */ + io = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!io) { + dev_err(&pdev->dev, "not enough IOMEM resources\n"); + ret = -EINVAL; + goto err1; + } + + /* allow any number of IRQs between 1 and IRQC_IRQ_MAX */ + for (k = 0; k < IRQC_IRQ_MAX; k++) { + irq = platform_get_resource(pdev, IORESOURCE_IRQ, k); + if (!irq) + break; + + p->irq[k].p = p; + p->irq[k].requested_irq = irq->start; + } + + p->number_of_irqs = k; + if (p->number_of_irqs < 1) { + dev_err(&pdev->dev, "not enough IRQ resources\n"); + ret = -EINVAL; + goto err1; + } + + /* ioremap IOMEM and setup read/write callbacks */ + p->iomem = ioremap_nocache(io->start, resource_size(io)); + if (!p->iomem) { + dev_err(&pdev->dev, "failed to remap IOMEM\n"); + ret = -ENXIO; + goto err2; + } + + p->cpu_int_base = p->iomem + IRQC_INT_CPU_BASE(0); /* SYS-SPI */ + + irq_chip = &p->irq_chip; + irq_chip->name = name; + irq_chip->irq_mask = irqc_irq_disable; + irq_chip->irq_unmask = irqc_irq_enable; + irq_chip->irq_enable = irqc_irq_enable; + irq_chip->irq_disable = irqc_irq_disable; + irq_chip->irq_set_type = irqc_irq_set_type; + irq_chip->flags = IRQCHIP_SKIP_SET_WAKE; + + p->irq_domain = irq_domain_add_simple(pdev->dev.of_node, + p->number_of_irqs, + p->config.irq_base, + &irqc_irq_domain_ops, p); + if (!p->irq_domain) { + ret = -ENXIO; + dev_err(&pdev->dev, "cannot initialize irq domain\n"); + goto err2; + } + + /* request interrupts one by one */ + for (k = 0; k < p->number_of_irqs; k++) { + if (request_irq(p->irq[k].requested_irq, irqc_irq_handler, + 0, name, &p->irq[k])) { + dev_err(&pdev->dev, "failed to request IRQ\n"); + ret = -ENOENT; + goto err3; + } + } + + dev_info(&pdev->dev, "driving %d irqs\n", p->number_of_irqs); + + /* warn in case of mismatch if irq base is specified */ + if (p->config.irq_base) { + if (p->config.irq_base != p->irq[0].domain_irq) + dev_warn(&pdev->dev, "irq base mismatch (%d/%d)\n", + p->config.irq_base, p->irq[0].domain_irq); + } + + return 0; +err3: + for (; k >= 0; k--) + free_irq(p->irq[k - 1].requested_irq, &p->irq[k - 1]); + + irq_domain_remove(p->irq_domain); +err2: + iounmap(p->iomem); +err1: + kfree(p); +err0: + return ret; +} + +static int irqc_remove(struct platform_device *pdev) +{ + struct irqc_priv *p = platform_get_drvdata(pdev); + int k; + + for (k = 0; k < p->number_of_irqs; k++) + free_irq(p->irq[k].requested_irq, &p->irq[k]); + + irq_domain_remove(p->irq_domain); + iounmap(p->iomem); + kfree(p); + return 0; +} + +static struct platform_driver irqc_device_driver = { + .probe = irqc_probe, + .remove = irqc_remove, + .driver = { + .name = "renesas_irqc", + } +}; + +static int __init irqc_init(void) +{ + return platform_driver_register(&irqc_device_driver); +} +postcore_initcall(irqc_init); + +static void __exit irqc_exit(void) +{ + platform_driver_unregister(&irqc_device_driver); +} +module_exit(irqc_exit); + +MODULE_AUTHOR("Magnus Damm"); +MODULE_DESCRIPTION("Renesas IRQC Driver"); 
+MODULE_LICENSE("GPL v2"); diff --git a/include/linux/platform_data/irq-renesas-irqc.h b/include/linux/platform_data/irq-renesas-irqc.h new file mode 100644 index 000000000000..3ae17b3e00ed --- /dev/null +++ b/include/linux/platform_data/irq-renesas-irqc.h @@ -0,0 +1,27 @@ +/* + * Renesas IRQC Driver + * + * Copyright (C) 2013 Magnus Damm + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __IRQ_RENESAS_IRQC_H__ +#define __IRQ_RENESAS_IRQC_H__ + +struct renesas_irqc_config { + unsigned int irq_base; +}; + +#endif /* __IRQ_RENESAS_IRQC_H__ */ -- cgit From af6882be363d3a7bf0f72dd17ac2a639c4da0059 Mon Sep 17 00:00:00 2001 From: Fabio Baltieri Date: Fri, 8 Mar 2013 10:27:09 +0800 Subject: usb: phy: ab8500-usb: update irq handling code Update irq handling code to notify all possible link status changes of AB8500 and AB8505 to the ux500-musb glue driver. The additional event codes will be used for the pm-runtime implementation, and are defined in a separate ux500-specific header. This also modifies the irq registration code to use devm_* helpers and drops all unnecessary fail path code. Acked-by: Linus Walleij Signed-off-by: Fabio Baltieri Signed-off-by: Felipe Balbi --- drivers/usb/musb/ux500.c | 7 +- drivers/usb/phy/phy-ab8500-usb.c | 440 ++++++++++++++++++++++++++++++--------- include/linux/usb/musb-ux500.h | 31 +++ 3 files changed, 382 insertions(+), 96 deletions(-) create mode 100644 include/linux/usb/musb-ux500.h (limited to 'include/linux') diff --git a/drivers/usb/musb/ux500.c b/drivers/usb/musb/ux500.c index 0ae9472a68a8..88795f532370 100644 --- a/drivers/usb/musb/ux500.c +++ b/drivers/usb/musb/ux500.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "musb_core.h" @@ -107,15 +108,15 @@ static int musb_otg_notifications(struct notifier_block *nb, event, usb_otg_state_string(musb->xceiv->state)); switch (event) { - case USB_EVENT_ID: + case UX500_MUSB_ID: dev_dbg(musb->controller, "ID GND\n"); ux500_musb_set_vbus(musb, 1); break; - case USB_EVENT_VBUS: + case UX500_MUSB_VBUS: dev_dbg(musb->controller, "VBUS Connect\n"); ux500_musb_set_vbus(musb, 0); break; - case USB_EVENT_NONE: + case UX500_MUSB_NONE: dev_dbg(musb->controller, "VBUS Disconnect\n"); if (is_host_active(musb)) ux500_musb_set_vbus(musb, 0); diff --git a/drivers/usb/phy/phy-ab8500-usb.c b/drivers/usb/phy/phy-ab8500-usb.c index 9f5e0e4ab02a..351b0369a611 100644 --- a/drivers/usb/phy/phy-ab8500-usb.c +++ b/drivers/usb/phy/phy-ab8500-usb.c @@ -31,9 +31,11 @@ #include #include #include +#include #define AB8500_MAIN_WD_CTRL_REG 0x01 #define AB8500_USB_LINE_STAT_REG 0x80 +#define AB8505_USB_LINE_STAT_REG 0x94 #define AB8500_USB_PHY_CTRL_REG 0x8A #define AB8500_BIT_OTG_STAT_ID (1 << 0) @@ -44,36 +46,76 @@ #define AB8500_WD_KICK_DELAY_US 100 /* usec */ #define AB8500_WD_V11_DISABLE_DELAY_US 100 /* usec */ +#define AB8500_V20_31952_DISABLE_DELAY_US 100 /* usec */ /* Usb line status register
*/ enum ab8500_usb_link_status { - USB_LINK_NOT_CONFIGURED = 0, - USB_LINK_STD_HOST_NC, - USB_LINK_STD_HOST_C_NS, - USB_LINK_STD_HOST_C_S, - USB_LINK_HOST_CHG_NM, - USB_LINK_HOST_CHG_HS, - USB_LINK_HOST_CHG_HS_CHIRP, - USB_LINK_DEDICATED_CHG, - USB_LINK_ACA_RID_A, - USB_LINK_ACA_RID_B, - USB_LINK_ACA_RID_C_NM, - USB_LINK_ACA_RID_C_HS, - USB_LINK_ACA_RID_C_HS_CHIRP, - USB_LINK_HM_IDGND, - USB_LINK_RESERVED, - USB_LINK_NOT_VALID_LINK + USB_LINK_NOT_CONFIGURED_8500 = 0, + USB_LINK_STD_HOST_NC_8500, + USB_LINK_STD_HOST_C_NS_8500, + USB_LINK_STD_HOST_C_S_8500, + USB_LINK_HOST_CHG_NM_8500, + USB_LINK_HOST_CHG_HS_8500, + USB_LINK_HOST_CHG_HS_CHIRP_8500, + USB_LINK_DEDICATED_CHG_8500, + USB_LINK_ACA_RID_A_8500, + USB_LINK_ACA_RID_B_8500, + USB_LINK_ACA_RID_C_NM_8500, + USB_LINK_ACA_RID_C_HS_8500, + USB_LINK_ACA_RID_C_HS_CHIRP_8500, + USB_LINK_HM_IDGND_8500, + USB_LINK_RESERVED_8500, + USB_LINK_NOT_VALID_LINK_8500, +}; + +enum ab8505_usb_link_status { + USB_LINK_NOT_CONFIGURED_8505 = 0, + USB_LINK_STD_HOST_NC_8505, + USB_LINK_STD_HOST_C_NS_8505, + USB_LINK_STD_HOST_C_S_8505, + USB_LINK_CDP_8505, + USB_LINK_RESERVED0_8505, + USB_LINK_RESERVED1_8505, + USB_LINK_DEDICATED_CHG_8505, + USB_LINK_ACA_RID_A_8505, + USB_LINK_ACA_RID_B_8505, + USB_LINK_ACA_RID_C_NM_8505, + USB_LINK_RESERVED2_8505, + USB_LINK_RESERVED3_8505, + USB_LINK_HM_IDGND_8505, + USB_LINK_CHARGERPORT_NOT_OK_8505, + USB_LINK_CHARGER_DM_HIGH_8505, + USB_LINK_PHYEN_NO_VBUS_NO_IDGND_8505, + USB_LINK_STD_UPSTREAM_NO_IDGNG_NO_VBUS_8505, + USB_LINK_STD_UPSTREAM_8505, + USB_LINK_CHARGER_SE1_8505, + USB_LINK_CARKIT_CHGR_1_8505, + USB_LINK_CARKIT_CHGR_2_8505, + USB_LINK_ACA_DOCK_CHGR_8505, + USB_LINK_SAMSUNG_BOOT_CBL_PHY_EN_8505, + USB_LINK_SAMSUNG_BOOT_CBL_PHY_DISB_8505, + USB_LINK_SAMSUNG_UART_CBL_PHY_EN_8505, + USB_LINK_SAMSUNG_UART_CBL_PHY_DISB_8505, + USB_LINK_MOTOROLA_FACTORY_CBL_PHY_EN_8505, +}; + +enum ab8500_usb_mode { + USB_IDLE = 0, + USB_PERIPHERAL, + USB_HOST, + USB_DEDICATED_CHG }; struct ab8500_usb { struct usb_phy phy; struct device *dev; struct ab8500 *ab8500; - int irq_num_link_status; unsigned vbus_draw; struct delayed_work dwork; struct work_struct phy_dis_work; unsigned long link_status_wait; + enum ab8500_usb_mode mode; + int previous_link_status_state; }; static inline struct ab8500_usb *phy_to_ab(struct usb_phy *x) @@ -104,6 +146,17 @@ static void ab8500_usb_wd_workaround(struct ab8500_usb *ab) 0); } +static void ab8500_usb_wd_linkstatus(struct ab8500_usb *ab, u8 bit) +{ + /* Workaround for v2.0 bug # 31952 */ + if (is_ab8500_2p0(ab->ab8500)) { + abx500_mask_and_set_register_interruptible(ab->dev, + AB8500_USB, AB8500_USB_PHY_CTRL_REG, + bit, bit); + udelay(AB8500_V20_31952_DISABLE_DELAY_US); + } +} + static void ab8500_usb_phy_ctrl(struct ab8500_usb *ab, bool sel_host, bool enable) { @@ -139,92 +192,276 @@ static void ab8500_usb_phy_ctrl(struct ab8500_usb *ab, bool sel_host, #define ab8500_usb_peri_phy_en(ab) ab8500_usb_phy_ctrl(ab, false, true) #define ab8500_usb_peri_phy_dis(ab) ab8500_usb_phy_ctrl(ab, false, false) -static int ab8500_usb_link_status_update(struct ab8500_usb *ab) +static int ab8505_usb_link_status_update(struct ab8500_usb *ab, + enum ab8505_usb_link_status lsts) { - u8 reg; - enum ab8500_usb_link_status lsts; - void *v = NULL; - enum usb_phy_events event; + enum ux500_musb_vbus_id_status event = 0; - abx500_get_register_interruptible(ab->dev, - AB8500_USB, - AB8500_USB_LINE_STAT_REG, - ®); + dev_dbg(ab->dev, "ab8505_usb_link_status_update %d\n", lsts); - lsts = (reg >> 3) & 0x0F; + /* + * Spurious 
link_status interrupts are seen at the time of + * disconnection of a device in RIDA state + */ + if (ab->previous_link_status_state == USB_LINK_ACA_RID_A_8505 && + (lsts == USB_LINK_STD_HOST_NC_8505)) + return 0; + + ab->previous_link_status_state = lsts; switch (lsts) { - case USB_LINK_NOT_CONFIGURED: - case USB_LINK_RESERVED: - case USB_LINK_NOT_VALID_LINK: - /* TODO: Disable regulators. */ - ab8500_usb_host_phy_dis(ab); - ab8500_usb_peri_phy_dis(ab); - ab->phy.state = OTG_STATE_B_IDLE; + case USB_LINK_ACA_RID_B_8505: + event = UX500_MUSB_RIDB; + case USB_LINK_NOT_CONFIGURED_8505: + case USB_LINK_RESERVED0_8505: + case USB_LINK_RESERVED1_8505: + case USB_LINK_RESERVED2_8505: + case USB_LINK_RESERVED3_8505: + ab->mode = USB_IDLE; ab->phy.otg->default_a = false; ab->vbus_draw = 0; - event = USB_EVENT_NONE; + if (event != UX500_MUSB_RIDB) + event = UX500_MUSB_NONE; + /* + * Fallback to default B_IDLE as nothing + * is connected + */ + ab->phy.state = OTG_STATE_B_IDLE; break; - case USB_LINK_STD_HOST_NC: - case USB_LINK_STD_HOST_C_NS: - case USB_LINK_STD_HOST_C_S: - case USB_LINK_HOST_CHG_NM: - case USB_LINK_HOST_CHG_HS: - case USB_LINK_HOST_CHG_HS_CHIRP: - if (ab->phy.otg->gadget) { - /* TODO: Enable regulators. */ + case USB_LINK_ACA_RID_C_NM_8505: + event = UX500_MUSB_RIDC; + case USB_LINK_STD_HOST_NC_8505: + case USB_LINK_STD_HOST_C_NS_8505: + case USB_LINK_STD_HOST_C_S_8505: + case USB_LINK_CDP_8505: + if (ab->mode == USB_IDLE) { + ab->mode = USB_PERIPHERAL; ab8500_usb_peri_phy_en(ab); - v = ab->phy.otg->gadget; + atomic_notifier_call_chain(&ab->phy.notifier, + UX500_MUSB_PREPARE, &ab->vbus_draw); } - event = USB_EVENT_VBUS; + if (event != UX500_MUSB_RIDC) + event = UX500_MUSB_VBUS; break; - case USB_LINK_HM_IDGND: - if (ab->phy.otg->host) { - /* TODO: Enable regulators. 
*/ + case USB_LINK_ACA_RID_A_8505: + case USB_LINK_ACA_DOCK_CHGR_8505: + event = UX500_MUSB_RIDA; + case USB_LINK_HM_IDGND_8505: + if (ab->mode == USB_IDLE) { + ab->mode = USB_HOST; ab8500_usb_host_phy_en(ab); - v = ab->phy.otg->host; + atomic_notifier_call_chain(&ab->phy.notifier, + UX500_MUSB_PREPARE, &ab->vbus_draw); } - ab->phy.state = OTG_STATE_A_IDLE; ab->phy.otg->default_a = true; - event = USB_EVENT_ID; + if (event != UX500_MUSB_RIDA) + event = UX500_MUSB_ID; + atomic_notifier_call_chain(&ab->phy.notifier, + event, &ab->vbus_draw); break; - case USB_LINK_ACA_RID_A: - case USB_LINK_ACA_RID_B: - /* TODO */ - case USB_LINK_ACA_RID_C_NM: - case USB_LINK_ACA_RID_C_HS: - case USB_LINK_ACA_RID_C_HS_CHIRP: - case USB_LINK_DEDICATED_CHG: - /* TODO: vbus_draw */ - event = USB_EVENT_CHARGER; + case USB_LINK_DEDICATED_CHG_8505: + ab->mode = USB_DEDICATED_CHG; + event = UX500_MUSB_CHARGER; + atomic_notifier_call_chain(&ab->phy.notifier, + event, &ab->vbus_draw); + break; + + default: break; } - atomic_notifier_call_chain(&ab->phy.notifier, event, v); + return 0; +} + +static int ab8500_usb_link_status_update(struct ab8500_usb *ab, + enum ab8500_usb_link_status lsts) +{ + enum ux500_musb_vbus_id_status event = 0; + + dev_dbg(ab->dev, "ab8500_usb_link_status_update %d\n", lsts); + + /* + * Spurious link_status interrupts are seen in case of a + * disconnection of a device in IDGND and RIDA stage + */ + if (ab->previous_link_status_state == USB_LINK_HM_IDGND_8500 && + (lsts == USB_LINK_STD_HOST_C_NS_8500 || + lsts == USB_LINK_STD_HOST_NC_8500)) + return 0; + + if (ab->previous_link_status_state == USB_LINK_ACA_RID_A_8500 && + lsts == USB_LINK_STD_HOST_NC_8500) + return 0; + + ab->previous_link_status_state = lsts; + + switch (lsts) { + case USB_LINK_ACA_RID_B_8500: + event = UX500_MUSB_RIDB; + case USB_LINK_NOT_CONFIGURED_8500: + case USB_LINK_NOT_VALID_LINK_8500: + ab->mode = USB_IDLE; + ab->phy.otg->default_a = false; + ab->vbus_draw = 0; + if (event != UX500_MUSB_RIDB) + event = UX500_MUSB_NONE; + /* Fallback to default B_IDLE as nothing is connected */ + ab->phy.state = OTG_STATE_B_IDLE; + break; + + case USB_LINK_ACA_RID_C_NM_8500: + case USB_LINK_ACA_RID_C_HS_8500: + case USB_LINK_ACA_RID_C_HS_CHIRP_8500: + event = UX500_MUSB_RIDC; + case USB_LINK_STD_HOST_NC_8500: + case USB_LINK_STD_HOST_C_NS_8500: + case USB_LINK_STD_HOST_C_S_8500: + case USB_LINK_HOST_CHG_NM_8500: + case USB_LINK_HOST_CHG_HS_8500: + case USB_LINK_HOST_CHG_HS_CHIRP_8500: + if (ab->mode == USB_IDLE) { + ab->mode = USB_PERIPHERAL; + ab8500_usb_peri_phy_en(ab); + atomic_notifier_call_chain(&ab->phy.notifier, + UX500_MUSB_PREPARE, &ab->vbus_draw); + } + if (event != UX500_MUSB_RIDC) + event = UX500_MUSB_VBUS; + break; + + case USB_LINK_ACA_RID_A_8500: + event = UX500_MUSB_RIDA; + case USB_LINK_HM_IDGND_8500: + if (ab->mode == USB_IDLE) { + ab->mode = USB_HOST; + ab8500_usb_host_phy_en(ab); + atomic_notifier_call_chain(&ab->phy.notifier, + UX500_MUSB_PREPARE, &ab->vbus_draw); + } + ab->phy.otg->default_a = true; + if (event != UX500_MUSB_RIDA) + event = UX500_MUSB_ID; + atomic_notifier_call_chain(&ab->phy.notifier, + event, &ab->vbus_draw); + break; + + case USB_LINK_DEDICATED_CHG_8500: + ab->mode = USB_DEDICATED_CHG; + event = UX500_MUSB_CHARGER; + atomic_notifier_call_chain(&ab->phy.notifier, + event, &ab->vbus_draw); + break; + + case USB_LINK_RESERVED_8500: + break; + } return 0; } -static void ab8500_usb_delayed_work(struct work_struct *work) +/* + * Connection Sequence: + * 1. Link Status Interrupt + * 2. 
Enable AB clock + * 3. Enable AB regulators + * 4. Enable USB phy + * 5. Reset the musb controller + * 6. Switch the ULPI GPIO pins to fucntion mode + * 7. Enable the musb Peripheral5 clock + * 8. Restore MUSB context + */ +static int abx500_usb_link_status_update(struct ab8500_usb *ab) { - struct ab8500_usb *ab = container_of(work, struct ab8500_usb, - dwork.work); + u8 reg; + int ret = 0; + + if (is_ab8500(ab->ab8500)) { + enum ab8500_usb_link_status lsts; + + abx500_get_register_interruptible(ab->dev, + AB8500_USB, AB8500_USB_LINE_STAT_REG, ®); + lsts = (reg >> 3) & 0x0F; + ret = ab8500_usb_link_status_update(ab, lsts); + } else if (is_ab8505(ab->ab8500)) { + enum ab8505_usb_link_status lsts; + + abx500_get_register_interruptible(ab->dev, + AB8500_USB, AB8505_USB_LINE_STAT_REG, ®); + lsts = (reg >> 3) & 0x1F; + ret = ab8505_usb_link_status_update(ab, lsts); + } + + return ret; +} + +/* + * Disconnection Sequence: + * 1. Disconect Interrupt + * 2. Disable regulators + * 3. Disable AB clock + * 4. Disable the Phy + * 5. Link Status Interrupt + * 6. Disable Musb Clock + */ +static irqreturn_t ab8500_usb_disconnect_irq(int irq, void *data) +{ + struct ab8500_usb *ab = (struct ab8500_usb *) data; + enum usb_phy_events event = UX500_MUSB_NONE; + + /* Link status will not be updated till phy is disabled. */ + if (ab->mode == USB_HOST) { + ab->phy.otg->default_a = false; + ab->vbus_draw = 0; + atomic_notifier_call_chain(&ab->phy.notifier, + event, &ab->vbus_draw); + ab8500_usb_host_phy_dis(ab); + ab->mode = USB_IDLE; + } + + if (ab->mode == USB_PERIPHERAL) { + atomic_notifier_call_chain(&ab->phy.notifier, + event, &ab->vbus_draw); + ab8500_usb_peri_phy_dis(ab); + atomic_notifier_call_chain(&ab->phy.notifier, + UX500_MUSB_CLEAN, &ab->vbus_draw); + ab->mode = USB_IDLE; + ab->phy.otg->default_a = false; + ab->vbus_draw = 0; + } + + if (is_ab8500_2p0(ab->ab8500)) { + if (ab->mode == USB_DEDICATED_CHG) { + ab8500_usb_wd_linkstatus(ab, + AB8500_BIT_PHY_CTRL_DEVICE_EN); + abx500_mask_and_set_register_interruptible(ab->dev, + AB8500_USB, AB8500_USB_PHY_CTRL_REG, + AB8500_BIT_PHY_CTRL_DEVICE_EN, 0); + } + } - ab8500_usb_link_status_update(ab); + return IRQ_HANDLED; } -static irqreturn_t ab8500_usb_v20_irq(int irq, void *data) +static irqreturn_t ab8500_usb_link_status_irq(int irq, void *data) { struct ab8500_usb *ab = (struct ab8500_usb *) data; - ab8500_usb_link_status_update(ab); + abx500_usb_link_status_update(ab); return IRQ_HANDLED; } +static void ab8500_usb_delayed_work(struct work_struct *work) +{ + struct ab8500_usb *ab = container_of(work, struct ab8500_usb, + dwork.work); + + abx500_usb_link_status_update(ab); +} + static void ab8500_usb_phy_disable_work(struct work_struct *work) { struct ab8500_usb *ab = container_of(work, struct ab8500_usb, @@ -250,7 +487,7 @@ static int ab8500_usb_set_power(struct usb_phy *phy, unsigned mA) if (mA) atomic_notifier_call_chain(&ab->phy.notifier, - USB_EVENT_ENUMERATED, ab->phy.otg->gadget); + UX500_MUSB_ENUMERATED, ab->phy.otg->gadget); return 0; } @@ -327,30 +564,48 @@ static int ab8500_usb_set_host(struct usb_otg *otg, struct usb_bus *host) return 0; } -static void ab8500_usb_irq_free(struct ab8500_usb *ab) -{ - free_irq(ab->irq_num_link_status, ab); -} - -static int ab8500_usb_v2_res_setup(struct platform_device *pdev, - struct ab8500_usb *ab) +static int ab8500_usb_irq_setup(struct platform_device *pdev, + struct ab8500_usb *ab) { int err; + int irq; - ab->irq_num_link_status = platform_get_irq_byname(pdev, - "USB_LINK_STATUS"); - if 
(ab->irq_num_link_status < 0) { + irq = platform_get_irq_byname(pdev, "USB_LINK_STATUS"); + if (irq < 0) { dev_err(&pdev->dev, "Link status irq not found\n"); - return ab->irq_num_link_status; + return irq; + } + err = devm_request_threaded_irq(&pdev->dev, irq, NULL, + ab8500_usb_link_status_irq, + IRQF_NO_SUSPEND | IRQF_SHARED, "usb-link-status", ab); + if (err < 0) { + dev_err(ab->dev, "request_irq failed for link status irq\n"); + return err; } - err = request_threaded_irq(ab->irq_num_link_status, NULL, - ab8500_usb_v20_irq, - IRQF_NO_SUSPEND | IRQF_SHARED, - "usb-link-status", ab); + irq = platform_get_irq_byname(pdev, "ID_WAKEUP_F"); + if (irq < 0) { + dev_err(&pdev->dev, "ID fall irq not found\n"); + return irq; + } + err = devm_request_threaded_irq(&pdev->dev, irq, NULL, + ab8500_usb_disconnect_irq, + IRQF_NO_SUSPEND | IRQF_SHARED, "usb-id-fall", ab); if (err < 0) { - dev_err(ab->dev, - "request_irq failed for link status irq\n"); + dev_err(ab->dev, "request_irq failed for ID fall irq\n"); + return err; + } + + irq = platform_get_irq_byname(pdev, "VBUS_DET_F"); + if (irq < 0) { + dev_err(&pdev->dev, "VBUS fall irq not found\n"); + return irq; + } + err = devm_request_threaded_irq(&pdev->dev, irq, NULL, + ab8500_usb_disconnect_irq, + IRQF_NO_SUSPEND | IRQF_SHARED, "usb-vbus-fall", ab); + if (err < 0) { + dev_err(ab->dev, "request_irq failed for Vbus fall irq\n"); return err; } @@ -408,22 +663,23 @@ static int ab8500_usb_probe(struct platform_device *pdev) /* all: Disable phy when called from set_host and set_peripheral */ INIT_WORK(&ab->phy_dis_work, ab8500_usb_phy_disable_work); - err = ab8500_usb_v2_res_setup(pdev, ab); + err = ab8500_usb_irq_setup(pdev, ab); if (err < 0) - goto fail0; + goto fail; err = usb_add_phy(&ab->phy, USB_PHY_TYPE_USB2); if (err) { dev_err(&pdev->dev, "Can't register transceiver\n"); - goto fail1; + goto fail; } + /* Needed to enable ID detection. */ + ab8500_usb_wd_workaround(ab); + dev_info(&pdev->dev, "revision 0x%2x driver initialized\n", rev); return 0; -fail1: - ab8500_usb_irq_free(ab); -fail0: +fail: kfree(otg); kfree(ab); return err; @@ -433,8 +689,6 @@ static int ab8500_usb_remove(struct platform_device *pdev) { struct ab8500_usb *ab = platform_get_drvdata(pdev); - ab8500_usb_irq_free(ab); - cancel_delayed_work_sync(&ab->dwork); cancel_work_sync(&ab->phy_dis_work); diff --git a/include/linux/usb/musb-ux500.h b/include/linux/usb/musb-ux500.h new file mode 100644 index 000000000000..1e2c7130f6e1 --- /dev/null +++ b/include/linux/usb/musb-ux500.h @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2013 ST-Ericsson AB + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#ifndef __MUSB_UX500_H__ +#define __MUSB_UX500_H__ + +enum ux500_musb_vbus_id_status { + UX500_MUSB_NONE = 0, + UX500_MUSB_VBUS, + UX500_MUSB_ID, + UX500_MUSB_CHARGER, + UX500_MUSB_ENUMERATED, + UX500_MUSB_RIDA, + UX500_MUSB_RIDB, + UX500_MUSB_RIDC, + UX500_MUSB_PREPARE, + UX500_MUSB_CLEAN, +}; + +#endif /* __MUSB_UX500_H__ */ -- cgit From 70bc126471af30bb115e635512dcf6d86fe6e29a Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Wed, 6 Mar 2013 08:20:52 -0500 Subject: tty: Add safe tty throttle/unthrottle functions The tty driver can become stuck throttled due to race conditions between throttle and unthrottle, when the decision to throttle or unthrottle is conditional. The following example helps to illustrate the race:

	CPU 0                  |  CPU 1
	                       |
	if (condition A)       |
	                       |  if (!condition A)
	                       |          unthrottle()
	    throttle()         |
	                       |

Note the converse is also possible; i.e.,

	CPU 0                  |  CPU 1
	                       |
	                       |  if (!condition A)
	if (condition A)       |
	    throttle()         |
	                       |          unthrottle()
	                       |

Add new throttle/unthrottle functions based on the familiar model of task state and schedule/wake. For example,

	while (1) {
		tty_set_flow_change(tty, TTY_THROTTLE_SAFE);
		if (!condition)
			break;
		if (!tty_throttle_safe(tty))
			break;
	}
	__tty_set_flow_change(tty, 0);

In this example, if an unthrottle occurs after the condition is evaluated but before tty_throttle_safe(), then tty_throttle_safe() will return non-zero, looping and forcing the re-evaluation of condition. Reported-by: Vincent Pillet Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_ioctl.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/tty.h | 18 ++++++++++++++ 2 files changed, 82 insertions(+) (limited to 'include/linux') diff --git a/drivers/tty/tty_ioctl.c b/drivers/tty/tty_ioctl.c index d58b92cc187c..132d452578bb 100644 --- a/drivers/tty/tty_ioctl.c +++ b/drivers/tty/tty_ioctl.c @@ -106,6 +106,7 @@ void tty_throttle(struct tty_struct *tty) if (!test_and_set_bit(TTY_THROTTLED, &tty->flags) && tty->ops->throttle) tty->ops->throttle(tty); + tty->flow_change = 0; mutex_unlock(&tty->termios_mutex); } EXPORT_SYMBOL(tty_throttle); @@ -129,10 +130,73 @@ void tty_unthrottle(struct tty_struct *tty) if (test_and_clear_bit(TTY_THROTTLED, &tty->flags) && tty->ops->unthrottle) tty->ops->unthrottle(tty); + tty->flow_change = 0; mutex_unlock(&tty->termios_mutex); } EXPORT_SYMBOL(tty_unthrottle); +/** + * tty_throttle_safe - flow control + * @tty: terminal + * + * Similar to tty_throttle() but will only attempt throttle + * if tty->flow_change is TTY_THROTTLE_SAFE. Prevents an accidental + * throttle due to race conditions when throttling is conditional + * on factors evaluated prior to throttling. + * + * Returns 0 if tty is throttled (or was already throttled) + */ + +int tty_throttle_safe(struct tty_struct *tty) +{ + int ret = 0; + + mutex_lock(&tty->termios_mutex); + if (!test_bit(TTY_THROTTLED, &tty->flags)) { + if (tty->flow_change != TTY_THROTTLE_SAFE) + ret = 1; + else { + __set_bit(TTY_THROTTLED, &tty->flags); + if (tty->ops->throttle) + tty->ops->throttle(tty); + } + } + mutex_unlock(&tty->termios_mutex); + + return ret; +} + +/** + * tty_unthrottle_safe - flow control + * @tty: terminal + * + * Similar to tty_unthrottle() but will only attempt unthrottle + * if tty->flow_change is TTY_UNTHROTTLE_SAFE. Prevents an accidental + * unthrottle due to race conditions when unthrottling is conditional + * on factors evaluated prior to unthrottling.
+ * + * Returns 0 if tty is unthrottled (or was already unthrottled) + */ + +int tty_unthrottle_safe(struct tty_struct *tty) +{ + int ret = 0; + + mutex_lock(&tty->termios_mutex); + if (test_bit(TTY_THROTTLED, &tty->flags)) { + if (tty->flow_change != TTY_UNTHROTTLE_SAFE) + ret = 1; + else { + __clear_bit(TTY_THROTTLED, &tty->flags); + if (tty->ops->unthrottle) + tty->ops->unthrottle(tty); + } + } + mutex_unlock(&tty->termios_mutex); + + return ret; +} + /** * tty_wait_until_sent - wait for I/O to finish * @tty: tty we are waiting for diff --git a/include/linux/tty.h b/include/linux/tty.h index c75d886b0307..189ca80494d1 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -258,6 +258,7 @@ struct tty_struct { unsigned char warned:1; unsigned char ctrl_status; /* ctrl_lock */ unsigned int receive_room; /* Bytes free for queue */ + int flow_change; struct tty_struct *link; struct fasync_struct *fasync; @@ -318,6 +319,21 @@ struct tty_file_private { #define TTY_WRITE_FLUSH(tty) tty_write_flush((tty)) +/* Values for tty->flow_change */ +#define TTY_THROTTLE_SAFE 1 +#define TTY_UNTHROTTLE_SAFE 2 + +static inline void __tty_set_flow_change(struct tty_struct *tty, int val) +{ + tty->flow_change = val; +} + +static inline void tty_set_flow_change(struct tty_struct *tty, int val) +{ + tty->flow_change = val; + smp_mb(); +} + #ifdef CONFIG_TTY extern void console_init(void); extern void tty_kref_put(struct tty_struct *tty); @@ -400,6 +416,8 @@ extern int tty_write_room(struct tty_struct *tty); extern void tty_driver_flush_buffer(struct tty_struct *tty); extern void tty_throttle(struct tty_struct *tty); extern void tty_unthrottle(struct tty_struct *tty); +extern int tty_throttle_safe(struct tty_struct *tty); +extern int tty_unthrottle_safe(struct tty_struct *tty); extern int tty_do_resize(struct tty_struct *tty, struct winsize *ws); extern void tty_driver_remove_tty(struct tty_driver *driver, struct tty_struct *tty); -- cgit From 6be06e7273c4682a15ca1f4adf1aeae510823530 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Wed, 6 Mar 2013 08:38:21 -0500 Subject: tty: Fix checkpatch errors in tty_ldisc.h Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- include/linux/tty_ldisc.h | 132 +++++++++++++++++++++++----------------------- 1 file changed, 66 insertions(+), 66 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tty_ldisc.h b/include/linux/tty_ldisc.h index 455a0d7bf220..58390c73df8b 100644 --- a/include/linux/tty_ldisc.h +++ b/include/linux/tty_ldisc.h @@ -9,89 +9,89 @@ * * int (*open)(struct tty_struct *); * - * This function is called when the line discipline is associated - * with the tty. The line discipline can use this as an - * opportunity to initialize any state needed by the ldisc routines. - * + * This function is called when the line discipline is associated + * with the tty. The line discipline can use this as an + * opportunity to initialize any state needed by the ldisc routines. + * * void (*close)(struct tty_struct *); * * This function is called when the line discipline is being - * shutdown, either because the tty is being closed or because - * the tty is being changed to use a new line discipline - * + * shutdown, either because the tty is being closed or because + * the tty is being changed to use a new line discipline + * * void (*flush_buffer)(struct tty_struct *tty); * - * This function instructs the line discipline to clear its - * buffers of any input characters it may have queued to be - * delivered to the user mode process. 
- * + * This function instructs the line discipline to clear its + * buffers of any input characters it may have queued to be + * delivered to the user mode process. + * * ssize_t (*chars_in_buffer)(struct tty_struct *tty); * - * This function returns the number of input characters the line + * This function returns the number of input characters the line * discipline may have queued up to be delivered to the user mode * process. - * + * * ssize_t (*read)(struct tty_struct * tty, struct file * file, * unsigned char * buf, size_t nr); * - * This function is called when the user requests to read from - * the tty. The line discipline will return whatever characters - * it has buffered up for the user. If this function is not - * defined, the user will receive an EIO error. - * + * This function is called when the user requests to read from + * the tty. The line discipline will return whatever characters + * it has buffered up for the user. If this function is not + * defined, the user will receive an EIO error. + * * ssize_t (*write)(struct tty_struct * tty, struct file * file, - * const unsigned char * buf, size_t nr); - * - * This function is called when the user requests to write to the - * tty. The line discipline will deliver the characters to the - * low-level tty device for transmission, optionally performing - * some processing on the characters first. If this function is - * not defined, the user will receive an EIO error. - * + * const unsigned char * buf, size_t nr); + * + * This function is called when the user requests to write to the + * tty. The line discipline will deliver the characters to the + * low-level tty device for transmission, optionally performing + * some processing on the characters first. If this function is + * not defined, the user will receive an EIO error. + * * int (*ioctl)(struct tty_struct * tty, struct file * file, - * unsigned int cmd, unsigned long arg); + * unsigned int cmd, unsigned long arg); * * This function is called when the user requests an ioctl which - * is not handled by the tty layer or the low-level tty driver. - * It is intended for ioctls which affect line discpline - * operation. Note that the search order for ioctls is (1) tty - * layer, (2) tty low-level driver, (3) line discpline. So a - * low-level driver can "grab" an ioctl request before the line - * discpline has a chance to see it. - * + * is not handled by the tty layer or the low-level tty driver. + * It is intended for ioctls which affect line discpline + * operation. Note that the search order for ioctls is (1) tty + * layer, (2) tty low-level driver, (3) line discpline. So a + * low-level driver can "grab" an ioctl request before the line + * discpline has a chance to see it. + * * long (*compat_ioctl)(struct tty_struct * tty, struct file * file, - * unsigned int cmd, unsigned long arg); + * unsigned int cmd, unsigned long arg); * - * Process ioctl calls from 32-bit process on 64-bit system + * Process ioctl calls from 32-bit process on 64-bit system * * void (*set_termios)(struct tty_struct *tty, struct ktermios * old); * - * This function notifies the line discpline that a change has - * been made to the termios structure. - * + * This function notifies the line discpline that a change has + * been made to the termios structure. + * * int (*poll)(struct tty_struct * tty, struct file * file, - * poll_table *wait); + * poll_table *wait); * - * This function is called when a user attempts to select/poll on a - * tty device. 
It is solely the responsibility of the line - * discipline to handle poll requests. + * This function is called when a user attempts to select/poll on a + * tty device. It is solely the responsibility of the line + * discipline to handle poll requests. * * void (*receive_buf)(struct tty_struct *, const unsigned char *cp, - * char *fp, int count); - * - * This function is called by the low-level tty driver to send - * characters received by the hardware to the line discpline for - * processing. is a pointer to the buffer of input - * character received by the device. is a pointer to a - * pointer of flag bytes which indicate whether a character was - * received with a parity error, etc. - * + * char *fp, int count); + * + * This function is called by the low-level tty driver to send + * characters received by the hardware to the line discpline for + * processing. is a pointer to the buffer of input + * character received by the device. is a pointer to a + * pointer of flag bytes which indicate whether a character was + * received with a parity error, etc. + * * void (*write_wakeup)(struct tty_struct *); * - * This function is called by the low-level tty driver to signal - * that line discpline should try to send more characters to the - * low-level driver for transmission. If the line discpline does - * not have any more data to send, it can just return. + * This function is called by the low-level tty driver to signal + * that line discpline should try to send more characters to the + * low-level driver for transmission. If the line discpline does + * not have any more data to send, it can just return. * * int (*hangup)(struct tty_struct *) * @@ -115,7 +115,7 @@ struct tty_ldisc_ops { char *name; int num; int flags; - + /* * The following routines are called from above. */ @@ -123,19 +123,19 @@ struct tty_ldisc_ops { void (*close)(struct tty_struct *); void (*flush_buffer)(struct tty_struct *tty); ssize_t (*chars_in_buffer)(struct tty_struct *tty); - ssize_t (*read)(struct tty_struct * tty, struct file * file, - unsigned char __user * buf, size_t nr); - ssize_t (*write)(struct tty_struct * tty, struct file * file, - const unsigned char * buf, size_t nr); - int (*ioctl)(struct tty_struct * tty, struct file * file, + ssize_t (*read)(struct tty_struct *tty, struct file *file, + unsigned char __user *buf, size_t nr); + ssize_t (*write)(struct tty_struct *tty, struct file *file, + const unsigned char *buf, size_t nr); + int (*ioctl)(struct tty_struct *tty, struct file *file, unsigned int cmd, unsigned long arg); - long (*compat_ioctl)(struct tty_struct * tty, struct file * file, + long (*compat_ioctl)(struct tty_struct *tty, struct file *file, unsigned int cmd, unsigned long arg); - void (*set_termios)(struct tty_struct *tty, struct ktermios * old); + void (*set_termios)(struct tty_struct *tty, struct ktermios *old); unsigned int (*poll)(struct tty_struct *, struct file *, struct poll_table_struct *); int (*hangup)(struct tty_struct *tty); - + /* * The following routines are called from below. */ @@ -145,7 +145,7 @@ struct tty_ldisc_ops { void (*dcd_change)(struct tty_struct *, unsigned int); struct module *owner; - + int refcount; }; -- cgit From 6865ff222ccab371c04afce17aec1f7d70b17dbc Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 7 Mar 2013 13:12:27 +0100 Subject: TTY: do not warn about setting speed via SPD_* The warning is there since 2.1.69 and we have not seen anybody reporting it in the past decade. Remove the warning now. tty_get_baud_rate can now be inline. 
This gives us one less EXPORT_SYMBOL. Signed-off-by: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_ioctl.c | 28 ---------------------------- include/linux/tty.h | 18 ++++++++++++++++-- 2 files changed, 16 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/tty_ioctl.c b/drivers/tty/tty_ioctl.c index 132d452578bb..28715e48b2f7 100644 --- a/drivers/tty/tty_ioctl.c +++ b/drivers/tty/tty_ioctl.c @@ -478,34 +478,6 @@ void tty_encode_baud_rate(struct tty_struct *tty, speed_t ibaud, speed_t obaud) } EXPORT_SYMBOL_GPL(tty_encode_baud_rate); -/** - * tty_get_baud_rate - get tty bit rates - * @tty: tty to query - * - * Returns the baud rate as an integer for this terminal. The - * termios lock must be held by the caller and the terminal bit - * flags may be updated. - * - * Locking: none - */ - -speed_t tty_get_baud_rate(struct tty_struct *tty) -{ - speed_t baud = tty_termios_baud_rate(&tty->termios); - - if (baud == 38400 && tty->alt_speed) { - if (!tty->warned) { - printk(KERN_WARNING "Use of setserial/setrocket to " - "set SPD_* flags is deprecated\n"); - tty->warned = 1; - } - baud = tty->alt_speed; - } - - return baud; -} -EXPORT_SYMBOL(tty_get_baud_rate); - /** * tty_termios_copy_hw - copy hardware settings * @new: New termios diff --git a/include/linux/tty.h b/include/linux/tty.h index 189ca80494d1..63b62865c8e9 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -255,7 +255,6 @@ struct tty_struct { int count; struct winsize winsize; /* termios mutex */ unsigned char stopped:1, hw_stopped:1, flow_stopped:1, packet:1; - unsigned char warned:1; unsigned char ctrl_status; /* ctrl_lock */ unsigned int receive_room; /* Bytes free for queue */ int flow_change; @@ -437,13 +436,28 @@ extern void tty_flush_to_ldisc(struct tty_struct *tty); extern void tty_buffer_free_all(struct tty_port *port); extern void tty_buffer_flush(struct tty_struct *tty); extern void tty_buffer_init(struct tty_port *port); -extern speed_t tty_get_baud_rate(struct tty_struct *tty); extern speed_t tty_termios_baud_rate(struct ktermios *termios); extern speed_t tty_termios_input_baud_rate(struct ktermios *termios); extern void tty_termios_encode_baud_rate(struct ktermios *termios, speed_t ibaud, speed_t obaud); extern void tty_encode_baud_rate(struct tty_struct *tty, speed_t ibaud, speed_t obaud); + +/** + * tty_get_baud_rate - get tty bit rates + * @tty: tty to query + * + * Returns the baud rate as an integer for this terminal. The + * termios lock must be held by the caller and the terminal bit + * flags may be updated. + * + * Locking: none + */ +static inline speed_t tty_get_baud_rate(struct tty_struct *tty) +{ + return tty_termios_baud_rate(&tty->termios); +} + extern void tty_termios_copy_hw(struct ktermios *new, struct ktermios *old); extern int tty_termios_hw_change(struct ktermios *a, struct ktermios *b); extern int tty_set_termios(struct tty_struct *tty, struct ktermios *kt); -- cgit From 6aad04f21374633bd8cecf25024553d1e11a9522 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 7 Mar 2013 13:12:29 +0100 Subject: TTY: add tty_port_tty_wakeup helper It allows us to clean up a considerable number of places that did port_get, wakeup, kref_put. Now the only thing needed is to call tty_port_tty_wakeup, which does exactly that. The one exception is ifx6x60, where tty_wakeup was open-coded; we now call tty_wakeup properly there.
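For illustration, a minimal before/after sketch of the conversion this helper enables; port here is a stand-in for whatever struct tty_port the driver owns:

	/* Before: each call site open-coded the reference dance. */
	struct tty_struct *tty = tty_port_tty_get(port);	/* takes a kref */
	if (tty) {
		tty_wakeup(tty);
		tty_kref_put(tty);
	}

	/* After: one call; the helper takes and drops the kref itself. */
	tty_port_tty_wakeup(port);

The helper deliberately mirrors the open-coded pattern, so the conversion is mechanical and behaviour-preserving at every call site (except ifx6x60, as noted above).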
Signed-off-by: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- arch/um/drivers/line.c | 8 +------- drivers/isdn/capi/capi.c | 7 +------ drivers/isdn/gigaset/interface.c | 6 +----- drivers/net/usb/hso.c | 13 ++----------- drivers/s390/char/sclp_tty.c | 9 ++------- drivers/s390/char/sclp_vt220.c | 8 +------- drivers/staging/fwserial/fwserial.c | 10 ++-------- drivers/staging/serqt_usb2/serqt_usb2.c | 7 +------ drivers/tty/ehv_bytechan.c | 6 +----- drivers/tty/hvc/hvsi.c | 7 +------ drivers/tty/nozomi.c | 6 +----- drivers/tty/serial/ifx6x60.c | 33 ++------------------------------- drivers/tty/tty_port.c | 16 ++++++++++++++++ drivers/usb/class/cdc-acm.c | 7 +------ drivers/usb/serial/digi_acceleport.c | 17 +++-------------- drivers/usb/serial/io_edgeport.c | 28 +++++----------------------- drivers/usb/serial/keyspan_pda.c | 6 ++---- drivers/usb/serial/mos7720.c | 8 ++------ drivers/usb/serial/mos7840.c | 7 ++----- drivers/usb/serial/ti_usb_3410_5052.c | 7 ++----- drivers/usb/serial/usb-serial.c | 10 +--------- include/linux/tty.h | 1 + 22 files changed, 51 insertions(+), 176 deletions(-) (limited to 'include/linux') diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c index f1b38571f94e..cc206eda245c 100644 --- a/arch/um/drivers/line.c +++ b/arch/um/drivers/line.c @@ -248,7 +248,6 @@ static irqreturn_t line_write_interrupt(int irq, void *data) { struct chan *chan = data; struct line *line = chan->line; - struct tty_struct *tty; int err; /* @@ -267,12 +266,7 @@ static irqreturn_t line_write_interrupt(int irq, void *data) } spin_unlock(&line->lock); - tty = tty_port_tty_get(&line->port); - if (tty == NULL) - return IRQ_NONE; - - tty_wakeup(tty); - tty_kref_put(tty); + tty_port_tty_wakeup(&line->port); return IRQ_HANDLED; } diff --git a/drivers/isdn/capi/capi.c b/drivers/isdn/capi/capi.c index 89562a845f6a..ac6f72b455d1 100644 --- a/drivers/isdn/capi/capi.c +++ b/drivers/isdn/capi/capi.c @@ -569,7 +569,6 @@ static void capi_recv_message(struct capi20_appl *ap, struct sk_buff *skb) { struct capidev *cdev = ap->private; #ifdef CONFIG_ISDN_CAPI_MIDDLEWARE - struct tty_struct *tty; struct capiminor *mp; u16 datahandle; struct capincci *np; @@ -627,11 +626,7 @@ static void capi_recv_message(struct capi20_appl *ap, struct sk_buff *skb) CAPIMSG_U16(skb->data, CAPIMSG_BASELEN + 4 + 2)); kfree_skb(skb); capiminor_del_ack(mp, datahandle); - tty = tty_port_tty_get(&mp->port); - if (tty) { - tty_wakeup(tty); - tty_kref_put(tty); - } + tty_port_tty_wakeup(&mp->port); handle_minor_send(mp); } else { diff --git a/drivers/isdn/gigaset/interface.c b/drivers/isdn/gigaset/interface.c index e2b539675b66..600c79b030cd 100644 --- a/drivers/isdn/gigaset/interface.c +++ b/drivers/isdn/gigaset/interface.c @@ -487,12 +487,8 @@ static const struct tty_operations if_ops = { static void if_wake(unsigned long data) { struct cardstate *cs = (struct cardstate *)data; - struct tty_struct *tty = tty_port_tty_get(&cs->port); - if (tty) { - tty_wakeup(tty); - tty_kref_put(tty); - } + tty_port_tty_wakeup(&cs->port); } /*** interface to common ***/ diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index e2dd3249b6bd..a7714b4f29ad 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -1925,7 +1925,6 @@ static void hso_std_serial_write_bulk_callback(struct urb *urb) { struct hso_serial *serial = urb->context; int status = urb->status; - struct tty_struct *tty; /* sanity check */ if (!serial) { @@ -1941,11 +1940,7 @@ static void hso_std_serial_write_bulk_callback(struct urb *urb) return; } 
hso_put_activity(serial->parent); - tty = tty_port_tty_get(&serial->port); - if (tty) { - tty_wakeup(tty); - tty_kref_put(tty); - } + tty_port_tty_wakeup(&serial->port); hso_kick_transmit(serial); D1(" "); @@ -2008,12 +2003,8 @@ static void ctrl_callback(struct urb *urb) put_rxbuf_data_and_resubmit_ctrl_urb(serial); spin_unlock(&serial->serial_lock); } else { - struct tty_struct *tty = tty_port_tty_get(&serial->port); hso_put_activity(serial->parent); - if (tty) { - tty_wakeup(tty); - tty_kref_put(tty); - } + tty_port_tty_wakeup(&serial->port); /* response to a write command */ hso_kick_transmit(serial); } diff --git a/drivers/s390/char/sclp_tty.c b/drivers/s390/char/sclp_tty.c index 14b4cb8abcc8..7ed7a5987816 100644 --- a/drivers/s390/char/sclp_tty.c +++ b/drivers/s390/char/sclp_tty.c @@ -107,7 +107,6 @@ sclp_tty_write_room (struct tty_struct *tty) static void sclp_ttybuf_callback(struct sclp_buffer *buffer, int rc) { - struct tty_struct *tty; unsigned long flags; void *page; @@ -125,12 +124,8 @@ sclp_ttybuf_callback(struct sclp_buffer *buffer, int rc) struct sclp_buffer, list); spin_unlock_irqrestore(&sclp_tty_lock, flags); } while (buffer && sclp_emit_buffer(buffer, sclp_ttybuf_callback)); - /* check if the tty needs a wake up call */ - tty = tty_port_tty_get(&sclp_port); - if (tty != NULL) { - tty_wakeup(tty); - tty_kref_put(tty); - } + + tty_port_tty_wakeup(&sclp_port); } static inline void diff --git a/drivers/s390/char/sclp_vt220.c b/drivers/s390/char/sclp_vt220.c index 6c92f62623be..5aaaa2ec8df4 100644 --- a/drivers/s390/char/sclp_vt220.c +++ b/drivers/s390/char/sclp_vt220.c @@ -114,7 +114,6 @@ static struct sclp_register sclp_vt220_register = { static void sclp_vt220_process_queue(struct sclp_vt220_request *request) { - struct tty_struct *tty; unsigned long flags; void *page; @@ -139,12 +138,7 @@ sclp_vt220_process_queue(struct sclp_vt220_request *request) } while (__sclp_vt220_emit(request)); if (request == NULL && sclp_vt220_flush_later) sclp_vt220_emit_current(); - /* Check if the tty needs a wake up call */ - tty = tty_port_tty_get(&sclp_vt220_port); - if (tty) { - tty_wakeup(tty); - tty_kref_put(tty); - } + tty_port_tty_wakeup(&sclp_vt220_port); } #define SCLP_BUFFER_MAX_RETRY 1 diff --git a/drivers/staging/fwserial/fwserial.c b/drivers/staging/fwserial/fwserial.c index 5a6fb44f38a8..5c64e3a35b28 100644 --- a/drivers/staging/fwserial/fwserial.c +++ b/drivers/staging/fwserial/fwserial.c @@ -744,7 +744,6 @@ static void fwtty_tx_complete(struct fw_card *card, int rcode, struct fwtty_transaction *txn) { struct fwtty_port *port = txn->port; - struct tty_struct *tty; int len; fwtty_dbg(port, "rcode: %d", rcode); @@ -769,13 +768,8 @@ static void fwtty_tx_complete(struct fw_card *card, int rcode, port->stats.dropped += txn->dma_pended.len; } - if (len < WAKEUP_CHARS) { - tty = tty_port_tty_get(&port->port); - if (tty) { - tty_wakeup(tty); - tty_kref_put(tty); - } - } + if (len < WAKEUP_CHARS) + tty_port_tty_wakeup(&port->port); } static int fwtty_tx(struct fwtty_port *port, bool drain) diff --git a/drivers/staging/serqt_usb2/serqt_usb2.c b/drivers/staging/serqt_usb2/serqt_usb2.c index b1bb1a6abe81..8a6e5ea476e1 100644 --- a/drivers/staging/serqt_usb2/serqt_usb2.c +++ b/drivers/staging/serqt_usb2/serqt_usb2.c @@ -264,7 +264,6 @@ static void ProcessRxChar(struct usb_serial_port *port, unsigned char data) static void qt_write_bulk_callback(struct urb *urb) { - struct tty_struct *tty; int status; struct quatech_port *quatech_port; @@ -278,11 +277,7 @@ static void 
qt_write_bulk_callback(struct urb *urb) quatech_port = urb->context; - tty = tty_port_tty_get(&quatech_port->port->port); - - if (tty) - tty_wakeup(tty); - tty_kref_put(tty); + tty_port_tty_wakeup(&quatech_port->port->port); } static void qt_interrupt_callback(struct urb *urb) diff --git a/drivers/tty/ehv_bytechan.c b/drivers/tty/ehv_bytechan.c index ed92622b8949..6d0c27cd03da 100644 --- a/drivers/tty/ehv_bytechan.c +++ b/drivers/tty/ehv_bytechan.c @@ -472,13 +472,9 @@ static void ehv_bc_tx_dequeue(struct ehv_bc_data *bc) static irqreturn_t ehv_bc_tty_tx_isr(int irq, void *data) { struct ehv_bc_data *bc = data; - struct tty_struct *ttys = tty_port_tty_get(&bc->port); ehv_bc_tx_dequeue(bc); - if (ttys) { - tty_wakeup(ttys); - tty_kref_put(ttys); - } + tty_port_tty_wakeup(&bc->port); return IRQ_HANDLED; } diff --git a/drivers/tty/hvc/hvsi.c b/drivers/tty/hvc/hvsi.c index ef95a154854a..41901997c0d6 100644 --- a/drivers/tty/hvc/hvsi.c +++ b/drivers/tty/hvc/hvsi.c @@ -861,7 +861,6 @@ static void hvsi_write_worker(struct work_struct *work) { struct hvsi_struct *hp = container_of(work, struct hvsi_struct, writer.work); - struct tty_struct *tty; unsigned long flags; #ifdef DEBUG static long start_j = 0; @@ -895,11 +894,7 @@ static void hvsi_write_worker(struct work_struct *work) start_j = 0; #endif /* DEBUG */ wake_up_all(&hp->emptyq); - tty = tty_port_tty_get(&hp->port); - if (tty) { - tty_wakeup(tty); - tty_kref_put(tty); - } + tty_port_tty_wakeup(&hp->port); } out: diff --git a/drivers/tty/nozomi.c b/drivers/tty/nozomi.c index 2dff19796157..2e5bbdc09e1c 100644 --- a/drivers/tty/nozomi.c +++ b/drivers/tty/nozomi.c @@ -791,7 +791,6 @@ static int send_data(enum port_type index, struct nozomi *dc) const u8 toggle = port->toggle_ul; void __iomem *addr = port->ul_addr[toggle]; const u32 ul_size = port->ul_size[toggle]; - struct tty_struct *tty = tty_port_tty_get(&port->port); /* Get data from tty and place in buf for now */ size = kfifo_out(&port->fifo_ul, dc->send_buf, @@ -799,7 +798,6 @@ static int send_data(enum port_type index, struct nozomi *dc) if (size == 0) { DBG4("No more data to send, disable link:"); - tty_kref_put(tty); return 0; } @@ -809,10 +807,8 @@ static int send_data(enum port_type index, struct nozomi *dc) write_mem32(addr, (u32 *) &size, 4); write_mem32(addr + 4, (u32 *) dc->send_buf, size); - if (tty) - tty_wakeup(tty); + tty_port_tty_wakeup(&port->port); - tty_kref_put(tty); return 1; } diff --git a/drivers/tty/serial/ifx6x60.c b/drivers/tty/serial/ifx6x60.c index 68d7ce997ede..d723d4193b90 100644 --- a/drivers/tty/serial/ifx6x60.c +++ b/drivers/tty/serial/ifx6x60.c @@ -442,25 +442,6 @@ static void ifx_spi_setup_spi_header(unsigned char *txbuffer, int tx_count, txbuffer[1] |= (more << IFX_SPI_MORE_BIT) & IFX_SPI_MORE_MASK; } -/** - * ifx_spi_wakeup_serial - SPI space made - * @port_data: our SPI device - * - * We have emptied the FIFO enough that we want to get more data - * queued into it. 
Poke the line discipline via tty_wakeup so that - * it will feed us more bits - */ -static void ifx_spi_wakeup_serial(struct ifx_spi_device *ifx_dev) -{ - struct tty_struct *tty; - - tty = tty_port_tty_get(&ifx_dev->tty_port); - if (!tty) - return; - tty_wakeup(tty); - tty_kref_put(tty); -} - /** * ifx_spi_prepare_tx_buffer - prepare transmit frame * @ifx_dev: our SPI device @@ -506,7 +487,7 @@ static int ifx_spi_prepare_tx_buffer(struct ifx_spi_device *ifx_dev) tx_count += temp_count; if (temp_count == queue_length) /* poke port to get more data */ - ifx_spi_wakeup_serial(ifx_dev); + tty_port_tty_wakeup(&ifx_dev->tty_port); else /* more data in port, use next SPI message */ ifx_dev->spi_more = 1; } @@ -683,8 +664,6 @@ static void ifx_spi_insert_flip_string(struct ifx_spi_device *ifx_dev, static void ifx_spi_complete(void *ctx) { struct ifx_spi_device *ifx_dev = ctx; - struct tty_struct *tty; - struct tty_ldisc *ldisc = NULL; int length; int actual_length; unsigned char more; @@ -762,15 +741,7 @@ complete_exit: */ ifx_spi_power_state_clear(ifx_dev, IFX_SPI_POWER_DATA_PENDING); - tty = tty_port_tty_get(&ifx_dev->tty_port); - if (tty) { - ldisc = tty_ldisc_ref(tty); - if (ldisc) { - ldisc->ops->write_wakeup(tty); - tty_ldisc_deref(ldisc); - } - tty_kref_put(tty); - } + tty_port_tty_wakeup(&ifx_dev->tty_port); } } } diff --git a/drivers/tty/tty_port.c b/drivers/tty/tty_port.c index b7ff59d3db88..8bb757c62ee2 100644 --- a/drivers/tty/tty_port.c +++ b/drivers/tty/tty_port.c @@ -232,6 +232,22 @@ void tty_port_hangup(struct tty_port *port) } EXPORT_SYMBOL(tty_port_hangup); +/** + * tty_port_tty_wakeup - helper to wake up a tty + * + * @port: tty port + */ +void tty_port_tty_wakeup(struct tty_port *port) +{ + struct tty_struct *tty = tty_port_tty_get(port); + + if (tty) { + tty_wakeup(tty); + tty_kref_put(tty); + } +} +EXPORT_SYMBOL_GPL(tty_port_tty_wakeup); + /** * tty_port_carrier_raised - carrier raised check * @port: tty port diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 8ac25adf31b4..755766e4b756 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -475,15 +475,10 @@ static void acm_write_bulk(struct urb *urb) static void acm_softint(struct work_struct *work) { struct acm *acm = container_of(work, struct acm, work); - struct tty_struct *tty; dev_vdbg(&acm->data->dev, "%s\n", __func__); - tty = tty_port_tty_get(&acm->port); - if (!tty) - return; - tty_wakeup(tty); - tty_kref_put(tty); + tty_port_tty_wakeup(&acm->port); } /* diff --git a/drivers/usb/serial/digi_acceleport.c b/drivers/usb/serial/digi_acceleport.c index ebe45fa0ed50..31191581060c 100644 --- a/drivers/usb/serial/digi_acceleport.c +++ b/drivers/usb/serial/digi_acceleport.c @@ -210,7 +210,6 @@ struct digi_port { /* Local Function Declarations */ -static void digi_wakeup_write(struct usb_serial_port *port); static void digi_wakeup_write_lock(struct work_struct *work); static int digi_write_oob_command(struct usb_serial_port *port, unsigned char *buf, int count, int interruptible); @@ -374,20 +373,10 @@ static void digi_wakeup_write_lock(struct work_struct *work) unsigned long flags; spin_lock_irqsave(&priv->dp_port_lock, flags); - digi_wakeup_write(port); + tty_port_tty_wakeup(&port->port); spin_unlock_irqrestore(&priv->dp_port_lock, flags); } -static void digi_wakeup_write(struct usb_serial_port *port) -{ - struct tty_struct *tty = tty_port_tty_get(&port->port); - if (tty) { - tty_wakeup(tty); - tty_kref_put(tty); - } -} - - /* * Digi Write OOB Command * @@ -1044,7 +1033,7 @@ 
static void digi_write_bulk_callback(struct urb *urb) } } /* wake up processes sleeping on writes immediately */ - digi_wakeup_write(port); + tty_port_tty_wakeup(&port->port); /* also queue up a wakeup at scheduler time, in case we */ /* lost the race in write_chan(). */ schedule_work(&priv->dp_wakeup_work); @@ -1522,7 +1511,7 @@ static int digi_read_oob_callback(struct urb *urb) /* port must be open to use tty struct */ if (rts) { tty->hw_stopped = 0; - digi_wakeup_write(port); + tty_port_tty_wakeup(&port->port); } } else { priv->dp_modem_signals &= ~TIOCM_CTS; diff --git a/drivers/usb/serial/io_edgeport.c b/drivers/usb/serial/io_edgeport.c index b00e5cbf741f..44e5208f7c61 100644 --- a/drivers/usb/serial/io_edgeport.c +++ b/drivers/usb/serial/io_edgeport.c @@ -565,7 +565,6 @@ static void edge_interrupt_callback(struct urb *urb) struct device *dev; struct edgeport_port *edge_port; struct usb_serial_port *port; - struct tty_struct *tty; unsigned char *data = urb->transfer_buffer; int length = urb->actual_length; int bytes_avail; @@ -644,12 +643,7 @@ static void edge_interrupt_callback(struct urb *urb) /* tell the tty driver that something has changed */ - tty = tty_port_tty_get( - &edge_port->port->port); - if (tty) { - tty_wakeup(tty); - tty_kref_put(tty); - } + tty_port_tty_wakeup(&edge_port->port->port); /* Since we have more credit, check if more data can be sent */ send_more_port_data(edge_serial, @@ -738,7 +732,6 @@ static void edge_bulk_in_callback(struct urb *urb) static void edge_bulk_out_data_callback(struct urb *urb) { struct edgeport_port *edge_port = urb->context; - struct tty_struct *tty; int status = urb->status; if (status) { @@ -747,14 +740,8 @@ static void edge_bulk_out_data_callback(struct urb *urb) __func__, status); } - tty = tty_port_tty_get(&edge_port->port->port); - - if (tty && edge_port->open) { - /* let the tty driver wakeup if it has a special - write_wakeup function */ - tty_wakeup(tty); - } - tty_kref_put(tty); + if (edge_port->open) + tty_port_tty_wakeup(&edge_port->port->port); /* Release the Write URB */ edge_port->write_in_progress = false; @@ -773,7 +760,6 @@ static void edge_bulk_out_data_callback(struct urb *urb) static void edge_bulk_out_cmd_callback(struct urb *urb) { struct edgeport_port *edge_port = urb->context; - struct tty_struct *tty; int status = urb->status; atomic_dec(&CmdUrbs); @@ -794,13 +780,9 @@ static void edge_bulk_out_cmd_callback(struct urb *urb) return; } - /* Get pointer to tty */ - tty = tty_port_tty_get(&edge_port->port->port); - /* tell the tty driver that something has changed */ - if (tty && edge_port->open) - tty_wakeup(tty); - tty_kref_put(tty); + if (edge_port->open) + tty_port_tty_wakeup(&edge_port->port->port); /* we have completed the command */ edge_port->commandPending = false; diff --git a/drivers/usb/serial/keyspan_pda.c b/drivers/usb/serial/keyspan_pda.c index 3b17d5d13dc8..2230223978ca 100644 --- a/drivers/usb/serial/keyspan_pda.c +++ b/drivers/usb/serial/keyspan_pda.c @@ -104,10 +104,8 @@ static void keyspan_pda_wakeup_write(struct work_struct *work) struct keyspan_pda_private *priv = container_of(work, struct keyspan_pda_private, wakeup_work); struct usb_serial_port *port = priv->port; - struct tty_struct *tty = tty_port_tty_get(&port->port); - if (tty) - tty_wakeup(tty); - tty_kref_put(tty); + + tty_port_tty_wakeup(&port->port); } static void keyspan_pda_request_unthrottle(struct work_struct *work) diff --git a/drivers/usb/serial/mos7720.c b/drivers/usb/serial/mos7720.c index e0ebec3b5d6a..e956eae198fd 100644 --- 
a/drivers/usb/serial/mos7720.c +++ b/drivers/usb/serial/mos7720.c @@ -932,7 +932,6 @@ static void mos7720_bulk_in_callback(struct urb *urb) static void mos7720_bulk_out_data_callback(struct urb *urb) { struct moschip_port *mos7720_port; - struct tty_struct *tty; int status = urb->status; if (status) { @@ -946,11 +945,8 @@ static void mos7720_bulk_out_data_callback(struct urb *urb) return ; } - tty = tty_port_tty_get(&mos7720_port->port->port); - - if (tty && mos7720_port->open) - tty_wakeup(tty); - tty_kref_put(tty); + if (mos7720_port->open) + tty_port_tty_wakeup(&mos7720_port->port->port); } /* diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c index 809fb329eca5..08284d28e84b 100644 --- a/drivers/usb/serial/mos7840.c +++ b/drivers/usb/serial/mos7840.c @@ -814,7 +814,6 @@ static void mos7840_bulk_out_data_callback(struct urb *urb) { struct moschip_port *mos7840_port; struct usb_serial_port *port; - struct tty_struct *tty; int status = urb->status; int i; @@ -837,10 +836,8 @@ static void mos7840_bulk_out_data_callback(struct urb *urb) if (mos7840_port_paranoia_check(port, __func__)) return; - tty = tty_port_tty_get(&port->port); - if (tty && mos7840_port->open) - tty_wakeup(tty); - tty_kref_put(tty); + if (mos7840_port->open) + tty_port_tty_wakeup(&port->port); } diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c index 39cb9b807c3c..437f2d579cde 100644 --- a/drivers/usb/serial/ti_usb_3410_5052.c +++ b/drivers/usb/serial/ti_usb_3410_5052.c @@ -1227,7 +1227,6 @@ static void ti_send(struct ti_port *tport) { int count, result; struct usb_serial_port *port = tport->tp_port; - struct tty_struct *tty = tty_port_tty_get(&port->port); /* FIXME */ unsigned long flags; spin_lock_irqsave(&tport->tp_lock, flags); @@ -1268,14 +1267,12 @@ static void ti_send(struct ti_port *tport) } /* more room in the buffer for new writes, wakeup */ - if (tty) - tty_wakeup(tty); - tty_kref_put(tty); + tty_port_tty_wakeup(&port->port); + wake_up_interruptible(&tport->tp_write_wait); return; unlock: spin_unlock_irqrestore(&tport->tp_lock, flags); - tty_kref_put(tty); return; } diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c index a19ed74d770d..2df84845bafb 100644 --- a/drivers/usb/serial/usb-serial.c +++ b/drivers/usb/serial/usb-serial.c @@ -541,16 +541,8 @@ static void usb_serial_port_work(struct work_struct *work) { struct usb_serial_port *port = container_of(work, struct usb_serial_port, work); - struct tty_struct *tty; - tty = tty_port_tty_get(&port->port); - if (!tty) - return; - - dev_dbg(tty->dev, "%s - port %d\n", __func__, port->number); - - tty_wakeup(tty); - tty_kref_put(tty); + tty_port_tty_wakeup(&port->port); } static void kill_traffic(struct usb_serial_port *port) diff --git a/include/linux/tty.h b/include/linux/tty.h index 63b62865c8e9..b6e890a87eb1 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -534,6 +534,7 @@ extern int tty_port_carrier_raised(struct tty_port *port); extern void tty_port_raise_dtr_rts(struct tty_port *port); extern void tty_port_lower_dtr_rts(struct tty_port *port); extern void tty_port_hangup(struct tty_port *port); +extern void tty_port_tty_wakeup(struct tty_port *port); extern int tty_port_block_til_ready(struct tty_port *port, struct tty_struct *tty, struct file *filp); extern int tty_port_close_start(struct tty_port *port, -- cgit From aa27a094e2c2e0cc59914e56113b860f524f4479 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 7 Mar 2013 13:12:30 +0100 Subject: TTY: add 
tty_port_tty_hangup helper It allows us to clean up a considerable number of places that did port_get, hangup, kref_put. Now the only thing needed is to call tty_port_tty_hangup, which does exactly that. Callers can also decide whether to honour CLOCAL or to ignore it completely. Signed-off-by: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- arch/um/drivers/chan_kern.c | 6 +----- drivers/mmc/card/sdio_uart.c | 13 ++---------- drivers/net/usb/hso.c | 7 +----- drivers/tty/cyclades.c | 10 ++------- drivers/tty/moxa.c | 19 ++++++---------- drivers/tty/n_gsm.c | 6 +----- drivers/tty/nozomi.c | 9 +++----- drivers/tty/rocket.c | 7 +----- drivers/tty/serial/ifx6x60.c | 21 ++---------------- drivers/tty/tty_port.c | 17 +++++++++++++++ drivers/usb/class/cdc-acm.c | 24 ++++++--------------- drivers/usb/serial/keyspan.c | 43 +++++++++---------------------------- drivers/usb/serial/option.c | 9 ++------ drivers/usb/serial/sierra.c | 8 ++----- include/linux/tty.h | 1 + net/irda/ircomm/ircomm_tty_attach.c | 6 +----- 16 files changed, 58 insertions(+), 148 deletions(-) (limited to 'include/linux') diff --git a/arch/um/drivers/chan_kern.c b/arch/um/drivers/chan_kern.c index 15c553c239a1..bf42825ba54f 100644 --- a/arch/um/drivers/chan_kern.c +++ b/arch/um/drivers/chan_kern.c @@ -568,11 +568,7 @@ void chan_interrupt(struct line *line, int irq) reactivate_fd(chan->fd, irq); if (err == -EIO) { if (chan->primary) { - struct tty_struct *tty = tty_port_tty_get(&line->port); - if (tty != NULL) { - tty_hangup(tty); - tty_kref_put(tty); - } + tty_port_tty_hangup(&line->port, false); if (line->chan_out != chan) close_one_chan(line->chan_out, 1); } diff --git a/drivers/mmc/card/sdio_uart.c b/drivers/mmc/card/sdio_uart.c index c931dfe6a59c..f093cea0d060 100644 --- a/drivers/mmc/card/sdio_uart.c +++ b/drivers/mmc/card/sdio_uart.c @@ -134,7 +134,6 @@ static void sdio_uart_port_put(struct sdio_uart_port *port) static void sdio_uart_port_remove(struct sdio_uart_port *port) { struct sdio_func *func; - struct tty_struct *tty; BUG_ON(sdio_uart_table[port->index] != port); @@ -155,12 +154,8 @@ static void sdio_uart_port_remove(struct sdio_uart_port *port) sdio_claim_host(func); port->func = NULL; mutex_unlock(&port->func_lock); - tty = tty_port_tty_get(&port->port); /* tty_hangup is async so is this safe as is ??
*/ - if (tty) { - tty_hangup(tty); - tty_kref_put(tty); - } + tty_port_tty_hangup(&port->port, false); mutex_unlock(&port->port.mutex); sdio_release_irq(func); sdio_disable_func(func); @@ -492,11 +487,7 @@ static void sdio_uart_check_modem_status(struct sdio_uart_port *port) wake_up_interruptible(&port->port.open_wait); else { /* DCD drop - hang up if tty attached */ - tty = tty_port_tty_get(&port->port); - if (tty) { - tty_hangup(tty); - tty_kref_put(tty); - } + tty_port_tty_hangup(&port->port, false); } } if (status & UART_MSR_DCTS) { diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index a7714b4f29ad..cba1d46e672e 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -3124,18 +3124,13 @@ static void hso_serial_ref_free(struct kref *ref) static void hso_free_interface(struct usb_interface *interface) { struct hso_serial *hso_dev; - struct tty_struct *tty; int i; for (i = 0; i < HSO_SERIAL_TTY_MINORS; i++) { if (serial_table[i] && (serial_table[i]->interface == interface)) { hso_dev = dev2ser(serial_table[i]); - tty = tty_port_tty_get(&hso_dev->port); - if (tty) { - tty_hangup(tty); - tty_kref_put(tty); - } + tty_port_tty_hangup(&hso_dev->port, false); mutex_lock(&hso_dev->parent->mutex); hso_dev->parent->usb_gone = 1; mutex_unlock(&hso_dev->parent->mutex); diff --git a/drivers/tty/cyclades.c b/drivers/tty/cyclades.c index 345bd0e0884e..33f83fee9fae 100644 --- a/drivers/tty/cyclades.c +++ b/drivers/tty/cyclades.c @@ -1124,14 +1124,8 @@ static void cyz_handle_cmd(struct cyclades_card *cinfo) readl(&info->u.cyz.ch_ctrl->rs_status); if (dcd & C_RS_DCD) wake_up_interruptible(&info->port.open_wait); - else { - struct tty_struct *tty; - tty = tty_port_tty_get(&info->port); - if (tty) { - tty_hangup(tty); - tty_kref_put(tty); - } - } + else + tty_port_tty_hangup(&info->port, false); } break; case C_CM_MCTS: diff --git a/drivers/tty/moxa.c b/drivers/tty/moxa.c index adeac255e526..1deaca4674e4 100644 --- a/drivers/tty/moxa.c +++ b/drivers/tty/moxa.c @@ -913,16 +913,12 @@ static void moxa_board_deinit(struct moxa_board_conf *brd) /* pci hot-un-plug support */ for (a = 0; a < brd->numPorts; a++) - if (brd->ports[a].port.flags & ASYNC_INITIALIZED) { - struct tty_struct *tty = tty_port_tty_get( - &brd->ports[a].port); - if (tty) { - tty_hangup(tty); - tty_kref_put(tty); - } - } + if (brd->ports[a].port.flags & ASYNC_INITIALIZED) + tty_port_tty_hangup(&brd->ports[a].port, false); + for (a = 0; a < MAX_PORTS_PER_BOARD; a++) tty_port_destroy(&brd->ports[a].port); + while (1) { opened = 0; for (a = 0; a < brd->numPorts; a++) @@ -1365,7 +1361,6 @@ static void moxa_hangup(struct tty_struct *tty) static void moxa_new_dcdstate(struct moxa_port *p, u8 dcd) { - struct tty_struct *tty; unsigned long flags; dcd = !!dcd; @@ -1373,10 +1368,8 @@ static void moxa_new_dcdstate(struct moxa_port *p, u8 dcd) if (dcd != p->DCDState) { p->DCDState = dcd; spin_unlock_irqrestore(&p->port.lock, flags); - tty = tty_port_tty_get(&p->port); - if (tty && !C_CLOCAL(tty) && !dcd) - tty_hangup(tty); - tty_kref_put(tty); + if (!dcd) + tty_port_tty_hangup(&p->port, true); } else spin_unlock_irqrestore(&p->port.lock, flags); diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index 4a43ef5d7962..74d9a0258d7c 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -1418,11 +1418,7 @@ static void gsm_dlci_close(struct gsm_dlci *dlci) pr_debug("DLCI %d goes closed.\n", dlci->addr); dlci->state = DLCI_CLOSED; if (dlci->addr != 0) { - struct tty_struct *tty = tty_port_tty_get(&dlci->port); - if (tty) { - 
tty_hangup(tty); - tty_kref_put(tty); - } + tty_port_tty_hangup(&dlci->port, false); kfifo_reset(dlci->fifo); } else dlci->gsm->dead = 1; diff --git a/drivers/tty/nozomi.c b/drivers/tty/nozomi.c index 2e5bbdc09e1c..d6080c3831ef 100644 --- a/drivers/tty/nozomi.c +++ b/drivers/tty/nozomi.c @@ -1501,12 +1501,9 @@ static void tty_exit(struct nozomi *dc) DBG1(" "); - for (i = 0; i < MAX_PORT; ++i) { - struct tty_struct *tty = tty_port_tty_get(&dc->port[i].port); - if (tty && list_empty(&tty->hangup_work.entry)) - tty_hangup(tty); - tty_kref_put(tty); - } + for (i = 0; i < MAX_PORT; ++i) + tty_port_tty_hangup(&dc->port[i].port, false); + /* Racy below - surely should wait for scheduled work to be done or complete off a hangup method ? */ while (dc->open_ttys) diff --git a/drivers/tty/rocket.c b/drivers/tty/rocket.c index 1d270034bfc3..bbffd7a431e9 100644 --- a/drivers/tty/rocket.c +++ b/drivers/tty/rocket.c @@ -521,15 +521,10 @@ static void rp_handle_port(struct r_port *info) (ChanStatus & CD_ACT) ? "on" : "off"); #endif if (!(ChanStatus & CD_ACT) && info->cd_status) { - struct tty_struct *tty; #ifdef ROCKET_DEBUG_HANGUP printk(KERN_INFO "CD drop, calling hangup.\n"); #endif - tty = tty_port_tty_get(&info->port); - if (tty) { - tty_hangup(tty); - tty_kref_put(tty); - } + tty_port_tty_hangup(&info->port, false); } info->cd_status = (ChanStatus & CD_ACT) ? 1 : 0; wake_up_interruptible(&info->port.open_wait); diff --git a/drivers/tty/serial/ifx6x60.c b/drivers/tty/serial/ifx6x60.c index d723d4193b90..2c77fed31a72 100644 --- a/drivers/tty/serial/ifx6x60.c +++ b/drivers/tty/serial/ifx6x60.c @@ -269,23 +269,6 @@ static void mrdy_assert(struct ifx_spi_device *ifx_dev) mrdy_set_high(ifx_dev); } -/** - * ifx_spi_hangup - hang up an IFX device - * @ifx_dev: our SPI device - * - * Hang up the tty attached to the IFX device if one is currently - * open. If not take no action - */ -static void ifx_spi_ttyhangup(struct ifx_spi_device *ifx_dev) -{ - struct tty_port *pport = &ifx_dev->tty_port; - struct tty_struct *tty = tty_port_tty_get(pport); - if (tty) { - tty_hangup(tty); - tty_kref_put(tty); - } -} - /** * ifx_spi_timeout - SPI timeout * @arg: our SPI device @@ -298,7 +281,7 @@ static void ifx_spi_timeout(unsigned long arg) struct ifx_spi_device *ifx_dev = (struct ifx_spi_device *)arg; dev_warn(&ifx_dev->spi_dev->dev, "*** SPI Timeout ***"); - ifx_spi_ttyhangup(ifx_dev); + tty_port_tty_hangup(&ifx_dev->tty_port, false); mrdy_set_low(ifx_dev); clear_bit(IFX_SPI_STATE_TIMER_PENDING, &ifx_dev->flags); } @@ -933,7 +916,7 @@ static irqreturn_t ifx_spi_reset_interrupt(int irq, void *dev) set_bit(MR_INPROGRESS, &ifx_dev->mdm_reset_state); if (!solreset) { /* unsolicited reset */ - ifx_spi_ttyhangup(ifx_dev); + tty_port_tty_hangup(&ifx_dev->tty_port, false); } } else { /* exited reset */ diff --git a/drivers/tty/tty_port.c b/drivers/tty/tty_port.c index 8bb757c62ee2..7f38eeaafac3 100644 --- a/drivers/tty/tty_port.c +++ b/drivers/tty/tty_port.c @@ -232,6 +232,23 @@ void tty_port_hangup(struct tty_port *port) } EXPORT_SYMBOL(tty_port_hangup); +/** + * tty_port_tty_hangup - helper to hang up a tty + * + * @port: tty port + * @check_clocal: hang only ttys with CLOCAL unset? 
+ */ +void tty_port_tty_hangup(struct tty_port *port, bool check_clocal) +{ + struct tty_struct *tty = tty_port_tty_get(port); + + if (tty && (!check_clocal || !C_CLOCAL(tty))) { + tty_hangup(tty); + tty_kref_put(tty); + } +} +EXPORT_SYMBOL_GPL(tty_port_tty_hangup); + /** * tty_port_tty_wakeup - helper to wake up a tty * diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 755766e4b756..27a18743275e 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -292,7 +292,6 @@ static void acm_ctrl_irq(struct urb *urb) { struct acm *acm = urb->context; struct usb_cdc_notification *dr = urb->transfer_buffer; - struct tty_struct *tty; unsigned char *data; int newctrl; int retval; @@ -327,17 +326,12 @@ static void acm_ctrl_irq(struct urb *urb) break; case USB_CDC_NOTIFY_SERIAL_STATE: - tty = tty_port_tty_get(&acm->port); newctrl = get_unaligned_le16(data); - if (tty) { - if (!acm->clocal && - (acm->ctrlin & ~newctrl & ACM_CTRL_DCD)) { - dev_dbg(&acm->control->dev, - "%s - calling hangup\n", __func__); - tty_hangup(tty); - } - tty_kref_put(tty); + if (!acm->clocal && (acm->ctrlin & ~newctrl & ACM_CTRL_DCD)) { + dev_dbg(&acm->control->dev, "%s - calling hangup\n", + __func__); + tty_port_tty_hangup(&acm->port, false); } acm->ctrlin = newctrl; @@ -1498,15 +1492,9 @@ err_out: static int acm_reset_resume(struct usb_interface *intf) { struct acm *acm = usb_get_intfdata(intf); - struct tty_struct *tty; - if (test_bit(ASYNCB_INITIALIZED, &acm->port.flags)) { - tty = tty_port_tty_get(&acm->port); - if (tty) { - tty_hangup(tty); - tty_kref_put(tty); - } - } + if (test_bit(ASYNCB_INITIALIZED, &acm->port.flags)) + tty_port_tty_hangup(&acm->port, false); return acm_resume(intf); } diff --git a/drivers/usb/serial/keyspan.c b/drivers/usb/serial/keyspan.c index 1fd1935c8316..b011478d2e5f 100644 --- a/drivers/usb/serial/keyspan.c +++ b/drivers/usb/serial/keyspan.c @@ -378,7 +378,6 @@ static void usa26_instat_callback(struct urb *urb) struct usb_serial *serial; struct usb_serial_port *port; struct keyspan_port_private *p_priv; - struct tty_struct *tty; int old_dcd_state, err; int status = urb->status; @@ -421,12 +420,8 @@ static void usa26_instat_callback(struct urb *urb) p_priv->dcd_state = ((msg->gpia_dcd) ? 1 : 0); p_priv->ri_state = ((msg->ri) ? 1 : 0); - if (old_dcd_state != p_priv->dcd_state) { - tty = tty_port_tty_get(&port->port); - if (tty && !C_CLOCAL(tty)) - tty_hangup(tty); - tty_kref_put(tty); - } + if (old_dcd_state != p_priv->dcd_state) + tty_port_tty_hangup(&port->port, true); /* Resubmit urb so we continue receiving */ err = usb_submit_urb(urb, GFP_ATOMIC); @@ -510,7 +505,6 @@ static void usa28_instat_callback(struct urb *urb) struct usb_serial *serial; struct usb_serial_port *port; struct keyspan_port_private *p_priv; - struct tty_struct *tty; int old_dcd_state; int status = urb->status; @@ -551,12 +545,8 @@ static void usa28_instat_callback(struct urb *urb) p_priv->dcd_state = ((msg->dcd) ? 1 : 0); p_priv->ri_state = ((msg->ri) ? 1 : 0); - if (old_dcd_state != p_priv->dcd_state && old_dcd_state) { - tty = tty_port_tty_get(&port->port); - if (tty && !C_CLOCAL(tty)) - tty_hangup(tty); - tty_kref_put(tty); - } + if (old_dcd_state != p_priv->dcd_state && old_dcd_state) + tty_port_tty_hangup(&port->port, true); /* Resubmit urb so we continue receiving */ err = usb_submit_urb(urb, GFP_ATOMIC); @@ -642,12 +632,8 @@ static void usa49_instat_callback(struct urb *urb) p_priv->dcd_state = ((msg->dcd) ? 1 : 0); p_priv->ri_state = ((msg->ri) ? 
1 : 0); - if (old_dcd_state != p_priv->dcd_state && old_dcd_state) { - struct tty_struct *tty = tty_port_tty_get(&port->port); - if (tty && !C_CLOCAL(tty)) - tty_hangup(tty); - tty_kref_put(tty); - } + if (old_dcd_state != p_priv->dcd_state && old_dcd_state) + tty_port_tty_hangup(&port->port, true); /* Resubmit urb so we continue receiving */ err = usb_submit_urb(urb, GFP_ATOMIC); @@ -851,7 +837,6 @@ static void usa90_instat_callback(struct urb *urb) struct usb_serial *serial; struct usb_serial_port *port; struct keyspan_port_private *p_priv; - struct tty_struct *tty; int old_dcd_state, err; int status = urb->status; @@ -880,12 +865,8 @@ static void usa90_instat_callback(struct urb *urb) p_priv->dcd_state = ((msg->dcd) ? 1 : 0); p_priv->ri_state = ((msg->ri) ? 1 : 0); - if (old_dcd_state != p_priv->dcd_state && old_dcd_state) { - tty = tty_port_tty_get(&port->port); - if (tty && !C_CLOCAL(tty)) - tty_hangup(tty); - tty_kref_put(tty); - } + if (old_dcd_state != p_priv->dcd_state && old_dcd_state) + tty_port_tty_hangup(&port->port, true); /* Resubmit urb so we continue receiving */ err = usb_submit_urb(urb, GFP_ATOMIC); @@ -953,12 +934,8 @@ static void usa67_instat_callback(struct urb *urb) p_priv->cts_state = ((msg->hskia_cts) ? 1 : 0); p_priv->dcd_state = ((msg->gpia_dcd) ? 1 : 0); - if (old_dcd_state != p_priv->dcd_state && old_dcd_state) { - struct tty_struct *tty = tty_port_tty_get(&port->port); - if (tty && !C_CLOCAL(tty)) - tty_hangup(tty); - tty_kref_put(tty); - } + if (old_dcd_state != p_priv->dcd_state && old_dcd_state) + tty_port_tty_hangup(&port->port, true); /* Resubmit urb so we continue receiving */ err = usb_submit_urb(urb, GFP_ATOMIC); diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index f7d339d8187b..602d1f389a3b 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1532,13 +1532,8 @@ static void option_instat_callback(struct urb *urb) portdata->dsr_state = ((signals & 0x02) ? 1 : 0); portdata->ri_state = ((signals & 0x08) ? 1 : 0); - if (old_dcd_state && !portdata->dcd_state) { - struct tty_struct *tty = - tty_port_tty_get(&port->port); - if (tty && !C_CLOCAL(tty)) - tty_hangup(tty); - tty_kref_put(tty); - } + if (old_dcd_state && !portdata->dcd_state) + tty_port_tty_hangup(&port->port, true); } else { dev_dbg(dev, "%s: type %x req %x\n", __func__, req_pkt->bRequestType, req_pkt->bRequest); diff --git a/drivers/usb/serial/sierra.c b/drivers/usb/serial/sierra.c index c13f6e747748..d66148a17fe3 100644 --- a/drivers/usb/serial/sierra.c +++ b/drivers/usb/serial/sierra.c @@ -628,7 +628,6 @@ static void sierra_instat_callback(struct urb *urb) unsigned char signals = *((unsigned char *) urb->transfer_buffer + sizeof(struct usb_ctrlrequest)); - struct tty_struct *tty; dev_dbg(&port->dev, "%s: signal x%x\n", __func__, signals); @@ -639,11 +638,8 @@ static void sierra_instat_callback(struct urb *urb) portdata->dsr_state = ((signals & 0x02) ? 1 : 0); portdata->ri_state = ((signals & 0x08) ? 
1 : 0); - tty = tty_port_tty_get(&port->port); - if (tty && !C_CLOCAL(tty) && - old_dcd_state && !portdata->dcd_state) - tty_hangup(tty); - tty_kref_put(tty); + if (old_dcd_state && !portdata->dcd_state) + tty_port_tty_hangup(&port->port, true); } else { dev_dbg(&port->dev, "%s: type %x req %x\n", __func__, req_pkt->bRequestType, diff --git a/include/linux/tty.h b/include/linux/tty.h index b6e890a87eb1..d3548f871968 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -534,6 +534,7 @@ extern int tty_port_carrier_raised(struct tty_port *port); extern void tty_port_raise_dtr_rts(struct tty_port *port); extern void tty_port_lower_dtr_rts(struct tty_port *port); extern void tty_port_hangup(struct tty_port *port); +extern void tty_port_tty_hangup(struct tty_port *port, bool check_clocal); extern void tty_port_tty_wakeup(struct tty_port *port); extern int tty_port_block_til_ready(struct tty_port *port, struct tty_struct *tty, struct file *filp); diff --git a/net/irda/ircomm/ircomm_tty_attach.c b/net/irda/ircomm/ircomm_tty_attach.c index edab393e0c82..a2a508f5f268 100644 --- a/net/irda/ircomm/ircomm_tty_attach.c +++ b/net/irda/ircomm/ircomm_tty_attach.c @@ -997,12 +997,8 @@ static int ircomm_tty_state_ready(struct ircomm_tty_cb *self, self->settings.dce = IRCOMM_DELTA_CD; ircomm_tty_check_modem_status(self); } else { - struct tty_struct *tty = tty_port_tty_get(&self->port); IRDA_DEBUG(0, "%s(), hanging up!\n", __func__ ); - if (tty) { - tty_hangup(tty); - tty_kref_put(tty); - } + tty_port_tty_hangup(&self->port, false); } break; default: -- cgit From 21622939fc452c7fb739464b8e49368c3ceaa0ee Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Mon, 11 Mar 2013 16:44:21 -0400 Subject: tty: Add diagnostic for halted line discipline Flip buffer work must not be scheduled by the line discipline after the line discipline has been halted; issue a warning. Note: drivers can still schedule flip buffer work.
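The soundness of the diagnostic comes from ordering, visible in the hunks that follow: tty_ldisc_halt() cancels the buffer work and only afterwards sets TTY_LDISC_HALTED, so any subsequent attempt by the ldisc to reschedule that work is a genuine bug. A condensed sketch of the check added to n_tty_set_room():

	if (left && !old_left) {
		/* the ldisc has been halted and ->buf.work cancelled,
		 * yet ->buf.work is about to be rescheduled - warn,
		 * ratelimited */
		WARN_RATELIMIT(test_bit(TTY_LDISC_HALTED, &tty->flags),
			       "scheduling buffer work for halted ldisc\n");
		schedule_work(&tty->port->buf.work);
	}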
Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_tty.c | 8 ++++++++ drivers/tty/tty_ldisc.c | 7 ++++++- include/linux/tty.h | 1 + 3 files changed, 15 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index 68865d9af8a0..16793eccc6ae 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -153,6 +153,12 @@ static void n_tty_set_room(struct tty_struct *tty) if (left && !old_left) { WARN_RATELIMIT(tty->port->itty == NULL, "scheduling with invalid itty\n"); + /* see if ldisc has been killed - if so, this means that + * even though the ldisc has been halted and ->buf.work + * cancelled, ->buf.work is about to be rescheduled + */ + WARN_RATELIMIT(test_bit(TTY_LDISC_HALTED, &tty->flags), + "scheduling buffer work for halted ldisc\n"); schedule_work(&tty->port->buf.work); } } @@ -1624,6 +1630,8 @@ static int n_tty_open(struct tty_struct *tty) goto err_free_bufs; tty->disc_data = ldata; + /* indicate buffer work may resume */ + clear_bit(TTY_LDISC_HALTED, &tty->flags); reset_buffer_flags(tty); tty_unthrottle(tty); ldata->column = 0; diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c index d794087c327e..c641321b9404 100644 --- a/drivers/tty/tty_ldisc.c +++ b/drivers/tty/tty_ldisc.c @@ -375,6 +375,7 @@ static inline void tty_ldisc_put(struct tty_ldisc *ld) void tty_ldisc_enable(struct tty_struct *tty) { + clear_bit(TTY_LDISC_HALTED, &tty->flags); set_bit(TTY_LDISC, &tty->flags); clear_bit(TTY_LDISC_CHANGING, &tty->flags); wake_up(&tty_ldisc_wait); @@ -513,8 +514,11 @@ static void tty_ldisc_restore(struct tty_struct *tty, struct tty_ldisc *old) static int tty_ldisc_halt(struct tty_struct *tty) { + int scheduled; clear_bit(TTY_LDISC, &tty->flags); - return cancel_work_sync(&tty->port->buf.work); + scheduled = cancel_work_sync(&tty->port->buf.work); + set_bit(TTY_LDISC_HALTED, &tty->flags); + return scheduled; } /** @@ -820,6 +824,7 @@ void tty_ldisc_hangup(struct tty_struct *tty) clear_bit(TTY_LDISC, &tty->flags); tty_unlock(tty); cancel_work_sync(&tty->port->buf.work); + set_bit(TTY_LDISC_HALTED, &tty->flags); mutex_unlock(&tty->ldisc_mutex); retry: tty_lock(tty); diff --git a/include/linux/tty.h b/include/linux/tty.h index d3548f871968..66ae020e8a98 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -315,6 +315,7 @@ struct tty_file_private { #define TTY_NO_WRITE_SPLIT 17 /* Preserve write boundaries to driver */ #define TTY_HUPPED 18 /* Post driver->hangup() */ #define TTY_HUPPING 21 /* ->hangup() in progress */ +#define TTY_LDISC_HALTED 22 /* Line discipline is halted */ #define TTY_WRITE_FLUSH(tty) tty_write_flush((tty)) -- cgit From d912156605b0eb3b3070dc7eabc43db6379aa43b Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Mon, 11 Mar 2013 16:44:33 -0400 Subject: tty: Don't reenable already enabled ldisc tty_ldisc_hangup() guarantees the ldisc is enabled (or that there is no ldisc). Since __tty_hangup() was the only user, re-define tty_ldisc_enable() with file scope.
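Taken together with the diagnostic patch above, the flag's lifecycle is now symmetric: halt sets TTY_LDISC_HALTED only after cancelling the buffer work, and enable clears it before setting TTY_LDISC. A condensed view assembled from the hunks in this series:

	static int tty_ldisc_halt(struct tty_struct *tty)
	{
		int scheduled;

		clear_bit(TTY_LDISC, &tty->flags);
		scheduled = cancel_work_sync(&tty->port->buf.work);
		set_bit(TTY_LDISC_HALTED, &tty->flags);	/* buffer work must stop */
		return scheduled;
	}

	static void tty_ldisc_enable(struct tty_struct *tty)
	{
		clear_bit(TTY_LDISC_HALTED, &tty->flags); /* buffer work may resume */
		set_bit(TTY_LDISC, &tty->flags);
		clear_bit(TTY_LDISC_CHANGING, &tty->flags);
		wake_up(&tty_ldisc_wait);
	}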
Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_io.c | 1 - drivers/tty/tty_ldisc.c | 2 +- include/linux/tty.h | 2 -- 3 files changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index d3ddb31e363e..e6ee0f459a20 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -693,7 +693,6 @@ static void __tty_hangup(struct tty_struct *tty, int exit_session) */ set_bit(TTY_HUPPED, &tty->flags); clear_bit(TTY_HUPPING, &tty->flags); - tty_ldisc_enable(tty); tty_unlock(tty); diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c index 37671fcc7e4c..9c727da59fac 100644 --- a/drivers/tty/tty_ldisc.c +++ b/drivers/tty/tty_ldisc.c @@ -373,7 +373,7 @@ static inline void tty_ldisc_put(struct tty_ldisc *ld) * Clearing directly is allowed. */ -void tty_ldisc_enable(struct tty_struct *tty) +static void tty_ldisc_enable(struct tty_struct *tty) { clear_bit(TTY_LDISC_HALTED, &tty->flags); set_bit(TTY_LDISC, &tty->flags); diff --git a/include/linux/tty.h b/include/linux/tty.h index 66ae020e8a98..367a9dfc4ea2 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -561,8 +561,6 @@ extern void tty_ldisc_release(struct tty_struct *tty, struct tty_struct *o_tty); extern void tty_ldisc_init(struct tty_struct *tty); extern void tty_ldisc_deinit(struct tty_struct *tty); extern void tty_ldisc_begin(void); -/* This last one is just for the tty layer internals and shouldn't be used elsewhere */ -extern void tty_ldisc_enable(struct tty_struct *tty); /* n_tty.c */ -- cgit From 6f8da5df8c451103e0043f73a00c90676da6be9e Mon Sep 17 00:00:00 2001 From: Rhyland Klein Date: Tue, 12 Mar 2013 18:08:09 -0400 Subject: power_supply: Add support for tps65090-charger This patch adds support for the tps65090 charger driver. This driver is responsible for controlling the charger aspect of the tps65090 mfd. Currently, this mainly consists of turning on and off the charger, but some features of the charger can be supported through this driver including: - Enable Auto Recharge based on Battery voltage - Fast Charge Safety Timer - Maximum battery discharge current - Maximum battery adapter current - Enable External Charge - Disable charging termination based on low charger current (supported) Once the driver is accepted, later patches can add support for the features above which are not yet supported. Based on work by: Syed Rafiuddin Laxman Dewangan Signed-off-by: Rhyland Klein Signed-off-by: Anton Vorontsov --- drivers/power/Kconfig | 7 + drivers/power/Makefile | 1 + drivers/power/tps65090-charger.c | 315 +++++++++++++++++++++++++++++++++++++++ include/linux/mfd/tps65090.h | 5 + 4 files changed, 328 insertions(+) create mode 100644 drivers/power/tps65090-charger.c (limited to 'include/linux') diff --git a/drivers/power/Kconfig b/drivers/power/Kconfig index 07e1a8f8d03e..339f802b91c1 100644 --- a/drivers/power/Kconfig +++ b/drivers/power/Kconfig @@ -340,6 +340,13 @@ config CHARGER_SMB347 Say Y to include support for Summit Microelectronics SMB347 Battery Charger. +config CHARGER_TPS65090 + tristate "TPS65090 battery charger driver" + depends on MFD_TPS65090 + help + Say Y here to enable support for battery charging with TPS65090 + PMIC chips. 
+ config AB8500_BM bool "AB8500 Battery Management Driver" depends on AB8500_CORE && AB8500_GPADC diff --git a/drivers/power/Makefile b/drivers/power/Makefile index eb520ea74970..653bf6ceff30 100644 --- a/drivers/power/Makefile +++ b/drivers/power/Makefile @@ -52,4 +52,5 @@ obj-$(CONFIG_CHARGER_MAX8998) += max8998_charger.o obj-$(CONFIG_CHARGER_BQ2415X) += bq2415x_charger.o obj-$(CONFIG_POWER_AVS) += avs/ obj-$(CONFIG_CHARGER_SMB347) += smb347-charger.o +obj-$(CONFIG_CHARGER_TPS65090) += tps65090-charger.o obj-$(CONFIG_POWER_RESET) += reset/ diff --git a/drivers/power/tps65090-charger.c b/drivers/power/tps65090-charger.c new file mode 100644 index 000000000000..0c66c6656b13 --- /dev/null +++ b/drivers/power/tps65090-charger.c @@ -0,0 +1,315 @@ +/* + * Battery charger driver for TI's tps65090 + * + * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. + + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define TPS65090_REG_INTR_STS 0x00 +#define TPS65090_REG_CG_CTRL0 0x04 +#define TPS65090_REG_CG_CTRL1 0x05 +#define TPS65090_REG_CG_CTRL2 0x06 +#define TPS65090_REG_CG_CTRL3 0x07 +#define TPS65090_REG_CG_CTRL4 0x08 +#define TPS65090_REG_CG_CTRL5 0x09 +#define TPS65090_REG_CG_STATUS1 0x0a +#define TPS65090_REG_CG_STATUS2 0x0b + +#define TPS65090_CHARGER_ENABLE BIT(0) +#define TPS65090_VACG BIT(1) +#define TPS65090_NOITERM BIT(5) + +struct tps65090_charger { + struct device *dev; + int ac_online; + int prev_ac_online; + int irq; + struct power_supply ac; + struct tps65090_platform_data *pdata; +}; + +static enum power_supply_property tps65090_ac_props[] = { + POWER_SUPPLY_PROP_ONLINE, +}; + +static int tps65090_low_chrg_current(struct tps65090_charger *charger) +{ + int ret; + + ret = tps65090_write(charger->dev->parent, TPS65090_REG_CG_CTRL5, + TPS65090_NOITERM); + if (ret < 0) { + dev_err(charger->dev, "%s(): error reading in register 0x%x\n", + __func__, TPS65090_REG_CG_CTRL5); + return ret; + } + return 0; +} + +static int tps65090_enable_charging(struct tps65090_charger *charger, + uint8_t enable) +{ + int ret; + uint8_t ctrl0 = 0; + + ret = tps65090_read(charger->dev->parent, TPS65090_REG_CG_CTRL0, + &ctrl0); + if (ret < 0) { + dev_err(charger->dev, "%s(): error reading in register 0x%x\n", + __func__, TPS65090_REG_CG_CTRL0); + return ret; + } + + ret = tps65090_write(charger->dev->parent, TPS65090_REG_CG_CTRL0, + (ctrl0 | TPS65090_CHARGER_ENABLE)); + if (ret < 0) { + dev_err(charger->dev, "%s(): error reading in register 0x%x\n", + __func__, TPS65090_REG_CG_CTRL0); + return ret; + } + return 0; +} + +static int tps65090_config_charger(struct tps65090_charger *charger) +{ + int ret; + + if (charger->pdata->enable_low_current_chrg) { + ret = tps65090_low_chrg_current(charger); + if (ret < 0) { + dev_err(charger->dev, + "error configuring low charge current\n"); + return ret; + } + } + + return 0; +} + +static int tps65090_ac_get_property(struct power_supply *psy, + 
enum power_supply_property psp, + union power_supply_propval *val) +{ + struct tps65090_charger *charger = container_of(psy, + struct tps65090_charger, ac); + + if (psp == POWER_SUPPLY_PROP_ONLINE) { + val->intval = charger->ac_online; + charger->prev_ac_online = charger->ac_online; + return 0; + } + return -EINVAL; +} + +static irqreturn_t tps65090_charger_isr(int irq, void *dev_id) +{ + struct tps65090_charger *charger = dev_id; + int ret; + uint8_t status1 = 0; + uint8_t intrsts = 0; + + ret = tps65090_read(charger->dev->parent, TPS65090_REG_CG_STATUS1, + &status1); + if (ret < 0) { + dev_err(charger->dev, "%s(): Error in reading reg 0x%x\n", + __func__, TPS65090_REG_CG_STATUS1); + return IRQ_HANDLED; + } + msleep(75); + ret = tps65090_read(charger->dev->parent, TPS65090_REG_INTR_STS, + &intrsts); + if (ret < 0) { + dev_err(charger->dev, "%s(): Error in reading reg 0x%x\n", + __func__, TPS65090_REG_INTR_STS); + return IRQ_HANDLED; + } + + if (intrsts & TPS65090_VACG) { + ret = tps65090_enable_charging(charger, 1); + if (ret < 0) + return IRQ_HANDLED; + charger->ac_online = 1; + } else { + charger->ac_online = 0; + } + + if (charger->prev_ac_online != charger->ac_online) + power_supply_changed(&charger->ac); + + return IRQ_HANDLED; +} + +#if defined(CONFIG_OF) + +#include + +static struct tps65090_platform_data * + tps65090_parse_dt_charger_data(struct platform_device *pdev) +{ + struct tps65090_platform_data *pdata; + struct device_node *np = pdev->dev.parent->of_node; + unsigned int prop; + + pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL); + if (!pdata) { + dev_err(&pdev->dev, "Memory alloc for tps65090_pdata failed\n"); + return NULL; + } + + prop = of_property_read_bool(np, "ti,enable-low-current-chrg"); + pdata->enable_low_current_chrg = prop; + + pdata->irq_base = -1; + + return pdata; + +} +#else +static struct tps65090_platform_data * + tps65090_parse_dt_charger_data(struct platform_device *pdev) +{ + return NULL; +} +#endif + +static int tps65090_charger_probe(struct platform_device *pdev) +{ + struct tps65090 *tps65090_mfd = dev_get_drvdata(pdev->dev.parent); + struct tps65090_charger *cdata; + struct tps65090_platform_data *pdata; + uint8_t status1 = 0; + int ret; + int irq; + + pdata = dev_get_platdata(pdev->dev.parent); + + if (!pdata && tps65090_mfd->dev->of_node) + pdata = tps65090_parse_dt_charger_data(pdev); + + if (!pdata) { + dev_err(&pdev->dev, "%s():no platform data available\n", + __func__); + return -ENODEV; + } + + cdata = devm_kzalloc(&pdev->dev, sizeof(*cdata), GFP_KERNEL); + if (!cdata) { + dev_err(&pdev->dev, "failed to allocate memory status\n"); + return -ENOMEM; + } + + dev_set_drvdata(&pdev->dev, cdata); + + cdata->dev = &pdev->dev; + cdata->pdata = pdata; + + cdata->ac.name = "tps65090-ac"; + cdata->ac.type = POWER_SUPPLY_TYPE_MAINS; + cdata->ac.get_property = tps65090_ac_get_property; + cdata->ac.properties = tps65090_ac_props; + cdata->ac.num_properties = ARRAY_SIZE(tps65090_ac_props); + cdata->ac.supplied_to = pdata->supplied_to; + cdata->ac.num_supplicants = pdata->num_supplicants; + + ret = power_supply_register(&pdev->dev, &cdata->ac); + if (ret) { + dev_err(&pdev->dev, "failed: power supply register\n"); + return ret; + } + + irq = platform_get_irq(pdev, 0); + if (irq <= 0) { + dev_warn(&pdev->dev, "Unable to get charger irq = %d\n", irq); + ret = irq; + goto fail_unregister_supply; + } + + cdata->irq = irq; + + ret = devm_request_threaded_irq(&pdev->dev, irq, NULL, + tps65090_charger_isr, 0, "tps65090-charger", cdata); + if (ret) { + 
dev_err(cdata->dev, "Unable to register irq %d err %d\n", irq, + ret); + goto fail_free_irq; + } + + ret = tps65090_config_charger(cdata); + if (ret < 0) { + dev_err(&pdev->dev, "charger config failed, err %d\n", ret); + goto fail_free_irq; + } + + /* Check for charger presence */ + ret = tps65090_read(cdata->dev->parent, TPS65090_REG_CG_STATUS1, + &status1); + if (ret < 0) { + dev_err(cdata->dev, "%s(): Error in reading reg 0x%x", __func__, + TPS65090_REG_CG_STATUS1); + goto fail_free_irq; + } + + if (status1 != 0) { + ret = tps65090_enable_charging(cdata, 1); + if (ret < 0) { + dev_err(cdata->dev, "error enabling charger\n"); + goto fail_free_irq; + } + cdata->ac_online = 1; + power_supply_changed(&cdata->ac); + } + + return 0; + +fail_free_irq: + devm_free_irq(cdata->dev, irq, cdata); +fail_unregister_supply: + power_supply_unregister(&cdata->ac); + + return ret; +} + +static int tps65090_charger_remove(struct platform_device *pdev) +{ + struct tps65090_charger *cdata = dev_get_drvdata(&pdev->dev); + + devm_free_irq(cdata->dev, cdata->irq, cdata); + power_supply_unregister(&cdata->ac); + + return 0; +} + +static struct platform_driver tps65090_charger_driver = { + .driver = { + .name = "tps65090-charger", + .owner = THIS_MODULE, + }, + .probe = tps65090_charger_probe, + .remove = tps65090_charger_remove, +}; +module_platform_driver(tps65090_charger_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Syed Rafiuddin "); +MODULE_DESCRIPTION("tps65090 battery charger driver"); diff --git a/include/linux/mfd/tps65090.h b/include/linux/mfd/tps65090.h index 6694cf43e8b8..998628a2b08b 100644 --- a/include/linux/mfd/tps65090.h +++ b/include/linux/mfd/tps65090.h @@ -86,6 +86,11 @@ struct tps65090_regulator_plat_data { struct tps65090_platform_data { int irq_base; + + char **supplied_to; + size_t num_supplicants; + int enable_low_current_chrg; + struct tps65090_regulator_plat_data *reg_pdata[TPS65090_REGULATOR_MAX]; }; -- cgit From 3d6ee287a3e341c88eafd0b4620b12d640b3736b Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 12 Mar 2013 20:26:02 +0100 Subject: clk: Introduce optional is_prepared callback To reflect whether a clk_hw is prepared the clk_hw may implement the optional is_prepared callback. If not implemented we fall back to use the software prepare counter. Signed-off-by: Ulf Hansson Acked-by: Linus Walleij Signed-off-by: Mike Turquette --- drivers/clk/clk.c | 21 +++++++++++++++++++++ include/linux/clk-provider.h | 6 ++++++ 2 files changed, 27 insertions(+) (limited to 'include/linux') diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index ed87b2405806..7571b5054f3c 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -451,6 +451,27 @@ unsigned long __clk_get_flags(struct clk *clk) return !clk ? 0 : clk->flags; } +bool __clk_is_prepared(struct clk *clk) +{ + int ret; + + if (!clk) + return false; + + /* + * .is_prepared is optional for clocks that can prepare + * fall back to software usage counter if it is missing + */ + if (!clk->ops->is_prepared) { + ret = clk->prepare_count ? 1 : 0; + goto out; + } + + ret = clk->ops->is_prepared(clk->hw); +out: + return !!ret; +} + bool __clk_is_enabled(struct clk *clk) { int ret; diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 7f197d7addb0..ee946862e058 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -45,6 +45,10 @@ struct clk_hw; * undo any work done in the @prepare callback. Called with * prepare_lock held. 
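A minimal sketch of a driver implementing the new op, assuming hardware that reports its prepare state in a status register (the foo_* names, register offset, and the prepare/unprepare ops are invented for illustration):

	struct foo_clk {
		struct clk_hw hw;
		void __iomem *base;
	};

	#define FOO_CLK_STATUS		0x04
	#define FOO_CLK_PREPARED	BIT(0)

	static int foo_clk_is_prepared(struct clk_hw *hw)
	{
		struct foo_clk *fc = container_of(hw, struct foo_clk, hw);

		/* Report the real hardware state. The op may sleep, so a
		 * slow-bus (I2C/SPI) read would also be acceptable here. */
		return !!(readl(fc->base + FOO_CLK_STATUS) & FOO_CLK_PREPARED);
	}

	static const struct clk_ops foo_clk_ops = {
		.prepare	= foo_clk_prepare,
		.unprepare	= foo_clk_unprepare,
		.is_prepared	= foo_clk_is_prepared,
	};

Clocks that do not provide the op keep the old behaviour: __clk_is_prepared() falls back to the prepare_count.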
* + * @is_prepared: Queries the hardware to determine if the clock is prepared. + * This function is allowed to sleep. Optional, if this op is not + * set then the prepare count will be used. + * * @enable: Enable the clock atomically. This must not return until the * clock is generating a valid clock signal, usable by consumer * devices. Called with enable_lock held. This function must not @@ -108,6 +112,7 @@ struct clk_hw; struct clk_ops { int (*prepare)(struct clk_hw *hw); void (*unprepare)(struct clk_hw *hw); + int (*is_prepared)(struct clk_hw *hw); int (*enable)(struct clk_hw *hw); void (*disable)(struct clk_hw *hw); int (*is_enabled)(struct clk_hw *hw); @@ -351,6 +356,7 @@ unsigned int __clk_get_enable_count(struct clk *clk); unsigned int __clk_get_prepare_count(struct clk *clk); unsigned long __clk_get_rate(struct clk *clk); unsigned long __clk_get_flags(struct clk *clk); +bool __clk_is_prepared(struct clk *clk); bool __clk_is_enabled(struct clk *clk); struct clk *__clk_lookup(const char *name); -- cgit From 3cc8247f1dce79511de8bf0f69ab02a46cc315b7 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 12 Mar 2013 20:26:04 +0100 Subject: clk: Introduce optional unprepare_unused callback An unprepare_unused callback is introduced due to the same reasons to why the disable_unused callback was added. During the clk_disable_unused sequence, those clk_hw that needs specific treatment with regards to being unprepared, shall implement the unprepare_unused callback. Signed-off-by: Ulf Hansson Acked-by: Linus Walleij Signed-off-by: Mike Turquette --- drivers/clk/clk.c | 7 +++++-- include/linux/clk-provider.h | 5 +++++ 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index c0141f3e1109..253792a46c08 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -352,9 +352,12 @@ static void clk_unprepare_unused_subtree(struct clk *clk) if (clk->flags & CLK_IGNORE_UNUSED) return; - if (__clk_is_prepared(clk)) - if (clk->ops->unprepare) + if (__clk_is_prepared(clk)) { + if (clk->ops->unprepare_unused) + clk->ops->unprepare_unused(clk->hw); + else if (clk->ops->unprepare) clk->ops->unprepare(clk->hw); + } } /* caller must hold prepare_lock */ diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index ee946862e058..56e6cc12c796 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -49,6 +49,10 @@ struct clk_hw; * This function is allowed to sleep. Optional, if this op is not * set then the prepare count will be used. * + * @unprepare_unused: Unprepare the clock atomically. Only called from + * clk_disable_unused for prepare clocks with special needs. + * Called with prepare mutex held. This function may sleep. + * * @enable: Enable the clock atomically. This must not return until the * clock is generating a valid clock signal, usable by consumer * devices. Called with enable_lock held. 
This function must not @@ -113,6 +117,7 @@ struct clk_ops { int (*prepare)(struct clk_hw *hw); void (*unprepare)(struct clk_hw *hw); int (*is_prepared)(struct clk_hw *hw); + void (*unprepare_unused)(struct clk_hw *hw); int (*enable)(struct clk_hw *hw); void (*disable)(struct clk_hw *hw); int (*is_enabled)(struct clk_hw *hw); -- cgit From 14a40ffccd6163bbcd1d6f32b28a88ffe6149fc6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 19 Mar 2013 13:45:20 -0700 Subject: sched: replace PF_THREAD_BOUND with PF_NO_SETAFFINITY PF_THREAD_BOUND was originally used to mark kernel threads which were bound to a specific CPU using kthread_bind() and a task with the flag set allows cpus_allowed modifications only to itself. Workqueue is currently abusing it to prevent userland from meddling with cpus_allowed of workqueue workers. What we need is a flag to prevent userland from messing with cpus_allowed of certain kernel tasks. In kernel, anyone can (incorrectly) squash the flag, and, for worker-type usages, restricting cpus_allowed modification to the task itself doesn't provide meaningful extra proection as other tasks can inject work items to the task anyway. This patch replaces PF_THREAD_BOUND with PF_NO_SETAFFINITY. sched_setaffinity() checks the flag and return -EINVAL if set. set_cpus_allowed_ptr() is no longer affected by the flag. This will allow simplifying workqueue worker CPU affinity management. Signed-off-by: Tejun Heo Acked-by: Ingo Molnar Reviewed-by: Lai Jiangshan Cc: Peter Zijlstra Cc: Thomas Gleixner --- include/linux/sched.h | 2 +- kernel/cgroup.c | 4 ++-- kernel/cpuset.c | 16 ++++++++-------- kernel/kthread.c | 2 +- kernel/sched/core.c | 9 ++++----- kernel/workqueue.c | 10 +++------- 6 files changed, 19 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index d35d2b6ddbfb..e5c64f7b8c1d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1793,7 +1793,7 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, #define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ #define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */ #define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */ -#define PF_THREAD_BOUND 0x04000000 /* Thread bound to specific cpu */ +#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */ #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ #define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ diff --git a/kernel/cgroup.c b/kernel/cgroup.c index a32f9432666c..3852d926322c 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2224,11 +2224,11 @@ retry_find_task: tsk = tsk->group_leader; /* - * Workqueue threads may acquire PF_THREAD_BOUND and become + * Workqueue threads may acquire PF_NO_SETAFFINITY and become * trapped in a cpuset, or RT worker may be born in a cgroup * with no rt_runtime allocated. Just say no. 
*/ - if (tsk == kthreadd_task || (tsk->flags & PF_THREAD_BOUND)) { + if (tsk == kthreadd_task || (tsk->flags & PF_NO_SETAFFINITY)) { ret = -EINVAL; rcu_read_unlock(); goto out_unlock_cgroup; diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 4f9dfe43ecbd..f22e94792707 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1388,16 +1388,16 @@ static int cpuset_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) cgroup_taskset_for_each(task, cgrp, tset) { /* - * Kthreads bound to specific cpus cannot be moved to a new - * cpuset; we cannot change their cpu affinity and - * isolating such threads by their set of allowed nodes is - * unnecessary. Thus, cpusets are not applicable for such - * threads. This prevents checking for success of - * set_cpus_allowed_ptr() on all attached tasks before - * cpus_allowed may be changed. + * Kthreads which disallow setaffinity shouldn't be moved + * to a new cpuset; we don't want to change their cpu + * affinity and isolating such threads by their set of + * allowed nodes is unnecessary. Thus, cpusets are not + * applicable for such threads. This prevents checking for + * success of set_cpus_allowed_ptr() on all attached tasks + * before cpus_allowed may be changed. */ ret = -EINVAL; - if (task->flags & PF_THREAD_BOUND) + if (task->flags & PF_NO_SETAFFINITY) goto out_unlock; ret = security_task_setscheduler(task); if (ret) diff --git a/kernel/kthread.c b/kernel/kthread.c index 691dc2ef9baf..a2fbbb782bad 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -260,7 +260,7 @@ static void __kthread_bind(struct task_struct *p, unsigned int cpu) { /* It's safe because the task is inactive. */ do_set_cpus_allowed(p, cpumask_of(cpu)); - p->flags |= PF_THREAD_BOUND; + p->flags |= PF_NO_SETAFFINITY; } /** diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 7f12624a393c..23606ee961b5 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4126,6 +4126,10 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) get_task_struct(p); rcu_read_unlock(); + if (p->flags & PF_NO_SETAFFINITY) { + retval = -EINVAL; + goto out_put_task; + } if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) { retval = -ENOMEM; goto out_put_task; @@ -4773,11 +4777,6 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) goto out; } - if (unlikely((p->flags & PF_THREAD_BOUND) && p != current)) { - ret = -EINVAL; - goto out; - } - do_set_cpus_allowed(p, new_mask); /* Can the task run on the task's current CPU? If so, we're done */ diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 969be0b72071..39a591f65b08 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1757,12 +1757,8 @@ static struct worker *create_worker(struct worker_pool *pool) set_user_nice(worker->task, pool->attrs->nice); set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask); - /* - * %PF_THREAD_BOUND is used to prevent userland from meddling with - * cpumask of workqueue workers. This is an abuse. We need - * %PF_NO_SETAFFINITY. 
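The user-visible effect of the new flag is that sched_setaffinity(2) now fails with EINVAL for such tasks; a quick userspace probe, assuming you pick a kworker pid by hand (e.g. from ps):

	#define _GNU_SOURCE
	#include <sched.h>
	#include <sys/types.h>
	#include <errno.h>
	#include <stdio.h>
	#include <string.h>

	/* Returns 0 if the task could be pinned to the given cpu. */
	static int try_pin(pid_t pid, int cpu)
	{
		cpu_set_t set;

		CPU_ZERO(&set);
		CPU_SET(cpu, &set);
		if (sched_setaffinity(pid, sizeof(set), &set) < 0) {
			/* Expect EINVAL when PF_NO_SETAFFINITY is set. */
			fprintf(stderr, "pid %d: %s\n", pid, strerror(errno));
			return -1;
		}
		return 0;
	}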
- */ - worker->task->flags |= PF_THREAD_BOUND; + /* prevent userland from meddling with cpumask of workqueue workers */ + worker->task->flags |= PF_NO_SETAFFINITY; /* * The caller is responsible for ensuring %POOL_DISASSOCIATED @@ -3876,7 +3872,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt, } wq->rescuer = rescuer; - rescuer->task->flags |= PF_THREAD_BOUND; + rescuer->task->flags |= PF_NO_SETAFFINITY; wake_up_process(rescuer->task); } -- cgit From 4d10f054f7df600ec8a388091c93b2d976920de0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 19 Mar 2013 15:38:50 +0100 Subject: clocksource: make CLOCKSOURCE_OF_DECLARE type safe This ensures that a function pointer passed into CLOCKSOURCE_OF_DECLARE takes the same arguments that we use for calling that function later. Also fix the extraneous semicolon at end of the CLOCKSOURCE_OF_DECLARE definition. Signed-off-by: Arnd Bergmann Acked-by: Rob Herring --- drivers/clocksource/clksrc-of.c | 3 ++- drivers/clocksource/vt8500_timer.c | 2 +- include/linux/clocksource.h | 11 +++++++++-- 3 files changed, 12 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/clocksource/clksrc-of.c b/drivers/clocksource/clksrc-of.c index 3ef11fba781c..37f5325bec95 100644 --- a/drivers/clocksource/clksrc-of.c +++ b/drivers/clocksource/clksrc-of.c @@ -16,6 +16,7 @@ #include #include +#include extern struct of_device_id __clksrc_of_table[]; @@ -26,7 +27,7 @@ void __init clocksource_of_init(void) { struct device_node *np; const struct of_device_id *match; - void (*init_func)(struct device_node *); + clocksource_of_init_fn init_func; for_each_matching_node_and_match(np, __clksrc_of_table, &match) { init_func = match->data; diff --git a/drivers/clocksource/vt8500_timer.c b/drivers/clocksource/vt8500_timer.c index 242255285597..64f553f04fa4 100644 --- a/drivers/clocksource/vt8500_timer.c +++ b/drivers/clocksource/vt8500_timer.c @@ -165,4 +165,4 @@ static void __init vt8500_timer_init(struct device_node *np) 4, 0xf0000000); } -CLOCKSOURCE_OF_DECLARE(vt8500, "via,vt8500-timer", vt8500_timer_init) +CLOCKSOURCE_OF_DECLARE(vt8500, "via,vt8500-timer", vt8500_timer_init); diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 08ed5e19d8c6..ac33184b14fd 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -332,16 +332,23 @@ extern int clocksource_mmio_init(void __iomem *, const char *, extern int clocksource_i8253_init(void); +struct device_node; +typedef void(*clocksource_of_init_fn)(struct device_node *); #ifdef CONFIG_CLKSRC_OF extern void clocksource_of_init(void); #define CLOCKSOURCE_OF_DECLARE(name, compat, fn) \ static const struct of_device_id __clksrc_of_table_##name \ __used __section(__clksrc_of_table) \ - = { .compatible = compat, .data = fn }; + = { .compatible = compat, \ + .data = (fn == (clocksource_of_init_fn)NULL) ? fn : fn } #else static inline void clocksource_of_init(void) {} -#define CLOCKSOURCE_OF_DECLARE(name, compat, fn) +#define CLOCKSOURCE_OF_DECLARE(name, compat, fn) \ + static const struct of_device_id __clksrc_of_table_##name \ + __unused __section(__clksrc_of_table) \ + = { .compatible = compat, \ + .data = (fn == (clocksource_of_init_fn)NULL) ? fn : fn } #endif #endif /* _LINUX_CLOCKSOURCE_H */ -- cgit From a9a0fef779074838230e04a322fd2bdc921f4f4f Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 18 Mar 2013 13:22:19 +1030 Subject: virtio_ring: expose virtio barriers for use in vringh. The host side of ring needs this logic too. 
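With the weak_barriers flag passed as an argument, both sides can share the same publish idiom; in sketch form (vring, head, buf_phys, buf_len and weak_barriers are assumed to be set up by the caller):

	/* Producer: fill the descriptor and expose it, then publish the
	 * index. The write barrier orders the ring contents before the
	 * index update that makes them visible to the other side. */
	vring.desc[head].addr = buf_phys;
	vring.desc[head].len  = buf_len;
	vring.avail->ring[vring.avail->idx & (vring.num - 1)] = head;

	virtio_wmb(weak_barriers);	/* smp_wmb() or wmb(), decided at runtime */
	vring.avail->idx++;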
Signed-off-by: Rusty Russell --- drivers/virtio/virtio_ring.c | 33 +++++-------------------- include/linux/virtio_ring.h | 57 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index ffd7e7da5d3b..245177c286ae 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -24,27 +24,6 @@ #include #include -/* virtio guest is communicating with a virtual "device" that actually runs on - * a host processor. Memory barriers are used to control SMP effects. */ -#ifdef CONFIG_SMP -/* Where possible, use SMP barriers which are more lightweight than mandatory - * barriers, because mandatory barriers control MMIO effects on accesses - * through relaxed memory I/O windows (which virtio-pci does not use). */ -#define virtio_mb(vq) \ - do { if ((vq)->weak_barriers) smp_mb(); else mb(); } while(0) -#define virtio_rmb(vq) \ - do { if ((vq)->weak_barriers) smp_rmb(); else rmb(); } while(0) -#define virtio_wmb(vq) \ - do { if ((vq)->weak_barriers) smp_wmb(); else wmb(); } while(0) -#else -/* We must force memory ordering even if guest is UP since host could be - * running on another CPU, but SMP barriers are defined to barrier() in that - * configuration. So fall back to mandatory barriers instead. */ -#define virtio_mb(vq) mb() -#define virtio_rmb(vq) rmb() -#define virtio_wmb(vq) wmb() -#endif - #ifdef DEBUG /* For development, we want to crash whenever the ring is screwed. */ #define BAD_RING(_vq, fmt, args...) \ @@ -276,7 +255,7 @@ add_head: /* Descriptors and available array need to be set before we expose the * new available array entries. */ - virtio_wmb(vq); + virtio_wmb(vq->weak_barriers); vq->vring.avail->idx++; vq->num_added++; @@ -312,7 +291,7 @@ bool virtqueue_kick_prepare(struct virtqueue *_vq) START_USE(vq); /* We need to expose available array entries before checking avail * event. */ - virtio_mb(vq); + virtio_mb(vq->weak_barriers); old = vq->vring.avail->idx - vq->num_added; new = vq->vring.avail->idx; @@ -436,7 +415,7 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) } /* Only get used array entries after they have been exposed by host. */ - virtio_rmb(vq); + virtio_rmb(vq->weak_barriers); last_used = (vq->last_used_idx & (vq->vring.num - 1)); i = vq->vring.used->ring[last_used].id; @@ -460,7 +439,7 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) * the read in the next get_buf call. */ if (!(vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) { vring_used_event(&vq->vring) = vq->last_used_idx; - virtio_mb(vq); + virtio_mb(vq->weak_barriers); } #ifdef DEBUG @@ -513,7 +492,7 @@ bool virtqueue_enable_cb(struct virtqueue *_vq) * entry. Always do both to keep code simple. 
*/ vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT; vring_used_event(&vq->vring) = vq->last_used_idx; - virtio_mb(vq); + virtio_mb(vq->weak_barriers); if (unlikely(more_used(vq))) { END_USE(vq); return false; @@ -553,7 +532,7 @@ bool virtqueue_enable_cb_delayed(struct virtqueue *_vq) /* TODO: tune this threshold */ bufs = (u16)(vq->vring.avail->idx - vq->last_used_idx) * 3 / 4; vring_used_event(&vq->vring) = vq->last_used_idx + bufs; - virtio_mb(vq); + virtio_mb(vq->weak_barriers); if (unlikely((u16)(vq->vring.used->idx - vq->last_used_idx) > bufs)) { END_USE(vq); return false; diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h index 63c6ea199519..ca3ad41c2c82 100644 --- a/include/linux/virtio_ring.h +++ b/include/linux/virtio_ring.h @@ -4,6 +4,63 @@ #include #include +/* + * Barriers in virtio are tricky. Non-SMP virtio guests can't assume + * they're not on an SMP host system, so they need to assume real + * barriers. Non-SMP virtio hosts could skip the barriers, but does + * anyone care? + * + * For virtio_pci on SMP, we don't need to order with respect to MMIO + * accesses through relaxed memory I/O windows, so smp_mb() et al are + * sufficient. + * + * For using virtio to talk to real devices (eg. other heterogeneous + * CPUs) we do need real barriers. In theory, we could be using both + * kinds of virtio, so it's a runtime decision, and the branch is + * actually quite cheap. + */ + +#ifdef CONFIG_SMP +static inline void virtio_mb(bool weak_barriers) +{ + if (weak_barriers) + smp_mb(); + else + mb(); +} + +static inline void virtio_rmb(bool weak_barriers) +{ + if (weak_barriers) + smp_rmb(); + else + rmb(); +} + +static inline void virtio_wmb(bool weak_barriers) +{ + if (weak_barriers) + smp_wmb(); + else + wmb(); +} +#else +static inline void virtio_mb(bool weak_barriers) +{ + mb(); +} + +static inline void virtio_rmb(bool weak_barriers) +{ + rmb(); +} + +static inline void virtio_wmb(bool weak_barriers) +{ + wmb(); +} +#endif + struct virtio_device; struct virtqueue; -- cgit From f87d0fbb579818fed3eeb0923cc253163ab93039 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 20 Mar 2013 13:50:14 +1030 Subject: vringh: host-side implementation of virtio rings. Getting use of virtio rings correct is tricky, and a recent patch saw an implementation of in-kernel rings (as separate from userspace). This abstracts the business of dealing with the virtio ring layout from the access (userspace or direct); to do this, we use function pointers, which gcc inlines correctly. Signed-off-by: Rusty Russell Acked-by: Michael S. 
Tsirkin --- drivers/Makefile | 2 +- drivers/vhost/Kconfig | 8 + drivers/vhost/Kconfig.tcm | 1 + drivers/vhost/Makefile | 2 + drivers/vhost/vringh.c | 1007 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/vringh.h | 196 +++++++++ 6 files changed, 1215 insertions(+), 1 deletion(-) create mode 100644 drivers/vhost/vringh.c create mode 100644 include/linux/vringh.h (limited to 'include/linux') diff --git a/drivers/Makefile b/drivers/Makefile index dce39a95fa71..72d28d34ee24 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -123,7 +123,7 @@ obj-$(CONFIG_PPC_PS3) += ps3/ obj-$(CONFIG_OF) += of/ obj-$(CONFIG_SSB) += ssb/ obj-$(CONFIG_BCMA) += bcma/ -obj-$(CONFIG_VHOST_NET) += vhost/ +obj-$(CONFIG_VHOST_RING) += vhost/ obj-$(CONFIG_VLYNQ) += vlynq/ obj-$(CONFIG_STAGING) += staging/ obj-y += platform/ diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig index bf243177ffe1..85b773a93a5d 100644 --- a/drivers/vhost/Kconfig +++ b/drivers/vhost/Kconfig @@ -1,6 +1,7 @@ config VHOST_NET tristate "Host kernel accelerator for virtio net" depends on NET && EVENTFD && (TUN || !TUN) && (MACVTAP || !MACVTAP) + select VHOST_RING ---help--- This kernel module can be loaded in host kernel to accelerate guest networking with virtio_net. Not to be confused with virtio_net @@ -12,3 +13,10 @@ config VHOST_NET if STAGING source "drivers/vhost/Kconfig.tcm" endif + +config VHOST_RING + tristate + ---help--- + This option is selected by any driver which needs to access + the host side of a virtio ring. + diff --git a/drivers/vhost/Kconfig.tcm b/drivers/vhost/Kconfig.tcm index 7e3aa28d999e..c3a8cfa1de72 100644 --- a/drivers/vhost/Kconfig.tcm +++ b/drivers/vhost/Kconfig.tcm @@ -1,6 +1,7 @@ config TCM_VHOST tristate "TCM_VHOST fabric module" depends on TARGET_CORE && EVENTFD && m + select VHOST_RING default n ---help--- Say M here to enable the TCM_VHOST fabric module for use with virtio-scsi guests diff --git a/drivers/vhost/Makefile b/drivers/vhost/Makefile index a27b053bc9ab..1d37f5e12be6 100644 --- a/drivers/vhost/Makefile +++ b/drivers/vhost/Makefile @@ -2,3 +2,5 @@ obj-$(CONFIG_VHOST_NET) += vhost_net.o vhost_net-y := vhost.o net.o obj-$(CONFIG_TCM_VHOST) += tcm_vhost.o + +obj-$(CONFIG_VHOST_RING) += vringh.o diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c new file mode 100644 index 000000000000..bff0775e258c --- /dev/null +++ b/drivers/vhost/vringh.c @@ -0,0 +1,1007 @@ +/* + * Helpers for the host side of a virtio ring. + * + * Since these may be in userspace, we use (inline) accessors. + */ +#include +#include +#include +#include +#include +#include +#include + +static __printf(1,2) __cold void vringh_bad(const char *fmt, ...) +{ + static DEFINE_RATELIMIT_STATE(vringh_rs, + DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); + if (__ratelimit(&vringh_rs)) { + va_list ap; + va_start(ap, fmt); + printk(KERN_NOTICE "vringh:"); + vprintk(fmt, ap); + va_end(ap); + } +} + +/* Returns vring->num if empty, -ve on error. */ +static inline int __vringh_get_head(const struct vringh *vrh, + int (*getu16)(u16 *val, const u16 *p), + u16 *last_avail_idx) +{ + u16 avail_idx, i, head; + int err; + + err = getu16(&avail_idx, &vrh->vring.avail->idx); + if (err) { + vringh_bad("Failed to access avail idx at %p", + &vrh->vring.avail->idx); + return err; + } + + if (*last_avail_idx == avail_idx) + return vrh->vring.num; + + /* Only get avail ring entries after they have been exposed by guest. 
*/ + virtio_rmb(vrh->weak_barriers); + + i = *last_avail_idx & (vrh->vring.num - 1); + + err = getu16(&head, &vrh->vring.avail->ring[i]); + if (err) { + vringh_bad("Failed to read head: idx %d address %p", + *last_avail_idx, &vrh->vring.avail->ring[i]); + return err; + } + + if (head >= vrh->vring.num) { + vringh_bad("Guest says index %u > %u is available", + head, vrh->vring.num); + return -EINVAL; + } + + (*last_avail_idx)++; + return head; +} + +/* Copy some bytes to/from the iovec. Returns num copied. */ +static inline ssize_t vringh_iov_xfer(struct vringh_kiov *iov, + void *ptr, size_t len, + int (*xfer)(void *addr, void *ptr, + size_t len)) +{ + int err, done = 0; + + while (len && iov->i < iov->used) { + size_t partlen; + + partlen = min(iov->iov[iov->i].iov_len, len); + err = xfer(iov->iov[iov->i].iov_base, ptr, partlen); + if (err) + return err; + done += partlen; + len -= partlen; + ptr += partlen; + iov->consumed += partlen; + iov->iov[iov->i].iov_len -= partlen; + iov->iov[iov->i].iov_base += partlen; + + if (!iov->iov[iov->i].iov_len) { + /* Fix up old iov element then increment. */ + iov->iov[iov->i].iov_len = iov->consumed; + iov->iov[iov->i].iov_base -= iov->consumed; + + iov->consumed = 0; + iov->i++; + } + } + return done; +} + +/* May reduce *len if range is shorter. */ +static inline bool range_check(struct vringh *vrh, u64 addr, size_t *len, + struct vringh_range *range, + bool (*getrange)(struct vringh *, + u64, struct vringh_range *)) +{ + if (addr < range->start || addr > range->end_incl) { + if (!getrange(vrh, addr, range)) + return false; + } + BUG_ON(addr < range->start || addr > range->end_incl); + + /* To end of memory? */ + if (unlikely(addr + *len == 0)) { + if (range->end_incl == -1ULL) + return true; + goto truncate; + } + + /* Otherwise, don't wrap. */ + if (addr + *len < addr) { + vringh_bad("Wrapping descriptor %zu@0x%llx", + *len, (unsigned long long)addr); + return false; + } + + if (unlikely(addr + *len - 1 > range->end_incl)) + goto truncate; + return true; + +truncate: + *len = range->end_incl + 1 - addr; + return true; +} + +static inline bool no_range_check(struct vringh *vrh, u64 addr, size_t *len, + struct vringh_range *range, + bool (*getrange)(struct vringh *, + u64, struct vringh_range *)) +{ + return true; +} + +/* No reason for this code to be inline. */ +static int move_to_indirect(int *up_next, u16 *i, void *addr, + const struct vring_desc *desc, + struct vring_desc **descs, int *desc_max) +{ + /* Indirect tables can't have indirect. */ + if (*up_next != -1) { + vringh_bad("Multilevel indirect %u->%u", *up_next, *i); + return -EINVAL; + } + + if (unlikely(desc->len % sizeof(struct vring_desc))) { + vringh_bad("Strange indirect len %u", desc->len); + return -EINVAL; + } + + /* We will check this when we follow it! */ + if (desc->flags & VRING_DESC_F_NEXT) + *up_next = desc->next; + else + *up_next = -2; + *descs = addr; + *desc_max = desc->len / sizeof(struct vring_desc); + + /* Now, start at the first indirect. 
*/ + *i = 0; + return 0; +} + +static int resize_iovec(struct vringh_kiov *iov, gfp_t gfp) +{ + struct kvec *new; + unsigned int flag, new_num = (iov->max_num & ~VRINGH_IOV_ALLOCATED) * 2; + + if (new_num < 8) + new_num = 8; + + flag = (iov->max_num & VRINGH_IOV_ALLOCATED); + if (flag) + new = krealloc(iov->iov, new_num * sizeof(struct iovec), gfp); + else { + new = kmalloc(new_num * sizeof(struct iovec), gfp); + if (new) { + memcpy(new, iov->iov, + iov->max_num * sizeof(struct iovec)); + flag = VRINGH_IOV_ALLOCATED; + } + } + if (!new) + return -ENOMEM; + iov->iov = new; + iov->max_num = (new_num | flag); + return 0; +} + +static u16 __cold return_from_indirect(const struct vringh *vrh, int *up_next, + struct vring_desc **descs, int *desc_max) +{ + u16 i = *up_next; + + *up_next = -1; + *descs = vrh->vring.desc; + *desc_max = vrh->vring.num; + return i; +} + +static int slow_copy(struct vringh *vrh, void *dst, const void *src, + bool (*rcheck)(struct vringh *vrh, u64 addr, size_t *len, + struct vringh_range *range, + bool (*getrange)(struct vringh *vrh, + u64, + struct vringh_range *)), + bool (*getrange)(struct vringh *vrh, + u64 addr, + struct vringh_range *r), + struct vringh_range *range, + int (*copy)(void *dst, const void *src, size_t len)) +{ + size_t part, len = sizeof(struct vring_desc); + + do { + u64 addr; + int err; + + part = len; + addr = (u64)(unsigned long)src - range->offset; + + if (!rcheck(vrh, addr, &part, range, getrange)) + return -EINVAL; + + err = copy(dst, src, part); + if (err) + return err; + + dst += part; + src += part; + len -= part; + } while (len); + return 0; +} + +static inline int +__vringh_iov(struct vringh *vrh, u16 i, + struct vringh_kiov *riov, + struct vringh_kiov *wiov, + bool (*rcheck)(struct vringh *vrh, u64 addr, size_t *len, + struct vringh_range *range, + bool (*getrange)(struct vringh *, u64, + struct vringh_range *)), + bool (*getrange)(struct vringh *, u64, struct vringh_range *), + gfp_t gfp, + int (*copy)(void *dst, const void *src, size_t len)) +{ + int err, count = 0, up_next, desc_max; + struct vring_desc desc, *descs; + struct vringh_range range = { -1ULL, 0 }, slowrange; + bool slow = false; + + /* We start traversing vring's descriptor table. */ + descs = vrh->vring.desc; + desc_max = vrh->vring.num; + up_next = -1; + + if (riov) + riov->i = riov->used = 0; + else if (wiov) + wiov->i = wiov->used = 0; + else + /* You must want something! */ + BUG(); + + for (;;) { + void *addr; + struct vringh_kiov *iov; + size_t len; + + if (unlikely(slow)) + err = slow_copy(vrh, &desc, &descs[i], rcheck, getrange, + &slowrange, copy); + else + err = copy(&desc, &descs[i], sizeof(desc)); + if (unlikely(err)) + goto fail; + + if (unlikely(desc.flags & VRING_DESC_F_INDIRECT)) { + /* Make sure it's OK, and get offset. 
*/ + len = desc.len; + if (!rcheck(vrh, desc.addr, &len, &range, getrange)) { + err = -EINVAL; + goto fail; + } + + if (unlikely(len != desc.len)) { + slow = true; + /* We need to save this range to use offset */ + slowrange = range; + } + + addr = (void *)(long)(desc.addr + range.offset); + err = move_to_indirect(&up_next, &i, addr, &desc, + &descs, &desc_max); + if (err) + goto fail; + continue; + } + + if (count++ == vrh->vring.num) { + vringh_bad("Descriptor loop in %p", descs); + err = -ELOOP; + goto fail; + } + + if (desc.flags & VRING_DESC_F_WRITE) + iov = wiov; + else { + iov = riov; + if (unlikely(wiov && wiov->i)) { + vringh_bad("Readable desc %p after writable", + &descs[i]); + err = -EINVAL; + goto fail; + } + } + + if (!iov) { + vringh_bad("Unexpected %s desc", + !wiov ? "writable" : "readable"); + err = -EPROTO; + goto fail; + } + + again: + /* Make sure it's OK, and get offset. */ + len = desc.len; + if (!rcheck(vrh, desc.addr, &len, &range, getrange)) { + err = -EINVAL; + goto fail; + } + addr = (void *)(unsigned long)(desc.addr + range.offset); + + if (unlikely(iov->used == (iov->max_num & ~VRINGH_IOV_ALLOCATED))) { + err = resize_iovec(iov, gfp); + if (err) + goto fail; + } + + iov->iov[iov->used].iov_base = addr; + iov->iov[iov->used].iov_len = len; + iov->used++; + + if (unlikely(len != desc.len)) { + desc.len -= len; + desc.addr += len; + goto again; + } + + if (desc.flags & VRING_DESC_F_NEXT) { + i = desc.next; + } else { + /* Just in case we need to finish traversing above. */ + if (unlikely(up_next > 0)) { + i = return_from_indirect(vrh, &up_next, + &descs, &desc_max); + slow = false; + } else + break; + } + + if (i >= desc_max) { + vringh_bad("Chained index %u > %u", i, desc_max); + err = -EINVAL; + goto fail; + } + } + + return 0; + +fail: + return err; +} + +static inline int __vringh_complete(struct vringh *vrh, + const struct vring_used_elem *used, + unsigned int num_used, + int (*putu16)(u16 *p, u16 val), + int (*putused)(struct vring_used_elem *dst, + const struct vring_used_elem + *src, unsigned num)) +{ + struct vring_used *used_ring; + int err; + u16 used_idx, off; + + used_ring = vrh->vring.used; + used_idx = vrh->last_used_idx + vrh->completed; + + off = used_idx % vrh->vring.num; + + /* Compiler knows num_used == 1 sometimes, hence extra check */ + if (num_used > 1 && unlikely(off + num_used >= vrh->vring.num)) { + u16 part = vrh->vring.num - off; + err = putused(&used_ring->ring[off], used, part); + if (!err) + err = putused(&used_ring->ring[0], used + part, + num_used - part); + } else + err = putused(&used_ring->ring[off], used, num_used); + + if (err) { + vringh_bad("Failed to write %u used entries %u at %p", + num_used, off, &used_ring->ring[off]); + return err; + } + + /* Make sure buffer is written before we update index. */ + virtio_wmb(vrh->weak_barriers); + + err = putu16(&vrh->vring.used->idx, used_idx + num_used); + if (err) { + vringh_bad("Failed to update used index at %p", + &vrh->vring.used->idx); + return err; + } + + vrh->completed += num_used; + return 0; +} + + +static inline int __vringh_need_notify(struct vringh *vrh, + int (*getu16)(u16 *val, const u16 *p)) +{ + bool notify; + u16 used_event; + int err; + + /* Flush out used index update. This is paired with the + * barrier that the Guest executes when enabling + * interrupts. */ + virtio_mb(vrh->weak_barriers); + + /* Old-style, without event indices. 
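For reference, the event-index test used on the "modern" path below is vring_need_event(): notify iff (u16)(new - event - 1) < (u16)(new - old), i.e. the other side's event index falls within the entries just published. The u16 arithmetic makes the test wrap-safe; a worked case:

	/* old = 0xfffe, three buffers completed, so new = 0x0001 (wrapped),
	 * and the other side set used_event = 0xffff:
	 *
	 *	(u16)(0x0001 - 0xffff - 1) = 0x0001
	 *	(u16)(0x0001 - 0xfffe)	   = 0x0003
	 *
	 * 0x0001 < 0x0003, so vring_need_event() returns true and we
	 * notify, even though the index wrapped past zero. */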
*/ + if (!vrh->event_indices) { + u16 flags; + err = getu16(&flags, &vrh->vring.avail->flags); + if (err) { + vringh_bad("Failed to get flags at %p", + &vrh->vring.avail->flags); + return err; + } + return (!(flags & VRING_AVAIL_F_NO_INTERRUPT)); + } + + /* Modern: we know when other side wants to know. */ + err = getu16(&used_event, &vring_used_event(&vrh->vring)); + if (err) { + vringh_bad("Failed to get used event idx at %p", + &vring_used_event(&vrh->vring)); + return err; + } + + /* Just in case we added so many that we wrap. */ + if (unlikely(vrh->completed > 0xffff)) + notify = true; + else + notify = vring_need_event(used_event, + vrh->last_used_idx + vrh->completed, + vrh->last_used_idx); + + vrh->last_used_idx += vrh->completed; + vrh->completed = 0; + return notify; +} + +static inline bool __vringh_notify_enable(struct vringh *vrh, + int (*getu16)(u16 *val, const u16 *p), + int (*putu16)(u16 *p, u16 val)) +{ + u16 avail; + + if (!vrh->event_indices) { + /* Old-school; update flags. */ + if (putu16(&vrh->vring.used->flags, 0) != 0) { + vringh_bad("Clearing used flags %p", + &vrh->vring.used->flags); + return true; + } + } else { + if (putu16(&vring_avail_event(&vrh->vring), + vrh->last_avail_idx) != 0) { + vringh_bad("Updating avail event index %p", + &vring_avail_event(&vrh->vring)); + return true; + } + } + + /* They could have slipped one in as we were doing that: make + * sure it's written, then check again. */ + virtio_mb(vrh->weak_barriers); + + if (getu16(&avail, &vrh->vring.avail->idx) != 0) { + vringh_bad("Failed to check avail idx at %p", + &vrh->vring.avail->idx); + return true; + } + + /* This is unlikely, so we just leave notifications enabled + * (if we're using event_indices, we'll only get one + * notification anyway). */ + return avail == vrh->last_avail_idx; +} + +static inline void __vringh_notify_disable(struct vringh *vrh, + int (*putu16)(u16 *p, u16 val)) +{ + if (!vrh->event_indices) { + /* Old-school; update flags. */ + if (putu16(&vrh->vring.used->flags, VRING_USED_F_NO_NOTIFY)) { + vringh_bad("Setting used flags %p", + &vrh->vring.used->flags); + } + } +} + +/* Userspace access helpers: in this case, addresses are really userspace. */ +static inline int getu16_user(u16 *val, const u16 *p) +{ + return get_user(*val, (__force u16 __user *)p); +} + +static inline int putu16_user(u16 *p, u16 val) +{ + return put_user(val, (__force u16 __user *)p); +} + +static inline int copydesc_user(void *dst, const void *src, size_t len) +{ + return copy_from_user(dst, (__force void __user *)src, len) ? + -EFAULT : 0; +} + +static inline int putused_user(struct vring_used_elem *dst, + const struct vring_used_elem *src, + unsigned int num) +{ + return copy_to_user((__force void __user *)dst, src, + sizeof(*dst) * num) ? -EFAULT : 0; +} + +static inline int xfer_from_user(void *src, void *dst, size_t len) +{ + return copy_from_user(dst, (__force void __user *)src, len) ? + -EFAULT : 0; +} + +static inline int xfer_to_user(void *dst, void *src, size_t len) +{ + return copy_to_user((__force void __user *)dst, src, len) ? + -EFAULT : 0; +} + +/** + * vringh_init_user - initialize a vringh for a userspace vring. + * @vrh: the vringh to initialize. + * @features: the feature bits for this ring. + * @num: the number of elements. + * @weak_barriers: true if we only need memory barriers, not I/O. + * @desc: the userpace descriptor pointer. + * @avail: the userpace avail pointer. + * @used: the userpace used pointer. 
+ * + * Returns an error if num is invalid: you should check pointers + * yourself! + */ +int vringh_init_user(struct vringh *vrh, u32 features, + unsigned int num, bool weak_barriers, + struct vring_desc __user *desc, + struct vring_avail __user *avail, + struct vring_used __user *used) +{ + /* Sane power of 2 please! */ + if (!num || num > 0xffff || (num & (num - 1))) { + vringh_bad("Bad ring size %u", num); + return -EINVAL; + } + + vrh->event_indices = (features & (1 << VIRTIO_RING_F_EVENT_IDX)); + vrh->weak_barriers = weak_barriers; + vrh->completed = 0; + vrh->last_avail_idx = 0; + vrh->last_used_idx = 0; + vrh->vring.num = num; + /* vring expects kernel addresses, but only used via accessors. */ + vrh->vring.desc = (__force struct vring_desc *)desc; + vrh->vring.avail = (__force struct vring_avail *)avail; + vrh->vring.used = (__force struct vring_used *)used; + return 0; +} +EXPORT_SYMBOL(vringh_init_user); + +/** + * vringh_getdesc_user - get next available descriptor from userspace ring. + * @vrh: the userspace vring. + * @riov: where to put the readable descriptors (or NULL) + * @wiov: where to put the writable descriptors (or NULL) + * @getrange: function to call to check ranges. + * @head: head index we received, for passing to vringh_complete_user(). + * + * Returns 0 if there was no descriptor, 1 if there was, or -errno. + * + * Note that on error return, you can tell the difference between an + * invalid ring and a single invalid descriptor: in the former case, + * *head will be vrh->vring.num. You may be able to ignore an invalid + * descriptor, but there's not much you can do with an invalid ring. + * + * Note that you may need to clean up riov and wiov, even on error! + */ +int vringh_getdesc_user(struct vringh *vrh, + struct vringh_iov *riov, + struct vringh_iov *wiov, + bool (*getrange)(struct vringh *vrh, + u64 addr, struct vringh_range *r), + u16 *head) +{ + int err; + + *head = vrh->vring.num; + err = __vringh_get_head(vrh, getu16_user, &vrh->last_avail_idx); + if (err < 0) + return err; + + /* Empty... */ + if (err == vrh->vring.num) + return 0; + + /* We need the layouts to be the identical for this to work */ + BUILD_BUG_ON(sizeof(struct vringh_kiov) != sizeof(struct vringh_iov)); + BUILD_BUG_ON(offsetof(struct vringh_kiov, iov) != + offsetof(struct vringh_iov, iov)); + BUILD_BUG_ON(offsetof(struct vringh_kiov, i) != + offsetof(struct vringh_iov, i)); + BUILD_BUG_ON(offsetof(struct vringh_kiov, used) != + offsetof(struct vringh_iov, used)); + BUILD_BUG_ON(offsetof(struct vringh_kiov, max_num) != + offsetof(struct vringh_iov, max_num)); + BUILD_BUG_ON(sizeof(struct iovec) != sizeof(struct kvec)); + BUILD_BUG_ON(offsetof(struct iovec, iov_base) != + offsetof(struct kvec, iov_base)); + BUILD_BUG_ON(offsetof(struct iovec, iov_len) != + offsetof(struct kvec, iov_len)); + BUILD_BUG_ON(sizeof(((struct iovec *)NULL)->iov_base) + != sizeof(((struct kvec *)NULL)->iov_base)); + BUILD_BUG_ON(sizeof(((struct iovec *)NULL)->iov_len) + != sizeof(((struct kvec *)NULL)->iov_len)); + + *head = err; + err = __vringh_iov(vrh, *head, (struct vringh_kiov *)riov, + (struct vringh_kiov *)wiov, + range_check, getrange, GFP_KERNEL, copydesc_user); + if (err) + return err; + + return 1; +} +EXPORT_SYMBOL(vringh_getdesc_user); + +/** + * vringh_iov_pull_user - copy bytes from vring_iov. + * @riov: the riov as passed to vringh_getdesc_user() (updated as we consume) + * @dst: the place to copy. + * @len: the maximum length to copy. + * + * Returns the bytes copied <= len or a negative errno. 
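Tying the userspace entry points together, a host-side service loop looks roughly like this (error handling trimmed; my_getrange, the iovec arrays, buf, written and the eventfd signalling are caller-supplied placeholders):

	struct vringh vrh;
	struct vringh_iov riov, wiov;
	struct iovec r_iov[8], w_iov[8];
	u16 head;
	int err;

	vringh_init_user(&vrh, features, num, true, u_desc, u_avail, u_used);
	vringh_iov_init(&riov, r_iov, 8);
	vringh_iov_init(&wiov, w_iov, 8);

	err = vringh_getdesc_user(&vrh, &riov, &wiov, my_getrange, &head);
	if (err == 1) {			/* 0 = ring empty, <0 = error */
		ssize_t got = vringh_iov_pull_user(&riov, buf, sizeof(buf));
		/* ... handle request, vringh_iov_push_user() any reply ... */
		vringh_complete_user(&vrh, head, written);
		if (vringh_need_notify_user(&vrh) > 0)
			signal_guest_eventfd();	/* placeholder */
	}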
+ */ +ssize_t vringh_iov_pull_user(struct vringh_iov *riov, void *dst, size_t len) +{ + return vringh_iov_xfer((struct vringh_kiov *)riov, + dst, len, xfer_from_user); +} +EXPORT_SYMBOL(vringh_iov_pull_user); + +/** + * vringh_iov_push_user - copy bytes into vring_iov. + * @wiov: the wiov as passed to vringh_getdesc_user() (updated as we consume) + * @dst: the place to copy. + * @len: the maximum length to copy. + * + * Returns the bytes copied <= len or a negative errno. + */ +ssize_t vringh_iov_push_user(struct vringh_iov *wiov, + const void *src, size_t len) +{ + return vringh_iov_xfer((struct vringh_kiov *)wiov, + (void *)src, len, xfer_to_user); +} +EXPORT_SYMBOL(vringh_iov_push_user); + +/** + * vringh_abandon_user - we've decided not to handle the descriptor(s). + * @vrh: the vring. + * @num: the number of descriptors to put back (ie. num + * vringh_get_user() to undo). + * + * The next vringh_get_user() will return the old descriptor(s) again. + */ +void vringh_abandon_user(struct vringh *vrh, unsigned int num) +{ + /* We only update vring_avail_event(vr) when we want to be notified, + * so we haven't changed that yet. */ + vrh->last_avail_idx -= num; +} +EXPORT_SYMBOL(vringh_abandon_user); + +/** + * vringh_complete_user - we've finished with descriptor, publish it. + * @vrh: the vring. + * @head: the head as filled in by vringh_getdesc_user. + * @len: the length of data we have written. + * + * You should check vringh_need_notify_user() after one or more calls + * to this function. + */ +int vringh_complete_user(struct vringh *vrh, u16 head, u32 len) +{ + struct vring_used_elem used; + + used.id = head; + used.len = len; + return __vringh_complete(vrh, &used, 1, putu16_user, putused_user); +} +EXPORT_SYMBOL(vringh_complete_user); + +/** + * vringh_complete_multi_user - we've finished with many descriptors. + * @vrh: the vring. + * @used: the head, length pairs. + * @num_used: the number of used elements. + * + * You should check vringh_need_notify_user() after one or more calls + * to this function. + */ +int vringh_complete_multi_user(struct vringh *vrh, + const struct vring_used_elem used[], + unsigned num_used) +{ + return __vringh_complete(vrh, used, num_used, + putu16_user, putused_user); +} +EXPORT_SYMBOL(vringh_complete_multi_user); + +/** + * vringh_notify_enable_user - we want to know if something changes. + * @vrh: the vring. + * + * This always enables notifications, but returns false if there are + * now more buffers available in the vring. + */ +bool vringh_notify_enable_user(struct vringh *vrh) +{ + return __vringh_notify_enable(vrh, getu16_user, putu16_user); +} +EXPORT_SYMBOL(vringh_notify_enable_user); + +/** + * vringh_notify_disable_user - don't tell us if something changes. + * @vrh: the vring. + * + * This is our normal running state: we disable and then only enable when + * we're going to sleep. + */ +void vringh_notify_disable_user(struct vringh *vrh) +{ + __vringh_notify_disable(vrh, putu16_user); +} +EXPORT_SYMBOL(vringh_notify_disable_user); + +/** + * vringh_need_notify_user - must we tell the other side about used buffers? + * @vrh: the vring we've called vringh_complete_user() on. + * + * Returns -errno or 0 if we don't need to tell the other side, 1 if we do. + */ +int vringh_need_notify_user(struct vringh *vrh) +{ + return __vringh_need_notify(vrh, getu16_user); +} +EXPORT_SYMBOL(vringh_need_notify_user); + +/* Kernelspace access helpers. 
*/ +static inline int getu16_kern(u16 *val, const u16 *p) +{ + *val = ACCESS_ONCE(*p); + return 0; +} + +static inline int putu16_kern(u16 *p, u16 val) +{ + ACCESS_ONCE(*p) = val; + return 0; +} + +static inline int copydesc_kern(void *dst, const void *src, size_t len) +{ + memcpy(dst, src, len); + return 0; +} + +static inline int putused_kern(struct vring_used_elem *dst, + const struct vring_used_elem *src, + unsigned int num) +{ + memcpy(dst, src, num * sizeof(*dst)); + return 0; +} + +static inline int xfer_kern(void *src, void *dst, size_t len) +{ + memcpy(dst, src, len); + return 0; +} + +/** + * vringh_init_kern - initialize a vringh for a kernelspace vring. + * @vrh: the vringh to initialize. + * @features: the feature bits for this ring. + * @num: the number of elements. + * @weak_barriers: true if we only need memory barriers, not I/O. + * @desc: the userpace descriptor pointer. + * @avail: the userpace avail pointer. + * @used: the userpace used pointer. + * + * Returns an error if num is invalid. + */ +int vringh_init_kern(struct vringh *vrh, u32 features, + unsigned int num, bool weak_barriers, + struct vring_desc *desc, + struct vring_avail *avail, + struct vring_used *used) +{ + /* Sane power of 2 please! */ + if (!num || num > 0xffff || (num & (num - 1))) { + vringh_bad("Bad ring size %u", num); + return -EINVAL; + } + + vrh->event_indices = (features & (1 << VIRTIO_RING_F_EVENT_IDX)); + vrh->weak_barriers = weak_barriers; + vrh->completed = 0; + vrh->last_avail_idx = 0; + vrh->last_used_idx = 0; + vrh->vring.num = num; + vrh->vring.desc = desc; + vrh->vring.avail = avail; + vrh->vring.used = used; + return 0; +} +EXPORT_SYMBOL(vringh_init_kern); + +/** + * vringh_getdesc_kern - get next available descriptor from kernelspace ring. + * @vrh: the kernelspace vring. + * @riov: where to put the readable descriptors (or NULL) + * @wiov: where to put the writable descriptors (or NULL) + * @head: head index we received, for passing to vringh_complete_kern(). + * @gfp: flags for allocating larger riov/wiov. + * + * Returns 0 if there was no descriptor, 1 if there was, or -errno. + * + * Note that on error return, you can tell the difference between an + * invalid ring and a single invalid descriptor: in the former case, + * *head will be vrh->vring.num. You may be able to ignore an invalid + * descriptor, but there's not much you can do with an invalid ring. + * + * Note that you may need to clean up riov and wiov, even on error! + */ +int vringh_getdesc_kern(struct vringh *vrh, + struct vringh_kiov *riov, + struct vringh_kiov *wiov, + u16 *head, + gfp_t gfp) +{ + int err; + + err = __vringh_get_head(vrh, getu16_kern, &vrh->last_avail_idx); + if (err < 0) + return err; + + /* Empty... */ + if (err == vrh->vring.num) + return 0; + + *head = err; + err = __vringh_iov(vrh, *head, riov, wiov, no_range_check, NULL, + gfp, copydesc_kern); + if (err) + return err; + + return 1; +} +EXPORT_SYMBOL(vringh_getdesc_kern); + +/** + * vringh_iov_pull_kern - copy bytes from vring_iov. + * @riov: the riov as passed to vringh_getdesc_kern() (updated as we consume) + * @dst: the place to copy. + * @len: the maximum length to copy. + * + * Returns the bytes copied <= len or a negative errno. + */ +ssize_t vringh_iov_pull_kern(struct vringh_kiov *riov, void *dst, size_t len) +{ + return vringh_iov_xfer(riov, dst, len, xfer_kern); +} +EXPORT_SYMBOL(vringh_iov_pull_kern); + +/** + * vringh_iov_push_kern - copy bytes into vring_iov. 
+ * @wiov: the wiov as passed to vringh_getdesc_kern() (updated as we consume) + * @dst: the place to copy. + * @len: the maximum length to copy. + * + * Returns the bytes copied <= len or a negative errno. + */ +ssize_t vringh_iov_push_kern(struct vringh_kiov *wiov, + const void *src, size_t len) +{ + return vringh_iov_xfer(wiov, (void *)src, len, xfer_kern); +} +EXPORT_SYMBOL(vringh_iov_push_kern); + +/** + * vringh_abandon_kern - we've decided not to handle the descriptor(s). + * @vrh: the vring. + * @num: the number of descriptors to put back (ie. num + * vringh_get_kern() to undo). + * + * The next vringh_get_kern() will return the old descriptor(s) again. + */ +void vringh_abandon_kern(struct vringh *vrh, unsigned int num) +{ + /* We only update vring_avail_event(vr) when we want to be notified, + * so we haven't changed that yet. */ + vrh->last_avail_idx -= num; +} +EXPORT_SYMBOL(vringh_abandon_kern); + +/** + * vringh_complete_kern - we've finished with descriptor, publish it. + * @vrh: the vring. + * @head: the head as filled in by vringh_getdesc_kern. + * @len: the length of data we have written. + * + * You should check vringh_need_notify_kern() after one or more calls + * to this function. + */ +int vringh_complete_kern(struct vringh *vrh, u16 head, u32 len) +{ + struct vring_used_elem used; + + used.id = head; + used.len = len; + + return __vringh_complete(vrh, &used, 1, putu16_kern, putused_kern); +} +EXPORT_SYMBOL(vringh_complete_kern); + +/** + * vringh_notify_enable_kern - we want to know if something changes. + * @vrh: the vring. + * + * This always enables notifications, but returns false if there are + * now more buffers available in the vring. + */ +bool vringh_notify_enable_kern(struct vringh *vrh) +{ + return __vringh_notify_enable(vrh, getu16_kern, putu16_kern); +} +EXPORT_SYMBOL(vringh_notify_enable_kern); + +/** + * vringh_notify_disable_kern - don't tell us if something changes. + * @vrh: the vring. + * + * This is our normal running state: we disable and then only enable when + * we're going to sleep. + */ +void vringh_notify_disable_kern(struct vringh *vrh) +{ + __vringh_notify_disable(vrh, putu16_kern); +} +EXPORT_SYMBOL(vringh_notify_disable_kern); + +/** + * vringh_need_notify_kern - must we tell the other side about used buffers? + * @vrh: the vring we've called vringh_complete_kern() on. + * + * Returns -errno or 0 if we don't need to tell the other side, 1 if we do. + */ +int vringh_need_notify_kern(struct vringh *vrh) +{ + return __vringh_need_notify(vrh, getu16_kern); +} +EXPORT_SYMBOL(vringh_need_notify_kern); diff --git a/include/linux/vringh.h b/include/linux/vringh.h new file mode 100644 index 000000000000..b8f086625c49 --- /dev/null +++ b/include/linux/vringh.h @@ -0,0 +1,196 @@ +/* + * Linux host-side vring helpers; for when the kernel needs to access + * someone else's vring. + * + * Copyright IBM Corporation, 2013. + * Parts taken from drivers/vhost/vhost.c Copyright 2009 Red Hat, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Written by: Rusty Russell + */ +#ifndef _LINUX_VRINGH_H +#define _LINUX_VRINGH_H +#include +#include +#include +#include + +/* virtio_ring with information needed for host access. */ +struct vringh { + /* Guest publishes used event idx (note: we always do). */ + bool event_indices; + + /* Can we get away with weak barriers? */ + bool weak_barriers; + + /* Last available index we saw (ie. where we're up to). */ + u16 last_avail_idx; + + /* Last index we used. */ + u16 last_used_idx; + + /* How many descriptors we've completed since last need_notify(). */ + u32 completed; + + /* The vring (note: it may contain user pointers!) */ + struct vring vring; +}; + +/* The memory the vring can access, and what offset to apply. */ +struct vringh_range { + u64 start, end_incl; + u64 offset; +}; + +/** + * struct vringh_iov - iovec mangler. + * + * Mangles iovec in place, and restores it. + * Remaining data is iov + i, of used - i elements. + */ +struct vringh_iov { + struct iovec *iov; + size_t consumed; /* Within iov[i] */ + unsigned i, used, max_num; +}; + +/** + * struct vringh_iov - kvec mangler. + * + * Mangles kvec in place, and restores it. + * Remaining data is iov + i, of used - i elements. + */ +struct vringh_kiov { + struct kvec *iov; + size_t consumed; /* Within iov[i] */ + unsigned i, used, max_num; +}; + +/* Flag on max_num to indicate we're kmalloced. */ +#define VRINGH_IOV_ALLOCATED 0x8000000 + +/* Helpers for userspace vrings. */ +int vringh_init_user(struct vringh *vrh, u32 features, + unsigned int num, bool weak_barriers, + struct vring_desc __user *desc, + struct vring_avail __user *avail, + struct vring_used __user *used); + +static inline void vringh_iov_init(struct vringh_iov *iov, + struct iovec *iovec, unsigned num) +{ + iov->used = iov->i = 0; + iov->consumed = 0; + iov->max_num = num; + iov->iov = iovec; +} + +static inline void vringh_iov_reset(struct vringh_iov *iov) +{ + iov->iov[iov->i].iov_len += iov->consumed; + iov->iov[iov->i].iov_base -= iov->consumed; + iov->consumed = 0; + iov->i = 0; +} + +static inline void vringh_iov_cleanup(struct vringh_iov *iov) +{ + if (iov->max_num & VRINGH_IOV_ALLOCATED) + kfree(iov->iov); + iov->max_num = iov->used = iov->i = iov->consumed = 0; + iov->iov = NULL; +} + +/* Convert a descriptor into iovecs. */ +int vringh_getdesc_user(struct vringh *vrh, + struct vringh_iov *riov, + struct vringh_iov *wiov, + bool (*getrange)(struct vringh *vrh, + u64 addr, struct vringh_range *r), + u16 *head); + +/* Copy bytes from readable vsg, consuming it (and incrementing wiov->i). */ +ssize_t vringh_iov_pull_user(struct vringh_iov *riov, void *dst, size_t len); + +/* Copy bytes into writable vsg, consuming it (and incrementing wiov->i). */ +ssize_t vringh_iov_push_user(struct vringh_iov *wiov, + const void *src, size_t len); + +/* Mark a descriptor as used. */ +int vringh_complete_user(struct vringh *vrh, u16 head, u32 len); +int vringh_complete_multi_user(struct vringh *vrh, + const struct vring_used_elem used[], + unsigned num_used); + +/* Pretend we've never seen descriptor (for easy error handling). */ +void vringh_abandon_user(struct vringh *vrh, unsigned int num); + +/* Do we need to fire the eventfd to notify the other side? 
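The in-kernel variants mirror that flow one-for-one; a condensed sketch (the ring pointers and buf are assumed; GFP_KERNEL lets the kiov grow if a descriptor chain outruns the eight kvecs supplied):

	struct vringh vrh;
	struct vringh_kiov riov;
	struct kvec kv[8];
	u16 head;
	int err;

	vringh_init_kern(&vrh, features, num, false, k_desc, k_avail, k_used);
	vringh_kiov_init(&riov, kv, 8);

	err = vringh_getdesc_kern(&vrh, &riov, NULL, &head, GFP_KERNEL);
	if (err == 1) {
		ssize_t got = vringh_iov_pull_kern(&riov, buf, sizeof(buf));
		vringh_complete_kern(&vrh, head, 0);
		if (vringh_need_notify_kern(&vrh) > 0)
			my_notify();	/* placeholder: kick the other side */
	}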
*/ +int vringh_need_notify_user(struct vringh *vrh); + +bool vringh_notify_enable_user(struct vringh *vrh); +void vringh_notify_disable_user(struct vringh *vrh); + +/* Helpers for kernelspace vrings. */ +int vringh_init_kern(struct vringh *vrh, u32 features, + unsigned int num, bool weak_barriers, + struct vring_desc *desc, + struct vring_avail *avail, + struct vring_used *used); + +static inline void vringh_kiov_init(struct vringh_kiov *kiov, + struct kvec *kvec, unsigned num) +{ + kiov->used = kiov->i = 0; + kiov->consumed = 0; + kiov->max_num = num; + kiov->iov = kvec; +} + +static inline void vringh_kiov_reset(struct vringh_kiov *kiov) +{ + kiov->iov[kiov->i].iov_len += kiov->consumed; + kiov->iov[kiov->i].iov_base -= kiov->consumed; + kiov->consumed = 0; + kiov->i = 0; +} + +static inline void vringh_kiov_cleanup(struct vringh_kiov *kiov) +{ + if (kiov->max_num & VRINGH_IOV_ALLOCATED) + kfree(kiov->iov); + kiov->max_num = kiov->used = kiov->i = kiov->consumed = 0; + kiov->iov = NULL; +} + +int vringh_getdesc_kern(struct vringh *vrh, + struct vringh_kiov *riov, + struct vringh_kiov *wiov, + u16 *head, + gfp_t gfp); + +ssize_t vringh_iov_pull_kern(struct vringh_kiov *riov, void *dst, size_t len); +ssize_t vringh_iov_push_kern(struct vringh_kiov *wiov, + const void *src, size_t len); +void vringh_abandon_kern(struct vringh *vrh, unsigned int num); +int vringh_complete_kern(struct vringh *vrh, u16 head, u32 len); + +bool vringh_notify_enable_kern(struct vringh *vrh); +void vringh_notify_disable_kern(struct vringh *vrh); + +int vringh_need_notify_kern(struct vringh *vrh); + +#endif /* _LINUX_VRINGH_H */ -- cgit From 3beee86a4b9374e38dba36b44e81f1423a0d6b54 Mon Sep 17 00:00:00 2001 From: Sjur Brændeland Date: Wed, 20 Mar 2013 13:51:24 +1030 Subject: virtio: Introduce vringh wrappers in virtio_config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add wrappers for the host vrings to support loose coupling between the virtio device and driver. A new struct vringh_config_ops with the functions find_vrhs() and del_vrhs() is added to the virtio_device struct. This enables virtio drivers to manage virtio host rings without detailed knowledge of how the vrings are created and deleted. The function vringh_notify() is added so vringh clients can notify the other side that buffers are added to the used-ring. Cc: Ohad Ben-Cohen Signed-off-by: Sjur Brændeland Signed-off-by: Rusty Russell (constified vringh_config) --- include/linux/virtio.h | 3 +++ include/linux/vringh.h | 29 +++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) (limited to 'include/linux') diff --git a/include/linux/virtio.h b/include/linux/virtio.h index ff6714e6d0f5..5d5b3abc283d 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -8,6 +8,7 @@ #include #include #include +#include /** * virtqueue - a queue to register buffers for sending or receiving. @@ -70,6 +71,7 @@ static inline unsigned int virtqueue_get_queue_index(struct virtqueue *vq) * @dev: underlying device. * @id: the device type identification (used to match it with a driver). * @config: the configuration ops for this device. + * @vringh_config: configuration ops for host vrings. * @vqs: the list of virtqueues for this device. * @features: the features supported by both driver and device. * @priv: private pointer for the driver's use. 
@@ -79,6 +81,7 @@ struct virtio_device {
 	struct device dev;
 	struct virtio_device_id id;
 	const struct virtio_config_ops *config;
+	const struct vringh_config_ops *vringh_config;
 	struct list_head vqs;
 	/* Note that this is a Linux set_bit-style bitmap. */
 	unsigned long features[1];
diff --git a/include/linux/vringh.h b/include/linux/vringh.h
index b8f086625c49..749cde28728b 100644
--- a/include/linux/vringh.h
+++ b/include/linux/vringh.h
@@ -47,6 +47,28 @@ struct vringh {
 
 	/* The vring (note: it may contain user pointers!) */
 	struct vring vring;
+
+	/* The function to call to notify the guest about added buffers */
+	void (*notify)(struct vringh *);
+};
+
+/**
+ * struct vringh_config_ops - ops for creating a host vring from a virtio driver
+ * @find_vrhs: find the host vrings and instantiate them
+ *	vdev: the virtio_device
+ *	nhvrs: the number of host vrings to find
+ *	vrhs: on success, includes new host vrings
+ *	callbacks: array of driver callbacks, for each host vring
+ *		include a NULL entry for vqs that do not need a callback
+ *	Returns 0 on success or error status
+ * @del_vrhs: free the host vrings found by find_vrhs().
+ */
+struct virtio_device;
+typedef void vrh_callback_t(struct virtio_device *, struct vringh *);
+struct vringh_config_ops {
+	int (*find_vrhs)(struct virtio_device *vdev, unsigned nhvrs,
+			 struct vringh *vrhs[], vrh_callback_t *callbacks[]);
+	void (*del_vrhs)(struct virtio_device *vdev);
 };
 
 /* The memory the vring can access, and what offset to apply. */
@@ -193,4 +215,11 @@ void vringh_notify_disable_kern(struct vringh *vrh);
 
 int vringh_need_notify_kern(struct vringh *vrh);
 
+/* Notify the guest about buffers added to the used ring */
+static inline void vringh_notify(struct vringh *vrh)
+{
+	if (vrh->notify)
+		vrh->notify(vrh);
+}
+
 #endif /* _LINUX_VRINGH_H */
-- cgit 
From 0d2e1a2926b1839a4b74519e660739b2566c9386 Mon Sep 17 00:00:00 2001
From: Erwan Yvin
Date: Wed, 20 Mar 2013 13:52:24 +1030
Subject: caif_virtio: Introduce caif over virtio

Add the CAIF Virtio shared memory driver for talking to a modem.

This CAIF Link layer communicates to the modem over shared memory.
It is implemented as a virtio_driver. The underlying virtio device
is managed by the remoteproc framework. The Virtio queue is used for
transmitting data to the modem, and the new vringh is used for
receiving data.

Genalloc is used for managing the shared memory used for TX data.
The default dma-alloc-coherent allocator can only allocate whole
pages, and this wastes too much shared memory.

Flow control is implemented by stopping the TX-queues if the virtio
queues go full or we run out of memory. Queues are reopened when the
number of free descriptors comes back above the watermark.

NAPI is used in the RX path, and a dedicated tasklet is used for
releasing TX buffers.

Signed-off-by: Erwan Yvin
Acked-by: David S. Miller
Signed-off-by: Rusty Russell (minor fixes)
---
 drivers/net/caif/Kconfig        |  14 +
 drivers/net/caif/Makefile       |   3 +
 drivers/net/caif/caif_virtio.c  | 785 ++++++++++++++++++++++++++++++++++++++++
 include/linux/virtio_caif.h     |  24 ++
 include/uapi/linux/virtio_ids.h |   1 +
 5 files changed, 827 insertions(+)
 create mode 100644 drivers/net/caif/caif_virtio.c
 create mode 100644 include/linux/virtio_caif.h

(limited to 'include/linux')

diff --git a/drivers/net/caif/Kconfig b/drivers/net/caif/Kconfig
index 60c2142373c9..893f9154011e 100644
--- a/drivers/net/caif/Kconfig
+++ b/drivers/net/caif/Kconfig
@@ -47,3 +47,17 @@ config CAIF_HSI
 	The caif low level driver for CAIF over HSI.
Be aware that if you enable this then you also need to enable a low-level HSI driver. + +config CAIF_VIRTIO + tristate "CAIF virtio transport driver" + depends on CAIF + select VHOST_RING + select VIRTIO + select GENERIC_ALLOCATOR + default n + ---help--- + The caif driver for CAIF over Virtio. + +if CAIF_VIRTIO +source "drivers/vhost/Kconfig" +endif diff --git a/drivers/net/caif/Makefile b/drivers/net/caif/Makefile index 91dff861560f..d9ee26a96c6e 100644 --- a/drivers/net/caif/Makefile +++ b/drivers/net/caif/Makefile @@ -13,3 +13,6 @@ obj-$(CONFIG_CAIF_SHM) += caif_shm.o # HSI interface obj-$(CONFIG_CAIF_HSI) += caif_hsi.o + +# Virtio interface +obj-$(CONFIG_CAIF_VIRTIO) += caif_virtio.o diff --git a/drivers/net/caif/caif_virtio.c b/drivers/net/caif/caif_virtio.c new file mode 100644 index 000000000000..b1e1205e4e28 --- /dev/null +++ b/drivers/net/caif/caif_virtio.c @@ -0,0 +1,785 @@ +/* + * Copyright (C) ST-Ericsson AB 2013 + * Authors: Vicram Arv / vikram.arv@stericsson.com, + * Dmitry Tarnyagin / dmitry.tarnyagin@stericsson.com + * Sjur Brendeland / sjur.brandeland@stericsson.com + * License terms: GNU General Public License (GPL) version 2 + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Vicram Arv "); +MODULE_AUTHOR("Sjur Brendeland "); +MODULE_DESCRIPTION("Virtio CAIF Driver"); + +/* NAPI schedule quota */ +#define CFV_DEFAULT_QUOTA 32 + +/* Defaults used if virtio config space is unavailable */ +#define CFV_DEF_MTU_SIZE 4096 +#define CFV_DEF_HEADROOM 32 +#define CFV_DEF_TAILROOM 32 + +/* Required IP header alignment */ +#define IP_HDR_ALIGN 4 + +/* struct cfv_napi_contxt - NAPI context info + * @riov: IOV holding data read from the ring. Note that riov may + * still hold data when cfv_rx_poll() returns. + * @head: Last descriptor ID we received from vringh_getdesc_kern. + * We use this to put descriptor back on the used ring. USHRT_MAX is + * used to indicate invalid head-id. + */ +struct cfv_napi_context { + struct vringh_kiov riov; + unsigned short head; +}; + +/* struct cfv_stats - statistics for debugfs + * @rx_napi_complete: Number of NAPI completions (RX) + * @rx_napi_resched: Number of calls where the full quota was used (RX) + * @rx_nomem: Number of SKB alloc failures (RX) + * @rx_kicks: Number of RX kicks + * @tx_full_ring: Number times TX ring was full + * @tx_no_mem: Number of times TX went out of memory + * @tx_flow_on: Number of flow on (TX) + * @tx_kicks: Number of TX kicks + */ +struct cfv_stats { + u32 rx_napi_complete; + u32 rx_napi_resched; + u32 rx_nomem; + u32 rx_kicks; + u32 tx_full_ring; + u32 tx_no_mem; + u32 tx_flow_on; + u32 tx_kicks; +}; + +/* struct cfv_info - Caif Virtio control structure + * @cfdev: caif common header + * @vdev: Associated virtio device + * @vr_rx: rx/downlink host vring + * @vq_tx: tx/uplink virtqueue + * @ndev: CAIF link layer device + * @watermark_tx: indicates number of free descriptors we need + * to reopen the tx-queues after overload. 
+ * @tx_lock: protects vq_tx from concurrent use + * @tx_release_tasklet: Tasklet for freeing consumed TX buffers + * @napi: Napi context used in cfv_rx_poll() + * @ctx: Context data used in cfv_rx_poll() + * @tx_hr: transmit headroom + * @rx_hr: receive headroom + * @tx_tr: transmit tail room + * @rx_tr: receive tail room + * @mtu: transmit max size + * @mru: receive max size + * @allocsz: size of dma memory reserved for TX buffers + * @alloc_addr: virtual address to dma memory for TX buffers + * @alloc_dma: dma address to dma memory for TX buffers + * @genpool: Gen Pool used for allocating TX buffers + * @reserved_mem: Pointer to memory reserve allocated from genpool + * @reserved_size: Size of memory reserve allocated from genpool + * @stats: Statistics exposed in sysfs + * @debugfs: Debugfs dentry for statistic counters + */ +struct cfv_info { + struct caif_dev_common cfdev; + struct virtio_device *vdev; + struct vringh *vr_rx; + struct virtqueue *vq_tx; + struct net_device *ndev; + unsigned int watermark_tx; + /* Protect access to vq_tx */ + spinlock_t tx_lock; + struct tasklet_struct tx_release_tasklet; + struct napi_struct napi; + struct cfv_napi_context ctx; + u16 tx_hr; + u16 rx_hr; + u16 tx_tr; + u16 rx_tr; + u32 mtu; + u32 mru; + size_t allocsz; + void *alloc_addr; + dma_addr_t alloc_dma; + struct gen_pool *genpool; + unsigned long reserved_mem; + size_t reserved_size; + struct cfv_stats stats; + struct dentry *debugfs; +}; + +/* struct buf_info - maintains transmit buffer data handle + * @size: size of transmit buffer + * @dma_handle: handle to allocated dma device memory area + * @vaddr: virtual address mapping to allocated memory area + */ +struct buf_info { + size_t size; + u8 *vaddr; +}; + +/* Called from virtio device, in IRQ context */ +static void cfv_release_cb(struct virtqueue *vq_tx) +{ + struct cfv_info *cfv = vq_tx->vdev->priv; + + ++cfv->stats.tx_kicks; + tasklet_schedule(&cfv->tx_release_tasklet); +} + +static void free_buf_info(struct cfv_info *cfv, struct buf_info *buf_info) +{ + if (!buf_info) + return; + gen_pool_free(cfv->genpool, (unsigned long) buf_info->vaddr, + buf_info->size); + kfree(buf_info); +} + +/* This is invoked whenever the remote processor completed processing + * a TX msg we just sent, and the buffer is put back to the used ring. + */ +static void cfv_release_used_buf(struct virtqueue *vq_tx) +{ + struct cfv_info *cfv = vq_tx->vdev->priv; + unsigned long flags; + + BUG_ON(vq_tx != cfv->vq_tx); + + for (;;) { + unsigned int len; + struct buf_info *buf_info; + + /* Get used buffer from used ring to recycle used descriptors */ + spin_lock_irqsave(&cfv->tx_lock, flags); + buf_info = virtqueue_get_buf(vq_tx, &len); + spin_unlock_irqrestore(&cfv->tx_lock, flags); + + /* Stop looping if there are no more buffers to free */ + if (!buf_info) + break; + + free_buf_info(cfv, buf_info); + + /* watermark_tx indicates if we previously stopped the tx + * queues. If we have enough free stots in the virtio ring, + * re-establish memory reserved and open up tx queues. + */ + if (cfv->vq_tx->num_free <= cfv->watermark_tx) + continue; + + /* Re-establish memory reserve */ + if (cfv->reserved_mem == 0 && cfv->genpool) + cfv->reserved_mem = + gen_pool_alloc(cfv->genpool, + cfv->reserved_size); + + /* Open up the tx queues */ + if (cfv->reserved_mem) { + cfv->watermark_tx = + virtqueue_get_vring_size(cfv->vq_tx); + netif_tx_wake_all_queues(cfv->ndev); + /* Buffers are recycled in cfv_netdev_tx, so + * disable notifications when queues are opened. 
+			 */
+			virtqueue_disable_cb(cfv->vq_tx);
+			++cfv->stats.tx_flow_on;
+		} else {
+			/* if no memory reserve, wait for more free slots */
+			WARN_ON(cfv->watermark_tx >
+				virtqueue_get_vring_size(cfv->vq_tx));
+			cfv->watermark_tx +=
+				virtqueue_get_vring_size(cfv->vq_tx) / 4;
+		}
+	}
+}
+
+/* Allocate an SKB and copy packet data to it */
+static struct sk_buff *cfv_alloc_and_copy_skb(int *err,
+					      struct cfv_info *cfv,
+					      u8 *frm, u32 frm_len)
+{
+	struct sk_buff *skb;
+	u32 cfpkt_len, pad_len;
+
+	*err = 0;
+	/* Verify the frame length against the MRU and down-link head/tail room */
+	if (frm_len > cfv->mru || frm_len <= cfv->rx_hr + cfv->rx_tr) {
+		netdev_err(cfv->ndev,
+			   "Invalid frmlen:%u mtu:%u hr:%d tr:%d\n",
+			   frm_len, cfv->mru, cfv->rx_hr,
+			   cfv->rx_tr);
+		*err = -EPROTO;
+		return NULL;
+	}
+
+	cfpkt_len = frm_len - (cfv->rx_hr + cfv->rx_tr);
+	pad_len = (unsigned long)(frm + cfv->rx_hr) & (IP_HDR_ALIGN - 1);
+
+	skb = netdev_alloc_skb(cfv->ndev, frm_len + pad_len);
+	if (!skb) {
+		*err = -ENOMEM;
+		return NULL;
+	}
+
+	skb_reserve(skb, cfv->rx_hr + pad_len);
+
+	memcpy(skb_put(skb, cfpkt_len), frm + cfv->rx_hr, cfpkt_len);
+	return skb;
+}
+
+/* Get packets from the host vring */
+static int cfv_rx_poll(struct napi_struct *napi, int quota)
+{
+	struct cfv_info *cfv = container_of(napi, struct cfv_info, napi);
+	int rxcnt = 0;
+	int err = 0;
+	void *buf;
+	struct sk_buff *skb;
+	struct vringh_kiov *riov = &cfv->ctx.riov;
+	unsigned int skb_len;
+
+again:
+	do {
+		skb = NULL;
+
+		/* Put the previous iovec back on the used ring and
+		 * fetch a new iovec if we have processed all elements.
+		 */
+		if (riov->i == riov->used) {
+			if (cfv->ctx.head != USHRT_MAX) {
+				vringh_complete_kern(cfv->vr_rx,
+						     cfv->ctx.head,
+						     0);
+				cfv->ctx.head = USHRT_MAX;
+			}
+
+			err = vringh_getdesc_kern(
+				cfv->vr_rx,
+				riov,
+				NULL,
+				&cfv->ctx.head,
+				GFP_ATOMIC);
+
+			if (err <= 0)
+				goto exit;
+		}
+
+		buf = phys_to_virt((unsigned long) riov->iov[riov->i].iov_base);
+		/* TODO: Add check on valid buffer address */
+
+		skb = cfv_alloc_and_copy_skb(&err, cfv, buf,
+					     riov->iov[riov->i].iov_len);
+		if (unlikely(err))
+			goto exit;
+
+		/* Push received packet up the stack. */
+		skb_len = skb->len;
+		skb->protocol = htons(ETH_P_CAIF);
+		skb_reset_mac_header(skb);
+		skb->dev = cfv->ndev;
+		err = netif_receive_skb(skb);
+		if (unlikely(err)) {
+			++cfv->ndev->stats.rx_dropped;
+		} else {
+			++cfv->ndev->stats.rx_packets;
+			cfv->ndev->stats.rx_bytes += skb_len;
+		}
+
+		++riov->i;
+		++rxcnt;
+	} while (rxcnt < quota);
+
+	++cfv->stats.rx_napi_resched;
+	goto out;
+
+exit:
+	switch (err) {
+	case 0:
+		++cfv->stats.rx_napi_complete;
+
+		/* Really out of packets?
(stolen from virtio_net)*/ + napi_complete(napi); + if (unlikely(vringh_notify_enable_kern(cfv->vr_rx)) && + napi_schedule_prep(napi)) { + vringh_notify_disable_kern(cfv->vr_rx); + __napi_schedule(napi); + goto again; + } + break; + + case -ENOMEM: + ++cfv->stats.rx_nomem; + dev_kfree_skb(skb); + /* Stop NAPI poll on OOM, we hope to be polled later */ + napi_complete(napi); + vringh_notify_enable_kern(cfv->vr_rx); + break; + + default: + /* We're doomed, any modem fault is fatal */ + netdev_warn(cfv->ndev, "Bad ring, disable device\n"); + cfv->ndev->stats.rx_dropped = riov->used - riov->i; + napi_complete(napi); + vringh_notify_disable_kern(cfv->vr_rx); + netif_carrier_off(cfv->ndev); + break; + } +out: + if (rxcnt && vringh_need_notify_kern(cfv->vr_rx) > 0) + vringh_notify(cfv->vr_rx); + return rxcnt; +} + +static void cfv_recv(struct virtio_device *vdev, struct vringh *vr_rx) +{ + struct cfv_info *cfv = vdev->priv; + + ++cfv->stats.rx_kicks; + vringh_notify_disable_kern(cfv->vr_rx); + napi_schedule(&cfv->napi); +} + +static void cfv_destroy_genpool(struct cfv_info *cfv) +{ + if (cfv->alloc_addr) + dma_free_coherent(cfv->vdev->dev.parent->parent, + cfv->allocsz, cfv->alloc_addr, + cfv->alloc_dma); + + if (!cfv->genpool) + return; + gen_pool_free(cfv->genpool, cfv->reserved_mem, + cfv->reserved_size); + gen_pool_destroy(cfv->genpool); + cfv->genpool = NULL; +} + +static int cfv_create_genpool(struct cfv_info *cfv) +{ + int err; + + /* dma_alloc can only allocate whole pages, and we need a more + * fine graned allocation so we use genpool. We ask for space needed + * by IP and a full ring. If the dma allcoation fails we retry with a + * smaller allocation size. + */ + err = -ENOMEM; + cfv->allocsz = (virtqueue_get_vring_size(cfv->vq_tx) * + (ETH_DATA_LEN + cfv->tx_hr + cfv->tx_tr) * 11)/10; + if (cfv->allocsz <= (num_possible_cpus() + 1) * cfv->ndev->mtu) + return -EINVAL; + + for (;;) { + if (cfv->allocsz <= num_possible_cpus() * cfv->ndev->mtu) { + netdev_info(cfv->ndev, "Not enough device memory\n"); + return -ENOMEM; + } + + cfv->alloc_addr = dma_alloc_coherent( + cfv->vdev->dev.parent->parent, + cfv->allocsz, &cfv->alloc_dma, + GFP_ATOMIC); + if (cfv->alloc_addr) + break; + + cfv->allocsz = (cfv->allocsz * 3) >> 2; + } + + netdev_dbg(cfv->ndev, "Allocated %zd bytes from dma-memory\n", + cfv->allocsz); + + /* Allocate on 128 bytes boundaries (1 << 7)*/ + cfv->genpool = gen_pool_create(7, -1); + if (!cfv->genpool) + goto err; + + err = gen_pool_add_virt(cfv->genpool, (unsigned long)cfv->alloc_addr, + (phys_addr_t)virt_to_phys(cfv->alloc_addr), + cfv->allocsz, -1); + if (err) + goto err; + + /* Reserve some memory for low memory situations. If we hit the roof + * in the memory pool, we stop TX flow and release the reserve. 
+ */ + cfv->reserved_size = num_possible_cpus() * cfv->ndev->mtu; + cfv->reserved_mem = gen_pool_alloc(cfv->genpool, + cfv->reserved_size); + if (!cfv->reserved_mem) + goto err; + + cfv->watermark_tx = virtqueue_get_vring_size(cfv->vq_tx); + return 0; +err: + cfv_destroy_genpool(cfv); + return err; +} + +/* Enable the CAIF interface and allocate the memory-pool */ +static int cfv_netdev_open(struct net_device *netdev) +{ + struct cfv_info *cfv = netdev_priv(netdev); + + if (cfv_create_genpool(cfv)) + return -ENOMEM; + + netif_carrier_on(netdev); + napi_enable(&cfv->napi); + + /* Schedule NAPI to read any pending packets */ + napi_schedule(&cfv->napi); + return 0; +} + +/* Disable the CAIF interface and free the memory-pool */ +static int cfv_netdev_close(struct net_device *netdev) +{ + struct cfv_info *cfv = netdev_priv(netdev); + unsigned long flags; + struct buf_info *buf_info; + + /* Disable interrupts, queues and NAPI polling */ + netif_carrier_off(netdev); + virtqueue_disable_cb(cfv->vq_tx); + vringh_notify_disable_kern(cfv->vr_rx); + napi_disable(&cfv->napi); + + /* Release any TX buffers on both used and avilable rings */ + cfv_release_used_buf(cfv->vq_tx); + spin_lock_irqsave(&cfv->tx_lock, flags); + while ((buf_info = virtqueue_detach_unused_buf(cfv->vq_tx))) + free_buf_info(cfv, buf_info); + spin_unlock_irqrestore(&cfv->tx_lock, flags); + + /* Release all dma allocated memory and destroy the pool */ + cfv_destroy_genpool(cfv); + return 0; +} + +/* Allocate a buffer in dma-memory and copy skb to it */ +static struct buf_info *cfv_alloc_and_copy_to_shm(struct cfv_info *cfv, + struct sk_buff *skb, + struct scatterlist *sg) +{ + struct caif_payload_info *info = (void *)&skb->cb; + struct buf_info *buf_info = NULL; + u8 pad_len, hdr_ofs; + + if (!cfv->genpool) + goto err; + + if (unlikely(cfv->tx_hr + skb->len + cfv->tx_tr > cfv->mtu)) { + netdev_warn(cfv->ndev, "Invalid packet len (%d > %d)\n", + cfv->tx_hr + skb->len + cfv->tx_tr, cfv->mtu); + goto err; + } + + buf_info = kmalloc(sizeof(struct buf_info), GFP_ATOMIC); + if (unlikely(!buf_info)) + goto err; + + /* Make the IP header aligned in tbe buffer */ + hdr_ofs = cfv->tx_hr + info->hdr_len; + pad_len = hdr_ofs & (IP_HDR_ALIGN - 1); + buf_info->size = cfv->tx_hr + skb->len + cfv->tx_tr + pad_len; + + /* allocate dma memory buffer */ + buf_info->vaddr = (void *)gen_pool_alloc(cfv->genpool, buf_info->size); + if (unlikely(!buf_info->vaddr)) + goto err; + + /* copy skbuf contents to send buffer */ + skb_copy_bits(skb, 0, buf_info->vaddr + cfv->tx_hr + pad_len, skb->len); + sg_init_one(sg, buf_info->vaddr + pad_len, + skb->len + cfv->tx_hr + cfv->rx_hr); + + return buf_info; +err: + kfree(buf_info); + return NULL; +} + +/* Put the CAIF packet on the virtio ring and kick the receiver */ +static int cfv_netdev_tx(struct sk_buff *skb, struct net_device *netdev) +{ + struct cfv_info *cfv = netdev_priv(netdev); + struct buf_info *buf_info; + struct scatterlist sg; + unsigned long flags; + bool flow_off = false; + int ret; + + /* garbage collect released buffers */ + cfv_release_used_buf(cfv->vq_tx); + spin_lock_irqsave(&cfv->tx_lock, flags); + + /* Flow-off check takes into account number of cpus to make sure + * virtqueue will not be overfilled in any possible smp conditions. 
+ * + * Flow-on is triggered when sufficient buffers are freed + */ + if (unlikely(cfv->vq_tx->num_free <= num_present_cpus())) { + flow_off = true; + cfv->stats.tx_full_ring++; + } + + /* If we run out of memory, we release the memory reserve and retry + * allocation. + */ + buf_info = cfv_alloc_and_copy_to_shm(cfv, skb, &sg); + if (unlikely(!buf_info)) { + cfv->stats.tx_no_mem++; + flow_off = true; + + if (cfv->reserved_mem && cfv->genpool) { + gen_pool_free(cfv->genpool, cfv->reserved_mem, + cfv->reserved_size); + cfv->reserved_mem = 0; + buf_info = cfv_alloc_and_copy_to_shm(cfv, skb, &sg); + } + } + + if (unlikely(flow_off)) { + /* Turn flow on when a 1/4 of the descriptors are released */ + cfv->watermark_tx = virtqueue_get_vring_size(cfv->vq_tx) / 4; + /* Enable notifications of recycled TX buffers */ + virtqueue_enable_cb(cfv->vq_tx); + netif_tx_stop_all_queues(netdev); + } + + if (unlikely(!buf_info)) { + /* If the memory reserve does it's job, this shouldn't happen */ + netdev_warn(cfv->ndev, "Out of gen_pool memory\n"); + goto err; + } + + ret = virtqueue_add_buf(cfv->vq_tx, &sg, 1, 0, + buf_info, GFP_ATOMIC); + if (unlikely((ret < 0))) { + /* If flow control works, this shouldn't happen */ + netdev_warn(cfv->ndev, "Failed adding buffer to TX vring:%d\n", + ret); + goto err; + } + + /* update netdev statistics */ + cfv->ndev->stats.tx_packets++; + cfv->ndev->stats.tx_bytes += skb->len; + spin_unlock_irqrestore(&cfv->tx_lock, flags); + + /* tell the remote processor it has a pending message to read */ + virtqueue_kick(cfv->vq_tx); + + dev_kfree_skb(skb); + return NETDEV_TX_OK; +err: + spin_unlock_irqrestore(&cfv->tx_lock, flags); + cfv->ndev->stats.tx_dropped++; + free_buf_info(cfv, buf_info); + dev_kfree_skb(skb); + return NETDEV_TX_OK; +} + +static void cfv_tx_release_tasklet(unsigned long drv) +{ + struct cfv_info *cfv = (struct cfv_info *)drv; + cfv_release_used_buf(cfv->vq_tx); +} + +static const struct net_device_ops cfv_netdev_ops = { + .ndo_open = cfv_netdev_open, + .ndo_stop = cfv_netdev_close, + .ndo_start_xmit = cfv_netdev_tx, +}; + +static void cfv_netdev_setup(struct net_device *netdev) +{ + netdev->netdev_ops = &cfv_netdev_ops; + netdev->type = ARPHRD_CAIF; + netdev->tx_queue_len = 100; + netdev->flags = IFF_POINTOPOINT | IFF_NOARP; + netdev->mtu = CFV_DEF_MTU_SIZE; + netdev->destructor = free_netdev; +} + +/* Create debugfs counters for the device */ +static inline void debugfs_init(struct cfv_info *cfv) +{ + cfv->debugfs = + debugfs_create_dir(netdev_name(cfv->ndev), NULL); + + if (IS_ERR(cfv->debugfs)) + return; + + debugfs_create_u32("rx-napi-complete", S_IRUSR, cfv->debugfs, + &cfv->stats.rx_napi_complete); + debugfs_create_u32("rx-napi-resched", S_IRUSR, cfv->debugfs, + &cfv->stats.rx_napi_resched); + debugfs_create_u32("rx-nomem", S_IRUSR, cfv->debugfs, + &cfv->stats.rx_nomem); + debugfs_create_u32("rx-kicks", S_IRUSR, cfv->debugfs, + &cfv->stats.rx_kicks); + debugfs_create_u32("tx-full-ring", S_IRUSR, cfv->debugfs, + &cfv->stats.tx_full_ring); + debugfs_create_u32("tx-no-mem", S_IRUSR, cfv->debugfs, + &cfv->stats.tx_no_mem); + debugfs_create_u32("tx-kicks", S_IRUSR, cfv->debugfs, + &cfv->stats.tx_kicks); + debugfs_create_u32("tx-flow-on", S_IRUSR, cfv->debugfs, + &cfv->stats.tx_flow_on); +} + +/* Setup CAIF for the a virtio device */ +static int cfv_probe(struct virtio_device *vdev) +{ + vq_callback_t *vq_cbs = cfv_release_cb; + vrh_callback_t *vrh_cbs = cfv_recv; + const char *names = "output"; + const char *cfv_netdev_name = "cfvrt"; + struct net_device 
*netdev; + struct cfv_info *cfv; + int err = -EINVAL; + + netdev = alloc_netdev(sizeof(struct cfv_info), cfv_netdev_name, + cfv_netdev_setup); + if (!netdev) + return -ENOMEM; + + cfv = netdev_priv(netdev); + cfv->vdev = vdev; + cfv->ndev = netdev; + + spin_lock_init(&cfv->tx_lock); + + /* Get the RX virtio ring. This is a "host side vring". */ + err = vdev->vringh_config->find_vrhs(vdev, 1, &cfv->vr_rx, &vrh_cbs); + if (err) + goto err; + + /* Get the TX virtio ring. This is a "guest side vring". */ + err = vdev->config->find_vqs(vdev, 1, &cfv->vq_tx, &vq_cbs, &names); + if (err) + goto err; + + /* Get the CAIF configuration from virtio config space, if available */ +#define GET_VIRTIO_CONFIG_OPS(_v, _var, _f) \ + ((_v)->config->get(_v, offsetof(struct virtio_caif_transf_config, _f), \ + &_var, \ + FIELD_SIZEOF(struct virtio_caif_transf_config, _f))) + + if (vdev->config->get) { + GET_VIRTIO_CONFIG_OPS(vdev, cfv->tx_hr, headroom); + GET_VIRTIO_CONFIG_OPS(vdev, cfv->rx_hr, headroom); + GET_VIRTIO_CONFIG_OPS(vdev, cfv->tx_tr, tailroom); + GET_VIRTIO_CONFIG_OPS(vdev, cfv->rx_tr, tailroom); + GET_VIRTIO_CONFIG_OPS(vdev, cfv->mtu, mtu); + GET_VIRTIO_CONFIG_OPS(vdev, cfv->mru, mtu); + } else { + cfv->tx_hr = CFV_DEF_HEADROOM; + cfv->rx_hr = CFV_DEF_HEADROOM; + cfv->tx_tr = CFV_DEF_TAILROOM; + cfv->rx_tr = CFV_DEF_TAILROOM; + cfv->mtu = CFV_DEF_MTU_SIZE; + cfv->mru = CFV_DEF_MTU_SIZE; + } + + netdev->needed_headroom = cfv->tx_hr; + netdev->needed_tailroom = cfv->tx_tr; + + /* Disable buffer release interrupts unless we have stopped TX queues */ + virtqueue_disable_cb(cfv->vq_tx); + + netdev->mtu = cfv->mtu - cfv->tx_tr; + vdev->priv = cfv; + + /* Initialize NAPI poll context data */ + vringh_kiov_init(&cfv->ctx.riov, NULL, 0); + cfv->ctx.head = USHRT_MAX; + netif_napi_add(netdev, &cfv->napi, cfv_rx_poll, CFV_DEFAULT_QUOTA); + + tasklet_init(&cfv->tx_release_tasklet, + cfv_tx_release_tasklet, + (unsigned long)cfv); + + /* Carrier is off until netdevice is opened */ + netif_carrier_off(netdev); + + /* register Netdev */ + err = register_netdev(netdev); + if (err) { + dev_err(&vdev->dev, "Unable to register netdev (%d)\n", err); + goto err; + } + + debugfs_init(cfv); + + return 0; +err: + netdev_warn(cfv->ndev, "CAIF Virtio probe failed:%d\n", err); + + if (cfv->vr_rx) + vdev->vringh_config->del_vrhs(cfv->vdev); + if (cfv->vdev) + vdev->config->del_vqs(cfv->vdev); + free_netdev(netdev); + return err; +} + +static void cfv_remove(struct virtio_device *vdev) +{ + struct cfv_info *cfv = vdev->priv; + + rtnl_lock(); + dev_close(cfv->ndev); + rtnl_unlock(); + + tasklet_kill(&cfv->tx_release_tasklet); + debugfs_remove_recursive(cfv->debugfs); + + vringh_kiov_cleanup(&cfv->ctx.riov); + vdev->config->reset(vdev); + vdev->vringh_config->del_vrhs(cfv->vdev); + cfv->vr_rx = NULL; + vdev->config->del_vqs(cfv->vdev); + unregister_netdev(cfv->ndev); +} + +static struct virtio_device_id id_table[] = { + { VIRTIO_ID_CAIF, VIRTIO_DEV_ANY_ID }, + { 0 }, +}; + +static unsigned int features[] = { +}; + +static struct virtio_driver caif_virtio_driver = { + .feature_table = features, + .feature_table_size = ARRAY_SIZE(features), + .driver.name = KBUILD_MODNAME, + .driver.owner = THIS_MODULE, + .id_table = id_table, + .probe = cfv_probe, + .remove = cfv_remove, +}; + +module_virtio_driver(caif_virtio_driver); +MODULE_DEVICE_TABLE(virtio, id_table); diff --git a/include/linux/virtio_caif.h b/include/linux/virtio_caif.h new file mode 100644 index 000000000000..5d2d3124ca3d --- /dev/null +++ b/include/linux/virtio_caif.h @@ 
-0,0 +1,24 @@
+/*
+ * Copyright (C) ST-Ericsson AB 2012
+ * Author: Sjur Brændeland
+ *
+ * This header is BSD licensed so
+ * anyone can use the definitions to implement compatible remote processors
+ */
+
+#ifndef VIRTIO_CAIF_H
+#define VIRTIO_CAIF_H
+
+#include
+struct virtio_caif_transf_config {
+	u16 headroom;
+	u16 tailroom;
+	u32 mtu;
+	u8 reserved[4];
+};
+
+struct virtio_caif_config {
+	struct virtio_caif_transf_config uplink, downlink;
+	u8 reserved[8];
+};
+#endif
diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h
index a7630d04029f..284fc3a05f7b 100644
--- a/include/uapi/linux/virtio_ids.h
+++ b/include/uapi/linux/virtio_ids.h
@@ -38,5 +38,6 @@
 #define VIRTIO_ID_SCSI		8 /* virtio scsi */
 #define VIRTIO_ID_9P		9 /* 9p virtio console */
 #define VIRTIO_ID_RPROC_SERIAL	11 /* virtio remoteproc serial link */
+#define VIRTIO_ID_CAIF		12 /* Virtio caif */
 
 #endif /* _LINUX_VIRTIO_IDS_H */
-- cgit 
From c8164d8931fdee9ac5314708c4071adf1d997425 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini
Date: Wed, 20 Mar 2013 15:37:08 +1030
Subject: scatterlist: introduce sg_unmark_end

This is useful in places that recycle the same scatterlist multiple
times, and do not want to incur the cost of sg_init_table every time
in hot paths.

Acked-by: Jens Axboe
Signed-off-by: Paolo Bonzini
Signed-off-by: Rusty Russell
---
 block/blk-integrity.c       |  2 +-
 block/blk-merge.c           |  2 +-
 include/linux/scatterlist.h | 16 ++++++++++++++++
 3 files changed, 18 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index dabd221857e1..03cf7179e8ef 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -110,7 +110,7 @@ new_segment:
 		if (!sg)
 			sg = sglist;
 		else {
-			sg->page_link &= ~0x02;
+			sg_unmark_end(sg);
 			sg = sg_next(sg);
 		}
 
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 936a110de0b9..5f2448253797 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -143,7 +143,7 @@ new_segment:
 			 * termination bit to avoid doing a full
 			 * sg_init_table() in drivers for each command.
 			 */
-			(*sg)->page_link &= ~0x02;
+			sg_unmark_end(*sg);
 			*sg = sg_next(*sg);
 		}
 
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index 2d8bdaef9611..bfc47e0de81c 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -171,6 +171,22 @@ static inline void sg_mark_end(struct scatterlist *sg)
 	sg->page_link &= ~0x01;
 }
 
+/**
+ * sg_unmark_end - Undo setting the end of the scatterlist
+ * @sg:		 SG entry
+ *
+ * Description:
+ *   Removes the termination marker from the given entry of the scatterlist.
+ *
+ **/
+static inline void sg_unmark_end(struct scatterlist *sg)
+{
+#ifdef CONFIG_DEBUG_SG
+	BUG_ON(sg->sg_magic != SG_MAGIC);
+#endif
+	sg->page_link &= ~0x02;
+}
+
 /**
  * sg_phys - Return physical address of an sg entry
  * @sg:	 SG entry
-- cgit 
From 13816c768d46586e925b22736992258d6105ad2c Mon Sep 17 00:00:00 2001
From: Rusty Russell
Date: Wed, 20 Mar 2013 15:37:09 +1030
Subject: virtio_ring: virtqueue_add_sgs, to add multiple sgs.

virtio_scsi can really use this, to avoid the current hack of copying
the whole sg array. Some other things get slightly neater, too.

This causes a slowdown in virtqueue_add_buf(), which is implemented
as a wrapper. This is addressed in the next patches.
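
As a minimal usage sketch of the new call (the names vq, req and its
fields below are hypothetical, not taken from this patch), a caller
with two readable buffers and one writable buffer hands over one
terminated scatterlist per buffer, readable lists first:

	/* Hypothetical caller: two readable sg lists, then one writable. */
	struct scatterlist hdr, out, in, *sgs[3];
	int err;

	sg_init_one(&hdr, &req->hdr, sizeof(req->hdr));		/* readable */
	sg_init_one(&out, req->data, req->data_len);		/* readable */
	sg_init_one(&in, &req->resp, sizeof(req->resp));	/* writable */

	sgs[0] = &hdr;
	sgs[1] = &out;
	sgs[2] = &in;

	/* out_sgs = 2, in_sgs = 1: readable lists must precede writable ones. */
	err = virtqueue_add_sgs(vq, sgs, 2, 1, req, GFP_ATOMIC);
	if (err < 0)
		return err;	/* e.g. -ENOSPC if the ring is full */
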
for i in `seq 50`; do /usr/bin/time -f 'Wall time:%e' ./vringh_test --indirect --eventidx --parallel --fast-vringh; done 2>&1 | stats --trim-outliers: Before: Using CPUS 0 and 3 Guest: notified 0, pinged 39009-39063(39062) Host: notified 39009-39063(39062), pinged 0 Wall time:1.700000-1.950000(1.723542) After: Using CPUS 0 and 3 Guest: notified 0, pinged 39062-39063(39063) Host: notified 39062-39063(39063), pinged 0 Wall time:1.760000-2.220000(1.789167) Signed-off-by: Rusty Russell Reviewed-by: Wanlong Gao Reviewed-by: Asias He --- drivers/virtio/virtio_ring.c | 220 ++++++++++++++++++++++++++++----------- include/linux/virtio.h | 7 ++ tools/virtio/linux/scatterlist.h | 16 +++ tools/virtio/linux/virtio.h | 7 ++ 4 files changed, 187 insertions(+), 63 deletions(-) (limited to 'include/linux') diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 245177c286ae..a78ad459cc85 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -98,16 +98,36 @@ struct vring_virtqueue #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) +static inline struct scatterlist *sg_next_chained(struct scatterlist *sg, + unsigned int *count) +{ + return sg_next(sg); +} + +static inline struct scatterlist *sg_next_arr(struct scatterlist *sg, + unsigned int *count) +{ + if (--(*count) == 0) + return NULL; + return sg + 1; +} + /* Set up an indirect table of descriptors and add it to the queue. */ -static int vring_add_indirect(struct vring_virtqueue *vq, - struct scatterlist sg[], - unsigned int out, - unsigned int in, - gfp_t gfp) +static inline int vring_add_indirect(struct vring_virtqueue *vq, + struct scatterlist *sgs[], + struct scatterlist *(*next) + (struct scatterlist *, unsigned int *), + unsigned int total_sg, + unsigned int total_out, + unsigned int total_in, + unsigned int out_sgs, + unsigned int in_sgs, + gfp_t gfp) { struct vring_desc *desc; unsigned head; - int i; + struct scatterlist *sg; + int i, n; /* * We require lowmem mappings for the descriptors because @@ -116,25 +136,31 @@ static int vring_add_indirect(struct vring_virtqueue *vq, */ gfp &= ~(__GFP_HIGHMEM | __GFP_HIGH); - desc = kmalloc((out + in) * sizeof(struct vring_desc), gfp); + desc = kmalloc(total_sg * sizeof(struct vring_desc), gfp); if (!desc) return -ENOMEM; - /* Transfer entries from the sg list into the indirect page */ - for (i = 0; i < out; i++) { - desc[i].flags = VRING_DESC_F_NEXT; - desc[i].addr = sg_phys(sg); - desc[i].len = sg->length; - desc[i].next = i+1; - sg++; + /* Transfer entries from the sg lists into the indirect page */ + i = 0; + for (n = 0; n < out_sgs; n++) { + for (sg = sgs[n]; sg; sg = next(sg, &total_out)) { + desc[i].flags = VRING_DESC_F_NEXT; + desc[i].addr = sg_phys(sg); + desc[i].len = sg->length; + desc[i].next = i+1; + i++; + } } - for (; i < (out + in); i++) { - desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE; - desc[i].addr = sg_phys(sg); - desc[i].len = sg->length; - desc[i].next = i+1; - sg++; + for (; n < (out_sgs + in_sgs); n++) { + for (sg = sgs[n]; sg; sg = next(sg, &total_in)) { + desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE; + desc[i].addr = sg_phys(sg); + desc[i].len = sg->length; + desc[i].next = i+1; + i++; + } } + BUG_ON(i != total_sg); /* Last one doesn't continue. */ desc[i-1].flags &= ~VRING_DESC_F_NEXT; @@ -155,29 +181,20 @@ static int vring_add_indirect(struct vring_virtqueue *vq, return head; } -/** - * virtqueue_add_buf - expose buffer to other end - * @vq: the struct virtqueue we're talking about. 
- * @sg: the description of the buffer(s). - * @out_num: the number of sg readable by other side - * @in_num: the number of sg which are writable (after readable ones) - * @data: the token identifying the buffer. - * @gfp: how to do memory allocations (if necessary). - * - * Caller must ensure we don't call this with other virtqueue operations - * at the same time (except where noted). - * - * Returns zero or a negative error (ie. ENOSPC, ENOMEM). - */ -int virtqueue_add_buf(struct virtqueue *_vq, - struct scatterlist sg[], - unsigned int out, - unsigned int in, - void *data, - gfp_t gfp) +static inline int virtqueue_add(struct virtqueue *_vq, + struct scatterlist *sgs[], + struct scatterlist *(*next) + (struct scatterlist *, unsigned int *), + unsigned int total_out, + unsigned int total_in, + unsigned int out_sgs, + unsigned int in_sgs, + void *data, + gfp_t gfp) { struct vring_virtqueue *vq = to_vvq(_vq); - unsigned int i, avail, uninitialized_var(prev); + struct scatterlist *sg; + unsigned int i, n, avail, uninitialized_var(prev), total_sg; int head; START_USE(vq); @@ -197,46 +214,54 @@ int virtqueue_add_buf(struct virtqueue *_vq, } #endif + total_sg = total_in + total_out; + /* If the host supports indirect descriptor tables, and we have multiple * buffers, then go indirect. FIXME: tune this threshold */ - if (vq->indirect && (out + in) > 1 && vq->vq.num_free) { - head = vring_add_indirect(vq, sg, out, in, gfp); + if (vq->indirect && total_sg > 1 && vq->vq.num_free) { + head = vring_add_indirect(vq, sgs, next, total_sg, total_out, + total_in, + out_sgs, in_sgs, gfp); if (likely(head >= 0)) goto add_head; } - BUG_ON(out + in > vq->vring.num); - BUG_ON(out + in == 0); + BUG_ON(total_sg > vq->vring.num); + BUG_ON(total_sg == 0); - if (vq->vq.num_free < out + in) { + if (vq->vq.num_free < total_sg) { pr_debug("Can't add buf len %i - avail = %i\n", - out + in, vq->vq.num_free); + total_sg, vq->vq.num_free); /* FIXME: for historical reasons, we force a notify here if * there are outgoing parts to the buffer. Presumably the * host should service the ring ASAP. */ - if (out) + if (out_sgs) vq->notify(&vq->vq); END_USE(vq); return -ENOSPC; } /* We're about to use some buffers from the free list. */ - vq->vq.num_free -= out + in; - - head = vq->free_head; - for (i = vq->free_head; out; i = vq->vring.desc[i].next, out--) { - vq->vring.desc[i].flags = VRING_DESC_F_NEXT; - vq->vring.desc[i].addr = sg_phys(sg); - vq->vring.desc[i].len = sg->length; - prev = i; - sg++; + vq->vq.num_free -= total_sg; + + head = i = vq->free_head; + for (n = 0; n < out_sgs; n++) { + for (sg = sgs[n]; sg; sg = next(sg, &total_out)) { + vq->vring.desc[i].flags = VRING_DESC_F_NEXT; + vq->vring.desc[i].addr = sg_phys(sg); + vq->vring.desc[i].len = sg->length; + prev = i; + i = vq->vring.desc[i].next; + } } - for (; in; i = vq->vring.desc[i].next, in--) { - vq->vring.desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE; - vq->vring.desc[i].addr = sg_phys(sg); - vq->vring.desc[i].len = sg->length; - prev = i; - sg++; + for (; n < (out_sgs + in_sgs); n++) { + for (sg = sgs[n]; sg; sg = next(sg, &total_in)) { + vq->vring.desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE; + vq->vring.desc[i].addr = sg_phys(sg); + vq->vring.desc[i].len = sg->length; + prev = i; + i = vq->vring.desc[i].next; + } } /* Last one doesn't continue. 
*/ vq->vring.desc[prev].flags &= ~VRING_DESC_F_NEXT; @@ -269,8 +294,77 @@ add_head: return 0; } + +/** + * virtqueue_add_buf - expose buffer to other end + * @vq: the struct virtqueue we're talking about. + * @sg: the description of the buffer(s). + * @out_num: the number of sg readable by other side + * @in_num: the number of sg which are writable (after readable ones) + * @data: the token identifying the buffer. + * @gfp: how to do memory allocations (if necessary). + * + * Caller must ensure we don't call this with other virtqueue operations + * at the same time (except where noted). + * + * Returns zero or a negative error (ie. ENOSPC, ENOMEM). + */ +int virtqueue_add_buf(struct virtqueue *_vq, + struct scatterlist sg[], + unsigned int out, + unsigned int in, + void *data, + gfp_t gfp) +{ + struct scatterlist *sgs[2]; + + sgs[0] = sg; + sgs[1] = sg + out; + + return virtqueue_add(_vq, sgs, sg_next_arr, + out, in, out ? 1 : 0, in ? 1 : 0, data, gfp); +} EXPORT_SYMBOL_GPL(virtqueue_add_buf); +/** + * virtqueue_add_sgs - expose buffers to other end + * @vq: the struct virtqueue we're talking about. + * @sgs: array of terminated scatterlists. + * @out_num: the number of scatterlists readable by other side + * @in_num: the number of scatterlists which are writable (after readable ones) + * @data: the token identifying the buffer. + * @gfp: how to do memory allocations (if necessary). + * + * Caller must ensure we don't call this with other virtqueue operations + * at the same time (except where noted). + * + * Returns zero or a negative error (ie. ENOSPC, ENOMEM). + */ +int virtqueue_add_sgs(struct virtqueue *_vq, + struct scatterlist *sgs[], + unsigned int out_sgs, + unsigned int in_sgs, + void *data, + gfp_t gfp) +{ + unsigned int i, total_out, total_in; + + /* Count them first. */ + for (i = total_out = total_in = 0; i < out_sgs; i++) { + struct scatterlist *sg; + for (sg = sgs[i]; sg; sg = sg_next(sg)) + total_out++; + } + for (; i < out_sgs + in_sgs; i++) { + struct scatterlist *sg; + for (sg = sgs[i]; sg; sg = sg_next(sg)) + total_in++; + } + return virtqueue_add(_vq, sgs, sg_next_chained, + total_out, total_in, out_sgs, in_sgs, data, gfp); +} +EXPORT_SYMBOL_GPL(virtqueue_add_sgs); + /** * virtqueue_kick_prepare - first half of split virtqueue_kick call. * @vq: the struct virtqueue diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 5d5b3abc283d..ac80288b2920 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -41,6 +41,13 @@ int virtqueue_add_buf(struct virtqueue *vq, void *data, gfp_t gfp); +int virtqueue_add_sgs(struct virtqueue *vq, + struct scatterlist *sgs[], + unsigned int out_sgs, + unsigned int in_sgs, + void *data, + gfp_t gfp); + void virtqueue_kick(struct virtqueue *vq); bool virtqueue_kick_prepare(struct virtqueue *vq); diff --git a/tools/virtio/linux/scatterlist.h b/tools/virtio/linux/scatterlist.h index b2cf7d0f6133..68c9e2adc996 100644 --- a/tools/virtio/linux/scatterlist.h +++ b/tools/virtio/linux/scatterlist.h @@ -125,6 +125,22 @@ static inline void sg_mark_end(struct scatterlist *sg) sg->page_link &= ~0x01; } +/** + * sg_unmark_end - Undo setting the end of the scatterlist + * @sg: SG entryScatterlist + * + * Description: + * Removes the termination marker from the given entry of the scatterlist. 
+ * + **/ +static inline void sg_unmark_end(struct scatterlist *sg) +{ +#ifdef CONFIG_DEBUG_SG + BUG_ON(sg->sg_magic != SG_MAGIC); +#endif + sg->page_link &= ~0x02; +} + static inline struct scatterlist *sg_next(struct scatterlist *sg) { #ifdef CONFIG_DEBUG_SG diff --git a/tools/virtio/linux/virtio.h b/tools/virtio/linux/virtio.h index e4af6591f5ff..5fa612ad932c 100644 --- a/tools/virtio/linux/virtio.h +++ b/tools/virtio/linux/virtio.h @@ -56,6 +56,13 @@ int virtqueue_add_buf(struct virtqueue *vq, void *data, gfp_t gfp); +int virtqueue_add_sgs(struct virtqueue *vq, + struct scatterlist *sgs[], + unsigned int out_sgs, + unsigned int in_sgs, + void *data, + gfp_t gfp); + void virtqueue_kick(struct virtqueue *vq); void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len); -- cgit From 282edb36499042a92b71f052f51754ae7ed936e4 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 20 Mar 2013 15:44:26 +1030 Subject: virtio_ring: virtqueue_add_outbuf / virtqueue_add_inbuf. These are specialized versions of virtqueue_add_buf(), which cover over 80% of cases and are far clearer. In particular, the scatterlists passed to these functions don't have to be clean (ie. we ignore end markers). Signed-off-by: Rusty Russell --- drivers/virtio/virtio_ring.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/virtio.h | 10 ++++++++++ 2 files changed, 54 insertions(+) (limited to 'include/linux') diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index a78ad459cc85..5217baf5528c 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -365,6 +365,50 @@ int virtqueue_add_sgs(struct virtqueue *_vq, } EXPORT_SYMBOL_GPL(virtqueue_add_sgs); +/** + * virtqueue_add_outbuf - expose output buffers to other end + * @vq: the struct virtqueue we're talking about. + * @sgs: array of scatterlists (need not be terminated!) + * @num: the number of scatterlists readable by other side + * @data: the token identifying the buffer. + * @gfp: how to do memory allocations (if necessary). + * + * Caller must ensure we don't call this with other virtqueue operations + * at the same time (except where noted). + * + * Returns zero or a negative error (ie. ENOSPC, ENOMEM). + */ +int virtqueue_add_outbuf(struct virtqueue *vq, + struct scatterlist sg[], unsigned int num, + void *data, + gfp_t gfp) +{ + return virtqueue_add(vq, &sg, sg_next_arr, num, 0, 1, 0, data, gfp); +} +EXPORT_SYMBOL_GPL(virtqueue_add_outbuf); + +/** + * virtqueue_add_inbuf - expose input buffers to other end + * @vq: the struct virtqueue we're talking about. + * @sgs: array of scatterlists (need not be terminated!) + * @num: the number of scatterlists writable by other side + * @data: the token identifying the buffer. + * @gfp: how to do memory allocations (if necessary). + * + * Caller must ensure we don't call this with other virtqueue operations + * at the same time (except where noted). + * + * Returns zero or a negative error (ie. ENOSPC, ENOMEM). + */ +int virtqueue_add_inbuf(struct virtqueue *vq, + struct scatterlist sg[], unsigned int num, + void *data, + gfp_t gfp) +{ + return virtqueue_add(vq, &sg, sg_next_arr, 0, num, 0, 1, data, gfp); +} +EXPORT_SYMBOL_GPL(virtqueue_add_inbuf); + /** * virtqueue_kick_prepare - first half of split virtqueue_kick call. 
* @vq: the struct virtqueue diff --git a/include/linux/virtio.h b/include/linux/virtio.h index ac80288b2920..833f17b6a743 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -41,6 +41,16 @@ int virtqueue_add_buf(struct virtqueue *vq, void *data, gfp_t gfp); +int virtqueue_add_outbuf(struct virtqueue *vq, + struct scatterlist sg[], unsigned int num, + void *data, + gfp_t gfp); + +int virtqueue_add_inbuf(struct virtqueue *vq, + struct scatterlist sg[], unsigned int num, + void *data, + gfp_t gfp); + int virtqueue_add_sgs(struct virtqueue *vq, struct scatterlist *sgs[], unsigned int out_sgs, -- cgit From 081aa458c38ba576bdd4265fc807fa95b48b9e79 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 13 Mar 2013 09:17:09 +0800 Subject: cgroup: consolidate cgroup_attach_task() and cgroup_attach_proc() These two functions share most of the code. Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 3 +- kernel/cgroup.c | 109 +++++++++---------------------------------------- kernel/cpuset.c | 2 +- 3 files changed, 23 insertions(+), 91 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 7e818a3ef60a..01c48c6806d6 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -693,7 +693,8 @@ struct task_struct *cgroup_iter_next(struct cgroup *cgrp, struct cgroup_iter *it); void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it); int cgroup_scan_tasks(struct cgroup_scanner *scan); -int cgroup_attach_task(struct cgroup *, struct task_struct *); +int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk, + bool threadgroup); int cgroup_attach_task_all(struct task_struct *from, struct task_struct *); /* diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 54689fc008f6..04fa2abf94b2 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -59,7 +59,7 @@ #include /* TODO: replace with more sophisticated array */ #include #include -#include /* used in cgroup_attach_proc */ +#include /* used in cgroup_attach_task */ #include #include @@ -1943,82 +1943,6 @@ static void cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp, put_css_set(oldcg); } -/** - * cgroup_attach_task - attach task 'tsk' to cgroup 'cgrp' - * @cgrp: the cgroup the task is attaching to - * @tsk: the task to be attached - * - * Call with cgroup_mutex and threadgroup locked. May take task_lock of - * @tsk during call. - */ -int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) -{ - int retval = 0; - struct cgroup_subsys *ss, *failed_ss = NULL; - struct cgroup *oldcgrp; - struct cgroupfs_root *root = cgrp->root; - struct cgroup_taskset tset = { }; - struct css_set *newcg; - - /* @tsk either already exited or can't exit until the end */ - if (tsk->flags & PF_EXITING) - return -ESRCH; - - /* Nothing to do if the task is already in that cgroup */ - oldcgrp = task_cgroup_from_root(tsk, root); - if (cgrp == oldcgrp) - return 0; - - tset.single.task = tsk; - tset.single.cgrp = oldcgrp; - - for_each_subsys(root, ss) { - if (ss->can_attach) { - retval = ss->can_attach(cgrp, &tset); - if (retval) { - /* - * Remember on which subsystem the can_attach() - * failed, so that we only call cancel_attach() - * against the subsystems whose can_attach() - * succeeded. 
(See below) - */ - failed_ss = ss; - goto out; - } - } - } - - newcg = find_css_set(tsk->cgroups, cgrp); - if (!newcg) { - retval = -ENOMEM; - goto out; - } - - cgroup_task_migrate(cgrp, oldcgrp, tsk, newcg); - - for_each_subsys(root, ss) { - if (ss->attach) - ss->attach(cgrp, &tset); - } - -out: - if (retval) { - for_each_subsys(root, ss) { - if (ss == failed_ss) - /* - * This subsystem was the one that failed the - * can_attach() check earlier, so we don't need - * to call cancel_attach() against it or any - * remaining subsystems. - */ - break; - if (ss->cancel_attach) - ss->cancel_attach(cgrp, &tset); - } - } - return retval; -} - /** * cgroup_attach_task_all - attach task 'tsk' to all cgroups of task 'from' * @from: attach to all cgroups of a given task @@ -2033,7 +1957,7 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk) for_each_active_root(root) { struct cgroup *from_cg = task_cgroup_from_root(from, root); - retval = cgroup_attach_task(from_cg, tsk); + retval = cgroup_attach_task(from_cg, tsk, false); if (retval) break; } @@ -2044,21 +1968,22 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk) EXPORT_SYMBOL_GPL(cgroup_attach_task_all); /** - * cgroup_attach_proc - attach all threads in a threadgroup to a cgroup + * cgroup_attach_task - attach a task or a whole threadgroup to a cgroup * @cgrp: the cgroup to attach to - * @leader: the threadgroup leader task_struct of the group to be attached + * @tsk: the task or the leader of the threadgroup to be attached + * @threadgroup: attach the whole threadgroup? * * Call holding cgroup_mutex and the group_rwsem of the leader. Will take - * task_lock of each thread in leader's threadgroup individually in turn. + * task_lock of @tsk or each thread in the threadgroup individually in turn. */ -static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) +int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk, + bool threadgroup) { int retval, i, group_size; struct cgroup_subsys *ss, *failed_ss = NULL; - /* guaranteed to be initialized later, but the compiler needs this */ struct cgroupfs_root *root = cgrp->root; /* threadgroup list cursor and array */ - struct task_struct *tsk; + struct task_struct *leader = tsk; struct task_and_cgroup *tc; struct flex_array *group; struct cgroup_taskset tset = { }; @@ -2070,7 +1995,10 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) * group - group_rwsem prevents new threads from appearing, and if * threads exit, this will just be an over-estimate. */ - group_size = get_nr_threads(leader); + if (threadgroup) + group_size = get_nr_threads(tsk); + else + group_size = 1; /* flex_array supports very large thread-groups better than kmalloc. */ group = flex_array_alloc(sizeof(*tc), group_size, GFP_KERNEL); if (!group) @@ -2080,7 +2008,6 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) if (retval) goto out_free_group_list; - tsk = leader; i = 0; /* * Prevent freeing of tasks while we take a snapshot. Tasks that are @@ -2109,6 +2036,9 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) retval = flex_array_put(group, i, &ent, GFP_ATOMIC); BUG_ON(retval != 0); i++; + + if (!threadgroup) + break; } while_each_thread(leader, tsk); rcu_read_unlock(); /* remember the number of threads in the array for later. 
*/ @@ -2262,9 +2192,10 @@ retry_find_task: put_task_struct(tsk); goto retry_find_task; } - ret = cgroup_attach_proc(cgrp, tsk); - } else - ret = cgroup_attach_task(cgrp, tsk); + } + + ret = cgroup_attach_task(cgrp, tsk, threadgroup); + threadgroup_unlock(tsk); put_task_struct(tsk); diff --git a/kernel/cpuset.c b/kernel/cpuset.c index efbfca7a33e4..98d458aad789 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2008,7 +2008,7 @@ static void cpuset_do_move_task(struct task_struct *tsk, struct cgroup *new_cgroup = scan->data; cgroup_lock(); - cgroup_attach_task(new_cgroup, tsk); + cgroup_attach_task(new_cgroup, tsk, false); cgroup_unlock(); } -- cgit From f77668dc25b27270fe589031b22c432c3462b1d8 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 19 Mar 2013 06:39:30 +0000 Subject: net: flow_dissector: add __skb_get_poff to get a start offset to payload __skb_get_poff() returns the offset to the payload as far as it could be dissected. The main user is currently BPF, so that we can dynamically truncate packets without needing to push actual payload to the user space and instead can analyze headers only. Suggested-by: Eric Dumazet Signed-off-by: Daniel Borkmann Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 ++ net/core/flow_dissector.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index b66ecc6ef102..497412165b1c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2840,6 +2840,8 @@ static inline void skb_checksum_none_assert(const struct sk_buff *skb) bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off); +u32 __skb_get_poff(const struct sk_buff *skb); + /** * skb_head_is_locked - Determine if the skb->head is locked down * @skb: skb to check diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index f4be293bab9e..00ee068efc1c 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -5,6 +5,10 @@ #include #include #include +#include +#include +#include +#include #include #include #include @@ -228,6 +232,59 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb, } EXPORT_SYMBOL(__skb_tx_hash); +/* __skb_get_poff() returns the offset to the payload as far as it could + * be dissected. The main user is currently BPF, so that we can dynamically + * truncate packets without needing to push actual payload to the user + * space and can analyze headers only, instead. + */ +u32 __skb_get_poff(const struct sk_buff *skb) +{ + struct flow_keys keys; + u32 poff = 0; + + if (!skb_flow_dissect(skb, &keys)) + return 0; + + poff += keys.thoff; + switch (keys.ip_proto) { + case IPPROTO_TCP: { + const struct tcphdr *tcph; + struct tcphdr _tcph; + + tcph = skb_header_pointer(skb, poff, sizeof(_tcph), &_tcph); + if (!tcph) + return poff; + + poff += max_t(u32, sizeof(struct tcphdr), tcph->doff * 4); + break; + } + case IPPROTO_UDP: + case IPPROTO_UDPLITE: + poff += sizeof(struct udphdr); + break; + /* For the rest, we do not really care about header + * extensions at this point for now. 
+ */ + case IPPROTO_ICMP: + poff += sizeof(struct icmphdr); + break; + case IPPROTO_ICMPV6: + poff += sizeof(struct icmp6hdr); + break; + case IPPROTO_IGMP: + poff += sizeof(struct igmphdr); + break; + case IPPROTO_DCCP: + poff += sizeof(struct dccp_hdr); + break; + case IPPROTO_SCTP: + poff += sizeof(struct sctphdr); + break; + } + + return poff; +} + static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index) { if (unlikely(queue_index >= dev->real_num_tx_queues)) { -- cgit From 3e5289d5e3f98b7b5b8cac32e9e5a7004c067436 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 19 Mar 2013 06:39:31 +0000 Subject: filter: add ANC_PAY_OFFSET instruction for loading payload start offset It is very useful to do dynamic truncation of packets. In particular, we're interested to push the necessary header bytes to the user space and cut off user payload that should probably not be transferred for some reasons (e.g. privacy, speed, or others). With the ancillary extension PAY_OFFSET, we can load it into the accumulator, and return it. E.g. in bpfc syntax ... ld #poff ; { 0x20, 0, 0, 0xfffff034 }, ret a ; { 0x16, 0, 0, 0x00000000 }, ... as a filter will accomplish this without having to do a big hackery in a BPF filter itself. Follow-up JIT implementations are welcome. Thanks to Eric Dumazet for suggesting and discussing this during the Netfilter Workshop in Copenhagen. Suggested-by: Eric Dumazet Signed-off-by: Daniel Borkmann Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/filter.h | 1 + include/uapi/linux/filter.h | 3 ++- net/core/filter.c | 5 +++++ 3 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index c45eabc135e1..d2059cb4e465 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -126,6 +126,7 @@ enum { BPF_S_ANC_SECCOMP_LD_W, BPF_S_ANC_VLAN_TAG, BPF_S_ANC_VLAN_TAG_PRESENT, + BPF_S_ANC_PAY_OFFSET, }; #endif /* __LINUX_FILTER_H__ */ diff --git a/include/uapi/linux/filter.h b/include/uapi/linux/filter.h index 9cfde6941099..8eb9ccaa5b48 100644 --- a/include/uapi/linux/filter.h +++ b/include/uapi/linux/filter.h @@ -129,7 +129,8 @@ struct sock_fprog { /* Required for SO_ATTACH_FILTER. 
*/ #define SKF_AD_ALU_XOR_X 40 #define SKF_AD_VLAN_TAG 44 #define SKF_AD_VLAN_TAG_PRESENT 48 -#define SKF_AD_MAX 52 +#define SKF_AD_PAY_OFFSET 52 +#define SKF_AD_MAX 56 #define SKF_NET_OFF (-0x100000) #define SKF_LL_OFF (-0x200000) diff --git a/net/core/filter.c b/net/core/filter.c index 2e20b55a7830..dad2a178f9f8 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -348,6 +348,9 @@ load_b: case BPF_S_ANC_VLAN_TAG_PRESENT: A = !!vlan_tx_tag_present(skb); continue; + case BPF_S_ANC_PAY_OFFSET: + A = __skb_get_poff(skb); + continue; case BPF_S_ANC_NLATTR: { struct nlattr *nla; @@ -612,6 +615,7 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen) ANCILLARY(ALU_XOR_X); ANCILLARY(VLAN_TAG); ANCILLARY(VLAN_TAG_PRESENT); + ANCILLARY(PAY_OFFSET); } /* ancillary operation unknown or unsupported */ @@ -814,6 +818,7 @@ static void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to) [BPF_S_ANC_SECCOMP_LD_W] = BPF_LD|BPF_B|BPF_ABS, [BPF_S_ANC_VLAN_TAG] = BPF_LD|BPF_B|BPF_ABS, [BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_PAY_OFFSET] = BPF_LD|BPF_B|BPF_ABS, [BPF_S_LD_W_LEN] = BPF_LD|BPF_W|BPF_LEN, [BPF_S_LD_W_IND] = BPF_LD|BPF_W|BPF_IND, [BPF_S_LD_H_IND] = BPF_LD|BPF_H|BPF_IND, -- cgit From 2b5faa4c553f90ee2dde1d976b220b1ca9741ef0 Mon Sep 17 00:00:00 2001 From: Jesper Derehag Date: Tue, 19 Mar 2013 20:50:05 +0000 Subject: connector: Added coredumping event to the process connector Process connector can now also detect coredumping events. The main aim of the patch is to get notified at the start of coredumping, instead of having to wait for it to finish and only then being notified through the EXIT event. It could be used, for instance, by process managers that want to be notified of process failures as soon as possible, rather than being notified only after the coredump, which could take on the order of minutes depending on the size of the coredump, piping and so on. Signed-off-by: Jesper Derehag Signed-off-by: David S.
Miller --- drivers/connector/cn_proc.c | 25 +++++++++++++++++++++++++ include/linux/cn_proc.h | 4 ++++ include/uapi/linux/cn_proc.h | 10 +++++++++- kernel/signal.c | 2 ++ 4 files changed, 40 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c index 1110478dd0fd..08ae128cce9b 100644 --- a/drivers/connector/cn_proc.c +++ b/drivers/connector/cn_proc.c @@ -232,6 +232,31 @@ void proc_comm_connector(struct task_struct *task) cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); } +void proc_coredump_connector(struct task_struct *task) +{ + struct cn_msg *msg; + struct proc_event *ev; + __u8 buffer[CN_PROC_MSG_SIZE]; + struct timespec ts; + + if (atomic_read(&proc_event_num_listeners) < 1) + return; + + msg = (struct cn_msg *)buffer; + ev = (struct proc_event *)msg->data; + get_seq(&msg->seq, &ev->cpu); + ktime_get_ts(&ts); /* get high res monotonic timestamp */ + put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); + ev->what = PROC_EVENT_COREDUMP; + ev->event_data.coredump.process_pid = task->pid; + ev->event_data.coredump.process_tgid = task->tgid; + + memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); + msg->ack = 0; /* not used */ + msg->len = sizeof(*ev); + cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); +} + void proc_exit_connector(struct task_struct *task) { struct cn_msg *msg; diff --git a/include/linux/cn_proc.h b/include/linux/cn_proc.h index 2c1bc1ea04ee..1d5b02a96c46 100644 --- a/include/linux/cn_proc.h +++ b/include/linux/cn_proc.h @@ -26,6 +26,7 @@ void proc_id_connector(struct task_struct *task, int which_id); void proc_sid_connector(struct task_struct *task); void proc_ptrace_connector(struct task_struct *task, int which_id); void proc_comm_connector(struct task_struct *task); +void proc_coredump_connector(struct task_struct *task); void proc_exit_connector(struct task_struct *task); #else static inline void proc_fork_connector(struct task_struct *task) @@ -48,6 +49,9 @@ static inline void proc_ptrace_connector(struct task_struct *task, int ptrace_id) {} +static inline void proc_coredump_connector(struct task_struct *task) +{} + static inline void proc_exit_connector(struct task_struct *task) {} #endif /* CONFIG_PROC_EVENTS */ diff --git a/include/uapi/linux/cn_proc.h b/include/uapi/linux/cn_proc.h index 0d7b49973bb3..f6c271035bbd 100644 --- a/include/uapi/linux/cn_proc.h +++ b/include/uapi/linux/cn_proc.h @@ -56,7 +56,9 @@ struct proc_event { PROC_EVENT_PTRACE = 0x00000100, PROC_EVENT_COMM = 0x00000200, /* "next" should be 0x00000400 */ - /* "last" is the last process event: exit */ + /* "last" is the last process event: exit, + * while "next to last" is coredumping event */ + PROC_EVENT_COREDUMP = 0x40000000, PROC_EVENT_EXIT = 0x80000000 } what; __u32 cpu; @@ -110,11 +112,17 @@ struct proc_event { char comm[16]; } comm; + struct coredump_proc_event { + __kernel_pid_t process_pid; + __kernel_pid_t process_tgid; + } coredump; + struct exit_proc_event { __kernel_pid_t process_pid; __kernel_pid_t process_tgid; __u32 exit_code, exit_signal; } exit; + } event_data; }; diff --git a/kernel/signal.c b/kernel/signal.c index dd72567767d9..497330ec2ae9 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -32,6 +32,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include @@ -2350,6 +2351,7 @@ relock: if (sig_kernel_coredump(signr)) { if (print_fatal_signals) print_fatal_signal(info->si_signo); + proc_coredump_connector(current); /* * If it was able to dump core, this kills all * other 
threads in the group and synchronizes with -- cgit From a831881be220358a1d28c5d95d69449fb6d623ca Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 18 Dec 2012 17:32:19 +0100 Subject: nohz: Basic full dynticks interface For extreme use cases such as Real Time or HPC, having the ability to shut down the tick when a single task runs on a CPU is a desired feature: * Reducing the number of interrupts improves throughput for CPU-bound tasks. The CPU is less distracted from its real job, from both an execution-time and a cache point of view. * This also improves latency response, as we have fewer critical sections. Start with introducing a very simple interface to define full dynticks CPUs: a cpumask defined at boot time through the "nohz_extended=" kernel parameter. CPUs that are part of this range will have their tick shut down whenever possible: provided they run a single task and they don't do kernel activity that requires the periodic tick. These details will be documented later in Documentation/* An online CPU must be kept outside this range to handle the timekeeping. Suggested-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Namhyung Kim Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- Documentation/kernel-parameters.txt | 6 ++++ include/linux/tick.h | 7 +++++ kernel/time/Kconfig | 19 ++++++++++++ kernel/time/tick-sched.c | 62 +++++++++++++++++++++++++++++++++++++ 4 files changed, 94 insertions(+) (limited to 'include/linux') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 4609e81dbc37..231698feaddc 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1913,6 +1913,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted. Valid arguments: on, off Default: on + nohz_extended= [KNL,BOOT] + In kernels built with CONFIG_NO_HZ_EXTENDED=y, set + the specified list of CPUs whose tick will be stopped + whenever possible. You need to keep at least one online + CPU outside the range to maintain the timekeeping. + noiotrap [SH] Disables trapped I/O port accesses. noirqdebug [X86-32] Disables the code which attempts to detect and diff --git a/include/linux/tick.h b/include/linux/tick.h index 553272e6af55..44bfa8aa439f 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -157,6 +157,13 @@ static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; } static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; } # endif /* !NO_HZ */ +#ifdef CONFIG_NO_HZ_EXTENDED +extern int tick_nohz_extended_cpu(int cpu); +#else +static inline int tick_nohz_extended_cpu(int cpu) { return 0; } +#endif + + # ifdef CONFIG_CPU_IDLE_GOV_MENU extern void menu_hrtimer_cancel(void); # else diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index 24510d84efd7..5a87c03e45ad 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -79,6 +79,25 @@ config NO_HZ only trigger on an as-needed basis both when the system is busy and when the system is idle. +config NO_HZ_EXTENDED + bool "Full dynticks system" + depends on NO_HZ && RCU_USER_QS && VIRT_CPU_ACCOUNTING_GEN && RCU_NOCB_CPU && SMP + select CONTEXT_TRACKING_FORCE + help + Adaptively try to shutdown the tick whenever possible, even when + the CPU is running tasks.
Typically this requires running a single + task on the CPU. Chances for running tickless are maximized when + the task mostly runs in userspace and has few kernel activity. + + You need to fill up the nohz_extended boot parameter with the + desired range of dynticks CPUs. + + This is implemented at the expense of some overhead in user <-> kernel + transitions: syscalls, exceptions and interrupts. Even when it's + dynamically off. + + Say N. + config HIGH_RES_TIMERS bool "High Resolution Timer Support" depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index a19a39952c1b..79c275f08b7d 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -142,6 +142,68 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) profile_tick(CPU_PROFILING); } +#ifdef CONFIG_NO_HZ_EXTENDED +static cpumask_var_t nohz_extended_mask; +bool have_nohz_extended_mask; + +int tick_nohz_extended_cpu(int cpu) +{ + if (!have_nohz_extended_mask) + return 0; + + return cpumask_test_cpu(cpu, nohz_extended_mask); +} + +/* Parse the boot-time nohz CPU list from the kernel parameters. */ +static int __init tick_nohz_extended_setup(char *str) +{ + alloc_bootmem_cpumask_var(&nohz_extended_mask); + if (cpulist_parse(str, nohz_extended_mask) < 0) + pr_warning("NOHZ: Incorrect nohz_extended cpumask\n"); + else + have_nohz_extended_mask = true; + return 1; +} +__setup("nohz_extended=", tick_nohz_extended_setup); + +static int __init init_tick_nohz_extended(void) +{ + cpumask_var_t online_nohz; + int cpu; + + if (!have_nohz_extended_mask) + return 0; + + if (!zalloc_cpumask_var(&online_nohz, GFP_KERNEL)) { + pr_warning("NO_HZ: Not enough memory to check extended nohz mask\n"); + return -ENOMEM; + } + + /* + * CPUs can probably not be concurrently offlined on initcall time. + * But we are paranoid, aren't we? + */ + get_online_cpus(); + + /* Ensure we keep a CPU outside the dynticks range for timekeeping */ + cpumask_and(online_nohz, cpu_online_mask, nohz_extended_mask); + if (cpumask_equal(online_nohz, cpu_online_mask)) { + cpu = cpumask_any(cpu_online_mask); + pr_warning("NO_HZ: Must keep at least one online CPU " + "out of nohz_extended range\n"); + pr_warning("NO_HZ: Clearing %d from nohz_extended range\n", cpu); + cpumask_clear_cpu(cpu, nohz_extended_mask); + } + put_online_cpus(); + free_cpumask_var(online_nohz); + + return 0; +} +core_initcall(init_tick_nohz_extended); +#else +#define have_nohz_extended_mask (0) +#endif + /* * NOHZ - aka dynamic tick functionality */ -- cgit From 1c20091e77fc5a9b7d7d905176443b4822a23cdb Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 10 Aug 2011 23:21:01 +0200 Subject: nohz: Wake up full dynticks CPUs when a timer gets enqueued Wake up a CPU when a timer list timer is enqueued there and the target is part of the full dynticks range. Sending an IPI to it makes it reconsidering the next timer to program on top of recent updates. This may later be improved by checking if the tick is really stopped on the target. This would need some careful synchronization though. So deal with such optimization later and start simple. Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Namhyung Kim Cc: Paul E. 
McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- include/linux/sched.h | 4 ++-- kernel/sched/core.c | 20 +++++++++++++++++++- kernel/timer.c | 12 ++++++------ 3 files changed, 27 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 9004f6e19eac..10626e2ee688 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1851,9 +1851,9 @@ static inline void idle_task_exit(void) {} #endif #if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP) -extern void wake_up_idle_cpu(int cpu); +extern void wake_up_nohz_cpu(int cpu); #else -static inline void wake_up_idle_cpu(int cpu) { } +static inline void wake_up_nohz_cpu(int cpu) { } #endif #ifdef CONFIG_SCHED_AUTOGROUP diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 849deb96e61e..e91ee589f793 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -587,7 +587,7 @@ unlock: * account when the CPU goes back to idle and evaluates the timer * wheel for the next timer event. */ -void wake_up_idle_cpu(int cpu) +static void wake_up_idle_cpu(int cpu) { struct rq *rq = cpu_rq(cpu); @@ -617,6 +617,24 @@ void wake_up_idle_cpu(int cpu) smp_send_reschedule(cpu); } +static bool wake_up_extended_nohz_cpu(int cpu) +{ + if (tick_nohz_extended_cpu(cpu)) { + if (cpu != smp_processor_id() || + tick_nohz_tick_stopped()) + smp_send_reschedule(cpu); + return true; + } + + return false; +} + +void wake_up_nohz_cpu(int cpu) +{ + if (!wake_up_extended_nohz_cpu(cpu)) + wake_up_idle_cpu(cpu); +} + static inline bool got_nohz_idle_kick(void) { int cpu = smp_processor_id(); diff --git a/kernel/timer.c b/kernel/timer.c index dbf7a78a1ef1..4e3040b40d16 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -930,14 +930,14 @@ void add_timer_on(struct timer_list *timer, int cpu) debug_activate(timer, timer->expires); internal_add_timer(base, timer); /* - * Check whether the other CPU is idle and needs to be - * triggered to reevaluate the timer wheel when nohz is - * active. We are protected against the other CPU fiddling + * Check whether the other CPU is in dynticks mode and needs + * to be triggered to reevaluate the timer wheel. + * We are protected against the other CPU fiddling * with the timer by holding the timer base lock. This also - * makes sure that a CPU on the way to idle can not evaluate - * the timer wheel. + * makes sure that a CPU on the way to stop its tick can not + * evaluate the timer wheel. */ - wake_up_idle_cpu(cpu); + wake_up_nohz_cpu(cpu); spin_unlock_irqrestore(&base->lock, flags); } EXPORT_SYMBOL_GPL(add_timer_on); -- cgit From 65deb782858128cde598ac4a9150ab7cdd29dafa Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 28 Feb 2013 16:43:19 +0000 Subject: arm: vexpress: Decouple vexpress-poweroff implementation from machine_desc This patch adds the pm_power_off and arm_pm_restart variable settings to the vexpress-poweroff.c driver to decouple it from the machine_desc definition. 
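The resulting pattern, roughly sketched (the myboard_* names are hypothetical), is that the hooks are claimed in the reset driver's probe rather than in the machine description, as the v2m.c and vexpress-poweroff.c hunks below show:

static void myboard_power_off(void)
{
	/* poke the board's power controller */
}

static int myboard_reset_probe(struct platform_device *pdev)
{
	/* claimed at probe time; no .restart or pm_power_off
	 * wiring is needed in the machine_desc anymore
	 */
	pm_power_off = myboard_power_off;
	return 0;
}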
Signed-off-by: Catalin Marinas Acked-by: Pawel Moll --- arch/arm/mach-vexpress/v2m.c | 5 ----- drivers/power/reset/vexpress-poweroff.c | 9 +++++++-- include/linux/vexpress.h | 3 --- 3 files changed, 7 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-vexpress/v2m.c b/arch/arm/mach-vexpress/v2m.c index 915683cb67d6..c970762e8386 100644 --- a/arch/arm/mach-vexpress/v2m.c +++ b/arch/arm/mach-vexpress/v2m.c @@ -361,8 +361,6 @@ static void __init v2m_init(void) for (i = 0; i < ARRAY_SIZE(v2m_amba_devs); i++) amba_device_register(v2m_amba_devs[i], &iomem_resource); - pm_power_off = vexpress_power_off; - ct_desc->init_tile(); } @@ -374,7 +372,6 @@ MACHINE_START(VEXPRESS, "ARM-Versatile Express") .init_irq = v2m_init_irq, .init_time = v2m_timer_init, .init_machine = v2m_init, - .restart = vexpress_restart, MACHINE_END static struct map_desc v2m_rs1_io_desc __initdata = { @@ -464,7 +461,6 @@ static void __init v2m_dt_init(void) { l2x0_of_init(0x00400000, 0xfe0fffff); of_platform_populate(NULL, v2m_dt_bus_match, NULL, NULL); - pm_power_off = vexpress_power_off; } static const char * const v2m_dt_match[] __initconst = { @@ -481,5 +477,4 @@ DT_MACHINE_START(VEXPRESS_DT, "ARM-Versatile Express") .init_irq = irqchip_init, .init_time = v2m_dt_timer_init, .init_machine = v2m_dt_init, - .restart = vexpress_restart, MACHINE_END diff --git a/drivers/power/reset/vexpress-poweroff.c b/drivers/power/reset/vexpress-poweroff.c index 465923aa3819..469e6962b2cf 100644 --- a/drivers/power/reset/vexpress-poweroff.c +++ b/drivers/power/reset/vexpress-poweroff.c @@ -18,6 +18,8 @@ #include #include +#include + static void vexpress_reset_do(struct device *dev, const char *what) { int err = -ENOENT; @@ -39,14 +41,14 @@ static void vexpress_reset_do(struct device *dev, const char *what) static struct device *vexpress_power_off_device; -void vexpress_power_off(void) +static void vexpress_power_off(void) { vexpress_reset_do(vexpress_power_off_device, "power off"); } static struct device *vexpress_restart_device; -void vexpress_restart(char str, const char *cmd) +static void vexpress_restart(char str, const char *cmd) { vexpress_reset_do(vexpress_restart_device, "restart"); } @@ -103,14 +105,17 @@ static int vexpress_reset_probe(struct platform_device *pdev) switch (func) { case FUNC_SHUTDOWN: vexpress_power_off_device = &pdev->dev; + pm_power_off = vexpress_power_off; break; case FUNC_RESET: if (!vexpress_restart_device) vexpress_restart_device = &pdev->dev; + arm_pm_restart = vexpress_restart; device_create_file(&pdev->dev, &dev_attr_active); break; case FUNC_REBOOT: vexpress_restart_device = &pdev->dev; + arm_pm_restart = vexpress_restart; device_create_file(&pdev->dev, &dev_attr_active); break; }; diff --git a/include/linux/vexpress.h b/include/linux/vexpress.h index 75818744ab59..ea7168a68081 100644 --- a/include/linux/vexpress.h +++ b/include/linux/vexpress.h @@ -115,9 +115,6 @@ unsigned __vexpress_get_site(struct device *dev, struct device_node *node); void vexpress_sysreg_early_init(void __iomem *base); void vexpress_sysreg_of_early_init(void); -void vexpress_power_off(void); -void vexpress_restart(char str, const char *cmd); - /* Clocks */ struct clk *vexpress_osc_setup(struct device *dev); -- cgit From 9b44190dc114c1720b34975b5bfc65aece112ced Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 20 Mar 2013 13:32:58 +0000 Subject: tcp: refactor F-RTO The patch series refactors the F-RTO feature (RFC4138/5682) to simplify loss recovery processing.
Existing F-RTO was developed during the experimental stage (RFC4138) and has many experimental features. It takes a separate code path from the traditional timeout processing by overloading CA_Disorder instead of using CA_Loss state. This complicates CA_Disorder state handling because it's also used for handling dubious ACKs and undos. While the algorithm in the RFC does not change the congestion control, the implementation intercepts congestion control in various places (e.g., frto_cwnd in tcp_ack()). The new code implements the newer F-RTO RFC5682 using the CA_Loss processing path. F-RTO becomes a small extension in the timeout processing and interfaces with congestion control and Eifel undo modules. It lets the congestion control (module) determine how many packets to send independently. F-RTO only chooses what to send in order to detect spurious retransmission. If the timeout is found spurious, it invokes existing Eifel undo algorithms like DSACK or TCP timestamp based detection. The first patch removes all F-RTO code; only sysctl_tcp_frto is left for the new implementation. Since CA_EVENT_FRTO is removed, TCP westwood now computes ssthresh on the regular timeout CA_EVENT_LOSS event. Signed-off-by: Yuchung Cheng Acked-by: Neal Cardwell Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 17 -- include/linux/tcp.h | 6 +- include/net/tcp.h | 4 - net/ipv4/sysctl_net_ipv4.c | 7 - net/ipv4/tcp_input.c | 375 +--------------------------------- net/ipv4/tcp_minisocks.c | 3 - net/ipv4/tcp_output.c | 11 +- net/ipv4/tcp_timer.c | 6 +- net/ipv4/tcp_westwood.c | 2 +- 9 files changed, 10 insertions(+), 421 deletions(-) (limited to 'include/linux') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 17953e2bc3e9..8a977a0aaede 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -239,23 +239,6 @@ tcp_frto - INTEGER interacts badly with the packet counting of the SACK enabled TCP flow. -tcp_frto_response - INTEGER - When F-RTO has detected that a TCP retransmission timeout was - spurious (i.e, the timeout would have been avoided had TCP set a - longer retransmission timeout), TCP has several options what to do - next. Possible values are: - 0 Rate halving based; a smooth and conservative response, - results in halved cwnd and ssthresh after one RTT - 1 Very conservative response; not recommended because even - though being valid, it interacts poorly with the rest of - Linux TCP, halves cwnd and ssthresh immediately - 2 Aggressive response; undoes congestion control measures - that are now known to be unnecessary (ignoring the - possibility of a lost retransmission that would require - TCP to be more cautious), cwnd and ssthresh are restored - to the values prior timeout - Default: 0 (rate halving based) - tcp_keepalive_time - INTEGER How often TCP sends out keepalive messages when keepalive is enabled. Default: 2hours. diff --git a/include/linux/tcp.h b/include/linux/tcp.h index ed6a7456eecd..f5f203b36379 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -187,14 +187,12 @@ struct tcp_sock { u32 window_clamp; /* Maximal window to advertise */ u32 rcv_ssthresh; /* Current window clamp */ - u32 frto_highmark; /* snd_nxt when RTO occurred */ u16 advmss; /* Advertised MSS */ - u8 frto_counter; /* Number of new acks after RTO */ + u8 unused; u8 nonagle : 4,/* Disable Nagle algorithm?
*/ thin_lto : 1,/* Use linear timeouts for thin streams */ thin_dupack : 1,/* Fast retransmit on first dupack */ - repair : 1, - unused : 1; + repair : 1; u8 repair_queue; u8 do_early_retrans:1,/* Enable RFC5827 early-retransmit */ syn_data:1, /* SYN includes data */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 7f2f17198d75..d1dcb596230e 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -272,7 +272,6 @@ extern int sysctl_tcp_app_win; extern int sysctl_tcp_adv_win_scale; extern int sysctl_tcp_tw_reuse; extern int sysctl_tcp_frto; -extern int sysctl_tcp_frto_response; extern int sysctl_tcp_low_latency; extern int sysctl_tcp_dma_copybreak; extern int sysctl_tcp_nometrics_save; @@ -424,8 +423,6 @@ extern struct sock * tcp_check_req(struct sock *sk,struct sk_buff *skb, bool fastopen); extern int tcp_child_process(struct sock *parent, struct sock *child, struct sk_buff *skb); -extern bool tcp_use_frto(struct sock *sk); -extern void tcp_enter_frto(struct sock *sk); extern void tcp_enter_loss(struct sock *sk, int how); extern void tcp_clear_retrans(struct tcp_sock *tp); extern void tcp_update_metrics(struct sock *sk); @@ -756,7 +753,6 @@ enum tcp_ca_event { CA_EVENT_TX_START, /* first transmit when no packets in flight */ CA_EVENT_CWND_RESTART, /* congestion window restart */ CA_EVENT_COMPLETE_CWR, /* end of congestion recovery */ - CA_EVENT_FRTO, /* fast recovery timeout */ CA_EVENT_LOSS, /* loss timeout */ CA_EVENT_FAST_ACK, /* in sequence ack */ CA_EVENT_SLOW_ACK, /* other ack */ diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index cb45062c8be0..fa2f63fc453b 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -591,13 +591,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { - .procname = "tcp_frto_response", - .data = &sysctl_tcp_frto_response, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "tcp_low_latency", .data = &sysctl_tcp_low_latency, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 19f0149fb6a2..231c79fe91f3 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -93,7 +93,6 @@ int sysctl_tcp_stdurg __read_mostly; int sysctl_tcp_rfc1337 __read_mostly; int sysctl_tcp_max_orphans __read_mostly = NR_FILE; int sysctl_tcp_frto __read_mostly = 2; -int sysctl_tcp_frto_response __read_mostly; int sysctl_tcp_thin_dupack __read_mostly; @@ -108,17 +107,14 @@ int sysctl_tcp_early_retrans __read_mostly = 3; #define FLAG_DATA_SACKED 0x20 /* New SACK. 
*/ #define FLAG_ECE 0x40 /* ECE in this ACK */ #define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/ -#define FLAG_ONLY_ORIG_SACKED 0x200 /* SACKs only non-rexmit sent before RTO */ #define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */ #define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */ -#define FLAG_NONHEAD_RETRANS_ACKED 0x1000 /* Non-head rexmitted data was ACKed */ #define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */ #define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) #define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) #define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE) #define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED) -#define FLAG_ANY_PROGRESS (FLAG_FORWARD_PROGRESS|FLAG_SND_UNA_ADVANCED) #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH) #define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH)) @@ -1159,10 +1155,6 @@ static u8 tcp_sacktag_one(struct sock *sk, tcp_highest_sack_seq(tp))) state->reord = min(fack_count, state->reord); - - /* SACK enhanced F-RTO (RFC4138; Appendix B) */ - if (!after(end_seq, tp->frto_highmark)) - state->flag |= FLAG_ONLY_ORIG_SACKED; } if (sacked & TCPCB_LOST) { @@ -1555,7 +1547,6 @@ static int tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, u32 prior_snd_una) { - const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); const unsigned char *ptr = (skb_transport_header(ack_skb) + TCP_SKB_CB(ack_skb)->sacked); @@ -1728,12 +1719,6 @@ walk: start_seq, end_seq, dup_sack); advance_sp: - /* SACK enhanced FRTO (RFC4138, Appendix B): Clearing correct - * due to in-order walk - */ - if (after(end_seq, tp->frto_highmark)) - state.flag &= ~FLAG_ONLY_ORIG_SACKED; - i++; } @@ -1750,8 +1735,7 @@ advance_sp: tcp_verify_left_out(tp); if ((state.reord < tp->fackets_out) && - ((icsk->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker) && - (!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark))) + ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker)) tcp_update_reordering(sk, tp->fackets_out - state.reord, 0); out: @@ -1825,197 +1809,6 @@ static inline void tcp_reset_reno_sack(struct tcp_sock *tp) tp->sacked_out = 0; } -static int tcp_is_sackfrto(const struct tcp_sock *tp) -{ - return (sysctl_tcp_frto == 0x2) && !tcp_is_reno(tp); -} - -/* F-RTO can only be used if TCP has never retransmitted anything other than - * head (SACK enhanced variant from Appendix B of RFC4138 is more robust here) - */ -bool tcp_use_frto(struct sock *sk) -{ - const struct tcp_sock *tp = tcp_sk(sk); - const struct inet_connection_sock *icsk = inet_csk(sk); - struct sk_buff *skb; - - if (!sysctl_tcp_frto) - return false; - - /* MTU probe and F-RTO won't really play nicely along currently */ - if (icsk->icsk_mtup.probe_size) - return false; - - if (tcp_is_sackfrto(tp)) - return true; - - /* Avoid expensive walking of rexmit queue if possible */ - if (tp->retrans_out > 1) - return false; - - skb = tcp_write_queue_head(sk); - if (tcp_skb_is_last(sk, skb)) - return true; - skb = tcp_write_queue_next(sk, skb); /* Skips head */ - tcp_for_write_queue_from(skb, sk) { - if (skb == tcp_send_head(sk)) - break; - if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) - return false; - /* Short-circuit when first non-SACKed skb has been checked */ - if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) - break; - } - return true; -} - -/* RTO occurred, but do not yet enter Loss state. 
Instead, defer RTO - * recovery a bit and use heuristics in tcp_process_frto() to detect if - * the RTO was spurious. Only clear SACKED_RETRANS of the head here to - * keep retrans_out counting accurate (with SACK F-RTO, other than head - * may still have that bit set); TCPCB_LOST and remaining SACKED_RETRANS - * bits are handled if the Loss state is really to be entered (in - * tcp_enter_frto_loss). - * - * Do like tcp_enter_loss() would; when RTO expires the second time it - * does: - * "Reduce ssthresh if it has not yet been made inside this window." - */ -void tcp_enter_frto(struct sock *sk) -{ - const struct inet_connection_sock *icsk = inet_csk(sk); - struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *skb; - - if ((!tp->frto_counter && icsk->icsk_ca_state <= TCP_CA_Disorder) || - tp->snd_una == tp->high_seq || - ((icsk->icsk_ca_state == TCP_CA_Loss || tp->frto_counter) && - !icsk->icsk_retransmits)) { - tp->prior_ssthresh = tcp_current_ssthresh(sk); - /* Our state is too optimistic in ssthresh() call because cwnd - * is not reduced until tcp_enter_frto_loss() when previous F-RTO - * recovery has not yet completed. Pattern would be this: RTO, - * Cumulative ACK, RTO (2xRTO for the same segment does not end - * up here twice). - * RFC4138 should be more specific on what to do, even though - * RTO is quite unlikely to occur after the first Cumulative ACK - * due to back-off and complexity of triggering events ... - */ - if (tp->frto_counter) { - u32 stored_cwnd; - stored_cwnd = tp->snd_cwnd; - tp->snd_cwnd = 2; - tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); - tp->snd_cwnd = stored_cwnd; - } else { - tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); - } - /* ... in theory, cong.control module could do "any tricks" in - * ssthresh(), which means that ca_state, lost bits and lost_out - * counter would have to be faked before the call occurs. We - * consider that too expensive, unlikely and hacky, so modules - * using these in ssthresh() must deal these incompatibility - * issues if they receives CA_EVENT_FRTO and frto_counter != 0 - */ - tcp_ca_event(sk, CA_EVENT_FRTO); - } - - tp->undo_marker = tp->snd_una; - tp->undo_retrans = 0; - - skb = tcp_write_queue_head(sk); - if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) - tp->undo_marker = 0; - if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) { - TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; - tp->retrans_out -= tcp_skb_pcount(skb); - } - tcp_verify_left_out(tp); - - /* Too bad if TCP was application limited */ - tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1); - - /* Earlier loss recovery underway (see RFC4138; Appendix B). - * The last condition is necessary at least in tp->frto_counter case. - */ - if (tcp_is_sackfrto(tp) && (tp->frto_counter || - ((1 << icsk->icsk_ca_state) & (TCPF_CA_Recovery|TCPF_CA_Loss))) && - after(tp->high_seq, tp->snd_una)) { - tp->frto_highmark = tp->high_seq; - } else { - tp->frto_highmark = tp->snd_nxt; - } - tcp_set_ca_state(sk, TCP_CA_Disorder); - tp->high_seq = tp->snd_nxt; - tp->frto_counter = 1; -} - -/* Enter Loss state after F-RTO was applied. Dupack arrived after RTO, - * which indicates that we should follow the traditional RTO recovery, - * i.e. mark everything lost and do go-back-N retransmission. 
- */ -static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag) -{ - struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *skb; - - tp->lost_out = 0; - tp->retrans_out = 0; - if (tcp_is_reno(tp)) - tcp_reset_reno_sack(tp); - - tcp_for_write_queue(skb, sk) { - if (skb == tcp_send_head(sk)) - break; - - TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; - /* - * Count the retransmission made on RTO correctly (only when - * waiting for the first ACK and did not get it)... - */ - if ((tp->frto_counter == 1) && !(flag & FLAG_DATA_ACKED)) { - /* For some reason this R-bit might get cleared? */ - if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) - tp->retrans_out += tcp_skb_pcount(skb); - /* ...enter this if branch just for the first segment */ - flag |= FLAG_DATA_ACKED; - } else { - if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) - tp->undo_marker = 0; - TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; - } - - /* Marking forward transmissions that were made after RTO lost - * can cause unnecessary retransmissions in some scenarios, - * SACK blocks will mitigate that in some but not in all cases. - * We used to not mark them but it was causing break-ups with - * receivers that do only in-order receival. - * - * TODO: we could detect presence of such receiver and select - * different behavior per flow. - */ - if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { - TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; - tp->lost_out += tcp_skb_pcount(skb); - tp->retransmit_high = TCP_SKB_CB(skb)->end_seq; - } - } - tcp_verify_left_out(tp); - - tp->snd_cwnd = tcp_packets_in_flight(tp) + allowed_segments; - tp->snd_cwnd_cnt = 0; - tp->snd_cwnd_stamp = tcp_time_stamp; - tp->frto_counter = 0; - - tp->reordering = min_t(unsigned int, tp->reordering, - sysctl_tcp_reordering); - tcp_set_ca_state(sk, TCP_CA_Loss); - tp->high_seq = tp->snd_nxt; - TCP_ECN_queue_cwr(tp); - - tcp_clear_all_retrans_hints(tp); -} - static void tcp_clear_retrans_partial(struct tcp_sock *tp) { tp->retrans_out = 0; @@ -2090,8 +1883,6 @@ void tcp_enter_loss(struct sock *sk, int how) tcp_set_ca_state(sk, TCP_CA_Loss); tp->high_seq = tp->snd_nxt; TCP_ECN_queue_cwr(tp); - /* Abort F-RTO algorithm if one is in progress */ - tp->frto_counter = 0; } /* If ACK arrived pointing to a remembered SACK, it means that our @@ -2275,10 +2066,6 @@ static bool tcp_time_to_recover(struct sock *sk, int flag) struct tcp_sock *tp = tcp_sk(sk); __u32 packets_out; - /* Do not perform any recovery during F-RTO algorithm */ - if (tp->frto_counter) - return false; - /* Trick#1: The loss is proven. */ if (tp->lost_out) return true; @@ -2760,7 +2547,7 @@ static void tcp_try_to_open(struct sock *sk, int flag, int newly_acked_sacked) tcp_verify_left_out(tp); - if (!tp->frto_counter && !tcp_any_retrans_done(sk)) + if (!tcp_any_retrans_done(sk)) tp->retrans_stamp = 0; if (flag & FLAG_ECE) @@ -3198,8 +2985,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, flag |= FLAG_RETRANS_DATA_ACKED; ca_seq_rtt = -1; seq_rtt = -1; - if ((flag & FLAG_DATA_ACKED) || (acked_pcount > 1)) - flag |= FLAG_NONHEAD_RETRANS_ACKED; } else { ca_seq_rtt = now - scb->when; last_ackt = skb->tstamp; @@ -3408,150 +3193,6 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 return flag; } -/* A very conservative spurious RTO response algorithm: reduce cwnd and - * continue in congestion avoidance. 
- */ -static void tcp_conservative_spur_to_response(struct tcp_sock *tp) -{ - tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); - tp->snd_cwnd_cnt = 0; - TCP_ECN_queue_cwr(tp); - tcp_moderate_cwnd(tp); -} - -/* A conservative spurious RTO response algorithm: reduce cwnd using - * PRR and continue in congestion avoidance. - */ -static void tcp_cwr_spur_to_response(struct sock *sk) -{ - tcp_enter_cwr(sk, 0); -} - -static void tcp_undo_spur_to_response(struct sock *sk, int flag) -{ - if (flag & FLAG_ECE) - tcp_cwr_spur_to_response(sk); - else - tcp_undo_cwr(sk, true); -} - -/* F-RTO spurious RTO detection algorithm (RFC4138) - * - * F-RTO affects during two new ACKs following RTO (well, almost, see inline - * comments). State (ACK number) is kept in frto_counter. When ACK advances - * window (but not to or beyond highest sequence sent before RTO): - * On First ACK, send two new segments out. - * On Second ACK, RTO was likely spurious. Do spurious response (response - * algorithm is not part of the F-RTO detection algorithm - * given in RFC4138 but can be selected separately). - * Otherwise (basically on duplicate ACK), RTO was (likely) caused by a loss - * and TCP falls back to conventional RTO recovery. F-RTO allows overriding - * of Nagle, this is done using frto_counter states 2 and 3, when a new data - * segment of any size sent during F-RTO, state 2 is upgraded to 3. - * - * Rationale: if the RTO was spurious, new ACKs should arrive from the - * original window even after we transmit two new data segments. - * - * SACK version: - * on first step, wait until first cumulative ACK arrives, then move to - * the second step. In second step, the next ACK decides. - * - * F-RTO is implemented (mainly) in four functions: - * - tcp_use_frto() is used to determine if TCP is can use F-RTO - * - tcp_enter_frto() prepares TCP state on RTO if F-RTO is used, it is - * called when tcp_use_frto() showed green light - * - tcp_process_frto() handles incoming ACKs during F-RTO algorithm - * - tcp_enter_frto_loss() is called if there is not enough evidence - * to prove that the RTO is indeed spurious. It transfers the control - * from F-RTO to the conventional RTO recovery - */ -static bool tcp_process_frto(struct sock *sk, int flag) -{ - struct tcp_sock *tp = tcp_sk(sk); - - tcp_verify_left_out(tp); - - /* Duplicate the behavior from Loss state (fastretrans_alert) */ - if (flag & FLAG_DATA_ACKED) - inet_csk(sk)->icsk_retransmits = 0; - - if ((flag & FLAG_NONHEAD_RETRANS_ACKED) || - ((tp->frto_counter >= 2) && (flag & FLAG_RETRANS_DATA_ACKED))) - tp->undo_marker = 0; - - if (!before(tp->snd_una, tp->frto_highmark)) { - tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3), flag); - return true; - } - - if (!tcp_is_sackfrto(tp)) { - /* RFC4138 shortcoming in step 2; should also have case c): - * ACK isn't duplicate nor advances window, e.g., opposite dir - * data, winupdate - */ - if (!(flag & FLAG_ANY_PROGRESS) && (flag & FLAG_NOT_DUP)) - return true; - - if (!(flag & FLAG_DATA_ACKED)) { - tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3), - flag); - return true; - } - } else { - if (!(flag & FLAG_DATA_ACKED) && (tp->frto_counter == 1)) { - if (!tcp_packets_in_flight(tp)) { - tcp_enter_frto_loss(sk, 2, flag); - return true; - } - - /* Prevent sending of new data. 
*/ - tp->snd_cwnd = min(tp->snd_cwnd, - tcp_packets_in_flight(tp)); - return true; - } - - if ((tp->frto_counter >= 2) && - (!(flag & FLAG_FORWARD_PROGRESS) || - ((flag & FLAG_DATA_SACKED) && - !(flag & FLAG_ONLY_ORIG_SACKED)))) { - /* RFC4138 shortcoming (see comment above) */ - if (!(flag & FLAG_FORWARD_PROGRESS) && - (flag & FLAG_NOT_DUP)) - return true; - - tcp_enter_frto_loss(sk, 3, flag); - return true; - } - } - - if (tp->frto_counter == 1) { - /* tcp_may_send_now needs to see updated state */ - tp->snd_cwnd = tcp_packets_in_flight(tp) + 2; - tp->frto_counter = 2; - - if (!tcp_may_send_now(sk)) - tcp_enter_frto_loss(sk, 2, flag); - - return true; - } else { - switch (sysctl_tcp_frto_response) { - case 2: - tcp_undo_spur_to_response(sk, flag); - break; - case 1: - tcp_conservative_spur_to_response(tp); - break; - default: - tcp_cwr_spur_to_response(sk); - break; - } - tp->frto_counter = 0; - tp->undo_marker = 0; - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS); - } - return false; -} - /* RFC 5961 7 [ACK Throttling] */ static void tcp_send_challenge_ack(struct sock *sk) { @@ -3616,7 +3257,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) int prior_packets; int prior_sacked = tp->sacked_out; int pkts_acked = 0; - bool frto_cwnd = false; /* If the ack is older than previous acks * then we can probably ignore it. @@ -3690,22 +3330,15 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) pkts_acked = prior_packets - tp->packets_out; - if (tp->frto_counter) - frto_cwnd = tcp_process_frto(sk, flag); - /* Guarantee sacktag reordering detection against wrap-arounds */ - if (before(tp->frto_highmark, tp->snd_una)) - tp->frto_highmark = 0; - if (tcp_ack_is_dubious(sk, flag)) { /* Advance CWND, if state allows this. 
*/ - if ((flag & FLAG_DATA_ACKED) && !frto_cwnd && - tcp_may_raise_cwnd(sk, flag)) + if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag)) tcp_cong_avoid(sk, ack, prior_in_flight); is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); tcp_fastretrans_alert(sk, pkts_acked, prior_sacked, is_dupack, flag); } else { - if ((flag & FLAG_DATA_ACKED) && !frto_cwnd) + if (flag & FLAG_DATA_ACKED) tcp_cong_avoid(sk, ack, prior_in_flight); } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 8f0234f8bb95..05eaf8904613 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -422,9 +422,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->snd_cwnd = TCP_INIT_CWND; newtp->snd_cwnd_cnt = 0; - newtp->frto_counter = 0; - newtp->frto_highmark = 0; - if (newicsk->icsk_ca_ops != &tcp_init_congestion_ops && !try_module_get(newicsk->icsk_ca_ops->owner)) newicsk->icsk_ca_ops = &tcp_init_congestion_ops; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e787ecec505e..163cf5fc0119 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -78,10 +78,6 @@ static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb) tcp_advance_send_head(sk, skb); tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; - /* Don't override Nagle indefinitely with F-RTO */ - if (tp->frto_counter == 2) - tp->frto_counter = 3; - tp->packets_out += tcp_skb_pcount(skb); if (!prior_packets || icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) @@ -1470,11 +1466,8 @@ static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buf if (nonagle & TCP_NAGLE_PUSH) return true; - /* Don't use the nagle rule for urgent data (or for the final FIN). - * Nagle can be ignored during F-RTO too (see RFC4138). - */ - if (tcp_urg_mode(tp) || (tp->frto_counter == 2) || - (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) + /* Don't use the nagle rule for urgent data (or for the final FIN). */ + if (tcp_urg_mode(tp) || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) return true; if (!tcp_nagle_check(tp, skb, cur_mss, nonagle)) diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index eeccf795e917..4b85e6f636c9 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -416,11 +416,7 @@ void tcp_retransmit_timer(struct sock *sk) NET_INC_STATS_BH(sock_net(sk), mib_idx); } - if (tcp_use_frto(sk)) { - tcp_enter_frto(sk); - } else { - tcp_enter_loss(sk, 0); - } + tcp_enter_loss(sk, 0); if (tcp_retransmit_skb(sk, tcp_write_queue_head(sk)) > 0) { /* Retransmission failed because of local congestion, diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c index 1b91bf48e277..76a1e23259e1 100644 --- a/net/ipv4/tcp_westwood.c +++ b/net/ipv4/tcp_westwood.c @@ -236,7 +236,7 @@ static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event) tp->snd_cwnd = tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk); break; - case CA_EVENT_FRTO: + case CA_EVENT_LOSS: tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk); /* Update RTT_min when next ack arrives */ w->reset_rtt_min = 1; -- cgit From e33099f96d99c391b3325caa9c44258de04aae86 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 20 Mar 2013 13:33:00 +0000 Subject: tcp: implement RFC5682 F-RTO This patch implements F-RTO (forward RTO recovery): When the first retransmission after timeout is acknowledged, F-RTO sends new data instead of old data.
If the next ACK acknowledges some never-retransmitted data, then the timeout was spurious and the congestion state is reverted. Otherwise, if the next ACK selectively acknowledges the new data, then the timeout was genuine and the loss recovery continues. This idea applies to recurring timeouts as well. While F-RTO sends different data during timeout recovery, it does not (and should not) change the congestion control. The implementation follows the three steps of the SACK enhanced algorithm (section 3) in RFC5682. Step 1 is in tcp_enter_loss(). Steps 2 and 3 are in tcp_process_loss(). The basic version is not supported because the SACK enhanced version also works for non-SACK connections. The new implementation is functionally in parity with the old F-RTO implementation except the one case where it increases undo events: In addition to the RFC algorithm, a spurious timeout may be detected without sending data in step 2, as long as the SACK confirms not all the original data are dropped. When this happens, the sender will undo the cwnd and perhaps enter fast recovery instead. This additional check increases the F-RTO undo events by 5x compared to the prior implementation on Google Web servers, since the sender often does not have new data to send for HTTP. Note that F-RTO may detect a spurious timeout before Eifel with timestamps does. Signed-off-by: Yuchung Cheng Acked-by: Eric Dumazet Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 18 +++------ include/linux/tcp.h | 3 +- net/ipv4/tcp_input.c | 73 ++++++++++++++++++++++++++++------ 3 files changed, 68 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 8a977a0aaede..f98ca633b528 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -225,19 +225,13 @@ tcp_fin_timeout - INTEGER Default: 60 seconds tcp_frto - INTEGER - Enables Forward RTO-Recovery (F-RTO) defined in RFC4138. + Enables Forward RTO-Recovery (F-RTO) defined in RFC5682. F-RTO is an enhanced recovery algorithm for TCP retransmission - timeouts. It is particularly beneficial in wireless environments - where packet loss is typically due to random radio interference - rather than intermediate router congestion. F-RTO is sender-side - only modification. Therefore it does not require any support from - the peer. - - If set to 1, basic version is enabled. 2 enables SACK enhanced - F-RTO if flow uses SACK. The basic version can be used also when - SACK is in use though scenario(s) with it exists where F-RTO - interacts badly with the packet counting of the SACK enabled TCP - flow. + timeouts. It is particularly beneficial in networks where the + RTT fluctuates (e.g., wireless). F-RTO is sender-side only + modification. It does not require any support from the peer. + + By default it's enabled with a non-zero value. 0 disables F-RTO. tcp_keepalive_time - INTEGER How often TCP sends out keepalive messages when keepalive is enabled. diff --git a/include/linux/tcp.h b/include/linux/tcp.h index f5f203b36379..5adbc33d1ab3 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -192,7 +192,8 @@ struct tcp_sock { u8 nonagle : 4,/* Disable Nagle algorithm?
*/ thin_lto : 1,/* Use linear timeouts for thin streams */ thin_dupack : 1,/* Fast retransmit on first dupack */ - repair : 1; + repair : 1, + frto : 1;/* F-RTO (RFC5682) activated in CA_Loss */ u8 repair_queue; u8 do_early_retrans:1,/* Enable RFC5827 early-retransmit */ syn_data:1, /* SYN includes data */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8d821e45b917..b2b36196b342 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -107,6 +107,7 @@ int sysctl_tcp_early_retrans __read_mostly = 3; #define FLAG_DATA_SACKED 0x20 /* New SACK. */ #define FLAG_ECE 0x40 /* ECE in this ACK */ #define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/ +#define FLAG_ORIG_SACK_ACKED 0x200 /* Never retransmitted data are (s)acked */ #define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */ #define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */ #define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */ @@ -1155,6 +1156,8 @@ static u8 tcp_sacktag_one(struct sock *sk, tcp_highest_sack_seq(tp))) state->reord = min(fack_count, state->reord); + if (!after(end_seq, tp->high_seq)) + state->flag |= FLAG_ORIG_SACK_ACKED; } if (sacked & TCPCB_LOST) { @@ -1835,10 +1838,13 @@ void tcp_enter_loss(struct sock *sk, int how) const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; + bool new_recovery = false; /* Reduce ssthresh if it has not yet been made inside this window. */ - if (icsk->icsk_ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq || + if (icsk->icsk_ca_state <= TCP_CA_Disorder || + !after(tp->high_seq, tp->snd_una) || (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) { + new_recovery = true; tp->prior_ssthresh = tcp_current_ssthresh(sk); tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); tcp_ca_event(sk, CA_EVENT_LOSS); @@ -1883,6 +1889,14 @@ void tcp_enter_loss(struct sock *sk, int how) tcp_set_ca_state(sk, TCP_CA_Loss); tp->high_seq = tp->snd_nxt; TCP_ECN_queue_cwr(tp); + + /* F-RTO RFC5682 sec 3.1 step 1: retransmit SND.UNA if no previous + * loss recovery is underway except recurring timeout(s) on + * the same SND.UNA (sec 3.2). Disable F-RTO on path MTU probing + */ + tp->frto = sysctl_tcp_frto && + (new_recovery || icsk->icsk_retransmits) && + !inet_csk(sk)->icsk_mtup.probe_size; } /* If ACK arrived pointing to a remembered SACK, it means that our @@ -2426,12 +2440,12 @@ static int tcp_try_undo_partial(struct sock *sk, int acked) return failed; } -/* Undo during loss recovery after partial ACK. */ -static bool tcp_try_undo_loss(struct sock *sk) +/* Undo during loss recovery after partial ACK or using F-RTO. */ +static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo) { struct tcp_sock *tp = tcp_sk(sk); - if (tcp_may_undo(tp)) { + if (frto_undo || tcp_may_undo(tp)) { struct sk_buff *skb; tcp_for_write_queue(skb, sk) { if (skb == tcp_send_head(sk)) @@ -2445,9 +2459,12 @@ static bool tcp_try_undo_loss(struct sock *sk) tp->lost_out = 0; tcp_undo_cwr(sk, true); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO); + if (frto_undo) + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPSPURIOUSRTOS); inet_csk(sk)->icsk_retransmits = 0; tp->undo_marker = 0; - if (tcp_is_sack(tp)) + if (frto_undo || tcp_is_sack(tp)) tcp_set_ca_state(sk, TCP_CA_Open); return true; } @@ -2667,24 +2684,52 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack) /* Process an ACK in CA_Loss state. 
Move to CA_Open if lost data are * recovered or spurious. Otherwise retransmits more on partial ACKs. */ -static void tcp_process_loss(struct sock *sk, int flag) +static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); + bool recovered = !before(tp->snd_una, tp->high_seq); - if (!before(tp->snd_una, tp->high_seq)) { + if (tp->frto) { /* F-RTO RFC5682 sec 3.1 (sack enhanced version). */ + if (flag & FLAG_ORIG_SACK_ACKED) { + /* Step 3.b. A timeout is spurious if not all data are + * lost, i.e., never-retransmitted data are (s)acked. + */ + tcp_try_undo_loss(sk, true); + return; + } + if (after(tp->snd_nxt, tp->high_seq) && + (flag & FLAG_DATA_SACKED || is_dupack)) { + tp->frto = 0; /* Loss was real: 2nd part of step 3.a */ + } else if (flag & FLAG_SND_UNA_ADVANCED && !recovered) { + tp->high_seq = tp->snd_nxt; + __tcp_push_pending_frames(sk, tcp_current_mss(sk), + TCP_NAGLE_OFF); + if (after(tp->snd_nxt, tp->high_seq)) + return; /* Step 2.b */ + tp->frto = 0; + } + } + + if (recovered) { + /* F-RTO RFC5682 sec 3.1 step 2.a and 1st part of step 3.a */ icsk->icsk_retransmits = 0; tcp_try_undo_recovery(sk); return; } - if (flag & FLAG_DATA_ACKED) icsk->icsk_retransmits = 0; - if (tcp_is_reno(tp) && flag & FLAG_SND_UNA_ADVANCED) - tcp_reset_reno_sack(tp); - if (tcp_try_undo_loss(sk)) + if (tcp_is_reno(tp)) { + /* A Reno DUPACK means new data in F-RTO step 2.b above are + * delivered. Lower inflight to clock out (re)tranmissions. + */ + if (after(tp->snd_nxt, tp->high_seq) && is_dupack) + tcp_add_reno_sack(sk); + else if (flag & FLAG_SND_UNA_ADVANCED) + tcp_reset_reno_sack(tp); + } + if (tcp_try_undo_loss(sk, false)) return; - tcp_moderate_cwnd(tp); tcp_xmit_retransmit_queue(sk); } @@ -2764,7 +2809,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked; break; case TCP_CA_Loss: - tcp_process_loss(sk, flag); + tcp_process_loss(sk, flag, is_dupack); if (icsk->icsk_ca_state != TCP_CA_Open) return; /* Fall through to processing in Open state. */ @@ -3003,6 +3048,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, } if (!(sacked & TCPCB_SACKED_ACKED)) reord = min(pkts_acked, reord); + if (!after(scb->end_seq, tp->high_seq)) + flag |= FLAG_ORIG_SACK_ACKED; } if (sacked & TCPCB_SACKED_ACKED) -- cgit From 3cf956eebe54cdb7cf1701642085507f0354e56a Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 20 Mar 2013 10:12:10 +0100 Subject: ASoC: wm8994: Support constraining the maximum number of channels clocked Some systems use the audio CODEC to clock a DAI with multiple data lines in parallel, meaning that bit clocks are only required for a smaller number of channels than data is sent for. In some cases providing the extra bit clocks can take the other devices on the audio bus out of spec. Support such systems by allowing a maximum number of channels to be specified. 
Signed-off-by: Mark Brown --- include/linux/mfd/wm8994/pdata.h | 8 ++++++++ sound/soc/codecs/wm8994.c | 13 +++++++++++-- 2 files changed, 19 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/wm8994/pdata.h b/include/linux/mfd/wm8994/pdata.h index 8e21a094836d..68e776594889 100644 --- a/include/linux/mfd/wm8994/pdata.h +++ b/include/linux/mfd/wm8994/pdata.h @@ -17,6 +17,7 @@ #define WM8994_NUM_LDO 2 #define WM8994_NUM_GPIO 11 +#define WM8994_NUM_AIF 3 struct wm8994_ldo_pdata { /** GPIOs to enable regulator, 0 or less if not available */ @@ -215,6 +216,13 @@ struct wm8994_pdata { * system. */ bool spkmode_pu; + + /** + * Maximum number of channels clocks will be generated for, + * useful for systems where and I2S bus with multiple data + * lines is mastered. + */ + int max_channels_clocked[WM8994_NUM_AIF]; }; #endif diff --git a/sound/soc/codecs/wm8994.c b/sound/soc/codecs/wm8994.c index c9bd445c4976..318ea64b9800 100644 --- a/sound/soc/codecs/wm8994.c +++ b/sound/soc/codecs/wm8994.c @@ -2656,6 +2656,8 @@ static int wm8994_hw_params(struct snd_pcm_substream *substream, { struct snd_soc_codec *codec = dai->codec; struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec); + struct wm8994 *control = wm8994->wm8994; + struct wm8994_pdata *pdata = &control->pdata; int aif1_reg; int aif2_reg; int bclk_reg; @@ -2723,7 +2725,14 @@ static int wm8994_hw_params(struct snd_pcm_substream *substream, } wm8994->channels[id] = params_channels(params); - switch (params_channels(params)) { + if (pdata->max_channels_clocked[id] && + wm8994->channels[id] > pdata->max_channels_clocked[id]) { + dev_dbg(dai->dev, "Constraining channels to %d from %d\n", + pdata->max_channels_clocked[id], wm8994->channels[id]); + wm8994->channels[id] = pdata->max_channels_clocked[id]; + } + + switch (wm8994->channels[id]) { case 1: case 2: bclk_rate *= 2; @@ -2745,7 +2754,7 @@ static int wm8994_hw_params(struct snd_pcm_substream *substream, dev_dbg(dai->dev, "AIF%dCLK is %dHz, target BCLK %dHz\n", dai->id, wm8994->aifclk[id], bclk_rate); - if (params_channels(params) == 1 && + if (wm8994->channels[id] == 1 && (snd_soc_read(codec, aif1_reg) & 0x18) == 0x18) aif2 |= WM8994_AIF1_MONO; -- cgit From 4f1b07581613bf076b0dacdd9a3fb290d3caa227 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 10 Jan 2013 15:38:48 +0000 Subject: mfd: wm5102: Add additional speaker control registers Signed-off-by: Mark Brown --- drivers/mfd/wm5102-tables.c | 2 ++ include/linux/mfd/arizona/registers.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/drivers/mfd/wm5102-tables.c b/drivers/mfd/wm5102-tables.c index 8a6ce8c12505..7d01069c09db 100644 --- a/drivers/mfd/wm5102-tables.c +++ b/drivers/mfd/wm5102-tables.c @@ -1169,6 +1169,8 @@ static bool wm5102_readable_register(struct device *dev, unsigned int reg) case ARIZONA_NOISE_GATE_CONTROL: case ARIZONA_PDM_SPK1_CTRL_1: case ARIZONA_PDM_SPK1_CTRL_2: + case ARIZONA_SPK_CTRL_2: + case ARIZONA_SPK_CTRL_3: case ARIZONA_DAC_COMP_1: case ARIZONA_DAC_COMP_2: case ARIZONA_DAC_COMP_3: diff --git a/include/linux/mfd/arizona/registers.h b/include/linux/mfd/arizona/registers.h index a61ce90ecd3f..a47fd358016f 100644 --- a/include/linux/mfd/arizona/registers.h +++ b/include/linux/mfd/arizona/registers.h @@ -217,6 +217,8 @@ #define ARIZONA_PDM_SPK1_CTRL_2 0x491 #define ARIZONA_PDM_SPK2_CTRL_1 0x492 #define ARIZONA_PDM_SPK2_CTRL_2 0x493 +#define ARIZONA_SPK_CTRL_2 0x4B5 +#define ARIZONA_SPK_CTRL_3 0x4B6 #define ARIZONA_DAC_COMP_1 0x4DC #define 
ARIZONA_DAC_COMP_2 0x4DD #define ARIZONA_DAC_COMP_3 0x4DE -- cgit From 79617801ea0c0e6664cb497d4c1892c2ff407364 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 21 Mar 2013 22:22:03 +0100 Subject: filter: bpf_jit_comp: refactor and unify BPF JIT image dump output If bpf_jit_enable > 1, then we dump the emitted JIT compiled image after creation. Currently, only SPARC and PowerPC have output similar to the reference implementation on x86_64. Make a small helper function in order to reduce duplicated code and make the dump output uniform across architectures x86_64, SPARC, PPC, ARM (e.g. on ARM flen, pass and proglen are currently not shown, but would be interesting to know as well), and for future BPF JIT implementations on other archs. Cc: Mircea Gherzan Cc: Matt Evans Cc: Eric Dumazet Cc: David S. Miller Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- arch/arm/net/bpf_jit_32.c | 5 ++--- arch/powerpc/net/bpf_jit_comp.c | 12 ++++-------- arch/sparc/net/bpf_jit_comp.c | 6 +----- arch/x86/net/bpf_jit_comp.c | 9 ++------- include/linux/filter.h | 10 ++++++++++ 5 files changed, 19 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index a0bd8a755bdf..1a643ee8e082 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -918,9 +918,8 @@ void bpf_jit_compile(struct sk_filter *fp) #endif if (bpf_jit_enable > 1) - print_hex_dump(KERN_INFO, "BPF JIT code: ", - DUMP_PREFIX_ADDRESS, 16, 4, ctx.target, - alloc_size, false); + /* there are 2 passes here */ + bpf_jit_dump(fp->len, alloc_size, 2, ctx.target); fp->bpf_func = (void *)ctx.target; out: diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index e834f1ec23c8..c427ae36374a 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -671,16 +671,12 @@ void bpf_jit_compile(struct sk_filter *fp) } if (bpf_jit_enable > 1) - pr_info("flen=%d proglen=%u pass=%d image=%p\n", - flen, proglen, pass, image); + /* Note that we output the base address of the code_base * rather than image, since opcodes are in code_base. 
+ */ + bpf_jit_dump(flen, proglen, pass, code_base); if (image) { - if (bpf_jit_enable > 1) - print_hex_dump(KERN_ERR, "JIT code: ", - DUMP_PREFIX_ADDRESS, - 16, 1, code_base, - proglen, false); - bpf_flush_icache(code_base, code_base + (proglen/4)); /* Function descriptor nastiness: Address + TOC */ ((u64 *)image)[0] = (u64)code_base; diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c index 3109ca684a99..d36a85ebb5e0 100644 --- a/arch/sparc/net/bpf_jit_comp.c +++ b/arch/sparc/net/bpf_jit_comp.c @@ -795,13 +795,9 @@ cond_branch: f_offset = addrs[i + filter[i].jf]; } if (bpf_jit_enable > 1) - pr_err("flen=%d proglen=%u pass=%d image=%p\n", - flen, proglen, pass, image); + bpf_jit_dump(flen, proglen, pass, image); if (image) { - if (bpf_jit_enable > 1) - print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_ADDRESS, - 16, 1, image, proglen, false); bpf_flush_icache(image, image + proglen); fp->bpf_func = (void *)image; } diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 3cbe45381bbb..f66b54086ce5 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -725,17 +725,12 @@ cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i]; } oldproglen = proglen; } + if (bpf_jit_enable > 1) - pr_err("flen=%d proglen=%u pass=%d image=%p\n", - flen, proglen, pass, image); + bpf_jit_dump(flen, proglen, pass, image); if (image) { - if (bpf_jit_enable > 1) - print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_ADDRESS, - 16, 1, image, proglen, false); - bpf_flush_icache(image, image + proglen); - fp->bpf_func = (void *)image; } out: diff --git a/include/linux/filter.h b/include/linux/filter.h index d2059cb4e465..d7d25083130b 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -50,6 +50,16 @@ extern int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, uns #ifdef CONFIG_BPF_JIT extern void bpf_jit_compile(struct sk_filter *fp); extern void bpf_jit_free(struct sk_filter *fp); + +static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen, + u32 pass, void *image) +{ + pr_err("flen=%u proglen=%u pass=%u image=%p\n", + flen, proglen, pass, image); + if (image) + print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_ADDRESS, + 16, 1, image, proglen, false); +} #define SK_RUN_FILTER(FILTER, SKB) (*FILTER->bpf_func)(SKB, FILTER->insns) #else static inline void bpf_jit_compile(struct sk_filter *fp) -- cgit From f58b082aed43400c03e53beacc50a9f9eb23ac91 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 6 Mar 2013 23:46:20 +0100 Subject: ACPI / scan: Add special handler for Intel Lynxpoint LPSS devices Devices on the Intel Lynxpoint Low Power Subsystem (LPSS) have some common features that aren't shared with any other platform devices, including the clock and LTR (Latency Tolerance Reporting) registers. It is better to handle those features in common code than to bother device drivers with doing that (I/O functionality-wise the LPSS devices are generally compatible with other devices that don't have those special registers and may be handled by the same drivers). The clock registers of the LPSS devices are now taken care of by the special clk-x86-lpss driver, but the MMIO mappings used for accessing those registers can also be used for accessing the LTR registers on those devices (LTR support for the Lynxpoint LPSS is going to be added by a subsequent patch). 
Thus it is convenient to add a special ACPI scan handler for the Lynxpoint LPSS devices that will create the MMIO mappings for accessing the clock (and LTR in the future) registers and will register the LPSS devices' clocks, so the clk-x86-lpss driver will only need to take care of the main Lynxpoint LPSS clock. Introduce a special ACPI scan handler for Intel Lynxpoint LPSS devices as described above. This also reduces overhead related to browsing the ACPI namespace in search of the LPSS devices before the registration of their clocks, removes some LPSS-specific (and somewhat ugly) code from acpi_platform.c and shrinks the overall code size slightly. Signed-off-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki Acked-by: Mike Turquette --- drivers/acpi/Makefile | 1 + drivers/acpi/acpi_lpss.c | 163 +++++++++++++++++++++++++++++++++ drivers/acpi/acpi_platform.c | 40 +------- drivers/acpi/internal.h | 8 ++ drivers/acpi/scan.c | 1 + drivers/clk/x86/Makefile | 2 +- drivers/clk/x86/clk-lpss.c | 99 -------------------- drivers/clk/x86/clk-lpss.h | 36 -------- drivers/clk/x86/clk-lpt.c | 40 +------- include/linux/platform_data/clk-lpss.h | 18 ++++ 10 files changed, 195 insertions(+), 213 deletions(-) create mode 100644 drivers/acpi/acpi_lpss.c delete mode 100644 drivers/clk/x86/clk-lpss.c delete mode 100644 drivers/clk/x86/clk-lpss.h create mode 100644 include/linux/platform_data/clk-lpss.h (limited to 'include/linux') diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index 474fcfeba66c..ecb743bf05a5 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -39,6 +39,7 @@ acpi-y += ec.o acpi-$(CONFIG_ACPI_DOCK) += dock.o acpi-y += pci_root.o pci_link.o pci_irq.o acpi-y += csrt.o +acpi-$(CONFIG_X86_INTEL_LPSS) += acpi_lpss.o acpi-y += acpi_platform.o acpi-y += power.o acpi-y += event.o diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c new file mode 100644 index 000000000000..823df46a3deb --- /dev/null +++ b/drivers/acpi/acpi_lpss.c @@ -0,0 +1,163 @@ +/* + * ACPI support for Intel Lynxpoint LPSS. + * + * Copyright (C) 2013, Intel Corporation + * Authors: Mika Westerberg + * Rafael J. Wysocki + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "internal.h" + +ACPI_MODULE_NAME("acpi_lpss"); + +#define LPSS_CLK_OFFSET 0x800 +#define LPSS_CLK_SIZE 0x04 + +struct lpss_device_desc { + bool clk_required; + const char *clk_parent; +}; + +struct lpss_private_data { + void __iomem *mmio_base; + resource_size_t mmio_size; + struct clk *clk; + const struct lpss_device_desc *dev_desc; +}; + +static struct lpss_device_desc lpt_dev_desc = { + .clk_required = true, + .clk_parent = "lpss_clk", +}; + +static const struct acpi_device_id acpi_lpss_device_ids[] = { + /* Lynxpoint LPSS devices */ + { "INT33C0", (unsigned long)&lpt_dev_desc }, + { "INT33C1", (unsigned long)&lpt_dev_desc }, + { "INT33C2", (unsigned long)&lpt_dev_desc }, + { "INT33C3", (unsigned long)&lpt_dev_desc }, + { "INT33C4", (unsigned long)&lpt_dev_desc }, + { "INT33C5", (unsigned long)&lpt_dev_desc }, + { "INT33C6", }, + { "INT33C7", }, + + { } +}; + +static int is_memory(struct acpi_resource *res, void *not_used) +{ + struct resource r; + return !acpi_dev_resource_memory(res, &r); +} + +/* LPSS main clock device. 
*/ +static struct platform_device *lpss_clk_dev; + +static inline void lpt_register_clock_device(void) +{ + lpss_clk_dev = platform_device_register_simple("clk-lpt", -1, NULL, 0); +} + +static int register_device_clock(struct acpi_device *adev, + struct lpss_private_data *pdata) +{ + const struct lpss_device_desc *dev_desc = pdata->dev_desc; + + if (!lpss_clk_dev) + lpt_register_clock_device(); + + if (!dev_desc->clk_parent || !pdata->mmio_base + || pdata->mmio_size < LPSS_CLK_OFFSET + LPSS_CLK_SIZE) + return -ENODATA; + + pdata->clk = clk_register_gate(NULL, dev_name(&adev->dev), + dev_desc->clk_parent, 0, + pdata->mmio_base + LPSS_CLK_OFFSET, + 0, 0, NULL); + if (IS_ERR(pdata->clk)) + return PTR_ERR(pdata->clk); + + clk_register_clkdev(pdata->clk, NULL, dev_name(&adev->dev)); + return 0; +} + +static int acpi_lpss_create_device(struct acpi_device *adev, + const struct acpi_device_id *id) +{ + struct lpss_device_desc *dev_desc; + struct lpss_private_data *pdata; + struct resource_list_entry *rentry; + struct list_head resource_list; + int ret; + + dev_desc = (struct lpss_device_desc *)id->driver_data; + if (!dev_desc) + return acpi_create_platform_device(adev, id); + + pdata = kzalloc(sizeof(*pdata), GFP_KERNEL); + if (!pdata) + return -ENOMEM; + + INIT_LIST_HEAD(&resource_list); + ret = acpi_dev_get_resources(adev, &resource_list, is_memory, NULL); + if (ret < 0) + goto err_out; + + list_for_each_entry(rentry, &resource_list, node) + if (resource_type(&rentry->res) == IORESOURCE_MEM) { + pdata->mmio_size = resource_size(&rentry->res); + pdata->mmio_base = ioremap(rentry->res.start, + pdata->mmio_size); + pdata->dev_desc = dev_desc; + break; + } + + acpi_dev_free_resource_list(&resource_list); + + if (dev_desc->clk_required) { + ret = register_device_clock(adev, pdata); + if (ret) { + /* + * Skip the device, but don't terminate the namespace + * scan. + */ + ret = 0; + goto err_out; + } + } + + adev->driver_data = pdata; + ret = acpi_create_platform_device(adev, id); + if (ret > 0) + return ret; + + adev->driver_data = NULL; + + err_out: + kfree(pdata); + return ret; +} + +static struct acpi_scan_handler lpss_handler = { + .ids = acpi_lpss_device_ids, + .attach = acpi_lpss_create_device, +}; + +void __init acpi_lpss_init(void) +{ + if (!lpt_clk_init()) + acpi_scan_add_handler(&lpss_handler); +} diff --git a/drivers/acpi/acpi_platform.c b/drivers/acpi/acpi_platform.c index 26fce4b8a632..fafec5ddf17f 100644 --- a/drivers/acpi/acpi_platform.c +++ b/drivers/acpi/acpi_platform.c @@ -22,9 +22,6 @@ ACPI_MODULE_NAME("platform"); -/* Flags for acpi_create_platform_device */ -#define ACPI_PLATFORM_CLK BIT(0) - /* * The following ACPI IDs are known to be suitable for representing as * platform devices. 
@@ -33,33 +30,9 @@ static const struct acpi_device_id acpi_platform_device_ids[] = { { "PNP0D40" }, - /* Haswell LPSS devices */ - { "INT33C0", ACPI_PLATFORM_CLK }, - { "INT33C1", ACPI_PLATFORM_CLK }, - { "INT33C2", ACPI_PLATFORM_CLK }, - { "INT33C3", ACPI_PLATFORM_CLK }, - { "INT33C4", ACPI_PLATFORM_CLK }, - { "INT33C5", ACPI_PLATFORM_CLK }, - { "INT33C6", ACPI_PLATFORM_CLK }, - { "INT33C7", ACPI_PLATFORM_CLK }, - { } }; -static int acpi_create_platform_clks(struct acpi_device *adev) -{ - static struct platform_device *pdev; - - /* Create Lynxpoint LPSS clocks */ - if (!pdev && !strncmp(acpi_device_hid(adev), "INT33C", 6)) { - pdev = platform_device_register_simple("clk-lpt", -1, NULL, 0); - if (IS_ERR(pdev)) - return PTR_ERR(pdev); - } - - return 0; -} - /** * acpi_create_platform_device - Create platform device for ACPI device node * @adev: ACPI device node to create a platform device for. @@ -71,10 +44,9 @@ static int acpi_create_platform_clks(struct acpi_device *adev) * * Name of the platform device will be the same as @adev's. */ -static int acpi_create_platform_device(struct acpi_device *adev, - const struct acpi_device_id *id) +int acpi_create_platform_device(struct acpi_device *adev, + const struct acpi_device_id *id) { - unsigned long flags = id->driver_data; struct platform_device *pdev = NULL; struct acpi_device *acpi_parent; struct platform_device_info pdevinfo; @@ -83,14 +55,6 @@ static int acpi_create_platform_device(struct acpi_device *adev, struct resource *resources; int count; - if (flags & ACPI_PLATFORM_CLK) { - int ret = acpi_create_platform_clks(adev); - if (ret) { - dev_err(&adev->dev, "failed to create clocks\n"); - return ret; - } - } - /* If the ACPI node already has a physical device attached, skip it. */ if (adev->physical_node_count) return 0; diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 3c94a732b4b3..e227819217fb 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -48,6 +48,11 @@ int acpi_debugfs_init(void); #else static inline void acpi_debugfs_init(void) { return; } #endif +#ifdef CONFIG_X86_INTEL_LPSS +void acpi_lpss_init(void); +#else +static inline void acpi_lpss_init(void) {} +#endif /* -------------------------------------------------------------------------- Device Node Initialization / Removal @@ -131,4 +136,7 @@ static inline void suspend_nvs_restore(void) {} -------------------------------------------------------------------------- */ struct platform_device; +int acpi_create_platform_device(struct acpi_device *adev, + const struct acpi_device_id *id); + #endif /* _ACPI_INTERNAL_H_ */ diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 5e7e991717d7..433a4e15019c 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1788,6 +1788,7 @@ int __init acpi_scan_init(void) acpi_pci_root_init(); acpi_pci_link_init(); acpi_platform_init(); + acpi_lpss_init(); acpi_csrt_init(); acpi_container_init(); acpi_pci_slot_init(); diff --git a/drivers/clk/x86/Makefile b/drivers/clk/x86/Makefile index f9ba4fab0ddc..04781389d0fb 100644 --- a/drivers/clk/x86/Makefile +++ b/drivers/clk/x86/Makefile @@ -1,2 +1,2 @@ -clk-x86-lpss-objs := clk-lpss.o clk-lpt.o +clk-x86-lpss-objs := clk-lpt.o obj-$(CONFIG_X86_INTEL_LPSS) += clk-x86-lpss.o diff --git a/drivers/clk/x86/clk-lpss.c b/drivers/clk/x86/clk-lpss.c deleted file mode 100644 index b5e229f3c3d9..000000000000 --- a/drivers/clk/x86/clk-lpss.c +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Intel Low Power Subsystem clocks. 
- * - * Copyright (C) 2013, Intel Corporation - * Authors: Mika Westerberg - * Heikki Krogerus - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include -#include -#include -#include -#include - -static int clk_lpss_is_mmio_resource(struct acpi_resource *res, void *data) -{ - struct resource r; - return !acpi_dev_resource_memory(res, &r); -} - -static acpi_status clk_lpss_find_mmio(acpi_handle handle, u32 level, - void *data, void **retval) -{ - struct resource_list_entry *rentry; - struct list_head resource_list; - struct acpi_device *adev; - const char *uid = data; - int ret; - - if (acpi_bus_get_device(handle, &adev)) - return AE_OK; - - if (uid) { - if (!adev->pnp.unique_id) - return AE_OK; - if (strcmp(uid, adev->pnp.unique_id)) - return AE_OK; - } - - INIT_LIST_HEAD(&resource_list); - ret = acpi_dev_get_resources(adev, &resource_list, - clk_lpss_is_mmio_resource, NULL); - if (ret < 0) - return AE_NO_MEMORY; - - list_for_each_entry(rentry, &resource_list, node) - if (resource_type(&rentry->res) == IORESOURCE_MEM) { - *(struct resource *)retval = rentry->res; - break; - } - - acpi_dev_free_resource_list(&resource_list); - return AE_OK; -} - -/** - * clk_register_lpss_gate - register LPSS clock gate - * @name: name of this clock gate - * @parent_name: parent clock name - * @hid: ACPI _HID of the device - * @uid: ACPI _UID of the device (optional) - * @offset: LPSS PRV_CLOCK_PARAMS offset - * - * Creates and registers LPSS clock gate. - */ -struct clk *clk_register_lpss_gate(const char *name, const char *parent_name, - const char *hid, const char *uid, - unsigned offset) -{ - struct resource res = { }; - void __iomem *mmio_base; - acpi_status status; - struct clk *clk; - - /* - * First try to look the device and its mmio resource from the - * ACPI namespace. - */ - status = acpi_get_devices(hid, clk_lpss_find_mmio, (void *)uid, - (void **)&res); - if (ACPI_FAILURE(status) || !res.start) - return ERR_PTR(-ENODEV); - - mmio_base = ioremap(res.start, resource_size(&res)); - if (!mmio_base) - return ERR_PTR(-ENOMEM); - - clk = clk_register_gate(NULL, name, parent_name, 0, mmio_base + offset, - 0, 0, NULL); - if (IS_ERR(clk)) - iounmap(mmio_base); - - return clk; -} diff --git a/drivers/clk/x86/clk-lpss.h b/drivers/clk/x86/clk-lpss.h deleted file mode 100644 index e9460f442297..000000000000 --- a/drivers/clk/x86/clk-lpss.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Intel Low Power Subsystem clock. - * - * Copyright (C) 2013, Intel Corporation - * Authors: Mika Westerberg - * Heikki Krogerus - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- */ - -#ifndef __CLK_LPSS_H -#define __CLK_LPSS_H - -#include -#include -#include - -#ifdef CONFIG_ACPI -extern struct clk *clk_register_lpss_gate(const char *name, - const char *parent_name, - const char *hid, const char *uid, - unsigned offset); -#else -static inline struct clk *clk_register_lpss_gate(const char *name, - const char *parent_name, - const char *hid, - const char *uid, - unsigned offset) -{ - return ERR_PTR(-ENODEV); -} -#endif - -#endif /* __CLK_LPSS_H */ diff --git a/drivers/clk/x86/clk-lpt.c b/drivers/clk/x86/clk-lpt.c index 81298aeef7e3..5cf4f4686406 100644 --- a/drivers/clk/x86/clk-lpt.c +++ b/drivers/clk/x86/clk-lpt.c @@ -10,7 +10,6 @@ * published by the Free Software Foundation. */ -#include #include #include #include @@ -18,8 +17,6 @@ #include #include -#include "clk-lpss.h" - #define PRV_CLOCK_PARAMS 0x800 static int lpt_clk_probe(struct platform_device *pdev) @@ -34,40 +31,6 @@ static int lpt_clk_probe(struct platform_device *pdev) /* Shared DMA clock */ clk_register_clkdev(clk, "hclk", "INTL9C60.0.auto"); - - /* SPI clocks */ - clk = clk_register_lpss_gate("spi0_clk", "lpss_clk", "INT33C0", NULL, - PRV_CLOCK_PARAMS); - if (!IS_ERR(clk)) - clk_register_clkdev(clk, NULL, "INT33C0:00"); - - clk = clk_register_lpss_gate("spi1_clk", "lpss_clk", "INT33C1", NULL, - PRV_CLOCK_PARAMS); - if (!IS_ERR(clk)) - clk_register_clkdev(clk, NULL, "INT33C1:00"); - - /* I2C clocks */ - clk = clk_register_lpss_gate("i2c0_clk", "lpss_clk", "INT33C2", NULL, - PRV_CLOCK_PARAMS); - if (!IS_ERR(clk)) - clk_register_clkdev(clk, NULL, "INT33C2:00"); - - clk = clk_register_lpss_gate("i2c1_clk", "lpss_clk", "INT33C3", NULL, - PRV_CLOCK_PARAMS); - if (!IS_ERR(clk)) - clk_register_clkdev(clk, NULL, "INT33C3:00"); - - /* UART clocks */ - clk = clk_register_lpss_gate("uart0_clk", "lpss_clk", "INT33C4", NULL, - PRV_CLOCK_PARAMS); - if (!IS_ERR(clk)) - clk_register_clkdev(clk, NULL, "INT33C4:00"); - - clk = clk_register_lpss_gate("uart1_clk", "lpss_clk", "INT33C5", NULL, - PRV_CLOCK_PARAMS); - if (!IS_ERR(clk)) - clk_register_clkdev(clk, NULL, "INT33C5:00"); - return 0; } @@ -79,8 +42,7 @@ static struct platform_driver lpt_clk_driver = { .probe = lpt_clk_probe, }; -static int __init lpt_clk_init(void) +int __init lpt_clk_init(void) { return platform_driver_register(&lpt_clk_driver); } -arch_initcall(lpt_clk_init); diff --git a/include/linux/platform_data/clk-lpss.h b/include/linux/platform_data/clk-lpss.h new file mode 100644 index 000000000000..528e73ce46d2 --- /dev/null +++ b/include/linux/platform_data/clk-lpss.h @@ -0,0 +1,18 @@ +/* + * Intel Low Power Subsystem clocks. + * + * Copyright (C) 2013, Intel Corporation + * Authors: Mika Westerberg + * Rafael J. Wysocki + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef __CLK_LPSS_H +#define __CLK_LPSS_H + +extern int lpt_clk_init(void); + +#endif /* __CLK_LPSS_H */ -- cgit From 19919226c3f20e6bf5de3df96432ce80ffd63ff2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 22 Mar 2013 10:48:33 +0100 Subject: clockevents: Add missing tick_check_broadcast_expired() for CLOCKEVENTS=n Feng's build robot reports: arch/arm/kernel/process.c: In function 'cpu_idle': arch/arm/kernel/process.c:211:4: error: implicit declaration of function 'tick_check_broadcast_expired' [-Werror=implicit-function-declaration] Add the missing inline function for non-clockevent builds. Reported-by: Wu Fengguang Signed-off-by: Thomas Gleixner --- include/linux/clockchips.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 646aac136eed..464e229e7d84 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -193,6 +193,7 @@ static inline void clockevents_suspend(void) {} static inline void clockevents_resume(void) {} #define clockevents_notify(reason, arg) do { } while (0) +static inline int tick_check_broadcast_expired(void) { return 0; } #endif -- cgit From 19dde0bd71e3dffb03ddc509019e22250f4e20c0 Mon Sep 17 00:00:00 2001 From: Janusz Dziedzic Date: Thu, 21 Mar 2013 15:47:54 +0100 Subject: cfg80211: add P2P Notice of Absence attribute Add P2P Notice of Absence attribute structure. Signed-off-by: Janusz Dziedzic Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 4cf0c9e4dd99..d10b5bba3268 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1027,6 +1027,26 @@ enum ieee80211_p2p_attr_id { IEEE80211_P2P_ATTR_MAX }; +/* Notice of Absence attribute - described in P2P spec 4.1.14 */ +/* Typical max value used here */ +#define IEEE80211_P2P_NOA_DESC_MAX 4 + +struct ieee80211_p2p_noa_desc { + u8 count; + __le32 duration; + __le32 interval; + __le32 start_time; +} __packed; + +struct ieee80211_p2p_noa_attr { + u8 index; + u8 oppps_ctwindow; + struct ieee80211_p2p_noa_desc desc[IEEE80211_P2P_NOA_DESC_MAX]; +} __packed; + +#define IEEE80211_P2P_OPPPS_ENABLE_BIT BIT(7) +#define IEEE80211_P2P_OPPPS_CTWINDOW_MASK 0x7F + /** * struct ieee80211_bar - HT Block Ack Request * -- cgit From d79df329d0bd425c00856915b7b12f54dd100154 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 21 Mar 2013 15:58:58 +0000 Subject: regulator: ab8500: Further populate initialisation registers This patch supplies access to some extra settings provided by the AB8500 regulator device. We also update some of the existing initialisation values in accordance with internal ST-Ericsson code submissions. This single patch was originally a collection of updates which have been squashed together to aid with clarity. 
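Each REG_INIT() entry in the driver table below records the register's bank, address and the set of bits that initialisation is allowed to touch. A minimal sketch of the masked update this implies, assuming the usual read-modify-write semantics of abx500_mask_and_set_register_interruptible() (which performs the actual write):

	/* Sketch only: bits outside 'mask' keep their current value,
	 * bits inside 'mask' are taken from 'value'. */
	static unsigned char masked_init(unsigned char cur, unsigned char mask,
					 unsigned char value)
	{
		return (cur & ~mask) | (value & mask);
	}

So, for example, widening the VRF1VAUX3REGU mask from 0x03 to 0x0f in the hunk below lets initialisation data set the new Vrf1Regu bits (0x0c) as well as Vaux3Regu (0x03).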
Signed-off-by: Lee Jones Signed-off-by: Mark Brown --- arch/arm/mach-ux500/board-mop500-regulators.c | 40 +++++++++++++++++++++++++-- drivers/regulator/ab8500.c | 39 ++++++++++++++++++++++++-- include/linux/regulator/ab8500.h | 8 ++++++ 3 files changed, 81 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-ux500/board-mop500-regulators.c b/arch/arm/mach-ux500/board-mop500-regulators.c index 2a17bc506cff..4b3c51905071 100644 --- a/arch/arm/mach-ux500/board-mop500-regulators.c +++ b/arch/arm/mach-ux500/board-mop500-regulators.c @@ -126,6 +126,7 @@ struct ab8500_regulator_reg_init ab8500_regulator_reg_init[AB8500_NUM_REGULATOR_REGISTERS] = { /* * VanaRequestCtrl = HP/LP depending on VxRequest + * VpllRequestCtrl = HP/LP depending on VxRequest * VextSupply1RequestCtrl = HP/LP depending on VxRequest */ INIT_REGULATOR_REGISTER(AB8500_REGUREQUESTCTRL2, 0x00), @@ -142,12 +143,16 @@ ab8500_regulator_reg_init[AB8500_NUM_REGULATOR_REGISTERS] = { */ INIT_REGULATOR_REGISTER(AB8500_REGUREQUESTCTRL4, 0x00), /* + * Vsmps1SysClkReq1HPValid = enabled + * Vsmps2SysClkReq1HPValid = enabled + * Vsmps3SysClkReq1HPValid = enabled * VanaSysClkReq1HPValid = disabled + * VpllSysClkReq1HPValid = enabled * Vaux1SysClkReq1HPValid = disabled * Vaux2SysClkReq1HPValid = disabled * Vaux3SysClkReq1HPValid = disabled */ - INIT_REGULATOR_REGISTER(AB8500_REGUSYSCLKREQ1HPVALID1, 0x00), + INIT_REGULATOR_REGISTER(AB8500_REGUSYSCLKREQ1HPVALID1, 0x17), /* * VextSupply1SysClkReq1HPValid = disabled * VextSupply2SysClkReq1HPValid = disabled @@ -233,6 +238,34 @@ ab8500_regulator_reg_init[AB8500_NUM_REGULATOR_REGISTERS] = { * Vamic2_dzout = high-Z when Vamic2 is disabled */ INIT_REGULATOR_REGISTER(AB8500_REGUCTRL1VAMIC, 0x00), + /* + * VBBN = force OFF + * VBBP = force OFF + * NOTE! PRCMU register + */ + INIT_REGULATOR_REGISTER(AB8500_ARMREGU2, 0x00), + /* + * VBBNSel1 = VBBP = VBBPFB + * VBBPSel1 = 0 V + * NOTE! PRCMU register + */ + INIT_REGULATOR_REGISTER(AB8500_VBBSEL1, 0x00), + /* + * VBBNSel2 = VBBP = VBBPFB + * VBBPSel2 = 0 V + * NOTE! 
PRCMU register + */ + INIT_REGULATOR_REGISTER(AB8500_VBBSEL2, 0x00), + /* + * Vsmps1Regu = HW control + * Vsmps1SelCtrl = Vsmps1 voltage defined by Vsmsp1Sel2 + */ + INIT_REGULATOR_REGISTER(AB8500_VSMPS1REGU, 0x06), + /* + * Vsmps2Regu = HW control + * Vsmps2SelCtrl = Vsmps2 voltage defined by Vsmsp2Sel2 + */ + INIT_REGULATOR_REGISTER(AB8500_VSMPS2REGU, 0x06), /* * VPll = Hw controlled * VanaRegu = force off @@ -257,9 +290,10 @@ ab8500_regulator_reg_init[AB8500_NUM_REGULATOR_REGISTERS] = { */ INIT_REGULATOR_REGISTER(AB8500_VAUX12REGU, 0x01), /* - * Vaux3regu = force off + * Vrf1Regu = HW control + * Vaux3Regu = force off */ - INIT_REGULATOR_REGISTER(AB8500_VRF1VAUX3REGU, 0x00), + INIT_REGULATOR_REGISTER(AB8500_VRF1VAUX3REGU, 0x08), /* * Vsmps1 = 1.15V */ diff --git a/drivers/regulator/ab8500.c b/drivers/regulator/ab8500.c index 09014f38a948..4d9d556a47cc 100644 --- a/drivers/regulator/ab8500.c +++ b/drivers/regulator/ab8500.c @@ -477,7 +477,7 @@ struct ab8500_reg_init { static struct ab8500_reg_init ab8500_reg_init[] = { /* * 0x30, VanaRequestCtrl - * 0x0C, VpllRequestCtrl + * 0x0c, VpllRequestCtrl * 0xc0, VextSupply1RequestCtrl */ REG_INIT(AB8500_REGUREQUESTCTRL2, 0x03, 0x04, 0xfc), @@ -494,12 +494,16 @@ static struct ab8500_reg_init ab8500_reg_init[] = { */ REG_INIT(AB8500_REGUREQUESTCTRL4, 0x03, 0x06, 0x07), /* + * 0x01, Vsmps1SysClkReq1HPValid + * 0x02, Vsmps2SysClkReq1HPValid + * 0x04, Vsmps3SysClkReq1HPValid * 0x08, VanaSysClkReq1HPValid + * 0x10, VpllSysClkReq1HPValid * 0x20, Vaux1SysClkReq1HPValid * 0x40, Vaux2SysClkReq1HPValid * 0x80, Vaux3SysClkReq1HPValid */ - REG_INIT(AB8500_REGUSYSCLKREQ1HPVALID1, 0x03, 0x07, 0xe8), + REG_INIT(AB8500_REGUSYSCLKREQ1HPVALID1, 0x03, 0x07, 0xff), /* * 0x10, VextSupply1SysClkReq1HPValid * 0x20, VextSupply2SysClkReq1HPValid @@ -577,6 +581,34 @@ static struct ab8500_reg_init ab8500_reg_init[] = { * 0x02, Vamic2_dzout */ REG_INIT(AB8500_REGUCTRL1VAMIC, 0x03, 0x84, 0x03), + /* + * 0x0c, VBBNRegu + * 0x03, VBBPRegu + * NOTE! PRCMU register + */ + REG_INIT(AB8500_ARMREGU2, 0x04, 0x01, 0x0f), + /* + * 0x0c, VBBPSel1 + * 0x03, VBBNSel1 + * NOTE! PRCMU register + */ + REG_INIT(AB8500_VBBSEL1, 0x04, 0x11, 0x0f), + /* + * 0x0c, VBBNSel2 + * 0x03, VBBPSel2 + * NOTE! PRCMU register + */ + REG_INIT(AB8500_VBBSEL2, 0x04, 0x12, 0x0f), + /* + * 0x03, Vsmps1Regu + * 0x0c, Vsmps1SelCtrl + */ + REG_INIT(AB8500_VSMPS1REGU, 0x04, 0x03, 0x0f), + /* + * 0x03, Vsmps2Regu + * 0x0c, Vsmps2SelCtrl + */ + REG_INIT(AB8500_VSMPS2REGU, 0x04, 0x04, 0x0f), /* * 0x0c, VanaRegu * 0x03, VpllRegu @@ -601,9 +633,10 @@ static struct ab8500_reg_init ab8500_reg_init[] = { */ REG_INIT(AB8500_VAUX12REGU, 0x04, 0x09, 0x0f), /* + * 0x0c, Vrf1Regu * 0x03, Vaux3Regu */ - REG_INIT(AB8500_VRF1VAUX3REGU, 0x04, 0x0a, 0x03), + REG_INIT(AB8500_VRF1VAUX3REGU, 0x04, 0x0a, 0x0f), /* * 0x3f, Vsmps1Sel1 */ diff --git a/include/linux/regulator/ab8500.h b/include/linux/regulator/ab8500.h index 7bd73bbdfd1b..2c6c9625013c 100644 --- a/include/linux/regulator/ab8500.h +++ b/include/linux/regulator/ab8500.h @@ -86,7 +86,15 @@ enum ab8500_regulator_reg { AB8500_REGUCTRL2SPARE, AB8500_REGUCTRLDISCH, AB8500_REGUCTRLDISCH2, + AB8500_ARMREGU2, /* NOTE! PRCMU register */ + AB8500_VBBSEL1, /* NOTE! PRCMU register */ + AB8500_VBBSEL2, /* NOTE! PRCMU register */ + AB8500_VSMPS1REGU, + AB8500_VSMPS2REGU, + AB8500_VSMPS3REGU, /* NOTE! PRCMU register */ AB8500_VSMPS1SEL1, + AB8500_VSMPS3SEL1, /* NOTE! PRCMU register */ + AB8500_VSMPS3SEL2, /* NOTE! 
PRCMU register */ AB8500_NUM_REGULATOR_REGISTERS, }; -- cgit From 3c1b8438d4bc99269aba560739e3e6cb640584f4 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 21 Mar 2013 15:59:01 +0000 Subject: ARM: ux500: regulators: Add mask for configuration There is already a register mask in the regulator driver to allow some bits of a register to be initialized. The register value is defined in the board configuration. This patch puts a mask in the board configuration to specify which bits should actually be altered. The purpose of this patch is to avoid future mistakes when updating the allowed bits in the regulator driver. Signed-off-by: Lee Jones Signed-off-by: Mark Brown --- arch/arm/mach-ux500/board-mop500-regulators.c | 66 +++++++++++++-------------- drivers/regulator/ab8500.c | 36 ++++++--------- include/linux/regulator/ab8500.h | 10 ++-- 3 files changed, 53 insertions(+), 59 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-ux500/board-mop500-regulators.c b/arch/arm/mach-ux500/board-mop500-regulators.c index 4b3c51905071..96dd17490bea 100644 --- a/arch/arm/mach-ux500/board-mop500-regulators.c +++ b/arch/arm/mach-ux500/board-mop500-regulators.c @@ -129,19 +129,19 @@ ab8500_regulator_reg_init[AB8500_NUM_REGULATOR_REGISTERS] = { * VpllRequestCtrl = HP/LP depending on VxRequest * VextSupply1RequestCtrl = HP/LP depending on VxRequest */ - INIT_REGULATOR_REGISTER(AB8500_REGUREQUESTCTRL2, 0x00), + INIT_REGULATOR_REGISTER(AB8500_REGUREQUESTCTRL2, 0xfc, 0x00), /* * VextSupply2RequestCtrl = HP/LP depending on VxRequest * VextSupply3RequestCtrl = HP/LP depending on VxRequest * Vaux1RequestCtrl = HP/LP depending on VxRequest * Vaux2RequestCtrl = HP/LP depending on VxRequest */ - INIT_REGULATOR_REGISTER(AB8500_REGUREQUESTCTRL3, 0x00), + INIT_REGULATOR_REGISTER(AB8500_REGUREQUESTCTRL3, 0xff, 0x00), /* * Vaux3RequestCtrl = HP/LP depending on VxRequest * SwHPReq = Control through SWValid disabled */ - INIT_REGULATOR_REGISTER(AB8500_REGUREQUESTCTRL4, 0x00), + INIT_REGULATOR_REGISTER(AB8500_REGUREQUESTCTRL4, 0x07, 0x00), /* * Vsmps1SysClkReq1HPValid = enabled * Vsmps2SysClkReq1HPValid = enabled @@ -152,44 +152,44 @@ ab8500_regulator_reg_init[AB8500_NUM_REGULATOR_REGISTERS] = { * Vaux2SysClkReq1HPValid = disabled * Vaux3SysClkReq1HPValid = disabled */ - INIT_REGULATOR_REGISTER(AB8500_REGUSYSCLKREQ1HPVALID1, 0x17), + INIT_REGULATOR_REGISTER(AB8500_REGUSYSCLKREQ1HPVALID1, 0xff, 0x17), /* * VextSupply1SysClkReq1HPValid = disabled * VextSupply2SysClkReq1HPValid = disabled * VextSupply3SysClkReq1HPValid = SysClkReq1 controlled */ - INIT_REGULATOR_REGISTER(AB8500_REGUSYSCLKREQ1HPVALID2, 0x40), + INIT_REGULATOR_REGISTER(AB8500_REGUSYSCLKREQ1HPVALID2, 0x70, 0x40), /* * VanaHwHPReq1Valid = disabled * Vaux1HwHPreq1Valid = disabled * Vaux2HwHPReq1Valid = disabled * Vaux3HwHPReqValid = disabled */ - INIT_REGULATOR_REGISTER(AB8500_REGUHWHPREQ1VALID1, 0x00), + INIT_REGULATOR_REGISTER(AB8500_REGUHWHPREQ1VALID1, 0xe8, 0x00), /* * VextSupply1HwHPReq1Valid = disabled * VextSupply2HwHPReq1Valid = disabled * VextSupply3HwHPReq1Valid = disabled */ - INIT_REGULATOR_REGISTER(AB8500_REGUHWHPREQ1VALID2, 0x00), + INIT_REGULATOR_REGISTER(AB8500_REGUHWHPREQ1VALID2, 0x07, 0x00), /* * VanaHwHPReq2Valid = disabled * Vaux1HwHPReq2Valid = disabled * Vaux2HwHPReq2Valid = disabled * Vaux3HwHPReq2Valid = disabled */ - INIT_REGULATOR_REGISTER(AB8500_REGUHWHPREQ2VALID1, 0x00), + INIT_REGULATOR_REGISTER(AB8500_REGUHWHPREQ2VALID1, 0xe8, 0x00), /* * VextSupply1HwHPReq2Valid = disabled * VextSupply2HwHPReq2Valid = disabled 
* VextSupply3HwHPReq2Valid = HWReq2 controlled */ - INIT_REGULATOR_REGISTER(AB8500_REGUHWHPREQ2VALID2, 0x04), + INIT_REGULATOR_REGISTER(AB8500_REGUHWHPREQ2VALID2, 0x07, 0x04), /* * VanaSwHPReqValid = disabled * Vaux1SwHPReqValid = disabled */ - INIT_REGULATOR_REGISTER(AB8500_REGUSWHPREQVALID1, 0x00), + INIT_REGULATOR_REGISTER(AB8500_REGUSWHPREQVALID1, 0xa0, 0x00), /* * Vaux2SwHPReqValid = disabled * Vaux3SwHPReqValid = disabled @@ -197,7 +197,7 @@ ab8500_regulator_reg_init[AB8500_NUM_REGULATOR_REGISTERS] = { * VextSupply2SwHPReqValid = disabled * VextSupply3SwHPReqValid = disabled */ - INIT_REGULATOR_REGISTER(AB8500_REGUSWHPREQVALID2, 0x00), + INIT_REGULATOR_REGISTER(AB8500_REGUSWHPREQVALID2, 0x1f, 0x00), /* * SysClkReq2Valid1 = SysClkReq2 controlled * SysClkReq3Valid1 = disabled @@ -207,7 +207,7 @@ ab8500_regulator_reg_init[AB8500_NUM_REGULATOR_REGISTERS] = { * SysClkReq7Valid1 = disabled * SysClkReq8Valid1 = disabled */ - INIT_REGULATOR_REGISTER(AB8500_REGUSYSCLKREQVALID1, 0x2a), + INIT_REGULATOR_REGISTER(AB8500_REGUSYSCLKREQVALID1, 0xfe, 0x2a), /* * SysClkReq2Valid2 = disabled * SysClkReq3Valid2 = disabled @@ -217,7 +217,7 @@ ab8500_regulator_reg_init[AB8500_NUM_REGULATOR_REGISTERS] = { * SysClkReq7Valid2 = disabled * SysClkReq8Valid2 = disabled */ - INIT_REGULATOR_REGISTER(AB8500_REGUSYSCLKREQVALID2, 0x20), + INIT_REGULATOR_REGISTER(AB8500_REGUSYSCLKREQVALID2, 0xfe, 0x20), /* * VTVoutEna = disabled * Vintcore12Ena = disabled @@ -225,57 +225,57 @@ ab8500_regulator_reg_init[AB8500_NUM_REGULATOR_REGISTERS] = { * Vintcore12LP = inactive (HP) * VTVoutLP = inactive (HP) */ - INIT_REGULATOR_REGISTER(AB8500_REGUMISC1, 0x10), + INIT_REGULATOR_REGISTER(AB8500_REGUMISC1, 0xfe, 0x10), /* * VaudioEna = disabled * VdmicEna = disabled * Vamic1Ena = disabled * Vamic2Ena = disabled */ - INIT_REGULATOR_REGISTER(AB8500_VAUDIOSUPPLY, 0x00), + INIT_REGULATOR_REGISTER(AB8500_VAUDIOSUPPLY, 0x1e, 0x00), /* * Vamic1_dzout = high-Z when Vamic1 is disabled * Vamic2_dzout = high-Z when Vamic2 is disabled */ - INIT_REGULATOR_REGISTER(AB8500_REGUCTRL1VAMIC, 0x00), + INIT_REGULATOR_REGISTER(AB8500_REGUCTRL1VAMIC, 0x03, 0x00), /* * VBBN = force OFF * VBBP = force OFF * NOTE! PRCMU register */ - INIT_REGULATOR_REGISTER(AB8500_ARMREGU2, 0x00), + INIT_REGULATOR_REGISTER(AB8500_ARMREGU2, 0x0f, 0x00), /* * VBBNSel1 = VBBP = VBBPFB * VBBPSel1 = 0 V * NOTE! PRCMU register */ - INIT_REGULATOR_REGISTER(AB8500_VBBSEL1, 0x00), + INIT_REGULATOR_REGISTER(AB8500_VBBSEL1, 0x0f, 0x00), /* * VBBNSel2 = VBBP = VBBPFB * VBBPSel2 = 0 V * NOTE! 
PRCMU register */ - INIT_REGULATOR_REGISTER(AB8500_VBBSEL2, 0x00), + INIT_REGULATOR_REGISTER(AB8500_VBBSEL2, 0x0f, 0x00), /* * Vsmps1Regu = HW control * Vsmps1SelCtrl = Vsmps1 voltage defined by Vsmsp1Sel2 */ - INIT_REGULATOR_REGISTER(AB8500_VSMPS1REGU, 0x06), + INIT_REGULATOR_REGISTER(AB8500_VSMPS1REGU, 0x0f, 0x06), /* * Vsmps2Regu = HW control * Vsmps2SelCtrl = Vsmps2 voltage defined by Vsmsp2Sel2 */ - INIT_REGULATOR_REGISTER(AB8500_VSMPS2REGU, 0x06), + INIT_REGULATOR_REGISTER(AB8500_VSMPS2REGU, 0x0f, 0x06), /* * VPll = Hw controlled * VanaRegu = force off */ - INIT_REGULATOR_REGISTER(AB8500_VPLLVANAREGU, 0x02), + INIT_REGULATOR_REGISTER(AB8500_VPLLVANAREGU, 0x0f, 0x02), /* * VrefDDREna = disabled * VrefDDRSleepMode = inactive (no pulldown) */ - INIT_REGULATOR_REGISTER(AB8500_VREFDDR, 0x00), + INIT_REGULATOR_REGISTER(AB8500_VREFDDR, 0x03, 0x00), /* * VextSupply1Regu = HW control * VextSupply2Regu = HW control @@ -283,37 +283,37 @@ ab8500_regulator_reg_init[AB8500_NUM_REGULATOR_REGISTERS] = { * ExtSupply2Bypass = ExtSupply12LPn ball is 0 when Ena is 0 * ExtSupply3Bypass = ExtSupply3LPn ball is 0 when Ena is 0 */ - INIT_REGULATOR_REGISTER(AB8500_EXTSUPPLYREGU, 0x2a), + INIT_REGULATOR_REGISTER(AB8500_EXTSUPPLYREGU, 0xff, 0x1a), /* * Vaux1Regu = force HP * Vaux2Regu = force off */ - INIT_REGULATOR_REGISTER(AB8500_VAUX12REGU, 0x01), + INIT_REGULATOR_REGISTER(AB8500_VAUX12REGU, 0x0f, 0x01), /* * Vrf1Regu = HW control * Vaux3Regu = force off */ - INIT_REGULATOR_REGISTER(AB8500_VRF1VAUX3REGU, 0x08), + INIT_REGULATOR_REGISTER(AB8500_VRF1VAUX3REGU, 0x0f, 0x08), /* * Vsmps1 = 1.15V */ - INIT_REGULATOR_REGISTER(AB8500_VSMPS1SEL1, 0x24), + INIT_REGULATOR_REGISTER(AB8500_VSMPS1SEL1, 0x3f, 0x24), /* * Vaux1Sel = 2.5 V */ - INIT_REGULATOR_REGISTER(AB8500_VAUX1SEL, 0x08), + INIT_REGULATOR_REGISTER(AB8500_VAUX1SEL, 0x0f, 0x08), /* * Vaux2Sel = 2.9 V */ - INIT_REGULATOR_REGISTER(AB8500_VAUX2SEL, 0x0d), + INIT_REGULATOR_REGISTER(AB8500_VAUX2SEL, 0x0f, 0x0d), /* * Vaux3Sel = 2.91 V */ - INIT_REGULATOR_REGISTER(AB8500_VRF1VAUX3SEL, 0x07), + INIT_REGULATOR_REGISTER(AB8500_VRF1VAUX3SEL, 0x07, 0x07), /* * VextSupply12LP = disabled (no LP) */ - INIT_REGULATOR_REGISTER(AB8500_REGUCTRL2SPARE, 0x00), + INIT_REGULATOR_REGISTER(AB8500_REGUCTRL2SPARE, 0x01, 0x00), /* * Vaux1Disch = short discharge time * Vaux2Disch = short discharge time @@ -322,13 +322,13 @@ ab8500_regulator_reg_init[AB8500_NUM_REGULATOR_REGISTERS] = { * VTVoutDisch = short discharge time * VaudioDisch = short discharge time */ - INIT_REGULATOR_REGISTER(AB8500_REGUCTRLDISCH, 0x00), + INIT_REGULATOR_REGISTER(AB8500_REGUCTRLDISCH, 0xfc, 0x00), /* * VanaDisch = short discharge time * VdmicPullDownEna = pulldown disabled when Vdmic is disabled * VdmicDisch = short discharge time */ - INIT_REGULATOR_REGISTER(AB8500_REGUCTRLDISCH2, 0x00), + INIT_REGULATOR_REGISTER(AB8500_REGUCTRLDISCH2, 0x16, 0x00), }; /* AB8500 regulators */ diff --git a/drivers/regulator/ab8500.c b/drivers/regulator/ab8500.c index 3465ac38bffe..a847744f8c20 100644 --- a/drivers/regulator/ab8500.c +++ b/drivers/regulator/ab8500.c @@ -811,23 +811,20 @@ static struct ab8500_reg_init ab8500_reg_init[] = { REG_INIT(AB8500_REGUCTRLDISCH2, 0x04, 0x44, 0x16), }; -static int -ab8500_regulator_init_registers(struct platform_device *pdev, int id, int value) +static int ab8500_regulator_init_registers(struct platform_device *pdev, + int id, int mask, int value) { int err; - if (value & ~ab8500_reg_init[id].mask) { - dev_err(&pdev->dev, - "Configuration error: value outside mask.\n"); - return 
-EINVAL; - } + BUG_ON(value & ~mask); + BUG_ON(mask & ~ab8500_reg_init[id].mask); + /* initialize register */ err = abx500_mask_and_set_register_interruptible( &pdev->dev, ab8500_reg_init[id].bank, ab8500_reg_init[id].addr, - ab8500_reg_init[id].mask, - value); + mask, value); if (err < 0) { dev_err(&pdev->dev, "Failed to initialize 0x%02x, 0x%02x.\n", @@ -835,13 +832,11 @@ ab8500_regulator_init_registers(struct platform_device *pdev, int id, int value) ab8500_reg_init[id].addr); return err; } - dev_vdbg(&pdev->dev, - "init: 0x%02x, 0x%02x, 0x%02x, 0x%02x\n", - ab8500_reg_init[id].bank, - ab8500_reg_init[id].addr, - ab8500_reg_init[id].mask, - value); + " init: 0x%02x, 0x%02x, 0x%02x, 0x%02x\n", + ab8500_reg_init[id].bank, + ab8500_reg_init[id].addr, + mask, value); return 0; } @@ -960,19 +955,16 @@ static int ab8500_regulator_probe(struct platform_device *pdev) /* initialize registers */ for (i = 0; i < pdata->num_regulator_reg_init; i++) { - int id, value; + int id, mask, value; id = pdata->regulator_reg_init[i].id; + mask = pdata->regulator_reg_init[i].mask; value = pdata->regulator_reg_init[i].value; /* check for configuration errors */ - if (id >= AB8500_NUM_REGULATOR_REGISTERS) { - dev_err(&pdev->dev, - "Configuration error: id outside range.\n"); - return -EINVAL; - } + BUG_ON(id >= AB8500_NUM_REGULATOR_REGISTERS); - err = ab8500_regulator_init_registers(pdev, id, value); + err = ab8500_regulator_init_registers(pdev, id, mask, value); if (err < 0) return err; } diff --git a/include/linux/regulator/ab8500.h b/include/linux/regulator/ab8500.h index 2c6c9625013c..a1d245f13d9c 100644 --- a/include/linux/regulator/ab8500.h +++ b/include/linux/regulator/ab8500.h @@ -48,13 +48,15 @@ enum ab9540_regulator_id { /* AB8500 and AB9540 register initialization */ struct ab8500_regulator_reg_init { int id; + u8 mask; u8 value; }; -#define INIT_REGULATOR_REGISTER(_id, _value) \ - { \ - .id = _id, \ - .value = _value, \ +#define INIT_REGULATOR_REGISTER(_id, _mask, _value) \ + { \ + .id = _id, \ + .mask = _mask, \ + .value = _value, \ } /* AB8500 registers */ -- cgit From 33bc8f46a8ee3fc1836def9713933435b7ff0b90 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 21 Mar 2013 15:59:02 +0000 Subject: regulator: ab8500: Another push to synchronise recent AB8500 developments This patch ensures that many of the recent developments pertaining to the AB8500 regulator device are propagated out into the public arena. It aims to update some of the existing initialisation values in accordance with internal ST-Ericsson code submissions. This single patch was originally a collection of updates which have been squashed together to aid with clarity. Signed-off-by: Lee Jones Signed-off-by: Mark Brown --- arch/arm/mach-ux500/board-mop500-regulators.c | 18 ------ drivers/regulator/ab8500.c | 86 +++++++++++++++++---------- include/linux/regulator/ab8500.h | 21 ++++--- 3 files changed, 65 insertions(+), 60 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-ux500/board-mop500-regulators.c b/arch/arm/mach-ux500/board-mop500-regulators.c index 96dd17490bea..a8141e3e8ca1 100644 --- a/arch/arm/mach-ux500/board-mop500-regulators.c +++ b/arch/arm/mach-ux500/board-mop500-regulators.c @@ -238,24 +238,6 @@ ab8500_regulator_reg_init[AB8500_NUM_REGULATOR_REGISTERS] = { * Vamic2_dzout = high-Z when Vamic2 is disabled */ INIT_REGULATOR_REGISTER(AB8500_REGUCTRL1VAMIC, 0x03, 0x00), - /* - * VBBN = force OFF - * VBBP = force OFF - * NOTE! 
PRCMU register - */ - INIT_REGULATOR_REGISTER(AB8500_ARMREGU2, 0x0f, 0x00), - /* - * VBBNSel1 = VBBP = VBBPFB - * VBBPSel1 = 0 V - * NOTE! PRCMU register - */ - INIT_REGULATOR_REGISTER(AB8500_VBBSEL1, 0x0f, 0x00), - /* - * VBBNSel2 = VBBP = VBBPFB - * VBBPSel2 = 0 V - * NOTE! PRCMU register - */ - INIT_REGULATOR_REGISTER(AB8500_VBBSEL2, 0x0f, 0x00), /* * Vsmps1Regu = HW control * Vsmps1SelCtrl = Vsmps1 voltage defined by Vsmsp1Sel2 diff --git a/drivers/regulator/ab8500.c b/drivers/regulator/ab8500.c index a847744f8c20..c7784c4bff4f 100644 --- a/drivers/regulator/ab8500.c +++ b/drivers/regulator/ab8500.c @@ -613,11 +613,19 @@ struct ab8500_reg_init { static struct ab8500_reg_init ab8500_reg_init[] = { /* - * 0x30, VanaRequestCtrl + * 0x03, VarmRequestCtrl + * 0x0c, VapeRequestCtrl + * 0x30, Vsmps1RequestCtrl + * 0xc0, Vsmps2RequestCtrl + */ + REG_INIT(AB8500_REGUREQUESTCTRL1, 0x03, 0x03, 0xff), + /* + * 0x03, Vsmps3RequestCtrl * 0x0c, VpllRequestCtrl + * 0x30, VanaRequestCtrl * 0xc0, VextSupply1RequestCtrl */ - REG_INIT(AB8500_REGUREQUESTCTRL2, 0x03, 0x04, 0xfc), + REG_INIT(AB8500_REGUREQUESTCTRL2, 0x03, 0x04, 0xff), /* * 0x03, VextSupply2RequestCtrl * 0x0c, VextSupply3RequestCtrl @@ -642,50 +650,71 @@ static struct ab8500_reg_init ab8500_reg_init[] = { */ REG_INIT(AB8500_REGUSYSCLKREQ1HPVALID1, 0x03, 0x07, 0xff), /* + * 0x01, VapeSysClkReq1HPValid + * 0x02, VarmSysClkReq1HPValid + * 0x04, VbbSysClkReq1HPValid + * 0x08, VmodSysClkReq1HPValid * 0x10, VextSupply1SysClkReq1HPValid * 0x20, VextSupply2SysClkReq1HPValid * 0x40, VextSupply3SysClkReq1HPValid */ - REG_INIT(AB8500_REGUSYSCLKREQ1HPVALID2, 0x03, 0x08, 0x70), + REG_INIT(AB8500_REGUSYSCLKREQ1HPVALID2, 0x03, 0x08, 0x7f), /* + * 0x01, Vsmps1HwHPReq1Valid + * 0x02, Vsmps2HwHPReq1Valid + * 0x04, Vsmps3HwHPReq1Valid * 0x08, VanaHwHPReq1Valid + * 0x10, VpllHwHPReq1Valid * 0x20, Vaux1HwHPReq1Valid * 0x40, Vaux2HwHPReq1Valid * 0x80, Vaux3HwHPReq1Valid */ - REG_INIT(AB8500_REGUHWHPREQ1VALID1, 0x03, 0x09, 0xe8), + REG_INIT(AB8500_REGUHWHPREQ1VALID1, 0x03, 0x09, 0xff), /* * 0x01, VextSupply1HwHPReq1Valid * 0x02, VextSupply2HwHPReq1Valid * 0x04, VextSupply3HwHPReq1Valid + * 0x08, VmodHwHPReq1Valid */ - REG_INIT(AB8500_REGUHWHPREQ1VALID2, 0x03, 0x0a, 0x07), + REG_INIT(AB8500_REGUHWHPREQ1VALID2, 0x03, 0x0a, 0x0f), /* + * 0x01, Vsmps1HwHPReq2Valid + * 0x02, Vsmps2HwHPReq2Valid + * 0x03, Vsmps3HwHPReq2Valid * 0x08, VanaHwHPReq2Valid + * 0x10, VpllHwHPReq2Valid * 0x20, Vaux1HwHPReq2Valid * 0x40, Vaux2HwHPReq2Valid * 0x80, Vaux3HwHPReq2Valid */ - REG_INIT(AB8500_REGUHWHPREQ2VALID1, 0x03, 0x0b, 0xe8), + REG_INIT(AB8500_REGUHWHPREQ2VALID1, 0x03, 0x0b, 0xff), /* * 0x01, VextSupply1HwHPReq2Valid * 0x02, VextSupply2HwHPReq2Valid * 0x04, VextSupply3HwHPReq2Valid + * 0x08, VmodHwHPReq2Valid */ - REG_INIT(AB8500_REGUHWHPREQ2VALID2, 0x03, 0x0c, 0x07), + REG_INIT(AB8500_REGUHWHPREQ2VALID2, 0x03, 0x0c, 0x0f), /* + * 0x01, VapeSwHPReqValid + * 0x02, VarmSwHPReqValid + * 0x04, Vsmps1SwHPReqValid + * 0x08, Vsmps2SwHPReqValid + * 0x10, Vsmps3SwHPReqValid * 0x20, VanaSwHPReqValid + * 0x40, VpllSwHPReqValid * 0x80, Vaux1SwHPReqValid */ - REG_INIT(AB8500_REGUSWHPREQVALID1, 0x03, 0x0d, 0xa0), + REG_INIT(AB8500_REGUSWHPREQVALID1, 0x03, 0x0d, 0xff), /* * 0x01, Vaux2SwHPReqValid * 0x02, Vaux3SwHPReqValid * 0x04, VextSupply1SwHPReqValid * 0x08, VextSupply2SwHPReqValid * 0x10, VextSupply3SwHPReqValid + * 0x20, VmodSwHPReqValid */ - REG_INIT(AB8500_REGUSWHPREQVALID2, 0x03, 0x0e, 0x1f), + REG_INIT(AB8500_REGUSWHPREQVALID2, 0x03, 0x0e, 0x3f), /* * 0x02, SysClkReq2Valid1 * ... 
@@ -718,37 +747,23 @@ static struct ab8500_reg_init ab8500_reg_init[] = { * 0x02, Vamic2_dzout */ REG_INIT(AB8500_REGUCTRL1VAMIC, 0x03, 0x84, 0x03), - /* - * 0x0c, VBBNRegu - * 0x03, VBBPRegu - * NOTE! PRCMU register - */ - REG_INIT(AB8500_ARMREGU2, 0x04, 0x01, 0x0f), - /* - * 0x0c, VBBPSel1 - * 0x03, VBBNSel1 - * NOTE! PRCMU register - */ - REG_INIT(AB8500_VBBSEL1, 0x04, 0x11, 0x0f), - /* - * 0x0c, VBBNSel2 - * 0x03, VBBPSel2 - * NOTE! PRCMU register - */ - REG_INIT(AB8500_VBBSEL2, 0x04, 0x12, 0x0f), /* * 0x03, Vsmps1Regu * 0x0c, Vsmps1SelCtrl + * 0x10, Vsmps1AutoMode + * 0x20, Vsmps1PWMMode */ - REG_INIT(AB8500_VSMPS1REGU, 0x04, 0x03, 0x0f), + REG_INIT(AB8500_VSMPS1REGU, 0x04, 0x03, 0x3f), /* * 0x03, Vsmps2Regu * 0x0c, Vsmps2SelCtrl + * 0x10, Vsmps2AutoMode + * 0x20, Vsmps2PWMMode */ - REG_INIT(AB8500_VSMPS2REGU, 0x04, 0x04, 0x0f), + REG_INIT(AB8500_VSMPS2REGU, 0x04, 0x04, 0x3f), /* - * 0x0c, VanaRegu * 0x03, VpllRegu + * 0x0c, VanaRegu */ REG_INIT(AB8500_VPLLVANAREGU, 0x04, 0x06, 0x0f), /* @@ -788,13 +803,16 @@ static struct ab8500_reg_init ab8500_reg_init[] = { REG_INIT(AB8500_VAUX2SEL, 0x04, 0x20, 0x0f), /* * 0x07, Vaux3Sel + * 0x30, Vrf1Sel */ - REG_INIT(AB8500_VRF1VAUX3SEL, 0x04, 0x21, 0x07), + REG_INIT(AB8500_VRF1VAUX3SEL, 0x04, 0x21, 0x37), /* * 0x01, VextSupply12LP */ REG_INIT(AB8500_REGUCTRL2SPARE, 0x04, 0x22, 0x01), /* + * 0x01, VpllDisch + * 0x02, Vrf1Disch * 0x04, Vaux1Disch * 0x08, Vaux2Disch * 0x10, Vaux3Disch @@ -802,13 +820,15 @@ static struct ab8500_reg_init ab8500_reg_init[] = { * 0x40, VTVoutDisch * 0x80, VaudioDisch */ - REG_INIT(AB8500_REGUCTRLDISCH, 0x04, 0x43, 0xfc), + REG_INIT(AB8500_REGUCTRLDISCH, 0x04, 0x43, 0xff), /* + * 0x01, VsimDisch * 0x02, VanaDisch * 0x04, VdmicPullDownEna + * 0x08, VpllPullDownEna * 0x10, VdmicDisch */ - REG_INIT(AB8500_REGUCTRLDISCH2, 0x04, 0x44, 0x16), + REG_INIT(AB8500_REGUCTRLDISCH2, 0x04, 0x44, 0x1f), }; static int ab8500_regulator_init_registers(struct platform_device *pdev, diff --git a/include/linux/regulator/ab8500.h b/include/linux/regulator/ab8500.h index a1d245f13d9c..dd7944f735d8 100644 --- a/include/linux/regulator/ab8500.h +++ b/include/linux/regulator/ab8500.h @@ -61,6 +61,7 @@ struct ab8500_regulator_reg_init { /* AB8500 registers */ enum ab8500_regulator_reg { + AB8500_REGUREQUESTCTRL1, AB8500_REGUREQUESTCTRL2, AB8500_REGUREQUESTCTRL3, AB8500_REGUREQUESTCTRL4, @@ -77,26 +78,28 @@ enum ab8500_regulator_reg { AB8500_REGUMISC1, AB8500_VAUDIOSUPPLY, AB8500_REGUCTRL1VAMIC, + AB8500_VSMPS1REGU, + AB8500_VSMPS2REGU, + AB8500_VSMPS3REGU, /* NOTE! PRCMU register */ AB8500_VPLLVANAREGU, AB8500_VREFDDR, AB8500_EXTSUPPLYREGU, AB8500_VAUX12REGU, AB8500_VRF1VAUX3REGU, + AB8500_VSMPS1SEL1, + AB8500_VSMPS1SEL2, + AB8500_VSMPS1SEL3, + AB8500_VSMPS2SEL1, + AB8500_VSMPS2SEL2, + AB8500_VSMPS2SEL3, + AB8500_VSMPS3SEL1, /* NOTE! PRCMU register */ + AB8500_VSMPS3SEL2, /* NOTE! PRCMU register */ AB8500_VAUX1SEL, AB8500_VAUX2SEL, AB8500_VRF1VAUX3SEL, AB8500_REGUCTRL2SPARE, AB8500_REGUCTRLDISCH, AB8500_REGUCTRLDISCH2, - AB8500_ARMREGU2, /* NOTE! PRCMU register */ - AB8500_VBBSEL1, /* NOTE! PRCMU register */ - AB8500_VBBSEL2, /* NOTE! PRCMU register */ - AB8500_VSMPS1REGU, - AB8500_VSMPS2REGU, - AB8500_VSMPS3REGU, /* NOTE! PRCMU register */ - AB8500_VSMPS1SEL1, - AB8500_VSMPS3SEL1, /* NOTE! PRCMU register */ - AB8500_VSMPS3SEL2, /* NOTE! 
PRCMU register */ AB8500_NUM_REGULATOR_REGISTERS, }; -- cgit From 732805a563617aafc7405409c03182afafb3943b Mon Sep 17 00:00:00 2001 From: Bengt Jonsson Date: Thu, 21 Mar 2013 15:59:03 +0000 Subject: regulator: ab8500: Separate regulator and MFD platform data The ab8500 MFD should not have knowledge about regulator- specific platform data like number of regulators and regulator registers. As the regulator platform data is about to grow with external regulators, this information is moved to a new structure provided by the regulator driver. Signed-off-by: Bengt Jonsson Signed-off-by: Lee Jones Reviewed-by: Yvan FILLION Signed-off-by: Mark Brown --- arch/arm/mach-ux500/board-mop500-regulators.c | 12 +++++++++--- arch/arm/mach-ux500/board-mop500-regulators.h | 4 +--- drivers/regulator/ab8500.c | 21 +++++++++++++++------ include/linux/regulator/ab8500.h | 7 +++++++ 4 files changed, 32 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-ux500/board-mop500-regulators.c b/arch/arm/mach-ux500/board-mop500-regulators.c index a8141e3e8ca1..0fd84d42e1ec 100644 --- a/arch/arm/mach-ux500/board-mop500-regulators.c +++ b/arch/arm/mach-ux500/board-mop500-regulators.c @@ -122,8 +122,7 @@ static struct regulator_consumer_supply ab8500_vana_consumers[] = { }; /* ab8500 regulator register initialization */ -struct ab8500_regulator_reg_init -ab8500_regulator_reg_init[AB8500_NUM_REGULATOR_REGISTERS] = { +static struct ab8500_regulator_reg_init ab8500_reg_init[] = { /* * VanaRequestCtrl = HP/LP depending on VxRequest * VpllRequestCtrl = HP/LP depending on VxRequest @@ -314,7 +313,7 @@ ab8500_regulator_reg_init[AB8500_NUM_REGULATOR_REGISTERS] = { }; /* AB8500 regulators */ -struct regulator_init_data ab8500_regulators[AB8500_NUM_REGULATORS] = { +static struct regulator_init_data ab8500_regulators[AB8500_NUM_REGULATORS] = { /* supplies to the display/camera */ [AB8500_LDO_AUX1] = { .constraints = { @@ -423,3 +422,10 @@ struct regulator_init_data ab8500_regulators[AB8500_NUM_REGULATORS] = { .consumer_supplies = ab8500_vana_consumers, }, }; + +struct ab8500_regulator_platform_data ab8500_regulator_plat_data = { + .reg_init = ab8500_reg_init, + .num_reg_init = ARRAY_SIZE(ab8500_reg_init), + .regulator = ab8500_regulators, + .num_regulator = ARRAY_SIZE(ab8500_regulators), +}; diff --git a/arch/arm/mach-ux500/board-mop500-regulators.h b/arch/arm/mach-ux500/board-mop500-regulators.h index 78a0642a2206..9ca4869a6f23 100644 --- a/arch/arm/mach-ux500/board-mop500-regulators.h +++ b/arch/arm/mach-ux500/board-mop500-regulators.h @@ -14,9 +14,7 @@ #include #include -extern struct ab8500_regulator_reg_init -ab8500_regulator_reg_init[AB8500_NUM_REGULATOR_REGISTERS]; -extern struct regulator_init_data ab8500_regulators[AB8500_NUM_REGULATORS]; +extern struct ab8500_regulator_platform_data ab8500_regulator_plat_data; extern struct regulator_init_data tps61052_regulator; extern struct regulator_init_data gpio_en_3v3_regulator; diff --git a/drivers/regulator/ab8500.c b/drivers/regulator/ab8500.c index c7784c4bff4f..f7d1f538c200 100644 --- a/drivers/regulator/ab8500.c +++ b/drivers/regulator/ab8500.c @@ -939,8 +939,9 @@ ab8500_regulator_of_probe(struct platform_device *pdev, struct device_node *np) static int ab8500_regulator_probe(struct platform_device *pdev) { struct ab8500 *ab8500 = dev_get_drvdata(pdev->dev.parent); - struct ab8500_platform_data *pdata; struct device_node *np = pdev->dev.of_node; + struct ab8500_platform_data *ppdata; + struct ab8500_regulator_platform_data *pdata; int i, err; if 
(np) { @@ -961,7 +962,14 @@ static int ab8500_regulator_probe(struct platform_device *pdev) dev_err(&pdev->dev, "null mfd parent\n"); return -EINVAL; } - pdata = dev_get_platdata(ab8500->dev); + + ppdata = dev_get_platdata(ab8500->dev); + if (!ppdata) { + dev_err(&pdev->dev, "null parent pdata\n"); + return -EINVAL; + } + + pdata = ppdata->regulator; if (!pdata) { dev_err(&pdev->dev, "null pdata\n"); return -EINVAL; @@ -974,12 +982,12 @@ static int ab8500_regulator_probe(struct platform_device *pdev) } /* initialize registers */ - for (i = 0; i < pdata->num_regulator_reg_init; i++) { + for (i = 0; i < pdata->num_reg_init; i++) { int id, mask, value; - id = pdata->regulator_reg_init[i].id; - mask = pdata->regulator_reg_init[i].mask; - value = pdata->regulator_reg_init[i].value; + id = pdata->reg_init[i].id; + mask = pdata->reg_init[i].mask; + value = pdata->reg_init[i].value; /* check for configuration errors */ BUG_ON(id >= AB8500_NUM_REGULATOR_REGISTERS); @@ -1045,5 +1053,6 @@ module_exit(ab8500_regulator_exit); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Sundar Iyer "); +MODULE_AUTHOR("Bengt Jonsson "); MODULE_DESCRIPTION("Regulator Driver for ST-Ericsson AB8500 Mixed-Sig PMIC"); MODULE_ALIAS("platform:ab8500-regulator"); diff --git a/include/linux/regulator/ab8500.h b/include/linux/regulator/ab8500.h index dd7944f735d8..3a8e02687f7b 100644 --- a/include/linux/regulator/ab8500.h +++ b/include/linux/regulator/ab8500.h @@ -152,4 +152,11 @@ enum ab9540_regulator_reg { AB9540_NUM_REGULATOR_REGISTERS, }; +struct ab8500_regulator_platform_data { + int num_reg_init; + struct ab8500_regulator_reg_init *reg_init; + int num_regulator; + struct regulator_init_data *regulator; +}; + #endif -- cgit From 9d0ca6ed6f2f12eb488f450d5d38d047aa402a53 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 21 Mar 2013 14:17:34 +0000 Subject: virtio: remove obsolete virtqueue_get_queue_index() You can access it directly now, since 3.8: v3.7-rc1-13-g06ca287 'virtio: move queue_index and num_free fields into core struct virtqueue.' Cc: Cornelia Huck Signed-off-by: Rusty Russell Acked-by: Cornelia Huck Signed-off-by: David S. 
Miller --- drivers/net/virtio_net.c | 4 ++-- drivers/s390/kvm/virtio_ccw.c | 6 +++--- include/linux/virtio.h | 6 ------ 3 files changed, 5 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 57ac4b0294bc..f7d67e8eb1aa 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -154,7 +154,7 @@ struct padded_vnet_hdr { */ static int vq2txq(struct virtqueue *vq) { - return (virtqueue_get_queue_index(vq) - 1) / 2; + return (vq->index - 1) / 2; } static int txq2vq(int txq) @@ -164,7 +164,7 @@ static int txq2vq(int txq) static int vq2rxq(struct virtqueue *vq) { - return virtqueue_get_queue_index(vq) / 2; + return vq->index / 2; } static int rxq2vq(int rxq) diff --git a/drivers/s390/kvm/virtio_ccw.c b/drivers/s390/kvm/virtio_ccw.c index 2029b6caa595..fb877b59ec57 100644 --- a/drivers/s390/kvm/virtio_ccw.c +++ b/drivers/s390/kvm/virtio_ccw.c @@ -166,7 +166,7 @@ static void virtio_ccw_kvm_notify(struct virtqueue *vq) vcdev = to_vc_device(info->vq->vdev); ccw_device_get_schid(vcdev->cdev, &schid); - do_kvm_notify(schid, virtqueue_get_queue_index(vq)); + do_kvm_notify(schid, vq->index); } static int virtio_ccw_read_vq_conf(struct virtio_ccw_device *vcdev, @@ -188,7 +188,7 @@ static void virtio_ccw_del_vq(struct virtqueue *vq, struct ccw1 *ccw) unsigned long flags; unsigned long size; int ret; - unsigned int index = virtqueue_get_queue_index(vq); + unsigned int index = vq->index; /* Remove from our list. */ spin_lock_irqsave(&vcdev->lock, flags); @@ -610,7 +610,7 @@ static struct virtqueue *virtio_ccw_vq_by_ind(struct virtio_ccw_device *vcdev, vq = NULL; spin_lock_irqsave(&vcdev->lock, flags); list_for_each_entry(info, &vcdev->virtqueues, node) { - if (virtqueue_get_queue_index(info->vq) == index) { + if (info->vq->index == index) { vq = info->vq; break; } diff --git a/include/linux/virtio.h b/include/linux/virtio.h index ff6714e6d0f5..2d7a5e045908 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -58,12 +58,6 @@ void *virtqueue_detach_unused_buf(struct virtqueue *vq); unsigned int virtqueue_get_vring_size(struct virtqueue *vq); -/* FIXME: Obsolete accessor, but required for virtio_net merge. */ -static inline unsigned int virtqueue_get_queue_index(struct virtqueue *vq) -{ - return vq->index; -} - /** * virtio_device - representation of a device using virtio * @index: unique position on the virtio bus -- cgit From c3a07134e6aa5b93a37f72ffa3d11fadf72bf757 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 22 Mar 2013 03:39:28 +0000 Subject: mv643xx_eth: convert to use the Marvell Orion MDIO driver This patch converts the Marvell MV643XX ethernet driver to use the Marvell Orion MDIO driver. As a result, PowerPC and ARM platforms registering the Marvell MV643XX ethernet driver are also updated to register a Marvell Orion MDIO driver. This driver voluntarily overlaps with the Marvell Ethernet shared registers because it will use a subset of this shared register (shared_base + 0x4 to shared_base + 0x84). The Ethernet driver is also updated to look up for a PHY device using the Orion MDIO bus driver. For ARM and PowerPC we register a single instance of the "mvmdio" driver in the system like it used to be done with the use of the "shared_smi" platform_data cookie on ARM. 
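For reference, the registration pattern on a board that does not use the common helpers could look roughly like this (hypothetical names and base address; compare the pegasos_eth.c hunk below, which follows the same shape):

	/* Hypothetical board code: an "orion-mdio" platform device claiming
	 * the SMI window at shared_base + 0x4 .. shared_base + 0x83. */
	static struct resource board_mdio_resources[] = {
		{
			.start	= BOARD_ETH_SHARED_BASE + 0x4,
			.end	= BOARD_ETH_SHARED_BASE + 0x83,
			.flags	= IORESOURCE_MEM,
		},
	};

	static struct platform_device board_mdio_device = {
		.name		= "orion-mdio",
		.id		= -1,
		.num_resources	= ARRAY_SIZE(board_mdio_resources),
		.resource	= board_mdio_resources,
	};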
Note that it is safe to register the mvmdio driver only for the "ge00" instance of the driver because this "ge00" interface is guaranteed to always be explicitly registered by consumers of arch/arm/plat-orion/common.c and other instances (ge01, ge10 and ge11) were all pointing their shared_smi to ge00. For PowerPC the in-tree Device Tree Source files mention only one MV643XX ethernet MAC instance so the MDIO bus driver is registered only when id == 0. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- arch/arm/plat-orion/common.c | 54 ++++---- arch/powerpc/platforms/chrp/pegasos_eth.c | 20 +++ arch/powerpc/sysdev/mv64x60_dev.c | 16 ++- drivers/net/ethernet/marvell/Kconfig | 5 +- drivers/net/ethernet/marvell/Makefile | 2 +- drivers/net/ethernet/marvell/mv643xx_eth.c | 195 +++-------------------------- include/linux/mv643xx_eth.h | 1 - 7 files changed, 84 insertions(+), 209 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c index 2d4b6414609f..251f827271e9 100644 --- a/arch/arm/plat-orion/common.c +++ b/arch/arm/plat-orion/common.c @@ -238,6 +238,7 @@ static __init void ge_complete( struct mv643xx_eth_shared_platform_data *orion_ge_shared_data, struct resource *orion_ge_resource, unsigned long irq, struct platform_device *orion_ge_shared, + struct platform_device *orion_ge_mvmdio, struct mv643xx_eth_platform_data *eth_data, struct platform_device *orion_ge) { @@ -247,6 +248,8 @@ static __init void ge_complete( orion_ge->dev.platform_data = eth_data; platform_device_register(orion_ge_shared); + if (orion_ge_mvmdio) + platform_device_register(orion_ge_mvmdio); platform_device_register(orion_ge); } @@ -258,8 +261,6 @@ struct mv643xx_eth_shared_platform_data orion_ge00_shared_data; static struct resource orion_ge00_shared_resources[] = { { .name = "ge00 base", - }, { - .name = "ge00 err irq", }, }; @@ -271,6 +272,19 @@ static struct platform_device orion_ge00_shared = { }, }; +static struct resource orion_ge_mvmdio_resources[] = { + { + .name = "ge00 mvmdio base", + }, { + .name = "ge00 mvmdio err irq", + }, +}; + +static struct platform_device orion_ge_mvmdio = { + .name = "orion-mdio", + .id = -1, +}; + static struct resource orion_ge00_resources[] = { { .name = "ge00 irq", @@ -295,26 +309,25 @@ void __init orion_ge00_init(struct mv643xx_eth_platform_data *eth_data, unsigned int tx_csum_limit) { fill_resources(&orion_ge00_shared, orion_ge00_shared_resources, - mapbase + 0x2000, SZ_16K - 1, irq_err); + mapbase + 0x2000, SZ_16K - 1, NO_IRQ); + fill_resources(&orion_ge_mvmdio, orion_ge_mvmdio_resources, + mapbase + 0x2004, 0x84 - 1, irq_err); orion_ge00_shared_data.tx_csum_limit = tx_csum_limit; ge_complete(&orion_ge00_shared_data, orion_ge00_resources, irq, &orion_ge00_shared, + &orion_ge_mvmdio, eth_data, &orion_ge00); } /***************************************************************************** * GE01 ****************************************************************************/ -struct mv643xx_eth_shared_platform_data orion_ge01_shared_data = { - .shared_smi = &orion_ge00_shared, -}; +struct mv643xx_eth_shared_platform_data orion_ge01_shared_data; static struct resource orion_ge01_shared_resources[] = { { .name = "ge01 base", - }, { - .name = "ge01 err irq", - }, + } }; static struct platform_device orion_ge01_shared = { @@ -349,26 +362,23 @@ void __init orion_ge01_init(struct mv643xx_eth_platform_data *eth_data, unsigned int tx_csum_limit) { fill_resources(&orion_ge01_shared, orion_ge01_shared_resources, -
mapbase + 0x2000, SZ_16K - 1, irq_err); + mapbase + 0x2000, SZ_16K - 1, NO_IRQ); orion_ge01_shared_data.tx_csum_limit = tx_csum_limit; ge_complete(&orion_ge01_shared_data, orion_ge01_resources, irq, &orion_ge01_shared, + NULL, eth_data, &orion_ge01); } /***************************************************************************** * GE10 ****************************************************************************/ -struct mv643xx_eth_shared_platform_data orion_ge10_shared_data = { - .shared_smi = &orion_ge00_shared, -}; +struct mv643xx_eth_shared_platform_data orion_ge10_shared_data; static struct resource orion_ge10_shared_resources[] = { { .name = "ge10 base", - }, { - .name = "ge10 err irq", - }, + } }; static struct platform_device orion_ge10_shared = { @@ -402,24 +412,21 @@ void __init orion_ge10_init(struct mv643xx_eth_platform_data *eth_data, unsigned long irq_err) { fill_resources(&orion_ge10_shared, orion_ge10_shared_resources, - mapbase + 0x2000, SZ_16K - 1, irq_err); + mapbase + 0x2000, SZ_16K - 1, NO_IRQ); ge_complete(&orion_ge10_shared_data, orion_ge10_resources, irq, &orion_ge10_shared, + NULL, eth_data, &orion_ge10); } /***************************************************************************** * GE11 ****************************************************************************/ -struct mv643xx_eth_shared_platform_data orion_ge11_shared_data = { - .shared_smi = &orion_ge00_shared, -}; +struct mv643xx_eth_shared_platform_data orion_ge11_shared_data; static struct resource orion_ge11_shared_resources[] = { { .name = "ge11 base", - }, { - .name = "ge11 err irq", }, }; @@ -454,9 +461,10 @@ void __init orion_ge11_init(struct mv643xx_eth_platform_data *eth_data, unsigned long irq_err) { fill_resources(&orion_ge11_shared, orion_ge11_shared_resources, - mapbase + 0x2000, SZ_16K - 1, irq_err); + mapbase + 0x2000, SZ_16K - 1, NO_IRQ); ge_complete(&orion_ge11_shared_data, orion_ge11_resources, irq, &orion_ge11_shared, + NULL, eth_data, &orion_ge11); } diff --git a/arch/powerpc/platforms/chrp/pegasos_eth.c b/arch/powerpc/platforms/chrp/pegasos_eth.c index 039fc8e82199..2b4dc6abde6c 100644 --- a/arch/powerpc/platforms/chrp/pegasos_eth.c +++ b/arch/powerpc/platforms/chrp/pegasos_eth.c @@ -47,6 +47,25 @@ static struct platform_device mv643xx_eth_shared_device = { .resource = mv643xx_eth_shared_resources, }; +/* + * The orion mdio driver only covers shared + 0x4 up to shared + 0x84 - 1 + */ +static struct resource mv643xx_eth_mvmdio_resources[] = { + [0] = { + .name = "ethernet mdio base", + .start = 0xf1000000 + MV643XX_ETH_SHARED_REGS + 0x4, + .end = 0xf1000000 + MV643XX_ETH_SHARED_REGS + 0x83, + .flags = IORESOURCE_MEM, + }, +}; + +static struct platform_device mv643xx_eth_mvmdio_device = { + .name = "orion-mdio", + .id = -1, + .num_resources = ARRAY_SIZE(mv643xx_eth_mvmdio_resources), + .resource = mv643xx_eth_mvmdio_resources, +}; + static struct resource mv643xx_eth_port1_resources[] = { [0] = { .name = "eth port1 irq", @@ -82,6 +101,7 @@ static struct platform_device eth_port1_device = { static struct platform_device *mv643xx_eth_pd_devs[] __initdata = { &mv643xx_eth_shared_device, + &mv643xx_eth_mvmdio_device, &eth_port1_device, }; diff --git a/arch/powerpc/sysdev/mv64x60_dev.c b/arch/powerpc/sysdev/mv64x60_dev.c index 0f6af41ebb44..4a25c26f0bf4 100644 --- a/arch/powerpc/sysdev/mv64x60_dev.c +++ b/arch/powerpc/sysdev/mv64x60_dev.c @@ -214,15 +214,27 @@ static struct platform_device * __init mv64x60_eth_register_shared_pdev( struct device_node *np, int id) { struct platform_device
*pdev; - struct resource r[1]; + struct resource r[2]; int err; err = of_address_to_resource(np, 0, &r[0]); if (err) return ERR_PTR(err); + /* register an orion mdio bus driver */ + r[1].start = r[0].start + 0x4; + r[1].end = r[0].start + 0x84 - 1; + r[1].flags = IORESOURCE_MEM; + + if (id == 0) { + pdev = platform_device_register_simple("orion-mdio", -1, &r[1], 1); + if (!pdev) + return pdev; + } + pdev = platform_device_register_simple(MV643XX_ETH_SHARED_NAME, id, - r, 1); + &r[0], 1); + return pdev; } diff --git a/drivers/net/ethernet/marvell/Kconfig b/drivers/net/ethernet/marvell/Kconfig index edfba9370922..5170ecb00acc 100644 --- a/drivers/net/ethernet/marvell/Kconfig +++ b/drivers/net/ethernet/marvell/Kconfig @@ -23,6 +23,7 @@ config MV643XX_ETH depends on (MV64X60 || PPC32 || PLAT_ORION) && INET select INET_LRO select PHYLIB + select MVMDIO ---help--- This driver supports the gigabit ethernet MACs in the Marvell Discovery PPC/MIPS chipset family (MV643XX) and @@ -38,9 +39,7 @@ config MVMDIO interface units of the Marvell EBU SoCs (Kirkwood, Orion5x, Dove, Armada 370 and Armada XP). - For now, this driver is only needed for the MVNETA driver - (used on Armada 370 and XP), but it could be used in the - future by the MV643XX_ETH driver. + This driver is used by the MV643XX_ETH and MVNETA drivers. config MVNETA tristate "Marvell Armada 370/XP network interface support" diff --git a/drivers/net/ethernet/marvell/Makefile b/drivers/net/ethernet/marvell/Makefile index 7f63b4aac434..5c4a7765ff0e 100644 --- a/drivers/net/ethernet/marvell/Makefile +++ b/drivers/net/ethernet/marvell/Makefile @@ -2,8 +2,8 @@ # Makefile for the Marvell device drivers. # -obj-$(CONFIG_MV643XX_ETH) += mv643xx_eth.o obj-$(CONFIG_MVMDIO) += mvmdio.o +obj-$(CONFIG_MV643XX_ETH) += mv643xx_eth.o obj-$(CONFIG_MVNETA) += mvneta.o obj-$(CONFIG_PXA168_ETH) += pxa168_eth.o obj-$(CONFIG_SKGE) += skge.o diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index d1ecf4bf7da7..a65a92ef19ec 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -69,14 +69,6 @@ static char mv643xx_eth_driver_version[] = "1.4"; * Registers shared between all ports. */ #define PHY_ADDR 0x0000 -#define SMI_REG 0x0004 -#define SMI_BUSY 0x10000000 -#define SMI_READ_VALID 0x08000000 -#define SMI_OPCODE_READ 0x04000000 -#define SMI_OPCODE_WRITE 0x00000000 -#define ERR_INT_CAUSE 0x0080 -#define ERR_INT_SMI_DONE 0x00000010 -#define ERR_INT_MASK 0x0084 #define WINDOW_BASE(w) (0x0200 + ((w) << 3)) #define WINDOW_SIZE(w) (0x0204 + ((w) << 3)) #define WINDOW_REMAP_HIGH(w) (0x0280 + ((w) << 2)) @@ -265,25 +257,6 @@ struct mv643xx_eth_shared_private { */ void __iomem *base; - /* - * Points at the right SMI instance to use. - */ - struct mv643xx_eth_shared_private *smi; - - /* - * Provides access to local SMI interface. - */ - struct mii_bus *smi_bus; - - /* - * If we have access to the error interrupt pin (which is - * somewhat misnamed as it not only reflects internal errors - * but also reflects SMI completion), use that to wait for - * SMI access completion instead of polling the SMI busy bit. - */ - int err_interrupt; - wait_queue_head_t smi_busy_wait; - /* * Per-port MBUS window access register value. 
*/ @@ -1122,97 +1095,6 @@ out_write: wrlp(mp, PORT_SERIAL_CONTROL, pscr); } -static irqreturn_t mv643xx_eth_err_irq(int irq, void *dev_id) -{ - struct mv643xx_eth_shared_private *msp = dev_id; - - if (readl(msp->base + ERR_INT_CAUSE) & ERR_INT_SMI_DONE) { - writel(~ERR_INT_SMI_DONE, msp->base + ERR_INT_CAUSE); - wake_up(&msp->smi_busy_wait); - return IRQ_HANDLED; - } - - return IRQ_NONE; -} - -static int smi_is_done(struct mv643xx_eth_shared_private *msp) -{ - return !(readl(msp->base + SMI_REG) & SMI_BUSY); -} - -static int smi_wait_ready(struct mv643xx_eth_shared_private *msp) -{ - if (msp->err_interrupt == NO_IRQ) { - int i; - - for (i = 0; !smi_is_done(msp); i++) { - if (i == 10) - return -ETIMEDOUT; - msleep(10); - } - - return 0; - } - - if (!smi_is_done(msp)) { - wait_event_timeout(msp->smi_busy_wait, smi_is_done(msp), - msecs_to_jiffies(100)); - if (!smi_is_done(msp)) - return -ETIMEDOUT; - } - - return 0; -} - -static int smi_bus_read(struct mii_bus *bus, int addr, int reg) -{ - struct mv643xx_eth_shared_private *msp = bus->priv; - void __iomem *smi_reg = msp->base + SMI_REG; - int ret; - - if (smi_wait_ready(msp)) { - pr_warn("SMI bus busy timeout\n"); - return -ETIMEDOUT; - } - - writel(SMI_OPCODE_READ | (reg << 21) | (addr << 16), smi_reg); - - if (smi_wait_ready(msp)) { - pr_warn("SMI bus busy timeout\n"); - return -ETIMEDOUT; - } - - ret = readl(smi_reg); - if (!(ret & SMI_READ_VALID)) { - pr_warn("SMI bus read not valid\n"); - return -ENODEV; - } - - return ret & 0xffff; -} - -static int smi_bus_write(struct mii_bus *bus, int addr, int reg, u16 val) -{ - struct mv643xx_eth_shared_private *msp = bus->priv; - void __iomem *smi_reg = msp->base + SMI_REG; - - if (smi_wait_ready(msp)) { - pr_warn("SMI bus busy timeout\n"); - return -ETIMEDOUT; - } - - writel(SMI_OPCODE_WRITE | (reg << 21) | - (addr << 16) | (val & 0xffff), smi_reg); - - if (smi_wait_ready(msp)) { - pr_warn("SMI bus busy timeout\n"); - return -ETIMEDOUT; - } - - return 0; -} - - /* statistics ***************************************************************/ static struct net_device_stats *mv643xx_eth_get_stats(struct net_device *dev) { @@ -2687,47 +2569,6 @@ static int mv643xx_eth_shared_probe(struct platform_device *pdev) if (msp->base == NULL) goto out_free; - /* - * Set up and register SMI bus. - */ - if (pd == NULL || pd->shared_smi == NULL) { - msp->smi_bus = mdiobus_alloc(); - if (msp->smi_bus == NULL) - goto out_unmap; - - msp->smi_bus->priv = msp; - msp->smi_bus->name = "mv643xx_eth smi"; - msp->smi_bus->read = smi_bus_read; - msp->smi_bus->write = smi_bus_write, - snprintf(msp->smi_bus->id, MII_BUS_ID_SIZE, "%s-%d", - pdev->name, pdev->id); - msp->smi_bus->parent = &pdev->dev; - msp->smi_bus->phy_mask = 0xffffffff; - if (mdiobus_register(msp->smi_bus) < 0) - goto out_free_mii_bus; - msp->smi = msp; - } else { - msp->smi = platform_get_drvdata(pd->shared_smi); - } - - msp->err_interrupt = NO_IRQ; - init_waitqueue_head(&msp->smi_busy_wait); - - /* - * Check whether the error interrupt is hooked up. - */ - res = platform_get_resource(pdev, IORESOURCE_IRQ, 0); - if (res != NULL) { - int err; - - err = request_irq(res->start, mv643xx_eth_err_irq, - IRQF_SHARED, "mv643xx_eth", msp); - if (!err) { - writel(ERR_INT_SMI_DONE, msp->base + ERR_INT_MASK); - msp->err_interrupt = res->start; - } - } - /* * (Re-)program MBUS remapping windows if we are asked to. 
*/ @@ -2743,10 +2584,6 @@ static int mv643xx_eth_shared_probe(struct platform_device *pdev) return 0; -out_free_mii_bus: - mdiobus_free(msp->smi_bus); -out_unmap: - iounmap(msp->base); out_free: kfree(msp); out: @@ -2756,14 +2593,7 @@ out: static int mv643xx_eth_shared_remove(struct platform_device *pdev) { struct mv643xx_eth_shared_private *msp = platform_get_drvdata(pdev); - struct mv643xx_eth_shared_platform_data *pd = pdev->dev.platform_data; - if (pd == NULL || pd->shared_smi == NULL) { - mdiobus_unregister(msp->smi_bus); - mdiobus_free(msp->smi_bus); - } - if (msp->err_interrupt != NO_IRQ) - free_irq(msp->err_interrupt, msp); iounmap(msp->base); kfree(msp); @@ -2826,14 +2656,21 @@ static void set_params(struct mv643xx_eth_private *mp, mp->txq_count = pd->tx_queue_count ? : 1; } +static void mv643xx_eth_adjust_link(struct net_device *dev) +{ + struct mv643xx_eth_private *mp = netdev_priv(dev); + + mv643xx_adjust_pscr(mp); +} + static struct phy_device *phy_scan(struct mv643xx_eth_private *mp, int phy_addr) { - struct mii_bus *bus = mp->shared->smi->smi_bus; struct phy_device *phydev; int start; int num; int i; + char phy_id[MII_BUS_ID_SIZE + 3]; if (phy_addr == MV643XX_ETH_PHY_ADDR_DEFAULT) { start = phy_addr_get(mp) & 0x1f; @@ -2843,17 +2680,19 @@ static struct phy_device *phy_scan(struct mv643xx_eth_private *mp, num = 1; } + /* Attempt to connect to the PHY using orion-mdio */ phydev = NULL; for (i = 0; i < num; i++) { int addr = (start + i) & 0x1f; - if (bus->phy_map[addr] == NULL) - mdiobus_scan(bus, addr); + snprintf(phy_id, sizeof(phy_id), PHY_ID_FMT, + "orion-mdio-mii", addr); - if (phydev == NULL) { - phydev = bus->phy_map[addr]; - if (phydev != NULL) - phy_addr_set(mp, addr); + phydev = phy_connect(mp->dev, phy_id, mv643xx_eth_adjust_link, + PHY_INTERFACE_MODE_GMII); + if (!IS_ERR(phydev)) { + phy_addr_set(mp, addr); + break; } } @@ -2866,8 +2705,6 @@ static void phy_init(struct mv643xx_eth_private *mp, int speed, int duplex) phy_reset(mp); - phy_attach(mp->dev, dev_name(&phy->dev), PHY_INTERFACE_MODE_GMII); - if (speed == 0) { phy->autoneg = AUTONEG_ENABLE; phy->speed = 0; diff --git a/include/linux/mv643xx_eth.h b/include/linux/mv643xx_eth.h index 49258e0ed1c6..141d395bbb5f 100644 --- a/include/linux/mv643xx_eth.h +++ b/include/linux/mv643xx_eth.h @@ -19,7 +19,6 @@ struct mv643xx_eth_shared_platform_data { struct mbus_dram_target_info *dram; - struct platform_device *shared_smi; /* * Max packet size for Tx IP/Layer 4 checksum, when set to 0, default * limit of 9KiB will be used. -- cgit From 51e7e8b632d8e564ba494dfa61358ac1a97e4ceb Mon Sep 17 00:00:00 2001 From: Bernie Thompson Date: Wed, 27 Feb 2013 12:19:17 -0800 Subject: mmc: core: Add in support to expose PRV for v4 MMCs The JEDEC MMC v4 spec defines a new PRV value in place of the original fwrev and hwrev specified in v1. We can expose this in the kernel to enable user space to more easily determine the product revision of a given MMC. Signed-off-by: Bernie Thompson Reviewed-by: Ulf Hansson Signed-off-by: Chris Ball --- Documentation/mmc/mmc-dev-attrs.txt | 1 + drivers/mmc/core/mmc.c | 3 +++ include/linux/mmc/card.h | 1 + 3 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/Documentation/mmc/mmc-dev-attrs.txt b/Documentation/mmc/mmc-dev-attrs.txt index 0d98fac8893b..189bab09255a 100644 --- a/Documentation/mmc/mmc-dev-attrs.txt +++ b/Documentation/mmc/mmc-dev-attrs.txt @@ -22,6 +22,7 @@ All attributes are read-only. 
manfid Manufacturer ID (from CID Register) name Product Name (from CID Register) oemid OEM/Application ID (from CID Register) + prv Product Revision (from CID Register) (SD and MMCv4 only) serial Product Serial Number (from CID Register) erase_size Erase group size preferred_erase_size Preferred erase size diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index c8f3d6e0684e..d584f7ca168c 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -96,6 +96,7 @@ static int mmc_decode_cid(struct mmc_card *card) card->cid.prod_name[3] = UNSTUFF_BITS(resp, 72, 8); card->cid.prod_name[4] = UNSTUFF_BITS(resp, 64, 8); card->cid.prod_name[5] = UNSTUFF_BITS(resp, 56, 8); + card->cid.prv = UNSTUFF_BITS(resp, 48, 8); card->cid.serial = UNSTUFF_BITS(resp, 16, 32); card->cid.month = UNSTUFF_BITS(resp, 12, 4); card->cid.year = UNSTUFF_BITS(resp, 8, 4) + 1997; @@ -627,6 +628,7 @@ MMC_DEV_ATTR(hwrev, "0x%x\n", card->cid.hwrev); MMC_DEV_ATTR(manfid, "0x%06x\n", card->cid.manfid); MMC_DEV_ATTR(name, "%s\n", card->cid.prod_name); MMC_DEV_ATTR(oemid, "0x%04x\n", card->cid.oemid); +MMC_DEV_ATTR(prv, "0x%x\n", card->cid.prv); MMC_DEV_ATTR(serial, "0x%08x\n", card->cid.serial); MMC_DEV_ATTR(enhanced_area_offset, "%llu\n", card->ext_csd.enhanced_area_offset); @@ -645,6 +647,7 @@ static struct attribute *mmc_std_attrs[] = { &dev_attr_manfid.attr, &dev_attr_name.attr, &dev_attr_oemid.attr, + &dev_attr_prv.attr, &dev_attr_serial.attr, &dev_attr_enhanced_area_offset.attr, &dev_attr_enhanced_area_size.attr, diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 61b2c30c903b..f31725ba49f3 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -17,6 +17,7 @@ struct mmc_cid { unsigned int manfid; char prod_name[8]; + unsigned char prv; unsigned int serial; unsigned short oemid; unsigned short year; -- cgit From eed222aca8d077af3600b651176f6fd04d95cce1 Mon Sep 17 00:00:00 2001 From: Aaron Lu Date: Tue, 5 Mar 2013 11:24:52 +0800 Subject: mmc: sdio: bind acpi with sdio function device ACPI spec 5 defined the _ADR encoding for sdio bus as: High word - slot number (0 based) Low word - function number This patch adds support for binding the sdio function device with its acpi node and, if successful, involving acpi in its power management. Signed-off-by: Aaron Lu Reviewed-by: Adrian Hunter Signed-off-by: Chris Ball --- drivers/mmc/core/sdio_bus.c | 20 +++++++++++++++++++- drivers/mmc/host/sdhci-pci.c | 1 + include/linux/mmc/host.h | 2 ++ 3 files changed, 22 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/mmc/core/sdio_bus.c b/drivers/mmc/core/sdio_bus.c index 5e57048e2c1d..8d6bb1821834 100644 --- a/drivers/mmc/core/sdio_bus.c +++ b/drivers/mmc/core/sdio_bus.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -299,6 +300,19 @@ struct sdio_func *sdio_alloc_func(struct mmc_card *card) return func; } +#ifdef CONFIG_ACPI +static void sdio_acpi_set_handle(struct sdio_func *func) +{ + struct mmc_host *host = func->card->host; + u64 addr = (host->slotno << 16) | func->num; + + ACPI_HANDLE_SET(&func->dev, + acpi_get_child(ACPI_HANDLE(host->parent), addr)); +} +#else +static inline void sdio_acpi_set_handle(struct sdio_func *func) {} +#endif + /* * Register a new SDIO function with the driver model. 
*/ @@ -308,9 +322,12 @@ int sdio_add_func(struct sdio_func *func) dev_set_name(&func->dev, "%s:%d", mmc_card_id(func->card), func->num); + sdio_acpi_set_handle(func); ret = device_add(&func->dev); - if (ret == 0) + if (ret == 0) { sdio_func_set_present(func); + acpi_dev_pm_attach(&func->dev, false); + } return ret; } @@ -326,6 +343,7 @@ void sdio_remove_func(struct sdio_func *func) if (!sdio_func_present(func)) return; + acpi_dev_pm_detach(&func->dev, false); device_del(&func->dev); put_device(&func->dev); } diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c index c7ccf3034dad..3dee22d098e9 100644 --- a/drivers/mmc/host/sdhci-pci.c +++ b/drivers/mmc/host/sdhci-pci.c @@ -1279,6 +1279,7 @@ static struct sdhci_pci_slot *sdhci_pci_probe_slot( } host->mmc->pm_caps = MMC_PM_KEEP_POWER | MMC_PM_WAKE_SDIO_IRQ; + host->mmc->slotno = slotno; ret = sdhci_add_host(host); if (ret) diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index d6f20cc6415e..17d714801e94 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -361,6 +361,8 @@ struct mmc_host { unsigned int actual_clock; /* Actual HC clock rate */ + unsigned int slotno; /* used for sdio acpi binding */ + unsigned long private[0] ____cacheline_aligned; }; -- cgit From ce4f3313b05c836c21a91ac89f87dccf84ce9561 Mon Sep 17 00:00:00 2001 From: Peter De Schrijver Date: Fri, 22 Mar 2013 14:07:53 +0200 Subject: clk: add table lookup to mux Add a table lookup feature to the mux clock. Also allow arbitrary masks instead of the width. This will be used by some clocks on Tegra114. Also adapt the tegra periph clk because it uses struct clk_mux directly. Signed-off-by: Peter De Schrijver Tested-by: Stephen Warren Signed-off-by: Mike Turquette --- drivers/clk/clk-mux.c | 50 ++++++++++++++++++++++++++++++++++---------- drivers/clk/tegra/clk.h | 27 +++++++++++++++++------- include/linux/clk-private.h | 2 +- include/linux/clk-provider.h | 9 +++++++- 4 files changed, 67 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/clk/clk-mux.c b/drivers/clk/clk-mux.c index 508c032edce4..25b1734560d0 100644 --- a/drivers/clk/clk-mux.c +++ b/drivers/clk/clk-mux.c @@ -32,6 +32,7 @@ static u8 clk_mux_get_parent(struct clk_hw *hw) { struct clk_mux *mux = to_clk_mux(hw); + int num_parents = __clk_get_num_parents(hw->clk); u32 val; /* @@ -42,7 +43,16 @@ static u8 clk_mux_get_parent(struct clk_hw *hw) * val = 0x4 really means "bit 2, index starts at bit 0" */ val = readl(mux->reg) >> mux->shift; - val &= (1 << mux->width) - 1; + val &= mux->mask; + + if (mux->table) { + int i; + + for (i = 0; i < num_parents; i++) + if (mux->table[i] == val) + return i; + return -EINVAL; + } if (val && (mux->flags & CLK_MUX_INDEX_BIT)) val = ffs(val) - 1; @@ -50,7 +60,7 @@ static u8 clk_mux_get_parent(struct clk_hw *hw) if (val && (mux->flags & CLK_MUX_INDEX_ONE)) val--; - if (val >= __clk_get_num_parents(hw->clk)) + if (val >= num_parents) return -EINVAL; return val; @@ -62,17 +72,22 @@ static int clk_mux_set_parent(struct clk_hw *hw, u8 index) u32 val; unsigned long flags = 0; - if (mux->flags & CLK_MUX_INDEX_BIT) - index = (1 << ffs(index)); + if (mux->table) + index = mux->table[index]; - if (mux->flags & CLK_MUX_INDEX_ONE) - index++; + else { + if (mux->flags & CLK_MUX_INDEX_BIT) + index = (1 << ffs(index)); + + if (mux->flags & CLK_MUX_INDEX_ONE) + index++; + } if (mux->lock) spin_lock_irqsave(mux->lock, flags); val = readl(mux->reg); - val &= ~(((1 << mux->width) - 1) << mux->shift); + val &= ~(mux->mask << 
mux->shift); val |= index << mux->shift; writel(val, mux->reg); @@ -88,10 +103,10 @@ const struct clk_ops clk_mux_ops = { }; EXPORT_SYMBOL_GPL(clk_mux_ops); -struct clk *clk_register_mux(struct device *dev, const char *name, +struct clk *clk_register_mux_table(struct device *dev, const char *name, const char **parent_names, u8 num_parents, unsigned long flags, - void __iomem *reg, u8 shift, u8 width, - u8 clk_mux_flags, spinlock_t *lock) + void __iomem *reg, u8 shift, u32 mask, + u8 clk_mux_flags, u32 *table, spinlock_t *lock) { struct clk_mux *mux; struct clk *clk; @@ -113,9 +128,10 @@ struct clk *clk_register_mux(struct device *dev, const char *name, /* struct clk_mux assignments */ mux->reg = reg; mux->shift = shift; - mux->width = width; + mux->mask = mask; mux->flags = clk_mux_flags; mux->lock = lock; + mux->table = table; mux->hw.init = &init; clk = clk_register(dev, &mux->hw); @@ -125,3 +141,15 @@ struct clk *clk_register_mux(struct device *dev, const char *name, return clk; } + +struct clk *clk_register_mux(struct device *dev, const char *name, + const char **parent_names, u8 num_parents, unsigned long flags, + void __iomem *reg, u8 shift, u8 width, + u8 clk_mux_flags, spinlock_t *lock) +{ + u32 mask = BIT(width) - 1; + + return clk_register_mux_table(dev, name, parent_names, num_parents, + flags, reg, shift, mask, clk_mux_flags, + NULL, lock); +} diff --git a/drivers/clk/tegra/clk.h b/drivers/clk/tegra/clk.h index 0744731c6229..a09d7dcaf183 100644 --- a/drivers/clk/tegra/clk.h +++ b/drivers/clk/tegra/clk.h @@ -355,15 +355,16 @@ struct clk *tegra_clk_register_periph_nodiv(const char *name, struct tegra_clk_periph *periph, void __iomem *clk_base, u32 offset); -#define TEGRA_CLK_PERIPH(_mux_shift, _mux_width, _mux_flags, \ +#define TEGRA_CLK_PERIPH(_mux_shift, _mux_mask, _mux_flags, \ _div_shift, _div_width, _div_frac_width, \ _div_flags, _clk_num, _enb_refcnt, _regs, \ - _gate_flags) \ + _gate_flags, _table) \ { \ .mux = { \ .flags = _mux_flags, \ .shift = _mux_shift, \ - .width = _mux_width, \ + .mask = _mux_mask, \ + .table = _table, \ }, \ .divider = { \ .flags = _div_flags, \ @@ -393,26 +394,36 @@ struct tegra_periph_init_data { const char *dev_id; }; -#define TEGRA_INIT_DATA(_name, _con_id, _dev_id, _parent_names, _offset, \ - _mux_shift, _mux_width, _mux_flags, _div_shift, \ +#define TEGRA_INIT_DATA_TABLE(_name, _con_id, _dev_id, _parent_names, _offset,\ + _mux_shift, _mux_mask, _mux_flags, _div_shift, \ _div_width, _div_frac_width, _div_flags, _regs, \ - _clk_num, _enb_refcnt, _gate_flags, _clk_id) \ + _clk_num, _enb_refcnt, _gate_flags, _clk_id, _table) \ { \ .name = _name, \ .clk_id = _clk_id, \ .parent_names = _parent_names, \ .num_parents = ARRAY_SIZE(_parent_names), \ - .periph = TEGRA_CLK_PERIPH(_mux_shift, _mux_width, \ + .periph = TEGRA_CLK_PERIPH(_mux_shift, _mux_mask, \ _mux_flags, _div_shift, \ _div_width, _div_frac_width, \ _div_flags, _clk_num, \ _enb_refcnt, _regs, \ - _gate_flags), \ + _gate_flags, _table), \ .offset = _offset, \ .con_id = _con_id, \ .dev_id = _dev_id, \ } +#define TEGRA_INIT_DATA(_name, _con_id, _dev_id, _parent_names, _offset,\ + _mux_shift, _mux_width, _mux_flags, _div_shift, \ + _div_width, _div_frac_width, _div_flags, _regs, \ + _clk_num, _enb_refcnt, _gate_flags, _clk_id) \ + TEGRA_INIT_DATA_TABLE(_name, _con_id, _dev_id, _parent_names, _offset,\ + _mux_shift, BIT(_mux_width) - 1, _mux_flags, \ + _div_shift, _div_width, _div_frac_width, _div_flags, \ + _regs, _clk_num, _enb_refcnt, _gate_flags, _clk_id,\ + NULL) + /** * struct 
clk_super_mux - super clock * diff --git a/include/linux/clk-private.h b/include/linux/clk-private.h index 9c7f5807824b..dd7adff76e81 100644 --- a/include/linux/clk-private.h +++ b/include/linux/clk-private.h @@ -152,7 +152,7 @@ struct clk { }, \ .reg = _reg, \ .shift = _shift, \ - .width = _width, \ + .mask = BIT(_width) - 1, \ .flags = _mux_flags, \ .lock = _lock, \ }; \ diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 56e6cc12c796..63ba3b740794 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -297,8 +297,9 @@ struct clk *clk_register_divider_table(struct device *dev, const char *name, struct clk_mux { struct clk_hw hw; void __iomem *reg; + u32 *table; + u32 mask; u8 shift; - u8 width; u8 flags; spinlock_t *lock; }; @@ -307,11 +308,17 @@ struct clk_mux { #define CLK_MUX_INDEX_BIT BIT(1) extern const struct clk_ops clk_mux_ops; + struct clk *clk_register_mux(struct device *dev, const char *name, const char **parent_names, u8 num_parents, unsigned long flags, void __iomem *reg, u8 shift, u8 width, u8 clk_mux_flags, spinlock_t *lock); +struct clk *clk_register_mux_table(struct device *dev, const char *name, + const char **parent_names, u8 num_parents, unsigned long flags, + void __iomem *reg, u8 shift, u32 mask, + u8 clk_mux_flags, u32 *table, spinlock_t *lock); + /** * struct clk_fixed_factor - fixed multiplier and divider clock * -- cgit From cc244ddae6d4c6902ac9d7d64023534f8c44a7eb Mon Sep 17 00:00:00 2001 From: John Stultz Date: Thu, 3 May 2012 12:30:07 -0700 Subject: timekeeping: Move TAI management into timekeeping core from ntp Currently NTP manages the TAI offset. Since there are plans for a CLOCK_TAI clockid, push the TAI management into the timekeeping core. CC: Thomas Gleixner CC: Eric Dumazet CC: Richard Cochran Signed-off-by: John Stultz --- include/linux/time.h | 2 ++ include/linux/timekeeper_internal.h | 3 +++ kernel/time/ntp.c | 18 ++++++++------- kernel/time/timekeeping.c | 44 +++++++++++++++++++++++++++++++++++++ 4 files changed, 59 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/time.h b/include/linux/time.h index d4835dfdf25e..47210a175e78 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -181,6 +181,8 @@ extern struct timespec timespec_trunc(struct timespec t, unsigned gran); extern int timekeeping_valid_for_hres(void); extern u64 timekeeping_max_deferment(void); extern int timekeeping_inject_offset(struct timespec *ts); +extern s32 timekeeping_get_tai_offset(void); +extern void timekeeping_set_tai_offset(s32 tai_offset); struct tms; extern void do_sys_times(struct tms *); diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index e1d558e237ec..ff94f436f8b7 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -62,6 +62,9 @@ struct timekeeper { ktime_t offs_boot; /* The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. 
*/ struct timespec raw_time; + /* The current UTC to TAI offset in seconds */ + s32 tai_offset; + /* Seqlock for all timekeeper values */ seqlock_t lock; }; diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 072bb066bb7d..59e2749be0fa 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -53,9 +53,6 @@ static int time_state = TIME_OK; /* clock status bits: */ static int time_status = STA_UNSYNC; -/* TAI offset (secs): */ -static long time_tai; - /* time adjustment (nsecs): */ static s64 time_offset; @@ -415,7 +412,6 @@ int second_overflow(unsigned long secs) else if (secs % 86400 == 0) { leap = -1; time_state = TIME_OOP; - time_tai++; printk(KERN_NOTICE "Clock: inserting leap second 23:59:60 UTC\n"); } @@ -425,7 +421,6 @@ int second_overflow(unsigned long secs) time_state = TIME_OK; else if ((secs + 1) % 86400 == 0) { leap = 1; - time_tai--; time_state = TIME_WAIT; printk(KERN_NOTICE "Clock: deleting leap second 23:59:59 UTC\n"); @@ -579,7 +574,9 @@ static inline void process_adj_status(struct timex *txc, struct timespec *ts) * Called with ntp_lock held, so we can access and modify * all the global NTP state: */ -static inline void process_adjtimex_modes(struct timex *txc, struct timespec *ts) +static inline void process_adjtimex_modes(struct timex *txc, + struct timespec *ts, + s32 *time_tai) { if (txc->modes & ADJ_STATUS) process_adj_status(txc, ts); @@ -613,7 +610,7 @@ static inline void process_adjtimex_modes(struct timex *txc, struct timespec *ts } if (txc->modes & ADJ_TAI && txc->constant > 0) - time_tai = txc->constant; + *time_tai = txc->constant; if (txc->modes & ADJ_OFFSET) ntp_update_offset(txc->offset); @@ -632,6 +629,7 @@ static inline void process_adjtimex_modes(struct timex *txc, struct timespec *ts int do_adjtimex(struct timex *txc) { struct timespec ts; + u32 time_tai, orig_tai; int result; /* Validate the data before disabling interrupts */ @@ -671,6 +669,7 @@ int do_adjtimex(struct timex *txc) } getnstimeofday(&ts); + orig_tai = time_tai = timekeeping_get_tai_offset(); raw_spin_lock_irq(&ntp_lock); @@ -687,7 +686,7 @@ int do_adjtimex(struct timex *txc) /* If there are input parameters, then process them: */ if (txc->modes) - process_adjtimex_modes(txc, &ts); + process_adjtimex_modes(txc, &ts, &time_tai); txc->offset = shift_right(time_offset * NTP_INTERVAL_FREQ, NTP_SCALE_SHIFT); @@ -716,6 +715,9 @@ int do_adjtimex(struct timex *txc) raw_spin_unlock_irq(&ntp_lock); + if (time_tai != orig_tai) + timekeeping_set_tai_offset(time_tai); + txc->time.tv_sec = ts.tv_sec; txc->time.tv_usec = ts.tv_nsec; if (!(time_status & STA_NANO)) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 0355f125d585..937098aab498 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -513,6 +513,48 @@ error: /* even if we error out, we forwarded the time, so call update */ } EXPORT_SYMBOL(timekeeping_inject_offset); + +/** + * timekeeping_get_tai_offset - Returns current TAI offset from UTC + * + */ +s32 timekeeping_get_tai_offset(void) +{ + struct timekeeper *tk = &timekeeper; + unsigned int seq; + s32 ret; + + do { + seq = read_seqbegin(&tk->lock); + ret = tk->tai_offset; + } while (read_seqretry(&tk->lock, seq)); + + return ret; +} + +/** + * __timekeeping_set_tai_offset - Lock free worker function + * + */ +void __timekeeping_set_tai_offset(struct timekeeper *tk, s32 tai_offset) +{ + tk->tai_offset = tai_offset; +} + +/** + * timekeeping_set_tai_offset - Sets the current TAI offset from UTC + * + */ +void timekeeping_set_tai_offset(s32 
tai_offset) +{ + struct timekeeper *tk = &timekeeper; + unsigned long flags; + + write_seqlock_irqsave(&tk->lock, flags); + __timekeeping_set_tai_offset(tk, tai_offset); + write_sequnlock_irqrestore(&tk->lock, flags); +} + /** * change_clocksource - Swaps clocksources if a new one is available * @@ -1143,6 +1185,8 @@ static inline void accumulate_nsecs_to_secs(struct timekeeper *tk) tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, ts)); + __timekeeping_set_tai_offset(tk, tk->tai_offset - leap); + clock_was_set_delayed(); } } -- cgit From 1ff3c9677bff7e468e0c487d0ffefe4e901d33f4 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Thu, 3 May 2012 12:43:40 -0700 Subject: timekeeping: Add CLOCK_TAI clockid This adds a CLOCK_TAI clockid and the needed accessors. CC: Thomas Gleixner CC: Eric Dumazet CC: Richard Cochran Signed-off-by: John Stultz --- include/linux/time.h | 1 + include/uapi/linux/time.h | 6 ++---- kernel/posix-timers.c | 10 ++++++++++ kernel/time/timekeeping.c | 30 ++++++++++++++++++++++++++++++ 4 files changed, 43 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/time.h b/include/linux/time.h index 47210a175e78..22d81b3c955b 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -183,6 +183,7 @@ extern u64 timekeeping_max_deferment(void); extern int timekeeping_inject_offset(struct timespec *ts); extern s32 timekeeping_get_tai_offset(void); extern void timekeeping_set_tai_offset(s32 tai_offset); +extern void timekeeping_clocktai(struct timespec *ts); struct tms; extern void do_sys_times(struct tms *); diff --git a/include/uapi/linux/time.h b/include/uapi/linux/time.h index 0d3c0edc3eda..e75e1b6ff27f 100644 --- a/include/uapi/linux/time.h +++ b/include/uapi/linux/time.h @@ -54,11 +54,9 @@ struct itimerval { #define CLOCK_BOOTTIME 7 #define CLOCK_REALTIME_ALARM 8 #define CLOCK_BOOTTIME_ALARM 9 +#define CLOCK_SGI_CYCLE 10 /* Hardware specific */ +#define CLOCK_TAI 11 -/* - * The IDs of various hardware clocks: - */ -#define CLOCK_SGI_CYCLE 10 #define MAX_CLOCKS 16 #define CLOCKS_MASK (CLOCK_REALTIME | CLOCK_MONOTONIC) #define CLOCKS_MONO CLOCK_MONOTONIC diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 6edbb2c55c22..fbfc5f1b7710 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -221,6 +221,11 @@ static int posix_get_boottime(const clockid_t which_clock, struct timespec *tp) return 0; } +static int posix_get_tai(clockid_t which_clock, struct timespec *tp) +{ + timekeeping_clocktai(tp); + return 0; +} /* * Initialize everything, well, just everything in Posix clocks/timers ;) @@ -261,6 +266,10 @@ static __init int init_posix_timers(void) .clock_getres = posix_get_coarse_res, .clock_get = posix_get_monotonic_coarse, }; + struct k_clock clock_tai = { + .clock_getres = hrtimer_get_res, + .clock_get = posix_get_tai, + }; struct k_clock clock_boottime = { .clock_getres = hrtimer_get_res, .clock_get = posix_get_boottime, @@ -278,6 +287,7 @@ static __init int init_posix_timers(void) posix_timers_register_clock(CLOCK_REALTIME_COARSE, &clock_realtime_coarse); posix_timers_register_clock(CLOCK_MONOTONIC_COARSE, &clock_monotonic_coarse); posix_timers_register_clock(CLOCK_BOOTTIME, &clock_boottime); + posix_timers_register_clock(CLOCK_TAI, &clock_tai); posix_timers_cache = kmem_cache_create("posix_timers_cache", sizeof (struct k_itimer), 0, SLAB_PANIC, diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 937098aab498..8a842756572d 100644 --- a/kernel/time/timekeeping.c +++
b/kernel/time/timekeeping.c @@ -379,6 +379,36 @@ void ktime_get_ts(struct timespec *ts) } EXPORT_SYMBOL_GPL(ktime_get_ts); + +/** + * timekeeping_clocktai - Returns the TAI time of day in a timespec + * @ts: pointer to the timespec to be set + * + * Returns the time of day in a timespec. + */ +void timekeeping_clocktai(struct timespec *ts) +{ + struct timekeeper *tk = &timekeeper; + unsigned long seq; + u64 nsecs; + + WARN_ON(timekeeping_suspended); + + do { + seq = read_seqbegin(&tk->lock); + + ts->tv_sec = tk->xtime_sec + tk->tai_offset; + nsecs = timekeeping_get_ns(tk); + + } while (read_seqretry(&tk->lock, seq)); + + ts->tv_nsec = 0; + timespec_add_ns(ts, nsecs); + +} +EXPORT_SYMBOL(timekeeping_clocktai); + + #ifdef CONFIG_NTP_PPS /** -- cgit From 90adda98b89aaf68b06014ecf805b6c477daa19b Mon Sep 17 00:00:00 2001 From: John Stultz Date: Mon, 21 Jan 2013 17:00:11 -0800 Subject: hrtimer: Add hrtimer support for CLOCK_TAI Add hrtimer support for CLOCK_TAI, as well as posix timer interfaces. Signed-off-by: John Stultz --- include/linux/hrtimer.h | 5 ++++- include/linux/timekeeper_internal.h | 2 ++ kernel/hrtimer.c | 14 +++++++++++++- kernel/posix-timers.c | 6 ++++++ kernel/time/timekeeping.c | 20 +++++++++++++++++++- 5 files changed, 44 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index cc07d2777bbe..d19a5c2d2270 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -157,6 +157,7 @@ enum hrtimer_base_type { HRTIMER_BASE_MONOTONIC, HRTIMER_BASE_REALTIME, HRTIMER_BASE_BOOTTIME, + HRTIMER_BASE_TAI, HRTIMER_MAX_CLOCK_BASES, }; @@ -327,7 +328,9 @@ extern ktime_t ktime_get(void); extern ktime_t ktime_get_real(void); extern ktime_t ktime_get_boottime(void); extern ktime_t ktime_get_monotonic_offset(void); -extern ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot); +extern ktime_t ktime_get_clocktai(void); +extern ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot, + ktime_t *offs_tai); DECLARE_PER_CPU(struct tick_device, tick_cpu_device); diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index ff94f436f8b7..26700d870506 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -64,6 +64,8 @@ struct timekeeper { struct timespec raw_time; /* The current UTC to TAI offset in seconds */ s32 tai_offset; + /* Offset clock monotonic -> clock tai */ + ktime_t offs_tai; /* Seqlock for all timekeeper values */ seqlock_t lock; diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index cc47812d3feb..258720741d3e 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -83,6 +83,12 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = .get_time = &ktime_get_boottime, .resolution = KTIME_LOW_RES, }, + { + .index = HRTIMER_BASE_TAI, + .clockid = CLOCK_TAI, + .get_time = &ktime_get_clocktai, + .resolution = KTIME_LOW_RES, + }, } }; @@ -90,6 +96,7 @@ static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = { [CLOCK_REALTIME] = HRTIMER_BASE_REALTIME, [CLOCK_MONOTONIC] = HRTIMER_BASE_MONOTONIC, [CLOCK_BOOTTIME] = HRTIMER_BASE_BOOTTIME, + [CLOCK_TAI] = HRTIMER_BASE_TAI, }; static inline int hrtimer_clockid_to_base(clockid_t clock_id) @@ -106,8 +113,10 @@ static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base) { ktime_t xtim, mono, boot; struct timespec xts, tom, slp; + s32 tai_offset; get_xtime_and_monotonic_and_sleep_offset(&xts, &tom, &slp); + tai_offset = timekeeping_get_tai_offset(); xtim = 
timespec_to_ktime(xts); mono = ktime_add(xtim, timespec_to_ktime(tom)); @@ -115,6 +124,8 @@ static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base) base->clock_base[HRTIMER_BASE_REALTIME].softirq_time = xtim; base->clock_base[HRTIMER_BASE_MONOTONIC].softirq_time = mono; base->clock_base[HRTIMER_BASE_BOOTTIME].softirq_time = boot; + base->clock_base[HRTIMER_BASE_TAI].softirq_time = + ktime_add(xtim, ktime_set(tai_offset, 0)); } /* @@ -651,8 +662,9 @@ static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base) { ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset; ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset; + ktime_t *offs_tai = &base->clock_base[HRTIMER_BASE_TAI].offset; - return ktime_get_update_offsets(offs_real, offs_boot); + return ktime_get_update_offsets(offs_real, offs_boot, offs_tai); } /* diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index fbfc5f1b7710..2a2e173d0a7a 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -269,6 +269,12 @@ static __init int init_posix_timers(void) struct k_clock clock_tai = { .clock_getres = hrtimer_get_res, .clock_get = posix_get_tai, + .nsleep = common_nsleep, + .nsleep_restart = hrtimer_nanosleep_restart, + .timer_create = common_timer_create, + .timer_set = common_timer_set, + .timer_get = common_timer_get, + .timer_del = common_timer_del, }; struct k_clock clock_boottime = { .clock_getres = hrtimer_get_res, diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 8a842756572d..8061ae0be7bd 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -67,6 +67,7 @@ static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec wtm) tk->wall_to_monotonic = wtm; set_normalized_timespec(&tmp, -wtm.tv_sec, -wtm.tv_nsec); tk->offs_real = timespec_to_ktime(tmp); + tk->offs_tai = ktime_sub(tk->offs_real, ktime_set(tk->tai_offset, 0)); } static void tk_set_sleep_time(struct timekeeper *tk, struct timespec t) @@ -409,6 +410,20 @@ void timekeeping_clocktai(struct timespec *ts) EXPORT_SYMBOL(timekeeping_clocktai); +/** + * ktime_get_clocktai - Returns the TAI time of day in a ktime + * + * Returns the time of day in a ktime. 
+ */ +ktime_t ktime_get_clocktai(void) +{ + struct timespec ts; + + timekeeping_clocktai(&ts); + return timespec_to_ktime(ts); +} +EXPORT_SYMBOL(ktime_get_clocktai); + #ifdef CONFIG_NTP_PPS /** @@ -569,6 +584,7 @@ s32 timekeeping_get_tai_offset(void) void __timekeeping_set_tai_offset(struct timekeeper *tk, s32 tai_offset) { tk->tai_offset = tai_offset; + tk->offs_tai = ktime_sub(tk->offs_real, ktime_set(tai_offset, 0)); } /** @@ -1539,7 +1555,8 @@ void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim, * Returns current monotonic time and updates the offsets * Called from hrtimer_interupt() or retrigger_next_event() */ -ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot) +ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot, + ktime_t *offs_tai) { struct timekeeper *tk = &timekeeper; ktime_t now; @@ -1554,6 +1571,7 @@ ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot) *offs_real = tk->offs_real; *offs_boot = tk->offs_boot; + *offs_tai = tk->offs_tai; } while (read_seqretry(&tk->lock, seq)); now = ktime_add_ns(ktime_set(secs, 0), nsecs); -- cgit From eb93e4d93093615c60cb7dd3dcb24e46bd7d62d4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 Feb 2013 22:51:36 +0000 Subject: timekeeping: Make jiffies_lock internal Nothing outside of the timekeeping core needs that lock. Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz --- include/linux/jiffies.h | 1 - kernel/time/tick-internal.h | 2 ++ kernel/time/timekeeping.c | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index 82ed068b1ebe..8fb8edf12417 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -75,7 +75,6 @@ extern int register_refined_jiffies(long clock_tick_rate); */ extern u64 __jiffy_data jiffies_64; extern unsigned long volatile __jiffy_data jiffies; -extern seqlock_t jiffies_lock; #if (BITS_PER_LONG < 64) u64 get_jiffies_64(void); diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index cf3e59ed6dc0..f5c9207967cf 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -4,6 +4,8 @@ #include #include +extern seqlock_t jiffies_lock; + #ifdef CONFIG_GENERIC_CLOCKEVENTS_BUILD #define TICK_DO_TIMER_NONE -1 diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index c442a4ccccc9..b0c648fc959f 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -23,6 +23,7 @@ #include #include +#include "tick-internal.h" static struct timekeeper timekeeper; -- cgit From 7e40672d930b369c1984457233ec5557aa53bfb8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 Feb 2013 22:51:37 +0000 Subject: timekeeping: Move lock out of timekeeper struct Make the lock a separate entity. Preparatory patch for shadow timekeeper structure. 
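(Illustrative aside, not part of the patch: with the seqlock moved to file scope, readers and writers follow the usual seqlock pattern against the global lock instead of a per-timekeeper member. The names below are made up for the example.)

#include <linux/seqlock.h>

/* Hypothetical sketch of the access pattern with a file-static seqlock. */
static DEFINE_SEQLOCK(example_lock);
static u64 example_value;

static u64 example_read(void)
{
	unsigned long seq;
	u64 v;

	do {
		seq = read_seqbegin(&example_lock);	/* open read section */
		v = example_value;			/* consistent snapshot */
	} while (read_seqretry(&example_lock, seq));	/* retry if a writer ran */

	return v;
}

static void example_write(u64 new_value)
{
	unsigned long flags;

	write_seqlock_irqsave(&example_lock, flags);
	example_value = new_value;
	write_sequnlock_irqrestore(&example_lock, flags);
}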
Signed-off-by: Thomas Gleixner [Merged with CLOCK_TAI changes] Signed-off-by: John Stultz --- include/linux/timekeeper_internal.h | 2 - kernel/time/timekeeping.c | 108 ++++++++++++++++++------------------ 2 files changed, 53 insertions(+), 57 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index 26700d870506..a151bd70e52b 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -67,8 +67,6 @@ struct timekeeper { /* Offset clock monotonic -> clock tai */ ktime_t offs_tai; - /* Seqlock for all timekeeper values */ - seqlock_t lock; }; static inline struct timespec tk_xtime(struct timekeeper *tk) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index b0c648fc959f..caede71c0a35 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -26,6 +26,7 @@ #include "tick-internal.h" static struct timekeeper timekeeper; +static DEFINE_SEQLOCK(timekeeper_lock); /* flag for if timekeeping is suspended */ int __read_mostly timekeeping_suspended; @@ -212,11 +213,11 @@ int pvclock_gtod_register_notifier(struct notifier_block *nb) unsigned long flags; int ret; - write_seqlock_irqsave(&tk->lock, flags); + write_seqlock_irqsave(&timekeeper_lock, flags); ret = raw_notifier_chain_register(&pvclock_gtod_chain, nb); /* update timekeeping data */ update_pvclock_gtod(tk); - write_sequnlock_irqrestore(&tk->lock, flags); + write_sequnlock_irqrestore(&timekeeper_lock, flags); return ret; } @@ -230,13 +231,12 @@ EXPORT_SYMBOL_GPL(pvclock_gtod_register_notifier); */ int pvclock_gtod_unregister_notifier(struct notifier_block *nb) { - struct timekeeper *tk = &timekeeper; unsigned long flags; int ret; - write_seqlock_irqsave(&tk->lock, flags); + write_seqlock_irqsave(&timekeeper_lock, flags); ret = raw_notifier_chain_unregister(&pvclock_gtod_chain, nb); - write_sequnlock_irqrestore(&tk->lock, flags); + write_sequnlock_irqrestore(&timekeeper_lock, flags); return ret; } @@ -296,12 +296,12 @@ int __getnstimeofday(struct timespec *ts) s64 nsecs = 0; do { - seq = read_seqbegin(&tk->lock); + seq = read_seqbegin(&timekeeper_lock); ts->tv_sec = tk->xtime_sec; nsecs = timekeeping_get_ns(tk); - } while (read_seqretry(&tk->lock, seq)); + } while (read_seqretry(&timekeeper_lock, seq)); ts->tv_nsec = 0; timespec_add_ns(ts, nsecs); @@ -337,11 +337,11 @@ ktime_t ktime_get(void) WARN_ON(timekeeping_suspended); do { - seq = read_seqbegin(&tk->lock); + seq = read_seqbegin(&timekeeper_lock); secs = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; nsecs = timekeeping_get_ns(tk) + tk->wall_to_monotonic.tv_nsec; - } while (read_seqretry(&tk->lock, seq)); + } while (read_seqretry(&timekeeper_lock, seq)); /* * Use ktime_set/ktime_add_ns to create a proper ktime on * 32-bit architectures without CONFIG_KTIME_SCALAR. 
@@ -368,12 +368,12 @@ void ktime_get_ts(struct timespec *ts) WARN_ON(timekeeping_suspended); do { - seq = read_seqbegin(&tk->lock); + seq = read_seqbegin(&timekeeper_lock); ts->tv_sec = tk->xtime_sec; nsec = timekeeping_get_ns(tk); tomono = tk->wall_to_monotonic; - } while (read_seqretry(&tk->lock, seq)); + } while (read_seqretry(&timekeeper_lock, seq)); ts->tv_sec += tomono.tv_sec; ts->tv_nsec = 0; @@ -397,12 +397,12 @@ void timekeeping_clocktai(struct timespec *ts) WARN_ON(timekeeping_suspended); do { - seq = read_seqbegin(&tk->lock); + seq = read_seqbegin(&timekeeper_lock); ts->tv_sec = tk->xtime_sec + tk->tai_offset; nsecs = timekeeping_get_ns(tk); - } while (read_seqretry(&tk->lock, seq)); + } while (read_seqretry(&timekeeper_lock, seq)); ts->tv_nsec = 0; timespec_add_ns(ts, nsecs); @@ -445,7 +445,7 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real) WARN_ON_ONCE(timekeeping_suspended); do { - seq = read_seqbegin(&tk->lock); + seq = read_seqbegin(&timekeeper_lock); *ts_raw = tk->raw_time; ts_real->tv_sec = tk->xtime_sec; @@ -454,7 +454,7 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real) nsecs_raw = timekeeping_get_ns_raw(tk); nsecs_real = timekeeping_get_ns(tk); - } while (read_seqretry(&tk->lock, seq)); + } while (read_seqretry(&timekeeper_lock, seq)); timespec_add_ns(ts_raw, nsecs_raw); timespec_add_ns(ts_real, nsecs_real); @@ -494,7 +494,7 @@ int do_settimeofday(const struct timespec *tv) if (!timespec_valid_strict(tv)) return -EINVAL; - write_seqlock_irqsave(&tk->lock, flags); + write_seqlock_irqsave(&timekeeper_lock, flags); timekeeping_forward_now(tk); @@ -508,7 +508,7 @@ int do_settimeofday(const struct timespec *tv) timekeeping_update(tk, true); - write_sequnlock_irqrestore(&tk->lock, flags); + write_sequnlock_irqrestore(&timekeeper_lock, flags); /* signal hrtimers about time change */ clock_was_set(); @@ -533,7 +533,7 @@ int timekeeping_inject_offset(struct timespec *ts) if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC) return -EINVAL; - write_seqlock_irqsave(&tk->lock, flags); + write_seqlock_irqsave(&timekeeper_lock, flags); timekeeping_forward_now(tk); @@ -550,7 +550,7 @@ int timekeeping_inject_offset(struct timespec *ts) error: /* even if we error out, we forwarded the time, so call update */ timekeeping_update(tk, true); - write_sequnlock_irqrestore(&tk->lock, flags); + write_sequnlock_irqrestore(&timekeeper_lock, flags); /* signal hrtimers about time change */ clock_was_set(); @@ -571,9 +571,9 @@ s32 timekeeping_get_tai_offset(void) s32 ret; do { - seq = read_seqbegin(&tk->lock); + seq = read_seqbegin(&timekeeper_lock); ret = tk->tai_offset; - } while (read_seqretry(&tk->lock, seq)); + } while (read_seqretry(&timekeeper_lock, seq)); return ret; } @@ -597,9 +597,9 @@ void timekeeping_set_tai_offset(s32 tai_offset) struct timekeeper *tk = &timekeeper; unsigned long flags; - write_seqlock_irqsave(&tk->lock, flags); + write_seqlock_irqsave(&timekeeper_lock, flags); __timekeeping_set_tai_offset(tk, tai_offset); - write_sequnlock_irqrestore(&tk->lock, flags); + write_sequnlock_irqrestore(&timekeeper_lock, flags); } /** @@ -615,7 +615,7 @@ static int change_clocksource(void *data) new = (struct clocksource *) data; - write_seqlock_irqsave(&tk->lock, flags); + write_seqlock_irqsave(&timekeeper_lock, flags); timekeeping_forward_now(tk); if (!new->enable || new->enable(new) == 0) { @@ -626,7 +626,7 @@ static int change_clocksource(void *data) } timekeeping_update(tk, true); - write_sequnlock_irqrestore(&tk->lock, flags); 
+ write_sequnlock_irqrestore(&timekeeper_lock, flags); return 0; } @@ -676,11 +676,11 @@ void getrawmonotonic(struct timespec *ts) s64 nsecs; do { - seq = read_seqbegin(&tk->lock); + seq = read_seqbegin(&timekeeper_lock); nsecs = timekeeping_get_ns_raw(tk); *ts = tk->raw_time; - } while (read_seqretry(&tk->lock, seq)); + } while (read_seqretry(&timekeeper_lock, seq)); timespec_add_ns(ts, nsecs); } @@ -696,11 +696,11 @@ int timekeeping_valid_for_hres(void) int ret; do { - seq = read_seqbegin(&tk->lock); + seq = read_seqbegin(&timekeeper_lock); ret = tk->clock->flags & CLOCK_SOURCE_VALID_FOR_HRES; - } while (read_seqretry(&tk->lock, seq)); + } while (read_seqretry(&timekeeper_lock, seq)); return ret; } @@ -715,11 +715,11 @@ u64 timekeeping_max_deferment(void) u64 ret; do { - seq = read_seqbegin(&tk->lock); + seq = read_seqbegin(&timekeeper_lock); ret = tk->clock->max_idle_ns; - } while (read_seqretry(&tk->lock, seq)); + } while (read_seqretry(&timekeeper_lock, seq)); return ret; } @@ -782,11 +782,9 @@ void __init timekeeping_init(void) boot.tv_nsec = 0; } - seqlock_init(&tk->lock); - ntp_init(); - write_seqlock_irqsave(&tk->lock, flags); + write_seqlock_irqsave(&timekeeper_lock, flags); clock = clocksource_default_clock(); if (clock->enable) clock->enable(clock); @@ -805,7 +803,7 @@ void __init timekeeping_init(void) tmp.tv_nsec = 0; tk_set_sleep_time(tk, tmp); - write_sequnlock_irqrestore(&tk->lock, flags); + write_sequnlock_irqrestore(&timekeeper_lock, flags); } /* time in seconds when suspend began */ @@ -853,7 +851,7 @@ void timekeeping_inject_sleeptime(struct timespec *delta) if (has_persistent_clock()) return; - write_seqlock_irqsave(&tk->lock, flags); + write_seqlock_irqsave(&timekeeper_lock, flags); timekeeping_forward_now(tk); @@ -861,7 +859,7 @@ void timekeeping_inject_sleeptime(struct timespec *delta) timekeeping_update(tk, true); - write_sequnlock_irqrestore(&tk->lock, flags); + write_sequnlock_irqrestore(&timekeeper_lock, flags); /* signal hrtimers about time change */ clock_was_set(); @@ -888,7 +886,7 @@ static void timekeeping_resume(void) clockevents_resume(); clocksource_resume(); - write_seqlock_irqsave(&tk->lock, flags); + write_seqlock_irqsave(&timekeeper_lock, flags); /* * After system resumes, we need to calculate the suspended time and @@ -940,7 +938,7 @@ static void timekeeping_resume(void) tk->ntp_error = 0; timekeeping_suspended = 0; timekeeping_update(tk, false); - write_sequnlock_irqrestore(&tk->lock, flags); + write_sequnlock_irqrestore(&timekeeper_lock, flags); touch_softlockup_watchdog(); @@ -959,7 +957,7 @@ static int timekeeping_suspend(void) read_persistent_clock(&timekeeping_suspend_time); - write_seqlock_irqsave(&tk->lock, flags); + write_seqlock_irqsave(&timekeeper_lock, flags); timekeeping_forward_now(tk); timekeeping_suspended = 1; @@ -982,7 +980,7 @@ static int timekeeping_suspend(void) timekeeping_suspend_time = timespec_add(timekeeping_suspend_time, delta_delta); } - write_sequnlock_irqrestore(&tk->lock, flags); + write_sequnlock_irqrestore(&timekeeper_lock, flags); clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL); clocksource_suspend(); @@ -1322,7 +1320,7 @@ static void update_wall_time(void) int shift = 0, maxshift; unsigned long flags; - write_seqlock_irqsave(&tk->lock, flags); + write_seqlock_irqsave(&timekeeper_lock, flags); /* Make sure we're fully resumed: */ if (unlikely(timekeeping_suspended)) @@ -1377,7 +1375,7 @@ static void update_wall_time(void) timekeeping_update(tk, false); out: - write_sequnlock_irqrestore(&tk->lock, flags); + 
write_sequnlock_irqrestore(&timekeeper_lock, flags); } @@ -1425,13 +1423,13 @@ void get_monotonic_boottime(struct timespec *ts) WARN_ON(timekeeping_suspended); do { - seq = read_seqbegin(&tk->lock); + seq = read_seqbegin(&timekeeper_lock); ts->tv_sec = tk->xtime_sec; nsec = timekeeping_get_ns(tk); tomono = tk->wall_to_monotonic; sleep = tk->total_sleep_time; - } while (read_seqretry(&tk->lock, seq)); + } while (read_seqretry(&timekeeper_lock, seq)); ts->tv_sec += tomono.tv_sec + sleep.tv_sec; ts->tv_nsec = 0; @@ -1490,10 +1488,10 @@ struct timespec current_kernel_time(void) unsigned long seq; do { - seq = read_seqbegin(&tk->lock); + seq = read_seqbegin(&timekeeper_lock); now = tk_xtime(tk); - } while (read_seqretry(&tk->lock, seq)); + } while (read_seqretry(&timekeeper_lock, seq)); return now; } @@ -1506,11 +1504,11 @@ struct timespec get_monotonic_coarse(void) unsigned long seq; do { - seq = read_seqbegin(&tk->lock); + seq = read_seqbegin(&timekeeper_lock); now = tk_xtime(tk); mono = tk->wall_to_monotonic; - } while (read_seqretry(&tk->lock, seq)); + } while (read_seqretry(&timekeeper_lock, seq)); set_normalized_timespec(&now, now.tv_sec + mono.tv_sec, now.tv_nsec + mono.tv_nsec); @@ -1541,11 +1539,11 @@ void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim, unsigned long seq; do { - seq = read_seqbegin(&tk->lock); + seq = read_seqbegin(&timekeeper_lock); *xtim = tk_xtime(tk); *wtom = tk->wall_to_monotonic; *sleep = tk->total_sleep_time; - } while (read_seqretry(&tk->lock, seq)); + } while (read_seqretry(&timekeeper_lock, seq)); } #ifdef CONFIG_HIGH_RES_TIMERS @@ -1566,7 +1564,7 @@ ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot, u64 secs, nsecs; do { - seq = read_seqbegin(&tk->lock); + seq = read_seqbegin(&timekeeper_lock); secs = tk->xtime_sec; nsecs = timekeeping_get_ns(tk); @@ -1574,7 +1572,7 @@ ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot, *offs_real = tk->offs_real; *offs_boot = tk->offs_boot; *offs_tai = tk->offs_tai; - } while (read_seqretry(&tk->lock, seq)); + } while (read_seqretry(&timekeeper_lock, seq)); now = ktime_add_ns(ktime_set(secs, 0), nsecs); now = ktime_sub(now, *offs_real); @@ -1592,9 +1590,9 @@ ktime_t ktime_get_monotonic_offset(void) struct timespec wtom; do { - seq = read_seqbegin(&tk->lock); + seq = read_seqbegin(&timekeeper_lock); wtom = tk->wall_to_monotonic; - } while (read_seqretry(&tk->lock, seq)); + } while (read_seqretry(&timekeeper_lock, seq)); return timespec_to_ktime(wtom); } -- cgit From cbe5e6109538ddab57764a88d9f0c2accd0c7d48 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Fri, 22 Mar 2013 22:17:36 -0600 Subject: lru_cache: introduce lc_get_cumulative() New helper to be able to consolidate more updates into a single transaction. Without this, we can only grab a single refcount on an updated element while preparing a transaction. lc_get_cumulative - like lc_get; also finds to-be-changed elements @lc: the lru cache to operate on @enr: the label to look up Unlike lc_get this also returns the element for @enr, if it belongs to a pending transaction, so the return values are like for lc_get(), plus: pointer to an element already on the "to_be_changed" list. In this case, the cache was already marked %LC_DIRTY. Caller needs to make sure that the pending transaction is completed, before proceeding to actually use this element. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Fixed up by Jens to export lc_get_cumulative(). 
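(Illustrative aside, not part of the patch: a caller preparing such a cumulative transaction could collect its references along these lines. The function and label array are made-up placeholders, and the cache's external locking and the actual commit step are omitted; only lc_get_cumulative() and its semantics come from the commit.)

#include <linux/lru_cache.h>

/* Hypothetical usage: take refcounts on several labels, including ones
 * already queued on the "to_be_changed" list, then commit them all in
 * a single transaction. */
static void example_prepare(struct lru_cache *lc, unsigned int *labels, int n)
{
	int i;

	for (i = 0; i < n; i++) {
		struct lc_element *e = lc_get_cumulative(lc, labels[i]);

		if (!e)
			continue;	/* no free slot and not pending */
		/* e may still belong to the pending transaction; wait for
		 * that transaction to complete before using the element. */
	}
	/* ...assemble and commit the combined transaction here... */
}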
Signed-off-by: Jens Axboe --- include/linux/lru_cache.h | 1 + lib/lru_cache.c | 56 ++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 47 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lru_cache.h b/include/linux/lru_cache.h index 4019013c6593..46262284de47 100644 --- a/include/linux/lru_cache.h +++ b/include/linux/lru_cache.h @@ -256,6 +256,7 @@ extern void lc_destroy(struct lru_cache *lc); extern void lc_set(struct lru_cache *lc, unsigned int enr, int index); extern void lc_del(struct lru_cache *lc, struct lc_element *element); +extern struct lc_element *lc_get_cumulative(struct lru_cache *lc, unsigned int enr); extern struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr); extern struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr); extern struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr); diff --git a/lib/lru_cache.c b/lib/lru_cache.c index 8335d39d2ccd..4a83ecd03650 100644 --- a/lib/lru_cache.c +++ b/lib/lru_cache.c @@ -365,7 +365,13 @@ static int lc_unused_element_available(struct lru_cache *lc) return 0; } -static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, bool may_change) +/* used as internal flags to __lc_get */ +enum { + LC_GET_MAY_CHANGE = 1, + LC_GET_MAY_USE_UNCOMMITTED = 2, +}; + +static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, unsigned int flags) { struct lc_element *e; @@ -380,22 +386,31 @@ static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, bool * this enr is currently being pulled in already, * and will be available once the pending transaction * has been committed. */ - if (e && e->lc_new_number == e->lc_number) { + if (e) { + if (e->lc_new_number != e->lc_number) { + /* It has been found above, but on the "to_be_changed" + * list, not yet committed. Don't pull it in twice, + * wait for the transaction, then try again... + */ + if (!(flags & LC_GET_MAY_USE_UNCOMMITTED)) + RETURN(NULL); + /* ... unless the caller is aware of the implications, + * probably preparing a cumulative transaction. */ + ++e->refcnt; + ++lc->hits; + RETURN(e); + } + /* else: lc_new_number == lc_number; a real hit. */ ++lc->hits; if (e->refcnt++ == 0) lc->used++; list_move(&e->list, &lc->in_use); /* Not evictable... */ RETURN(e); } + /* e == NULL */ ++lc->misses; - if (!may_change) - RETURN(NULL); - - /* It has been found above, but on the "to_be_changed" list, not yet - * committed. Don't pull it in twice, wait for the transaction, then - * try again */ - if (e) + if (!(flags & LC_GET_MAY_CHANGE)) RETURN(NULL); /* To avoid races with lc_try_lock(), first, mark us dirty @@ -477,7 +492,27 @@ static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, bool */ struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr) { - return __lc_get(lc, enr, 1); + return __lc_get(lc, enr, LC_GET_MAY_CHANGE); +} + +/** + * lc_get_cumulative - like lc_get; also finds to-be-changed elements + * @lc: the lru cache to operate on + * @enr: the label to look up + * + * Unlike lc_get this also returns the element for @enr, if it is belonging to + * a pending transaction, so the return values are like for lc_get(), + * plus: + * + * pointer to an element already on the "to_be_changed" list. + * In this case, the cache was already marked %LC_DIRTY. + * + * Caller needs to make sure that the pending transaction is completed, + * before proceeding to actually use this element. 
+ */ +struct lc_element *lc_get_cumulative(struct lru_cache *lc, unsigned int enr) +{ + return __lc_get(lc, enr, LC_GET_MAY_CHANGE|LC_GET_MAY_USE_UNCOMMITTED); } /** @@ -648,3 +683,4 @@ EXPORT_SYMBOL(lc_seq_printf_stats); EXPORT_SYMBOL(lc_seq_dump_details); EXPORT_SYMBOL(lc_try_lock); EXPORT_SYMBOL(lc_is_used); +EXPORT_SYMBOL(lc_get_cumulative); -- cgit From 5bbcf5e6abe97485748b51ea0713cc3012b4a8f0 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 19 Mar 2013 18:16:59 +0100 Subject: drbd: adjust upper limit for activity log extents Now that the on-disk activity-log ring buffer size is adjustable, the maximum active set can become larger, and is now limited by the use of 16bit "labels". This increases the maximum working set from 6433 to 65534 extents, each of which covers an area of 4MiB. Which means that if you use the maximum, you'd have to resync more than 250 GiB after an unclean Primary shutdown. With capable backend storage and replication links, this is entirely feasible. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_nl.c | 47 +++++++++++++++++++++++++++++++++++--------- include/linux/drbd_limits.h | 11 +++++------ 2 files changed, 43 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index bcf900bcd142..42fda4ae2f87 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1141,15 +1141,32 @@ static bool should_set_defaults(struct genl_info *info) return 0 != (flags & DRBD_GENL_F_SET_DEFAULTS); } -static void enforce_disk_conf_limits(struct disk_conf *dc) +static unsigned int drbd_al_extents_max(struct drbd_backing_dev *bdev) { - if (dc->al_extents < DRBD_AL_EXTENTS_MIN) - dc->al_extents = DRBD_AL_EXTENTS_MIN; - if (dc->al_extents > DRBD_AL_EXTENTS_MAX) - dc->al_extents = DRBD_AL_EXTENTS_MAX; + /* This is limited by 16 bit "slot" numbers, + * and by available on-disk context storage. + * + * Also (u16)~0 is special (denotes a "free" extent). + * + * One transaction occupies one 4kB on-disk block, + * we have n such blocks in the on disk ring buffer, + * the "current" transaction may fail (n-1), + * and there is 919 slot numbers context information per transaction. + * + * 72 transaction blocks amounts to more than 2**16 context slots, + * so cap there first. 
+	 */
+	const unsigned int max_al_nr = DRBD_AL_EXTENTS_MAX;
+	const unsigned int sufficient_on_disk =
+		(max_al_nr + AL_CONTEXT_PER_TRANSACTION -1)
+		/AL_CONTEXT_PER_TRANSACTION;
 
-	if (dc->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX)
-		dc->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX;
+	unsigned int al_size_4k = bdev->md.al_size_4k;
+
+	if (al_size_4k > sufficient_on_disk)
+		return max_al_nr;
+
+	return (al_size_4k - 1) * AL_CONTEXT_PER_TRANSACTION;
 }
 
 int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
@@ -1196,7 +1213,13 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
 	if (!expect(new_disk_conf->resync_rate >= 1))
 		new_disk_conf->resync_rate = 1;
 
-	enforce_disk_conf_limits(new_disk_conf);
+	if (new_disk_conf->al_extents < DRBD_AL_EXTENTS_MIN)
+		new_disk_conf->al_extents = DRBD_AL_EXTENTS_MIN;
+	if (new_disk_conf->al_extents > drbd_al_extents_max(mdev->ldev))
+		new_disk_conf->al_extents = drbd_al_extents_max(mdev->ldev);
+
+	if (new_disk_conf->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX)
+		new_disk_conf->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX;
 
 	fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
 	if (fifo_size != mdev->rs_plan_s->size) {
@@ -1344,7 +1367,8 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 		goto fail;
 	}
 
-	enforce_disk_conf_limits(new_disk_conf);
+	if (new_disk_conf->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX)
+		new_disk_conf->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX;
 
 	new_plan = fifo_alloc((new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ);
 	if (!new_plan) {
@@ -1419,6 +1443,11 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	if (retcode != NO_ERROR)
 		goto fail;
 
+	if (new_disk_conf->al_extents < DRBD_AL_EXTENTS_MIN)
+		new_disk_conf->al_extents = DRBD_AL_EXTENTS_MIN;
+	if (new_disk_conf->al_extents > drbd_al_extents_max(nbc))
+		new_disk_conf->al_extents = drbd_al_extents_max(nbc);
+
 	if (drbd_get_max_capacity(nbc) < new_disk_conf->disk_size) {
 		dev_err(DEV, "max capacity %llu smaller than disk size %llu\n",
 			(unsigned long long) drbd_get_max_capacity(nbc),
diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index 1fa19c5f5e64..1fedf2b17cc8 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -126,13 +126,12 @@
 #define DRBD_RESYNC_RATE_DEF 250
 #define DRBD_RESYNC_RATE_SCALE 'k'  /* kilobytes */
 
-  /* less than 7 would hit performance unnecessarily.
-   * 919 slots context information per transaction,
-   * 32k activity log, 4k transaction size,
-   * one transaction in flight:
-   * 919 * 7 = 6433 */
+  /* less than 7 would hit performance unnecessarily. */
 #define DRBD_AL_EXTENTS_MIN  7
-#define DRBD_AL_EXTENTS_MAX  6433
+  /* we use u16 as "slot number", (u16)~0 is "FREE".
+   * If you use >= 292 kB on-disk ring buffer,
+   * this is the maximum you can use: */
+#define DRBD_AL_EXTENTS_MAX  0xfffe
 #define DRBD_AL_EXTENTS_DEF  1237
 #define DRBD_AL_EXTENTS_SCALE '1'
 
-- cgit
From 66311274691ec65972cad3626057fa8d00c146d8 Mon Sep 17 00:00:00 2001
From: Lin Ming
Date: Sat, 23 Mar 2013 11:42:24 +0800
Subject: block: add a flag to identify PM request

Add a flag REQ_PM to identify a request as PM related; such requests
will not change the device request queue's runtime status.

It is intended to be used in a driver's runtime PM callback, so that
the driver can perform some IO to the device there with the queue's
runtime status unaffected.

e.g. in SCSI disk's runtime suspend callback, the disk will be put into
stopped power state, and this requires sending a command to the device.
Such command processing should not change the disk's runtime status. Signed-off-by: Lin Ming Signed-off-by: Aaron Lu Acked-by: Alan Stern Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index cdf11191e645..fcc1ce28d5ca 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -175,6 +175,7 @@ enum rq_flag_bits { __REQ_IO_STAT, /* account I/O stat */ __REQ_MIXED_MERGE, /* merge of different types, fail separately */ __REQ_KERNEL, /* direct IO to kernel pages */ + __REQ_PM, /* runtime pm request */ __REQ_NR_BITS, /* stops here */ }; @@ -223,5 +224,6 @@ enum rq_flag_bits { #define REQ_MIXED_MERGE (1 << __REQ_MIXED_MERGE) #define REQ_SECURE (1 << __REQ_SECURE) #define REQ_KERNEL (1 << __REQ_KERNEL) +#define REQ_PM (1 << __REQ_PM) #endif /* __LINUX_BLK_TYPES_H */ -- cgit From 6c9546675864f51506af69eca388e5d922942c56 Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Sat, 23 Mar 2013 11:42:26 +0800 Subject: block: add runtime pm helpers Add runtime pm helper functions: void blk_pm_runtime_init(struct request_queue *q, struct device *dev) - Initialization function for drivers to call. int blk_pre_runtime_suspend(struct request_queue *q) - If any requests are in the queue, mark last busy and return -EBUSY. Otherwise set q->rpm_status to RPM_SUSPENDING and return 0. void blk_post_runtime_suspend(struct request_queue *q, int err) - If the suspend succeeded then set q->rpm_status to RPM_SUSPENDED. Otherwise set it to RPM_ACTIVE and mark last busy. void blk_pre_runtime_resume(struct request_queue *q) - Set q->rpm_status to RPM_RESUMING. void blk_post_runtime_resume(struct request_queue *q, int err) - If the resume succeeded then set q->rpm_status to RPM_ACTIVE and call __blk_run_queue, then mark last busy and autosuspend. Otherwise set q->rpm_status to RPM_SUSPENDED. The idea and API is designed by Alan Stern and described here: http://marc.info/?l=linux-scsi&m=133727953625963&w=2 Signed-off-by: Lin Ming Signed-off-by: Aaron Lu Acked-by: Alan Stern Signed-off-by: Jens Axboe --- block/blk-core.c | 144 +++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/blkdev.h | 27 ++++++++++ 2 files changed, 171 insertions(+) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index 074b758efc42..123d240132bf 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -30,6 +30,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include @@ -3045,6 +3046,149 @@ void blk_finish_plug(struct blk_plug *plug) } EXPORT_SYMBOL(blk_finish_plug); +#ifdef CONFIG_PM_RUNTIME +/** + * blk_pm_runtime_init - Block layer runtime PM initialization routine + * @q: the queue of the device + * @dev: the device the queue belongs to + * + * Description: + * Initialize runtime-PM-related fields for @q and start auto suspend for + * @dev. Drivers that want to take advantage of request-based runtime PM + * should call this function after @dev has been initialized, and its + * request queue @q has been allocated, and runtime PM for it can not happen + * yet(either due to disabled/forbidden or its usage_count > 0). In most + * cases, driver should call this function before any I/O has taken place. + * + * This function takes care of setting up using auto suspend for the device, + * the autosuspend delay is set to -1 to make runtime suspend impossible + * until an updated value is either set by user or by driver. 
Drivers do + * not need to touch other autosuspend settings. + * + * The block layer runtime PM is request based, so only works for drivers + * that use request as their IO unit instead of those directly use bio's. + */ +void blk_pm_runtime_init(struct request_queue *q, struct device *dev) +{ + q->dev = dev; + q->rpm_status = RPM_ACTIVE; + pm_runtime_set_autosuspend_delay(q->dev, -1); + pm_runtime_use_autosuspend(q->dev); +} +EXPORT_SYMBOL(blk_pm_runtime_init); + +/** + * blk_pre_runtime_suspend - Pre runtime suspend check + * @q: the queue of the device + * + * Description: + * This function will check if runtime suspend is allowed for the device + * by examining if there are any requests pending in the queue. If there + * are requests pending, the device can not be runtime suspended; otherwise, + * the queue's status will be updated to SUSPENDING and the driver can + * proceed to suspend the device. + * + * For the not allowed case, we mark last busy for the device so that + * runtime PM core will try to autosuspend it some time later. + * + * This function should be called near the start of the device's + * runtime_suspend callback. + * + * Return: + * 0 - OK to runtime suspend the device + * -EBUSY - Device should not be runtime suspended + */ +int blk_pre_runtime_suspend(struct request_queue *q) +{ + int ret = 0; + + spin_lock_irq(q->queue_lock); + if (q->nr_pending) { + ret = -EBUSY; + pm_runtime_mark_last_busy(q->dev); + } else { + q->rpm_status = RPM_SUSPENDING; + } + spin_unlock_irq(q->queue_lock); + return ret; +} +EXPORT_SYMBOL(blk_pre_runtime_suspend); + +/** + * blk_post_runtime_suspend - Post runtime suspend processing + * @q: the queue of the device + * @err: return value of the device's runtime_suspend function + * + * Description: + * Update the queue's runtime status according to the return value of the + * device's runtime suspend function and mark last busy for the device so + * that PM core will try to auto suspend the device at a later time. + * + * This function should be called near the end of the device's + * runtime_suspend callback. + */ +void blk_post_runtime_suspend(struct request_queue *q, int err) +{ + spin_lock_irq(q->queue_lock); + if (!err) { + q->rpm_status = RPM_SUSPENDED; + } else { + q->rpm_status = RPM_ACTIVE; + pm_runtime_mark_last_busy(q->dev); + } + spin_unlock_irq(q->queue_lock); +} +EXPORT_SYMBOL(blk_post_runtime_suspend); + +/** + * blk_pre_runtime_resume - Pre runtime resume processing + * @q: the queue of the device + * + * Description: + * Update the queue's runtime status to RESUMING in preparation for the + * runtime resume of the device. + * + * This function should be called near the start of the device's + * runtime_resume callback. + */ +void blk_pre_runtime_resume(struct request_queue *q) +{ + spin_lock_irq(q->queue_lock); + q->rpm_status = RPM_RESUMING; + spin_unlock_irq(q->queue_lock); +} +EXPORT_SYMBOL(blk_pre_runtime_resume); + +/** + * blk_post_runtime_resume - Post runtime resume processing + * @q: the queue of the device + * @err: return value of the device's runtime_resume function + * + * Description: + * Update the queue's runtime status according to the return value of the + * device's runtime_resume function. If it is successfully resumed, process + * the requests that are queued into the device's queue when it is resuming + * and then mark last busy and initiate autosuspend for it. + * + * This function should be called near the end of the device's + * runtime_resume callback. 
+ */ +void blk_post_runtime_resume(struct request_queue *q, int err) +{ + spin_lock_irq(q->queue_lock); + if (!err) { + q->rpm_status = RPM_ACTIVE; + __blk_run_queue(q); + pm_runtime_mark_last_busy(q->dev); + pm_runtime_autosuspend(q->dev); + } else { + q->rpm_status = RPM_SUSPENDED; + } + spin_unlock_irq(q->queue_lock); +} +EXPORT_SYMBOL(blk_post_runtime_resume); +#endif + int __init blk_dev_init(void) { BUILD_BUG_ON(__REQ_NR_BITS > 8 * diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 78feda9bbae2..89d89c7162aa 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -361,6 +361,12 @@ struct request_queue { */ struct kobject kobj; +#ifdef CONFIG_PM_RUNTIME + struct device *dev; + int rpm_status; + unsigned int nr_pending; +#endif + /* * queue settings */ @@ -960,6 +966,27 @@ struct request_queue *blk_alloc_queue(gfp_t); struct request_queue *blk_alloc_queue_node(gfp_t, int); extern void blk_put_queue(struct request_queue *); +/* + * block layer runtime pm functions + */ +#ifdef CONFIG_PM_RUNTIME +extern void blk_pm_runtime_init(struct request_queue *q, struct device *dev); +extern int blk_pre_runtime_suspend(struct request_queue *q); +extern void blk_post_runtime_suspend(struct request_queue *q, int err); +extern void blk_pre_runtime_resume(struct request_queue *q); +extern void blk_post_runtime_resume(struct request_queue *q, int err); +#else +static inline void blk_pm_runtime_init(struct request_queue *q, + struct device *dev) {} +static inline int blk_pre_runtime_suspend(struct request_queue *q) +{ + return -ENOSYS; +} +static inline void blk_post_runtime_suspend(struct request_queue *q, int err) {} +static inline void blk_pre_runtime_resume(struct request_queue *q) {} +static inline void blk_post_runtime_resume(struct request_queue *q, int err) {} +#endif + /* * blk_plug permits building a queue of related requests by holding the I/O * fragments for a short period. This allows merging of sequential requests -- cgit From 57fb233f078beb5d0437a4ae575fbd4d9eb9c738 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 24 Aug 2012 04:56:11 -0700 Subject: block: Reorder struct bio_set This is prep work for the next patch, which embeds a struct bio_list in struct bio_set. Signed-off-by: Kent Overstreet CC: Jens Axboe --- include/linux/bio.h | 66 ++++++++++++++++++++++++++--------------------------- 1 file changed, 33 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 820e7aaad4fd..93d3d17a300d 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -298,39 +298,6 @@ static inline int bio_associate_current(struct bio *bio) { return -ENOENT; } static inline void bio_disassociate_task(struct bio *bio) { } #endif /* CONFIG_BLK_CGROUP */ -/* - * bio_set is used to allow other portions of the IO system to - * allocate their own private memory pools for bio and iovec structures. - * These memory pools in turn all allocate from the bio_slab - * and the bvec_slabs[]. - */ -#define BIO_POOL_SIZE 2 -#define BIOVEC_NR_POOLS 6 -#define BIOVEC_MAX_IDX (BIOVEC_NR_POOLS - 1) - -struct bio_set { - struct kmem_cache *bio_slab; - unsigned int front_pad; - - mempool_t *bio_pool; -#if defined(CONFIG_BLK_DEV_INTEGRITY) - mempool_t *bio_integrity_pool; -#endif - mempool_t *bvec_pool; -}; - -struct biovec_slab { - int nr_vecs; - char *name; - struct kmem_cache *slab; -}; - -/* - * a small number of entries is fine, not going to be performance critical. 
- * basically we just need to survive
- */
-#define BIO_SPLIT_ENTRIES 2
-
 #ifdef CONFIG_HIGHMEM
 /*
  * remember never ever reenable interrupts between a bvec_kmap_irq and
@@ -527,6 +494,39 @@ static inline struct bio *bio_list_get(struct bio_list *bl)
 	return bio;
 }
 
+/*
+ * bio_set is used to allow other portions of the IO system to
+ * allocate their own private memory pools for bio and iovec structures.
+ * These memory pools in turn all allocate from the bio_slab
+ * and the bvec_slabs[].
+ */
+#define BIO_POOL_SIZE 2
+#define BIOVEC_NR_POOLS 6
+#define BIOVEC_MAX_IDX	(BIOVEC_NR_POOLS - 1)
+
+struct bio_set {
+	struct kmem_cache *bio_slab;
+	unsigned int front_pad;
+
+	mempool_t *bio_pool;
+#if defined(CONFIG_BLK_DEV_INTEGRITY)
+	mempool_t *bio_integrity_pool;
+#endif
+	mempool_t *bvec_pool;
+};
+
+struct biovec_slab {
+	int nr_vecs;
+	char *name;
+	struct kmem_cache *slab;
+};
+
+/*
+ * a small number of entries is fine, not going to be performance critical.
+ * basically we just need to survive
+ */
+#define BIO_SPLIT_ENTRIES 2
+
 #if defined(CONFIG_BLK_DEV_INTEGRITY)
 
 #define bip_vec_idx(bip, idx)	(&(bip->bip_vec[(idx)]))
-- cgit
From df2cb6daa4cbc34406bc4b1ac9b9335df1083a72 Mon Sep 17 00:00:00 2001
From: Kent Overstreet
Date: Mon, 10 Sep 2012 14:33:46 -0700
Subject: block: Avoid deadlocks with bio allocation by stacking drivers

Previously, if we ever tried to allocate more than once from the same
bio set while running under generic_make_request() (i.e. in a stacking
block driver), we risked deadlock.

This is because of the code in generic_make_request() that converts
recursion to iteration; any bios we submit won't actually be submitted
(so they can complete and eventually be freed) until after we return -
this means if we allocate a second bio, we're blocking the first one
from ever being freed.

Thus if enough threads call into a stacking block driver at the same
time with bios that need multiple splits, and the bio_set's reserve
gets used up, we deadlock.

This can be worked around in the driver code - we could check if we're
running under generic_make_request(), then mask out __GFP_WAIT when we
go to allocate a bio, and if the allocation fails punt to a workqueue
and retry the allocation. But this is tricky and not a generic
solution.

This patch solves it for all users by inverting the previously
described technique. We allocate a rescuer workqueue for each bio_set,
and then in the allocation code, if there are bios on
current->bio_list that we would be blocking, we punt them to the
rescuer workqueue to be submitted.

This guarantees forward progress for bio allocations under
generic_make_request() provided each bio is submitted before allocating
the next, and provided the bios are freed after they complete.

Note that this doesn't do anything for allocation from other mempools.
Instead of allocating per bio data structures from a mempool, code
should use bio_set's front_pad.

Tested it by forcing the rescue codepath to be taken (by disabling the
first GFP_NOWAIT attempt), and then ran it with bcache (which does a
lot of arbitrary bio splitting) and verified that the rescuer was being
invoked.
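To illustrate the guarantee (sketch only; my_bio_set, bio_needs_split()
and split_front() are invented names, not part of this patch), a
stacking driver can now safely do:

	static void my_make_request(struct request_queue *q, struct bio *bio)
	{
		while (bio_needs_split(bio)) {
			/* May block, but the bio_set's rescuer guarantees
			 * forward progress because each split is submitted
			 * before the next allocation. */
			struct bio *split =
				bio_alloc_bioset(GFP_NOIO, 0, my_bio_set);

			split_front(bio, split);
			generic_make_request(split);
		}
		generic_make_request(bio);
	}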
Signed-off-by: Kent Overstreet CC: Jens Axboe Acked-by: Tejun Heo Reviewed-by: Muthukumar Ratty --- fs/bio.c | 116 +++++++++++++++++++++++++++++++++++++++++++++++++++- include/linux/bio.h | 9 ++++ 2 files changed, 123 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/bio.c b/fs/bio.c index bb5768f59b32..73b544709945 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -297,6 +297,54 @@ void bio_reset(struct bio *bio) } EXPORT_SYMBOL(bio_reset); +static void bio_alloc_rescue(struct work_struct *work) +{ + struct bio_set *bs = container_of(work, struct bio_set, rescue_work); + struct bio *bio; + + while (1) { + spin_lock(&bs->rescue_lock); + bio = bio_list_pop(&bs->rescue_list); + spin_unlock(&bs->rescue_lock); + + if (!bio) + break; + + generic_make_request(bio); + } +} + +static void punt_bios_to_rescuer(struct bio_set *bs) +{ + struct bio_list punt, nopunt; + struct bio *bio; + + /* + * In order to guarantee forward progress we must punt only bios that + * were allocated from this bio_set; otherwise, if there was a bio on + * there for a stacking driver higher up in the stack, processing it + * could require allocating bios from this bio_set, and doing that from + * our own rescuer would be bad. + * + * Since bio lists are singly linked, pop them all instead of trying to + * remove from the middle of the list: + */ + + bio_list_init(&punt); + bio_list_init(&nopunt); + + while ((bio = bio_list_pop(current->bio_list))) + bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio); + + *current->bio_list = nopunt; + + spin_lock(&bs->rescue_lock); + bio_list_merge(&bs->rescue_list, &punt); + spin_unlock(&bs->rescue_lock); + + queue_work(bs->rescue_workqueue, &bs->rescue_work); +} + /** * bio_alloc_bioset - allocate a bio for I/O * @gfp_mask: the GFP_ mask given to the slab allocator @@ -314,11 +362,27 @@ EXPORT_SYMBOL(bio_reset); * previously allocated bio for IO before attempting to allocate a new one. * Failure to do so can cause deadlocks under memory pressure. * + * Note that when running under generic_make_request() (i.e. any block + * driver), bios are not submitted until after you return - see the code in + * generic_make_request() that converts recursion into iteration, to prevent + * stack overflows. + * + * This would normally mean allocating multiple bios under + * generic_make_request() would be susceptible to deadlocks, but we have + * deadlock avoidance code that resubmits any blocked bios from a rescuer + * thread. + * + * However, we do not guarantee forward progress for allocations from other + * mempools. Doing multiple allocations from the same mempool under + * generic_make_request() should be avoided - instead, use bio_set's front_pad + * for per bio allocations. + * * RETURNS: * Pointer to new bio on success, NULL on failure. */ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) { + gfp_t saved_gfp = gfp_mask; unsigned front_pad; unsigned inline_vecs; unsigned long idx = BIO_POOL_NONE; @@ -336,7 +400,37 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) front_pad = 0; inline_vecs = nr_iovecs; } else { + /* + * generic_make_request() converts recursion to iteration; this + * means if we're running beneath it, any bios we allocate and + * submit will not be submitted (and thus freed) until after we + * return. + * + * This exposes us to a potential deadlock if we allocate + * multiple bios from the same bio_set() while running + * underneath generic_make_request(). 
If we were to allocate + * multiple bios (say a stacking block driver that was splitting + * bios), we would deadlock if we exhausted the mempool's + * reserve. + * + * We solve this, and guarantee forward progress, with a rescuer + * workqueue per bio_set. If we go to allocate and there are + * bios on current->bio_list, we first try the allocation + * without __GFP_WAIT; if that fails, we punt those bios we + * would be blocking to the rescuer workqueue before we retry + * with the original gfp_flags. + */ + + if (current->bio_list && !bio_list_empty(current->bio_list)) + gfp_mask &= ~__GFP_WAIT; + p = mempool_alloc(bs->bio_pool, gfp_mask); + if (!p && gfp_mask != saved_gfp) { + punt_bios_to_rescuer(bs); + gfp_mask = saved_gfp; + p = mempool_alloc(bs->bio_pool, gfp_mask); + } + front_pad = bs->front_pad; inline_vecs = BIO_INLINE_VECS; } @@ -349,6 +443,12 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) if (nr_iovecs > inline_vecs) { bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs); + if (!bvl && gfp_mask != saved_gfp) { + punt_bios_to_rescuer(bs); + gfp_mask = saved_gfp; + bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs); + } + if (unlikely(!bvl)) goto err_free; } else if (nr_iovecs) { @@ -1579,6 +1679,9 @@ static void biovec_free_pools(struct bio_set *bs) void bioset_free(struct bio_set *bs) { + if (bs->rescue_workqueue) + destroy_workqueue(bs->rescue_workqueue); + if (bs->bio_pool) mempool_destroy(bs->bio_pool); @@ -1614,6 +1717,10 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad) bs->front_pad = front_pad; + spin_lock_init(&bs->rescue_lock); + bio_list_init(&bs->rescue_list); + INIT_WORK(&bs->rescue_work, bio_alloc_rescue); + bs->bio_slab = bio_find_or_create_slab(front_pad + back_pad); if (!bs->bio_slab) { kfree(bs); @@ -1624,9 +1731,14 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad) if (!bs->bio_pool) goto bad; - if (!biovec_create_pools(bs, pool_size)) - return bs; + if (biovec_create_pools(bs, pool_size)) + goto bad; + + bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0); + if (!bs->rescue_workqueue) + goto bad; + return bs; bad: bioset_free(bs); return NULL; diff --git a/include/linux/bio.h b/include/linux/bio.h index 93d3d17a300d..b31036ff779f 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -513,6 +513,15 @@ struct bio_set { mempool_t *bio_integrity_pool; #endif mempool_t *bvec_pool; + + /* + * Deadlock avoidance for stacking block drivers: see comments in + * bio_alloc_bioset() for details + */ + spinlock_t rescue_lock; + struct bio_list rescue_list; + struct work_struct rescue_work; + struct workqueue_struct *rescue_workqueue; }; struct biovec_slab { -- cgit From 6fda981cafbf908acd11e1e636fec50e99d56a47 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 12 Oct 2012 13:18:27 -0700 Subject: block: Fix a buffer overrun in bio_integrity_split() bio_integrity_split() seemed to be confusing pointers and arrays - bip_vec in bio_integrity_payload was an array appended to the end of the payload, so the bio_vecs in struct bio_pair should have come after the bio_integrity_payload they're for. Fix it by making bip_vec a pointer to the inline vecs - a later patch is going to make more use of this pointer. Signed-off-by: Kent Overstreet CC: Jens Axboe CC: Martin K. 
Petersen --- fs/bio-integrity.c | 5 +++-- include/linux/bio.h | 4 +++- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index a3f28f331b2b..94fa1c562c0e 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c @@ -112,6 +112,7 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, bip->bip_slab = idx; bip->bip_bio = bio; + bip->bip_vec = bip->bip_inline_vecs; bio->bi_integrity = bip; return bip; @@ -697,8 +698,8 @@ void bio_integrity_split(struct bio *bio, struct bio_pair *bp, int sectors) bp->iv1 = bip->bip_vec[0]; bp->iv2 = bip->bip_vec[0]; - bp->bip1.bip_vec[0] = bp->iv1; - bp->bip2.bip_vec[0] = bp->iv2; + bp->bip1.bip_vec = &bp->iv1; + bp->bip2.bip_vec = &bp->iv2; bp->iv1.bv_len = sectors * bi->tuple_size; bp->iv2.bv_offset += sectors * bi->tuple_size; diff --git a/include/linux/bio.h b/include/linux/bio.h index b31036ff779f..81004fdcc277 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -182,7 +182,9 @@ struct bio_integrity_payload { unsigned short bip_idx; /* current bip_vec index */ struct work_struct bip_work; /* I/O completion */ - struct bio_vec bip_vec[0]; /* embedded bvec array */ + + struct bio_vec *bip_vec; + struct bio_vec bip_inline_vecs[0];/* embedded bvec array */ }; #endif /* CONFIG_BLK_DEV_INTEGRITY */ -- cgit From 9f060e2231ca96ca94f2ffcff730acd72606b280 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 12 Oct 2012 15:29:33 -0700 Subject: block: Convert integrity to bvec_alloc_bs() This adds a pointer to the bvec array to struct bio_integrity_payload, instead of the bvecs always being inline; then the bvecs are allocated with bvec_alloc_bs(). Changed bvec_alloc_bs() and bvec_free_bs() to take a pointer to a mempool instead of the bioset, so that bio integrity can use a different mempool for its bvecs, and thus avoid a potential deadlock. This is eventually for immutable bio vecs - immutable bvecs aren't useful if we still have to copy them, hence the need for the pointer. Less code is always nice too, though. Also, bio_integrity_alloc() was using fs_bio_set if no bio_set was specified. This was wrong - using the bio_set doesn't protect us from memory allocation failures, because we just used kmalloc for the bio_integrity_payload. But it does introduce the possibility of deadlock, if for some reason we weren't supposed to be using fs_bio_set. Signed-off-by: Kent Overstreet CC: Jens Axboe CC: Martin K. Petersen --- fs/bio-integrity.c | 132 +++++++++++++++++++--------------------------------- fs/bio.c | 36 ++++++-------- include/linux/bio.h | 8 ++-- 3 files changed, 68 insertions(+), 108 deletions(-) (limited to 'include/linux') diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index 94fa1c562c0e..8c4c604c840d 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c @@ -27,48 +27,11 @@ #include #include -struct integrity_slab { - struct kmem_cache *slab; - unsigned short nr_vecs; - char name[8]; -}; - -#define IS(x) { .nr_vecs = x, .name = "bip-"__stringify(x) } -struct integrity_slab bip_slab[BIOVEC_NR_POOLS] __read_mostly = { - IS(1), IS(4), IS(16), IS(64), IS(128), IS(BIO_MAX_PAGES), -}; -#undef IS +#define BIP_INLINE_VECS 4 +static struct kmem_cache *bip_slab; static struct workqueue_struct *kintegrityd_wq; -static inline unsigned int vecs_to_idx(unsigned int nr) -{ - switch (nr) { - case 1: - return 0; - case 2 ... 4: - return 1; - case 5 ... 16: - return 2; - case 17 ... 64: - return 3; - case 65 ... 128: - return 4; - case 129 ... 
BIO_MAX_PAGES: - return 5; - default: - BUG(); - } -} - -static inline int use_bip_pool(unsigned int idx) -{ - if (idx == BIOVEC_MAX_IDX) - return 1; - - return 0; -} - /** * bio_integrity_alloc - Allocate integrity payload and attach it to bio * @bio: bio to attach integrity metadata to @@ -84,38 +47,41 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, unsigned int nr_vecs) { struct bio_integrity_payload *bip; - unsigned int idx = vecs_to_idx(nr_vecs); struct bio_set *bs = bio->bi_pool; - - if (!bs) - bs = fs_bio_set; - - BUG_ON(bio == NULL); - bip = NULL; - - /* Lower order allocations come straight from slab */ - if (!use_bip_pool(idx)) - bip = kmem_cache_alloc(bip_slab[idx].slab, gfp_mask); - - /* Use mempool if lower order alloc failed or max vecs were requested */ - if (bip == NULL) { - idx = BIOVEC_MAX_IDX; /* so we free the payload properly later */ + unsigned long idx = BIO_POOL_NONE; + unsigned inline_vecs; + + if (!bs) { + bip = kmalloc(sizeof(struct bio_integrity_payload) + + sizeof(struct bio_vec) * nr_vecs, gfp_mask); + inline_vecs = nr_vecs; + } else { bip = mempool_alloc(bs->bio_integrity_pool, gfp_mask); - - if (unlikely(bip == NULL)) { - printk(KERN_ERR "%s: could not alloc bip\n", __func__); - return NULL; - } + inline_vecs = BIP_INLINE_VECS; } + if (unlikely(!bip)) + return NULL; + memset(bip, 0, sizeof(*bip)); + if (nr_vecs > inline_vecs) { + bip->bip_vec = bvec_alloc(gfp_mask, nr_vecs, &idx, + bs->bvec_integrity_pool); + if (!bip->bip_vec) + goto err; + } else { + bip->bip_vec = bip->bip_inline_vecs; + } + bip->bip_slab = idx; bip->bip_bio = bio; - bip->bip_vec = bip->bip_inline_vecs; bio->bi_integrity = bip; return bip; +err: + mempool_free(bip, bs->bio_integrity_pool); + return NULL; } EXPORT_SYMBOL(bio_integrity_alloc); @@ -131,20 +97,20 @@ void bio_integrity_free(struct bio *bio) struct bio_integrity_payload *bip = bio->bi_integrity; struct bio_set *bs = bio->bi_pool; - if (!bs) - bs = fs_bio_set; - - BUG_ON(bip == NULL); - /* A cloned bio doesn't own the integrity metadata */ if (!bio_flagged(bio, BIO_CLONED) && !bio_flagged(bio, BIO_FS_INTEGRITY) && bip->bip_buf != NULL) kfree(bip->bip_buf); - if (use_bip_pool(bip->bip_slab)) + if (bs) { + if (bip->bip_slab != BIO_POOL_NONE) + bvec_free(bs->bvec_integrity_pool, bip->bip_vec, + bip->bip_slab); + mempool_free(bip, bs->bio_integrity_pool); - else - kmem_cache_free(bip_slab[bip->bip_slab].slab, bip); + } else { + kfree(bip); + } bio->bi_integrity = NULL; } @@ -747,13 +713,14 @@ EXPORT_SYMBOL(bio_integrity_clone); int bioset_integrity_create(struct bio_set *bs, int pool_size) { - unsigned int max_slab = vecs_to_idx(BIO_MAX_PAGES); - if (bs->bio_integrity_pool) return 0; - bs->bio_integrity_pool = - mempool_create_slab_pool(pool_size, bip_slab[max_slab].slab); + bs->bio_integrity_pool = mempool_create_slab_pool(pool_size, bip_slab); + + bs->bvec_integrity_pool = biovec_create_pool(bs, pool_size); + if (!bs->bvec_integrity_pool) + return -1; if (!bs->bio_integrity_pool) return -1; @@ -766,13 +733,14 @@ void bioset_integrity_free(struct bio_set *bs) { if (bs->bio_integrity_pool) mempool_destroy(bs->bio_integrity_pool); + + if (bs->bvec_integrity_pool) + mempool_destroy(bs->bio_integrity_pool); } EXPORT_SYMBOL(bioset_integrity_free); void __init bio_integrity_init(void) { - unsigned int i; - /* * kintegrityd won't block much but may burn a lot of CPU cycles. * Make it highpri CPU intensive wq with max concurrency of 1. 
@@ -782,14 +750,10 @@ void __init bio_integrity_init(void) if (!kintegrityd_wq) panic("Failed to create kintegrityd\n"); - for (i = 0 ; i < BIOVEC_NR_POOLS ; i++) { - unsigned int size; - - size = sizeof(struct bio_integrity_payload) - + bip_slab[i].nr_vecs * sizeof(struct bio_vec); - - bip_slab[i].slab = - kmem_cache_create(bip_slab[i].name, size, 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); - } + bip_slab = kmem_cache_create("bio_integrity_payload", + sizeof(struct bio_integrity_payload) + + sizeof(struct bio_vec) * BIP_INLINE_VECS, + 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); + if (!bip_slab) + panic("Failed to create slab\n"); } diff --git a/fs/bio.c b/fs/bio.c index 73b544709945..40aa96eae99f 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -160,12 +160,12 @@ unsigned int bvec_nr_vecs(unsigned short idx) return bvec_slabs[idx].nr_vecs; } -void bvec_free_bs(struct bio_set *bs, struct bio_vec *bv, unsigned int idx) +void bvec_free(mempool_t *pool, struct bio_vec *bv, unsigned int idx) { BIO_BUG_ON(idx >= BIOVEC_NR_POOLS); if (idx == BIOVEC_MAX_IDX) - mempool_free(bv, bs->bvec_pool); + mempool_free(bv, pool); else { struct biovec_slab *bvs = bvec_slabs + idx; @@ -173,8 +173,8 @@ void bvec_free_bs(struct bio_set *bs, struct bio_vec *bv, unsigned int idx) } } -struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, - struct bio_set *bs) +struct bio_vec *bvec_alloc(gfp_t gfp_mask, int nr, unsigned long *idx, + mempool_t *pool) { struct bio_vec *bvl; @@ -210,7 +210,7 @@ struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, */ if (*idx == BIOVEC_MAX_IDX) { fallback: - bvl = mempool_alloc(bs->bvec_pool, gfp_mask); + bvl = mempool_alloc(pool, gfp_mask); } else { struct biovec_slab *bvs = bvec_slabs + *idx; gfp_t __gfp_mask = gfp_mask & ~(__GFP_WAIT | __GFP_IO); @@ -253,7 +253,7 @@ static void bio_free(struct bio *bio) if (bs) { if (bio_has_allocated_vec(bio)) - bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio)); + bvec_free(bs->bvec_pool, bio->bi_io_vec, BIO_POOL_IDX(bio)); /* * If we have front padding, adjust the bio pointer before freeing @@ -442,11 +442,11 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) bio_init(bio); if (nr_iovecs > inline_vecs) { - bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs); + bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool); if (!bvl && gfp_mask != saved_gfp) { punt_bios_to_rescuer(bs); gfp_mask = saved_gfp; - bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs); + bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool); } if (unlikely(!bvl)) @@ -1661,20 +1661,11 @@ EXPORT_SYMBOL(bio_sector_offset); * create memory pools for biovec's in a bio_set. * use the global biovec slabs created for general use. 
*/ -static int biovec_create_pools(struct bio_set *bs, int pool_entries) +mempool_t *biovec_create_pool(struct bio_set *bs, int pool_entries) { struct biovec_slab *bp = bvec_slabs + BIOVEC_MAX_IDX; - bs->bvec_pool = mempool_create_slab_pool(pool_entries, bp->slab); - if (!bs->bvec_pool) - return -ENOMEM; - - return 0; -} - -static void biovec_free_pools(struct bio_set *bs) -{ - mempool_destroy(bs->bvec_pool); + return mempool_create_slab_pool(pool_entries, bp->slab); } void bioset_free(struct bio_set *bs) @@ -1685,8 +1676,10 @@ void bioset_free(struct bio_set *bs) if (bs->bio_pool) mempool_destroy(bs->bio_pool); + if (bs->bvec_pool) + mempool_destroy(bs->bvec_pool); + bioset_integrity_free(bs); - biovec_free_pools(bs); bio_put_slab(bs); kfree(bs); @@ -1731,7 +1724,8 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad) if (!bs->bio_pool) goto bad; - if (biovec_create_pools(bs, pool_size)) + bs->bvec_pool = biovec_create_pool(bs, pool_size); + if (!bs->bvec_pool) goto bad; bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0); diff --git a/include/linux/bio.h b/include/linux/bio.h index 81004fdcc277..669b1cb18fee 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -213,6 +213,7 @@ extern void bio_pair_release(struct bio_pair *dbio); extern struct bio_set *bioset_create(unsigned int, unsigned int); extern void bioset_free(struct bio_set *); +extern mempool_t *biovec_create_pool(struct bio_set *bs, int pool_entries); extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *); extern void bio_put(struct bio *); @@ -288,8 +289,8 @@ extern struct bio *bio_copy_user_iov(struct request_queue *, int, int, gfp_t); extern int bio_uncopy_user(struct bio *); void zero_fill_bio(struct bio *bio); -extern struct bio_vec *bvec_alloc_bs(gfp_t, int, unsigned long *, struct bio_set *); -extern void bvec_free_bs(struct bio_set *, struct bio_vec *, unsigned int); +extern struct bio_vec *bvec_alloc(gfp_t, int, unsigned long *, mempool_t *); +extern void bvec_free(mempool_t *, struct bio_vec *, unsigned int); extern unsigned int bvec_nr_vecs(unsigned short idx); #ifdef CONFIG_BLK_CGROUP @@ -511,10 +512,11 @@ struct bio_set { unsigned int front_pad; mempool_t *bio_pool; + mempool_t *bvec_pool; #if defined(CONFIG_BLK_DEV_INTEGRITY) mempool_t *bio_integrity_pool; + mempool_t *bvec_integrity_pool; #endif - mempool_t *bvec_pool; /* * Deadlock avoidance for stacking block drivers: see comments in -- cgit From 054bdf646e36c2f7dc1bf6bc6209dbbb5909164b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 28 Sep 2012 13:17:55 -0700 Subject: block: Add bio_advance() This is prep work for immutable bio vecs; we first want to centralize where bvecs are modified. Next two patches convert some existing code to use this function. 
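As a sketch of the intended use (complete_partial() is an invented
caller, not from this patch), a driver finishing a bio piecewise would
do:

	static void complete_partial(struct bio *bio, unsigned int done)
	{
		/* Consume the first @done bytes; bi_sector, bi_size and
		 * bi_idx stay consistent. */
		bio_advance(bio, done);

		if (bio->bi_size)
			generic_make_request(bio);	/* resubmit the rest */
		else
			bio_endio(bio, 0);
	}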
Signed-off-by: Kent Overstreet CC: Jens Axboe --- fs/bio.c | 41 +++++++++++++++++++++++++++++++++++++++++ include/linux/bio.h | 2 ++ include/linux/blk_types.h | 2 ++ 3 files changed, 45 insertions(+) (limited to 'include/linux') diff --git a/fs/bio.c b/fs/bio.c index 40aa96eae99f..7edc08d2246c 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -752,6 +752,47 @@ int bio_add_page(struct bio *bio, struct page *page, unsigned int len, } EXPORT_SYMBOL(bio_add_page); +/** + * bio_advance - increment/complete a bio by some number of bytes + * @bio: bio to advance + * @bytes: number of bytes to complete + * + * This updates bi_sector, bi_size and bi_idx; if the number of bytes to + * complete doesn't align with a bvec boundary, then bv_len and bv_offset will + * be updated on the last bvec as well. + * + * @bio will then represent the remaining, uncompleted portion of the io. + */ +void bio_advance(struct bio *bio, unsigned bytes) +{ + if (bio_integrity(bio)) + bio_integrity_advance(bio, bytes); + + bio->bi_sector += bytes >> 9; + bio->bi_size -= bytes; + + if (bio->bi_rw & BIO_NO_ADVANCE_ITER_MASK) + return; + + while (bytes) { + if (unlikely(bio->bi_idx >= bio->bi_vcnt)) { + WARN_ONCE(1, "bio idx %d >= vcnt %d\n", + bio->bi_idx, bio->bi_vcnt); + break; + } + + if (bytes >= bio_iovec(bio)->bv_len) { + bytes -= bio_iovec(bio)->bv_len; + bio->bi_idx++; + } else { + bio_iovec(bio)->bv_len -= bytes; + bio_iovec(bio)->bv_offset += bytes; + bytes = 0; + } + } +} +EXPORT_SYMBOL(bio_advance); + struct bio_map_data { struct bio_vec *iovecs; struct sg_iovec *sgvecs; diff --git a/include/linux/bio.h b/include/linux/bio.h index 669b1cb18fee..fcb4dba2d8ea 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -248,6 +248,8 @@ extern void bio_endio(struct bio *, int); struct request_queue; extern int bio_phys_segments(struct request_queue *, struct bio *); +extern void bio_advance(struct bio *, unsigned); + extern void bio_init(struct bio *); extern void bio_reset(struct bio *); diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index cdf11191e645..c178d25e588b 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -197,6 +197,8 @@ enum rq_flag_bits { REQ_SECURE) #define REQ_CLONE_MASK REQ_COMMON_MASK +#define BIO_NO_ADVANCE_ITER_MASK (REQ_DISCARD|REQ_WRITE_SAME) + /* This mask is used for both bio and request merge checking */ #define REQ_NOMERGE_FLAGS \ (REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA) -- cgit From f73a1c7d117d07a96d89475066188a2b79e53c48 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 25 Sep 2012 15:05:12 -0700 Subject: block: Add bio_end_sector() Just a little convenience macro - main reason to add it now is preparing for immutable bio vecs, it'll reduce the size of the patch that puts bi_sector/bi_size/bi_idx into a struct bvec_iter. 
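The conversion pattern, roughly (the capacity check mirrors the call
sites below; "disk" stands for whatever gendisk is being checked):

	/* before: open-coded arithmetic, easy to get subtly wrong */
	if (bio->bi_sector + (bio->bi_size >> 9) > get_capacity(disk))
		goto fail;

	/* after: same check via the new macro */
	if (bio_end_sector(bio) > get_capacity(disk))
		goto fail;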
Signed-off-by: Kent Overstreet CC: Jens Axboe CC: Lars Ellenberg CC: Jiri Kosina CC: Alasdair Kergon CC: dm-devel@redhat.com CC: Neil Brown CC: Martin Schwidefsky CC: Heiko Carstens CC: linux-s390@vger.kernel.org CC: Chris Mason CC: Steven Whitehouse Acked-by: Steven Whitehouse --- block/blk-core.c | 2 +- block/cfq-iosched.c | 7 ++----- block/deadline-iosched.c | 2 +- drivers/block/brd.c | 3 +-- drivers/block/pktcdvd.c | 6 +++--- drivers/md/dm-stripe.c | 2 +- drivers/md/dm-verity.c | 2 +- drivers/md/faulty.c | 6 ++---- drivers/md/linear.c | 3 +-- drivers/md/raid1.c | 4 ++-- drivers/md/raid5.c | 14 +++++++------- drivers/s390/block/dcssblk.c | 3 +-- fs/btrfs/extent_io.c | 3 +-- fs/gfs2/lops.c | 2 +- include/linux/bio.h | 1 + 15 files changed, 26 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index 86a1afeef606..7236b826f4a1 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1586,7 +1586,7 @@ static void handle_bad_sector(struct bio *bio) printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n", bdevname(bio->bi_bdev, b), bio->bi_rw, - (unsigned long long)bio->bi_sector + bio_sectors(bio), + (unsigned long long)bio_end_sector(bio), (long long)(i_size_read(bio->bi_bdev->bd_inode) >> 9)); set_bit(BIO_EOF, &bio->bi_flags); diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 4f0ade74cfd0..d5cd3131c57a 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -2270,11 +2270,8 @@ cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio) return NULL; cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio)); - if (cfqq) { - sector_t sector = bio->bi_sector + bio_sectors(bio); - - return elv_rb_find(&cfqq->sort_list, sector); - } + if (cfqq) + return elv_rb_find(&cfqq->sort_list, bio_end_sector(bio)); return NULL; } diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c index 90037b5eb17f..ba19a3afab79 100644 --- a/block/deadline-iosched.c +++ b/block/deadline-iosched.c @@ -132,7 +132,7 @@ deadline_merge(struct request_queue *q, struct request **req, struct bio *bio) * check for front merge */ if (dd->front_merges) { - sector_t sector = bio->bi_sector + bio_sectors(bio); + sector_t sector = bio_end_sector(bio); __rq = elv_rb_find(&dd->sort_list[bio_data_dir(bio)], sector); if (__rq) { diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 531ceb31d0ff..f1a29f8e9d33 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -334,8 +334,7 @@ static void brd_make_request(struct request_queue *q, struct bio *bio) int err = -EIO; sector = bio->bi_sector; - if (sector + (bio->bi_size >> SECTOR_SHIFT) > - get_capacity(bdev->bd_disk)) + if (bio_end_sector(bio) > get_capacity(bdev->bd_disk)) goto out; if (unlikely(bio->bi_rw & REQ_DISCARD)) { diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 2e7de7a59bfc..26938e8e2fc3 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -901,7 +901,7 @@ static void pkt_iosched_process_queue(struct pktcdvd_device *pd) pd->iosched.successive_reads += bio->bi_size >> 10; else { pd->iosched.successive_reads = 0; - pd->iosched.last_write = bio->bi_sector + bio_sectors(bio); + pd->iosched.last_write = bio_end_sector(bio); } if (pd->iosched.successive_reads >= HI_SPEED_SWITCH) { if (pd->read_speed == pd->write_speed) { @@ -2454,7 +2454,7 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio) zone = ZONE(bio->bi_sector, pd); VPRINTK("pkt_make_request: start = %6llx stop = %6llx\n", (unsigned long long)bio->bi_sector, - (unsigned long 
long)(bio->bi_sector + bio_sectors(bio))); + (unsigned long long)bio_end_sector(bio)); /* Check if we have to split the bio */ { @@ -2462,7 +2462,7 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio) sector_t last_zone; int first_sectors; - last_zone = ZONE(bio->bi_sector + bio_sectors(bio) - 1, pd); + last_zone = ZONE(bio_end_sector(bio) - 1, pd); if (last_zone != zone) { BUG_ON(last_zone != zone + pd->settings.size); first_sectors = last_zone - bio->bi_sector; diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index d8837d313f54..ea5e878a30b9 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -258,7 +258,7 @@ static int stripe_map_range(struct stripe_c *sc, struct bio *bio, sector_t begin, end; stripe_map_range_sector(sc, bio->bi_sector, target_stripe, &begin); - stripe_map_range_sector(sc, bio->bi_sector + bio_sectors(bio), + stripe_map_range_sector(sc, bio_end_sector(bio), target_stripe, &end); if (begin < end) { bio->bi_bdev = sc->stripe[target_stripe].dev->bdev; diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c index 6ad538375c3c..923115d08baa 100644 --- a/drivers/md/dm-verity.c +++ b/drivers/md/dm-verity.c @@ -472,7 +472,7 @@ static int verity_map(struct dm_target *ti, struct bio *bio) return -EIO; } - if ((bio->bi_sector + bio_sectors(bio)) >> + if (bio_end_sector(bio) >> (v->data_dev_block_bits - SECTOR_SHIFT) > v->data_blocks) { DMERR_LIMIT("io out of range"); return -EIO; diff --git a/drivers/md/faulty.c b/drivers/md/faulty.c index 5e7dc772f5de..3193aefe982b 100644 --- a/drivers/md/faulty.c +++ b/drivers/md/faulty.c @@ -185,8 +185,7 @@ static void make_request(struct mddev *mddev, struct bio *bio) return; } - if (check_sector(conf, bio->bi_sector, bio->bi_sector+(bio->bi_size>>9), - WRITE)) + if (check_sector(conf, bio->bi_sector, bio_end_sector(bio), WRITE)) failit = 1; if (check_mode(conf, WritePersistent)) { add_sector(conf, bio->bi_sector, WritePersistent); @@ -196,8 +195,7 @@ static void make_request(struct mddev *mddev, struct bio *bio) failit = 1; } else { /* read request */ - if (check_sector(conf, bio->bi_sector, bio->bi_sector + (bio->bi_size>>9), - READ)) + if (check_sector(conf, bio->bi_sector, bio_end_sector(bio), READ)) failit = 1; if (check_mode(conf, ReadTransient)) failit = 1; diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 21014836bdbf..f03fabd2b37b 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -317,8 +317,7 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio) bio_io_error(bio); return; } - if (unlikely(bio->bi_sector + (bio->bi_size >> 9) > - tmp_dev->end_sector)) { + if (unlikely(bio_end_sector(bio) > tmp_dev->end_sector)) { /* This bio crosses a device boundary, so we have to * split it. 
*/ diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index fd86b372692d..4d8c2e0a6bad 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1018,7 +1018,7 @@ static void make_request(struct mddev *mddev, struct bio * bio) md_write_start(mddev, bio); /* wait on superblock update early */ if (bio_data_dir(bio) == WRITE && - bio->bi_sector + bio->bi_size/512 > mddev->suspend_lo && + bio_end_sector(bio) > mddev->suspend_lo && bio->bi_sector < mddev->suspend_hi) { /* As the suspend_* range is controlled by * userspace, we want an interruptible @@ -1029,7 +1029,7 @@ static void make_request(struct mddev *mddev, struct bio * bio) flush_signals(current); prepare_to_wait(&conf->wait_barrier, &w, TASK_INTERRUPTIBLE); - if (bio->bi_sector + bio->bi_size/512 <= mddev->suspend_lo || + if (bio_end_sector(bio) <= mddev->suspend_lo || bio->bi_sector >= mddev->suspend_hi) break; schedule(); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 3ee2912889e7..68706970d217 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2384,11 +2384,11 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in } else bip = &sh->dev[dd_idx].toread; while (*bip && (*bip)->bi_sector < bi->bi_sector) { - if ((*bip)->bi_sector + ((*bip)->bi_size >> 9) > bi->bi_sector) + if (bio_end_sector(*bip) > bi->bi_sector) goto overlap; bip = & (*bip)->bi_next; } - if (*bip && (*bip)->bi_sector < bi->bi_sector + ((bi->bi_size)>>9)) + if (*bip && (*bip)->bi_sector < bio_end_sector(bi)) goto overlap; BUG_ON(*bip && bi->bi_next && (*bip) != bi->bi_next); @@ -2404,8 +2404,8 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in sector < sh->dev[dd_idx].sector + STRIPE_SECTORS && bi && bi->bi_sector <= sector; bi = r5_next_bio(bi, sh->dev[dd_idx].sector)) { - if (bi->bi_sector + (bi->bi_size>>9) >= sector) - sector = bi->bi_sector + (bi->bi_size>>9); + if (bio_end_sector(bi) >= sector) + sector = bio_end_sector(bi); } if (sector >= sh->dev[dd_idx].sector + STRIPE_SECTORS) set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags); @@ -3941,7 +3941,7 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio) 0, &dd_idx, NULL); - end_sector = align_bi->bi_sector + (align_bi->bi_size >> 9); + end_sector = bio_end_sector(align_bi); rcu_read_lock(); rdev = rcu_dereference(conf->disks[dd_idx].replacement); if (!rdev || test_bit(Faulty, &rdev->flags) || @@ -4216,7 +4216,7 @@ static void make_request(struct mddev *mddev, struct bio * bi) } logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1); - last_sector = bi->bi_sector + (bi->bi_size>>9); + last_sector = bio_end_sector(bi); bi->bi_next = NULL; bi->bi_phys_segments = 1; /* over-loaded to count active stripes */ @@ -4679,7 +4679,7 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio) logical_sector = raid_bio->bi_sector & ~((sector_t)STRIPE_SECTORS-1); sector = raid5_compute_sector(conf, logical_sector, 0, &dd_idx, NULL); - last_sector = raid_bio->bi_sector + (raid_bio->bi_size>>9); + last_sector = bio_end_sector(raid_bio); for (; logical_sector < last_sector; logical_sector += STRIPE_SECTORS, diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index b6ad0de07930..12d08b4529e9 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -826,8 +826,7 @@ dcssblk_make_request(struct request_queue *q, struct bio *bio) if ((bio->bi_sector & 7) != 0 || (bio->bi_size & 4095) != 0) /* Request is not page-aligned. 
*/ goto fail; - if (((bio->bi_size >> 9) + bio->bi_sector) - > get_capacity(bio->bi_bdev->bd_disk)) { + if (bio_end_sector(bio) > get_capacity(bio->bi_bdev->bd_disk)) { /* Request beyond end of DCSS segment. */ goto fail; } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index f173c5af6461..bed072aa461f 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2527,8 +2527,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, if (old_compressed) contig = bio->bi_sector == sector; else - contig = bio->bi_sector + (bio->bi_size >> 9) == - sector; + contig = bio_end_sector(bio) == sector; if (prev_bio_flags != bio_flags || !contig || merge_bio(rw, tree, page, offset, page_size, bio, bio_flags) || diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index a5055977a214..5c37ef982390 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -300,7 +300,7 @@ static struct bio *gfs2_log_get_bio(struct gfs2_sbd *sdp, u64 blkno) u64 nblk; if (bio) { - nblk = bio->bi_sector + bio_sectors(bio); + nblk = bio_end_sector(bio); nblk >>= sdp->sd_fsb2bb_shift; if (blkno == nblk) return bio; diff --git a/include/linux/bio.h b/include/linux/bio.h index fcb4dba2d8ea..20507eb7c979 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -67,6 +67,7 @@ #define bio_offset(bio) bio_iovec((bio))->bv_offset #define bio_segments(bio) ((bio)->bi_vcnt - (bio)->bi_idx) #define bio_sectors(bio) ((bio)->bi_size >> 9) +#define bio_end_sector(bio) ((bio)->bi_sector + bio_sectors((bio))) static inline unsigned int bio_cur_bytes(struct bio *bio) { -- cgit From 9e882242c6193ae6f416f2d8d8db0d9126bd996b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 10 Sep 2012 14:41:12 -0700 Subject: block: Add submit_bio_wait(), remove from md Random cleanup - this code was duplicated and it's not really specific to md. Also added the ability to return the actual error code. 
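Usage sketch (read_page_sync() and its arguments are illustrative; only
submit_bio_wait() itself is added by this patch) - a synchronous
single-page read from a context that may sleep:

	static int read_page_sync(struct block_device *bdev, sector_t sector,
				  struct page *page)
	{
		struct bio *bio = bio_alloc(GFP_NOIO, 1);
		int err;

		bio->bi_bdev = bdev;
		bio->bi_sector = sector;
		bio_add_page(bio, page, PAGE_SIZE, 0);

		/* REQ_SYNC is OR'd in by the helper; returns 0 on success
		 * or the error passed to bio_endio(). */
		err = submit_bio_wait(READ, bio);
		bio_put(bio);
		return err;
	}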
Signed-off-by: Kent Overstreet CC: Jens Axboe CC: NeilBrown Acked-by: Tejun Heo --- drivers/md/raid1.c | 19 ------------------- drivers/md/raid10.c | 19 ------------------- fs/bio.c | 36 ++++++++++++++++++++++++++++++++++++ include/linux/bio.h | 1 + 4 files changed, 37 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index f741c9fe25c8..800748d585ca 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -2059,25 +2059,6 @@ static void fix_read_error(struct r1conf *conf, int read_disk, } } -static void bi_complete(struct bio *bio, int error) -{ - complete((struct completion *)bio->bi_private); -} - -static int submit_bio_wait(int rw, struct bio *bio) -{ - struct completion event; - rw |= REQ_SYNC; - - init_completion(&event); - bio->bi_private = &event; - bio->bi_end_io = bi_complete; - submit_bio(rw, bio); - wait_for_completion(&event); - - return test_bit(BIO_UPTODATE, &bio->bi_flags); -} - static int narrow_write_error(struct r1bio *r1_bio, int i) { struct mddev *mddev = r1_bio->mddev; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 6ffb6c08aec5..434586d43115 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -2529,25 +2529,6 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 } } -static void bi_complete(struct bio *bio, int error) -{ - complete((struct completion *)bio->bi_private); -} - -static int submit_bio_wait(int rw, struct bio *bio) -{ - struct completion event; - rw |= REQ_SYNC; - - init_completion(&event); - bio->bi_private = &event; - bio->bi_end_io = bi_complete; - submit_bio(rw, bio); - wait_for_completion(&event); - - return test_bit(BIO_UPTODATE, &bio->bi_flags); -} - static int narrow_write_error(struct r10bio *r10_bio, int i) { struct bio *bio = r10_bio->master_bio; diff --git a/fs/bio.c b/fs/bio.c index f1b4c1651089..4ce24ee5dcd0 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -752,6 +752,42 @@ int bio_add_page(struct bio *bio, struct page *page, unsigned int len, } EXPORT_SYMBOL(bio_add_page); +struct submit_bio_ret { + struct completion event; + int error; +}; + +static void submit_bio_wait_endio(struct bio *bio, int error) +{ + struct submit_bio_ret *ret = bio->bi_private; + + ret->error = error; + complete(&ret->event); +} + +/** + * submit_bio_wait - submit a bio, and wait until it completes + * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) + * @bio: The &struct bio which describes the I/O + * + * Simple wrapper around submit_bio(). Returns 0 on success, or the error from + * bio_endio() on failure. 
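+ *
+ * This blocks in wait_for_completion() until the bio completes, so it must
+ * be called from process context.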
+ */ +int submit_bio_wait(int rw, struct bio *bio) +{ + struct submit_bio_ret ret; + + rw |= REQ_SYNC; + init_completion(&ret.event); + bio->bi_private = &ret; + bio->bi_end_io = submit_bio_wait_endio; + submit_bio(rw, bio); + wait_for_completion(&ret.event); + + return ret.error; +} +EXPORT_SYMBOL(submit_bio_wait); + /** * bio_advance - increment/complete a bio by some number of bytes * @bio: bio to advance diff --git a/include/linux/bio.h b/include/linux/bio.h index 20507eb7c979..b20a9cd776dd 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -249,6 +249,7 @@ extern void bio_endio(struct bio *, int); struct request_queue; extern int bio_phys_segments(struct request_queue *, struct bio *); +extern int submit_bio_wait(int rw, struct bio *bio); extern void bio_advance(struct bio *, unsigned); extern void bio_init(struct bio *); -- cgit From 16ac3d63e74f3d6e34e42d6e523b6a61de0020f0 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 10 Sep 2012 13:57:51 -0700 Subject: block: Add bio_copy_data() This gets open coded quite a bit and it's tricky to get right, so make a generic version and convert some existing users over to it instead. Signed-off-by: Kent Overstreet CC: Jens Axboe --- fs/bio.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/bio.h | 2 ++ 2 files changed, 72 insertions(+) (limited to 'include/linux') diff --git a/fs/bio.c b/fs/bio.c index 4ce24ee5dcd0..e437f9aae67d 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -829,6 +829,76 @@ void bio_advance(struct bio *bio, unsigned bytes) } EXPORT_SYMBOL(bio_advance); +/** + * bio_copy_data - copy contents of data buffers from one chain of bios to + * another + * @src: source bio list + * @dst: destination bio list + * + * If @src and @dst are single bios, bi_next must be NULL - otherwise, treats + * @src and @dst as linked lists of bios. + * + * Stops when it reaches the end of either @src or @dst - that is, copies + * min(src->bi_size, dst->bi_size) bytes (or the equivalent for lists of bios). 
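+ *
+ * The copy is done through temporary kmap_atomic() mappings, one bvec at a
+ * time.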
+ */
+void bio_copy_data(struct bio *dst, struct bio *src)
+{
+ struct bio_vec *src_bv, *dst_bv;
+ unsigned src_offset, dst_offset, bytes;
+ void *src_p, *dst_p;
+
+ src_bv = bio_iovec(src);
+ dst_bv = bio_iovec(dst);
+
+ src_offset = src_bv->bv_offset;
+ dst_offset = dst_bv->bv_offset;
+
+ while (1) {
+ if (src_offset == src_bv->bv_offset + src_bv->bv_len) {
+ src_bv++;
+ if (src_bv == bio_iovec_idx(src, src->bi_vcnt)) {
+ src = src->bi_next;
+ if (!src)
+ break;
+
+ src_bv = bio_iovec(src);
+ }
+
+ src_offset = src_bv->bv_offset;
+ }
+
+ if (dst_offset == dst_bv->bv_offset + dst_bv->bv_len) {
+ dst_bv++;
+ if (dst_bv == bio_iovec_idx(dst, dst->bi_vcnt)) {
+ dst = dst->bi_next;
+ if (!dst)
+ break;
+
+ dst_bv = bio_iovec(dst);
+ }
+
+ dst_offset = dst_bv->bv_offset;
+ }
+
+ bytes = min(dst_bv->bv_offset + dst_bv->bv_len - dst_offset,
+ src_bv->bv_offset + src_bv->bv_len - src_offset);
+
+ src_p = kmap_atomic(src_bv->bv_page);
+ dst_p = kmap_atomic(dst_bv->bv_page);
+
+ memcpy(dst_p + dst_offset,
+ src_p + src_offset,
+ bytes);
+
+ kunmap_atomic(dst_p);
+ kunmap_atomic(src_p);
+
+ src_offset += bytes;
+ dst_offset += bytes;
+ }
+}
+EXPORT_SYMBOL(bio_copy_data);
+ struct bio_map_data { struct bio_vec *iovecs; struct sg_iovec *sgvecs; diff --git a/include/linux/bio.h b/include/linux/bio.h index b20a9cd776dd..90d36c65cb70 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -286,6 +286,8 @@ static inline void bio_flush_dcache_pages(struct bio *bi) } #endif +extern void bio_copy_data(struct bio *dst, struct bio *src); + extern struct bio *bio_copy_user(struct request_queue *, struct rq_map_data *, unsigned long, unsigned int, int, gfp_t); extern struct bio *bio_copy_user_iov(struct request_queue *, -- cgit From d74c6d514fe314b8bdab58b487b25992291577ec Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 6 Feb 2013 12:23:11 -0800 Subject: block: Add bio_for_each_segment_all() __bio_for_each_segment() iterates bvecs from the specified index instead of bio->bi_idx. Currently, the only usage is to walk all the bvecs after the bio has been advanced by specifying 0 index. For immutable bvecs, we need to split these apart; bio_for_each_segment() is going to have a different implementation. This will also help document the intent of code that's using it - bio_for_each_segment_all() is only legal to use for code that owns the bio. Signed-off-by: Kent Overstreet CC: Jens Axboe CC: Neil Brown CC: Boaz Harrosh --- drivers/block/rbd.c | 2 +- drivers/md/raid1.c | 2 +- fs/bio.c | 12 ++++++------ fs/exofs/ore.c | 2 +- fs/exofs/ore_raid.c | 2 +- include/linux/bio.h | 17 ++++++++++++++--- mm/bounce.c | 2 +- 7 files changed, 25 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 6c81a4c040b9..11e179826b60 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -952,7 +952,7 @@ static struct bio *bio_clone_range(struct bio *bio_src, /* Find first affected segment... 
*/ resid = offset; - __bio_for_each_segment(bv, bio_src, idx, 0) { + bio_for_each_segment(bv, bio_src, idx) { if (resid < bv->bv_len) break; resid -= bv->bv_len; diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 0a3988a25aab..853482015d3d 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1291,7 +1291,7 @@ read_again: * know the original bi_idx, so we just free * them all */ - __bio_for_each_segment(bvec, mbio, j, 0) + bio_for_each_segment_all(bvec, mbio, j) bvec->bv_page = r1_bio->behind_bvecs[j].bv_page; if (test_bit(WriteMostly, &conf->mirrors[i].rdev->flags)) atomic_inc(&r1_bio->behind_remaining); diff --git a/fs/bio.c b/fs/bio.c index e437f9aae67d..618f9044c414 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -961,7 +961,7 @@ static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs, int iov_idx = 0; unsigned int iov_off = 0; - __bio_for_each_segment(bvec, bio, i, 0) { + bio_for_each_segment_all(bvec, bio, i) { char *bv_addr = page_address(bvec->bv_page); unsigned int bv_len = iovecs[i].bv_len; @@ -1143,7 +1143,7 @@ struct bio *bio_copy_user_iov(struct request_queue *q, return bio; cleanup: if (!map_data) - bio_for_each_segment(bvec, bio, i) + bio_for_each_segment_all(bvec, bio, i) __free_page(bvec->bv_page); bio_put(bio); @@ -1357,7 +1357,7 @@ static void __bio_unmap_user(struct bio *bio) /* * make sure we dirty pages we wrote to */ - __bio_for_each_segment(bvec, bio, i, 0) { + bio_for_each_segment_all(bvec, bio, i) { if (bio_data_dir(bio) == READ) set_page_dirty_lock(bvec->bv_page); @@ -1463,7 +1463,7 @@ static void bio_copy_kern_endio(struct bio *bio, int err) int i; char *p = bmd->sgvecs[0].iov_base; - __bio_for_each_segment(bvec, bio, i, 0) { + bio_for_each_segment_all(bvec, bio, i) { char *addr = page_address(bvec->bv_page); int len = bmd->iovecs[i].bv_len; @@ -1503,7 +1503,7 @@ struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, if (!reading) { void *p = data; - bio_for_each_segment(bvec, bio, i) { + bio_for_each_segment_all(bvec, bio, i) { char *addr = page_address(bvec->bv_page); memcpy(addr, p, bvec->bv_len); @@ -1789,7 +1789,7 @@ sector_t bio_sector_offset(struct bio *bio, unsigned short index, if (index >= bio->bi_idx) index = bio->bi_vcnt - 1; - __bio_for_each_segment(bv, bio, i, 0) { + bio_for_each_segment_all(bv, bio, i) { if (i == index) { if (offset > bv->bv_offset) sectors += (offset - bv->bv_offset) / sector_sz; diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c index f936cb50dc0d..b74422888604 100644 --- a/fs/exofs/ore.c +++ b/fs/exofs/ore.c @@ -401,7 +401,7 @@ static void _clear_bio(struct bio *bio) struct bio_vec *bv; unsigned i; - __bio_for_each_segment(bv, bio, i, 0) { + bio_for_each_segment_all(bv, bio, i) { unsigned this_count = bv->bv_len; if (likely(PAGE_SIZE == this_count)) diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c index b963f38ac298..7682b970d0f1 100644 --- a/fs/exofs/ore_raid.c +++ b/fs/exofs/ore_raid.c @@ -432,7 +432,7 @@ static void _mark_read4write_pages_uptodate(struct ore_io_state *ios, int ret) if (!bio) continue; - __bio_for_each_segment(bv, bio, i, 0) { + bio_for_each_segment_all(bv, bio, i) { struct page *page = bv->bv_page; SetPageUptodate(page); diff --git a/include/linux/bio.h b/include/linux/bio.h index 90d36c65cb70..be2efa09f9bf 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -137,16 +137,27 @@ static inline int bio_has_allocated_vec(struct bio *bio) #define bio_io_error(bio) bio_endio((bio), -EIO) /* - * drivers should not use the __ version unless they _really_ want to - 
* run through the entire bio and not just pending pieces + * drivers should not use the __ version unless they _really_ know what + * they're doing */ #define __bio_for_each_segment(bvl, bio, i, start_idx) \ for (bvl = bio_iovec_idx((bio), (start_idx)), i = (start_idx); \ i < (bio)->bi_vcnt; \ bvl++, i++) +/* + * drivers should _never_ use the all version - the bio may have been split + * before it got to the driver and the driver won't own all of it + */ +#define bio_for_each_segment_all(bvl, bio, i) \ + for (i = 0; \ + bvl = bio_iovec_idx((bio), (i)), i < (bio)->bi_vcnt; \ + i++) + #define bio_for_each_segment(bvl, bio, i) \ - __bio_for_each_segment(bvl, bio, i, (bio)->bi_idx) + for (i = (bio)->bi_idx; \ + bvl = bio_iovec_idx((bio), (i)), i < (bio)->bi_vcnt; \ + i++) /* * get a reference to a bio, so it won't disappear. the intended use is diff --git a/mm/bounce.c b/mm/bounce.c index 55f512af50c7..2ee1b6fef44a 100644 --- a/mm/bounce.c +++ b/mm/bounce.c @@ -134,7 +134,7 @@ static void bounce_end_io(struct bio *bio, mempool_t *pool, int err) /* * free up bounce indirect pages used */ - __bio_for_each_segment(bvec, bio, i, 0) { + bio_for_each_segment_all(bvec, bio, i) { org_vec = bio_orig->bi_io_vec + i; if (bvec->bv_page == org_vec->bv_page) continue; -- cgit From a07876064a0b73ab5ef1ebcf14b1cf0231c07858 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 10 Sep 2012 14:03:28 -0700 Subject: block: Add bio_alloc_pages() More utility code to replace stuff that's getting open coded. Signed-off-by: Kent Overstreet CC: Jens Axboe CC: NeilBrown --- drivers/md/raid1.c | 16 +++------------- fs/bio.c | 28 ++++++++++++++++++++++++++++ include/linux/bio.h | 1 + 3 files changed, 32 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index a7ea954abe1d..aeb4e3f74791 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -92,7 +92,6 @@ static void r1bio_pool_free(void *r1_bio, void *data) static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data) { struct pool_info *pi = data; - struct page *page; struct r1bio *r1_bio; struct bio *bio; int i, j; @@ -122,14 +121,10 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data) j = 1; while(j--) { bio = r1_bio->bios[j]; - for (i = 0; i < RESYNC_PAGES; i++) { - page = alloc_page(gfp_flags); - if (unlikely(!page)) - goto out_free_pages; + bio->bi_vcnt = RESYNC_PAGES; - bio->bi_io_vec[i].bv_page = page; - bio->bi_vcnt = i+1; - } + if (bio_alloc_pages(bio, gfp_flags)) + goto out_free_bio; } /* If not user-requests, copy the page pointers to all bios */ if (!test_bit(MD_RECOVERY_REQUESTED, &pi->mddev->recovery)) { @@ -143,11 +138,6 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data) return r1_bio; -out_free_pages: - for (j=0 ; j < pi->raid_disks; j++) - for (i=0; i < r1_bio->bios[j]->bi_vcnt ; i++) - put_page(r1_bio->bios[j]->bi_io_vec[i].bv_page); - j = -1; out_free_bio: while (++j < pi->raid_disks) bio_put(r1_bio->bios[j]); diff --git a/fs/bio.c b/fs/bio.c index fe3aee90c988..e545a440d376 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -829,6 +829,34 @@ void bio_advance(struct bio *bio, unsigned bytes) } EXPORT_SYMBOL(bio_advance); +/** + * bio_alloc_pages - allocates a single page for each bvec in a bio + * @bio: bio to allocate pages for + * @gfp_mask: flags for allocation + * + * Allocates pages up to @bio->bi_vcnt. + * + * Returns 0 on success, -ENOMEM on failure. On failure, any allocated pages are + * freed. 
+ */
+int bio_alloc_pages(struct bio *bio, gfp_t gfp_mask)
+{
+ int i;
+ struct bio_vec *bv;
+
+ bio_for_each_segment_all(bv, bio, i) {
+ bv->bv_page = alloc_page(gfp_mask);
+ if (!bv->bv_page) {
+ while (--bv >= bio->bi_io_vec)
+ __free_page(bv->bv_page);
+ return -ENOMEM;
+ }
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(bio_alloc_pages);
+ /** * bio_copy_data - copy contents of data buffers from one chain of bios to * another diff --git a/include/linux/bio.h b/include/linux/bio.h index be2efa09f9bf..e25378f2f408 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -298,6 +298,7 @@ static inline void bio_flush_dcache_pages(struct bio *bi) #endif extern void bio_copy_data(struct bio *dst, struct bio *src); +extern int bio_alloc_pages(struct bio *bio, gfp_t gfp); extern struct bio *bio_copy_user(struct request_queue *, struct rq_map_data *, unsigned long, unsigned int, int, gfp_t); -- cgit From a38352e0ac02dbbd4fa464dc22d1352b5fbd06fd Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 25 May 2012 13:03:11 -0700 Subject: block: Add an explicit bio flag for bios that own their bvec This is for the new bio splitting code. When we split a bio, if the split occurred on a bvec boundary we reuse the bvec for the new bio. But that means bio_free() can't free it, hence the explicit flag. Signed-off-by: Kent Overstreet CC: Jens Axboe Acked-by: Tejun Heo --- fs/bio.c | 4 +++- include/linux/bio.h | 5 ----- include/linux/blk_types.h | 1 + 3 files changed, 4 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/fs/bio.c b/fs/bio.c index e545a440d376..9238a54b562c 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -252,7 +252,7 @@ static void bio_free(struct bio *bio) __bio_free(bio); if (bs) { - if (bio_has_allocated_vec(bio)) + if (bio_flagged(bio, BIO_OWNS_VEC)) bvec_free(bs->bvec_pool, bio->bi_io_vec, BIO_POOL_IDX(bio)); /* @@ -451,6 +451,8 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) if (unlikely(!bvl)) goto err_free; + + bio->bi_flags |= 1 << BIO_OWNS_VEC; } else if (nr_iovecs) { bvl = bio->bi_inline_vecs; } diff --git a/include/linux/bio.h b/include/linux/bio.h index e25378f2f408..794bcd0c5039 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -85,11 +85,6 @@ static inline void *bio_data(struct bio *bio) return NULL; } -static inline int bio_has_allocated_vec(struct bio *bio) -{ - return bio->bi_io_vec && bio->bi_io_vec != bio->bi_inline_vecs; -} - /* * will die */ diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index c178d25e588b..538289ffc704 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -117,6 +117,7 @@ struct bio { * BIO_POOL_IDX() */ #define BIO_RESET_BITS 12 +#define BIO_OWNS_VEC 12 /* bio_free() should free bvec */ #define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag))) -- cgit From 29ed7813ce5c4661261aeebddb1b8660e0860223 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 4 Sep 2012 09:54:22 -0700 Subject: bio-integrity: Add explicit field for owner of bip_buf This was the only real user of BIO_CLONED, which didn't have very clear semantics. Convert to its own flag so we can get rid of BIO_CLONED. Signed-off-by: Kent Overstreet CC: Jens Axboe CC: Martin K. 
Petersen --- fs/bio-integrity.c | 5 ++--- include/linux/bio.h | 1 + 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index ca7b02dbf09d..8fb42916d8a2 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c @@ -97,9 +97,7 @@ void bio_integrity_free(struct bio *bio) struct bio_integrity_payload *bip = bio->bi_integrity; struct bio_set *bs = bio->bi_pool; - /* A cloned bio doesn't own the integrity metadata */ - if (!bio_flagged(bio, BIO_CLONED) && !bio_flagged(bio, BIO_FS_INTEGRITY) - && bip->bip_buf != NULL) + if (bip->bip_owns_buf) kfree(bip->bip_buf); if (bs) { @@ -386,6 +384,7 @@ int bio_integrity_prep(struct bio *bio) return -EIO; } + bip->bip_owns_buf = 1; + bip->bip_buf = buf; + bip->bip_size = len; + bip->bip_sector = bio->bi_sector; diff --git a/include/linux/bio.h b/include/linux/bio.h index 794bcd0c5039..ef24466d8f82 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -187,6 +187,7 @@ struct bio_integrity_payload { unsigned short bip_slab; /* slab the bip came from */ unsigned short bip_vcnt; /* # of integrity bio_vecs */ unsigned short bip_idx; /* current bip_vec index */ + unsigned bip_owns_buf:1; /* should free bip_buf */ struct work_struct bip_work; /* I/O completion */ -- cgit From 84759c6d18c5144432781ddca037d929ee9db8a5 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 21 Sep 2011 21:43:05 -0700 Subject: Revert "rw_semaphore: remove up/down_read_non_owner" This reverts commit 11b80f459adaf91a712f95e7734a17655a36bf30. Bcache needs rw semaphores for cache coherency in writeback mode - writes have to take a read lock on a per cache device rw sem, and release it when the bio completes. But since this is for bios it's naturally not in the context of the process that originally took the lock. Signed-off-by: Kent Overstreet CC: Christoph Hellwig CC: David Howells --- include/linux/rwsem.h | 10 ++++++++++ kernel/rwsem.c | 16 ++++++++++++++++ 2 files changed, 26 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 8da67d625e13..0616ffe45702 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -133,10 +133,20 @@ do { \ _down_write_nest_lock(sem, &(nest_lock)->dep_map); \ } while (0); +/* + * Take/release a lock when the owner will not be the one to release it. + * + * [ This API should be avoided as much as possible - the + * proper abstraction for this case is completions. 
] + */ +extern void down_read_non_owner(struct rw_semaphore *sem); +extern void up_read_non_owner(struct rw_semaphore *sem); #else # define down_read_nested(sem, subclass) down_read(sem) # define down_write_nest_lock(sem, nest_lock) down_write(sem) # define down_write_nested(sem, subclass) down_write(sem) +# define down_read_non_owner(sem) down_read(sem) +# define up_read_non_owner(sem) up_read(sem) #endif #endif /* _LINUX_RWSEM_H */ diff --git a/kernel/rwsem.c b/kernel/rwsem.c index b3c6c3fcd847..cfff1435bdfb 100644 --- a/kernel/rwsem.c +++ b/kernel/rwsem.c @@ -126,6 +126,15 @@ void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest) EXPORT_SYMBOL(_down_write_nest_lock); +void down_read_non_owner(struct rw_semaphore *sem) +{ + might_sleep(); + + __down_read(sem); +} + +EXPORT_SYMBOL(down_read_non_owner); + void down_write_nested(struct rw_semaphore *sem, int subclass) { might_sleep(); @@ -136,6 +145,13 @@ void down_write_nested(struct rw_semaphore *sem, int subclass) EXPORT_SYMBOL(down_write_nested); +void up_read_non_owner(struct rw_semaphore *sem) +{ + __up_read(sem); +} + +EXPORT_SYMBOL(up_read_non_owner); + #endif -- cgit From cafe563591446cf80bfbc2fe3bc72a2e36cf1060 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 23 Mar 2013 16:11:31 -0700 Subject: bcache: A block layer cache Does writethrough and writeback caching, handles unclean shutdown, and has a bunch of other nifty features motivated by real world usage. See the wiki at http://bcache.evilpiepirate.org for more. Signed-off-by: Kent Overstreet --- Documentation/ABI/testing/sysfs-block-bcache | 156 ++ Documentation/bcache.txt | 343 ++++ MAINTAINERS | 7 + drivers/md/Kconfig | 2 + drivers/md/Makefile | 1 + drivers/md/bcache/Kconfig | 42 + drivers/md/bcache/Makefile | 7 + drivers/md/bcache/alloc.c | 583 ++++++ drivers/md/bcache/bcache.h | 1232 +++++++++++++ drivers/md/bcache/bset.c | 1190 ++++++++++++ drivers/md/bcache/bset.h | 379 ++++ drivers/md/bcache/btree.c | 2503 ++++++++++++++++++++++++++ drivers/md/bcache/btree.h | 405 +++++ drivers/md/bcache/closure.c | 348 ++++ drivers/md/bcache/closure.h | 670 +++++++ drivers/md/bcache/debug.c | 563 ++++++ drivers/md/bcache/debug.h | 54 + drivers/md/bcache/io.c | 390 ++++ drivers/md/bcache/journal.c | 785 ++++++++ drivers/md/bcache/journal.h | 215 +++ drivers/md/bcache/movinggc.c | 254 +++ drivers/md/bcache/request.c | 1409 +++++++++++++++ drivers/md/bcache/request.h | 62 + drivers/md/bcache/stats.c | 245 +++ drivers/md/bcache/stats.h | 58 + drivers/md/bcache/super.c | 1941 ++++++++++++++++++++ drivers/md/bcache/sysfs.c | 817 +++++++++ drivers/md/bcache/sysfs.h | 110 ++ drivers/md/bcache/trace.c | 26 + drivers/md/bcache/util.c | 389 ++++ drivers/md/bcache/util.h | 589 ++++++ drivers/md/bcache/writeback.c | 414 +++++ include/linux/cgroup_subsys.h | 6 + include/linux/sched.h | 4 + include/trace/events/bcache.h | 271 +++ kernel/fork.c | 4 + 36 files changed, 16474 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-block-bcache create mode 100644 Documentation/bcache.txt create mode 100644 drivers/md/bcache/Kconfig create mode 100644 drivers/md/bcache/Makefile create mode 100644 drivers/md/bcache/alloc.c create mode 100644 drivers/md/bcache/bcache.h create mode 100644 drivers/md/bcache/bset.c create mode 100644 drivers/md/bcache/bset.h create mode 100644 drivers/md/bcache/btree.c create mode 100644 drivers/md/bcache/btree.h create mode 100644 drivers/md/bcache/closure.c create mode 100644 drivers/md/bcache/closure.h create mode 100644 
drivers/md/bcache/debug.c create mode 100644 drivers/md/bcache/debug.h create mode 100644 drivers/md/bcache/io.c create mode 100644 drivers/md/bcache/journal.c create mode 100644 drivers/md/bcache/journal.h create mode 100644 drivers/md/bcache/movinggc.c create mode 100644 drivers/md/bcache/request.c create mode 100644 drivers/md/bcache/request.h create mode 100644 drivers/md/bcache/stats.c create mode 100644 drivers/md/bcache/stats.h create mode 100644 drivers/md/bcache/super.c create mode 100644 drivers/md/bcache/sysfs.c create mode 100644 drivers/md/bcache/sysfs.h create mode 100644 drivers/md/bcache/trace.c create mode 100644 drivers/md/bcache/util.c create mode 100644 drivers/md/bcache/util.h create mode 100644 drivers/md/bcache/writeback.c create mode 100644 include/trace/events/bcache.h (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-block-bcache b/Documentation/ABI/testing/sysfs-block-bcache new file mode 100644 index 000000000000..9e4bbc5d51fd --- /dev/null +++ b/Documentation/ABI/testing/sysfs-block-bcache @@ -0,0 +1,156 @@
+What: /sys/block/<disk>/bcache/unregister +Date: November 2010 +Contact: Kent Overstreet +Description: + A write to this file causes the backing device or cache to be + unregistered. If a backing device had dirty data in the cache, + writeback mode is automatically disabled and all dirty data is + flushed before the device is unregistered. Caches unregister + all associated backing devices before unregistering themselves. +
+What: /sys/block/<disk>/bcache/clear_stats +Date: November 2010 +Contact: Kent Overstreet +Description: + Writing to this file resets all the statistics for the device. +
+What: /sys/block/<disk>/bcache/cache +Date: November 2010 +Contact: Kent Overstreet +Description: + For a backing device that has cache, a symlink to + the bcache/ dir of that cache. +
+What: /sys/block/<disk>/bcache/cache_hits +Date: November 2010 +Contact: Kent Overstreet +Description: + For backing devices: integer number of full cache hits, + counted per bio. A partial cache hit counts as a miss. +
+What: /sys/block/<disk>/bcache/cache_misses +Date: November 2010 +Contact: Kent Overstreet +Description: + For backing devices: integer number of cache misses. +
+What: /sys/block/<disk>/bcache/cache_hit_ratio +Date: November 2010 +Contact: Kent Overstreet +Description: + For backing devices: cache hits as a percentage. +
+What: /sys/block/<disk>/bcache/sequential_cutoff +Date: November 2010 +Contact: Kent Overstreet +Description: + For backing devices: Threshold past which sequential IO will + skip the cache. Read and written as bytes in human readable + units (e.g. echo 10M > sequential_cutoff). +
+What: /sys/block/<disk>/bcache/bypassed +Date: November 2010 +Contact: Kent Overstreet +Description: + Sum of all reads and writes that have bypassed the cache (due + to the sequential cutoff). Expressed as bytes in human + readable units. +
+What: /sys/block/<disk>/bcache/writeback +Date: November 2010 +Contact: Kent Overstreet +Description: + For backing devices: When on, writeback caching is enabled and + writes will be buffered in the cache. When off, caching is in + writethrough mode; reads and writes will be added to the + cache but no write buffering will take place. +
+What: /sys/block/<disk>/bcache/writeback_running +Date: November 2010 +Contact: Kent Overstreet +Description: + For backing devices: when off, dirty data will not be written + from the cache to the backing device. 
The cache will still be + used to buffer writes until it is mostly full, at which point + writes transparently revert to writethrough mode. Intended only + for benchmarking/testing. +
+What: /sys/block/<disk>/bcache/writeback_delay +Date: November 2010 +Contact: Kent Overstreet +Description: + For backing devices: In writeback mode, when dirty data is + written to the cache and the cache held no dirty data for that + backing device, writeback from cache to backing device starts + after this delay, expressed as an integer number of seconds. +
+What: /sys/block/<disk>/bcache/writeback_percent +Date: November 2010 +Contact: Kent Overstreet +Description: + For backing devices: If nonzero, writeback from cache to + backing device only takes place when more than this percentage + of the cache is used, allowing more write coalescing to take + place and reducing total number of writes sent to the backing + device. Integer between 0 and 40. +
+What: /sys/block/<disk>/bcache/synchronous +Date: November 2010 +Contact: Kent Overstreet +Description: + For a cache, a boolean that allows synchronous mode to be + switched on and off. In synchronous mode all writes are ordered + such that the cache can reliably recover from unclean shutdown; + if disabled bcache will not generally wait for writes to + complete but if the cache is not shut down cleanly all data + will be discarded from the cache. Should not be turned off with + writeback caching enabled. +
+What: /sys/block/<disk>/bcache/discard +Date: November 2010 +Contact: Kent Overstreet +Description: + For a cache, a boolean allowing discard/TRIM to be turned off + or back on if the device supports it. +
+What: /sys/block/<disk>/bcache/bucket_size +Date: November 2010 +Contact: Kent Overstreet +Description: + For a cache, bucket size in human readable units, as set at + cache creation time; should match the erase block size of the + SSD for optimal performance. +
+What: /sys/block/<disk>/bcache/nbuckets +Date: November 2010 +Contact: Kent Overstreet +Description: + For a cache, the number of usable buckets. +
+What: /sys/block/<disk>/bcache/tree_depth +Date: November 2010 +Contact: Kent Overstreet +Description: + For a cache, height of the btree excluding leaf nodes (i.e. a + one node tree will have a depth of 0). +
+What: /sys/block/<disk>/bcache/btree_cache_size +Date: November 2010 +Contact: Kent Overstreet +Description: + Number of btree buckets/nodes that are currently cached in + memory; cache dynamically grows and shrinks in response to + memory pressure from the rest of the system. +
+What: /sys/block/<disk>/bcache/written +Date: November 2010 +Contact: Kent Overstreet +Description: + For a cache, total amount of data in human readable units + written to the cache, excluding all metadata. +
+What: /sys/block/<disk>/bcache/btree_written +Date: November 2010 +Contact: Kent Overstreet +Description: + For a cache, sum of all btree writes in human readable units. diff --git a/Documentation/bcache.txt b/Documentation/bcache.txt new file mode 100644 index 000000000000..533307d52c87 --- /dev/null +++ b/Documentation/bcache.txt @@ -0,0 +1,343 @@ +Say you've got a big slow raid 6, and an X-25E or three. Wouldn't it be +nice if you could use them as cache... Hence bcache. 
+ +Wiki and git repositories are at: + http://bcache.evilpiepirate.org + http://evilpiepirate.org/git/linux-bcache.git + http://evilpiepirate.org/git/bcache-tools.git +
+It's designed around the performance characteristics of SSDs - it only allocates +in erase block sized buckets, and it uses a hybrid btree/log to track cached +extents (which can be anywhere from a single sector to the bucket size). It's +designed to avoid random writes at all costs; it fills up an erase block +sequentially, then issues a discard before reusing it. +
+Both writethrough and writeback caching are supported. Writeback defaults to +off, but can be switched on and off arbitrarily at runtime. Bcache goes to +great lengths to protect your data - it reliably handles unclean shutdown. (It +doesn't even have a notion of a clean shutdown; bcache simply doesn't return +writes as completed until they're on stable storage). +
+Writeback caching can use most of the cache for buffering writes - writing +dirty data to the backing device is always done sequentially, scanning from the +start to the end of the index. +
+Since random IO is what SSDs excel at, there generally won't be much benefit +to caching large sequential IO. Bcache detects sequential IO and skips it; +it also keeps a rolling average of the IO sizes per task, and as long as the +average is above the cutoff it will skip all IO from that task - instead of +caching the first 512k after every seek. Backups and large file copies should +thus entirely bypass the cache. +
+In the event of a data IO error on the flash it will try to recover by reading +from disk or invalidating cache entries. For unrecoverable errors (metadata +or dirty data), caching is automatically disabled; if dirty data was present +in the cache it first disables writeback caching and waits for all dirty data +to be flushed. +
+Getting started: +You'll need make-bcache from the bcache-tools repository. Both the cache device +and backing device must be formatted before use. + make-bcache -B /dev/sdb + make-bcache -C /dev/sdc +
+make-bcache has the ability to format multiple devices at the same time - if +you format your backing devices and cache device at the same time, you won't +have to manually attach: + make-bcache -B /dev/sda /dev/sdb -C /dev/sdc +
+To make bcache devices known to the kernel, echo them to /sys/fs/bcache/register: +
+ echo /dev/sdb > /sys/fs/bcache/register + echo /dev/sdc > /sys/fs/bcache/register +
+To register your bcache devices automatically, you could add something like +this to an init script: +
+ echo /dev/sd* > /sys/fs/bcache/register_quiet +
+It'll look for bcache superblocks and ignore everything that doesn't have one. +
+Registering the backing device makes the bcache show up in /dev; you can now +format it and use it as normal. But the first time using a new bcache device, +it'll be running in passthrough mode until you attach it to a cache. See the +section on attaching. +
+The devices show up at /dev/bcacheN, and can be controlled via sysfs from +/sys/block/bcacheN/bcache: +
+ mkfs.ext4 /dev/bcache0 + mount /dev/bcache0 /mnt +
+Cache devices are managed as sets; multiple caches per set isn't supported yet +but will allow for mirroring of metadata and dirty data in the future. Your new +cache set shows up as /sys/fs/bcache/<UUID> +
+ATTACHING: +
+After your cache device and backing device are registered, the backing device +must be attached to your cache set to enable caching. 
Attaching a backing + device to a cache set is done thusly, with the UUID of the cache set in +/sys/fs/bcache: +
+ echo <CSET-UUID> > /sys/block/bcache0/bcache/attach +
+This only has to be done once. The next time you reboot, just reregister all +your bcache devices. If a backing device has data in a cache somewhere, the +/dev/bcache# device won't be created until the cache shows up - particularly +important if you have writeback caching turned on. +
+If you're booting up and your cache device is gone and never coming back, you +can force run the backing device: +
+ echo 1 > /sys/block/sdb/bcache/running +
+(You need to use /sys/block/sdb (or whatever your backing device is called), not +/sys/block/bcache0, because bcache0 doesn't exist yet. If you're using a +partition, the bcache directory would be at /sys/block/sdb/sdb2/bcache) +
+The backing device will still use that cache set if it shows up in the future, +but all the cached data will be invalidated. If there was dirty data in the +cache, don't expect the filesystem to be recoverable - you will have massive +filesystem corruption, though ext4's fsck does work miracles. +
+SYSFS - BACKING DEVICE: +
+attach + Echo the UUID of a cache set to this file to enable caching. +
+cache_mode + Can be one of either writethrough, writeback, writearound or none. +
+clear_stats + Writing to this file resets the running total stats (not the day/hour/5 minute + decaying versions). +
+detach + Write to this file to detach from a cache set. If there is dirty data in the + cache, it will be flushed first. +
+dirty_data + Amount of dirty data for this backing device in the cache. Continuously + updated unlike the cache set's version, but may be slightly off. +
+label + Name of underlying device. +
+readahead + Size of readahead that should be performed. Defaults to 0. If set to e.g. + 1M, it will round cache miss reads up to that size, but without overlapping + existing cache entries. +
+running + 1 if bcache is running (i.e. whether the /dev/bcache device exists, whether + it's in passthrough mode or caching). +
+sequential_cutoff + A sequential IO will bypass the cache once it passes this threshold; the + most recent 128 IOs are tracked so sequential IO can be detected even when + it isn't all done at once. +
+sequential_merge + If nonzero, bcache keeps a list of the last 128 requests submitted to compare + against all new requests to determine which new requests are sequential + continuations of previous requests for the purpose of determining sequential + cutoff. This is necessary if the sequential cutoff value is greater than the + maximum acceptable sequential size for any single request. +
+state + The backing device can be in one of four different states: +
+ no cache: Has never been attached to a cache set. +
+ clean: Part of a cache set, and there is no cached dirty data. +
+ dirty: Part of a cache set, and there is cached dirty data. +
+ inconsistent: The backing device was forcibly run by the user when there was + dirty data cached but the cache set was unavailable; whatever data was on the + backing device has likely been corrupted. +
+stop + Write to this file to shut down the bcache device and close the backing + device. +
+writeback_delay + When dirty data is written to the cache and it previously did not contain + any, waits some number of seconds before initiating writeback. Defaults to + 30. 
+ +writeback_percent + If nonzero, bcache tries to keep around this percentage of the cache dirty by + throttling background writeback and using a PD controller to smoothly adjust + the rate. +
+writeback_rate + Rate in sectors per second - if writeback_percent is nonzero, background + writeback is throttled to this rate. Continuously adjusted by bcache but may + also be set by the user. +
+writeback_running + If off, writeback of dirty data will not take place at all. Dirty data will + still be added to the cache until it is mostly full; only meant for + benchmarking. Defaults to on. +
+SYSFS - BACKING DEVICE STATS: +
+There are directories with these numbers for a running total, as well as +versions that decay over the past day, hour and 5 minutes; they're +aggregated in the cache set directory as well. +
+bypassed + Amount of IO (both reads and writes) that has bypassed the cache +
+cache_hits +cache_misses +cache_hit_ratio + Hits and misses are counted per individual IO as bcache sees them; a + partial hit is counted as a miss. +
+cache_bypass_hits +cache_bypass_misses + Hits and misses for IO that is intended to skip the cache are still counted, + but broken out here. +
+cache_miss_collisions + Counts instances where data was going to be inserted into the cache from a + cache miss, but raced with a write and data was already present (usually 0 + since the synchronization for cache misses was rewritten) +
+cache_readaheads + Count of times readahead occurred. +
+SYSFS - CACHE SET: +
+average_key_size + Average data per key in the btree. +
+bdev<0..n> + Symlink to each of the attached backing devices. +
+block_size + Block size of the cache devices. +
+btree_cache_size + Amount of memory currently used by the btree cache +
+bucket_size + Size of buckets +
+cache<0..n> + Symlink to each of the cache devices comprising this cache set. +
+cache_available_percent + Percentage of cache device free. +
+clear_stats + Clears the statistics associated with this cache +
+dirty_data + Amount of dirty data in the cache (updated when garbage collection runs). +
+flash_vol_create + Echoing a size to this file (in human readable units, k/M/G) creates a thinly + provisioned volume backed by the cache set. +
+io_error_halflife +io_error_limit + These determine how many errors we accept before disabling the cache. + Each error is decayed by the half life (in # ios). If the decaying count + reaches io_error_limit dirty data is written out and the cache is disabled. +
+journal_delay_ms + Journal writes will delay for up to this many milliseconds, unless a cache + flush happens sooner. Defaults to 100. +
+root_usage_percent + Percentage of the root btree node in use. If this gets too high the node + will split, increasing the tree depth. +
+stop + Write to this file to shut down the cache set - waits until all attached + backing devices have been shut down. +
+tree_depth + Depth of the btree (a single node btree has depth 0). +
+unregister + Detaches all backing devices and closes the cache devices; if dirty data is + present it will disable writeback caching and wait for it to be flushed. +
+SYSFS - CACHE SET INTERNAL: +
+This directory also exposes timings for a number of internal operations, with +separate files for average duration, average frequency, last occurrence and max +duration: garbage collection, btree read, btree node sorts and btree splits. +
+active_journal_entries + Number of journal entries that are newer than the index. +
+btree_nodes + Total nodes in the btree. 
+ +btree_used_percent + Average fraction of btree in use. +
+bset_tree_stats + Statistics about the auxiliary search trees +
+btree_cache_max_chain + Longest chain in the btree node cache's hash table +
+cache_read_races + Counts instances where while data was being read from the cache, the bucket + was reused and invalidated - i.e. where the pointer was stale after the read + completed. When this occurs the data is reread from the backing device. +
+trigger_gc + Writing to this file forces garbage collection to run. +
+SYSFS - CACHE DEVICE: +
+block_size + Minimum granularity of writes - should match hardware sector size. +
+btree_written + Sum of all btree writes, in (kilo/mega/giga) bytes +
+bucket_size + Size of buckets +
+cache_replacement_policy + One of either lru, fifo or random. +
+discard + Boolean; if on a discard/TRIM will be issued to each bucket before it is + reused. Defaults to off, since SATA TRIM is an unqueued command (and thus + slow). +
+freelist_percent + Size of the freelist as a percentage of nbuckets. Can be written to in order + to increase the number of buckets kept on the freelist, which lets you + artificially reduce the size of the cache at runtime. Mostly for testing + purposes (i.e. testing how different size caches affect your hit rate), but + since buckets are discarded when they move on to the freelist, it will also + make the SSD's garbage collection easier by effectively giving it more + reserved space. +
+io_errors + Number of errors that have occurred, decayed by io_error_halflife. +
+metadata_written + Sum of all non-data writes (btree writes and all other metadata). +
+nbuckets + Total buckets in this cache +
+priority_stats + Statistics about how recently data in the cache has been accessed. This can + reveal your working set size. +
+written + Sum of all data that has been written to the cache; comparison with + btree_written gives the amount of write inflation in bcache. diff --git a/MAINTAINERS b/MAINTAINERS index 50b4d735f961..64b849620b52 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1616,6 +1616,13 @@ W: http://www.baycom.org/~tom/ham/ham.html S: Maintained F: drivers/net/hamradio/baycom* +BCACHE (BLOCK LAYER CACHE) +M: Kent Overstreet +L: linux-bcache@vger.kernel.org +W: http://bcache.evilpiepirate.org +S: Maintained +F: drivers/md/bcache/ + BEFS FILE SYSTEM S: Orphan F: Documentation/filesystems/befs.txt diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 4d8d90b4fe78..3bfc8f1da9fe 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -174,6 +174,8 @@ config MD_FAULTY If unsure, say N. +source "drivers/md/bcache/Kconfig" + config BLK_DEV_DM tristate "Device mapper support" ---help--- diff --git a/drivers/md/Makefile b/drivers/md/Makefile index 7ceeaefc0e95..1439fd4ad9b1 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -29,6 +29,7 @@ obj-$(CONFIG_MD_RAID10) += raid10.o obj-$(CONFIG_MD_RAID456) += raid456.o obj-$(CONFIG_MD_MULTIPATH) += multipath.o obj-$(CONFIG_MD_FAULTY) += faulty.o +obj-$(CONFIG_BCACHE) += bcache/ obj-$(CONFIG_BLK_DEV_MD) += md-mod.o obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o obj-$(CONFIG_DM_BUFIO) += dm-bufio.o diff --git a/drivers/md/bcache/Kconfig b/drivers/md/bcache/Kconfig new file mode 100644 index 000000000000..05c220d05e23 --- /dev/null +++ b/drivers/md/bcache/Kconfig @@ -0,0 +1,42 @@ + +config BCACHE + tristate "Block device as cache" + select CLOSURES + ---help--- + Allows a block device to be used as cache for other devices; uses + a btree for indexing and the layout is optimized for SSDs. 
+ + See Documentation/bcache.txt for details. +
+config BCACHE_DEBUG + bool "Bcache debugging" + depends on BCACHE + ---help--- + Don't select this option unless you're a developer +
+ Enables extra debugging tools (primarily a fuzz tester) +
+config BCACHE_EDEBUG + bool "Extended runtime checks" + depends on BCACHE + ---help--- + Don't select this option unless you're a developer +
+ Enables extra runtime checks which significantly affect performance +
+config BCACHE_CLOSURES_DEBUG + bool "Debug closures" + depends on BCACHE + select DEBUG_FS + ---help--- + Keeps all active closures in a linked list and provides a debugfs + interface to list them, which makes it possible to see asynchronous + operations that get stuck. +
+# cgroup code needs to be updated: +# +#config CGROUP_BCACHE +# bool "Cgroup controls for bcache" +# depends on BCACHE && BLK_CGROUP +# ---help--- +# TODO diff --git a/drivers/md/bcache/Makefile b/drivers/md/bcache/Makefile new file mode 100644 index 000000000000..0e9c82523be6 --- /dev/null +++ b/drivers/md/bcache/Makefile @@ -0,0 +1,7 @@ + +obj-$(CONFIG_BCACHE) += bcache.o + +bcache-y := alloc.o btree.o bset.o io.o journal.o writeback.o\ + movinggc.o request.o super.o sysfs.o debug.o util.o trace.o stats.o closure.o + +CFLAGS_request.o += -Iblock diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c new file mode 100644 index 000000000000..ed18115e078e --- /dev/null +++ b/drivers/md/bcache/alloc.c @@ -0,0 +1,583 @@ +/* + * Primary bucket allocation code + * + * Copyright 2012 Google, Inc. + * + * Allocation in bcache is done in terms of buckets: + * + * Each bucket has an associated 8 bit gen; this gen corresponds to the gen in + * btree pointers - they must match for the pointer to be considered valid. + * + * Thus (assuming a bucket has no dirty data or metadata in it) we can reuse a + * bucket simply by incrementing its gen. + * + * The gens (along with the priorities; it's really the gens that are important + * but the code is named as if it's the priorities) are written in an arbitrary + * list of buckets on disk, with a pointer to them in the journal header. + * + * When we invalidate a bucket, we have to write its new gen to disk and wait + * for that write to complete before we use it - otherwise after a crash we + * could have pointers that appeared to be good but pointed to data that had + * been overwritten. + * + * Since the gens and priorities are all stored contiguously on disk, we can + * batch this up: We fill up the free_inc list with freshly invalidated buckets, + * call prio_write(), and when prio_write() finishes we pull buckets off the + * free_inc list and optionally discard them. + * + * free_inc isn't the only freelist - if it was, we'd often have to sleep while + * priorities and gens were being written before we could allocate. c->free is a + * smaller freelist, and buckets on that list are always ready to be used. + * + * If we've got discards enabled, that happens when a bucket moves from the + * free_inc list to the free list. + * + * There is another freelist, because sometimes we have buckets that we know + * have nothing pointing into them - these we can reuse without waiting for + * priorities to be rewritten. These come from freed btree nodes and buckets + * that garbage collection discovered no longer had valid keys pointing into + * them (because they were overwritten). That's the unused list - buckets on the + * unused list move to the free list, optionally being discarded in the process. 
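+ *
+ * Putting that together, a bucket being reused flows through the freelists
+ * roughly like this (a simplified sketch of the lists described above, not
+ * additional machinery):
+ *
+ *   invalidate_buckets() -> free_inc -> prio_write() -> free -> allocated
+ *   freed btree nodes / GC -> unused --------------------^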
+ * + * It's also important to ensure that gens don't wrap around - with respect to + * either the oldest gen in the btree or the gen on disk. This is quite + * difficult to do in practice, but we explicitly guard against it anyways - if + * a bucket is in danger of wrapping around we simply skip invalidating it that + * time around, and we garbage collect or rewrite the priorities sooner than we + * would have otherwise. + * + * bch_bucket_alloc() allocates a single bucket from a specific cache. + * + * bch_bucket_alloc_set() allocates one or more buckets from different caches + * out of a cache set. + * + * free_some_buckets() drives all the processes described above. It's called + * from bch_bucket_alloc() and a few other places that need to make sure free + * buckets are ready. + * + * invalidate_buckets_(lru|fifo)() find buckets that are available to be + * invalidated, and then invalidate them and stick them on the free_inc list - + * in either lru or fifo order. + */ + +#include "bcache.h" +#include "btree.h" + +#include + +#define MAX_IN_FLIGHT_DISCARDS 8U + +/* Bucket heap / gen */ + +uint8_t bch_inc_gen(struct cache *ca, struct bucket *b) +{ + uint8_t ret = ++b->gen; + + ca->set->need_gc = max(ca->set->need_gc, bucket_gc_gen(b)); + WARN_ON_ONCE(ca->set->need_gc > BUCKET_GC_GEN_MAX); + + if (CACHE_SYNC(&ca->set->sb)) { + ca->need_save_prio = max(ca->need_save_prio, + bucket_disk_gen(b)); + WARN_ON_ONCE(ca->need_save_prio > BUCKET_DISK_GEN_MAX); + } + + return ret; +} + +void bch_rescale_priorities(struct cache_set *c, int sectors) +{ + struct cache *ca; + struct bucket *b; + unsigned next = c->nbuckets * c->sb.bucket_size / 1024; + unsigned i; + int r; + + atomic_sub(sectors, &c->rescale); + + do { + r = atomic_read(&c->rescale); + + if (r >= 0) + return; + } while (atomic_cmpxchg(&c->rescale, r, r + next) != r); + + mutex_lock(&c->bucket_lock); + + c->min_prio = USHRT_MAX; + + for_each_cache(ca, c, i) + for_each_bucket(b, ca) + if (b->prio && + b->prio != BTREE_PRIO && + !atomic_read(&b->pin)) { + b->prio--; + c->min_prio = min(c->min_prio, b->prio); + } + + mutex_unlock(&c->bucket_lock); +} + +/* Discard/TRIM */ + +struct discard { + struct list_head list; + struct work_struct work; + struct cache *ca; + long bucket; + + struct bio bio; + struct bio_vec bv; +}; + +static void discard_finish(struct work_struct *w) +{ + struct discard *d = container_of(w, struct discard, work); + struct cache *ca = d->ca; + char buf[BDEVNAME_SIZE]; + + if (!test_bit(BIO_UPTODATE, &d->bio.bi_flags)) { + pr_notice("discard error on %s, disabling", + bdevname(ca->bdev, buf)); + d->ca->discard = 0; + } + + mutex_lock(&ca->set->bucket_lock); + + fifo_push(&ca->free, d->bucket); + list_add(&d->list, &ca->discards); + atomic_dec(&ca->discards_in_flight); + + mutex_unlock(&ca->set->bucket_lock); + + closure_wake_up(&ca->set->bucket_wait); + wake_up(&ca->set->alloc_wait); + + closure_put(&ca->set->cl); +} + +static void discard_endio(struct bio *bio, int error) +{ + struct discard *d = container_of(bio, struct discard, bio); + schedule_work(&d->work); +} + +static void do_discard(struct cache *ca, long bucket) +{ + struct discard *d = list_first_entry(&ca->discards, + struct discard, list); + + list_del(&d->list); + d->bucket = bucket; + + atomic_inc(&ca->discards_in_flight); + closure_get(&ca->set->cl); + + bio_init(&d->bio); + + d->bio.bi_sector = bucket_to_sector(ca->set, d->bucket); + d->bio.bi_bdev = ca->bdev; + d->bio.bi_rw = REQ_WRITE|REQ_DISCARD; + d->bio.bi_max_vecs = 1; + d->bio.bi_io_vec = 
d->bio.bi_inline_vecs; + d->bio.bi_size = bucket_bytes(ca); + d->bio.bi_end_io = discard_endio; + bio_set_prio(&d->bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0)); + + submit_bio(0, &d->bio); +} + +/* Allocation */ + +static inline bool can_inc_bucket_gen(struct bucket *b) +{ + return bucket_gc_gen(b) < BUCKET_GC_GEN_MAX && + bucket_disk_gen(b) < BUCKET_DISK_GEN_MAX; +} + +bool bch_bucket_add_unused(struct cache *ca, struct bucket *b) +{ + BUG_ON(GC_MARK(b) || GC_SECTORS_USED(b)); + + if (fifo_used(&ca->free) > ca->watermark[WATERMARK_MOVINGGC] && + CACHE_REPLACEMENT(&ca->sb) == CACHE_REPLACEMENT_FIFO) + return false; + + b->prio = 0; + + if (can_inc_bucket_gen(b) && + fifo_push(&ca->unused, b - ca->buckets)) { + atomic_inc(&b->pin); + return true; + } + + return false; +} + +static bool can_invalidate_bucket(struct cache *ca, struct bucket *b) +{ + return GC_MARK(b) == GC_MARK_RECLAIMABLE && + !atomic_read(&b->pin) && + can_inc_bucket_gen(b); +} + +static void invalidate_one_bucket(struct cache *ca, struct bucket *b) +{ + bch_inc_gen(ca, b); + b->prio = INITIAL_PRIO; + atomic_inc(&b->pin); + fifo_push(&ca->free_inc, b - ca->buckets); +} + +static void invalidate_buckets_lru(struct cache *ca) +{ + unsigned bucket_prio(struct bucket *b) + { + return ((unsigned) (b->prio - ca->set->min_prio)) * + GC_SECTORS_USED(b); + } + + bool bucket_max_cmp(struct bucket *l, struct bucket *r) + { + return bucket_prio(l) < bucket_prio(r); + } + + bool bucket_min_cmp(struct bucket *l, struct bucket *r) + { + return bucket_prio(l) > bucket_prio(r); + } + + struct bucket *b; + ssize_t i; + + ca->heap.used = 0; + + for_each_bucket(b, ca) { + if (!can_invalidate_bucket(ca, b)) + continue; + + if (!GC_SECTORS_USED(b)) { + if (!bch_bucket_add_unused(ca, b)) + return; + } else { + if (!heap_full(&ca->heap)) + heap_add(&ca->heap, b, bucket_max_cmp); + else if (bucket_max_cmp(b, heap_peek(&ca->heap))) { + ca->heap.data[0] = b; + heap_sift(&ca->heap, 0, bucket_max_cmp); + } + } + } + + if (ca->heap.used * 2 < ca->heap.size) + bch_queue_gc(ca->set); + + for (i = ca->heap.used / 2 - 1; i >= 0; --i) + heap_sift(&ca->heap, i, bucket_min_cmp); + + while (!fifo_full(&ca->free_inc)) { + if (!heap_pop(&ca->heap, b, bucket_min_cmp)) { + /* We don't want to be calling invalidate_buckets() + * multiple times when it can't do anything + */ + ca->invalidate_needs_gc = 1; + bch_queue_gc(ca->set); + return; + } + + invalidate_one_bucket(ca, b); + } +} + +static void invalidate_buckets_fifo(struct cache *ca) +{ + struct bucket *b; + size_t checked = 0; + + while (!fifo_full(&ca->free_inc)) { + if (ca->fifo_last_bucket < ca->sb.first_bucket || + ca->fifo_last_bucket >= ca->sb.nbuckets) + ca->fifo_last_bucket = ca->sb.first_bucket; + + b = ca->buckets + ca->fifo_last_bucket++; + + if (can_invalidate_bucket(ca, b)) + invalidate_one_bucket(ca, b); + + if (++checked >= ca->sb.nbuckets) { + ca->invalidate_needs_gc = 1; + bch_queue_gc(ca->set); + return; + } + } +} + +static void invalidate_buckets_random(struct cache *ca) +{ + struct bucket *b; + size_t checked = 0; + + while (!fifo_full(&ca->free_inc)) { + size_t n; + get_random_bytes(&n, sizeof(n)); + + n %= (size_t) (ca->sb.nbuckets - ca->sb.first_bucket); + n += ca->sb.first_bucket; + + b = ca->buckets + n; + + if (can_invalidate_bucket(ca, b)) + invalidate_one_bucket(ca, b); + + if (++checked >= ca->sb.nbuckets / 2) { + ca->invalidate_needs_gc = 1; + bch_queue_gc(ca->set); + return; + } + } +} + +static void invalidate_buckets(struct cache *ca) +{ + if (ca->invalidate_needs_gc) + 
return; + + switch (CACHE_REPLACEMENT(&ca->sb)) { + case CACHE_REPLACEMENT_LRU: + invalidate_buckets_lru(ca); + break; + case CACHE_REPLACEMENT_FIFO: + invalidate_buckets_fifo(ca); + break; + case CACHE_REPLACEMENT_RANDOM: + invalidate_buckets_random(ca); + break; + } +} + +#define allocator_wait(ca, cond) \ +do { \ + DEFINE_WAIT(__wait); \ + \ + while (!(cond)) { \ + prepare_to_wait(&ca->set->alloc_wait, \ + &__wait, TASK_INTERRUPTIBLE); \ + \ + mutex_unlock(&(ca)->set->bucket_lock); \ + if (test_bit(CACHE_SET_STOPPING_2, &ca->set->flags)) { \ + finish_wait(&ca->set->alloc_wait, &__wait); \ + closure_return(cl); \ + } \ + \ + schedule(); \ + __set_current_state(TASK_RUNNING); \ + mutex_lock(&(ca)->set->bucket_lock); \ + } \ + \ + finish_wait(&ca->set->alloc_wait, &__wait); \ +} while (0) + +void bch_allocator_thread(struct closure *cl) +{ + struct cache *ca = container_of(cl, struct cache, alloc); + + mutex_lock(&ca->set->bucket_lock); + + while (1) { + while (1) { + long bucket; + + if ((!atomic_read(&ca->set->prio_blocked) || + !CACHE_SYNC(&ca->set->sb)) && + !fifo_empty(&ca->unused)) + fifo_pop(&ca->unused, bucket); + else if (!fifo_empty(&ca->free_inc)) + fifo_pop(&ca->free_inc, bucket); + else + break; + + allocator_wait(ca, (int) fifo_free(&ca->free) > + atomic_read(&ca->discards_in_flight)); + + if (ca->discard) { + allocator_wait(ca, !list_empty(&ca->discards)); + do_discard(ca, bucket); + } else { + fifo_push(&ca->free, bucket); + closure_wake_up(&ca->set->bucket_wait); + } + } + + allocator_wait(ca, ca->set->gc_mark_valid); + invalidate_buckets(ca); + + allocator_wait(ca, !atomic_read(&ca->set->prio_blocked) || + !CACHE_SYNC(&ca->set->sb)); + + if (CACHE_SYNC(&ca->set->sb) && + (!fifo_empty(&ca->free_inc) || + ca->need_save_prio > 64)) { + bch_prio_write(ca); + } + } +} + +long bch_bucket_alloc(struct cache *ca, unsigned watermark, struct closure *cl) +{ + long r = -1; +again: + wake_up(&ca->set->alloc_wait); + + if (fifo_used(&ca->free) > ca->watermark[watermark] && + fifo_pop(&ca->free, r)) { + struct bucket *b = ca->buckets + r; +#ifdef CONFIG_BCACHE_EDEBUG + size_t iter; + long i; + + for (iter = 0; iter < prio_buckets(ca) * 2; iter++) + BUG_ON(ca->prio_buckets[iter] == (uint64_t) r); + + fifo_for_each(i, &ca->free, iter) + BUG_ON(i == r); + fifo_for_each(i, &ca->free_inc, iter) + BUG_ON(i == r); + fifo_for_each(i, &ca->unused, iter) + BUG_ON(i == r); +#endif + BUG_ON(atomic_read(&b->pin) != 1); + + SET_GC_SECTORS_USED(b, ca->sb.bucket_size); + + if (watermark <= WATERMARK_METADATA) { + SET_GC_MARK(b, GC_MARK_METADATA); + b->prio = BTREE_PRIO; + } else { + SET_GC_MARK(b, GC_MARK_RECLAIMABLE); + b->prio = INITIAL_PRIO; + } + + return r; + } + + pr_debug("alloc failure: blocked %i free %zu free_inc %zu unused %zu", + atomic_read(&ca->set->prio_blocked), fifo_used(&ca->free), + fifo_used(&ca->free_inc), fifo_used(&ca->unused)); + + if (cl) { + closure_wait(&ca->set->bucket_wait, cl); + + if (closure_blocking(cl)) { + mutex_unlock(&ca->set->bucket_lock); + closure_sync(cl); + mutex_lock(&ca->set->bucket_lock); + goto again; + } + } + + return -1; +} + +void bch_bucket_free(struct cache_set *c, struct bkey *k) +{ + unsigned i; + + for (i = 0; i < KEY_PTRS(k); i++) { + struct bucket *b = PTR_BUCKET(c, k, i); + + SET_GC_MARK(b, 0); + SET_GC_SECTORS_USED(b, 0); + bch_bucket_add_unused(PTR_CACHE(c, k, i), b); + } +} + +int __bch_bucket_alloc_set(struct cache_set *c, unsigned watermark, + struct bkey *k, int n, struct closure *cl) +{ + int i; + + lockdep_assert_held(&c->bucket_lock); 
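+
+	/* One bucket per pointer, each from a different cache device; a
+	 * single key carries at most 8 pointers here */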
+ BUG_ON(!n || n > c->caches_loaded || n > 8);
+
+ bkey_init(k);
+
+ /* sort by free space/prio of oldest data in caches */
+
+ for (i = 0; i < n; i++) {
+ struct cache *ca = c->cache_by_alloc[i];
+ long b = bch_bucket_alloc(ca, watermark, cl);
+
+ if (b == -1)
+ goto err;
+
+ k->ptr[i] = PTR(ca->buckets[b].gen,
+ bucket_to_sector(c, b),
+ ca->sb.nr_this_dev);
+
+ SET_KEY_PTRS(k, i + 1);
+ }
+
+ return 0;
+err:
+ bch_bucket_free(c, k);
+ __bkey_put(c, k);
+ return -1;
+}
+
+int bch_bucket_alloc_set(struct cache_set *c, unsigned watermark,
+ struct bkey *k, int n, struct closure *cl)
+{
+ int ret;
+ mutex_lock(&c->bucket_lock);
+ ret = __bch_bucket_alloc_set(c, watermark, k, n, cl);
+ mutex_unlock(&c->bucket_lock);
+ return ret;
+}
+
+/* Init */
+
+void bch_cache_allocator_exit(struct cache *ca)
+{
+ struct discard *d;
+
+ while (!list_empty(&ca->discards)) {
+ d = list_first_entry(&ca->discards, struct discard, list);
+ cancel_work_sync(&d->work);
+ list_del(&d->list);
+ kfree(d);
+ }
+}
+
+int bch_cache_allocator_init(struct cache *ca)
+{
+ unsigned i;
+
+ /*
+ * Reserve:
+ * Prio/gen writes first
+ * Then 8 for btree allocations
+ * Then half for the moving garbage collector
+ */
+
+ ca->watermark[WATERMARK_PRIO] = 0;
+
+ ca->watermark[WATERMARK_METADATA] = prio_buckets(ca);
+
+ ca->watermark[WATERMARK_MOVINGGC] = 8 +
+ ca->watermark[WATERMARK_METADATA];
+
+ ca->watermark[WATERMARK_NONE] = ca->free.size / 2 +
+ ca->watermark[WATERMARK_MOVINGGC];
+
+ for (i = 0; i < MAX_IN_FLIGHT_DISCARDS; i++) {
+ struct discard *d = kzalloc(sizeof(*d), GFP_KERNEL);
+ if (!d)
+ return -ENOMEM;
+
+ d->ca = ca;
+ INIT_WORK(&d->work, discard_finish);
+ list_add(&d->list, &ca->discards);
+ }
+
+ return 0;
+}
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
new file mode 100644
index 000000000000..d01a553f63f3
--- /dev/null
+++ b/drivers/md/bcache/bcache.h
@@ -0,0 +1,1232 @@
+#ifndef _BCACHE_H
+#define _BCACHE_H
+
+/*
+ * SOME HIGH LEVEL CODE DOCUMENTATION:
+ *
+ * Bcache mostly works with cache sets, cache devices, and backing devices.
+ *
+ * Support for multiple cache devices hasn't quite been finished off yet, but
+ * it's about 95% plumbed through. A cache set and its cache devices are sort of
+ * like an md raid array and its component devices. Most of the code doesn't
+ * care about individual cache devices; the main abstraction is the cache set.
+ *
+ * Multiple cache devices are intended to give us the ability to mirror dirty
+ * cached data and metadata, without mirroring clean cached data.
+ *
+ * Backing devices are different, in that they have a lifetime independent of a
+ * cache set. When you register a newly formatted backing device it'll come up
+ * in passthrough mode, and then you can attach it to and detach it from a
+ * cache set at runtime - while it's mounted and in use. Detaching implicitly
+ * invalidates any cached data for that backing device.
+ *
+ * A cache set can have multiple (many) backing devices attached to it.
+ *
+ * There are also flash only volumes - this is the reason for the distinction
+ * between struct cached_dev and struct bcache_device. A flash only volume
+ * works much like a bcache device that has a backing device, except the
+ * "cached" data is always dirty. The end result is that we get thin
+ * provisioning with very little additional code.
+ *
+ * Flash only volumes work but they're not production ready because the moving
+ * garbage collector needs more work. More on that later.
+ *
+ * BUCKETS/ALLOCATION:
+ *
+ * Bcache is primarily designed for caching, which means that in normal
+ * operation all of our available space will be allocated. Thus, we need an
+ * efficient way of deleting things from the cache so we can write new things to
+ * it.
+ *
+ * To do this, we first divide the cache device up into buckets. A bucket is the
+ * unit of allocation; they're typically around 1 mb - anywhere from 128k to 2M+
+ * works efficiently.
+ *
+ * Each bucket has a 16 bit priority, and an 8 bit generation associated with
+ * it. The gens and priorities for all the buckets are stored contiguously and
+ * packed on disk (in a linked list of buckets - aside from the superblock, all
+ * of bcache's metadata is stored in buckets).
+ *
+ * The priority is used to implement an LRU. We reset a bucket's priority when
+ * we allocate it or on a cache hit, and every so often we decrement the
+ * priority of each bucket. It could be used to implement something more
+ * sophisticated, if anyone ever gets around to it.
+ *
+ * The generation is used for invalidating buckets. Each pointer also has an 8
+ * bit generation embedded in it; for a pointer to be considered valid, its gen
+ * must match the gen of the bucket it points into. Thus, to reuse a bucket all
+ * we have to do is increment its gen (and write its new gen to disk; we batch
+ * this up).
+ *
+ * Bcache is entirely COW - we never write twice to a bucket, even buckets that
+ * contain metadata (including btree nodes).
+ *
+ * THE BTREE:
+ *
+ * Bcache is in large part designed around the btree.
+ *
+ * At a high level, the btree is just an index of key -> ptr tuples.
+ *
+ * Keys represent extents, and thus have a size field. Keys also have a variable
+ * number of pointers attached to them (potentially zero, which is handy for
+ * invalidating the cache).
+ *
+ * The key itself is an inode:offset pair. The inode number corresponds to a
+ * backing device or a flash only volume. The offset is the ending offset of the
+ * extent within the inode - not the starting offset; this makes lookups
+ * slightly more convenient.
+ *
+ * Pointers contain the cache device id, the offset on that device, and an 8 bit
+ * generation number. More on the gen later.
+ *
+ * Index lookups are not fully abstracted - cache lookups in particular are
+ * still somewhat mixed in with the btree code, but things are headed in that
+ * direction.
+ *
+ * Updates are fairly well abstracted, though. There are two different ways of
+ * updating the btree; insert and replace.
+ *
+ * BTREE_INSERT will just take a list of keys and insert them into the btree -
+ * overwriting (possibly only partially) any extents they overlap with. This is
+ * used to update the index after a write.
+ *
+ * BTREE_REPLACE is really cmpxchg(); it inserts a key into the btree iff it is
+ * overwriting a key that matches another given key. This is used for inserting
+ * data into the cache after a cache miss, and for background writeback, and for
+ * the moving garbage collector.
+ *
+ * There is no "delete" operation; deleting things from the index is
+ * accomplished either by invalidating pointers (by incrementing a bucket's
+ * gen) or by inserting a key with 0 pointers - which will overwrite anything
+ * previously present at that location in the index.
+ *
+ * This means that there are always stale/invalid keys in the btree. They're
+ * filtered out by the code that iterates through a btree node, and removed when
+ * a btree node is rewritten.
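+ *
+ * (To make the gen rule concrete, using the helpers defined further down
+ * in this header: the i'th pointer of key k is stale exactly when
+ *
+ *	PTR_GEN(k, i) != PTR_BUCKET(c, k, i)->gen
+ *
+ * so bumping a bucket's gen is a constant-time invalidation of every key
+ * pointing into it.)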
+ *
+ * BTREE NODES:
+ *
+ * Our unit of allocation is a bucket, and we can't arbitrarily allocate and
+ * free smaller than a bucket - so, that's how big our btree nodes are.
+ *
+ * (If buckets are really big we'll only use part of the bucket for a btree node
+ * - no less than 1/4th - but a bucket still contains no more than a single
+ * btree node. I'd actually like to change this, but for now we rely on the
+ * bucket's gen for deleting btree nodes when we rewrite/split a node.)
+ *
+ * Anyways, btree nodes are big - big enough to be inefficient with a textbook
+ * btree implementation.
+ *
+ * The way this is solved is that btree nodes are internally log structured; we
+ * can append new keys to an existing btree node without rewriting it. This
+ * means each set of keys we write is sorted, but the node is not.
+ *
+ * We maintain this log structure in memory - keeping 1Mb of keys sorted would
+ * be expensive, and we have to distinguish between the keys we have written and
+ * the keys we haven't. So to do a lookup in a btree node, we have to search
+ * each sorted set. But we do merge written sets together lazily, so the cost of
+ * these extra searches is quite low (normally most of the keys in a btree node
+ * will be in one big set, and then there'll be one or two sets that are much
+ * smaller).
+ *
+ * This log structure makes bcache's btree more of a hybrid between a
+ * conventional btree and a compacting data structure, with some of the
+ * advantages of both.
+ *
+ * GARBAGE COLLECTION:
+ *
+ * We can't just invalidate any bucket - it might contain dirty data or
+ * metadata. If it once contained dirty data, other writes might overwrite it
+ * later, leaving no valid pointers into that bucket in the index.
+ *
+ * Thus, the primary purpose of garbage collection is to find buckets to reuse.
+ * It also counts how much valid data each bucket currently contains, so that
+ * allocation can reuse buckets sooner when they've been mostly overwritten.
+ *
+ * It also does some things that are really internal to the btree
+ * implementation. If a btree node contains pointers that are stale by more than
+ * some threshold, it rewrites the btree node to avoid the bucket's generation
+ * wrapping around. It also merges adjacent btree nodes if they're empty enough.
+ *
+ * THE JOURNAL:
+ *
+ * Bcache's journal is not necessary for consistency; we always strictly
+ * order metadata writes so that the btree and everything else is consistent on
+ * disk in the event of an unclean shutdown, and in fact bcache had writeback
+ * caching (with recovery from unclean shutdown) before journalling was
+ * implemented.
+ *
+ * Rather, the journal is purely a performance optimization; we can't complete a
+ * write until we've updated the index on disk, otherwise the cache would be
+ * inconsistent in the event of an unclean shutdown. This means that without the
+ * journal, on random write workloads we constantly have to update all the leaf
+ * nodes in the btree, and those writes will be mostly empty (appending at most
+ * a few keys each) - highly inefficient in terms of the amount of metadata
+ * written, and it puts more strain on the various btree resorting/compacting
+ * code.
+ *
+ * The journal is just a log of keys we've inserted; on startup we just reinsert
+ * all the keys in the open journal entries. That means that when we're updating
+ * a node in the btree, we can wait until a 4k block of keys fills up before
+ * writing them out.
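+ *
+ * (Back-of-the-envelope, not from the code: a bkey with a single pointer
+ * is three u64s - high, low, one ptr - i.e. 24 bytes, so one 4k block can
+ * batch up on the order of 170 such keys before the leaf gets written.)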
+ * + * For simplicity, we only journal updates to leaf nodes; updates to parent + * nodes are rare enough (since our leaf nodes are huge) that it wasn't worth + * the complexity to deal with journalling them (in particular, journal replay) + * - updates to non leaf nodes just happen synchronously (see btree_split()). + */ + +#define pr_fmt(fmt) "bcache: %s() " fmt "\n", __func__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "util.h" +#include "closure.h" + +struct bucket { + atomic_t pin; + uint16_t prio; + uint8_t gen; + uint8_t disk_gen; + uint8_t last_gc; /* Most out of date gen in the btree */ + uint8_t gc_gen; + uint16_t gc_mark; +}; + +/* + * I'd use bitfields for these, but I don't trust the compiler not to screw me + * as multiple threads touch struct bucket without locking + */ + +BITMASK(GC_MARK, struct bucket, gc_mark, 0, 2); +#define GC_MARK_RECLAIMABLE 0 +#define GC_MARK_DIRTY 1 +#define GC_MARK_METADATA 2 +BITMASK(GC_SECTORS_USED, struct bucket, gc_mark, 2, 14); + +struct bkey { + uint64_t high; + uint64_t low; + uint64_t ptr[]; +}; + +/* Enough for a key with 6 pointers */ +#define BKEY_PAD 8 + +#define BKEY_PADDED(key) \ + union { struct bkey key; uint64_t key ## _pad[BKEY_PAD]; } + +/* Version 1: Backing device + * Version 2: Seed pointer into btree node checksum + * Version 3: New UUID format + */ +#define BCACHE_SB_VERSION 3 + +#define SB_SECTOR 8 +#define SB_SIZE 4096 +#define SB_LABEL_SIZE 32 +#define SB_JOURNAL_BUCKETS 256U +/* SB_JOURNAL_BUCKETS must be divisible by BITS_PER_LONG */ +#define MAX_CACHES_PER_SET 8 + +#define BDEV_DATA_START 16 /* sectors */ + +struct cache_sb { + uint64_t csum; + uint64_t offset; /* sector where this sb was written */ + uint64_t version; +#define CACHE_BACKING_DEV 1 + + uint8_t magic[16]; + + uint8_t uuid[16]; + union { + uint8_t set_uuid[16]; + uint64_t set_magic; + }; + uint8_t label[SB_LABEL_SIZE]; + + uint64_t flags; + uint64_t seq; + uint64_t pad[8]; + + uint64_t nbuckets; /* device size */ + uint16_t block_size; /* sectors */ + uint16_t bucket_size; /* sectors */ + + uint16_t nr_in_set; + uint16_t nr_this_dev; + + uint32_t last_mount; /* time_t */ + + uint16_t first_bucket; + union { + uint16_t njournal_buckets; + uint16_t keys; + }; + uint64_t d[SB_JOURNAL_BUCKETS]; /* journal buckets */ +}; + +BITMASK(CACHE_SYNC, struct cache_sb, flags, 0, 1); +BITMASK(CACHE_DISCARD, struct cache_sb, flags, 1, 1); +BITMASK(CACHE_REPLACEMENT, struct cache_sb, flags, 2, 3); +#define CACHE_REPLACEMENT_LRU 0U +#define CACHE_REPLACEMENT_FIFO 1U +#define CACHE_REPLACEMENT_RANDOM 2U + +BITMASK(BDEV_CACHE_MODE, struct cache_sb, flags, 0, 4); +#define CACHE_MODE_WRITETHROUGH 0U +#define CACHE_MODE_WRITEBACK 1U +#define CACHE_MODE_WRITEAROUND 2U +#define CACHE_MODE_NONE 3U +BITMASK(BDEV_STATE, struct cache_sb, flags, 61, 2); +#define BDEV_STATE_NONE 0U +#define BDEV_STATE_CLEAN 1U +#define BDEV_STATE_DIRTY 2U +#define BDEV_STATE_STALE 3U + +/* Version 1: Seed pointer into btree node checksum + */ +#define BCACHE_BSET_VERSION 1 + +/* + * This is the on disk format for btree nodes - a btree node on disk is a list + * of these; within each set the keys are sorted + */ +struct bset { + uint64_t csum; + uint64_t magic; + uint64_t seq; + uint32_t version; + uint32_t keys; + + union { + struct bkey start[0]; + uint64_t d[0]; + }; +}; + +/* + * On disk format for priorities and gens - see super.c near prio_write() for + * more. 
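+ * Each such bucket holds a prio_set header followed by prios_per_bucket(c)
+ * packed bucket_disk entries (see the macro further down), and next_bucket
+ * points at the bucket continuing the list.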
+ */
+struct prio_set {
+ uint64_t csum;
+ uint64_t magic;
+ uint64_t seq;
+ uint32_t version;
+ uint32_t pad;
+
+ uint64_t next_bucket;
+
+ struct bucket_disk {
+ uint16_t prio;
+ uint8_t gen;
+ } __attribute((packed)) data[];
+};
+
+struct uuid_entry {
+ union {
+ struct {
+ uint8_t uuid[16];
+ uint8_t label[32];
+ uint32_t first_reg;
+ uint32_t last_reg;
+ uint32_t invalidated;
+
+ uint32_t flags;
+ /* Size of flash only volumes */
+ uint64_t sectors;
+ };
+
+ uint8_t pad[128];
+ };
+};
+
+BITMASK(UUID_FLASH_ONLY, struct uuid_entry, flags, 0, 1);
+
+#include "journal.h"
+#include "stats.h"
+struct search;
+struct btree;
+struct keybuf;
+
+struct keybuf_key {
+ struct rb_node node;
+ BKEY_PADDED(key);
+ void *private;
+};
+
+typedef bool (keybuf_pred_fn)(struct keybuf *, struct bkey *);
+
+struct keybuf {
+ keybuf_pred_fn *key_predicate;
+
+ struct bkey last_scanned;
+ spinlock_t lock;
+
+ /*
+ * Beginning and end of range in rb tree - so that we can skip taking
+ * lock and checking the rb tree when we need to check for overlapping
+ * keys.
+ */
+ struct bkey start;
+ struct bkey end;
+
+ struct rb_root keys;
+
+#define KEYBUF_NR 100
+ DECLARE_ARRAY_ALLOCATOR(struct keybuf_key, freelist, KEYBUF_NR);
+};
+
+struct bio_split_pool {
+ struct bio_set *bio_split;
+ mempool_t *bio_split_hook;
+};
+
+struct bio_split_hook {
+ struct closure cl;
+ struct bio_split_pool *p;
+ struct bio *bio;
+ bio_end_io_t *bi_end_io;
+ void *bi_private;
+};
+
+struct bcache_device {
+ struct closure cl;
+
+ struct kobject kobj;
+
+ struct cache_set *c;
+ unsigned id;
+#define BCACHEDEVNAME_SIZE 12
+ char name[BCACHEDEVNAME_SIZE];
+
+ struct gendisk *disk;
+
+ /* If nonzero, we're closing */
+ atomic_t closing;
+
+ /* If nonzero, we're detaching/unregistering from cache set */
+ atomic_t detaching;
+
+ atomic_long_t sectors_dirty;
+ unsigned long sectors_dirty_gc;
+ unsigned long sectors_dirty_last;
+ long sectors_dirty_derivative;
+
+ mempool_t *unaligned_bvec;
+ struct bio_set *bio_split;
+
+ unsigned data_csum:1;
+
+ int (*cache_miss)(struct btree *, struct search *,
+ struct bio *, unsigned);
+ int (*ioctl) (struct bcache_device *, fmode_t, unsigned, unsigned long);
+
+ struct bio_split_pool bio_split_hook;
+};
+
+struct io {
+ /* Used to track sequential IO so it can be skipped */
+ struct hlist_node hash;
+ struct list_head lru;
+
+ unsigned long jiffies;
+ unsigned sequential;
+ sector_t last;
+};
+
+struct cached_dev {
+ struct list_head list;
+ struct bcache_device disk;
+ struct block_device *bdev;
+
+ struct cache_sb sb;
+ struct bio sb_bio;
+ struct bio_vec sb_bv[1];
+ struct closure_with_waitlist sb_write;
+
+ /* Refcount on the cache set. Always nonzero when we're caching. */
+ atomic_t count;
+ struct work_struct detach;
+
+ /*
+ * Device might not be running if it's dirty and the cache set hasn't
+ * shown up yet.
+ */
+ atomic_t running;
+
+ /*
+ * Writes take a shared lock from start to finish; scanning for dirty
+ * data to refill the rb tree requires an exclusive lock.
+ */
+ struct rw_semaphore writeback_lock;
+
+ /*
+ * Nonzero, and writeback has a refcount (d->count), iff there is dirty
+ * data in the cache. Protected by writeback_lock; must have a
+ * shared lock to set and an exclusive lock to clear.
+ */
+ atomic_t has_dirty;
+
+ struct ratelimit writeback_rate;
+ struct delayed_work writeback_rate_update;
+
+ /*
+ * Internal to the writeback code, so read_dirty() can keep track of
+ * where it's at.
+ */
+ sector_t last_read;
+
+ /* Number of writeback bios in flight */
+ atomic_t in_flight;
+ struct closure_with_timer writeback;
+ struct closure_waitlist writeback_wait;
+
+ struct keybuf writeback_keys;
+
+ /* For tracking sequential IO */
+#define RECENT_IO_BITS 7
+#define RECENT_IO (1 << RECENT_IO_BITS)
+ struct io io[RECENT_IO];
+ struct hlist_head io_hash[RECENT_IO + 1];
+ struct list_head io_lru;
+ spinlock_t io_lock;
+
+ struct cache_accounting accounting;
+
+ /* The rest of this all shows up in sysfs */
+ unsigned sequential_cutoff;
+ unsigned readahead;
+
+ unsigned sequential_merge:1;
+ unsigned verify:1;
+
+ unsigned writeback_metadata:1;
+ unsigned writeback_running:1;
+ unsigned char writeback_percent;
+ unsigned writeback_delay;
+
+ int writeback_rate_change;
+ int64_t writeback_rate_derivative;
+ uint64_t writeback_rate_target;
+
+ unsigned writeback_rate_update_seconds;
+ unsigned writeback_rate_d_term;
+ unsigned writeback_rate_p_term_inverse;
+ unsigned writeback_rate_d_smooth;
+};
+
+enum alloc_watermarks {
+ WATERMARK_PRIO,
+ WATERMARK_METADATA,
+ WATERMARK_MOVINGGC,
+ WATERMARK_NONE,
+ WATERMARK_MAX
+};
+
+struct cache {
+ struct cache_set *set;
+ struct cache_sb sb;
+ struct bio sb_bio;
+ struct bio_vec sb_bv[1];
+
+ struct kobject kobj;
+ struct block_device *bdev;
+
+ unsigned watermark[WATERMARK_MAX];
+
+ struct closure alloc;
+ struct workqueue_struct *alloc_workqueue;
+
+ struct closure prio;
+ struct prio_set *disk_buckets;
+
+ /*
+ * When allocating new buckets, prio_write() gets first dibs - since we
+ * may not be able to allocate at all without writing priorities and
+ * gens. prio_buckets[] contains the last buckets we wrote priorities to
+ * (so gc can mark them as metadata), prio_next[] contains the buckets
+ * allocated for the next prio write.
+ */
+ uint64_t *prio_buckets;
+ uint64_t *prio_last_buckets;
+
+ /*
+ * free: Buckets that are ready to be used
+ *
+ * free_inc: Incoming buckets - these are buckets that currently have
+ * cached data in them, and we can't reuse them until after we write
+ * their new gen to disk. After prio_write() finishes writing the new
+ * gens/prios, they'll be moved to the free list (and possibly discarded
+ * in the process)
+ *
+ * unused: GC found nothing pointing into these buckets (possibly
+ * because all the data they contained was overwritten), so we only
+ * need to discard them before they can be moved to the free list.
+ */
+ DECLARE_FIFO(long, free);
+ DECLARE_FIFO(long, free_inc);
+ DECLARE_FIFO(long, unused);
+
+ size_t fifo_last_bucket;
+
+ /* Allocation stuff: */
+ struct bucket *buckets;
+
+ DECLARE_HEAP(struct bucket *, heap);
+
+ /*
+ * max(gen - disk_gen) for all buckets. When it gets too big we have to
+ * call prio_write() to keep gens from wrapping.
+ */
+ uint8_t need_save_prio;
+ unsigned gc_move_threshold;
+
+ /*
+ * If nonzero, we know we aren't going to find any buckets to invalidate
+ * until a gc finishes - otherwise we could pointlessly burn a ton of
+ * cpu
+ */
+ unsigned invalidate_needs_gc:1;
+
+ bool discard; /* Get rid of? */
+
+ /*
+ * We preallocate structs for issuing discards to buckets, and keep them
+ * on this list when they're not in use; do_discard() issues discards
+ * whenever there's work to do and is called by free_some_buckets() and
+ * when a discard finishes.
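+ * MAX_IN_FLIGHT_DISCARDS of them are preallocated in
+ * bch_cache_allocator_init().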
+ */ + atomic_t discards_in_flight; + struct list_head discards; + + struct journal_device journal; + + /* The rest of this all shows up in sysfs */ +#define IO_ERROR_SHIFT 20 + atomic_t io_errors; + atomic_t io_count; + + atomic_long_t meta_sectors_written; + atomic_long_t btree_sectors_written; + atomic_long_t sectors_written; + + struct bio_split_pool bio_split_hook; +}; + +struct gc_stat { + size_t nodes; + size_t key_bytes; + + size_t nkeys; + uint64_t data; /* sectors */ + uint64_t dirty; /* sectors */ + unsigned in_use; /* percent */ +}; + +/* + * Flag bits, for how the cache set is shutting down, and what phase it's at: + * + * CACHE_SET_UNREGISTERING means we're not just shutting down, we're detaching + * all the backing devices first (their cached data gets invalidated, and they + * won't automatically reattach). + * + * CACHE_SET_STOPPING always gets set first when we're closing down a cache set; + * we'll continue to run normally for awhile with CACHE_SET_STOPPING set (i.e. + * flushing dirty data). + * + * CACHE_SET_STOPPING_2 gets set at the last phase, when it's time to shut down the + * allocation thread. + */ +#define CACHE_SET_UNREGISTERING 0 +#define CACHE_SET_STOPPING 1 +#define CACHE_SET_STOPPING_2 2 + +struct cache_set { + struct closure cl; + + struct list_head list; + struct kobject kobj; + struct kobject internal; + struct dentry *debug; + struct cache_accounting accounting; + + unsigned long flags; + + struct cache_sb sb; + + struct cache *cache[MAX_CACHES_PER_SET]; + struct cache *cache_by_alloc[MAX_CACHES_PER_SET]; + int caches_loaded; + + struct bcache_device **devices; + struct list_head cached_devs; + uint64_t cached_dev_sectors; + struct closure caching; + + struct closure_with_waitlist sb_write; + + mempool_t *search; + mempool_t *bio_meta; + struct bio_set *bio_split; + + /* For the btree cache */ + struct shrinker shrink; + + /* For the allocator itself */ + wait_queue_head_t alloc_wait; + + /* For the btree cache and anything allocation related */ + struct mutex bucket_lock; + + /* log2(bucket_size), in sectors */ + unsigned short bucket_bits; + + /* log2(block_size), in sectors */ + unsigned short block_bits; + + /* + * Default number of pages for a new btree node - may be less than a + * full bucket + */ + unsigned btree_pages; + + /* + * Lists of struct btrees; lru is the list for structs that have memory + * allocated for actual btree node, freed is for structs that do not. + * + * We never free a struct btree, except on shutdown - we just put it on + * the btree_cache_freed list and reuse it later. This simplifies the + * code, and it doesn't cost us much memory as the memory usage is + * dominated by buffers that hold the actual btree node data and those + * can be freed - and the number of struct btrees allocated is + * effectively bounded. + * + * btree_cache_freeable effectively is a small cache - we use it because + * high order page allocations can be rather expensive, and it's quite + * common to delete and allocate btree nodes in quick succession. It + * should never grow past ~2-3 nodes in practice. + */ + struct list_head btree_cache; + struct list_head btree_cache_freeable; + struct list_head btree_cache_freed; + + /* Number of elements in btree_cache + btree_cache_freeable lists */ + unsigned bucket_cache_used; + + /* + * If we need to allocate memory for a new btree node and that + * allocation fails, we can cannibalize another node in the btree cache + * to satisfy the allocation. 
However, only one thread can be doing this + * at a time, for obvious reasons - try_harder and try_wait are + * basically a lock for this that we can wait on asynchronously. The + * btree_root() macro releases the lock when it returns. + */ + struct closure *try_harder; + struct closure_waitlist try_wait; + uint64_t try_harder_start; + + /* + * When we free a btree node, we increment the gen of the bucket the + * node is in - but we can't rewrite the prios and gens until we + * finished whatever it is we were doing, otherwise after a crash the + * btree node would be freed but for say a split, we might not have the + * pointers to the new nodes inserted into the btree yet. + * + * This is a refcount that blocks prio_write() until the new keys are + * written. + */ + atomic_t prio_blocked; + struct closure_waitlist bucket_wait; + + /* + * For any bio we don't skip we subtract the number of sectors from + * rescale; when it hits 0 we rescale all the bucket priorities. + */ + atomic_t rescale; + /* + * When we invalidate buckets, we use both the priority and the amount + * of good data to determine which buckets to reuse first - to weight + * those together consistently we keep track of the smallest nonzero + * priority of any bucket. + */ + uint16_t min_prio; + + /* + * max(gen - gc_gen) for all buckets. When it gets too big we have to gc + * to keep gens from wrapping around. + */ + uint8_t need_gc; + struct gc_stat gc_stats; + size_t nbuckets; + + struct closure_with_waitlist gc; + /* Where in the btree gc currently is */ + struct bkey gc_done; + + /* + * The allocation code needs gc_mark in struct bucket to be correct, but + * it's not while a gc is in progress. Protected by bucket_lock. + */ + int gc_mark_valid; + + /* Counts how many sectors bio_insert has added to the cache */ + atomic_t sectors_to_gc; + + struct closure moving_gc; + struct closure_waitlist moving_gc_wait; + struct keybuf moving_gc_keys; + /* Number of moving GC bios in flight */ + atomic_t in_flight; + + struct btree *root; + +#ifdef CONFIG_BCACHE_DEBUG + struct btree *verify_data; + struct mutex verify_lock; +#endif + + unsigned nr_uuids; + struct uuid_entry *uuids; + BKEY_PADDED(uuid_bucket); + struct closure_with_waitlist uuid_write; + + /* + * A btree node on disk could have too many bsets for an iterator to fit + * on the stack - this is a single element mempool for btree_read_work() + */ + struct mutex fill_lock; + struct btree_iter *fill_iter; + + /* + * btree_sort() is a merge sort and requires temporary space - single + * element mempool + */ + struct mutex sort_lock; + struct bset *sort; + + /* List of buckets we're currently writing data to */ + struct list_head data_buckets; + spinlock_t data_bucket_lock; + + struct journal journal; + +#define CONGESTED_MAX 1024 + unsigned congested_last_us; + atomic_t congested; + + /* The rest of this all shows up in sysfs */ + unsigned congested_read_threshold_us; + unsigned congested_write_threshold_us; + + spinlock_t sort_time_lock; + struct time_stats sort_time; + struct time_stats btree_gc_time; + struct time_stats btree_split_time; + spinlock_t btree_read_time_lock; + struct time_stats btree_read_time; + struct time_stats try_harder_time; + + atomic_long_t cache_read_races; + atomic_long_t writeback_keys_done; + atomic_long_t writeback_keys_failed; + unsigned error_limit; + unsigned error_decay; + unsigned short journal_delay_ms; + unsigned verify:1; + unsigned key_merging_disabled:1; + unsigned gc_always_rewrite:1; + unsigned shrinker_disabled:1; + unsigned 
copy_gc_enabled:1; + +#define BUCKET_HASH_BITS 12 + struct hlist_head bucket_hash[1 << BUCKET_HASH_BITS]; +}; + +static inline bool key_merging_disabled(struct cache_set *c) +{ +#ifdef CONFIG_BCACHE_DEBUG + return c->key_merging_disabled; +#else + return 0; +#endif +} + +struct bbio { + unsigned submit_time_us; + union { + struct bkey key; + uint64_t _pad[3]; + /* + * We only need pad = 3 here because we only ever carry around a + * single pointer - i.e. the pointer we're doing io to/from. + */ + }; + struct bio bio; +}; + +static inline unsigned local_clock_us(void) +{ + return local_clock() >> 10; +} + +#define MAX_BSETS 4U + +#define BTREE_PRIO USHRT_MAX +#define INITIAL_PRIO 32768 + +#define btree_bytes(c) ((c)->btree_pages * PAGE_SIZE) +#define btree_blocks(b) \ + ((unsigned) (KEY_SIZE(&b->key) >> (b)->c->block_bits)) + +#define btree_default_blocks(c) \ + ((unsigned) ((PAGE_SECTORS * (c)->btree_pages) >> (c)->block_bits)) + +#define bucket_pages(c) ((c)->sb.bucket_size / PAGE_SECTORS) +#define bucket_bytes(c) ((c)->sb.bucket_size << 9) +#define block_bytes(c) ((c)->sb.block_size << 9) + +#define __set_bytes(i, k) (sizeof(*(i)) + (k) * sizeof(uint64_t)) +#define set_bytes(i) __set_bytes(i, i->keys) + +#define __set_blocks(i, k, c) DIV_ROUND_UP(__set_bytes(i, k), block_bytes(c)) +#define set_blocks(i, c) __set_blocks(i, (i)->keys, c) + +#define node(i, j) ((struct bkey *) ((i)->d + (j))) +#define end(i) node(i, (i)->keys) + +#define index(i, b) \ + ((size_t) (((void *) i - (void *) (b)->sets[0].data) / \ + block_bytes(b->c))) + +#define btree_data_space(b) (PAGE_SIZE << (b)->page_order) + +#define prios_per_bucket(c) \ + ((bucket_bytes(c) - sizeof(struct prio_set)) / \ + sizeof(struct bucket_disk)) +#define prio_buckets(c) \ + DIV_ROUND_UP((size_t) (c)->sb.nbuckets, prios_per_bucket(c)) + +#define JSET_MAGIC 0x245235c1a3625032ULL +#define PSET_MAGIC 0x6750e15f87337f91ULL +#define BSET_MAGIC 0x90135c78b99e07f5ULL + +#define jset_magic(c) ((c)->sb.set_magic ^ JSET_MAGIC) +#define pset_magic(c) ((c)->sb.set_magic ^ PSET_MAGIC) +#define bset_magic(c) ((c)->sb.set_magic ^ BSET_MAGIC) + +/* Bkey fields: all units are in sectors */ + +#define KEY_FIELD(name, field, offset, size) \ + BITMASK(name, struct bkey, field, offset, size) + +#define PTR_FIELD(name, offset, size) \ + static inline uint64_t name(const struct bkey *k, unsigned i) \ + { return (k->ptr[i] >> offset) & ~(((uint64_t) ~0) << size); } \ + \ + static inline void SET_##name(struct bkey *k, unsigned i, uint64_t v)\ + { \ + k->ptr[i] &= ~(~((uint64_t) ~0 << size) << offset); \ + k->ptr[i] |= v << offset; \ + } + +KEY_FIELD(KEY_PTRS, high, 60, 3) +KEY_FIELD(HEADER_SIZE, high, 58, 2) +KEY_FIELD(KEY_CSUM, high, 56, 2) +KEY_FIELD(KEY_PINNED, high, 55, 1) +KEY_FIELD(KEY_DIRTY, high, 36, 1) + +KEY_FIELD(KEY_SIZE, high, 20, 16) +KEY_FIELD(KEY_INODE, high, 0, 20) + +/* Next time I change the on disk format, KEY_OFFSET() won't be 64 bits */ + +static inline uint64_t KEY_OFFSET(const struct bkey *k) +{ + return k->low; +} + +static inline void SET_KEY_OFFSET(struct bkey *k, uint64_t v) +{ + k->low = v; +} + +PTR_FIELD(PTR_DEV, 51, 12) +PTR_FIELD(PTR_OFFSET, 8, 43) +PTR_FIELD(PTR_GEN, 0, 8) + +#define PTR_CHECK_DEV ((1 << 12) - 1) + +#define PTR(gen, offset, dev) \ + ((((uint64_t) dev) << 51) | ((uint64_t) offset) << 8 | gen) + +static inline size_t sector_to_bucket(struct cache_set *c, sector_t s) +{ + return s >> c->bucket_bits; +} + +static inline sector_t bucket_to_sector(struct cache_set *c, size_t b) +{ + return ((sector_t) b) << 
c->bucket_bits; +} + +static inline sector_t bucket_remainder(struct cache_set *c, sector_t s) +{ + return s & (c->sb.bucket_size - 1); +} + +static inline struct cache *PTR_CACHE(struct cache_set *c, + const struct bkey *k, + unsigned ptr) +{ + return c->cache[PTR_DEV(k, ptr)]; +} + +static inline size_t PTR_BUCKET_NR(struct cache_set *c, + const struct bkey *k, + unsigned ptr) +{ + return sector_to_bucket(c, PTR_OFFSET(k, ptr)); +} + +static inline struct bucket *PTR_BUCKET(struct cache_set *c, + const struct bkey *k, + unsigned ptr) +{ + return PTR_CACHE(c, k, ptr)->buckets + PTR_BUCKET_NR(c, k, ptr); +} + +/* Btree key macros */ + +/* + * The high bit being set is a relic from when we used it to do binary + * searches - it told you where a key started. It's not used anymore, + * and can probably be safely dropped. + */ +#define KEY(dev, sector, len) (struct bkey) \ +{ \ + .high = (1ULL << 63) | ((uint64_t) (len) << 20) | (dev), \ + .low = (sector) \ +} + +static inline void bkey_init(struct bkey *k) +{ + *k = KEY(0, 0, 0); +} + +#define KEY_START(k) (KEY_OFFSET(k) - KEY_SIZE(k)) +#define START_KEY(k) KEY(KEY_INODE(k), KEY_START(k), 0) +#define MAX_KEY KEY(~(~0 << 20), ((uint64_t) ~0) >> 1, 0) +#define ZERO_KEY KEY(0, 0, 0) + +/* + * This is used for various on disk data structures - cache_sb, prio_set, bset, + * jset: The checksum is _always_ the first 8 bytes of these structs + */ +#define csum_set(i) \ + crc64(((void *) (i)) + sizeof(uint64_t), \ + ((void *) end(i)) - (((void *) (i)) + sizeof(uint64_t))) + +/* Error handling macros */ + +#define btree_bug(b, ...) \ +do { \ + if (bch_cache_set_error((b)->c, __VA_ARGS__)) \ + dump_stack(); \ +} while (0) + +#define cache_bug(c, ...) \ +do { \ + if (bch_cache_set_error(c, __VA_ARGS__)) \ + dump_stack(); \ +} while (0) + +#define btree_bug_on(cond, b, ...) \ +do { \ + if (cond) \ + btree_bug(b, __VA_ARGS__); \ +} while (0) + +#define cache_bug_on(cond, c, ...) \ +do { \ + if (cond) \ + cache_bug(c, __VA_ARGS__); \ +} while (0) + +#define cache_set_err_on(cond, c, ...) \ +do { \ + if (cond) \ + bch_cache_set_error(c, __VA_ARGS__); \ +} while (0) + +/* Looping macros */ + +#define for_each_cache(ca, cs, iter) \ + for (iter = 0; ca = cs->cache[iter], iter < (cs)->sb.nr_in_set; iter++) + +#define for_each_bucket(b, ca) \ + for (b = (ca)->buckets + (ca)->sb.first_bucket; \ + b < (ca)->buckets + (ca)->sb.nbuckets; b++) + +static inline void __bkey_put(struct cache_set *c, struct bkey *k) +{ + unsigned i; + + for (i = 0; i < KEY_PTRS(k); i++) + atomic_dec_bug(&PTR_BUCKET(c, k, i)->pin); +} + +/* Blktrace macros */ + +#define blktrace_msg(c, fmt, ...) \ +do { \ + struct request_queue *q = bdev_get_queue(c->bdev); \ + if (q) \ + blk_add_trace_msg(q, fmt, ##__VA_ARGS__); \ +} while (0) + +#define blktrace_msg_all(s, fmt, ...) \ +do { \ + struct cache *_c; \ + unsigned i; \ + for_each_cache(_c, (s), i) \ + blktrace_msg(_c, fmt, ##__VA_ARGS__); \ +} while (0) + +static inline void cached_dev_put(struct cached_dev *dc) +{ + if (atomic_dec_and_test(&dc->count)) + schedule_work(&dc->detach); +} + +static inline bool cached_dev_get(struct cached_dev *dc) +{ + if (!atomic_inc_not_zero(&dc->count)) + return false; + + /* Paired with the mb in cached_dev_attach */ + smp_mb__after_atomic_inc(); + return true; +} + +/* + * bucket_gc_gen() returns the difference between the bucket's current gen and + * the oldest gen of any pointer into that bucket in the btree (last_gc). 
+ * + * bucket_disk_gen() returns the difference between the current gen and the gen + * on disk; they're both used to make sure gens don't wrap around. + */ + +static inline uint8_t bucket_gc_gen(struct bucket *b) +{ + return b->gen - b->last_gc; +} + +static inline uint8_t bucket_disk_gen(struct bucket *b) +{ + return b->gen - b->disk_gen; +} + +#define BUCKET_GC_GEN_MAX 96U +#define BUCKET_DISK_GEN_MAX 64U + +#define kobj_attribute_write(n, fn) \ + static struct kobj_attribute ksysfs_##n = __ATTR(n, S_IWUSR, NULL, fn) + +#define kobj_attribute_rw(n, show, store) \ + static struct kobj_attribute ksysfs_##n = \ + __ATTR(n, S_IWUSR|S_IRUSR, show, store) + +/* Forward declarations */ + +void bch_writeback_queue(struct cached_dev *); +void bch_writeback_add(struct cached_dev *, unsigned); + +void bch_count_io_errors(struct cache *, int, const char *); +void bch_bbio_count_io_errors(struct cache_set *, struct bio *, + int, const char *); +void bch_bbio_endio(struct cache_set *, struct bio *, int, const char *); +void bch_bbio_free(struct bio *, struct cache_set *); +struct bio *bch_bbio_alloc(struct cache_set *); + +struct bio *bch_bio_split(struct bio *, int, gfp_t, struct bio_set *); +void bch_generic_make_request(struct bio *, struct bio_split_pool *); +void __bch_submit_bbio(struct bio *, struct cache_set *); +void bch_submit_bbio(struct bio *, struct cache_set *, struct bkey *, unsigned); + +uint8_t bch_inc_gen(struct cache *, struct bucket *); +void bch_rescale_priorities(struct cache_set *, int); +bool bch_bucket_add_unused(struct cache *, struct bucket *); +void bch_allocator_thread(struct closure *); + +long bch_bucket_alloc(struct cache *, unsigned, struct closure *); +void bch_bucket_free(struct cache_set *, struct bkey *); + +int __bch_bucket_alloc_set(struct cache_set *, unsigned, + struct bkey *, int, struct closure *); +int bch_bucket_alloc_set(struct cache_set *, unsigned, + struct bkey *, int, struct closure *); + +__printf(2, 3) +bool bch_cache_set_error(struct cache_set *, const char *, ...); + +void bch_prio_write(struct cache *); +void bch_write_bdev_super(struct cached_dev *, struct closure *); + +extern struct workqueue_struct *bcache_wq, *bch_gc_wq; +extern const char * const bch_cache_modes[]; +extern struct mutex bch_register_lock; +extern struct list_head bch_cache_sets; + +extern struct kobj_type bch_cached_dev_ktype; +extern struct kobj_type bch_flash_dev_ktype; +extern struct kobj_type bch_cache_set_ktype; +extern struct kobj_type bch_cache_set_internal_ktype; +extern struct kobj_type bch_cache_ktype; + +void bch_cached_dev_release(struct kobject *); +void bch_flash_dev_release(struct kobject *); +void bch_cache_set_release(struct kobject *); +void bch_cache_release(struct kobject *); + +int bch_uuid_write(struct cache_set *); +void bcache_write_super(struct cache_set *); + +int bch_flash_dev_create(struct cache_set *c, uint64_t size); + +int bch_cached_dev_attach(struct cached_dev *, struct cache_set *); +void bch_cached_dev_detach(struct cached_dev *); +void bch_cached_dev_run(struct cached_dev *); +void bcache_device_stop(struct bcache_device *); + +void bch_cache_set_unregister(struct cache_set *); +void bch_cache_set_stop(struct cache_set *); + +struct cache_set *bch_cache_set_alloc(struct cache_sb *); +void bch_btree_cache_free(struct cache_set *); +int bch_btree_cache_alloc(struct cache_set *); +void bch_writeback_init_cached_dev(struct cached_dev *); +void bch_moving_init_cache_set(struct cache_set *); + +void bch_cache_allocator_exit(struct cache *ca); 
+int bch_cache_allocator_init(struct cache *ca);
+
+void bch_debug_exit(void);
+int bch_debug_init(struct kobject *);
+void bch_writeback_exit(void);
+int bch_writeback_init(void);
+void bch_request_exit(void);
+int bch_request_init(void);
+void bch_btree_exit(void);
+int bch_btree_init(void);
+
+#endif /* _BCACHE_H */
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
new file mode 100644
index 000000000000..bb0f7ae14b3c
--- /dev/null
+++ b/drivers/md/bcache/bset.c
@@ -0,0 +1,1190 @@
+/*
+ * Code for working with individual keys, and sorted sets of keys within a
+ * btree node
+ *
+ * Copyright 2012 Google, Inc.
+ */
+
+#include "bcache.h"
+#include "btree.h"
+#include "debug.h"
+
+#include
+
+/* Keylists */
+
+void bch_keylist_copy(struct keylist *dest, struct keylist *src)
+{
+ *dest = *src;
+
+ if (src->list == src->d) {
+ size_t n = (uint64_t *) src->top - src->d;
+ dest->top = (struct bkey *) &dest->d[n];
+ dest->list = dest->d;
+ }
+}
+
+int bch_keylist_realloc(struct keylist *l, int nptrs, struct cache_set *c)
+{
+ unsigned oldsize = (uint64_t *) l->top - l->list;
+ unsigned newsize = oldsize + 2 + nptrs;
+ uint64_t *new;
+
+ /* The journalling code doesn't handle the case where the keys to insert
+ * are bigger than an empty write: If we just return -ENOMEM here,
+ * bio_insert() and bio_invalidate() will insert the keys created so far
+ * and finish the rest when the keylist is empty.
+ */
+ if (newsize * sizeof(uint64_t) > block_bytes(c) - sizeof(struct jset))
+ return -ENOMEM;
+
+ newsize = roundup_pow_of_two(newsize);
+
+ if (newsize <= KEYLIST_INLINE ||
+ roundup_pow_of_two(oldsize) == newsize)
+ return 0;
+
+ new = krealloc(l->list == l->d ? NULL : l->list,
+ sizeof(uint64_t) * newsize, GFP_NOIO);
+
+ if (!new)
+ return -ENOMEM;
+
+ if (l->list == l->d)
+ memcpy(new, l->list, sizeof(uint64_t) * KEYLIST_INLINE);
+
+ l->list = new;
+ l->top = (struct bkey *) (&l->list[oldsize]);
+
+ return 0;
+}
+
+struct bkey *bch_keylist_pop(struct keylist *l)
+{
+ struct bkey *k = l->bottom;
+
+ if (k == l->top)
+ return NULL;
+
+ while (bkey_next(k) != l->top)
+ k = bkey_next(k);
+
+ return l->top = k;
+}
+
+/* Pointer validation */
+
+bool __bch_ptr_invalid(struct cache_set *c, int level, const struct bkey *k)
+{
+ unsigned i;
+
+ if (level && (!KEY_PTRS(k) || !KEY_SIZE(k) || KEY_DIRTY(k)))
+ goto bad;
+
+ if (!level && KEY_SIZE(k) > KEY_OFFSET(k))
+ goto bad;
+
+ if (!KEY_SIZE(k))
+ return true;
+
+ for (i = 0; i < KEY_PTRS(k); i++)
+ if (ptr_available(c, k, i)) {
+ struct cache *ca = PTR_CACHE(c, k, i);
+ size_t bucket = PTR_BUCKET_NR(c, k, i);
+ size_t r = bucket_remainder(c, PTR_OFFSET(k, i));
+
+ if (KEY_SIZE(k) + r > c->sb.bucket_size ||
+ bucket < ca->sb.first_bucket ||
+ bucket >= ca->sb.nbuckets)
+ goto bad;
+ }
+
+ return false;
+bad:
+ cache_bug(c, "spotted bad key %s: %s", pkey(k), bch_ptr_status(c, k));
+ return true;
+}
+
+bool bch_ptr_bad(struct btree *b, const struct bkey *k)
+{
+ struct bucket *g;
+ unsigned i, stale;
+
+ if (!bkey_cmp(k, &ZERO_KEY) ||
+ !KEY_PTRS(k) ||
+ bch_ptr_invalid(b, k))
+ return true;
+
+ if (KEY_PTRS(k) && PTR_DEV(k, 0) == PTR_CHECK_DEV)
+ return true;
+
+ for (i = 0; i < KEY_PTRS(k); i++)
+ if (ptr_available(b->c, k, i)) {
+ g = PTR_BUCKET(b->c, k, i);
+ stale = ptr_stale(b->c, k, i);
+
+ btree_bug_on(stale > 96, b,
+ "key too stale: %i, need_gc %u",
+ stale, b->c->need_gc);
+
+ btree_bug_on(stale && KEY_DIRTY(k) && KEY_SIZE(k),
+ b, "stale dirty pointer");
+
+ if (stale)
+ return true;
+
+#ifdef CONFIG_BCACHE_EDEBUG
+ if
(!mutex_trylock(&b->c->bucket_lock)) + continue; + + if (b->level) { + if (KEY_DIRTY(k) || + g->prio != BTREE_PRIO || + (b->c->gc_mark_valid && + GC_MARK(g) != GC_MARK_METADATA)) + goto bug; + + } else { + if (g->prio == BTREE_PRIO) + goto bug; + + if (KEY_DIRTY(k) && + b->c->gc_mark_valid && + GC_MARK(g) != GC_MARK_DIRTY) + goto bug; + } + mutex_unlock(&b->c->bucket_lock); +#endif + } + + return false; +#ifdef CONFIG_BCACHE_EDEBUG +bug: + mutex_unlock(&b->c->bucket_lock); + btree_bug(b, "inconsistent pointer %s: bucket %li pin %i " + "prio %i gen %i last_gc %i mark %llu gc_gen %i", pkey(k), + PTR_BUCKET_NR(b->c, k, i), atomic_read(&g->pin), + g->prio, g->gen, g->last_gc, GC_MARK(g), g->gc_gen); + return true; +#endif +} + +/* Key/pointer manipulation */ + +void bch_bkey_copy_single_ptr(struct bkey *dest, const struct bkey *src, + unsigned i) +{ + BUG_ON(i > KEY_PTRS(src)); + + /* Only copy the header, key, and one pointer. */ + memcpy(dest, src, 2 * sizeof(uint64_t)); + dest->ptr[0] = src->ptr[i]; + SET_KEY_PTRS(dest, 1); + /* We didn't copy the checksum so clear that bit. */ + SET_KEY_CSUM(dest, 0); +} + +bool __bch_cut_front(const struct bkey *where, struct bkey *k) +{ + unsigned i, len = 0; + + if (bkey_cmp(where, &START_KEY(k)) <= 0) + return false; + + if (bkey_cmp(where, k) < 0) + len = KEY_OFFSET(k) - KEY_OFFSET(where); + else + bkey_copy_key(k, where); + + for (i = 0; i < KEY_PTRS(k); i++) + SET_PTR_OFFSET(k, i, PTR_OFFSET(k, i) + KEY_SIZE(k) - len); + + BUG_ON(len > KEY_SIZE(k)); + SET_KEY_SIZE(k, len); + return true; +} + +bool __bch_cut_back(const struct bkey *where, struct bkey *k) +{ + unsigned len = 0; + + if (bkey_cmp(where, k) >= 0) + return false; + + BUG_ON(KEY_INODE(where) != KEY_INODE(k)); + + if (bkey_cmp(where, &START_KEY(k)) > 0) + len = KEY_OFFSET(where) - KEY_START(k); + + bkey_copy_key(k, where); + + BUG_ON(len > KEY_SIZE(k)); + SET_KEY_SIZE(k, len); + return true; +} + +static uint64_t merge_chksums(struct bkey *l, struct bkey *r) +{ + return (l->ptr[KEY_PTRS(l)] + r->ptr[KEY_PTRS(r)]) & + ~((uint64_t)1 << 63); +} + +/* Tries to merge l and r: l should be lower than r + * Returns true if we were able to merge. If we did merge, l will be the merged + * key, r will be untouched. 
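+ *
+ * (Worked example with made-up numbers: if l covers sectors 0..8 and r
+ * covers 8..16 of the same inode, with r's pointers starting exactly
+ * KEY_SIZE(l) sectors after l's, the merged l becomes one 0..16 extent.)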
+ */ +bool bch_bkey_try_merge(struct btree *b, struct bkey *l, struct bkey *r) +{ + unsigned i; + + if (key_merging_disabled(b->c)) + return false; + + if (KEY_PTRS(l) != KEY_PTRS(r) || + KEY_DIRTY(l) != KEY_DIRTY(r) || + bkey_cmp(l, &START_KEY(r))) + return false; + + for (i = 0; i < KEY_PTRS(l); i++) + if (l->ptr[i] + PTR(0, KEY_SIZE(l), 0) != r->ptr[i] || + PTR_BUCKET_NR(b->c, l, i) != PTR_BUCKET_NR(b->c, r, i)) + return false; + + /* Keys with no pointers aren't restricted to one bucket and could + * overflow KEY_SIZE + */ + if (KEY_SIZE(l) + KEY_SIZE(r) > USHRT_MAX) { + SET_KEY_OFFSET(l, KEY_OFFSET(l) + USHRT_MAX - KEY_SIZE(l)); + SET_KEY_SIZE(l, USHRT_MAX); + + bch_cut_front(l, r); + return false; + } + + if (KEY_CSUM(l)) { + if (KEY_CSUM(r)) + l->ptr[KEY_PTRS(l)] = merge_chksums(l, r); + else + SET_KEY_CSUM(l, 0); + } + + SET_KEY_OFFSET(l, KEY_OFFSET(l) + KEY_SIZE(r)); + SET_KEY_SIZE(l, KEY_SIZE(l) + KEY_SIZE(r)); + + return true; +} + +/* Binary tree stuff for auxiliary search trees */ + +static unsigned inorder_next(unsigned j, unsigned size) +{ + if (j * 2 + 1 < size) { + j = j * 2 + 1; + + while (j * 2 < size) + j *= 2; + } else + j >>= ffz(j) + 1; + + return j; +} + +static unsigned inorder_prev(unsigned j, unsigned size) +{ + if (j * 2 < size) { + j = j * 2; + + while (j * 2 + 1 < size) + j = j * 2 + 1; + } else + j >>= ffs(j); + + return j; +} + +/* I have no idea why this code works... and I'm the one who wrote it + * + * However, I do know what it does: + * Given a binary tree constructed in an array (i.e. how you normally implement + * a heap), it converts a node in the tree - referenced by array index - to the + * index it would have if you did an inorder traversal. + * + * Also tested for every j, size up to size somewhere around 6 million. 
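+ *
+ * (Small worked case, for illustration: with size 4 the tree holds nodes
+ * 1..3, and __to_inorder() maps j = 1, 2, 3 to inorder positions 2, 1, 3
+ * - i.e. the root lands in the middle, as an inorder traversal should.)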
+ *
+ * The binary tree starts at array index 1, not 0
+ * extra is a function of size:
+ * extra = (size - rounddown_pow_of_two(size - 1)) << 1;
+ */
+static unsigned __to_inorder(unsigned j, unsigned size, unsigned extra)
+{
+ unsigned b = fls(j);
+ unsigned shift = fls(size - 1) - b;
+
+ j ^= 1U << (b - 1);
+ j <<= 1;
+ j |= 1;
+ j <<= shift;
+
+ if (j > extra)
+ j -= (j - extra) >> 1;
+
+ return j;
+}
+
+static unsigned to_inorder(unsigned j, struct bset_tree *t)
+{
+ return __to_inorder(j, t->size, t->extra);
+}
+
+static unsigned __inorder_to_tree(unsigned j, unsigned size, unsigned extra)
+{
+ unsigned shift;
+
+ if (j > extra)
+ j += j - extra;
+
+ shift = ffs(j);
+
+ j >>= shift;
+ j |= roundup_pow_of_two(size) >> shift;
+
+ return j;
+}
+
+static unsigned inorder_to_tree(unsigned j, struct bset_tree *t)
+{
+ return __inorder_to_tree(j, t->size, t->extra);
+}
+
+#if 0
+void inorder_test(void)
+{
+ unsigned long done = 0;
+ ktime_t start = ktime_get();
+
+ for (unsigned size = 2;
+ size < 65536000;
+ size++) {
+ unsigned extra = (size - rounddown_pow_of_two(size - 1)) << 1;
+ unsigned i = 1, j = rounddown_pow_of_two(size - 1);
+
+ if (!(size % 4096))
+ printk(KERN_NOTICE "loop %u, %llu per us\n", size,
+ done / ktime_us_delta(ktime_get(), start));
+
+ while (1) {
+ if (__inorder_to_tree(i, size, extra) != j)
+ panic("size %10u j %10u i %10u", size, j, i);
+
+ if (__to_inorder(j, size, extra) != i)
+ panic("size %10u j %10u i %10u", size, j, i);
+
+ if (j == rounddown_pow_of_two(size) - 1)
+ break;
+
+ BUG_ON(inorder_prev(inorder_next(j, size), size) != j);
+
+ j = inorder_next(j, size);
+ i++;
+ }
+
+ done += size - 1;
+ }
+}
+#endif
+
+/*
+ * Cacheline/offset <-> bkey pointer arithmetic:
+ *
+ * t->tree is a binary search tree in an array; each node corresponds to a key
+ * in one cacheline in t->set (BSET_CACHELINE bytes).
+ *
+ * This means we don't have to store the full index of the key that a node in
+ * the binary tree points to; to_inorder() gives us the cacheline, and then
+ * bkey_float->m gives us the offset within that cacheline, in units of 8 bytes.
+ *
+ * cacheline_to_bkey() and friends abstract out all the pointer arithmetic to
+ * make this work.
+ *
+ * To construct the bfloat for an arbitrary key we need to know what the key
+ * immediately preceding it is: we have to check if the two keys differ in the
+ * bits we're going to store in bkey_float->mantissa. t->prev[j] stores the size
+ * of the previous key so we can walk backwards to it from t->tree[j]'s key.
+ */
+
+static struct bkey *cacheline_to_bkey(struct bset_tree *t, unsigned cacheline,
+ unsigned offset)
+{
+ return ((void *) t->data) + cacheline * BSET_CACHELINE + offset * 8;
+}
+
+static unsigned bkey_to_cacheline(struct bset_tree *t, struct bkey *k)
+{
+ return ((void *) k - (void *) t->data) / BSET_CACHELINE;
+}
+
+static unsigned bkey_to_cacheline_offset(struct bkey *k)
+{
+ return ((size_t) k & (BSET_CACHELINE - 1)) / sizeof(uint64_t);
+}
+
+static struct bkey *tree_to_bkey(struct bset_tree *t, unsigned j)
+{
+ return cacheline_to_bkey(t, to_inorder(j, t), t->tree[j].m);
+}
+
+static struct bkey *tree_to_prev_bkey(struct bset_tree *t, unsigned j)
+{
+ return (void *) (((uint64_t *) tree_to_bkey(t, j)) - t->prev[j]);
+}
+
+/*
+ * For the write set - the one we're currently inserting keys into - we don't
+ * maintain a full search tree, we just keep a simple lookup table in t->prev.
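+ * Since keys are laid out on 8 byte boundaries, an offset within a
+ * BSET_CACHELINE sized cacheline fits in only a few bits - which is what
+ * keeps bkey_float and the t->prev entries small.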
+ */ +static struct bkey *table_to_bkey(struct bset_tree *t, unsigned cacheline) +{ + return cacheline_to_bkey(t, cacheline, t->prev[cacheline]); +} + +static inline uint64_t shrd128(uint64_t high, uint64_t low, uint8_t shift) +{ +#ifdef CONFIG_X86_64 + asm("shrd %[shift],%[high],%[low]" + : [low] "+Rm" (low) + : [high] "R" (high), + [shift] "ci" (shift) + : "cc"); +#else + low >>= shift; + low |= (high << 1) << (63U - shift); +#endif + return low; +} + +static inline unsigned bfloat_mantissa(const struct bkey *k, + struct bkey_float *f) +{ + const uint64_t *p = &k->low - (f->exponent >> 6); + return shrd128(p[-1], p[0], f->exponent & 63) & BKEY_MANTISSA_MASK; +} + +static void make_bfloat(struct bset_tree *t, unsigned j) +{ + struct bkey_float *f = &t->tree[j]; + struct bkey *m = tree_to_bkey(t, j); + struct bkey *p = tree_to_prev_bkey(t, j); + + struct bkey *l = is_power_of_2(j) + ? t->data->start + : tree_to_prev_bkey(t, j >> ffs(j)); + + struct bkey *r = is_power_of_2(j + 1) + ? node(t->data, t->data->keys - bkey_u64s(&t->end)) + : tree_to_bkey(t, j >> (ffz(j) + 1)); + + BUG_ON(m < l || m > r); + BUG_ON(bkey_next(p) != m); + + if (KEY_INODE(l) != KEY_INODE(r)) + f->exponent = fls64(KEY_INODE(r) ^ KEY_INODE(l)) + 64; + else + f->exponent = fls64(r->low ^ l->low); + + f->exponent = max_t(int, f->exponent - BKEY_MANTISSA_BITS, 0); + + /* + * Setting f->exponent = 127 flags this node as failed, and causes the + * lookup code to fall back to comparing against the original key. + */ + + if (bfloat_mantissa(m, f) != bfloat_mantissa(p, f)) + f->mantissa = bfloat_mantissa(m, f) - 1; + else + f->exponent = 127; +} + +static void bset_alloc_tree(struct btree *b, struct bset_tree *t) +{ + if (t != b->sets) { + unsigned j = roundup(t[-1].size, + 64 / sizeof(struct bkey_float)); + + t->tree = t[-1].tree + j; + t->prev = t[-1].prev + j; + } + + while (t < b->sets + MAX_BSETS) + t++->size = 0; +} + +static void bset_build_unwritten_tree(struct btree *b) +{ + struct bset_tree *t = b->sets + b->nsets; + + bset_alloc_tree(b, t); + + if (t->tree != b->sets->tree + bset_tree_space(b)) { + t->prev[0] = bkey_to_cacheline_offset(t->data->start); + t->size = 1; + } +} + +static void bset_build_written_tree(struct btree *b) +{ + struct bset_tree *t = b->sets + b->nsets; + struct bkey *k = t->data->start; + unsigned j, cacheline = 1; + + bset_alloc_tree(b, t); + + t->size = min_t(unsigned, + bkey_to_cacheline(t, end(t->data)), + b->sets->tree + bset_tree_space(b) - t->tree); + + if (t->size < 2) { + t->size = 0; + return; + } + + t->extra = (t->size - rounddown_pow_of_two(t->size - 1)) << 1; + + /* First we figure out where the first key in each cacheline is */ + for (j = inorder_next(0, t->size); + j; + j = inorder_next(j, t->size)) { + while (bkey_to_cacheline(t, k) != cacheline) + k = bkey_next(k); + + t->prev[j] = bkey_u64s(k); + k = bkey_next(k); + cacheline++; + t->tree[j].m = bkey_to_cacheline_offset(k); + } + + while (bkey_next(k) != end(t->data)) + k = bkey_next(k); + + t->end = *k; + + /* Then we build the tree */ + for (j = inorder_next(0, t->size); + j; + j = inorder_next(j, t->size)) + make_bfloat(t, j); +} + +void bch_bset_fix_invalidated_key(struct btree *b, struct bkey *k) +{ + struct bset_tree *t; + unsigned inorder, j = 1; + + for (t = b->sets; t <= &b->sets[b->nsets]; t++) + if (k < end(t->data)) + goto found_set; + + BUG(); +found_set: + if (!t->size || !bset_written(b, t)) + return; + + inorder = bkey_to_cacheline(t, k); + + if (k == t->data->start) + goto fix_left; + + if (bkey_next(k) == 
end(t->data)) { + t->end = *k; + goto fix_right; + } + + j = inorder_to_tree(inorder, t); + + if (j && + j < t->size && + k == tree_to_bkey(t, j)) +fix_left: do { + make_bfloat(t, j); + j = j * 2; + } while (j < t->size); + + j = inorder_to_tree(inorder + 1, t); + + if (j && + j < t->size && + k == tree_to_prev_bkey(t, j)) +fix_right: do { + make_bfloat(t, j); + j = j * 2 + 1; + } while (j < t->size); +} + +void bch_bset_fix_lookup_table(struct btree *b, struct bkey *k) +{ + struct bset_tree *t = &b->sets[b->nsets]; + unsigned shift = bkey_u64s(k); + unsigned j = bkey_to_cacheline(t, k); + + /* We're getting called from btree_split() or btree_gc, just bail out */ + if (!t->size) + return; + + /* k is the key we just inserted; we need to find the entry in the + * lookup table for the first key that is strictly greater than k: + * it's either k's cacheline or the next one + */ + if (j < t->size && + table_to_bkey(t, j) <= k) + j++; + + /* Adjust all the lookup table entries, and find a new key for any that + * have gotten too big + */ + for (; j < t->size; j++) { + t->prev[j] += shift; + + if (t->prev[j] > 7) { + k = table_to_bkey(t, j - 1); + + while (k < cacheline_to_bkey(t, j, 0)) + k = bkey_next(k); + + t->prev[j] = bkey_to_cacheline_offset(k); + } + } + + if (t->size == b->sets->tree + bset_tree_space(b) - t->tree) + return; + + /* Possibly add a new entry to the end of the lookup table */ + + for (k = table_to_bkey(t, t->size - 1); + k != end(t->data); + k = bkey_next(k)) + if (t->size == bkey_to_cacheline(t, k)) { + t->prev[t->size] = bkey_to_cacheline_offset(k); + t->size++; + } +} + +void bch_bset_init_next(struct btree *b) +{ + struct bset *i = write_block(b); + + if (i != b->sets[0].data) { + b->sets[++b->nsets].data = i; + i->seq = b->sets[0].data->seq; + } else + get_random_bytes(&i->seq, sizeof(uint64_t)); + + i->magic = bset_magic(b->c); + i->version = 0; + i->keys = 0; + + bset_build_unwritten_tree(b); +} + +struct bset_search_iter { + struct bkey *l, *r; +}; + +static struct bset_search_iter bset_search_write_set(struct btree *b, + struct bset_tree *t, + const struct bkey *search) +{ + unsigned li = 0, ri = t->size; + + BUG_ON(!b->nsets && + t->size < bkey_to_cacheline(t, end(t->data))); + + while (li + 1 != ri) { + unsigned m = (li + ri) >> 1; + + if (bkey_cmp(table_to_bkey(t, m), search) > 0) + ri = m; + else + li = m; + } + + return (struct bset_search_iter) { + table_to_bkey(t, li), + ri < t->size ? table_to_bkey(t, ri) : end(t->data) + }; +} + +static struct bset_search_iter bset_search_tree(struct btree *b, + struct bset_tree *t, + const struct bkey *search) +{ + struct bkey *l, *r; + struct bkey_float *f; + unsigned inorder, j, n = 1; + + do { + unsigned p = n << 4; + p &= ((int) (p - t->size)) >> 31; + + prefetch(&t->tree[p]); + + j = n; + f = &t->tree[j]; + + /* + * n = (f->mantissa > bfloat_mantissa()) + * ? j * 2 + * : j * 2 + 1; + * + * We need to subtract 1 from f->mantissa for the sign bit trick + * to work - that's done in make_bfloat() + */ + if (likely(f->exponent != 127)) + n = j * 2 + (((unsigned) + (f->mantissa - + bfloat_mantissa(search, f))) >> 31); + else + n = (bkey_cmp(tree_to_bkey(t, j), search) > 0) + ? j * 2 + : j * 2 + 1; + } while (n < t->size); + + inorder = to_inorder(j, t); + + /* + * n would have been the node we recursed to - the low bit tells us if + * we recursed left or recursed right. 
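+ * If we went right, the node we stopped at is our left bound and its
+ * inorder successor bounds us on the right; going left is the mirror
+ * image, with the inorder predecessor as the left bound.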
+ */ + if (n & 1) { + l = cacheline_to_bkey(t, inorder, f->m); + + if (++inorder != t->size) { + f = &t->tree[inorder_next(j, t->size)]; + r = cacheline_to_bkey(t, inorder, f->m); + } else + r = end(t->data); + } else { + r = cacheline_to_bkey(t, inorder, f->m); + + if (--inorder) { + f = &t->tree[inorder_prev(j, t->size)]; + l = cacheline_to_bkey(t, inorder, f->m); + } else + l = t->data->start; + } + + return (struct bset_search_iter) {l, r}; +} + +struct bkey *__bch_bset_search(struct btree *b, struct bset_tree *t, + const struct bkey *search) +{ + struct bset_search_iter i; + + /* + * First, we search for a cacheline, then lastly we do a linear search + * within that cacheline. + * + * To search for the cacheline, there's three different possibilities: + * * The set is too small to have a search tree, so we just do a linear + * search over the whole set. + * * The set is the one we're currently inserting into; keeping a full + * auxiliary search tree up to date would be too expensive, so we + * use a much simpler lookup table to do a binary search - + * bset_search_write_set(). + * * Or we use the auxiliary search tree we constructed earlier - + * bset_search_tree() + */ + + if (unlikely(!t->size)) { + i.l = t->data->start; + i.r = end(t->data); + } else if (bset_written(b, t)) { + /* + * Each node in the auxiliary search tree covers a certain range + * of bits, and keys above and below the set it covers might + * differ outside those bits - so we have to special case the + * start and end - handle that here: + */ + + if (unlikely(bkey_cmp(search, &t->end) >= 0)) + return end(t->data); + + if (unlikely(bkey_cmp(search, t->data->start) < 0)) + return t->data->start; + + i = bset_search_tree(b, t, search); + } else + i = bset_search_write_set(b, t, search); + +#ifdef CONFIG_BCACHE_EDEBUG + BUG_ON(bset_written(b, t) && + i.l != t->data->start && + bkey_cmp(tree_to_prev_bkey(t, + inorder_to_tree(bkey_to_cacheline(t, i.l), t)), + search) > 0); + + BUG_ON(i.r != end(t->data) && + bkey_cmp(i.r, search) <= 0); +#endif + + while (likely(i.l != i.r) && + bkey_cmp(i.l, search) <= 0) + i.l = bkey_next(i.l); + + return i.l; +} + +/* Btree iterator */ + +static inline bool btree_iter_cmp(struct btree_iter_set l, + struct btree_iter_set r) +{ + int64_t c = bkey_cmp(&START_KEY(l.k), &START_KEY(r.k)); + + return c ? 
c > 0 : l.k < r.k; +} + +static inline bool btree_iter_end(struct btree_iter *iter) +{ + return !iter->used; +} + +void bch_btree_iter_push(struct btree_iter *iter, struct bkey *k, + struct bkey *end) +{ + if (k != end) + BUG_ON(!heap_add(iter, + ((struct btree_iter_set) { k, end }), + btree_iter_cmp)); +} + +struct bkey *__bch_btree_iter_init(struct btree *b, struct btree_iter *iter, + struct bkey *search, struct bset_tree *start) +{ + struct bkey *ret = NULL; + iter->size = ARRAY_SIZE(iter->data); + iter->used = 0; + + for (; start <= &b->sets[b->nsets]; start++) { + ret = bch_bset_search(b, start, search); + bch_btree_iter_push(iter, ret, end(start->data)); + } + + return ret; +} + +struct bkey *bch_btree_iter_next(struct btree_iter *iter) +{ + struct btree_iter_set unused; + struct bkey *ret = NULL; + + if (!btree_iter_end(iter)) { + ret = iter->data->k; + iter->data->k = bkey_next(iter->data->k); + + if (iter->data->k > iter->data->end) { + __WARN(); + iter->data->k = iter->data->end; + } + + if (iter->data->k == iter->data->end) + heap_pop(iter, unused, btree_iter_cmp); + else + heap_sift(iter, 0, btree_iter_cmp); + } + + return ret; +} + +struct bkey *bch_btree_iter_next_filter(struct btree_iter *iter, + struct btree *b, ptr_filter_fn fn) +{ + struct bkey *ret; + + do { + ret = bch_btree_iter_next(iter); + } while (ret && fn(b, ret)); + + return ret; +} + +struct bkey *bch_next_recurse_key(struct btree *b, struct bkey *search) +{ + struct btree_iter iter; + + bch_btree_iter_init(b, &iter, search); + return bch_btree_iter_next_filter(&iter, b, bch_ptr_bad); +} + +/* Mergesort */ + +static void btree_sort_fixup(struct btree_iter *iter) +{ + while (iter->used > 1) { + struct btree_iter_set *top = iter->data, *i = top + 1; + struct bkey *k; + + if (iter->used > 2 && + btree_iter_cmp(i[0], i[1])) + i++; + + for (k = i->k; + k != i->end && bkey_cmp(top->k, &START_KEY(k)) > 0; + k = bkey_next(k)) + if (top->k > i->k) + __bch_cut_front(top->k, k); + else if (KEY_SIZE(k)) + bch_cut_back(&START_KEY(k), top->k); + + if (top->k < i->k || k == i->k) + break; + + heap_sift(iter, i - top, btree_iter_cmp); + } +} + +static void btree_mergesort(struct btree *b, struct bset *out, + struct btree_iter *iter, + bool fixup, bool remove_stale) +{ + struct bkey *k, *last = NULL; + bool (*bad)(struct btree *, const struct bkey *) = remove_stale + ? bch_ptr_bad + : bch_ptr_invalid; + + while (!btree_iter_end(iter)) { + if (fixup && !b->level) + btree_sort_fixup(iter); + + k = bch_btree_iter_next(iter); + if (bad(b, k)) + continue; + + if (!last) { + last = out->start; + bkey_copy(last, k); + } else if (b->level || + !bch_bkey_try_merge(b, last, k)) { + last = bkey_next(last); + bkey_copy(last, k); + } + } + + out->keys = last ? 
(uint64_t *) bkey_next(last) - out->d : 0; + + pr_debug("sorted %i keys", out->keys); + bch_check_key_order(b, out); +} + +static void __btree_sort(struct btree *b, struct btree_iter *iter, + unsigned start, unsigned order, bool fixup) +{ + uint64_t start_time; + bool remove_stale = !b->written; + struct bset *out = (void *) __get_free_pages(__GFP_NOWARN|GFP_NOIO, + order); + if (!out) { + mutex_lock(&b->c->sort_lock); + out = b->c->sort; + order = ilog2(bucket_pages(b->c)); + } + + start_time = local_clock(); + + btree_mergesort(b, out, iter, fixup, remove_stale); + b->nsets = start; + + if (!fixup && !start && b->written) + bch_btree_verify(b, out); + + if (!start && order == b->page_order) { + /* + * Our temporary buffer is the same size as the btree node's + * buffer, we can just swap buffers instead of doing a big + * memcpy() + */ + + out->magic = bset_magic(b->c); + out->seq = b->sets[0].data->seq; + out->version = b->sets[0].data->version; + swap(out, b->sets[0].data); + + if (b->c->sort == b->sets[0].data) + b->c->sort = out; + } else { + b->sets[start].data->keys = out->keys; + memcpy(b->sets[start].data->start, out->start, + (void *) end(out) - (void *) out->start); + } + + if (out == b->c->sort) + mutex_unlock(&b->c->sort_lock); + else + free_pages((unsigned long) out, order); + + if (b->written) + bset_build_written_tree(b); + + if (!start) { + spin_lock(&b->c->sort_time_lock); + time_stats_update(&b->c->sort_time, start_time); + spin_unlock(&b->c->sort_time_lock); + } +} + +void bch_btree_sort_partial(struct btree *b, unsigned start) +{ + size_t oldsize = 0, order = b->page_order, keys = 0; + struct btree_iter iter; + __bch_btree_iter_init(b, &iter, NULL, &b->sets[start]); + + BUG_ON(b->sets[b->nsets].data == write_block(b) && + (b->sets[b->nsets].size || b->nsets)); + + if (b->written) + oldsize = bch_count_data(b); + + if (start) { + unsigned i; + + for (i = start; i <= b->nsets; i++) + keys += b->sets[i].data->keys; + + order = roundup_pow_of_two(__set_bytes(b->sets->data, keys)) / PAGE_SIZE; + if (order) + order = ilog2(order); + } + + __btree_sort(b, &iter, start, order, false); + + EBUG_ON(b->written && bch_count_data(b) != oldsize); +} + +void bch_btree_sort_and_fix_extents(struct btree *b, struct btree_iter *iter) +{ + BUG_ON(!b->written); + __btree_sort(b, iter, 0, b->page_order, true); +} + +void bch_btree_sort_into(struct btree *b, struct btree *new) +{ + uint64_t start_time = local_clock(); + + struct btree_iter iter; + bch_btree_iter_init(b, &iter, NULL); + + btree_mergesort(b, new->sets->data, &iter, false, true); + + spin_lock(&b->c->sort_time_lock); + time_stats_update(&b->c->sort_time, start_time); + spin_unlock(&b->c->sort_time_lock); + + bkey_copy_key(&new->key, &b->key); + new->sets->size = 0; +} + +void bch_btree_sort_lazy(struct btree *b) +{ + if (b->nsets) { + unsigned i, j, keys = 0, total; + + for (i = 0; i <= b->nsets; i++) + keys += b->sets[i].data->keys; + + total = keys; + + for (j = 0; j < b->nsets; j++) { + if (keys * 2 < total || + keys < 1000) { + bch_btree_sort_partial(b, j); + return; + } + + keys -= b->sets[j].data->keys; + } + + /* Must sort if b->nsets == 3 or we'll overflow */ + if (b->nsets >= (MAX_BSETS - 1) - b->level) { + bch_btree_sort(b); + return; + } + } + + bset_build_written_tree(b); +} + +/* Sysfs stuff */ + +struct bset_stats { + size_t nodes; + size_t sets_written, sets_unwritten; + size_t bytes_written, bytes_unwritten; + size_t floats, failed; +}; + +static int bch_btree_bset_stats(struct btree *b, struct btree_op *op, + 
				struct bset_stats *stats)
+{
+	struct bkey *k;
+	unsigned i;
+
+	stats->nodes++;
+
+	for (i = 0; i <= b->nsets; i++) {
+		struct bset_tree *t = &b->sets[i];
+		size_t bytes = t->data->keys * sizeof(uint64_t);
+		size_t j;
+
+		if (bset_written(b, t)) {
+			stats->sets_written++;
+			stats->bytes_written += bytes;
+
+			stats->floats += t->size - 1;
+
+			for (j = 1; j < t->size; j++)
+				if (t->tree[j].exponent == 127)
+					stats->failed++;
+		} else {
+			stats->sets_unwritten++;
+			stats->bytes_unwritten += bytes;
+		}
+	}
+
+	if (b->level) {
+		struct btree_iter iter;
+
+		for_each_key_filter(b, k, &iter, bch_ptr_bad) {
+			int ret = btree(bset_stats, k, b, op, stats);
+			if (ret)
+				return ret;
+		}
+	}
+
+	return 0;
+}
+
+int bch_bset_print_stats(struct cache_set *c, char *buf)
+{
+	struct btree_op op;
+	struct bset_stats t;
+	int ret;
+
+	bch_btree_op_init_stack(&op);
+	memset(&t, 0, sizeof(struct bset_stats));
+
+	ret = btree_root(bset_stats, c, &op, &t);
+	if (ret)
+		return ret;
+
+	return snprintf(buf, PAGE_SIZE,
+			"btree nodes:		%zu\n"
+			"written sets:		%zu\n"
+			"unwritten sets:	%zu\n"
+			"written key bytes:	%zu\n"
+			"unwritten key bytes:	%zu\n"
+			"floats:		%zu\n"
+			"failed:		%zu\n",
+			t.nodes,
+			t.sets_written, t.sets_unwritten,
+			t.bytes_written, t.bytes_unwritten,
+			t.floats, t.failed);
+}
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
new file mode 100644
index 000000000000..57a9cff41546
--- /dev/null
+++ b/drivers/md/bcache/bset.h
@@ -0,0 +1,379 @@
+#ifndef _BCACHE_BSET_H
+#define _BCACHE_BSET_H
+
+/*
+ * BKEYS:
+ *
+ * A bkey contains a key, a size field, a variable number of pointers, and some
+ * ancillary flag bits.
+ *
+ * We use two different functions for validating bkeys, bch_ptr_invalid and
+ * bch_ptr_bad().
+ *
+ * bch_ptr_invalid() primarily filters out keys and pointers that would be
+ * invalid due to some sort of bug, whereas bch_ptr_bad() filters out keys and
+ * pointers that occur in normal practice but don't point to real data.
+ *
+ * The one exception to the rule that ptr_invalid() filters out invalid keys is
+ * that it also filters out keys of size 0 - these are keys that have been
+ * completely overwritten. It'd be safe to delete these in memory while leaving
+ * them on disk, just unnecessary work - so we filter them out when resorting
+ * instead.
+ *
+ * We can't filter out stale keys when we're resorting, because garbage
+ * collection needs to find them to ensure bucket gens don't wrap around -
+ * unless we're rewriting the btree node those stale keys still exist on disk.
+ *
+ * We also implement functions here for removing some number of sectors from the
+ * front or the back of a bkey - this is mainly used for fixing overlapping
+ * extents, by removing the overlapping sectors from the older key.
+ *
+ * BSETS:
+ *
+ * A bset is an array of bkeys laid out contiguously in memory in sorted order,
+ * along with a header. A btree node is made up of a number of these, written at
+ * different times.
+ *
+ * There could be many of them on disk, but we never allow there to be more than
+ * 4 in memory - we lazily resort as needed.
+ *
+ * We implement code here for creating and maintaining auxiliary search trees
+ * (described below) for searching an individual bset, and on top of that we
+ * implement a btree iterator.
+ *
+ * BTREE ITERATOR:
+ *
+ * Most of the code in bcache doesn't care about an individual bset - it needs
+ * to search entire btree nodes and iterate over them in sorted order.
+ *
+ * The btree iterator code serves both functions; it iterates through the keys
+ * in a btree node in sorted order, starting from either keys after a specific
+ * point (if you pass it a search key) or the start of the btree node.
+ *
+ * AUXILIARY SEARCH TREES:
+ *
+ * Since keys are variable length, we can't use a binary search on a bset - we
+ * wouldn't be able to find the start of the next key. But binary searches are
+ * slow anyway, due to terrible cache behaviour; bcache originally used binary
+ * searches and that code topped out at under 50k lookups/second.
+ *
+ * So we need to construct some sort of lookup table. Since we only insert keys
+ * into the last (unwritten) set, most of the keys within a given btree node are
+ * usually in sets that are mostly constant. We use two different types of
+ * lookup tables to take advantage of this.
+ *
+ * Both lookup tables share in common that they don't index every key in the
+ * set; they index one key every BSET_CACHELINE bytes, and then a linear search
+ * is used for the rest.
+ *
+ * For sets that have been written to disk and are no longer being inserted
+ * into, we construct a binary search tree in an array - traversing a binary
+ * search tree in an array gives excellent locality of reference and is very
+ * fast, since both children of any node are adjacent to each other in memory
+ * (and their grandchildren, and great grandchildren...) - this means
+ * prefetching can be used to great effect.
+ *
+ * It's quite useful performance-wise to keep these nodes small - not just
+ * because they're more likely to be in L2, but also because we can prefetch
+ * more nodes on a single cacheline and thus prefetch more iterations in advance
+ * when traversing this tree.
+ *
+ * Nodes in the auxiliary search tree must contain both a key to compare against
+ * (we don't want to fetch the key from the set, that would defeat the purpose),
+ * and a pointer to the key. We use a few tricks to compress both of these.
+ *
+ * To compress the pointer, we take advantage of the fact that one node in the
+ * search tree corresponds to precisely BSET_CACHELINE bytes in the set. We have
+ * a function (to_inorder()) that takes the index of a node in a binary tree and
+ * returns what its index would be in an inorder traversal, so we only have to
+ * store the low bits of the offset.
+ *
+ * The key is 84 bits (KEY_DEV + key->key, the offset on the device). To
+ * compress that, we take advantage of the fact that when we're traversing the
+ * search tree at every iteration we know that both our search key and the key
+ * we're looking for lie within some range - bounded by our previous
+ * comparisons. (We special case the start of a search so that this is true even
+ * at the root of the tree).
+ *
+ * So if we know the key we're looking for is between a and b, and a and b
+ * don't differ above bit 50, we don't need to check anything higher than bit
+ * 50.
+ *
+ * We don't usually need the rest of the bits, either; we only need enough bits
+ * to partition the key range we're currently checking. Consider key n - the
+ * key our auxiliary search tree node corresponds to, and key p, the key
+ * immediately preceding n. The lowest bit we need to store in the auxiliary
+ * search tree is the highest bit that differs between n and p.
+ *
+ * Note that this could be bit 0 - we might sometimes need all 84 bits to do the
+ * comparison. But we'd really like our nodes in the auxiliary search tree to be
+ * of fixed size.
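+ *
+ * (A concrete example, with made-up six-bit keys: if p = 101000 and
+ * n = 101100, the highest bit that differs is bit 2, so bit 2 is the
+ * lowest bit this node's mantissa has to include.)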
+ *
+ * The solution is to make them fixed size, and when we're constructing a node
+ * check if p and n differed in the bits we needed them to. If they don't we
+ * flag that node, and when doing lookups we fall back to comparing against the
+ * real key. As long as this doesn't happen too often (and it seems to reliably
+ * happen a bit less than 1% of the time), we win - even on failures, that key
+ * is then more likely to be in cache than if we were doing binary searches all
+ * the way, since we're touching so much less memory.
+ *
+ * The keys in the auxiliary search tree are stored in (software) floating
+ * point, with an exponent and a mantissa. The exponent needs to be big enough
+ * to address all the bits in the original key, but the number of bits in the
+ * mantissa is somewhat arbitrary; more bits just gets us fewer failures.
+ *
+ * We need 7 bits for the exponent and 3 bits for the key's offset (since keys
+ * are 8 byte aligned); using 22 bits for the mantissa means a node is 4 bytes.
+ * We need one node per 128 bytes in the btree node, which means the auxiliary
+ * search trees take up 3% as much memory as the btree itself.
+ *
+ * Constructing these auxiliary search trees is moderately expensive, and we
+ * don't want to be constantly rebuilding the search tree for the last set
+ * whenever we insert another key into it. For the unwritten set, we use a much
+ * simpler lookup table - it's just a flat array, so index i in the lookup table
+ * corresponds to the i'th range of BSET_CACHELINE bytes in the set. Indexing
+ * within each byte range works the same as with the auxiliary search trees.
+ *
+ * These are much easier to keep up to date when we insert a key - we do it
+ * somewhat lazily; when we shift a key up we usually just increment the pointer
+ * to it, only when it would overflow do we go to the trouble of finding the
+ * first key in that range of bytes again.
+ */
+
+/* Btree key comparison/iteration */
+
+struct btree_iter {
+	size_t size, used;
+	struct btree_iter_set {
+		struct bkey *k, *end;
+	} data[MAX_BSETS];
+};
+
+struct bset_tree {
+	/*
+	 * We construct a binary tree in an array as if the array
+	 * started at 1, so that things line up on the same cachelines
+	 * better: see comments in bset.c at cacheline_to_bkey() for
+	 * details
+	 */
+
+	/* size of the binary tree and prev array */
+	unsigned	size;
+
+	/* function of size - precalculated for to_inorder() */
+	unsigned	extra;
+
+	/* copy of the last key in the set */
+	struct bkey	end;
+	struct bkey_float *tree;
+
+	/*
+	 * The nodes in the bset tree point to specific keys - this
+	 * array holds the sizes of the previous key.
+	 *
+	 * Conceptually it's a member of struct bkey_float, but we want
+	 * to keep bkey_float to 4 bytes and prev isn't used in the fast
+	 * path.
+	 */
+	uint8_t		*prev;
+
+	/* The actual btree node, with pointers to each sorted set */
+	struct bset	*data;
+};
+
+static __always_inline int64_t bkey_cmp(const struct bkey *l,
+					const struct bkey *r)
+{
+	return unlikely(KEY_INODE(l) != KEY_INODE(r))
+		? (int64_t) KEY_INODE(l) - (int64_t) KEY_INODE(r)
+		: (int64_t) KEY_OFFSET(l) - (int64_t) KEY_OFFSET(r);
+}
+
+static inline size_t bkey_u64s(const struct bkey *k)
+{
+	BUG_ON(KEY_CSUM(k) > 1);
+	return 2 + KEY_PTRS(k) + (KEY_CSUM(k) ?
1 : 0); +} + +static inline size_t bkey_bytes(const struct bkey *k) +{ + return bkey_u64s(k) * sizeof(uint64_t); +} + +static inline void bkey_copy(struct bkey *dest, const struct bkey *src) +{ + memcpy(dest, src, bkey_bytes(src)); +} + +static inline void bkey_copy_key(struct bkey *dest, const struct bkey *src) +{ + if (!src) + src = &KEY(0, 0, 0); + + SET_KEY_INODE(dest, KEY_INODE(src)); + SET_KEY_OFFSET(dest, KEY_OFFSET(src)); +} + +static inline struct bkey *bkey_next(const struct bkey *k) +{ + uint64_t *d = (void *) k; + return (struct bkey *) (d + bkey_u64s(k)); +} + +/* Keylists */ + +struct keylist { + struct bkey *top; + union { + uint64_t *list; + struct bkey *bottom; + }; + + /* Enough room for btree_split's keys without realloc */ +#define KEYLIST_INLINE 16 + uint64_t d[KEYLIST_INLINE]; +}; + +static inline void bch_keylist_init(struct keylist *l) +{ + l->top = (void *) (l->list = l->d); +} + +static inline void bch_keylist_push(struct keylist *l) +{ + l->top = bkey_next(l->top); +} + +static inline void bch_keylist_add(struct keylist *l, struct bkey *k) +{ + bkey_copy(l->top, k); + bch_keylist_push(l); +} + +static inline bool bch_keylist_empty(struct keylist *l) +{ + return l->top == (void *) l->list; +} + +static inline void bch_keylist_free(struct keylist *l) +{ + if (l->list != l->d) + kfree(l->list); +} + +void bch_keylist_copy(struct keylist *, struct keylist *); +struct bkey *bch_keylist_pop(struct keylist *); +int bch_keylist_realloc(struct keylist *, int, struct cache_set *); + +void bch_bkey_copy_single_ptr(struct bkey *, const struct bkey *, + unsigned); +bool __bch_cut_front(const struct bkey *, struct bkey *); +bool __bch_cut_back(const struct bkey *, struct bkey *); + +static inline bool bch_cut_front(const struct bkey *where, struct bkey *k) +{ + BUG_ON(bkey_cmp(where, k) > 0); + return __bch_cut_front(where, k); +} + +static inline bool bch_cut_back(const struct bkey *where, struct bkey *k) +{ + BUG_ON(bkey_cmp(where, &START_KEY(k)) < 0); + return __bch_cut_back(where, k); +} + +const char *bch_ptr_status(struct cache_set *, const struct bkey *); +bool __bch_ptr_invalid(struct cache_set *, int level, const struct bkey *); +bool bch_ptr_bad(struct btree *, const struct bkey *); + +static inline uint8_t gen_after(uint8_t a, uint8_t b) +{ + uint8_t r = a - b; + return r > 128U ? 
0 : r;
+}
+
+static inline uint8_t ptr_stale(struct cache_set *c, const struct bkey *k,
+				unsigned i)
+{
+	return gen_after(PTR_BUCKET(c, k, i)->gen, PTR_GEN(k, i));
+}
+
+static inline bool ptr_available(struct cache_set *c, const struct bkey *k,
+				 unsigned i)
+{
+	return (PTR_DEV(k, i) < MAX_CACHES_PER_SET) && PTR_CACHE(c, k, i);
+}
+
+
+typedef bool (*ptr_filter_fn)(struct btree *, const struct bkey *);
+
+struct bkey *bch_next_recurse_key(struct btree *, struct bkey *);
+struct bkey *bch_btree_iter_next(struct btree_iter *);
+struct bkey *bch_btree_iter_next_filter(struct btree_iter *,
+					struct btree *, ptr_filter_fn);
+
+void bch_btree_iter_push(struct btree_iter *, struct bkey *, struct bkey *);
+struct bkey *__bch_btree_iter_init(struct btree *, struct btree_iter *,
+				   struct bkey *, struct bset_tree *);
+
+/* 32 bits total: */
+#define BKEY_MID_BITS		3
+#define BKEY_EXPONENT_BITS	7
+#define BKEY_MANTISSA_BITS	22
+#define BKEY_MANTISSA_MASK	((1 << BKEY_MANTISSA_BITS) - 1)
+
+struct bkey_float {
+	unsigned	exponent:BKEY_EXPONENT_BITS;
+	unsigned	m:BKEY_MID_BITS;
+	unsigned	mantissa:BKEY_MANTISSA_BITS;
+} __packed;
+
+/*
+ * BSET_CACHELINE was originally intended to match the hardware cacheline size -
+ * it used to be 64, but I realized the lookup code would touch slightly less
+ * memory if it was 128.
+ *
+ * It defines the number of bytes (in struct bset) per struct bkey_float in
+ * the auxiliary search tree - when we're done searching the bset_float tree we
+ * have this many bytes left that we do a linear search over.
+ *
+ * Since (after level 5) every level of the bset_tree is on a new cacheline,
+ * we're touching one fewer cacheline in the bset tree in exchange for one more
+ * cacheline in the linear search - but the linear search might stop before it
+ * gets to the second cacheline.
+ */
+
+#define BSET_CACHELINE		128
+#define bset_tree_space(b)	(btree_data_space(b) / BSET_CACHELINE)
+
+#define bset_tree_bytes(b)	(bset_tree_space(b) * sizeof(struct bkey_float))
+#define bset_prev_bytes(b)	(bset_tree_space(b) * sizeof(uint8_t))
+
+void bch_bset_init_next(struct btree *);
+
+void bch_bset_fix_invalidated_key(struct btree *, struct bkey *);
+void bch_bset_fix_lookup_table(struct btree *, struct bkey *);
+
+struct bkey *__bch_bset_search(struct btree *, struct bset_tree *,
+			       const struct bkey *);
+
+static inline struct bkey *bch_bset_search(struct btree *b, struct bset_tree *t,
+					   const struct bkey *search)
+{
+	return search ? __bch_bset_search(b, t, search) : t->data->start;
+}
+
+bool bch_bkey_try_merge(struct btree *, struct bkey *, struct bkey *);
+void bch_btree_sort_lazy(struct btree *);
+void bch_btree_sort_into(struct btree *, struct btree *);
+void bch_btree_sort_and_fix_extents(struct btree *, struct btree_iter *);
+void bch_btree_sort_partial(struct btree *, unsigned);
+
+static inline void bch_btree_sort(struct btree *b)
+{
+	bch_btree_sort_partial(b, 0);
+}
+
+int bch_bset_print_stats(struct cache_set *, char *);
+
+#endif
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
new file mode 100644
index 000000000000..e7bc917ef0d7
--- /dev/null
+++ b/drivers/md/bcache/btree.c
@@ -0,0 +1,2503 @@
+/*
+ * Copyright (C) 2010 Kent Overstreet
+ *
+ * Uses a block device as cache for other block devices; optimized for SSDs.
+ * All allocation is done in buckets, which should match the erase block size
+ * of the device.
+ *
+ * Buckets containing cached data are kept on a heap sorted by priority;
+ * bucket priority is increased on cache hit, and periodically all the buckets
+ * on the heap have their priority scaled down. This currently is just used as
+ * an LRU but in the future should allow for more intelligent heuristics.
+ *
+ * Buckets have an 8 bit counter; freeing is accomplished by incrementing the
+ * counter. Garbage collection is used to remove stale pointers.
+ *
+ * Indexing is done via a btree; nodes are not necessarily fully sorted, rather
+ * as keys are inserted we only sort the pages that have not yet been written.
+ * When garbage collection is run, we resort the entire node.
+ *
+ * All configuration is done via sysfs; see Documentation/bcache.txt.
+ */
+
+#include "bcache.h"
+#include "btree.h"
+#include "debug.h"
+#include "request.h"
+
+#include <linux/slab.h>
+#include <linux/bitops.h>
+#include <linux/hash.h>
+#include <linux/prefetch.h>
+#include <linux/random.h>
+#include <linux/rcupdate.h>
+
+/*
+ * Todo:
+ * register_bcache: Return errors out to userspace correctly
+ *
+ * Writeback: don't undirty key until after a cache flush
+ *
+ * Create an iterator for key pointers
+ *
+ * On btree write error, mark bucket such that it won't be freed from the cache
+ *
+ * Journalling:
+ *   Check for bad keys in replay
+ *   Propagate barriers
+ *   Refcount journal entries in journal_replay
+ *
+ * Garbage collection:
+ *   Finish incremental gc
+ *   Gc should free old UUIDs, data for invalid UUIDs
+ *
+ * Provide a way to list backing device UUIDs we have data cached for, and
+ * probably how long it's been since we've seen them, and a way to invalidate
+ * dirty data for devices that will never be attached again
+ *
+ * Keep 1 min/5 min/15 min statistics of how busy a block device has been, so
+ * that based on that and how much dirty data we have we can keep writeback
+ * from being starved
+ *
+ * Add a tracepoint or somesuch to watch for writeback starvation
+ *
+ * When btree depth > 1 and splitting an interior node, we have to make sure
+ * alloc_bucket() cannot fail. This should be true but is not completely
+ * obvious.
+ *
+ * Make sure all allocations get charged to the root cgroup
+ *
+ * Plugging?
+ *
+ * If data write is less than hard sector size of ssd, round up offset in open
+ * bucket to the next whole sector
+ *
+ * Also lookup by cgroup in get_open_bucket()
+ *
+ * Superblock needs to be fleshed out for multiple cache devices
+ *
+ * Add a sysfs tunable for the number of writeback IOs in flight
+ *
+ * Add a sysfs tunable for the number of open data buckets
+ *
+ * IO tracking: Can we track when one process is doing io on behalf of another?
+ * IO tracking: Don't use just an average, weigh more recent stuff higher + * + * Test module load/unload + */ + +static const char * const op_types[] = { + "insert", "replace" +}; + +static const char *op_type(struct btree_op *op) +{ + return op_types[op->type]; +} + +#define MAX_NEED_GC 64 +#define MAX_SAVE_PRIO 72 + +#define PTR_DIRTY_BIT (((uint64_t) 1 << 36)) + +#define PTR_HASH(c, k) \ + (((k)->ptr[0] >> c->bucket_bits) | PTR_GEN(k, 0)) + +struct workqueue_struct *bch_gc_wq; +static struct workqueue_struct *btree_io_wq; + +void bch_btree_op_init_stack(struct btree_op *op) +{ + memset(op, 0, sizeof(struct btree_op)); + closure_init_stack(&op->cl); + op->lock = -1; + bch_keylist_init(&op->keys); +} + +/* Btree key manipulation */ + +static void bkey_put(struct cache_set *c, struct bkey *k, int level) +{ + if ((level && KEY_OFFSET(k)) || !level) + __bkey_put(c, k); +} + +/* Btree IO */ + +static uint64_t btree_csum_set(struct btree *b, struct bset *i) +{ + uint64_t crc = b->key.ptr[0]; + void *data = (void *) i + 8, *end = end(i); + + crc = crc64_update(crc, data, end - data); + return crc ^ 0xffffffffffffffff; +} + +static void btree_bio_endio(struct bio *bio, int error) +{ + struct closure *cl = bio->bi_private; + struct btree *b = container_of(cl, struct btree, io.cl); + + if (error) + set_btree_node_io_error(b); + + bch_bbio_count_io_errors(b->c, bio, error, (bio->bi_rw & WRITE) + ? "writing btree" : "reading btree"); + closure_put(cl); +} + +static void btree_bio_init(struct btree *b) +{ + BUG_ON(b->bio); + b->bio = bch_bbio_alloc(b->c); + + b->bio->bi_end_io = btree_bio_endio; + b->bio->bi_private = &b->io.cl; +} + +void bch_btree_read_done(struct closure *cl) +{ + struct btree *b = container_of(cl, struct btree, io.cl); + struct bset *i = b->sets[0].data; + struct btree_iter *iter = b->c->fill_iter; + const char *err = "bad btree header"; + BUG_ON(b->nsets || b->written); + + bch_bbio_free(b->bio, b->c); + b->bio = NULL; + + mutex_lock(&b->c->fill_lock); + iter->used = 0; + + if (btree_node_io_error(b) || + !i->seq) + goto err; + + for (; + b->written < btree_blocks(b) && i->seq == b->sets[0].data->seq; + i = write_block(b)) { + err = "unsupported bset version"; + if (i->version > BCACHE_BSET_VERSION) + goto err; + + err = "bad btree header"; + if (b->written + set_blocks(i, b->c) > btree_blocks(b)) + goto err; + + err = "bad magic"; + if (i->magic != bset_magic(b->c)) + goto err; + + err = "bad checksum"; + switch (i->version) { + case 0: + if (i->csum != csum_set(i)) + goto err; + break; + case BCACHE_BSET_VERSION: + if (i->csum != btree_csum_set(b, i)) + goto err; + break; + } + + err = "empty set"; + if (i != b->sets[0].data && !i->keys) + goto err; + + bch_btree_iter_push(iter, i->start, end(i)); + + b->written += set_blocks(i, b->c); + } + + err = "corrupted btree"; + for (i = write_block(b); + index(i, b) < btree_blocks(b); + i = ((void *) i) + block_bytes(b->c)) + if (i->seq == b->sets[0].data->seq) + goto err; + + bch_btree_sort_and_fix_extents(b, iter); + + i = b->sets[0].data; + err = "short btree key"; + if (b->sets[0].size && + bkey_cmp(&b->key, &b->sets[0].end) < 0) + goto err; + + if (b->written < btree_blocks(b)) + bch_bset_init_next(b); +out: + + mutex_unlock(&b->c->fill_lock); + + spin_lock(&b->c->btree_read_time_lock); + time_stats_update(&b->c->btree_read_time, b->io_start_time); + spin_unlock(&b->c->btree_read_time_lock); + + smp_wmb(); /* read_done is our write lock */ + set_btree_node_read_done(b); + + closure_return(cl); +err: + 
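+	/*
+	 * Every failure above lands here with 'err' naming what we choked
+	 * on; a bad btree node isn't something we can recover from, so flag
+	 * the node for anyone waiting on the read and error out the whole
+	 * cache set, then rejoin the normal completion path at 'out'.
+	 */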
set_btree_node_io_error(b); + bch_cache_set_error(b->c, "%s at bucket %lu, block %zu, %u keys", + err, PTR_BUCKET_NR(b->c, &b->key, 0), + index(i, b), i->keys); + goto out; +} + +void bch_btree_read(struct btree *b) +{ + BUG_ON(b->nsets || b->written); + + if (!closure_trylock(&b->io.cl, &b->c->cl)) + BUG(); + + b->io_start_time = local_clock(); + + btree_bio_init(b); + b->bio->bi_rw = REQ_META|READ_SYNC; + b->bio->bi_size = KEY_SIZE(&b->key) << 9; + + bio_map(b->bio, b->sets[0].data); + + pr_debug("%s", pbtree(b)); + trace_bcache_btree_read(b->bio); + bch_submit_bbio(b->bio, b->c, &b->key, 0); + + continue_at(&b->io.cl, bch_btree_read_done, system_wq); +} + +static void btree_complete_write(struct btree *b, struct btree_write *w) +{ + if (w->prio_blocked && + !atomic_sub_return(w->prio_blocked, &b->c->prio_blocked)) + wake_up(&b->c->alloc_wait); + + if (w->journal) { + atomic_dec_bug(w->journal); + __closure_wake_up(&b->c->journal.wait); + } + + if (w->owner) + closure_put(w->owner); + + w->prio_blocked = 0; + w->journal = NULL; + w->owner = NULL; +} + +static void __btree_write_done(struct closure *cl) +{ + struct btree *b = container_of(cl, struct btree, io.cl); + struct btree_write *w = btree_prev_write(b); + + bch_bbio_free(b->bio, b->c); + b->bio = NULL; + btree_complete_write(b, w); + + if (btree_node_dirty(b)) + queue_delayed_work(btree_io_wq, &b->work, + msecs_to_jiffies(30000)); + + closure_return(cl); +} + +static void btree_write_done(struct closure *cl) +{ + struct btree *b = container_of(cl, struct btree, io.cl); + struct bio_vec *bv; + int n; + + __bio_for_each_segment(bv, b->bio, n, 0) + __free_page(bv->bv_page); + + __btree_write_done(cl); +} + +static void do_btree_write(struct btree *b) +{ + struct closure *cl = &b->io.cl; + struct bset *i = b->sets[b->nsets].data; + BKEY_PADDED(key) k; + + i->version = BCACHE_BSET_VERSION; + i->csum = btree_csum_set(b, i); + + btree_bio_init(b); + b->bio->bi_rw = REQ_META|WRITE_SYNC; + b->bio->bi_size = set_blocks(i, b->c) * block_bytes(b->c); + bio_map(b->bio, i); + + bkey_copy(&k.key, &b->key); + SET_PTR_OFFSET(&k.key, 0, PTR_OFFSET(&k.key, 0) + bset_offset(b, i)); + + if (!bio_alloc_pages(b->bio, GFP_NOIO)) { + int j; + struct bio_vec *bv; + void *base = (void *) ((unsigned long) i & ~(PAGE_SIZE - 1)); + + bio_for_each_segment(bv, b->bio, j) + memcpy(page_address(bv->bv_page), + base + j * PAGE_SIZE, PAGE_SIZE); + + trace_bcache_btree_write(b->bio); + bch_submit_bbio(b->bio, b->c, &k.key, 0); + + continue_at(cl, btree_write_done, NULL); + } else { + b->bio->bi_vcnt = 0; + bio_map(b->bio, i); + + trace_bcache_btree_write(b->bio); + bch_submit_bbio(b->bio, b->c, &k.key, 0); + + closure_sync(cl); + __btree_write_done(cl); + } +} + +static void __btree_write(struct btree *b) +{ + struct bset *i = b->sets[b->nsets].data; + + BUG_ON(current->bio_list); + + closure_lock(&b->io, &b->c->cl); + cancel_delayed_work(&b->work); + + clear_bit(BTREE_NODE_dirty, &b->flags); + change_bit(BTREE_NODE_write_idx, &b->flags); + + bch_check_key_order(b, i); + BUG_ON(b->written && !i->keys); + + do_btree_write(b); + + pr_debug("%s block %i keys %i", pbtree(b), b->written, i->keys); + + b->written += set_blocks(i, b->c); + atomic_long_add(set_blocks(i, b->c) * b->c->sb.block_size, + &PTR_CACHE(b->c, &b->key, 0)->btree_sectors_written); + + bch_btree_sort_lazy(b); + + if (b->written < btree_blocks(b)) + bch_bset_init_next(b); +} + +static void btree_write_work(struct work_struct *w) +{ + struct btree *b = container_of(to_delayed_work(w), struct btree, work); 
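+	/*
+	 * This runs off the delayed work queued by bch_btree_write(); by the
+	 * time it fires the node may already have been written out (and the
+	 * dirty bit cleared), hence the btree_node_dirty() recheck under the
+	 * write lock below.
+	 */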
+ + down_write(&b->lock); + + if (btree_node_dirty(b)) + __btree_write(b); + up_write(&b->lock); +} + +void bch_btree_write(struct btree *b, bool now, struct btree_op *op) +{ + struct bset *i = b->sets[b->nsets].data; + struct btree_write *w = btree_current_write(b); + + BUG_ON(b->written && + (b->written >= btree_blocks(b) || + i->seq != b->sets[0].data->seq || + !i->keys)); + + if (!btree_node_dirty(b)) { + set_btree_node_dirty(b); + queue_delayed_work(btree_io_wq, &b->work, + msecs_to_jiffies(30000)); + } + + w->prio_blocked += b->prio_blocked; + b->prio_blocked = 0; + + if (op && op->journal && !b->level) { + if (w->journal && + journal_pin_cmp(b->c, w, op)) { + atomic_dec_bug(w->journal); + w->journal = NULL; + } + + if (!w->journal) { + w->journal = op->journal; + atomic_inc(w->journal); + } + } + + if (current->bio_list) + return; + + /* Force write if set is too big */ + if (now || + b->level || + set_bytes(i) > PAGE_SIZE - 48) { + if (op && now) { + /* Must wait on multiple writes */ + BUG_ON(w->owner); + w->owner = &op->cl; + closure_get(&op->cl); + } + + __btree_write(b); + } + BUG_ON(!b->written); +} + +/* + * Btree in memory cache - allocation/freeing + * mca -> memory cache + */ + +static void mca_reinit(struct btree *b) +{ + unsigned i; + + b->flags = 0; + b->written = 0; + b->nsets = 0; + + for (i = 0; i < MAX_BSETS; i++) + b->sets[i].size = 0; + /* + * Second loop starts at 1 because b->sets[0]->data is the memory we + * allocated + */ + for (i = 1; i < MAX_BSETS; i++) + b->sets[i].data = NULL; +} + +#define mca_reserve(c) (((c->root && c->root->level) \ + ? c->root->level : 1) * 8 + 16) +#define mca_can_free(c) \ + max_t(int, 0, c->bucket_cache_used - mca_reserve(c)) + +static void mca_data_free(struct btree *b) +{ + struct bset_tree *t = b->sets; + BUG_ON(!closure_is_unlocked(&b->io.cl)); + + if (bset_prev_bytes(b) < PAGE_SIZE) + kfree(t->prev); + else + free_pages((unsigned long) t->prev, + get_order(bset_prev_bytes(b))); + + if (bset_tree_bytes(b) < PAGE_SIZE) + kfree(t->tree); + else + free_pages((unsigned long) t->tree, + get_order(bset_tree_bytes(b))); + + free_pages((unsigned long) t->data, b->page_order); + + t->prev = NULL; + t->tree = NULL; + t->data = NULL; + list_move(&b->list, &b->c->btree_cache_freed); + b->c->bucket_cache_used--; +} + +static void mca_bucket_free(struct btree *b) +{ + BUG_ON(btree_node_dirty(b)); + + b->key.ptr[0] = 0; + hlist_del_init_rcu(&b->hash); + list_move(&b->list, &b->c->btree_cache_freeable); +} + +static unsigned btree_order(struct bkey *k) +{ + return ilog2(KEY_SIZE(k) / PAGE_SECTORS ?: 1); +} + +static void mca_data_alloc(struct btree *b, struct bkey *k, gfp_t gfp) +{ + struct bset_tree *t = b->sets; + BUG_ON(t->data); + + b->page_order = max_t(unsigned, + ilog2(b->c->btree_pages), + btree_order(k)); + + t->data = (void *) __get_free_pages(gfp, b->page_order); + if (!t->data) + goto err; + + t->tree = bset_tree_bytes(b) < PAGE_SIZE + ? kmalloc(bset_tree_bytes(b), gfp) + : (void *) __get_free_pages(gfp, get_order(bset_tree_bytes(b))); + if (!t->tree) + goto err; + + t->prev = bset_prev_bytes(b) < PAGE_SIZE + ? 
kmalloc(bset_prev_bytes(b), gfp) + : (void *) __get_free_pages(gfp, get_order(bset_prev_bytes(b))); + if (!t->prev) + goto err; + + list_move(&b->list, &b->c->btree_cache); + b->c->bucket_cache_used++; + return; +err: + mca_data_free(b); +} + +static struct btree *mca_bucket_alloc(struct cache_set *c, + struct bkey *k, gfp_t gfp) +{ + struct btree *b = kzalloc(sizeof(struct btree), gfp); + if (!b) + return NULL; + + init_rwsem(&b->lock); + lockdep_set_novalidate_class(&b->lock); + INIT_LIST_HEAD(&b->list); + INIT_DELAYED_WORK(&b->work, btree_write_work); + b->c = c; + closure_init_unlocked(&b->io); + + mca_data_alloc(b, k, gfp); + return b; +} + +static int mca_reap(struct btree *b, struct closure *cl, unsigned min_order) +{ + lockdep_assert_held(&b->c->bucket_lock); + + if (!down_write_trylock(&b->lock)) + return -ENOMEM; + + if (b->page_order < min_order) { + rw_unlock(true, b); + return -ENOMEM; + } + + BUG_ON(btree_node_dirty(b) && !b->sets[0].data); + + if (cl && btree_node_dirty(b)) + bch_btree_write(b, true, NULL); + + if (cl) + closure_wait_event_async(&b->io.wait, cl, + atomic_read(&b->io.cl.remaining) == -1); + + if (btree_node_dirty(b) || + !closure_is_unlocked(&b->io.cl) || + work_pending(&b->work.work)) { + rw_unlock(true, b); + return -EAGAIN; + } + + return 0; +} + +static int bch_mca_shrink(struct shrinker *shrink, struct shrink_control *sc) +{ + struct cache_set *c = container_of(shrink, struct cache_set, shrink); + struct btree *b, *t; + unsigned long i, nr = sc->nr_to_scan; + + if (c->shrinker_disabled) + return 0; + + if (c->try_harder) + return 0; + + /* + * If nr == 0, we're supposed to return the number of items we have + * cached. Not allowed to return -1. + */ + if (!nr) + return mca_can_free(c) * c->btree_pages; + + /* Return -1 if we can't do anything right now */ + if (sc->gfp_mask & __GFP_WAIT) + mutex_lock(&c->bucket_lock); + else if (!mutex_trylock(&c->bucket_lock)) + return -1; + + nr /= c->btree_pages; + nr = min_t(unsigned long, nr, mca_can_free(c)); + + i = 0; + list_for_each_entry_safe(b, t, &c->btree_cache_freeable, list) { + if (!nr) + break; + + if (++i > 3 && + !mca_reap(b, NULL, 0)) { + mca_data_free(b); + rw_unlock(true, b); + --nr; + } + } + + /* + * Can happen right when we first start up, before we've read in any + * btree nodes + */ + if (list_empty(&c->btree_cache)) + goto out; + + for (i = 0; nr && i < c->bucket_cache_used; i++) { + b = list_first_entry(&c->btree_cache, struct btree, list); + list_rotate_left(&c->btree_cache); + + if (!b->accessed && + !mca_reap(b, NULL, 0)) { + mca_bucket_free(b); + mca_data_free(b); + rw_unlock(true, b); + --nr; + } else + b->accessed = 0; + } +out: + nr = mca_can_free(c) * c->btree_pages; + mutex_unlock(&c->bucket_lock); + return nr; +} + +void bch_btree_cache_free(struct cache_set *c) +{ + struct btree *b; + struct closure cl; + closure_init_stack(&cl); + + if (c->shrink.list.next) + unregister_shrinker(&c->shrink); + + mutex_lock(&c->bucket_lock); + +#ifdef CONFIG_BCACHE_DEBUG + if (c->verify_data) + list_move(&c->verify_data->list, &c->btree_cache); +#endif + + list_splice(&c->btree_cache_freeable, + &c->btree_cache); + + while (!list_empty(&c->btree_cache)) { + b = list_first_entry(&c->btree_cache, struct btree, list); + + if (btree_node_dirty(b)) + btree_complete_write(b, btree_current_write(b)); + clear_bit(BTREE_NODE_dirty, &b->flags); + + mca_data_free(b); + } + + while (!list_empty(&c->btree_cache_freed)) { + b = list_first_entry(&c->btree_cache_freed, + struct btree, list); + list_del(&b->list); 
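+		/*
+		 * Nodes on btree_cache_freed already had their data buffers
+		 * released by mca_data_free(); all that's left to tear down
+		 * is any pending delayed work and the struct btree itself.
+		 */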
+ cancel_delayed_work_sync(&b->work); + kfree(b); + } + + mutex_unlock(&c->bucket_lock); +} + +int bch_btree_cache_alloc(struct cache_set *c) +{ + unsigned i; + + /* XXX: doesn't check for errors */ + + closure_init_unlocked(&c->gc); + + for (i = 0; i < mca_reserve(c); i++) + mca_bucket_alloc(c, &ZERO_KEY, GFP_KERNEL); + + list_splice_init(&c->btree_cache, + &c->btree_cache_freeable); + +#ifdef CONFIG_BCACHE_DEBUG + mutex_init(&c->verify_lock); + + c->verify_data = mca_bucket_alloc(c, &ZERO_KEY, GFP_KERNEL); + + if (c->verify_data && + c->verify_data->sets[0].data) + list_del_init(&c->verify_data->list); + else + c->verify_data = NULL; +#endif + + c->shrink.shrink = bch_mca_shrink; + c->shrink.seeks = 4; + c->shrink.batch = c->btree_pages * 2; + register_shrinker(&c->shrink); + + return 0; +} + +/* Btree in memory cache - hash table */ + +static struct hlist_head *mca_hash(struct cache_set *c, struct bkey *k) +{ + return &c->bucket_hash[hash_32(PTR_HASH(c, k), BUCKET_HASH_BITS)]; +} + +static struct btree *mca_find(struct cache_set *c, struct bkey *k) +{ + struct btree *b; + + rcu_read_lock(); + hlist_for_each_entry_rcu(b, mca_hash(c, k), hash) + if (PTR_HASH(c, &b->key) == PTR_HASH(c, k)) + goto out; + b = NULL; +out: + rcu_read_unlock(); + return b; +} + +static struct btree *mca_cannibalize(struct cache_set *c, struct bkey *k, + int level, struct closure *cl) +{ + int ret = -ENOMEM; + struct btree *i; + + if (!cl) + return ERR_PTR(-ENOMEM); + + /* + * Trying to free up some memory - i.e. reuse some btree nodes - may + * require initiating IO to flush the dirty part of the node. If we're + * running under generic_make_request(), that IO will never finish and + * we would deadlock. Returning -EAGAIN causes the cache lookup code to + * punt to workqueue and retry. + */ + if (current->bio_list) + return ERR_PTR(-EAGAIN); + + if (c->try_harder && c->try_harder != cl) { + closure_wait_event_async(&c->try_wait, cl, !c->try_harder); + return ERR_PTR(-EAGAIN); + } + + /* XXX: tracepoint */ + c->try_harder = cl; + c->try_harder_start = local_clock(); +retry: + list_for_each_entry_reverse(i, &c->btree_cache, list) { + int r = mca_reap(i, cl, btree_order(k)); + if (!r) + return i; + if (r != -ENOMEM) + ret = r; + } + + if (ret == -EAGAIN && + closure_blocking(cl)) { + mutex_unlock(&c->bucket_lock); + closure_sync(cl); + mutex_lock(&c->bucket_lock); + goto retry; + } + + return ERR_PTR(ret); +} + +/* + * We can only have one thread cannibalizing other cached btree nodes at a time, + * or we'll deadlock. We use an open coded mutex to ensure that, which a + * cannibalize_bucket() will take. This means every time we unlock the root of + * the btree, we need to release this lock if we have it held. + */ +void bch_cannibalize_unlock(struct cache_set *c, struct closure *cl) +{ + if (c->try_harder == cl) { + time_stats_update(&c->try_harder_time, c->try_harder_start); + c->try_harder = NULL; + __closure_wake_up(&c->try_wait); + } +} + +static struct btree *mca_alloc(struct cache_set *c, struct bkey *k, + int level, struct closure *cl) +{ + struct btree *b; + + lockdep_assert_held(&c->bucket_lock); + + if (mca_find(c, k)) + return NULL; + + /* btree_free() doesn't free memory; it sticks the node on the end of + * the list. Check if there's any freed nodes there: + */ + list_for_each_entry(b, &c->btree_cache_freeable, list) + if (!mca_reap(b, NULL, btree_order(k))) + goto out; + + /* We never free struct btree itself, just the memory that holds the on + * disk node. 
Check the freed list before allocating a new one: + */ + list_for_each_entry(b, &c->btree_cache_freed, list) + if (!mca_reap(b, NULL, 0)) { + mca_data_alloc(b, k, __GFP_NOWARN|GFP_NOIO); + if (!b->sets[0].data) + goto err; + else + goto out; + } + + b = mca_bucket_alloc(c, k, __GFP_NOWARN|GFP_NOIO); + if (!b) + goto err; + + BUG_ON(!down_write_trylock(&b->lock)); + if (!b->sets->data) + goto err; +out: + BUG_ON(!closure_is_unlocked(&b->io.cl)); + + bkey_copy(&b->key, k); + list_move(&b->list, &c->btree_cache); + hlist_del_init_rcu(&b->hash); + hlist_add_head_rcu(&b->hash, mca_hash(c, k)); + + lock_set_subclass(&b->lock.dep_map, level + 1, _THIS_IP_); + b->level = level; + + mca_reinit(b); + + return b; +err: + if (b) + rw_unlock(true, b); + + b = mca_cannibalize(c, k, level, cl); + if (!IS_ERR(b)) + goto out; + + return b; +} + +/** + * bch_btree_node_get - find a btree node in the cache and lock it, reading it + * in from disk if necessary. + * + * If IO is necessary, it uses the closure embedded in struct btree_op to wait; + * if that closure is in non blocking mode, will return -EAGAIN. + * + * The btree node will have either a read or a write lock held, depending on + * level and op->lock. + */ +struct btree *bch_btree_node_get(struct cache_set *c, struct bkey *k, + int level, struct btree_op *op) +{ + int i = 0; + bool write = level <= op->lock; + struct btree *b; + + BUG_ON(level < 0); +retry: + b = mca_find(c, k); + + if (!b) { + mutex_lock(&c->bucket_lock); + b = mca_alloc(c, k, level, &op->cl); + mutex_unlock(&c->bucket_lock); + + if (!b) + goto retry; + if (IS_ERR(b)) + return b; + + bch_btree_read(b); + + if (!write) + downgrade_write(&b->lock); + } else { + rw_lock(write, b, level); + if (PTR_HASH(c, &b->key) != PTR_HASH(c, k)) { + rw_unlock(write, b); + goto retry; + } + BUG_ON(b->level != level); + } + + b->accessed = 1; + + for (; i <= b->nsets && b->sets[i].size; i++) { + prefetch(b->sets[i].tree); + prefetch(b->sets[i].data); + } + + for (; i <= b->nsets; i++) + prefetch(b->sets[i].data); + + if (!closure_wait_event(&b->io.wait, &op->cl, + btree_node_read_done(b))) { + rw_unlock(write, b); + b = ERR_PTR(-EAGAIN); + } else if (btree_node_io_error(b)) { + rw_unlock(write, b); + b = ERR_PTR(-EIO); + } else + BUG_ON(!b->written); + + return b; +} + +static void btree_node_prefetch(struct cache_set *c, struct bkey *k, int level) +{ + struct btree *b; + + mutex_lock(&c->bucket_lock); + b = mca_alloc(c, k, level, NULL); + mutex_unlock(&c->bucket_lock); + + if (!IS_ERR_OR_NULL(b)) { + bch_btree_read(b); + rw_unlock(true, b); + } +} + +/* Btree alloc */ + +static void btree_node_free(struct btree *b, struct btree_op *op) +{ + unsigned i; + + /* + * The BUG_ON() in btree_node_get() implies that we must have a write + * lock on parent to free or even invalidate a node + */ + BUG_ON(op->lock <= b->level); + BUG_ON(b == b->c->root); + pr_debug("bucket %s", pbtree(b)); + + if (btree_node_dirty(b)) + btree_complete_write(b, btree_current_write(b)); + clear_bit(BTREE_NODE_dirty, &b->flags); + + if (b->prio_blocked && + !atomic_sub_return(b->prio_blocked, &b->c->prio_blocked)) + closure_wake_up(&b->c->bucket_wait); + + b->prio_blocked = 0; + + cancel_delayed_work(&b->work); + + mutex_lock(&b->c->bucket_lock); + + for (i = 0; i < KEY_PTRS(&b->key); i++) { + BUG_ON(atomic_read(&PTR_BUCKET(b->c, &b->key, i)->pin)); + + bch_inc_gen(PTR_CACHE(b->c, &b->key, i), + PTR_BUCKET(b->c, &b->key, i)); + } + + bch_bucket_free(b->c, &b->key); + mca_bucket_free(b); + mutex_unlock(&b->c->bucket_lock); +} + 
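+/*
+ * Note that freeing a node never has to hunt down the keys that point to it:
+ * bumping the bucket gens above is what invalidates those pointers. A
+ * minimal sketch of the arithmetic readers rely on (gen_after() and
+ * ptr_stale() are in bset.h; the gen values are made up for illustration):
+ *
+ *	uint8_t bucket_gen = 3, key_gen = 2;	// key predates the free
+ *	uint8_t r = bucket_gen - key_gen;	// r == 1
+ *
+ * gen_after() returns r unless r > 128 (i.e. the key's gen is "newer"
+ * modulo wraparound, which is clamped to 0), so ptr_stale() now reports
+ * the pointer stale and iteration filters it out via bch_ptr_bad().
+ */
+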
+struct btree *bch_btree_node_alloc(struct cache_set *c, int level,
+				   struct closure *cl)
+{
+	BKEY_PADDED(key) k;
+	struct btree *b = ERR_PTR(-EAGAIN);
+
+	mutex_lock(&c->bucket_lock);
+retry:
+	if (__bch_bucket_alloc_set(c, WATERMARK_METADATA, &k.key, 1, cl))
+		goto err;
+
+	SET_KEY_SIZE(&k.key, c->btree_pages * PAGE_SECTORS);
+
+	b = mca_alloc(c, &k.key, level, cl);
+	if (IS_ERR(b))
+		goto err_free;
+
+	if (!b) {
+		cache_bug(c, "Tried to allocate bucket"
+			  " that was in btree cache");
+		__bkey_put(c, &k.key);
+		goto retry;
+	}
+
+	set_btree_node_read_done(b);
+	b->accessed = 1;
+	bch_bset_init_next(b);
+
+	mutex_unlock(&c->bucket_lock);
+	return b;
+err_free:
+	bch_bucket_free(c, &k.key);
+	__bkey_put(c, &k.key);
+err:
+	mutex_unlock(&c->bucket_lock);
+	return b;
+}
+
+static struct btree *btree_node_alloc_replacement(struct btree *b,
+						  struct closure *cl)
+{
+	struct btree *n = bch_btree_node_alloc(b->c, b->level, cl);
+	if (!IS_ERR_OR_NULL(n))
+		bch_btree_sort_into(b, n);
+
+	return n;
+}
+
+/* Garbage collection */
+
+uint8_t __bch_btree_mark_key(struct cache_set *c, int level, struct bkey *k)
+{
+	uint8_t stale = 0;
+	unsigned i;
+	struct bucket *g;
+
+	/*
+	 * ptr_invalid() can't return true for the keys that mark btree nodes as
+	 * freed, but since ptr_bad() returns true we'll never actually use them
+	 * for anything and thus we don't want to mark their pointers here
+	 */
+	if (!bkey_cmp(k, &ZERO_KEY))
+		return stale;
+
+	for (i = 0; i < KEY_PTRS(k); i++) {
+		if (!ptr_available(c, k, i))
+			continue;
+
+		g = PTR_BUCKET(c, k, i);
+
+		if (gen_after(g->gc_gen, PTR_GEN(k, i)))
+			g->gc_gen = PTR_GEN(k, i);
+
+		if (ptr_stale(c, k, i)) {
+			stale = max(stale, ptr_stale(c, k, i));
+			continue;
+		}
+
+		cache_bug_on(GC_MARK(g) &&
+			     (GC_MARK(g) == GC_MARK_METADATA) != (level != 0),
+			     c, "inconsistent ptrs: mark = %llu, level = %i",
+			     GC_MARK(g), level);
+
+		if (level)
+			SET_GC_MARK(g, GC_MARK_METADATA);
+		else if (KEY_DIRTY(k))
+			SET_GC_MARK(g, GC_MARK_DIRTY);
+
+		/* guard against overflow */
+		SET_GC_SECTORS_USED(g, min_t(unsigned,
+					     GC_SECTORS_USED(g) + KEY_SIZE(k),
+					     (1 << 14) - 1));
+
+		BUG_ON(!GC_SECTORS_USED(g));
+	}
+
+	return stale;
+}
+
+#define btree_mark_key(b, k)	__bch_btree_mark_key(b->c, b->level, k)
+
+static int btree_gc_mark_node(struct btree *b, unsigned *keys,
+			      struct gc_stat *gc)
+{
+	uint8_t stale = 0;
+	unsigned last_dev = -1;
+	struct bcache_device *d = NULL;
+	struct bkey *k;
+	struct btree_iter iter;
+	struct bset_tree *t;
+
+	gc->nodes++;
+
+	for_each_key_filter(b, k, &iter, bch_ptr_invalid) {
+		if (last_dev != KEY_INODE(k)) {
+			last_dev = KEY_INODE(k);
+
+			d = KEY_INODE(k) < b->c->nr_uuids
+				? b->c->devices[last_dev]
+				: NULL;
+		}
+
+		stale = max(stale, btree_mark_key(b, k));
+
+		if (bch_ptr_bad(b, k))
+			continue;
+
+		*keys += bkey_u64s(k);
+
+		gc->key_bytes += bkey_u64s(k);
+		gc->nkeys++;
+
+		gc->data += KEY_SIZE(k);
+		if (KEY_DIRTY(k)) {
+			gc->dirty += KEY_SIZE(k);
+			if (d)
+				d->sectors_dirty_gc += KEY_SIZE(k);
+		}
+	}
+
+	for (t = b->sets; t <= &b->sets[b->nsets]; t++)
+		btree_bug_on(t->size &&
+			     bset_written(b, t) &&
+			     bkey_cmp(&b->key, &t->end) < 0,
+			     b, "found short btree key in gc");
+
+	return stale;
+}
+
+static struct btree *btree_gc_alloc(struct btree *b, struct bkey *k,
+				    struct btree_op *op)
+{
+	/*
+	 * We block priorities from being written for the duration of garbage
+	 * collection, so we can't sleep in btree_alloc() ->
+	 * bch_bucket_alloc_set(), or we'd risk deadlock - so we don't pass it
+	 * our closure.
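+	 * If the allocation fails we just keep gc'ing the old node - nothing
+	 * here depends on the copy succeeding, and the next gc pass can retry
+	 * the rewrite.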
+ */ + struct btree *n = btree_node_alloc_replacement(b, NULL); + + if (!IS_ERR_OR_NULL(n)) { + swap(b, n); + + memcpy(k->ptr, b->key.ptr, + sizeof(uint64_t) * KEY_PTRS(&b->key)); + + __bkey_put(b->c, &b->key); + atomic_inc(&b->c->prio_blocked); + b->prio_blocked++; + + btree_node_free(n, op); + up_write(&n->lock); + } + + return b; +} + +/* + * Leaving this at 2 until we've got incremental garbage collection done; it + * could be higher (and has been tested with 4) except that garbage collection + * could take much longer, adversely affecting latency. + */ +#define GC_MERGE_NODES 2U + +struct gc_merge_info { + struct btree *b; + struct bkey *k; + unsigned keys; +}; + +static void btree_gc_coalesce(struct btree *b, struct btree_op *op, + struct gc_stat *gc, struct gc_merge_info *r) +{ + unsigned nodes = 0, keys = 0, blocks; + int i; + + while (nodes < GC_MERGE_NODES && r[nodes].b) + keys += r[nodes++].keys; + + blocks = btree_default_blocks(b->c) * 2 / 3; + + if (nodes < 2 || + __set_blocks(b->sets[0].data, keys, b->c) > blocks * (nodes - 1)) + return; + + for (i = nodes - 1; i >= 0; --i) { + if (r[i].b->written) + r[i].b = btree_gc_alloc(r[i].b, r[i].k, op); + + if (r[i].b->written) + return; + } + + for (i = nodes - 1; i > 0; --i) { + struct bset *n1 = r[i].b->sets->data; + struct bset *n2 = r[i - 1].b->sets->data; + struct bkey *k, *last = NULL; + + keys = 0; + + if (i == 1) { + /* + * Last node we're not getting rid of - we're getting + * rid of the node at r[0]. Have to try and fit all of + * the remaining keys into this node; we can't ensure + * they will always fit due to rounding and variable + * length keys (shouldn't be possible in practice, + * though) + */ + if (__set_blocks(n1, n1->keys + r->keys, + b->c) > btree_blocks(r[i].b)) + return; + + keys = n2->keys; + last = &r->b->key; + } else + for (k = n2->start; + k < end(n2); + k = bkey_next(k)) { + if (__set_blocks(n1, n1->keys + keys + + bkey_u64s(k), b->c) > blocks) + break; + + last = k; + keys += bkey_u64s(k); + } + + BUG_ON(__set_blocks(n1, n1->keys + keys, + b->c) > btree_blocks(r[i].b)); + + if (last) { + bkey_copy_key(&r[i].b->key, last); + bkey_copy_key(r[i].k, last); + } + + memcpy(end(n1), + n2->start, + (void *) node(n2, keys) - (void *) n2->start); + + n1->keys += keys; + + memmove(n2->start, + node(n2, keys), + (void *) end(n2) - (void *) node(n2, keys)); + + n2->keys -= keys; + + r[i].keys = n1->keys; + r[i - 1].keys = n2->keys; + } + + btree_node_free(r->b, op); + up_write(&r->b->lock); + + pr_debug("coalesced %u nodes", nodes); + + gc->nodes--; + nodes--; + + memmove(&r[0], &r[1], sizeof(struct gc_merge_info) * nodes); + memset(&r[nodes], 0, sizeof(struct gc_merge_info)); +} + +static int btree_gc_recurse(struct btree *b, struct btree_op *op, + struct closure *writes, struct gc_stat *gc) +{ + void write(struct btree *r) + { + if (!r->written) + bch_btree_write(r, true, op); + else if (btree_node_dirty(r)) { + BUG_ON(btree_current_write(r)->owner); + btree_current_write(r)->owner = writes; + closure_get(writes); + + bch_btree_write(r, true, NULL); + } + + up_write(&r->lock); + } + + int ret = 0, stale; + unsigned i; + struct gc_merge_info r[GC_MERGE_NODES]; + + memset(r, 0, sizeof(r)); + + while ((r->k = bch_next_recurse_key(b, &b->c->gc_done))) { + r->b = bch_btree_node_get(b->c, r->k, b->level - 1, op); + + if (IS_ERR(r->b)) { + ret = PTR_ERR(r->b); + break; + } + + r->keys = 0; + stale = btree_gc_mark_node(r->b, &r->keys, gc); + + if (!b->written && + (r->b->level || stale > 10 || + b->c->gc_always_rewrite)) 
+ r->b = btree_gc_alloc(r->b, r->k, op); + + if (r->b->level) + ret = btree_gc_recurse(r->b, op, writes, gc); + + if (ret) { + write(r->b); + break; + } + + bkey_copy_key(&b->c->gc_done, r->k); + + if (!b->written) + btree_gc_coalesce(b, op, gc, r); + + if (r[GC_MERGE_NODES - 1].b) + write(r[GC_MERGE_NODES - 1].b); + + memmove(&r[1], &r[0], + sizeof(struct gc_merge_info) * (GC_MERGE_NODES - 1)); + + /* When we've got incremental GC working, we'll want to do + * if (should_resched()) + * return -EAGAIN; + */ + cond_resched(); +#if 0 + if (need_resched()) { + ret = -EAGAIN; + break; + } +#endif + } + + for (i = 1; i < GC_MERGE_NODES && r[i].b; i++) + write(r[i].b); + + /* Might have freed some children, must remove their keys */ + if (!b->written) + bch_btree_sort(b); + + return ret; +} + +static int bch_btree_gc_root(struct btree *b, struct btree_op *op, + struct closure *writes, struct gc_stat *gc) +{ + struct btree *n = NULL; + unsigned keys = 0; + int ret = 0, stale = btree_gc_mark_node(b, &keys, gc); + + if (b->level || stale > 10) + n = btree_node_alloc_replacement(b, NULL); + + if (!IS_ERR_OR_NULL(n)) + swap(b, n); + + if (b->level) + ret = btree_gc_recurse(b, op, writes, gc); + + if (!b->written || btree_node_dirty(b)) { + atomic_inc(&b->c->prio_blocked); + b->prio_blocked++; + bch_btree_write(b, true, n ? op : NULL); + } + + if (!IS_ERR_OR_NULL(n)) { + closure_sync(&op->cl); + bch_btree_set_root(b); + btree_node_free(n, op); + rw_unlock(true, b); + } + + return ret; +} + +static void btree_gc_start(struct cache_set *c) +{ + struct cache *ca; + struct bucket *b; + struct bcache_device **d; + unsigned i; + + if (!c->gc_mark_valid) + return; + + mutex_lock(&c->bucket_lock); + + c->gc_mark_valid = 0; + c->gc_done = ZERO_KEY; + + for_each_cache(ca, c, i) + for_each_bucket(b, ca) { + b->gc_gen = b->gen; + if (!atomic_read(&b->pin)) + SET_GC_MARK(b, GC_MARK_RECLAIMABLE); + } + + for (d = c->devices; + d < c->devices + c->nr_uuids; + d++) + if (*d) + (*d)->sectors_dirty_gc = 0; + + mutex_unlock(&c->bucket_lock); +} + +size_t bch_btree_gc_finish(struct cache_set *c) +{ + size_t available = 0; + struct bucket *b; + struct cache *ca; + struct bcache_device **d; + unsigned i; + + mutex_lock(&c->bucket_lock); + + set_gc_sectors(c); + c->gc_mark_valid = 1; + c->need_gc = 0; + + if (c->root) + for (i = 0; i < KEY_PTRS(&c->root->key); i++) + SET_GC_MARK(PTR_BUCKET(c, &c->root->key, i), + GC_MARK_METADATA); + + for (i = 0; i < KEY_PTRS(&c->uuid_bucket); i++) + SET_GC_MARK(PTR_BUCKET(c, &c->uuid_bucket, i), + GC_MARK_METADATA); + + for_each_cache(ca, c, i) { + uint64_t *i; + + ca->invalidate_needs_gc = 0; + + for (i = ca->sb.d; i < ca->sb.d + ca->sb.keys; i++) + SET_GC_MARK(ca->buckets + *i, GC_MARK_METADATA); + + for (i = ca->prio_buckets; + i < ca->prio_buckets + prio_buckets(ca) * 2; i++) + SET_GC_MARK(ca->buckets + *i, GC_MARK_METADATA); + + for_each_bucket(b, ca) { + b->last_gc = b->gc_gen; + c->need_gc = max(c->need_gc, bucket_gc_gen(b)); + + if (!atomic_read(&b->pin) && + GC_MARK(b) == GC_MARK_RECLAIMABLE) { + available++; + if (!GC_SECTORS_USED(b)) + bch_bucket_add_unused(ca, b); + } + } + } + + for (d = c->devices; + d < c->devices + c->nr_uuids; + d++) + if (*d) { + unsigned long last = + atomic_long_read(&((*d)->sectors_dirty)); + long difference = (*d)->sectors_dirty_gc - last; + + pr_debug("sectors dirty off by %li", difference); + + (*d)->sectors_dirty_last += difference; + + atomic_long_set(&((*d)->sectors_dirty), + (*d)->sectors_dirty_gc); + } + + mutex_unlock(&c->bucket_lock); + 
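+	/*
+	 * 'available' - the buckets gc just proved reclaimable - is handed
+	 * back to bch_btree_gc(), which uses it to compute gc_stats.in_use
+	 * as a percentage of nbuckets.
+	 */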
return available; +} + +static void bch_btree_gc(struct closure *cl) +{ + struct cache_set *c = container_of(cl, struct cache_set, gc.cl); + int ret; + unsigned long available; + struct gc_stat stats; + struct closure writes; + struct btree_op op; + + uint64_t start_time = local_clock(); + trace_bcache_gc_start(c->sb.set_uuid); + blktrace_msg_all(c, "Starting gc"); + + memset(&stats, 0, sizeof(struct gc_stat)); + closure_init_stack(&writes); + bch_btree_op_init_stack(&op); + op.lock = SHRT_MAX; + + btree_gc_start(c); + + ret = btree_root(gc_root, c, &op, &writes, &stats); + closure_sync(&op.cl); + closure_sync(&writes); + + if (ret) { + blktrace_msg_all(c, "Stopped gc"); + pr_warn("gc failed!"); + + continue_at(cl, bch_btree_gc, bch_gc_wq); + } + + /* Possibly wait for new UUIDs or whatever to hit disk */ + bch_journal_meta(c, &op.cl); + closure_sync(&op.cl); + + available = bch_btree_gc_finish(c); + + time_stats_update(&c->btree_gc_time, start_time); + + stats.key_bytes *= sizeof(uint64_t); + stats.dirty <<= 9; + stats.data <<= 9; + stats.in_use = (c->nbuckets - available) * 100 / c->nbuckets; + memcpy(&c->gc_stats, &stats, sizeof(struct gc_stat)); + blktrace_msg_all(c, "Finished gc"); + + trace_bcache_gc_end(c->sb.set_uuid); + wake_up(&c->alloc_wait); + closure_wake_up(&c->bucket_wait); + + continue_at(cl, bch_moving_gc, bch_gc_wq); +} + +void bch_queue_gc(struct cache_set *c) +{ + closure_trylock_call(&c->gc.cl, bch_btree_gc, bch_gc_wq, &c->cl); +} + +/* Initial partial gc */ + +static int bch_btree_check_recurse(struct btree *b, struct btree_op *op, + unsigned long **seen) +{ + int ret; + unsigned i; + struct bkey *k; + struct bucket *g; + struct btree_iter iter; + + for_each_key_filter(b, k, &iter, bch_ptr_invalid) { + for (i = 0; i < KEY_PTRS(k); i++) { + if (!ptr_available(b->c, k, i)) + continue; + + g = PTR_BUCKET(b->c, k, i); + + if (!__test_and_set_bit(PTR_BUCKET_NR(b->c, k, i), + seen[PTR_DEV(k, i)]) || + !ptr_stale(b->c, k, i)) { + g->gen = PTR_GEN(k, i); + + if (b->level) + g->prio = BTREE_PRIO; + else if (g->prio == BTREE_PRIO) + g->prio = INITIAL_PRIO; + } + } + + btree_mark_key(b, k); + } + + if (b->level) { + k = bch_next_recurse_key(b, &ZERO_KEY); + + while (k) { + struct bkey *p = bch_next_recurse_key(b, k); + if (p) + btree_node_prefetch(b->c, p, b->level - 1); + + ret = btree(check_recurse, k, b, op, seen); + if (ret) + return ret; + + k = p; + } + } + + return 0; +} + +int bch_btree_check(struct cache_set *c, struct btree_op *op) +{ + int ret = -ENOMEM; + unsigned i; + unsigned long *seen[MAX_CACHES_PER_SET]; + + memset(seen, 0, sizeof(seen)); + + for (i = 0; c->cache[i]; i++) { + size_t n = DIV_ROUND_UP(c->cache[i]->sb.nbuckets, 8); + seen[i] = kmalloc(n, GFP_KERNEL); + if (!seen[i]) + goto err; + + /* Disables the seen array until prio_read() uses it too */ + memset(seen[i], 0xFF, n); + } + + ret = btree_root(check_recurse, c, op, seen); +err: + for (i = 0; i < MAX_CACHES_PER_SET; i++) + kfree(seen[i]); + return ret; +} + +/* Btree insertion */ + +static void shift_keys(struct btree *b, struct bkey *where, struct bkey *insert) +{ + struct bset *i = b->sets[b->nsets].data; + + memmove((uint64_t *) where + bkey_u64s(insert), + where, + (void *) end(i) - (void *) where); + + i->keys += bkey_u64s(insert); + bkey_copy(where, insert); + bch_bset_fix_lookup_table(b, where); +} + +static bool fix_overlapping_extents(struct btree *b, + struct bkey *insert, + struct btree_iter *iter, + struct btree_op *op) +{ + void subtract_dirty(struct bkey *k, int sectors) + { + struct 
bcache_device *d = b->c->devices[KEY_INODE(k)]; + + if (KEY_DIRTY(k) && d) + atomic_long_sub(sectors, &d->sectors_dirty); + } + + unsigned old_size, sectors_found = 0; + + while (1) { + struct bkey *k = bch_btree_iter_next(iter); + if (!k || + bkey_cmp(&START_KEY(k), insert) >= 0) + break; + + if (bkey_cmp(k, &START_KEY(insert)) <= 0) + continue; + + old_size = KEY_SIZE(k); + + /* + * We might overlap with 0 size extents; we can't skip these + * because if they're in the set we're inserting to we have to + * adjust them so they don't overlap with the key we're + * inserting. But we don't want to check them for BTREE_REPLACE + * operations. + */ + + if (op->type == BTREE_REPLACE && + KEY_SIZE(k)) { + /* + * k might have been split since we inserted/found the + * key we're replacing + */ + unsigned i; + uint64_t offset = KEY_START(k) - + KEY_START(&op->replace); + + /* But it must be a subset of the replace key */ + if (KEY_START(k) < KEY_START(&op->replace) || + KEY_OFFSET(k) > KEY_OFFSET(&op->replace)) + goto check_failed; + + /* We didn't find a key that we were supposed to */ + if (KEY_START(k) > KEY_START(insert) + sectors_found) + goto check_failed; + + if (KEY_PTRS(&op->replace) != KEY_PTRS(k)) + goto check_failed; + + /* skip past gen */ + offset <<= 8; + + BUG_ON(!KEY_PTRS(&op->replace)); + + for (i = 0; i < KEY_PTRS(&op->replace); i++) + if (k->ptr[i] != op->replace.ptr[i] + offset) + goto check_failed; + + sectors_found = KEY_OFFSET(k) - KEY_START(insert); + } + + if (bkey_cmp(insert, k) < 0 && + bkey_cmp(&START_KEY(insert), &START_KEY(k)) > 0) { + /* + * We overlapped in the middle of an existing key: that + * means we have to split the old key. But we have to do + * slightly different things depending on whether the + * old key has been written out yet. + */ + + struct bkey *top; + + subtract_dirty(k, KEY_SIZE(insert)); + + if (bkey_written(b, k)) { + /* + * We insert a new key to cover the top of the + * old key, and the old key is modified in place + * to represent the bottom split. + * + * It's completely arbitrary whether the new key + * is the top or the bottom, but it has to match + * up with what btree_sort_fixup() does - it + * doesn't check for this kind of overlap, it + * depends on us inserting a new key for the top + * here. 
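+			 *
+			 * A sketch of this case (illustrative only, not part
+			 * of the original comment); "top" is the new key for
+			 * the upper half, and insert itself is added by the
+			 * caller afterwards:
+			 *
+			 *	before:  |-----------k-----------|
+			 *	insert:        |---insert---|
+			 *	after:   |--k--|            |-top-|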
+ */ + top = bch_bset_search(b, &b->sets[b->nsets], + insert); + shift_keys(b, top, k); + } else { + BKEY_PADDED(key) temp; + bkey_copy(&temp.key, k); + shift_keys(b, k, &temp.key); + top = bkey_next(k); + } + + bch_cut_front(insert, top); + bch_cut_back(&START_KEY(insert), k); + bch_bset_fix_invalidated_key(b, k); + return false; + } + + if (bkey_cmp(insert, k) < 0) { + bch_cut_front(insert, k); + } else { + if (bkey_written(b, k) && + bkey_cmp(&START_KEY(insert), &START_KEY(k)) <= 0) { + /* + * Completely overwrote, so we don't have to + * invalidate the binary search tree + */ + bch_cut_front(k, k); + } else { + __bch_cut_back(&START_KEY(insert), k); + bch_bset_fix_invalidated_key(b, k); + } + } + + subtract_dirty(k, old_size - KEY_SIZE(k)); + } + +check_failed: + if (op->type == BTREE_REPLACE) { + if (!sectors_found) { + op->insert_collision = true; + return true; + } else if (sectors_found < KEY_SIZE(insert)) { + SET_KEY_OFFSET(insert, KEY_OFFSET(insert) - + (KEY_SIZE(insert) - sectors_found)); + SET_KEY_SIZE(insert, sectors_found); + } + } + + return false; +} + +static bool btree_insert_key(struct btree *b, struct btree_op *op, + struct bkey *k) +{ + struct bset *i = b->sets[b->nsets].data; + struct bkey *m, *prev; + const char *status = "insert"; + + BUG_ON(bkey_cmp(k, &b->key) > 0); + BUG_ON(b->level && !KEY_PTRS(k)); + BUG_ON(!b->level && !KEY_OFFSET(k)); + + if (!b->level) { + struct btree_iter iter; + struct bkey search = KEY(KEY_INODE(k), KEY_START(k), 0); + + /* + * bset_search() returns the first key that is strictly greater + * than the search key - but for back merging, we want to find + * the first key that is greater than or equal to KEY_START(k) - + * unless KEY_START(k) is 0. + */ + if (KEY_OFFSET(&search)) + SET_KEY_OFFSET(&search, KEY_OFFSET(&search) - 1); + + prev = NULL; + m = bch_btree_iter_init(b, &iter, &search); + + if (fix_overlapping_extents(b, k, &iter, op)) + return false; + + while (m != end(i) && + bkey_cmp(k, &START_KEY(m)) > 0) + prev = m, m = bkey_next(m); + + if (key_merging_disabled(b->c)) + goto insert; + + /* prev is in the tree, if we merge we're done */ + status = "back merging"; + if (prev && + bch_bkey_try_merge(b, prev, k)) + goto merged; + + status = "overwrote front"; + if (m != end(i) && + KEY_PTRS(m) == KEY_PTRS(k) && !KEY_SIZE(m)) + goto copy; + + status = "front merge"; + if (m != end(i) && + bch_bkey_try_merge(b, k, m)) + goto copy; + } else + m = bch_bset_search(b, &b->sets[b->nsets], k); + +insert: shift_keys(b, m, k); +copy: bkey_copy(m, k); +merged: + bch_check_keys(b, "%s for %s at %s: %s", status, + op_type(op), pbtree(b), pkey(k)); + bch_check_key_order_msg(b, i, "%s for %s at %s: %s", status, + op_type(op), pbtree(b), pkey(k)); + + if (b->level && !KEY_OFFSET(k)) + b->prio_blocked++; + + pr_debug("%s for %s at %s: %s", status, + op_type(op), pbtree(b), pkey(k)); + + return true; +} + +bool bch_btree_insert_keys(struct btree *b, struct btree_op *op) +{ + bool ret = false; + struct bkey *k; + unsigned oldsize = bch_count_data(b); + + while ((k = bch_keylist_pop(&op->keys))) { + bkey_put(b->c, k, b->level); + ret |= btree_insert_key(b, op, k); + } + + BUG_ON(bch_count_data(b) < oldsize); + return ret; +} + +bool bch_btree_insert_check_key(struct btree *b, struct btree_op *op, + struct bio *bio) +{ + bool ret = false; + uint64_t btree_ptr = b->key.ptr[0]; + unsigned long seq = b->seq; + BKEY_PADDED(k) tmp; + + rw_unlock(false, b); + rw_lock(true, b, b->level); + + if (b->key.ptr[0] != btree_ptr || + b->seq != seq + 1 || + 
should_split(b)) + goto out; + + op->replace = KEY(op->inode, bio_end(bio), bio_sectors(bio)); + + SET_KEY_PTRS(&op->replace, 1); + get_random_bytes(&op->replace.ptr[0], sizeof(uint64_t)); + + SET_PTR_DEV(&op->replace, 0, PTR_CHECK_DEV); + + bkey_copy(&tmp.k, &op->replace); + + BUG_ON(op->type != BTREE_INSERT); + BUG_ON(!btree_insert_key(b, op, &tmp.k)); + bch_btree_write(b, false, NULL); + ret = true; +out: + downgrade_write(&b->lock); + return ret; +} + +static int btree_split(struct btree *b, struct btree_op *op) +{ + bool split, root = b == b->c->root; + struct btree *n1, *n2 = NULL, *n3 = NULL; + uint64_t start_time = local_clock(); + + if (b->level) + set_closure_blocking(&op->cl); + + n1 = btree_node_alloc_replacement(b, &op->cl); + if (IS_ERR(n1)) + goto err; + + split = set_blocks(n1->sets[0].data, n1->c) > (btree_blocks(b) * 4) / 5; + + pr_debug("%ssplitting at %s keys %i", split ? "" : "not ", + pbtree(b), n1->sets[0].data->keys); + + if (split) { + unsigned keys = 0; + + n2 = bch_btree_node_alloc(b->c, b->level, &op->cl); + if (IS_ERR(n2)) + goto err_free1; + + if (root) { + n3 = bch_btree_node_alloc(b->c, b->level + 1, &op->cl); + if (IS_ERR(n3)) + goto err_free2; + } + + bch_btree_insert_keys(n1, op); + + /* Has to be a linear search because we don't have an auxiliary + * search tree yet + */ + + while (keys < (n1->sets[0].data->keys * 3) / 5) + keys += bkey_u64s(node(n1->sets[0].data, keys)); + + bkey_copy_key(&n1->key, node(n1->sets[0].data, keys)); + keys += bkey_u64s(node(n1->sets[0].data, keys)); + + n2->sets[0].data->keys = n1->sets[0].data->keys - keys; + n1->sets[0].data->keys = keys; + + memcpy(n2->sets[0].data->start, + end(n1->sets[0].data), + n2->sets[0].data->keys * sizeof(uint64_t)); + + bkey_copy_key(&n2->key, &b->key); + + bch_keylist_add(&op->keys, &n2->key); + bch_btree_write(n2, true, op); + rw_unlock(true, n2); + } else + bch_btree_insert_keys(n1, op); + + bch_keylist_add(&op->keys, &n1->key); + bch_btree_write(n1, true, op); + + if (n3) { + bkey_copy_key(&n3->key, &MAX_KEY); + bch_btree_insert_keys(n3, op); + bch_btree_write(n3, true, op); + + closure_sync(&op->cl); + bch_btree_set_root(n3); + rw_unlock(true, n3); + } else if (root) { + op->keys.top = op->keys.bottom; + closure_sync(&op->cl); + bch_btree_set_root(n1); + } else { + unsigned i; + + bkey_copy(op->keys.top, &b->key); + bkey_copy_key(op->keys.top, &ZERO_KEY); + + for (i = 0; i < KEY_PTRS(&b->key); i++) { + uint8_t g = PTR_BUCKET(b->c, &b->key, i)->gen + 1; + + SET_PTR_GEN(op->keys.top, i, g); + } + + bch_keylist_push(&op->keys); + closure_sync(&op->cl); + atomic_inc(&b->c->prio_blocked); + } + + rw_unlock(true, n1); + btree_node_free(b, op); + + time_stats_update(&b->c->btree_split_time, start_time); + + return 0; +err_free2: + __bkey_put(n2->c, &n2->key); + btree_node_free(n2, op); + rw_unlock(true, n2); +err_free1: + __bkey_put(n1->c, &n1->key); + btree_node_free(n1, op); + rw_unlock(true, n1); +err: + if (n3 == ERR_PTR(-EAGAIN) || + n2 == ERR_PTR(-EAGAIN) || + n1 == ERR_PTR(-EAGAIN)) + return -EAGAIN; + + pr_warn("couldn't split"); + return -ENOMEM; +} + +static int bch_btree_insert_recurse(struct btree *b, struct btree_op *op, + struct keylist *stack_keys) +{ + if (b->level) { + int ret; + struct bkey *insert = op->keys.bottom; + struct bkey *k = bch_next_recurse_key(b, &START_KEY(insert)); + + if (!k) { + btree_bug(b, "no key to recurse on at level %i/%i", + b->level, b->c->root->level); + + op->keys.top = op->keys.bottom; + return -EIO; + } + + if (bkey_cmp(insert, k) > 0) { + unsigned 
i; + + if (op->type == BTREE_REPLACE) { + __bkey_put(b->c, insert); + op->keys.top = op->keys.bottom; + op->insert_collision = true; + return 0; + } + + for (i = 0; i < KEY_PTRS(insert); i++) + atomic_inc(&PTR_BUCKET(b->c, insert, i)->pin); + + bkey_copy(stack_keys->top, insert); + + bch_cut_back(k, insert); + bch_cut_front(k, stack_keys->top); + + bch_keylist_push(stack_keys); + } + + ret = btree(insert_recurse, k, b, op, stack_keys); + if (ret) + return ret; + } + + if (!bch_keylist_empty(&op->keys)) { + if (should_split(b)) { + if (op->lock <= b->c->root->level) { + BUG_ON(b->level); + op->lock = b->c->root->level + 1; + return -EINTR; + } + return btree_split(b, op); + } + + BUG_ON(write_block(b) != b->sets[b->nsets].data); + + if (bch_btree_insert_keys(b, op)) + bch_btree_write(b, false, op); + } + + return 0; +} + +int bch_btree_insert(struct btree_op *op, struct cache_set *c) +{ + int ret = 0; + struct keylist stack_keys; + + /* + * Don't want to block with the btree locked unless we have to, + * otherwise we get deadlocks with try_harder and between split/gc + */ + clear_closure_blocking(&op->cl); + + BUG_ON(bch_keylist_empty(&op->keys)); + bch_keylist_copy(&stack_keys, &op->keys); + bch_keylist_init(&op->keys); + + while (!bch_keylist_empty(&stack_keys) || + !bch_keylist_empty(&op->keys)) { + if (bch_keylist_empty(&op->keys)) { + bch_keylist_add(&op->keys, + bch_keylist_pop(&stack_keys)); + op->lock = 0; + } + + ret = btree_root(insert_recurse, c, op, &stack_keys); + + if (ret == -EAGAIN) { + ret = 0; + closure_sync(&op->cl); + } else if (ret) { + struct bkey *k; + + pr_err("error %i trying to insert key for %s", + ret, op_type(op)); + + while ((k = bch_keylist_pop(&stack_keys) ?: + bch_keylist_pop(&op->keys))) + bkey_put(c, k, 0); + } + } + + bch_keylist_free(&stack_keys); + + if (op->journal) + atomic_dec_bug(op->journal); + op->journal = NULL; + return ret; +} + +void bch_btree_set_root(struct btree *b) +{ + unsigned i; + + BUG_ON(!b->written); + + for (i = 0; i < KEY_PTRS(&b->key); i++) + BUG_ON(PTR_BUCKET(b->c, &b->key, i)->prio != BTREE_PRIO); + + mutex_lock(&b->c->bucket_lock); + list_del_init(&b->list); + mutex_unlock(&b->c->bucket_lock); + + b->c->root = b; + __bkey_put(b->c, &b->key); + + bch_journal_meta(b->c, NULL); + pr_debug("%s for %pf", pbtree(b), __builtin_return_address(0)); +} + +/* Cache lookup */ + +static int submit_partial_cache_miss(struct btree *b, struct btree_op *op, + struct bkey *k) +{ + struct search *s = container_of(op, struct search, op); + struct bio *bio = &s->bio.bio; + int ret = 0; + + while (!ret && + !op->lookup_done) { + unsigned sectors = INT_MAX; + + if (KEY_INODE(k) == op->inode) { + if (KEY_START(k) <= bio->bi_sector) + break; + + sectors = min_t(uint64_t, sectors, + KEY_START(k) - bio->bi_sector); + } + + ret = s->d->cache_miss(b, s, bio, sectors); + } + + return ret; +} + +/* + * Read from a single key, handling the initial cache miss if the key starts in + * the middle of the bio + */ +static int submit_partial_cache_hit(struct btree *b, struct btree_op *op, + struct bkey *k) +{ + struct search *s = container_of(op, struct search, op); + struct bio *bio = &s->bio.bio; + unsigned ptr; + struct bio *n; + + int ret = submit_partial_cache_miss(b, op, k); + if (ret || op->lookup_done) + return ret; + + /* XXX: figure out best pointer - for multiple cache devices */ + ptr = 0; + + PTR_BUCKET(b->c, k, ptr)->prio = INITIAL_PRIO; + + while (!op->lookup_done && + KEY_INODE(k) == op->inode && + bio->bi_sector < KEY_OFFSET(k)) { + struct bkey 
*bio_key; + sector_t sector = PTR_OFFSET(k, ptr) + + (bio->bi_sector - KEY_START(k)); + unsigned sectors = min_t(uint64_t, INT_MAX, + KEY_OFFSET(k) - bio->bi_sector); + + n = bch_bio_split(bio, sectors, GFP_NOIO, s->d->bio_split); + if (!n) + return -EAGAIN; + + if (n == bio) + op->lookup_done = true; + + bio_key = &container_of(n, struct bbio, bio)->key; + + /* + * The bucket we're reading from might be reused while our bio + * is in flight, and we could then end up reading the wrong + * data. + * + * We guard against this by checking (in cache_read_endio()) if + * the pointer is stale again; if so, we treat it as an error + * and reread from the backing device (but we don't pass that + * error up anywhere). + */ + + bch_bkey_copy_single_ptr(bio_key, k, ptr); + SET_PTR_OFFSET(bio_key, 0, sector); + + n->bi_end_io = bch_cache_read_endio; + n->bi_private = &s->cl; + + trace_bcache_cache_hit(n); + __bch_submit_bbio(n, b->c); + } + + return 0; +} + +int bch_btree_search_recurse(struct btree *b, struct btree_op *op) +{ + struct search *s = container_of(op, struct search, op); + struct bio *bio = &s->bio.bio; + + int ret = 0; + struct bkey *k; + struct btree_iter iter; + bch_btree_iter_init(b, &iter, &KEY(op->inode, bio->bi_sector, 0)); + + pr_debug("at %s searching for %u:%llu", pbtree(b), op->inode, + (uint64_t) bio->bi_sector); + + do { + k = bch_btree_iter_next_filter(&iter, b, bch_ptr_bad); + if (!k) { + /* + * b->key would be exactly what we want, except that + * pointers to btree nodes have nonzero size - we + * wouldn't go far enough + */ + + ret = submit_partial_cache_miss(b, op, + &KEY(KEY_INODE(&b->key), + KEY_OFFSET(&b->key), 0)); + break; + } + + ret = b->level + ? btree(search_recurse, k, b, op) + : submit_partial_cache_hit(b, op, k); + } while (!ret && + !op->lookup_done); + + return ret; +} + +/* Keybuf code */ + +static inline int keybuf_cmp(struct keybuf_key *l, struct keybuf_key *r) +{ + /* Overlapping keys compare equal */ + if (bkey_cmp(&l->key, &START_KEY(&r->key)) <= 0) + return -1; + if (bkey_cmp(&START_KEY(&l->key), &r->key) >= 0) + return 1; + return 0; +} + +static inline int keybuf_nonoverlapping_cmp(struct keybuf_key *l, + struct keybuf_key *r) +{ + return clamp_t(int64_t, bkey_cmp(&l->key, &r->key), -1, 1); +} + +static int bch_btree_refill_keybuf(struct btree *b, struct btree_op *op, + struct keybuf *buf, struct bkey *end) +{ + struct btree_iter iter; + bch_btree_iter_init(b, &iter, &buf->last_scanned); + + while (!array_freelist_empty(&buf->freelist)) { + struct bkey *k = bch_btree_iter_next_filter(&iter, b, + bch_ptr_bad); + + if (!b->level) { + if (!k) { + buf->last_scanned = b->key; + break; + } + + buf->last_scanned = *k; + if (bkey_cmp(&buf->last_scanned, end) >= 0) + break; + + if (buf->key_predicate(buf, k)) { + struct keybuf_key *w; + + pr_debug("%s", pkey(k)); + + spin_lock(&buf->lock); + + w = array_alloc(&buf->freelist); + + w->private = NULL; + bkey_copy(&w->key, k); + + if (RB_INSERT(&buf->keys, w, node, keybuf_cmp)) + array_free(&buf->freelist, w); + + spin_unlock(&buf->lock); + } + } else { + if (!k) + break; + + btree(refill_keybuf, k, b, op, buf, end); + /* + * Might get an error here, but can't really do anything + * and it'll get logged elsewhere. Just read what we + * can. 
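+			 * (When the keybuf runs dry, bch_keybuf_next_rescan()
+			 * calls bch_refill_keybuf() to scan further.)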
+ */ + + if (bkey_cmp(&buf->last_scanned, end) >= 0) + break; + + cond_resched(); + } + } + + return 0; +} + +void bch_refill_keybuf(struct cache_set *c, struct keybuf *buf, + struct bkey *end) +{ + struct bkey start = buf->last_scanned; + struct btree_op op; + bch_btree_op_init_stack(&op); + + cond_resched(); + + btree_root(refill_keybuf, c, &op, buf, end); + closure_sync(&op.cl); + + pr_debug("found %s keys from %llu:%llu to %llu:%llu", + RB_EMPTY_ROOT(&buf->keys) ? "no" : + array_freelist_empty(&buf->freelist) ? "some" : "a few", + KEY_INODE(&start), KEY_OFFSET(&start), + KEY_INODE(&buf->last_scanned), KEY_OFFSET(&buf->last_scanned)); + + spin_lock(&buf->lock); + + if (!RB_EMPTY_ROOT(&buf->keys)) { + struct keybuf_key *w; + w = RB_FIRST(&buf->keys, struct keybuf_key, node); + buf->start = START_KEY(&w->key); + + w = RB_LAST(&buf->keys, struct keybuf_key, node); + buf->end = w->key; + } else { + buf->start = MAX_KEY; + buf->end = MAX_KEY; + } + + spin_unlock(&buf->lock); +} + +static void __bch_keybuf_del(struct keybuf *buf, struct keybuf_key *w) +{ + rb_erase(&w->node, &buf->keys); + array_free(&buf->freelist, w); +} + +void bch_keybuf_del(struct keybuf *buf, struct keybuf_key *w) +{ + spin_lock(&buf->lock); + __bch_keybuf_del(buf, w); + spin_unlock(&buf->lock); +} + +bool bch_keybuf_check_overlapping(struct keybuf *buf, struct bkey *start, + struct bkey *end) +{ + bool ret = false; + struct keybuf_key *p, *w, s; + s.key = *start; + + if (bkey_cmp(end, &buf->start) <= 0 || + bkey_cmp(start, &buf->end) >= 0) + return false; + + spin_lock(&buf->lock); + w = RB_GREATER(&buf->keys, s, node, keybuf_nonoverlapping_cmp); + + while (w && bkey_cmp(&START_KEY(&w->key), end) < 0) { + p = w; + w = RB_NEXT(w, node); + + if (p->private) + ret = true; + else + __bch_keybuf_del(buf, p); + } + + spin_unlock(&buf->lock); + return ret; +} + +struct keybuf_key *bch_keybuf_next(struct keybuf *buf) +{ + struct keybuf_key *w; + spin_lock(&buf->lock); + + w = RB_FIRST(&buf->keys, struct keybuf_key, node); + + while (w && w->private) + w = RB_NEXT(w, node); + + if (w) + w->private = ERR_PTR(-EINTR); + + spin_unlock(&buf->lock); + return w; +} + +struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *c, + struct keybuf *buf, + struct bkey *end) +{ + struct keybuf_key *ret; + + while (1) { + ret = bch_keybuf_next(buf); + if (ret) + break; + + if (bkey_cmp(&buf->last_scanned, end) >= 0) { + pr_debug("scan finished"); + break; + } + + bch_refill_keybuf(c, buf, end); + } + + return ret; +} + +void bch_keybuf_init(struct keybuf *buf, keybuf_pred_fn *fn) +{ + buf->key_predicate = fn; + buf->last_scanned = MAX_KEY; + buf->keys = RB_ROOT; + + spin_lock_init(&buf->lock); + array_allocator_init(&buf->freelist); +} + +void bch_btree_exit(void) +{ + if (btree_io_wq) + destroy_workqueue(btree_io_wq); + if (bch_gc_wq) + destroy_workqueue(bch_gc_wq); +} + +int __init bch_btree_init(void) +{ + if (!(bch_gc_wq = create_singlethread_workqueue("bch_btree_gc")) || + !(btree_io_wq = create_singlethread_workqueue("bch_btree_io"))) + return -ENOMEM; + + return 0; +} diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h new file mode 100644 index 000000000000..af4a7092a28c --- /dev/null +++ b/drivers/md/bcache/btree.h @@ -0,0 +1,405 @@ +#ifndef _BCACHE_BTREE_H +#define _BCACHE_BTREE_H + +/* + * THE BTREE: + * + * At a high level, bcache's btree is relatively standard b+ tree. All keys and + * pointers are in the leaves; interior nodes only have pointers to the child + * nodes. 
+ *
+ * In the interior nodes, a struct bkey always points to a child btree node, and
+ * the key is the highest key in the child node - except that the highest key in
+ * an interior node is always MAX_KEY. The size field refers to the size on disk
+ * of the child node - this would allow us to have variable sized btree nodes
+ * (handy for keeping the depth of the btree 1 by expanding just the root).
+ *
+ * Btree nodes are themselves log structured, but this is hidden fairly
+ * thoroughly. Btree nodes on disk will in practice have extents that overlap
+ * (because they were written at different times), but in memory we never have
+ * overlapping extents - when we read in a btree node from disk, the first thing
+ * we do is resort all the sets of keys with a mergesort, and in the same pass
+ * we check for overlapping extents and adjust them appropriately.
+ *
+ * struct btree_op is a central interface to the btree code. It's used for
+ * specifying read vs. write locking, and the embedded closure is used for
+ * waiting on IO or reserve memory.
+ *
+ * BTREE CACHE:
+ *
+ * Btree nodes are cached in memory; traversing the btree might require reading
+ * in btree nodes which is handled mostly transparently.
+ *
+ * bch_btree_node_get() looks up a btree node in the cache and reads it in from
+ * disk if necessary. This function is almost never called directly though - the
+ * btree() macro is used to get a btree node, call some function on it, and
+ * unlock the node after the function returns.
+ *
+ * The root is special cased - it's taken out of the cache's lru (thus pinning
+ * it in memory), so we can find the root of the btree by just dereferencing a
+ * pointer instead of looking it up in the cache. This makes locking a bit
+ * tricky, since the root pointer is protected by the lock in the btree node it
+ * points to - the btree_root() macro handles this.
+ *
+ * In various places we must be able to allocate memory for multiple btree nodes
+ * in order to make forward progress. To do this we use the btree cache itself
+ * as a reserve; if __get_free_pages() fails, we'll find a node in the btree
+ * cache we can reuse. We can't allow more than one thread to be doing this at a
+ * time, so there's a lock, implemented by a pointer to the btree_op closure -
+ * this allows the btree_root() macro to implicitly release this lock.
+ *
+ * BTREE IO:
+ *
+ * Btree nodes never have to be explicitly read in; bch_btree_node_get() handles
+ * this.
+ *
+ * For writing, we have two btree_write structs embedded in struct btree - one
+ * write in flight, and one being set up, and we toggle between them.
+ *
+ * Writing is done with a single function - bch_btree_write() really serves two
+ * different purposes and should be broken up into two different functions. When
+ * passing now = false, it merely indicates that the node is now dirty - calling
+ * it ensures that the dirty keys will be written at some point in the future.
+ *
+ * When passing now = true, bch_btree_write() causes a write to happen
+ * "immediately" (if there was already a write in flight, it'll cause the write
+ * to happen as soon as the previous write completes). It returns immediately
+ * though - but it takes a refcount on the closure in struct btree_op you passed
+ * to it, so a closure_sync() later can be used to wait for the write to
+ * complete.
+ *
+ * This is handy because btree_split() and garbage collection can issue writes
+ * in parallel, reducing the amount of time they have to hold write locks.
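+ *
+ * An illustrative sketch of the two modes (not part of the original comment;
+ * assumes a dirty node b and a struct btree_op op whose closure we can wait
+ * on):
+ *
+ *	bch_btree_write(b, false, NULL);	mark dirty; written later
+ *
+ *	bch_btree_write(b, true, op);		start the write now
+ *	closure_sync(&op->cl);			wait for it to complete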
+ *
+ * LOCKING:
+ *
+ * When traversing the btree, we may need write locks starting at some level -
+ * inserting a key into the btree will typically only require a write lock on
+ * the leaf node.
+ *
+ * This is specified with the lock field in struct btree_op; lock = 0 means we
+ * take write locks at level <= 0, i.e. only leaf nodes. bch_btree_node_get()
+ * checks this field and returns the node with the appropriate lock held.
+ *
+ * If, after traversing the btree, the insertion code discovers it has to split
+ * then it must restart from the root and take new locks - to do this it changes
+ * the lock field and returns -EINTR, which causes the btree_root() macro to
+ * loop.
+ *
+ * Handling cache misses requires a different mechanism for upgrading to a write
+ * lock. We do cache lookups with only a read lock held, but if we get a cache
+ * miss and we wish to insert this data into the cache, we have to insert a
+ * placeholder key to detect races - otherwise, we could race with a write and
+ * overwrite the data that was just written to the cache with stale data from
+ * the backing device.
+ *
+ * For this we use a sequence number that write locks and unlocks increment - to
+ * insert the check key it unlocks the btree node and then takes a write lock,
+ * and fails if the sequence number doesn't match.
+ */
+
+#include "bset.h"
+#include "debug.h"
+
+struct btree_write {
+	struct closure *owner;
+	atomic_t *journal;
+
+	/* If btree_split() frees a btree node, it writes a new pointer to that
+	 * btree node indicating it was freed; it takes a refcount on
+	 * c->prio_blocked because we can't write the gens until the new
+	 * pointer is on disk. This allows btree_write_endio() to release the
+	 * refcount that btree_split() took.
+	 */
+	int prio_blocked;
+};
+
+struct btree {
+	/* Hottest entries first */
+	struct hlist_node hash;
+
+	/* Key/pointer for this btree node */
+	BKEY_PADDED(key);
+
+	/* Single bit - set when accessed, cleared by shrinker */
+	unsigned long accessed;
+	unsigned long seq;
+	struct rw_semaphore lock;
+	struct cache_set *c;
+
+	unsigned long flags;
+	uint16_t written;	/* would be nice to kill */
+	uint8_t level;
+	uint8_t nsets;
+	uint8_t page_order;
+
+	/*
+	 * Set of sorted keys - the real btree node - plus a binary search tree
+	 *
+	 * sets[0] is special; set[0]->tree, set[0]->prev and set[0]->data point
+	 * to the memory we have allocated for this btree node. Additionally,
+	 * set[0]->data points to the entire btree node as it exists on disk.
+ */ + struct bset_tree sets[MAX_BSETS]; + + /* Used to refcount bio splits, also protects b->bio */ + struct closure_with_waitlist io; + + /* Gets transferred to w->prio_blocked - see the comment there */ + int prio_blocked; + + struct list_head list; + struct delayed_work work; + + uint64_t io_start_time; + struct btree_write writes[2]; + struct bio *bio; +}; + +#define BTREE_FLAG(flag) \ +static inline bool btree_node_ ## flag(struct btree *b) \ +{ return test_bit(BTREE_NODE_ ## flag, &b->flags); } \ + \ +static inline void set_btree_node_ ## flag(struct btree *b) \ +{ set_bit(BTREE_NODE_ ## flag, &b->flags); } \ + +enum btree_flags { + BTREE_NODE_read_done, + BTREE_NODE_io_error, + BTREE_NODE_dirty, + BTREE_NODE_write_idx, +}; + +BTREE_FLAG(read_done); +BTREE_FLAG(io_error); +BTREE_FLAG(dirty); +BTREE_FLAG(write_idx); + +static inline struct btree_write *btree_current_write(struct btree *b) +{ + return b->writes + btree_node_write_idx(b); +} + +static inline struct btree_write *btree_prev_write(struct btree *b) +{ + return b->writes + (btree_node_write_idx(b) ^ 1); +} + +static inline unsigned bset_offset(struct btree *b, struct bset *i) +{ + return (((size_t) i) - ((size_t) b->sets->data)) >> 9; +} + +static inline struct bset *write_block(struct btree *b) +{ + return ((void *) b->sets[0].data) + b->written * block_bytes(b->c); +} + +static inline bool bset_written(struct btree *b, struct bset_tree *t) +{ + return t->data < write_block(b); +} + +static inline bool bkey_written(struct btree *b, struct bkey *k) +{ + return k < write_block(b)->start; +} + +static inline void set_gc_sectors(struct cache_set *c) +{ + atomic_set(&c->sectors_to_gc, c->sb.bucket_size * c->nbuckets / 8); +} + +static inline bool bch_ptr_invalid(struct btree *b, const struct bkey *k) +{ + return __bch_ptr_invalid(b->c, b->level, k); +} + +static inline struct bkey *bch_btree_iter_init(struct btree *b, + struct btree_iter *iter, + struct bkey *search) +{ + return __bch_btree_iter_init(b, iter, search, b->sets); +} + +/* Looping macros */ + +#define for_each_cached_btree(b, c, iter) \ + for (iter = 0; \ + iter < ARRAY_SIZE((c)->bucket_hash); \ + iter++) \ + hlist_for_each_entry_rcu((b), (c)->bucket_hash + iter, hash) + +#define for_each_key_filter(b, k, iter, filter) \ + for (bch_btree_iter_init((b), (iter), NULL); \ + ((k) = bch_btree_iter_next_filter((iter), b, filter));) + +#define for_each_key(b, k, iter) \ + for (bch_btree_iter_init((b), (iter), NULL); \ + ((k) = bch_btree_iter_next(iter));) + +/* Recursing down the btree */ + +struct btree_op { + struct closure cl; + struct cache_set *c; + + /* Journal entry we have a refcount on */ + atomic_t *journal; + + /* Bio to be inserted into the cache */ + struct bio *cache_bio; + + unsigned inode; + + uint16_t write_prio; + + /* Btree level at which we start taking write locks */ + short lock; + + /* Btree insertion type */ + enum { + BTREE_INSERT, + BTREE_REPLACE + } type:8; + + unsigned csum:1; + unsigned skip:1; + unsigned flush_journal:1; + + unsigned insert_data_done:1; + unsigned lookup_done:1; + unsigned insert_collision:1; + + /* Anything after this point won't get zeroed in do_bio_hook() */ + + /* Keys to be inserted */ + struct keylist keys; + BKEY_PADDED(replace); +}; + +void bch_btree_op_init_stack(struct btree_op *); + +static inline void rw_lock(bool w, struct btree *b, int level) +{ + w ? 
down_write_nested(&b->lock, level + 1) + : down_read_nested(&b->lock, level + 1); + if (w) + b->seq++; +} + +static inline void rw_unlock(bool w, struct btree *b) +{ +#ifdef CONFIG_BCACHE_EDEBUG + unsigned i; + + if (w && + b->key.ptr[0] && + btree_node_read_done(b)) + for (i = 0; i <= b->nsets; i++) + bch_check_key_order(b, b->sets[i].data); +#endif + + if (w) + b->seq++; + (w ? up_write : up_read)(&b->lock); +} + +#define insert_lock(s, b) ((b)->level <= (s)->lock) + +/* + * These macros are for recursing down the btree - they handle the details of + * locking and looking up nodes in the cache for you. They're best treated as + * mere syntax when reading code that uses them. + * + * op->lock determines whether we take a read or a write lock at a given depth. + * If you've got a read lock and find that you need a write lock (i.e. you're + * going to have to split), set op->lock and return -EINTR; btree_root() will + * call you again and you'll have the correct lock. + */ + +/** + * btree - recurse down the btree on a specified key + * @fn: function to call, which will be passed the child node + * @key: key to recurse on + * @b: parent btree node + * @op: pointer to struct btree_op + */ +#define btree(fn, key, b, op, ...) \ +({ \ + int _r, l = (b)->level - 1; \ + bool _w = l <= (op)->lock; \ + struct btree *_b = bch_btree_node_get((b)->c, key, l, op); \ + if (!IS_ERR(_b)) { \ + _r = bch_btree_ ## fn(_b, op, ##__VA_ARGS__); \ + rw_unlock(_w, _b); \ + } else \ + _r = PTR_ERR(_b); \ + _r; \ +}) + +/** + * btree_root - call a function on the root of the btree + * @fn: function to call, which will be passed the child node + * @c: cache set + * @op: pointer to struct btree_op + */ +#define btree_root(fn, c, op, ...) \ +({ \ + int _r = -EINTR; \ + do { \ + struct btree *_b = (c)->root; \ + bool _w = insert_lock(op, _b); \ + rw_lock(_w, _b, _b->level); \ + if (_b == (c)->root && \ + _w == insert_lock(op, _b)) \ + _r = bch_btree_ ## fn(_b, op, ##__VA_ARGS__); \ + rw_unlock(_w, _b); \ + bch_cannibalize_unlock(c, &(op)->cl); \ + } while (_r == -EINTR); \ + \ + _r; \ +}) + +static inline bool should_split(struct btree *b) +{ + struct bset *i = write_block(b); + return b->written >= btree_blocks(b) || + (i->seq == b->sets[0].data->seq && + b->written + __set_blocks(i, i->keys + 15, b->c) + > btree_blocks(b)); +} + +void bch_btree_read_done(struct closure *); +void bch_btree_read(struct btree *); +void bch_btree_write(struct btree *b, bool now, struct btree_op *op); + +void bch_cannibalize_unlock(struct cache_set *, struct closure *); +void bch_btree_set_root(struct btree *); +struct btree *bch_btree_node_alloc(struct cache_set *, int, struct closure *); +struct btree *bch_btree_node_get(struct cache_set *, struct bkey *, + int, struct btree_op *); + +bool bch_btree_insert_keys(struct btree *, struct btree_op *); +bool bch_btree_insert_check_key(struct btree *, struct btree_op *, + struct bio *); +int bch_btree_insert(struct btree_op *, struct cache_set *); + +int bch_btree_search_recurse(struct btree *, struct btree_op *); + +void bch_queue_gc(struct cache_set *); +size_t bch_btree_gc_finish(struct cache_set *); +void bch_moving_gc(struct closure *); +int bch_btree_check(struct cache_set *, struct btree_op *); +uint8_t __bch_btree_mark_key(struct cache_set *, int, struct bkey *); + +void bch_keybuf_init(struct keybuf *, keybuf_pred_fn *); +void bch_refill_keybuf(struct cache_set *, struct keybuf *, struct bkey *); +bool bch_keybuf_check_overlapping(struct keybuf *, struct bkey *, + struct bkey *); 
+void bch_keybuf_del(struct keybuf *, struct keybuf_key *);
+struct keybuf_key *bch_keybuf_next(struct keybuf *);
+struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *,
+					  struct keybuf *, struct bkey *);
+
+#endif
diff --git a/drivers/md/bcache/closure.c b/drivers/md/bcache/closure.c
new file mode 100644
index 000000000000..d6fbec0f8484
--- /dev/null
+++ b/drivers/md/bcache/closure.c
@@ -0,0 +1,348 @@
+/*
+ * Asynchronous refcounty things
+ *
+ * Copyright 2010, 2011 Kent Overstreet
+ * Copyright 2012 Google, Inc.
+ */
+
+#include <linux/debugfs.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+
+#include "closure.h"
+
+void closure_queue(struct closure *cl)
+{
+	struct workqueue_struct *wq = cl->wq;
+	if (wq) {
+		INIT_WORK(&cl->work, cl->work.func);
+		BUG_ON(!queue_work(wq, &cl->work));
+	} else
+		cl->fn(cl);
+}
+EXPORT_SYMBOL_GPL(closure_queue);
+
+#define CL_FIELD(type, field)					\
+	case TYPE_ ## type:					\
+	return &container_of(cl, struct type, cl)->field
+
+static struct closure_waitlist *closure_waitlist(struct closure *cl)
+{
+	switch (cl->type) {
+		CL_FIELD(closure_with_waitlist, wait);
+		CL_FIELD(closure_with_waitlist_and_timer, wait);
+	default:
+		return NULL;
+	}
+}
+
+static struct timer_list *closure_timer(struct closure *cl)
+{
+	switch (cl->type) {
+		CL_FIELD(closure_with_timer, timer);
+		CL_FIELD(closure_with_waitlist_and_timer, timer);
+	default:
+		return NULL;
+	}
+}
+
+static inline void closure_put_after_sub(struct closure *cl, int flags)
+{
+	int r = flags & CLOSURE_REMAINING_MASK;
+
+	BUG_ON(flags & CLOSURE_GUARD_MASK);
+	BUG_ON(!r && (flags & ~(CLOSURE_DESTRUCTOR|CLOSURE_BLOCKING)));
+
+	/* Must deliver precisely one wakeup */
+	if (r == 1 && (flags & CLOSURE_SLEEPING))
+		wake_up_process(cl->task);
+
+	if (!r) {
+		if (cl->fn && !(flags & CLOSURE_DESTRUCTOR)) {
+			/* CLOSURE_BLOCKING might be set - clear it */
+			atomic_set(&cl->remaining,
+				   CLOSURE_REMAINING_INITIALIZER);
+			closure_queue(cl);
+		} else {
+			struct closure *parent = cl->parent;
+			struct closure_waitlist *wait = closure_waitlist(cl);
+
+			closure_debug_destroy(cl);
+
+			atomic_set(&cl->remaining, -1);
+
+			if (wait)
+				closure_wake_up(wait);
+
+			if (cl->fn)
+				cl->fn(cl);
+
+			if (parent)
+				closure_put(parent);
+		}
+	}
+}
+
+/* For clearing flags with the same atomic op as a put */
+void closure_sub(struct closure *cl, int v)
+{
+	closure_put_after_sub(cl, atomic_sub_return(v, &cl->remaining));
+}
+EXPORT_SYMBOL_GPL(closure_sub);
+
+void closure_put(struct closure *cl)
+{
+	closure_put_after_sub(cl, atomic_dec_return(&cl->remaining));
+}
+EXPORT_SYMBOL_GPL(closure_put);
+
+static void set_waiting(struct closure *cl, unsigned long f)
+{
+#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
+	cl->waiting_on = f;
+#endif
+}
+
+void __closure_wake_up(struct closure_waitlist *wait_list)
+{
+	struct llist_node *list;
+	struct closure *cl;
+	struct llist_node *reverse = NULL;
+
+	list = llist_del_all(&wait_list->list);
+
+	/* We first reverse the list to preserve FIFO ordering and fairness */
+
+	while (list) {
+		struct llist_node *t = list;
+		list = llist_next(list);
+
+		t->next = reverse;
+		reverse = t;
+	}
+
+	/* Then do the wakeups */
+
+	while (reverse) {
+		cl = container_of(reverse, struct closure, list);
+		reverse = llist_next(reverse);
+
+		set_waiting(cl, 0);
+		closure_sub(cl, CLOSURE_WAITING + 1);
+	}
+}
+EXPORT_SYMBOL_GPL(__closure_wake_up);
+
+bool closure_wait(struct closure_waitlist *list, struct closure *cl)
+{
+	if (atomic_read(&cl->remaining) & CLOSURE_WAITING)
+		return false;
+
+	set_waiting(cl, _RET_IP_);
+	atomic_add(CLOSURE_WAITING + 1, &cl->remaining);
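+	/* Once we are on the list (below), a concurrent __closure_wake_up()
+	 * may release the reference we just took. (Descriptive comment added
+	 * here; not in the original patch.)
+	 */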
+	llist_add(&cl->list, &list->list);
+
+	return true;
+}
+EXPORT_SYMBOL_GPL(closure_wait);
+
+/**
+ * closure_sync() - sleep until a closure has nothing left to wait on
+ *
+ * Sleeps until the refcount hits 1 - the thread that's running the closure owns
+ * the last refcount.
+ */
+void closure_sync(struct closure *cl)
+{
+	while (1) {
+		__closure_start_sleep(cl);
+		closure_set_ret_ip(cl);
+
+		if ((atomic_read(&cl->remaining) &
+		     CLOSURE_REMAINING_MASK) == 1)
+			break;
+
+		schedule();
+	}
+
+	__closure_end_sleep(cl);
+}
+EXPORT_SYMBOL_GPL(closure_sync);
+
+/**
+ * closure_trylock() - try to acquire the closure, without waiting
+ * @cl: closure to lock
+ *
+ * Returns true if the closure was successfully locked.
+ */
+bool closure_trylock(struct closure *cl, struct closure *parent)
+{
+	if (atomic_cmpxchg(&cl->remaining, -1,
+			   CLOSURE_REMAINING_INITIALIZER) != -1)
+		return false;
+
+	closure_set_ret_ip(cl);
+
+	smp_mb();
+	cl->parent = parent;
+	if (parent)
+		closure_get(parent);
+
+	closure_debug_create(cl);
+	return true;
+}
+EXPORT_SYMBOL_GPL(closure_trylock);
+
+void __closure_lock(struct closure *cl, struct closure *parent,
+		    struct closure_waitlist *wait_list)
+{
+	struct closure wait;
+	closure_init_stack(&wait);
+
+	while (1) {
+		if (closure_trylock(cl, parent))
+			return;
+
+		closure_wait_event_sync(wait_list, &wait,
+					atomic_read(&cl->remaining) == -1);
+	}
+}
+EXPORT_SYMBOL_GPL(__closure_lock);
+
+static void closure_delay_timer_fn(unsigned long data)
+{
+	struct closure *cl = (struct closure *) data;
+	closure_sub(cl, CLOSURE_TIMER + 1);
+}
+
+void do_closure_timer_init(struct closure *cl)
+{
+	struct timer_list *timer = closure_timer(cl);
+
+	init_timer(timer);
+	timer->data = (unsigned long) cl;
+	timer->function = closure_delay_timer_fn;
+}
+EXPORT_SYMBOL_GPL(do_closure_timer_init);
+
+bool __closure_delay(struct closure *cl, unsigned long delay,
+		     struct timer_list *timer)
+{
+	if (atomic_read(&cl->remaining) & CLOSURE_TIMER)
+		return false;
+
+	BUG_ON(timer_pending(timer));
+
+	timer->expires = jiffies + delay;
+
+	atomic_add(CLOSURE_TIMER + 1, &cl->remaining);
+	add_timer(timer);
+	return true;
+}
+EXPORT_SYMBOL_GPL(__closure_delay);
+
+void __closure_flush(struct closure *cl, struct timer_list *timer)
+{
+	if (del_timer(timer))
+		closure_sub(cl, CLOSURE_TIMER + 1);
+}
+EXPORT_SYMBOL_GPL(__closure_flush);
+
+void __closure_flush_sync(struct closure *cl, struct timer_list *timer)
+{
+	if (del_timer_sync(timer))
+		closure_sub(cl, CLOSURE_TIMER + 1);
+}
+EXPORT_SYMBOL_GPL(__closure_flush_sync);
+
+#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
+
+static LIST_HEAD(closure_list);
+static DEFINE_SPINLOCK(closure_list_lock);
+
+void closure_debug_create(struct closure *cl)
+{
+	unsigned long flags;
+
+	BUG_ON(cl->magic == CLOSURE_MAGIC_ALIVE);
+	cl->magic = CLOSURE_MAGIC_ALIVE;
+
+	spin_lock_irqsave(&closure_list_lock, flags);
+	list_add(&cl->all, &closure_list);
+	spin_unlock_irqrestore(&closure_list_lock, flags);
+}
+EXPORT_SYMBOL_GPL(closure_debug_create);
+
+void closure_debug_destroy(struct closure *cl)
+{
+	unsigned long flags;
+
+	BUG_ON(cl->magic != CLOSURE_MAGIC_ALIVE);
+	cl->magic = CLOSURE_MAGIC_DEAD;
+
+	spin_lock_irqsave(&closure_list_lock, flags);
+	list_del(&cl->all);
+	spin_unlock_irqrestore(&closure_list_lock, flags);
+}
+EXPORT_SYMBOL_GPL(closure_debug_destroy);
+
+static struct dentry *debug;
+
+#define work_data_bits(work) ((unsigned long *)(&(work)->data))
+
+static int debug_seq_show(struct seq_file *f, void *data)
+{
+	struct closure *cl;
+	spin_lock_irq(&closure_list_lock);
+
+	list_for_each_entry(cl, &closure_list, all) {
+		int r = atomic_read(&cl->remaining);
+
+		seq_printf(f, "%p: %pF -> %pf p %p r %i ",
+			   cl, (void *) cl->ip, cl->fn, cl->parent,
+			   r & CLOSURE_REMAINING_MASK);
+
+		seq_printf(f, "%s%s%s%s%s%s\n",
+			   test_bit(WORK_STRUCT_PENDING,
+				    work_data_bits(&cl->work)) ? "Q" : "",
+			   r & CLOSURE_RUNNING	? "R" : "",
+			   r & CLOSURE_BLOCKING	? "B" : "",
+			   r & CLOSURE_STACK	? "S" : "",
+			   r & CLOSURE_SLEEPING	? "Sl" : "",
+			   r & CLOSURE_TIMER	? "T" : "");
+
+		if (r & CLOSURE_WAITING)
+			seq_printf(f, " W %pF\n",
+				   (void *) cl->waiting_on);
+
+		seq_printf(f, "\n");
+	}
+
+	spin_unlock_irq(&closure_list_lock);
+	return 0;
+}
+
+static int debug_seq_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, debug_seq_show, NULL);
+}
+
+static const struct file_operations debug_ops = {
+	.owner		= THIS_MODULE,
+	.open		= debug_seq_open,
+	.read		= seq_read,
+	.release	= single_release
+};
+
+int __init closure_debug_init(void)
+{
+	debug = debugfs_create_file("closures", 0400, NULL, NULL, &debug_ops);
+	return 0;
+}
+
+module_init(closure_debug_init);
+
+#endif
+
+MODULE_AUTHOR("Kent Overstreet ");
+MODULE_LICENSE("GPL");
diff --git a/drivers/md/bcache/closure.h b/drivers/md/bcache/closure.h
new file mode 100644
index 000000000000..3f31d599ea56
--- /dev/null
+++ b/drivers/md/bcache/closure.h
@@ -0,0 +1,670 @@
+#ifndef _LINUX_CLOSURE_H
+#define _LINUX_CLOSURE_H
+
+#include <linux/llist.h>
+#include <linux/sched.h>
+#include <linux/workqueue.h>
+
+/*
+ * Closure is perhaps the most overused and abused term in computer science, but
+ * since I've been unable to come up with anything better you're stuck with it
+ * again.
+ *
+ * What are closures?
+ *
+ * They embed a refcount. The basic idea is they count "things that are in
+ * progress" - in flight bios, some other thread that's doing something else -
+ * anything you might want to wait on.
+ *
+ * The refcount may be manipulated with closure_get() and closure_put().
+ * closure_put() is where many of the interesting things happen, when it causes
+ * the refcount to go to 0.
+ *
+ * Closures can be used to wait on things both synchronously and asynchronously,
+ * and synchronous and asynchronous use can be mixed without restriction. To
+ * wait synchronously, use closure_sync() - you will sleep until your closure's
+ * refcount hits 1.
+ *
+ * To wait asynchronously, use
+ *   continue_at(cl, next_function, workqueue);
+ *
+ * passing it, as you might expect, the function to run when nothing is pending
+ * and the workqueue to run that function out of.
+ *
+ * continue_at() also, critically, is a macro that returns the calling function.
+ * There's good reason for this.
+ *
+ * To safely use closures asynchronously, they must always have a refcount while
+ * they are running, owned by the thread that is running them. Otherwise,
+ * suppose you submit some bios and wish to have a function run when they all
+ * complete:
+ *
+ * foo_endio(struct bio *bio, int error)
+ * {
+ *	closure_put(cl);
+ * }
+ *
+ * closure_init(cl);
+ *
+ * do_stuff();
+ * closure_get(cl);
+ * bio1->bi_endio = foo_endio;
+ * bio_submit(bio1);
+ *
+ * do_more_stuff();
+ * closure_get(cl);
+ * bio2->bi_endio = foo_endio;
+ * bio_submit(bio2);
+ *
+ * continue_at(cl, complete_some_read, system_wq);
+ *
+ * If the closure's refcount started at 0, complete_some_read() could run before
+ * the second bio was submitted - which is almost always not what you want!
+ * More importantly, it wouldn't be possible to say whether the original thread
+ * or complete_some_read()'s thread owned the closure - and whatever state it
+ * was associated with!
+ *
+ * So, closure_init() initializes a closure's refcount to 1 - and when a
+ * closure_fn is run, the refcount will be reset to 1 first.
+ *
+ * Then, the rule is - if you got the refcount with closure_get(), release it
+ * with closure_put() (i.e., in a bio->bi_endio function). If you have a
+ * refcount on a closure because you called closure_init() or you were run out
+ * of a closure - _always_ use continue_at(). Doing so consistently will help
+ * eliminate an entire class of particularly pernicious races.
+ *
+ * For a closure to wait on an arbitrary event, we need to introduce waitlists:
+ *
+ * struct closure_waitlist list;
+ * closure_wait_event(list, cl, condition);
+ * closure_wake_up(wait_list);
+ *
+ * These work analogously to wait_event() and wake_up() - except that instead of
+ * operating on the current thread (for wait_event()) and lists of threads, they
+ * operate on an explicit closure and lists of closures.
+ *
+ * Because it's a closure we can now wait either synchronously or
+ * asynchronously. closure_wait_event() returns the current value of the
+ * condition, and if it returned false continue_at() or closure_sync() can be
+ * used to wait for it to become true.
+ *
+ * It's useful for waiting on things when you can't sleep in the context in
+ * which you must check the condition (perhaps a spinlock held, or you might be
+ * beneath generic_make_request() - in which case you can't sleep on IO).
+ *
+ * closure_wait_event() will wait either synchronously or asynchronously,
+ * depending on whether the closure is in blocking mode or not. You can pick a
+ * mode explicitly with closure_wait_event_sync() and
+ * closure_wait_event_async(), which do just what you might expect.
+ *
+ * Lastly, you might have a wait list dedicated to a specific event, and have no
+ * need for specifying the condition - you just want to wait until someone runs
+ * closure_wake_up() on the appropriate wait list. In that case, just use
+ * closure_wait(). It will return either true or false, depending on whether the
+ * closure was already on a wait list or not - a closure can only be on one wait
+ * list at a time.
+ *
+ * Parents:
+ *
+ * closure_init() takes two arguments - it takes the closure to initialize, and
+ * a (possibly null) parent.
+ *
+ * If parent is non null, the new closure will have a refcount for its lifetime;
+ * a closure is considered to be "finished" when its refcount hits 0 and the
+ * function to run is null. Hence
+ *
+ * continue_at(cl, NULL, NULL);
+ *
+ * returns up the (spaghetti) stack of closures, precisely like normal return
+ * returns up the C stack. continue_at() with non null fn is better thought of
+ * as doing a tail call.
+ *
+ * All this implies that a closure should typically be embedded in a particular
+ * struct (which its refcount will normally control the lifetime of), and that
+ * struct can very much be thought of as a stack frame.
+ *
+ * Locking:
+ *
+ * Closures are based on work items but they can be thought of as more like
+ * threads - in that like threads and unlike work items they have a well
+ * defined lifetime; they are created (with closure_init()) and eventually
+ * complete after a continue_at(cl, NULL, NULL).
+ *
+ * Suppose you've got some larger structure with a closure embedded in it that's
+ * used for periodically doing garbage collection.
+ * You only want one garbage collection happening at a time, so the natural
+ * thing to do is protect it with a lock. However, it's difficult to use a lock
+ * protecting a closure correctly because the unlock should come after the last
+ * continue_at() (additionally, if you're using the closure asynchronously a
+ * mutex won't work since a mutex has to be unlocked by the same process that
+ * locked it).
+ *
+ * So to make it less error prone and more efficient, we also have the ability
+ * to use closures as locks:
+ *
+ * closure_init_unlocked();
+ * closure_trylock();
+ *
+ * That's all we need for trylock() - the last closure_put() implicitly unlocks
+ * it for you. But for closure_lock(), we also need a wait list:
+ *
+ * struct closure_with_waitlist frobnicator_cl;
+ *
+ * closure_init_unlocked(&frobnicator_cl);
+ * closure_lock(&frobnicator_cl);
+ *
+ * A closure_with_waitlist embeds a closure and a wait list - much like struct
+ * delayed_work embeds a work item and a timer_list. The important thing is, use
+ * it exactly like you would a regular closure and closure_put() will magically
+ * handle everything for you.
+ *
+ * We've got closures that embed timers, too. They're called, appropriately
+ * enough:
+ * struct closure_with_timer;
+ *
+ * This gives you access to closure_delay(). It takes a refcount for a specified
+ * number of jiffies - you could then call closure_sync() (for a slightly
+ * convoluted version of msleep()) or continue_at() - which gives you the same
+ * effect as using a delayed work item, except you can reuse the work_struct
+ * already embedded in struct closure.
+ *
+ * Lastly, there's struct closure_with_waitlist_and_timer. It does what you
+ * probably expect, if you happen to need the features of both. (You don't
+ * really want to know how all this is implemented, but if I've done my job
+ * right you shouldn't have to care).
+ */
+
+struct closure;
+typedef void (closure_fn) (struct closure *);
+
+struct closure_waitlist {
+	struct llist_head list;
+};
+
+enum closure_type {
+	TYPE_closure				= 0,
+	TYPE_closure_with_waitlist		= 1,
+	TYPE_closure_with_timer			= 2,
+	TYPE_closure_with_waitlist_and_timer	= 3,
+	MAX_CLOSURE_TYPE			= 3,
+};
+
+enum closure_state {
+	/*
+	 * CLOSURE_BLOCKING: Causes closure_wait_event() to block, instead of
+	 * waiting asynchronously
+	 *
+	 * CLOSURE_WAITING: Set iff the closure is on a waitlist. Must be set by
+	 * the thread that owns the closure, and cleared by the thread that's
+	 * waking up the closure.
+	 *
+	 * CLOSURE_SLEEPING: Must be set before a thread uses a closure to sleep
+	 * - indicates that cl->task is valid and closure_put() may wake it up.
+	 * Only set or cleared by the thread that owns the closure.
+	 *
+	 * CLOSURE_TIMER: Analogous to CLOSURE_WAITING, indicates that a closure
+	 * has an outstanding timer. Must be set by the thread that owns the
+	 * closure, and cleared by the timer function when the timer goes off.
+	 *
+	 * The rest are for debugging and don't affect behaviour:
+	 *
+	 * CLOSURE_RUNNING: Set when a closure is running (i.e. by
+	 * closure_init() and when closure_put() runs the next function), and
+	 * must be cleared before remaining hits 0. Primarily to help guard
+	 * against incorrect usage and accidentally transferring references.
+	 * continue_at() and closure_return() clear it for you, if you're doing
+	 * something unusual you can use closure_set_dead() which also helps
+	 * annotate where references are being transferred.
+ * + * CLOSURE_STACK: Sanity check - remaining should never hit 0 on a + * closure with this flag set + */ + + CLOSURE_BITS_START = (1 << 19), + CLOSURE_DESTRUCTOR = (1 << 19), + CLOSURE_BLOCKING = (1 << 21), + CLOSURE_WAITING = (1 << 23), + CLOSURE_SLEEPING = (1 << 25), + CLOSURE_TIMER = (1 << 27), + CLOSURE_RUNNING = (1 << 29), + CLOSURE_STACK = (1 << 31), +}; + +#define CLOSURE_GUARD_MASK \ + ((CLOSURE_DESTRUCTOR|CLOSURE_BLOCKING|CLOSURE_WAITING| \ + CLOSURE_SLEEPING|CLOSURE_TIMER|CLOSURE_RUNNING|CLOSURE_STACK) << 1) + +#define CLOSURE_REMAINING_MASK (CLOSURE_BITS_START - 1) +#define CLOSURE_REMAINING_INITIALIZER (1|CLOSURE_RUNNING) + +struct closure { + union { + struct { + struct workqueue_struct *wq; + struct task_struct *task; + struct llist_node list; + closure_fn *fn; + }; + struct work_struct work; + }; + + struct closure *parent; + + atomic_t remaining; + + enum closure_type type; + +#ifdef CONFIG_BCACHE_CLOSURES_DEBUG +#define CLOSURE_MAGIC_DEAD 0xc054dead +#define CLOSURE_MAGIC_ALIVE 0xc054a11e + + unsigned magic; + struct list_head all; + unsigned long ip; + unsigned long waiting_on; +#endif +}; + +struct closure_with_waitlist { + struct closure cl; + struct closure_waitlist wait; +}; + +struct closure_with_timer { + struct closure cl; + struct timer_list timer; +}; + +struct closure_with_waitlist_and_timer { + struct closure cl; + struct closure_waitlist wait; + struct timer_list timer; +}; + +extern unsigned invalid_closure_type(void); + +#define __CLOSURE_TYPE(cl, _t) \ + __builtin_types_compatible_p(typeof(cl), struct _t) \ + ? TYPE_ ## _t : \ + +#define __closure_type(cl) \ +( \ + __CLOSURE_TYPE(cl, closure) \ + __CLOSURE_TYPE(cl, closure_with_waitlist) \ + __CLOSURE_TYPE(cl, closure_with_timer) \ + __CLOSURE_TYPE(cl, closure_with_waitlist_and_timer) \ + invalid_closure_type() \ +) + +void closure_sub(struct closure *cl, int v); +void closure_put(struct closure *cl); +void closure_queue(struct closure *cl); +void __closure_wake_up(struct closure_waitlist *list); +bool closure_wait(struct closure_waitlist *list, struct closure *cl); +void closure_sync(struct closure *cl); + +bool closure_trylock(struct closure *cl, struct closure *parent); +void __closure_lock(struct closure *cl, struct closure *parent, + struct closure_waitlist *wait_list); + +void do_closure_timer_init(struct closure *cl); +bool __closure_delay(struct closure *cl, unsigned long delay, + struct timer_list *timer); +void __closure_flush(struct closure *cl, struct timer_list *timer); +void __closure_flush_sync(struct closure *cl, struct timer_list *timer); + +#ifdef CONFIG_BCACHE_CLOSURES_DEBUG + +void closure_debug_create(struct closure *cl); +void closure_debug_destroy(struct closure *cl); + +#else + +static inline void closure_debug_create(struct closure *cl) {} +static inline void closure_debug_destroy(struct closure *cl) {} + +#endif + +static inline void closure_set_ip(struct closure *cl) +{ +#ifdef CONFIG_BCACHE_CLOSURES_DEBUG + cl->ip = _THIS_IP_; +#endif +} + +static inline void closure_set_ret_ip(struct closure *cl) +{ +#ifdef CONFIG_BCACHE_CLOSURES_DEBUG + cl->ip = _RET_IP_; +#endif +} + +static inline void closure_get(struct closure *cl) +{ +#ifdef CONFIG_BCACHE_CLOSURES_DEBUG + BUG_ON((atomic_inc_return(&cl->remaining) & + CLOSURE_REMAINING_MASK) <= 1); +#else + atomic_inc(&cl->remaining); +#endif +} + +static inline void closure_set_stopped(struct closure *cl) +{ + atomic_sub(CLOSURE_RUNNING, &cl->remaining); +} + +static inline bool closure_is_stopped(struct closure *cl) +{ + return 
!(atomic_read(&cl->remaining) & CLOSURE_RUNNING); +} + +static inline bool closure_is_unlocked(struct closure *cl) +{ + return atomic_read(&cl->remaining) == -1; +} + +static inline void do_closure_init(struct closure *cl, struct closure *parent, + bool running) +{ + switch (cl->type) { + case TYPE_closure_with_timer: + case TYPE_closure_with_waitlist_and_timer: + do_closure_timer_init(cl); + default: + break; + } + + cl->parent = parent; + if (parent) + closure_get(parent); + + if (running) { + closure_debug_create(cl); + atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER); + } else + atomic_set(&cl->remaining, -1); + + closure_set_ip(cl); +} + +/* + * Hack to get at the embedded closure if there is one, by doing an unsafe cast: + * the result of __closure_type() is thrown away, it's used merely for type + * checking. + */ +#define __to_internal_closure(cl) \ +({ \ + BUILD_BUG_ON(__closure_type(*cl) > MAX_CLOSURE_TYPE); \ + (struct closure *) cl; \ +}) + +#define closure_init_type(cl, parent, running) \ +do { \ + struct closure *_cl = __to_internal_closure(cl); \ + _cl->type = __closure_type(*(cl)); \ + do_closure_init(_cl, parent, running); \ +} while (0) + +/** + * __closure_init() - Initialize a closure, skipping the memset() + * + * May be used instead of closure_init() when memory has already been zeroed. + */ +#define __closure_init(cl, parent) \ + closure_init_type(cl, parent, true) + +/** + * closure_init() - Initialize a closure, setting the refcount to 1 + * @cl: closure to initialize + * @parent: parent of the new closure. cl will take a refcount on it for its + * lifetime; may be NULL. + */ +#define closure_init(cl, parent) \ +do { \ + memset((cl), 0, sizeof(*(cl))); \ + __closure_init(cl, parent); \ +} while (0) + +static inline void closure_init_stack(struct closure *cl) +{ + memset(cl, 0, sizeof(struct closure)); + atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER| + CLOSURE_BLOCKING|CLOSURE_STACK); +} + +/** + * closure_init_unlocked() - Initialize a closure but leave it unlocked. + * @cl: closure to initialize + * + * For when the closure will be used as a lock. The closure may not be used + * until after a closure_lock() or closure_trylock(). + */ +#define closure_init_unlocked(cl) \ +do { \ + memset((cl), 0, sizeof(*(cl))); \ + closure_init_type(cl, NULL, false); \ +} while (0) + +/** + * closure_lock() - lock and initialize a closure. + * @cl: the closure to lock + * @parent: the new parent for this closure + * + * The closure must be of one of the types that has a waitlist (otherwise we + * wouldn't be able to sleep on contention). + * + * @parent has exactly the same meaning as in closure_init(); if non null, the + * closure will take a reference on @parent which will be released when it is + * unlocked. + */ +#define closure_lock(cl, parent) \ + __closure_lock(__to_internal_closure(cl), parent, &(cl)->wait) + +/** + * closure_delay() - delay some number of jiffies + * @cl: the closure that will sleep + * @delay: the delay in jiffies + * + * Takes a refcount on @cl which will be released after @delay jiffies; this may + * be used to have a function run after a delay with continue_at(), or + * closure_sync() may be used for a convoluted version of msleep(). 
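+ *
+ * An illustrative sketch (not part of the original comment), assuming a
+ * struct closure_with_timer t initialized with closure_init(&t, NULL):
+ *
+ *	closure_delay(&t, HZ);
+ *	closure_sync(&t.cl);	(sleeps for roughly one second)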
+ */ +#define closure_delay(cl, delay) \ + __closure_delay(__to_internal_closure(cl), delay, &(cl)->timer) + +#define closure_flush(cl) \ + __closure_flush(__to_internal_closure(cl), &(cl)->timer) + +#define closure_flush_sync(cl) \ + __closure_flush_sync(__to_internal_closure(cl), &(cl)->timer) + +static inline void __closure_end_sleep(struct closure *cl) +{ + __set_current_state(TASK_RUNNING); + + if (atomic_read(&cl->remaining) & CLOSURE_SLEEPING) + atomic_sub(CLOSURE_SLEEPING, &cl->remaining); +} + +static inline void __closure_start_sleep(struct closure *cl) +{ + closure_set_ip(cl); + cl->task = current; + set_current_state(TASK_UNINTERRUPTIBLE); + + if (!(atomic_read(&cl->remaining) & CLOSURE_SLEEPING)) + atomic_add(CLOSURE_SLEEPING, &cl->remaining); +} + +/** + * closure_blocking() - returns true if the closure is in blocking mode. + * + * If a closure is in blocking mode, closure_wait_event() will sleep until the + * condition is true instead of waiting asynchronously. + */ +static inline bool closure_blocking(struct closure *cl) +{ + return atomic_read(&cl->remaining) & CLOSURE_BLOCKING; +} + +/** + * set_closure_blocking() - put a closure in blocking mode. + * + * If a closure is in blocking mode, closure_wait_event() will sleep until the + * condition is true instead of waiting asynchronously. + * + * Not thread safe - can only be called by the thread running the closure. + */ +static inline void set_closure_blocking(struct closure *cl) +{ + if (!closure_blocking(cl)) + atomic_add(CLOSURE_BLOCKING, &cl->remaining); +} + +/* + * Not thread safe - can only be called by the thread running the closure. + */ +static inline void clear_closure_blocking(struct closure *cl) +{ + if (closure_blocking(cl)) + atomic_sub(CLOSURE_BLOCKING, &cl->remaining); +} + +/** + * closure_wake_up() - wake up all closures on a wait list. + */ +static inline void closure_wake_up(struct closure_waitlist *list) +{ + smp_mb(); + __closure_wake_up(list); +} + +/* + * Wait on an event, synchronously or asynchronously - analogous to wait_event() + * but for closures. + * + * The loop is oddly structured so as to avoid a race; we must check the + * condition again after we've added ourself to the waitlist. We know if we were + * already on the waitlist because closure_wait() returns false; thus, we only + * schedule or break if closure_wait() returns false. If it returns true, we + * just loop again - rechecking the condition. + * + * The __closure_wake_up() is necessary because we may race with the event + * becoming true; i.e. we see event false -> wait -> recheck condition, but the + * thread that made the event true may have called closure_wake_up() before we + * added ourself to the wait list. + * + * We have to call closure_sync() at the end instead of just + * __closure_end_sleep() because a different thread might've called + * closure_wake_up() before us and gotten preempted before they dropped the + * refcount on our closure. If this was a stack allocated closure, that would be + * bad. + */ +#define __closure_wait_event(list, cl, condition, _block) \ +({ \ + bool block = _block; \ + typeof(condition) ret; \ + \ + while (1) { \ + ret = (condition); \ + if (ret) { \ + __closure_wake_up(list); \ + if (block) \ + closure_sync(cl); \ + \ + break; \ + } \ + \ + if (block) \ + __closure_start_sleep(cl); \ + \ + if (!closure_wait(list, cl)) { \ + if (!block) \ + break; \ + \ + schedule(); \ + } \ + } \ + \ + ret; \ +}) + +/** + * closure_wait_event() - wait on a condition, synchronously or asynchronously. 
+ * @list: the wait list to wait on + * @cl: the closure that is doing the waiting + * @condition: a C expression for the event to wait for + * + * If the closure is in blocking mode, sleeps until the @condition evaluates to + * true - exactly like wait_event(). + * + * If the closure is not in blocking mode, waits asynchronously; if the + * condition is currently false the @cl is put onto @list and returns. @list + * owns a refcount on @cl; closure_sync() or continue_at() may be used later to + * wait for another thread to wake up @list, which drops the refcount on @cl. + * + * Returns the value of @condition; @cl will be on @list iff @condition was + * false. + * + * closure_wake_up(@list) must be called after changing any variable that could + * cause @condition to become true. + */ +#define closure_wait_event(list, cl, condition) \ + __closure_wait_event(list, cl, condition, closure_blocking(cl)) + +#define closure_wait_event_async(list, cl, condition) \ + __closure_wait_event(list, cl, condition, false) + +#define closure_wait_event_sync(list, cl, condition) \ + __closure_wait_event(list, cl, condition, true) + +static inline void set_closure_fn(struct closure *cl, closure_fn *fn, + struct workqueue_struct *wq) +{ + BUG_ON(object_is_on_stack(cl)); + closure_set_ip(cl); + cl->fn = fn; + cl->wq = wq; + /* between atomic_dec() in closure_put() */ + smp_mb__before_atomic_dec(); +} + +#define continue_at(_cl, _fn, _wq) \ +do { \ + set_closure_fn(_cl, _fn, _wq); \ + closure_sub(_cl, CLOSURE_RUNNING + 1); \ + return; \ +} while (0) + +#define closure_return(_cl) continue_at((_cl), NULL, NULL) + +#define continue_at_nobarrier(_cl, _fn, _wq) \ +do { \ + set_closure_fn(_cl, _fn, _wq); \ + closure_queue(cl); \ + return; \ +} while (0) + +#define closure_return_with_destructor(_cl, _destructor) \ +do { \ + set_closure_fn(_cl, _destructor, NULL); \ + closure_sub(_cl, CLOSURE_RUNNING - CLOSURE_DESTRUCTOR + 1); \ + return; \ +} while (0) + +static inline void closure_call(struct closure *cl, closure_fn fn, + struct workqueue_struct *wq, + struct closure *parent) +{ + closure_init(cl, parent); + continue_at_nobarrier(cl, fn, wq); +} + +static inline void closure_trylock_call(struct closure *cl, closure_fn fn, + struct workqueue_struct *wq, + struct closure *parent) +{ + if (closure_trylock(cl, parent)) + continue_at_nobarrier(cl, fn, wq); +} + +#endif /* _LINUX_CLOSURE_H */ diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c new file mode 100644 index 000000000000..4b37ef2b80e5 --- /dev/null +++ b/drivers/md/bcache/debug.c @@ -0,0 +1,563 @@ +/* + * Assorted bcache debug code + * + * Copyright 2010, 2011 Kent Overstreet + * Copyright 2012 Google, Inc. 
+ */ + +#include "bcache.h" +#include "btree.h" +#include "debug.h" +#include "request.h" + +#include +#include +#include +#include +#include + +static struct dentry *debug; + +const char *bch_ptr_status(struct cache_set *c, const struct bkey *k) +{ + unsigned i; + + for (i = 0; i < KEY_PTRS(k); i++) + if (ptr_available(c, k, i)) { + struct cache *ca = PTR_CACHE(c, k, i); + size_t bucket = PTR_BUCKET_NR(c, k, i); + size_t r = bucket_remainder(c, PTR_OFFSET(k, i)); + + if (KEY_SIZE(k) + r > c->sb.bucket_size) + return "bad, length too big"; + if (bucket < ca->sb.first_bucket) + return "bad, short offset"; + if (bucket >= ca->sb.nbuckets) + return "bad, offset past end of device"; + if (ptr_stale(c, k, i)) + return "stale"; + } + + if (!bkey_cmp(k, &ZERO_KEY)) + return "bad, null key"; + if (!KEY_PTRS(k)) + return "bad, no pointers"; + if (!KEY_SIZE(k)) + return "zeroed key"; + return ""; +} + +struct keyprint_hack bch_pkey(const struct bkey *k) +{ + unsigned i = 0; + struct keyprint_hack r; + char *out = r.s, *end = r.s + KEYHACK_SIZE; + +#define p(...) (out += scnprintf(out, end - out, __VA_ARGS__)) + + p("%llu:%llu len %llu -> [", KEY_INODE(k), KEY_OFFSET(k), KEY_SIZE(k)); + + if (KEY_PTRS(k)) + while (1) { + p("%llu:%llu gen %llu", + PTR_DEV(k, i), PTR_OFFSET(k, i), PTR_GEN(k, i)); + + if (++i == KEY_PTRS(k)) + break; + + p(", "); + } + + p("]"); + + if (KEY_DIRTY(k)) + p(" dirty"); + if (KEY_CSUM(k)) + p(" cs%llu %llx", KEY_CSUM(k), k->ptr[1]); +#undef p + return r; +} + +struct keyprint_hack bch_pbtree(const struct btree *b) +{ + struct keyprint_hack r; + + snprintf(r.s, 40, "%li level %i/%i", PTR_BUCKET_NR(b->c, &b->key, 0), + b->level, b->c->root ? b->c->root->level : -1); + return r; +} + +#if defined(CONFIG_BCACHE_DEBUG) || defined(CONFIG_BCACHE_EDEBUG) + +static bool skipped_backwards(struct btree *b, struct bkey *k) +{ + return bkey_cmp(k, (!b->level) + ? 
&START_KEY(bkey_next(k)) + : bkey_next(k)) > 0; +} + +static void dump_bset(struct btree *b, struct bset *i) +{ + struct bkey *k; + unsigned j; + + for (k = i->start; k < end(i); k = bkey_next(k)) { + printk(KERN_ERR "block %zu key %zi/%u: %s", index(i, b), + (uint64_t *) k - i->d, i->keys, pkey(k)); + + for (j = 0; j < KEY_PTRS(k); j++) { + size_t n = PTR_BUCKET_NR(b->c, k, j); + printk(" bucket %zu", n); + + if (n >= b->c->sb.first_bucket && n < b->c->sb.nbuckets) + printk(" prio %i", + PTR_BUCKET(b->c, k, j)->prio); + } + + printk(" %s\n", bch_ptr_status(b->c, k)); + + if (bkey_next(k) < end(i) && + skipped_backwards(b, k)) + printk(KERN_ERR "Key skipped backwards\n"); + } +} + +#endif + +#ifdef CONFIG_BCACHE_DEBUG + +void bch_btree_verify(struct btree *b, struct bset *new) +{ + struct btree *v = b->c->verify_data; + struct closure cl; + closure_init_stack(&cl); + + if (!b->c->verify) + return; + + closure_wait_event(&b->io.wait, &cl, + atomic_read(&b->io.cl.remaining) == -1); + + mutex_lock(&b->c->verify_lock); + + bkey_copy(&v->key, &b->key); + v->written = 0; + v->level = b->level; + + bch_btree_read(v); + closure_wait_event(&v->io.wait, &cl, + atomic_read(&b->io.cl.remaining) == -1); + + if (new->keys != v->sets[0].data->keys || + memcmp(new->start, + v->sets[0].data->start, + (void *) end(new) - (void *) new->start)) { + unsigned i, j; + + console_lock(); + + printk(KERN_ERR "*** original memory node:\n"); + for (i = 0; i <= b->nsets; i++) + dump_bset(b, b->sets[i].data); + + printk(KERN_ERR "*** sorted memory node:\n"); + dump_bset(b, new); + + printk(KERN_ERR "*** on disk node:\n"); + dump_bset(v, v->sets[0].data); + + for (j = 0; j < new->keys; j++) + if (new->d[j] != v->sets[0].data->d[j]) + break; + + console_unlock(); + panic("verify failed at %u\n", j); + } + + mutex_unlock(&b->c->verify_lock); +} + +static void data_verify_endio(struct bio *bio, int error) +{ + struct closure *cl = bio->bi_private; + closure_put(cl); +} + +void bch_data_verify(struct search *s) +{ + char name[BDEVNAME_SIZE]; + struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); + struct closure *cl = &s->cl; + struct bio *check; + struct bio_vec *bv; + int i; + + if (!s->unaligned_bvec) + bio_for_each_segment(bv, s->orig_bio, i) + bv->bv_offset = 0, bv->bv_len = PAGE_SIZE; + + check = bio_clone(s->orig_bio, GFP_NOIO); + if (!check) + return; + + if (bio_alloc_pages(check, GFP_NOIO)) + goto out_put; + + check->bi_rw = READ_SYNC; + check->bi_private = cl; + check->bi_end_io = data_verify_endio; + + closure_bio_submit(check, cl, &dc->disk); + closure_sync(cl); + + bio_for_each_segment(bv, s->orig_bio, i) { + void *p1 = kmap(bv->bv_page); + void *p2 = kmap(check->bi_io_vec[i].bv_page); + + if (memcmp(p1 + bv->bv_offset, + p2 + bv->bv_offset, + bv->bv_len)) + printk(KERN_ERR "bcache (%s): verify failed" + " at sector %llu\n", + bdevname(dc->bdev, name), + (uint64_t) s->orig_bio->bi_sector); + + kunmap(bv->bv_page); + kunmap(check->bi_io_vec[i].bv_page); + } + + __bio_for_each_segment(bv, check, i, 0) + __free_page(bv->bv_page); +out_put: + bio_put(check); +} + +#endif + +#ifdef CONFIG_BCACHE_EDEBUG + +unsigned bch_count_data(struct btree *b) +{ + unsigned ret = 0; + struct btree_iter iter; + struct bkey *k; + + if (!b->level) + for_each_key(b, k, &iter) + ret += KEY_SIZE(k); + return ret; +} + +static void vdump_bucket_and_panic(struct btree *b, const char *fmt, + va_list args) +{ + unsigned i; + + console_lock(); + + for (i = 0; i <= b->nsets; i++) + dump_bset(b, b->sets[i].data); + + 
vprintk(fmt, args); + + console_unlock(); + + panic("at %s\n", pbtree(b)); +} + +void bch_check_key_order_msg(struct btree *b, struct bset *i, + const char *fmt, ...) +{ + struct bkey *k; + + if (!i->keys) + return; + + for (k = i->start; bkey_next(k) < end(i); k = bkey_next(k)) + if (skipped_backwards(b, k)) { + va_list args; + va_start(args, fmt); + + vdump_bucket_and_panic(b, fmt, args); + va_end(args); + } +} + +void bch_check_keys(struct btree *b, const char *fmt, ...) +{ + va_list args; + struct bkey *k, *p = NULL; + struct btree_iter iter; + + if (b->level) + return; + + for_each_key(b, k, &iter) { + if (p && bkey_cmp(&START_KEY(p), &START_KEY(k)) > 0) { + printk(KERN_ERR "Keys out of order:\n"); + goto bug; + } + + if (bch_ptr_invalid(b, k)) + continue; + + if (p && bkey_cmp(p, &START_KEY(k)) > 0) { + printk(KERN_ERR "Overlapping keys:\n"); + goto bug; + } + p = k; + } + return; +bug: + va_start(args, fmt); + vdump_bucket_and_panic(b, fmt, args); + va_end(args); +} + +#endif + +#ifdef CONFIG_DEBUG_FS + +/* XXX: cache set refcounting */ + +struct dump_iterator { + char buf[PAGE_SIZE]; + size_t bytes; + struct cache_set *c; + struct keybuf keys; +}; + +static bool dump_pred(struct keybuf *buf, struct bkey *k) +{ + return true; +} + +static ssize_t bch_dump_read(struct file *file, char __user *buf, + size_t size, loff_t *ppos) +{ + struct dump_iterator *i = file->private_data; + ssize_t ret = 0; + + while (size) { + struct keybuf_key *w; + unsigned bytes = min(i->bytes, size); + + int err = copy_to_user(buf, i->buf, bytes); + if (err) + return err; + + ret += bytes; + buf += bytes; + size -= bytes; + i->bytes -= bytes; + memmove(i->buf, i->buf + bytes, i->bytes); + + if (i->bytes) + break; + + w = bch_keybuf_next_rescan(i->c, &i->keys, &MAX_KEY); + if (!w) + break; + + i->bytes = snprintf(i->buf, PAGE_SIZE, "%s\n", pkey(&w->key)); + bch_keybuf_del(&i->keys, w); + } + + return ret; +} + +static int bch_dump_open(struct inode *inode, struct file *file) +{ + struct cache_set *c = inode->i_private; + struct dump_iterator *i; + + i = kzalloc(sizeof(struct dump_iterator), GFP_KERNEL); + if (!i) + return -ENOMEM; + + file->private_data = i; + i->c = c; + bch_keybuf_init(&i->keys, dump_pred); + i->keys.last_scanned = KEY(0, 0, 0); + + return 0; +} + +static int bch_dump_release(struct inode *inode, struct file *file) +{ + kfree(file->private_data); + return 0; +} + +static const struct file_operations cache_set_debug_ops = { + .owner = THIS_MODULE, + .open = bch_dump_open, + .read = bch_dump_read, + .release = bch_dump_release +}; + +void bch_debug_init_cache_set(struct cache_set *c) +{ + if (!IS_ERR_OR_NULL(debug)) { + char name[50]; + snprintf(name, 50, "bcache-%pU", c->sb.set_uuid); + + c->debug = debugfs_create_file(name, 0400, debug, c, + &cache_set_debug_ops); + } +} + +#endif + +#ifdef CONFIG_BCACHE_DEBUG +static ssize_t btree_fuzz(struct kobject *k, struct kobj_attribute *a, + const char *buffer, size_t size) +{ + void dump(struct btree *b) + { + struct bset *i; + + for (i = b->sets[0].data; + index(i, b) < btree_blocks(b) && + i->seq == b->sets[0].data->seq; + i = ((void *) i) + set_blocks(i, b->c) * block_bytes(b->c)) + dump_bset(b, i); + } + + struct cache_sb *sb; + struct cache_set *c; + struct btree *all[3], *b, *fill, *orig; + int j; + + struct btree_op op; + bch_btree_op_init_stack(&op); + + sb = kzalloc(sizeof(struct cache_sb), GFP_KERNEL); + if (!sb) + return -ENOMEM; + + sb->bucket_size = 128; + sb->block_size = 4; + + c = bch_cache_set_alloc(sb); + if (!c) + return -ENOMEM; 
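+
+	/* Take three nodes off the btree node cache to use as scratch space */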
+ + for (j = 0; j < 3; j++) { + BUG_ON(list_empty(&c->btree_cache)); + all[j] = list_first_entry(&c->btree_cache, struct btree, list); + list_del_init(&all[j]->list); + + all[j]->key = KEY(0, 0, c->sb.bucket_size); + bkey_copy_key(&all[j]->key, &MAX_KEY); + } + + b = all[0]; + fill = all[1]; + orig = all[2]; + + while (1) { + for (j = 0; j < 3; j++) + all[j]->written = all[j]->nsets = 0; + + bch_bset_init_next(b); + + while (1) { + struct bset *i = write_block(b); + struct bkey *k = op.keys.top; + unsigned rand; + + bkey_init(k); + rand = get_random_int(); + + op.type = rand & 1 + ? BTREE_INSERT + : BTREE_REPLACE; + rand >>= 1; + + SET_KEY_SIZE(k, bucket_remainder(c, rand)); + rand >>= c->bucket_bits; + rand &= 1024 * 512 - 1; + rand += c->sb.bucket_size; + SET_KEY_OFFSET(k, rand); +#if 0 + SET_KEY_PTRS(k, 1); +#endif + bch_keylist_push(&op.keys); + bch_btree_insert_keys(b, &op); + + if (should_split(b) || + set_blocks(i, b->c) != + __set_blocks(i, i->keys + 15, b->c)) { + i->csum = csum_set(i); + + memcpy(write_block(fill), + i, set_bytes(i)); + + b->written += set_blocks(i, b->c); + fill->written = b->written; + if (b->written == btree_blocks(b)) + break; + + bch_btree_sort_lazy(b); + bch_bset_init_next(b); + } + } + + memcpy(orig->sets[0].data, + fill->sets[0].data, + btree_bytes(c)); + + bch_btree_sort(b); + fill->written = 0; + bch_btree_read_done(&fill->io.cl); + + if (b->sets[0].data->keys != fill->sets[0].data->keys || + memcmp(b->sets[0].data->start, + fill->sets[0].data->start, + b->sets[0].data->keys * sizeof(uint64_t))) { + struct bset *i = b->sets[0].data; + struct bkey *k, *l; + + for (k = i->start, + l = fill->sets[0].data->start; + k < end(i); + k = bkey_next(k), l = bkey_next(l)) + if (bkey_cmp(k, l) || + KEY_SIZE(k) != KEY_SIZE(l)) + pr_err("key %zi differs: %s " + "!= %s", (uint64_t *) k - i->d, + pkey(k), pkey(l)); + + for (j = 0; j < 3; j++) { + pr_err("**** Set %i ****", j); + dump(all[j]); + } + panic("\n"); + } + + pr_info("fuzz complete: %i keys", b->sets[0].data->keys); + } +} + +kobj_attribute_write(fuzz, btree_fuzz); +#endif + +void bch_debug_exit(void) +{ + if (!IS_ERR_OR_NULL(debug)) + debugfs_remove_recursive(debug); +} + +int __init bch_debug_init(struct kobject *kobj) +{ + int ret = 0; +#ifdef CONFIG_BCACHE_DEBUG + ret = sysfs_create_file(kobj, &ksysfs_fuzz.attr); + if (ret) + return ret; +#endif + + debug = debugfs_create_dir("bcache", NULL); + return ret; +} diff --git a/drivers/md/bcache/debug.h b/drivers/md/bcache/debug.h new file mode 100644 index 000000000000..f9378a218148 --- /dev/null +++ b/drivers/md/bcache/debug.h @@ -0,0 +1,54 @@ +#ifndef _BCACHE_DEBUG_H +#define _BCACHE_DEBUG_H + +/* Btree/bkey debug printing */ + +#define KEYHACK_SIZE 80 +struct keyprint_hack { + char s[KEYHACK_SIZE]; +}; + +struct keyprint_hack bch_pkey(const struct bkey *k); +struct keyprint_hack bch_pbtree(const struct btree *b); +#define pkey(k) (&bch_pkey(k).s[0]) +#define pbtree(b) (&bch_pbtree(b).s[0]) + +#ifdef CONFIG_BCACHE_EDEBUG + +unsigned bch_count_data(struct btree *); +void bch_check_key_order_msg(struct btree *, struct bset *, const char *, ...); +void bch_check_keys(struct btree *, const char *, ...); + +#define bch_check_key_order(b, i) \ + bch_check_key_order_msg(b, i, "keys out of order") +#define EBUG_ON(cond) BUG_ON(cond) + +#else /* EDEBUG */ + +#define bch_count_data(b) 0 +#define bch_check_key_order(b, i) do {} while (0) +#define bch_check_key_order_msg(b, i, ...) do {} while (0) +#define bch_check_keys(b, ...) 
do {} while (0) +#define EBUG_ON(cond) do {} while (0) + +#endif + +#ifdef CONFIG_BCACHE_DEBUG + +void bch_btree_verify(struct btree *, struct bset *); +void bch_data_verify(struct search *); + +#else /* DEBUG */ + +static inline void bch_btree_verify(struct btree *b, struct bset *i) {} +static inline void bch_data_verify(struct search *s) {}; + +#endif + +#ifdef CONFIG_DEBUG_FS +void bch_debug_init_cache_set(struct cache_set *); +#else +static inline void bch_debug_init_cache_set(struct cache_set *c) {} +#endif + +#endif diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c new file mode 100644 index 000000000000..f565512f6fac --- /dev/null +++ b/drivers/md/bcache/io.c @@ -0,0 +1,390 @@ +/* + * Some low level IO code, and hacks for various block layer limitations + * + * Copyright 2010, 2011 Kent Overstreet + * Copyright 2012 Google, Inc. + */ + +#include "bcache.h" +#include "bset.h" +#include "debug.h" + +static void bch_bi_idx_hack_endio(struct bio *bio, int error) +{ + struct bio *p = bio->bi_private; + + bio_endio(p, error); + bio_put(bio); +} + +static void bch_generic_make_request_hack(struct bio *bio) +{ + if (bio->bi_idx) { + struct bio *clone = bio_alloc(GFP_NOIO, bio_segments(bio)); + + memcpy(clone->bi_io_vec, + bio_iovec(bio), + bio_segments(bio) * sizeof(struct bio_vec)); + + clone->bi_sector = bio->bi_sector; + clone->bi_bdev = bio->bi_bdev; + clone->bi_rw = bio->bi_rw; + clone->bi_vcnt = bio_segments(bio); + clone->bi_size = bio->bi_size; + + clone->bi_private = bio; + clone->bi_end_io = bch_bi_idx_hack_endio; + + bio = clone; + } + + generic_make_request(bio); +} + +/** + * bch_bio_split - split a bio + * @bio: bio to split + * @sectors: number of sectors to split from the front of @bio + * @gfp: gfp mask + * @bs: bio set to allocate from + * + * Allocates and returns a new bio which represents @sectors from the start of + * @bio, and updates @bio to represent the remaining sectors. + * + * If bio_sectors(@bio) was less than or equal to @sectors, returns @bio + * unchanged. + * + * The newly allocated bio will point to @bio's bi_io_vec, if the split was on a + * bvec boundry; it is the caller's responsibility to ensure that @bio is not + * freed before the split. + * + * If bch_bio_split() is running under generic_make_request(), it's not safe to + * allocate more than one bio from the same bio set. Therefore, if it is running + * under generic_make_request() it masks out __GFP_WAIT when doing the + * allocation. The caller must check for failure if there's any possibility of + * it being called from under generic_make_request(); it is then the caller's + * responsibility to retry from a safe context (by e.g. punting to workqueue). + */ +struct bio *bch_bio_split(struct bio *bio, int sectors, + gfp_t gfp, struct bio_set *bs) +{ + unsigned idx = bio->bi_idx, vcnt = 0, nbytes = sectors << 9; + struct bio_vec *bv; + struct bio *ret = NULL; + + BUG_ON(sectors <= 0); + + /* + * If we're being called from underneath generic_make_request() and we + * already allocated any bios from this bio set, we risk deadlock if we + * use the mempool. So instead, we possibly fail and let the caller punt + * to workqueue or somesuch and retry in a safe context. 
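+	 *
+	 * A caller in that situation ends up doing roughly what
+	 * __bch_bio_submit_split() below does (retry_fn standing in for
+	 * whatever closure function reissues the split):
+	 *
+	 *	n = bch_bio_split(bio, sectors, GFP_NOIO, bs);
+	 *	if (!n)
+	 *		continue_at(cl, retry_fn, system_wq);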
+ */ + if (current->bio_list) + gfp &= ~__GFP_WAIT; + + if (sectors >= bio_sectors(bio)) + return bio; + + if (bio->bi_rw & REQ_DISCARD) { + ret = bio_alloc_bioset(gfp, 1, bs); + idx = 0; + goto out; + } + + bio_for_each_segment(bv, bio, idx) { + vcnt = idx - bio->bi_idx; + + if (!nbytes) { + ret = bio_alloc_bioset(gfp, vcnt, bs); + if (!ret) + return NULL; + + memcpy(ret->bi_io_vec, bio_iovec(bio), + sizeof(struct bio_vec) * vcnt); + + break; + } else if (nbytes < bv->bv_len) { + ret = bio_alloc_bioset(gfp, ++vcnt, bs); + if (!ret) + return NULL; + + memcpy(ret->bi_io_vec, bio_iovec(bio), + sizeof(struct bio_vec) * vcnt); + + ret->bi_io_vec[vcnt - 1].bv_len = nbytes; + bv->bv_offset += nbytes; + bv->bv_len -= nbytes; + break; + } + + nbytes -= bv->bv_len; + } +out: + ret->bi_bdev = bio->bi_bdev; + ret->bi_sector = bio->bi_sector; + ret->bi_size = sectors << 9; + ret->bi_rw = bio->bi_rw; + ret->bi_vcnt = vcnt; + ret->bi_max_vecs = vcnt; + + bio->bi_sector += sectors; + bio->bi_size -= sectors << 9; + bio->bi_idx = idx; + + if (bio_integrity(bio)) { + if (bio_integrity_clone(ret, bio, gfp)) { + bio_put(ret); + return NULL; + } + + bio_integrity_trim(ret, 0, bio_sectors(ret)); + bio_integrity_trim(bio, bio_sectors(ret), bio_sectors(bio)); + } + + return ret; +} + +static unsigned bch_bio_max_sectors(struct bio *bio) +{ + unsigned ret = bio_sectors(bio); + struct request_queue *q = bdev_get_queue(bio->bi_bdev); + struct bio_vec *bv, *end = bio_iovec(bio) + + min_t(int, bio_segments(bio), queue_max_segments(q)); + + struct bvec_merge_data bvm = { + .bi_bdev = bio->bi_bdev, + .bi_sector = bio->bi_sector, + .bi_size = 0, + .bi_rw = bio->bi_rw, + }; + + if (bio->bi_rw & REQ_DISCARD) + return min(ret, q->limits.max_discard_sectors); + + if (bio_segments(bio) > queue_max_segments(q) || + q->merge_bvec_fn) { + ret = 0; + + for (bv = bio_iovec(bio); bv < end; bv++) { + if (q->merge_bvec_fn && + q->merge_bvec_fn(q, &bvm, bv) < (int) bv->bv_len) + break; + + ret += bv->bv_len >> 9; + bvm.bi_size += bv->bv_len; + } + + if (ret >= (BIO_MAX_PAGES * PAGE_SIZE) >> 9) + return (BIO_MAX_PAGES * PAGE_SIZE) >> 9; + } + + ret = min(ret, queue_max_sectors(q)); + + WARN_ON(!ret); + ret = max_t(int, ret, bio_iovec(bio)->bv_len >> 9); + + return ret; +} + +static void bch_bio_submit_split_done(struct closure *cl) +{ + struct bio_split_hook *s = container_of(cl, struct bio_split_hook, cl); + + s->bio->bi_end_io = s->bi_end_io; + s->bio->bi_private = s->bi_private; + bio_endio(s->bio, 0); + + closure_debug_destroy(&s->cl); + mempool_free(s, s->p->bio_split_hook); +} + +static void bch_bio_submit_split_endio(struct bio *bio, int error) +{ + struct closure *cl = bio->bi_private; + struct bio_split_hook *s = container_of(cl, struct bio_split_hook, cl); + + if (error) + clear_bit(BIO_UPTODATE, &s->bio->bi_flags); + + bio_put(bio); + closure_put(cl); +} + +static void __bch_bio_submit_split(struct closure *cl) +{ + struct bio_split_hook *s = container_of(cl, struct bio_split_hook, cl); + struct bio *bio = s->bio, *n; + + do { + n = bch_bio_split(bio, bch_bio_max_sectors(bio), + GFP_NOIO, s->p->bio_split); + if (!n) + continue_at(cl, __bch_bio_submit_split, system_wq); + + n->bi_end_io = bch_bio_submit_split_endio; + n->bi_private = cl; + + closure_get(cl); + bch_generic_make_request_hack(n); + } while (n != bio); + + continue_at(cl, bch_bio_submit_split_done, NULL); +} + +void bch_generic_make_request(struct bio *bio, struct bio_split_pool *p) +{ + struct bio_split_hook *s; + + if (!bio_has_data(bio) && !(bio->bi_rw & 
REQ_DISCARD)) + goto submit; + + if (bio_sectors(bio) <= bch_bio_max_sectors(bio)) + goto submit; + + s = mempool_alloc(p->bio_split_hook, GFP_NOIO); + + s->bio = bio; + s->p = p; + s->bi_end_io = bio->bi_end_io; + s->bi_private = bio->bi_private; + bio_get(bio); + + closure_call(&s->cl, __bch_bio_submit_split, NULL, NULL); + return; +submit: + bch_generic_make_request_hack(bio); +} + +/* Bios with headers */ + +void bch_bbio_free(struct bio *bio, struct cache_set *c) +{ + struct bbio *b = container_of(bio, struct bbio, bio); + mempool_free(b, c->bio_meta); +} + +struct bio *bch_bbio_alloc(struct cache_set *c) +{ + struct bbio *b = mempool_alloc(c->bio_meta, GFP_NOIO); + struct bio *bio = &b->bio; + + bio_init(bio); + bio->bi_flags |= BIO_POOL_NONE << BIO_POOL_OFFSET; + bio->bi_max_vecs = bucket_pages(c); + bio->bi_io_vec = bio->bi_inline_vecs; + + return bio; +} + +void __bch_submit_bbio(struct bio *bio, struct cache_set *c) +{ + struct bbio *b = container_of(bio, struct bbio, bio); + + bio->bi_sector = PTR_OFFSET(&b->key, 0); + bio->bi_bdev = PTR_CACHE(c, &b->key, 0)->bdev; + + b->submit_time_us = local_clock_us(); + closure_bio_submit(bio, bio->bi_private, PTR_CACHE(c, &b->key, 0)); +} + +void bch_submit_bbio(struct bio *bio, struct cache_set *c, + struct bkey *k, unsigned ptr) +{ + struct bbio *b = container_of(bio, struct bbio, bio); + bch_bkey_copy_single_ptr(&b->key, k, ptr); + __bch_submit_bbio(bio, c); +} + +/* IO errors */ + +void bch_count_io_errors(struct cache *ca, int error, const char *m) +{ + /* + * The halflife of an error is: + * log2(1/2)/log2(127/128) * refresh ~= 88 * refresh + */ + + if (ca->set->error_decay) { + unsigned count = atomic_inc_return(&ca->io_count); + + while (count > ca->set->error_decay) { + unsigned errors; + unsigned old = count; + unsigned new = count - ca->set->error_decay; + + /* + * First we subtract refresh from count; each time we + * succesfully do so, we rescale the errors once: + */ + + count = atomic_cmpxchg(&ca->io_count, old, new); + + if (count == old) { + count = new; + + errors = atomic_read(&ca->io_errors); + do { + old = errors; + new = ((uint64_t) errors * 127) / 128; + errors = atomic_cmpxchg(&ca->io_errors, + old, new); + } while (old != errors); + } + } + } + + if (error) { + char buf[BDEVNAME_SIZE]; + unsigned errors = atomic_add_return(1 << IO_ERROR_SHIFT, + &ca->io_errors); + errors >>= IO_ERROR_SHIFT; + + if (errors < ca->set->error_limit) + pr_err("%s: IO error on %s, recovering", + bdevname(ca->bdev, buf), m); + else + bch_cache_set_error(ca->set, + "%s: too many IO errors %s", + bdevname(ca->bdev, buf), m); + } +} + +void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio, + int error, const char *m) +{ + struct bbio *b = container_of(bio, struct bbio, bio); + struct cache *ca = PTR_CACHE(c, &b->key, 0); + + unsigned threshold = bio->bi_rw & REQ_WRITE + ? 
c->congested_write_threshold_us + : c->congested_read_threshold_us; + + if (threshold) { + unsigned t = local_clock_us(); + + int us = t - b->submit_time_us; + int congested = atomic_read(&c->congested); + + if (us > (int) threshold) { + int ms = us / 1024; + c->congested_last_us = t; + + ms = min(ms, CONGESTED_MAX + congested); + atomic_sub(ms, &c->congested); + } else if (congested < 0) + atomic_inc(&c->congested); + } + + bch_count_io_errors(ca, error, m); +} + +void bch_bbio_endio(struct cache_set *c, struct bio *bio, + int error, const char *m) +{ + struct closure *cl = bio->bi_private; + + bch_bbio_count_io_errors(c, bio, error, m); + bio_put(bio); + closure_put(cl); +} diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c new file mode 100644 index 000000000000..c871ffaabbb0 --- /dev/null +++ b/drivers/md/bcache/journal.c @@ -0,0 +1,785 @@ +/* + * bcache journalling code, for btree insertions + * + * Copyright 2012 Google, Inc. + */ + +#include "bcache.h" +#include "btree.h" +#include "debug.h" +#include "request.h" + +/* + * Journal replay/recovery: + * + * This code is all driven from run_cache_set(); we first read the journal + * entries, do some other stuff, then we mark all the keys in the journal + * entries (same as garbage collection would), then we replay them - reinserting + * them into the cache in precisely the same order as they appear in the + * journal. + * + * We only journal keys that go in leaf nodes, which simplifies things quite a + * bit. + */ + +static void journal_read_endio(struct bio *bio, int error) +{ + struct closure *cl = bio->bi_private; + closure_put(cl); +} + +static int journal_read_bucket(struct cache *ca, struct list_head *list, + struct btree_op *op, unsigned bucket_index) +{ + struct journal_device *ja = &ca->journal; + struct bio *bio = &ja->bio; + + struct journal_replay *i; + struct jset *j, *data = ca->set->journal.w[0].data; + unsigned len, left, offset = 0; + int ret = 0; + sector_t bucket = bucket_to_sector(ca->set, ca->sb.d[bucket_index]); + + pr_debug("reading %llu", (uint64_t) bucket); + + while (offset < ca->sb.bucket_size) { +reread: left = ca->sb.bucket_size - offset; + len = min_t(unsigned, left, PAGE_SECTORS * 8); + + bio_reset(bio); + bio->bi_sector = bucket + offset; + bio->bi_bdev = ca->bdev; + bio->bi_rw = READ; + bio->bi_size = len << 9; + + bio->bi_end_io = journal_read_endio; + bio->bi_private = &op->cl; + bio_map(bio, data); + + closure_bio_submit(bio, &op->cl, ca); + closure_sync(&op->cl); + + /* This function could be simpler now since we no longer write + * journal entries that overlap bucket boundaries; this means + * the start of a bucket will always have a valid journal entry + * if it has any journal entries at all. 
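+		 *
+		 * Note the reread: label above: if an entry turns out to
+		 * straddle the end of the chunk we just read (the
+		 * bytes > len << 9 check below), we reissue the read starting
+		 * at that entry's offset, so the whole entry lands in the
+		 * buffer before we try to parse it.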
+ */ + + j = data; + while (len) { + struct list_head *where; + size_t blocks, bytes = set_bytes(j); + + if (j->magic != jset_magic(ca->set)) + return ret; + + if (bytes > left << 9) + return ret; + + if (bytes > len << 9) + goto reread; + + if (j->csum != csum_set(j)) + return ret; + + blocks = set_blocks(j, ca->set); + + while (!list_empty(list)) { + i = list_first_entry(list, + struct journal_replay, list); + if (i->j.seq >= j->last_seq) + break; + list_del(&i->list); + kfree(i); + } + + list_for_each_entry_reverse(i, list, list) { + if (j->seq == i->j.seq) + goto next_set; + + if (j->seq < i->j.last_seq) + goto next_set; + + if (j->seq > i->j.seq) { + where = &i->list; + goto add; + } + } + + where = list; +add: + i = kmalloc(offsetof(struct journal_replay, j) + + bytes, GFP_KERNEL); + if (!i) + return -ENOMEM; + memcpy(&i->j, j, bytes); + list_add(&i->list, where); + ret = 1; + + ja->seq[bucket_index] = j->seq; +next_set: + offset += blocks * ca->sb.block_size; + len -= blocks * ca->sb.block_size; + j = ((void *) j) + blocks * block_bytes(ca); + } + } + + return ret; +} + +int bch_journal_read(struct cache_set *c, struct list_head *list, + struct btree_op *op) +{ +#define read_bucket(b) \ + ({ \ + int ret = journal_read_bucket(ca, list, op, b); \ + __set_bit(b, bitmap); \ + if (ret < 0) \ + return ret; \ + ret; \ + }) + + struct cache *ca; + unsigned iter; + + for_each_cache(ca, c, iter) { + struct journal_device *ja = &ca->journal; + unsigned long bitmap[SB_JOURNAL_BUCKETS / BITS_PER_LONG]; + unsigned i, l, r, m; + uint64_t seq; + + bitmap_zero(bitmap, SB_JOURNAL_BUCKETS); + pr_debug("%u journal buckets", ca->sb.njournal_buckets); + + /* Read journal buckets ordered by golden ratio hash to quickly + * find a sequence of buckets with valid journal entries + */ + for (i = 0; i < ca->sb.njournal_buckets; i++) { + l = (i * 2654435769U) % ca->sb.njournal_buckets; + + if (test_bit(l, bitmap)) + break; + + if (read_bucket(l)) + goto bsearch; + } + + /* If that fails, check all the buckets we haven't checked + * already + */ + pr_debug("falling back to linear search"); + + for (l = 0; l < ca->sb.njournal_buckets; l++) { + if (test_bit(l, bitmap)) + continue; + + if (read_bucket(l)) + goto bsearch; + } +bsearch: + /* Binary search */ + m = r = find_next_bit(bitmap, ca->sb.njournal_buckets, l + 1); + pr_debug("starting binary search, l %u r %u", l, r); + + while (l + 1 < r) { + m = (l + r) >> 1; + + if (read_bucket(m)) + l = m; + else + r = m; + } + + /* Read buckets in reverse order until we stop finding more + * journal entries + */ + pr_debug("finishing up"); + l = m; + + while (1) { + if (!l--) + l = ca->sb.njournal_buckets - 1; + + if (l == m) + break; + + if (test_bit(l, bitmap)) + continue; + + if (!read_bucket(l)) + break; + } + + seq = 0; + + for (i = 0; i < ca->sb.njournal_buckets; i++) + if (ja->seq[i] > seq) { + seq = ja->seq[i]; + ja->cur_idx = ja->discard_idx = + ja->last_idx = i; + + } + } + + c->journal.seq = list_entry(list->prev, + struct journal_replay, + list)->j.seq; + + return 0; +#undef read_bucket +} + +void bch_journal_mark(struct cache_set *c, struct list_head *list) +{ + atomic_t p = { 0 }; + struct bkey *k; + struct journal_replay *i; + struct journal *j = &c->journal; + uint64_t last = j->seq; + + /* + * journal.pin should never fill up - we never write a journal + * entry when it would fill up. But if for some reason it does, we + * iterate over the list in reverse order so that we can just skip that + * refcount instead of bugging. 
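+	 *
+	 * Walking the list newest to oldest also means the pins are pushed
+	 * onto the front of the fifo in decreasing sequence order, so when
+	 * we're done the front of the fifo is the oldest open journal
+	 * entry - which is the invariant last_seq() relies on.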
+ */ + + list_for_each_entry_reverse(i, list, list) { + BUG_ON(last < i->j.seq); + i->pin = NULL; + + while (last-- != i->j.seq) + if (fifo_free(&j->pin) > 1) { + fifo_push_front(&j->pin, p); + atomic_set(&fifo_front(&j->pin), 0); + } + + if (fifo_free(&j->pin) > 1) { + fifo_push_front(&j->pin, p); + i->pin = &fifo_front(&j->pin); + atomic_set(i->pin, 1); + } + + for (k = i->j.start; + k < end(&i->j); + k = bkey_next(k)) { + unsigned j; + + for (j = 0; j < KEY_PTRS(k); j++) { + struct bucket *g = PTR_BUCKET(c, k, j); + atomic_inc(&g->pin); + + if (g->prio == BTREE_PRIO && + !ptr_stale(c, k, j)) + g->prio = INITIAL_PRIO; + } + + __bch_btree_mark_key(c, 0, k); + } + } +} + +int bch_journal_replay(struct cache_set *s, struct list_head *list, + struct btree_op *op) +{ + int ret = 0, keys = 0, entries = 0; + struct bkey *k; + struct journal_replay *i = + list_entry(list->prev, struct journal_replay, list); + + uint64_t start = i->j.last_seq, end = i->j.seq, n = start; + + list_for_each_entry(i, list, list) { + BUG_ON(i->pin && atomic_read(i->pin) != 1); + + if (n != i->j.seq) + pr_err("journal entries %llu-%llu " + "missing! (replaying %llu-%llu)\n", + n, i->j.seq - 1, start, end); + + for (k = i->j.start; + k < end(&i->j); + k = bkey_next(k)) { + pr_debug("%s", pkey(k)); + bkey_copy(op->keys.top, k); + bch_keylist_push(&op->keys); + + op->journal = i->pin; + atomic_inc(op->journal); + + ret = bch_btree_insert(op, s); + if (ret) + goto err; + + BUG_ON(!bch_keylist_empty(&op->keys)); + keys++; + + cond_resched(); + } + + if (i->pin) + atomic_dec(i->pin); + n = i->j.seq + 1; + entries++; + } + + pr_info("journal replay done, %i keys in %i entries, seq %llu", + keys, entries, end); + + while (!list_empty(list)) { + i = list_first_entry(list, struct journal_replay, list); + list_del(&i->list); + kfree(i); + } +err: + closure_sync(&op->cl); + return ret; +} + +/* Journalling */ + +static void btree_flush_write(struct cache_set *c) +{ + /* + * Try to find the btree node with that references the oldest journal + * entry, best is our current candidate and is locked if non NULL: + */ + struct btree *b, *best = NULL; + unsigned iter; + + for_each_cached_btree(b, c, iter) { + if (!down_write_trylock(&b->lock)) + continue; + + if (!btree_node_dirty(b) || + !btree_current_write(b)->journal) { + rw_unlock(true, b); + continue; + } + + if (!best) + best = b; + else if (journal_pin_cmp(c, + btree_current_write(best), + btree_current_write(b))) { + rw_unlock(true, best); + best = b; + } else + rw_unlock(true, b); + } + + if (best) + goto out; + + /* We can't find the best btree node, just pick the first */ + list_for_each_entry(b, &c->btree_cache, list) + if (!b->level && btree_node_dirty(b)) { + best = b; + rw_lock(true, best, best->level); + goto found; + } + +out: + if (!best) + return; +found: + if (btree_node_dirty(best)) + bch_btree_write(best, true, NULL); + rw_unlock(true, best); +} + +#define last_seq(j) ((j)->seq - fifo_used(&(j)->pin) + 1) + +static void journal_discard_endio(struct bio *bio, int error) +{ + struct journal_device *ja = + container_of(bio, struct journal_device, discard_bio); + struct cache *ca = container_of(ja, struct cache, journal); + + atomic_set(&ja->discard_in_flight, DISCARD_DONE); + + closure_wake_up(&ca->set->journal.wait); + closure_put(&ca->set->cl); +} + +static void journal_discard_work(struct work_struct *work) +{ + struct journal_device *ja = + container_of(work, struct journal_device, discard_work); + + submit_bio(0, &ja->discard_bio); +} + +static void 
do_journal_discard(struct cache *ca) +{ + struct journal_device *ja = &ca->journal; + struct bio *bio = &ja->discard_bio; + + if (!ca->discard) { + ja->discard_idx = ja->last_idx; + return; + } + + switch (atomic_read(&ja->discard_in_flight) == DISCARD_IN_FLIGHT) { + case DISCARD_IN_FLIGHT: + return; + + case DISCARD_DONE: + ja->discard_idx = (ja->discard_idx + 1) % + ca->sb.njournal_buckets; + + atomic_set(&ja->discard_in_flight, DISCARD_READY); + /* fallthrough */ + + case DISCARD_READY: + if (ja->discard_idx == ja->last_idx) + return; + + atomic_set(&ja->discard_in_flight, DISCARD_IN_FLIGHT); + + bio_init(bio); + bio->bi_sector = bucket_to_sector(ca->set, + ca->sb.d[ja->discard_idx]); + bio->bi_bdev = ca->bdev; + bio->bi_rw = REQ_WRITE|REQ_DISCARD; + bio->bi_max_vecs = 1; + bio->bi_io_vec = bio->bi_inline_vecs; + bio->bi_size = bucket_bytes(ca); + bio->bi_end_io = journal_discard_endio; + + closure_get(&ca->set->cl); + INIT_WORK(&ja->discard_work, journal_discard_work); + schedule_work(&ja->discard_work); + } +} + +static void journal_reclaim(struct cache_set *c) +{ + struct bkey *k = &c->journal.key; + struct cache *ca; + uint64_t last_seq; + unsigned iter, n = 0; + atomic_t p; + + while (!atomic_read(&fifo_front(&c->journal.pin))) + fifo_pop(&c->journal.pin, p); + + last_seq = last_seq(&c->journal); + + /* Update last_idx */ + + for_each_cache(ca, c, iter) { + struct journal_device *ja = &ca->journal; + + while (ja->last_idx != ja->cur_idx && + ja->seq[ja->last_idx] < last_seq) + ja->last_idx = (ja->last_idx + 1) % + ca->sb.njournal_buckets; + } + + for_each_cache(ca, c, iter) + do_journal_discard(ca); + + if (c->journal.blocks_free) + return; + + /* + * Allocate: + * XXX: Sort by free journal space + */ + + for_each_cache(ca, c, iter) { + struct journal_device *ja = &ca->journal; + unsigned next = (ja->cur_idx + 1) % ca->sb.njournal_buckets; + + /* No space available on this device */ + if (next == ja->discard_idx) + continue; + + ja->cur_idx = next; + k->ptr[n++] = PTR(0, + bucket_to_sector(c, ca->sb.d[ja->cur_idx]), + ca->sb.nr_this_dev); + } + + bkey_init(k); + SET_KEY_PTRS(k, n); + + if (n) + c->journal.blocks_free = c->sb.bucket_size >> c->block_bits; + + if (!journal_full(&c->journal)) + __closure_wake_up(&c->journal.wait); +} + +void bch_journal_next(struct journal *j) +{ + atomic_t p = { 1 }; + + j->cur = (j->cur == j->w) + ? &j->w[1] + : &j->w[0]; + + /* + * The fifo_push() needs to happen at the same time as j->seq is + * incremented for last_seq() to be calculated correctly + */ + BUG_ON(!fifo_push(&j->pin, p)); + atomic_set(&fifo_back(&j->pin), 1); + + j->cur->data->seq = ++j->seq; + j->cur->need_write = false; + j->cur->data->keys = 0; + + if (fifo_full(&j->pin)) + pr_debug("journal_pin full (%zu)", fifo_used(&j->pin)); +} + +static void journal_write_endio(struct bio *bio, int error) +{ + struct journal_write *w = bio->bi_private; + + cache_set_err_on(error, w->c, "journal io error"); + closure_put(&w->c->journal.io.cl); +} + +static void journal_write(struct closure *); + +static void journal_write_done(struct closure *cl) +{ + struct journal *j = container_of(cl, struct journal, io.cl); + struct cache_set *c = container_of(j, struct cache_set, journal); + + struct journal_write *w = (j->cur == j->w) + ? 
&j->w[1] + : &j->w[0]; + + __closure_wake_up(&w->wait); + + if (c->journal_delay_ms) + closure_delay(&j->io, msecs_to_jiffies(c->journal_delay_ms)); + + continue_at(cl, journal_write, system_wq); +} + +static void journal_write_unlocked(struct closure *cl) +{ + struct cache_set *c = container_of(cl, struct cache_set, journal.io.cl); + struct cache *ca; + struct journal_write *w = c->journal.cur; + struct bkey *k = &c->journal.key; + unsigned i, sectors = set_blocks(w->data, c) * c->sb.block_size; + + struct bio *bio; + struct bio_list list; + bio_list_init(&list); + + if (!w->need_write) { + /* + * XXX: have to unlock closure before we unlock journal lock, + * else we race with bch_journal(). But this way we race + * against cache set unregister. Doh. + */ + set_closure_fn(cl, NULL, NULL); + closure_sub(cl, CLOSURE_RUNNING + 1); + spin_unlock(&c->journal.lock); + return; + } else if (journal_full(&c->journal)) { + journal_reclaim(c); + spin_unlock(&c->journal.lock); + + btree_flush_write(c); + continue_at(cl, journal_write, system_wq); + } + + c->journal.blocks_free -= set_blocks(w->data, c); + + w->data->btree_level = c->root->level; + + bkey_copy(&w->data->btree_root, &c->root->key); + bkey_copy(&w->data->uuid_bucket, &c->uuid_bucket); + + for_each_cache(ca, c, i) + w->data->prio_bucket[ca->sb.nr_this_dev] = ca->prio_buckets[0]; + + w->data->magic = jset_magic(c); + w->data->version = BCACHE_JSET_VERSION; + w->data->last_seq = last_seq(&c->journal); + w->data->csum = csum_set(w->data); + + for (i = 0; i < KEY_PTRS(k); i++) { + ca = PTR_CACHE(c, k, i); + bio = &ca->journal.bio; + + atomic_long_add(sectors, &ca->meta_sectors_written); + + bio_reset(bio); + bio->bi_sector = PTR_OFFSET(k, i); + bio->bi_bdev = ca->bdev; + bio->bi_rw = REQ_WRITE|REQ_SYNC|REQ_META|REQ_FLUSH; + bio->bi_size = sectors << 9; + + bio->bi_end_io = journal_write_endio; + bio->bi_private = w; + bio_map(bio, w->data); + + trace_bcache_journal_write(bio); + bio_list_add(&list, bio); + + SET_PTR_OFFSET(k, i, PTR_OFFSET(k, i) + sectors); + + ca->journal.seq[ca->journal.cur_idx] = w->data->seq; + } + + atomic_dec_bug(&fifo_back(&c->journal.pin)); + bch_journal_next(&c->journal); + journal_reclaim(c); + + spin_unlock(&c->journal.lock); + + while ((bio = bio_list_pop(&list))) + closure_bio_submit(bio, cl, c->cache[0]); + + continue_at(cl, journal_write_done, NULL); +} + +static void journal_write(struct closure *cl) +{ + struct cache_set *c = container_of(cl, struct cache_set, journal.io.cl); + + spin_lock(&c->journal.lock); + journal_write_unlocked(cl); +} + +static void __journal_try_write(struct cache_set *c, bool noflush) +{ + struct closure *cl = &c->journal.io.cl; + + if (!closure_trylock(cl, &c->cl)) + spin_unlock(&c->journal.lock); + else if (noflush && journal_full(&c->journal)) { + spin_unlock(&c->journal.lock); + continue_at(cl, journal_write, system_wq); + } else + journal_write_unlocked(cl); +} + +#define journal_try_write(c) __journal_try_write(c, false) + +void bch_journal_meta(struct cache_set *c, struct closure *cl) +{ + struct journal_write *w; + + if (CACHE_SYNC(&c->sb)) { + spin_lock(&c->journal.lock); + + w = c->journal.cur; + w->need_write = true; + + if (cl) + BUG_ON(!closure_wait(&w->wait, cl)); + + __journal_try_write(c, true); + } +} + +/* + * Entry point to the journalling code - bio_insert() and btree_invalidate() + * pass bch_journal() a list of keys to be journalled, and then + * bch_journal() hands those same keys off to btree_insert_async() + */ + +void bch_journal(struct closure *cl) +{ + 
struct btree_op *op = container_of(cl, struct btree_op, cl); + struct cache_set *c = op->c; + struct journal_write *w; + size_t b, n = ((uint64_t *) op->keys.top) - op->keys.list; + + if (op->type != BTREE_INSERT || + !CACHE_SYNC(&c->sb)) + goto out; + + /* + * If we're looping because we errored, might already be waiting on + * another journal write: + */ + while (atomic_read(&cl->parent->remaining) & CLOSURE_WAITING) + closure_sync(cl->parent); + + spin_lock(&c->journal.lock); + + if (journal_full(&c->journal)) { + /* XXX: tracepoint */ + closure_wait(&c->journal.wait, cl); + + journal_reclaim(c); + spin_unlock(&c->journal.lock); + + btree_flush_write(c); + continue_at(cl, bch_journal, bcache_wq); + } + + w = c->journal.cur; + w->need_write = true; + b = __set_blocks(w->data, w->data->keys + n, c); + + if (b * c->sb.block_size > PAGE_SECTORS << JSET_BITS || + b > c->journal.blocks_free) { + /* XXX: If we were inserting so many keys that they won't fit in + * an _empty_ journal write, we'll deadlock. For now, handle + * this in bch_keylist_realloc() - but something to think about. + */ + BUG_ON(!w->data->keys); + + /* XXX: tracepoint */ + BUG_ON(!closure_wait(&w->wait, cl)); + + closure_flush(&c->journal.io); + + journal_try_write(c); + continue_at(cl, bch_journal, bcache_wq); + } + + memcpy(end(w->data), op->keys.list, n * sizeof(uint64_t)); + w->data->keys += n; + + op->journal = &fifo_back(&c->journal.pin); + atomic_inc(op->journal); + + if (op->flush_journal) { + closure_flush(&c->journal.io); + closure_wait(&w->wait, cl->parent); + } + + journal_try_write(c); +out: + bch_btree_insert_async(cl); +} + +void bch_journal_free(struct cache_set *c) +{ + free_pages((unsigned long) c->journal.w[1].data, JSET_BITS); + free_pages((unsigned long) c->journal.w[0].data, JSET_BITS); + free_fifo(&c->journal.pin); +} + +int bch_journal_alloc(struct cache_set *c) +{ + struct journal *j = &c->journal; + + closure_init_unlocked(&j->io); + spin_lock_init(&j->lock); + + c->journal_delay_ms = 100; + + j->w[0].c = c; + j->w[1].c = c; + + if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)) || + !(j->w[0].data = (void *) __get_free_pages(GFP_KERNEL, JSET_BITS)) || + !(j->w[1].data = (void *) __get_free_pages(GFP_KERNEL, JSET_BITS))) + return -ENOMEM; + + return 0; +} diff --git a/drivers/md/bcache/journal.h b/drivers/md/bcache/journal.h new file mode 100644 index 000000000000..3d7851274b04 --- /dev/null +++ b/drivers/md/bcache/journal.h @@ -0,0 +1,215 @@ +#ifndef _BCACHE_JOURNAL_H +#define _BCACHE_JOURNAL_H + +/* + * THE JOURNAL: + * + * The journal is treated as a circular buffer of buckets - a journal entry + * never spans two buckets. This means (not implemented yet) we can resize the + * journal at runtime, and will be needed for bcache on raw flash support. + * + * Journal entries contain a list of keys, ordered by the time they were + * inserted; thus journal replay just has to reinsert the keys. + * + * We also keep some things in the journal header that are logically part of the + * superblock - all the things that are frequently updated. This is for future + * bcache on raw flash support; the superblock (which will become another + * journal) can't be moved or wear leveled, so it contains just enough + * information to find the main journal, and the superblock only has to be + * rewritten when we want to move/wear level the main journal. + * + * Currently, we don't journal BTREE_REPLACE operations - this will hopefully be + * fixed eventually. 
This isn't a bug - BTREE_REPLACE is used for insertions + * from cache misses, which don't have to be journaled, and for writeback and + * moving gc we work around it by flushing the btree to disk before updating the + * gc information. But it is a potential issue with incremental garbage + * collection, and it's fragile. + * + * OPEN JOURNAL ENTRIES: + * + * Each journal entry contains, in the header, the sequence number of the last + * journal entry still open - i.e. that has keys that haven't been flushed to + * disk in the btree. + * + * We track this by maintaining a refcount for every open journal entry, in a + * fifo; each entry in the fifo corresponds to a particular journal + * entry/sequence number. When the refcount at the tail of the fifo goes to + * zero, we pop it off - thus, the size of the fifo tells us the number of open + * journal entries + * + * We take a refcount on a journal entry when we add some keys to a journal + * entry that we're going to insert (held by struct btree_op), and then when we + * insert those keys into the btree the btree write we're setting up takes a + * copy of that refcount (held by struct btree_write). That refcount is dropped + * when the btree write completes. + * + * A struct btree_write can only hold a refcount on a single journal entry, but + * might contain keys for many journal entries - we handle this by making sure + * it always has a refcount on the _oldest_ journal entry of all the journal + * entries it has keys for. + * + * JOURNAL RECLAIM: + * + * As mentioned previously, our fifo of refcounts tells us the number of open + * journal entries; from that and the current journal sequence number we compute + * last_seq - the oldest journal entry we still need. We write last_seq in each + * journal entry, and we also have to keep track of where it exists on disk so + * we don't overwrite it when we loop around the journal. + * + * To do that we track, for each journal bucket, the sequence number of the + * newest journal entry it contains - if we don't need that journal entry we + * don't need anything in that bucket anymore. From that we track the last + * journal bucket we still need; all this is tracked in struct journal_device + * and updated by journal_reclaim(). + * + * JOURNAL FILLING UP: + * + * There are two ways the journal could fill up; either we could run out of + * space to write to, or we could have too many open journal entries and run out + * of room in the fifo of refcounts. Since those refcounts are decremented + * without any locking we can't safely resize that fifo, so we handle it the + * same way. + * + * If the journal fills up, we start flushing dirty btree nodes until we can + * allocate space for a journal write again - preferentially flushing btree + * nodes that are pinning the oldest journal entries first. + */ + +#define BCACHE_JSET_VERSION_UUIDv1 1 +/* Always latest UUID format */ +#define BCACHE_JSET_VERSION_UUID 1 +#define BCACHE_JSET_VERSION 1 + +/* + * On disk format for a journal entry: + * seq is monotonically increasing; every journal entry has its own unique + * sequence number. + * + * last_seq is the oldest journal entry that still has keys the btree hasn't + * flushed to disk yet. + * + * version is for on disk format changes. 
+ */ +struct jset { + uint64_t csum; + uint64_t magic; + uint64_t seq; + uint32_t version; + uint32_t keys; + + uint64_t last_seq; + + BKEY_PADDED(uuid_bucket); + BKEY_PADDED(btree_root); + uint16_t btree_level; + uint16_t pad[3]; + + uint64_t prio_bucket[MAX_CACHES_PER_SET]; + + union { + struct bkey start[0]; + uint64_t d[0]; + }; +}; + +/* + * Only used for holding the journal entries we read in btree_journal_read() + * during cache_registration + */ +struct journal_replay { + struct list_head list; + atomic_t *pin; + struct jset j; +}; + +/* + * We put two of these in struct journal; we used them for writes to the + * journal that are being staged or in flight. + */ +struct journal_write { + struct jset *data; +#define JSET_BITS 3 + + struct cache_set *c; + struct closure_waitlist wait; + bool need_write; +}; + +/* Embedded in struct cache_set */ +struct journal { + spinlock_t lock; + /* used when waiting because the journal was full */ + struct closure_waitlist wait; + struct closure_with_timer io; + + /* Number of blocks free in the bucket(s) we're currently writing to */ + unsigned blocks_free; + uint64_t seq; + DECLARE_FIFO(atomic_t, pin); + + BKEY_PADDED(key); + + struct journal_write w[2], *cur; +}; + +/* + * Embedded in struct cache. First three fields refer to the array of journal + * buckets, in cache_sb. + */ +struct journal_device { + /* + * For each journal bucket, contains the max sequence number of the + * journal writes it contains - so we know when a bucket can be reused. + */ + uint64_t seq[SB_JOURNAL_BUCKETS]; + + /* Journal bucket we're currently writing to */ + unsigned cur_idx; + + /* Last journal bucket that still contains an open journal entry */ + unsigned last_idx; + + /* Next journal bucket to be discarded */ + unsigned discard_idx; + +#define DISCARD_READY 0 +#define DISCARD_IN_FLIGHT 1 +#define DISCARD_DONE 2 + /* 1 - discard in flight, -1 - discard completed */ + atomic_t discard_in_flight; + + struct work_struct discard_work; + struct bio discard_bio; + struct bio_vec discard_bv; + + /* Bio for journal reads/writes to this device */ + struct bio bio; + struct bio_vec bv[8]; +}; + +#define journal_pin_cmp(c, l, r) \ + (fifo_idx(&(c)->journal.pin, (l)->journal) > \ + fifo_idx(&(c)->journal.pin, (r)->journal)) + +#define JOURNAL_PIN 20000 + +#define journal_full(j) \ + (!(j)->blocks_free || fifo_free(&(j)->pin) <= 1) + +struct closure; +struct cache_set; +struct btree_op; + +void bch_journal(struct closure *); +void bch_journal_next(struct journal *); +void bch_journal_mark(struct cache_set *, struct list_head *); +void bch_journal_meta(struct cache_set *, struct closure *); +int bch_journal_read(struct cache_set *, struct list_head *, + struct btree_op *); +int bch_journal_replay(struct cache_set *, struct list_head *, + struct btree_op *); + +void bch_journal_free(struct cache_set *); +int bch_journal_alloc(struct cache_set *); + +#endif /* _BCACHE_JOURNAL_H */ diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c new file mode 100644 index 000000000000..c69fc92b02cf --- /dev/null +++ b/drivers/md/bcache/movinggc.c @@ -0,0 +1,254 @@ +/* + * Moving/copying garbage collector + * + * Copyright 2012 Google, Inc. 
+ */ + +#include "bcache.h" +#include "btree.h" +#include "debug.h" +#include "request.h" + +struct moving_io { + struct keybuf_key *w; + struct search s; + struct bbio bio; +}; + +static bool moving_pred(struct keybuf *buf, struct bkey *k) +{ + struct cache_set *c = container_of(buf, struct cache_set, + moving_gc_keys); + unsigned i; + + for (i = 0; i < KEY_PTRS(k); i++) { + struct cache *ca = PTR_CACHE(c, k, i); + struct bucket *g = PTR_BUCKET(c, k, i); + + if (GC_SECTORS_USED(g) < ca->gc_move_threshold) + return true; + } + + return false; +} + +/* Moving GC - IO loop */ + +static void moving_io_destructor(struct closure *cl) +{ + struct moving_io *io = container_of(cl, struct moving_io, s.cl); + kfree(io); +} + +static void write_moving_finish(struct closure *cl) +{ + struct moving_io *io = container_of(cl, struct moving_io, s.cl); + struct bio *bio = &io->bio.bio; + struct bio_vec *bv = bio_iovec_idx(bio, bio->bi_vcnt); + + while (bv-- != bio->bi_io_vec) + __free_page(bv->bv_page); + + pr_debug("%s %s", io->s.op.insert_collision + ? "collision moving" : "moved", + pkey(&io->w->key)); + + bch_keybuf_del(&io->s.op.c->moving_gc_keys, io->w); + + atomic_dec_bug(&io->s.op.c->in_flight); + closure_wake_up(&io->s.op.c->moving_gc_wait); + + closure_return_with_destructor(cl, moving_io_destructor); +} + +static void read_moving_endio(struct bio *bio, int error) +{ + struct moving_io *io = container_of(bio->bi_private, + struct moving_io, s.cl); + + if (error) + io->s.error = error; + + bch_bbio_endio(io->s.op.c, bio, error, "reading data to move"); +} + +static void moving_init(struct moving_io *io) +{ + struct bio *bio = &io->bio.bio; + + bio_init(bio); + bio_get(bio); + bio_set_prio(bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0)); + + bio->bi_size = KEY_SIZE(&io->w->key) << 9; + bio->bi_max_vecs = DIV_ROUND_UP(KEY_SIZE(&io->w->key), + PAGE_SECTORS); + bio->bi_private = &io->s.cl; + bio->bi_io_vec = bio->bi_inline_vecs; + bio_map(bio, NULL); +} + +static void write_moving(struct closure *cl) +{ + struct search *s = container_of(cl, struct search, cl); + struct moving_io *io = container_of(s, struct moving_io, s); + + if (!s->error) { + trace_bcache_write_moving(&io->bio.bio); + + moving_init(io); + + io->bio.bio.bi_sector = KEY_START(&io->w->key); + s->op.lock = -1; + s->op.write_prio = 1; + s->op.cache_bio = &io->bio.bio; + + s->writeback = KEY_DIRTY(&io->w->key); + s->op.csum = KEY_CSUM(&io->w->key); + + s->op.type = BTREE_REPLACE; + bkey_copy(&s->op.replace, &io->w->key); + + closure_init(&s->op.cl, cl); + bch_insert_data(&s->op.cl); + } + + continue_at(cl, write_moving_finish, NULL); +} + +static void read_moving_submit(struct closure *cl) +{ + struct search *s = container_of(cl, struct search, cl); + struct moving_io *io = container_of(s, struct moving_io, s); + struct bio *bio = &io->bio.bio; + + trace_bcache_read_moving(bio); + bch_submit_bbio(bio, s->op.c, &io->w->key, 0); + + continue_at(cl, write_moving, bch_gc_wq); +} + +static void read_moving(struct closure *cl) +{ + struct cache_set *c = container_of(cl, struct cache_set, moving_gc); + struct keybuf_key *w; + struct moving_io *io; + struct bio *bio; + + /* XXX: if we error, background writeback could stall indefinitely */ + + while (!test_bit(CACHE_SET_STOPPING, &c->flags)) { + w = bch_keybuf_next_rescan(c, &c->moving_gc_keys, &MAX_KEY); + if (!w) + break; + + io = kzalloc(sizeof(struct moving_io) + sizeof(struct bio_vec) + * DIV_ROUND_UP(KEY_SIZE(&w->key), PAGE_SECTORS), + GFP_KERNEL); + if (!io) + goto err; + + w->private = io; 
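+		/* cross-link, so write_moving_finish() can find and drop w */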
+ io->w = w; + io->s.op.inode = KEY_INODE(&w->key); + io->s.op.c = c; + + moving_init(io); + bio = &io->bio.bio; + + bio->bi_rw = READ; + bio->bi_end_io = read_moving_endio; + + if (bio_alloc_pages(bio, GFP_KERNEL)) + goto err; + + pr_debug("%s", pkey(&w->key)); + + closure_call(&io->s.cl, read_moving_submit, NULL, &c->gc.cl); + + if (atomic_inc_return(&c->in_flight) >= 64) { + closure_wait_event(&c->moving_gc_wait, cl, + atomic_read(&c->in_flight) < 64); + continue_at(cl, read_moving, bch_gc_wq); + } + } + + if (0) { +err: if (!IS_ERR_OR_NULL(w->private)) + kfree(w->private); + + bch_keybuf_del(&c->moving_gc_keys, w); + } + + closure_return(cl); +} + +void bch_moving_gc(struct closure *cl) +{ + struct cache_set *c = container_of(cl, struct cache_set, gc.cl); + struct cache *ca; + struct bucket *b; + unsigned i; + + bool bucket_cmp(struct bucket *l, struct bucket *r) + { + return GC_SECTORS_USED(l) < GC_SECTORS_USED(r); + } + + unsigned top(struct cache *ca) + { + return GC_SECTORS_USED(heap_peek(&ca->heap)); + } + + if (!c->copy_gc_enabled) + closure_return(cl); + + mutex_lock(&c->bucket_lock); + + for_each_cache(ca, c, i) { + unsigned sectors_to_move = 0; + unsigned reserve_sectors = ca->sb.bucket_size * + min(fifo_used(&ca->free), ca->free.size / 2); + + ca->heap.used = 0; + + for_each_bucket(b, ca) { + if (!GC_SECTORS_USED(b)) + continue; + + if (!heap_full(&ca->heap)) { + sectors_to_move += GC_SECTORS_USED(b); + heap_add(&ca->heap, b, bucket_cmp); + } else if (bucket_cmp(b, heap_peek(&ca->heap))) { + sectors_to_move -= top(ca); + sectors_to_move += GC_SECTORS_USED(b); + + ca->heap.data[0] = b; + heap_sift(&ca->heap, 0, bucket_cmp); + } + } + + while (sectors_to_move > reserve_sectors) { + heap_pop(&ca->heap, b, bucket_cmp); + sectors_to_move -= GC_SECTORS_USED(b); + } + + ca->gc_move_threshold = top(ca); + + pr_debug("threshold %u", ca->gc_move_threshold); + } + + mutex_unlock(&c->bucket_lock); + + c->moving_gc_keys.last_scanned = ZERO_KEY; + + closure_init(&c->moving_gc, cl); + read_moving(&c->moving_gc); + + closure_return(cl); +} + +void bch_moving_init_cache_set(struct cache_set *c) +{ + bch_keybuf_init(&c->moving_gc_keys, moving_pred); +} diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c new file mode 100644 index 000000000000..4f552de49aaa --- /dev/null +++ b/drivers/md/bcache/request.c @@ -0,0 +1,1409 @@ +/* + * Main bcache entry point - handle a read or a write request and decide what to + * do with it; the make_request functions are called by the block layer. + * + * Copyright 2010, 2011 Kent Overstreet + * Copyright 2012 Google, Inc. + */ + +#include "bcache.h" +#include "btree.h" +#include "debug.h" +#include "request.h" + +#include +#include +#include +#include +#include "blk-cgroup.h" + +#include + +#define CUTOFF_CACHE_ADD 95 +#define CUTOFF_CACHE_READA 90 +#define CUTOFF_WRITEBACK 50 +#define CUTOFF_WRITEBACK_SYNC 75 + +struct kmem_cache *bch_search_cache; + +static void check_should_skip(struct cached_dev *, struct search *); + +/* Cgroup interface */ + +#ifdef CONFIG_CGROUP_BCACHE +static struct bch_cgroup bcache_default_cgroup = { .cache_mode = -1 }; + +static struct bch_cgroup *cgroup_to_bcache(struct cgroup *cgroup) +{ + struct cgroup_subsys_state *css; + return cgroup && + (css = cgroup_subsys_state(cgroup, bcache_subsys_id)) + ? container_of(css, struct bch_cgroup, css) + : &bcache_default_cgroup; +} + +struct bch_cgroup *bch_bio_to_cgroup(struct bio *bio) +{ + struct cgroup_subsys_state *css = bio->bi_css + ? 
cgroup_subsys_state(bio->bi_css->cgroup, bcache_subsys_id)
+ : task_subsys_state(current, bcache_subsys_id);
+
+ return css
+ ? container_of(css, struct bch_cgroup, css)
+ : &bcache_default_cgroup;
+}
+
+static ssize_t cache_mode_read(struct cgroup *cgrp, struct cftype *cft,
+ struct file *file,
+ char __user *buf, size_t nbytes, loff_t *ppos)
+{
+ char tmp[1024];
+ int len = snprint_string_list(tmp, sizeof(tmp), bch_cache_modes,
+ cgroup_to_bcache(cgrp)->cache_mode + 1);
+
+ if (len < 0)
+ return len;
+
+ return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
+}
+
+static int cache_mode_write(struct cgroup *cgrp, struct cftype *cft,
+ const char *buf)
+{
+ int v = read_string_list(buf, bch_cache_modes);
+ if (v < 0)
+ return v;
+
+ cgroup_to_bcache(cgrp)->cache_mode = v - 1;
+ return 0;
+}
+
+static u64 bch_verify_read(struct cgroup *cgrp, struct cftype *cft)
+{
+ return cgroup_to_bcache(cgrp)->verify;
+}
+
+static int bch_verify_write(struct cgroup *cgrp, struct cftype *cft, u64 val)
+{
+ cgroup_to_bcache(cgrp)->verify = val;
+ return 0;
+}
+
+static u64 bch_cache_hits_read(struct cgroup *cgrp, struct cftype *cft)
+{
+ struct bch_cgroup *bcachecg = cgroup_to_bcache(cgrp);
+ return atomic_read(&bcachecg->stats.cache_hits);
+}
+
+static u64 bch_cache_misses_read(struct cgroup *cgrp, struct cftype *cft)
+{
+ struct bch_cgroup *bcachecg = cgroup_to_bcache(cgrp);
+ return atomic_read(&bcachecg->stats.cache_misses);
+}
+
+static u64 bch_cache_bypass_hits_read(struct cgroup *cgrp,
+ struct cftype *cft)
+{
+ struct bch_cgroup *bcachecg = cgroup_to_bcache(cgrp);
+ return atomic_read(&bcachecg->stats.cache_bypass_hits);
+}
+
+static u64 bch_cache_bypass_misses_read(struct cgroup *cgrp,
+ struct cftype *cft)
+{
+ struct bch_cgroup *bcachecg = cgroup_to_bcache(cgrp);
+ return atomic_read(&bcachecg->stats.cache_bypass_misses);
+}
+
+static struct cftype bch_files[] = {
+ {
+ .name = "cache_mode",
+ .read = cache_mode_read,
+ .write_string = cache_mode_write,
+ },
+ {
+ .name = "verify",
+ .read_u64 = bch_verify_read,
+ .write_u64 = bch_verify_write,
+ },
+ {
+ .name = "cache_hits",
+ .read_u64 = bch_cache_hits_read,
+ },
+ {
+ .name = "cache_misses",
+ .read_u64 = bch_cache_misses_read,
+ },
+ {
+ .name = "cache_bypass_hits",
+ .read_u64 = bch_cache_bypass_hits_read,
+ },
+ {
+ .name = "cache_bypass_misses",
+ .read_u64 = bch_cache_bypass_misses_read,
+ },
+ { } /* terminate */
+};
+
+static void init_bch_cgroup(struct bch_cgroup *cg)
+{
+ cg->cache_mode = -1;
+}
+
+static struct cgroup_subsys_state *bcachecg_create(struct cgroup *cgroup)
+{
+ struct bch_cgroup *cg;
+
+ cg = kzalloc(sizeof(*cg), GFP_KERNEL);
+ if (!cg)
+ return ERR_PTR(-ENOMEM);
+ init_bch_cgroup(cg);
+ return &cg->css;
+}
+
+static void bcachecg_destroy(struct cgroup *cgroup)
+{
+ struct bch_cgroup *cg = cgroup_to_bcache(cgroup);
+ free_css_id(&bcache_subsys, &cg->css);
+ kfree(cg);
+}
+
+struct cgroup_subsys bcache_subsys = {
+ .create = bcachecg_create,
+ .destroy = bcachecg_destroy,
+ .subsys_id = bcache_subsys_id,
+ .name = "bcache",
+ .module = THIS_MODULE,
+};
+EXPORT_SYMBOL_GPL(bcache_subsys);
+#endif
+
+static unsigned cache_mode(struct cached_dev *dc, struct bio *bio)
+{
+#ifdef CONFIG_CGROUP_BCACHE
+ int r = bch_bio_to_cgroup(bio)->cache_mode;
+ if (r >= 0)
+ return r;
+#endif
+ return BDEV_CACHE_MODE(&dc->sb);
+}
+
+static bool verify(struct cached_dev *dc, struct bio *bio)
+{
+#ifdef CONFIG_CGROUP_BCACHE
+ if (bch_bio_to_cgroup(bio)->verify)
+ return true;
+#endif
+ return dc->verify;
+}
+
+static void
bio_csum(struct bio *bio, struct bkey *k)
+{
+ struct bio_vec *bv;
+ uint64_t csum = 0;
+ int i;
+
+ bio_for_each_segment(bv, bio, i) {
+ void *d = kmap(bv->bv_page) + bv->bv_offset;
+ csum = crc64_update(csum, d, bv->bv_len);
+ kunmap(bv->bv_page);
+ }
+
+ k->ptr[KEY_PTRS(k)] = csum & (~0ULL >> 1);
+}
+
+/* Insert data into cache */
+
+static void bio_invalidate(struct closure *cl)
+{
+ struct btree_op *op = container_of(cl, struct btree_op, cl);
+ struct bio *bio = op->cache_bio;
+
+ pr_debug("invalidating %i sectors from %llu",
+ bio_sectors(bio), (uint64_t) bio->bi_sector);
+
+ while (bio_sectors(bio)) {
+ unsigned len = min(bio_sectors(bio), 1U << 14);
+
+ if (bch_keylist_realloc(&op->keys, 0, op->c))
+ goto out;
+
+ bio->bi_sector += len;
+ bio->bi_size -= len << 9;
+
+ bch_keylist_add(&op->keys,
+ &KEY(op->inode, bio->bi_sector, len));
+ }
+
+ op->insert_data_done = true;
+ bio_put(bio);
+out:
+ continue_at(cl, bch_journal, bcache_wq);
+}
+
+struct open_bucket {
+ struct list_head list;
+ struct task_struct *last;
+ unsigned sectors_free;
+ BKEY_PADDED(key);
+};
+
+void bch_open_buckets_free(struct cache_set *c)
+{
+ struct open_bucket *b;
+
+ while (!list_empty(&c->data_buckets)) {
+ b = list_first_entry(&c->data_buckets,
+ struct open_bucket, list);
+ list_del(&b->list);
+ kfree(b);
+ }
+}
+
+int bch_open_buckets_alloc(struct cache_set *c)
+{
+ int i;
+
+ spin_lock_init(&c->data_bucket_lock);
+
+ for (i = 0; i < 6; i++) {
+ struct open_bucket *b = kzalloc(sizeof(*b), GFP_KERNEL);
+ if (!b)
+ return -ENOMEM;
+
+ list_add(&b->list, &c->data_buckets);
+ }
+
+ return 0;
+}
+
+/*
+ * We keep multiple buckets open for writes, and try to segregate different
+ * write streams for better cache utilization: first we look for a bucket where
+ * the last write to it was sequential with the current write, and failing that
+ * we look for a bucket that was last used by the same task.
+ *
+ * The idea is that if you've got multiple tasks pulling data into the cache at
+ * the same time, you'll get better cache utilization if you try to segregate
+ * their data and preserve locality.
+ *
+ * For example, say you're starting Firefox at the same time you're copying a
+ * bunch of files. Firefox will likely end up being fairly hot and stay in the
+ * cache awhile, but the data you copied might not be; if you wrote all that
+ * data to the same buckets it'd get invalidated at the same time.
+ *
+ * Both of those tasks will be doing fairly random IO so we can't rely on
+ * detecting sequential IO to segregate their data, but going off of the task
+ * should be a sane heuristic.
+ */
+static struct open_bucket *pick_data_bucket(struct cache_set *c,
+ const struct bkey *search,
+ struct task_struct *task,
+ struct bkey *alloc)
+{
+ struct open_bucket *ret, *ret_task = NULL;
+
+ list_for_each_entry_reverse(ret, &c->data_buckets, list)
+ if (!bkey_cmp(&ret->key, search))
+ goto found;
+ else if (ret->last == task)
+ ret_task = ret;
+
+ ret = ret_task ?: list_first_entry(&c->data_buckets,
+ struct open_bucket, list);
+found:
+ if (!ret->sectors_free && KEY_PTRS(alloc)) {
+ ret->sectors_free = c->sb.bucket_size;
+ bkey_copy(&ret->key, alloc);
+ bkey_init(alloc);
+ }
+
+ if (!ret->sectors_free)
+ ret = NULL;
+
+ return ret;
+}
+
+/*
+ * Allocates some space in the cache to write to, sets k to point to the newly
+ * allocated space, and updates KEY_SIZE(k) and KEY_OFFSET(k) (to point to the
+ * end of the newly allocated space).
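+ *
+ * (bcache keys address the end of the extent they point to: on entry
+ * KEY_OFFSET(k) is the first sector to write, and below we advance it by the
+ * number of sectors actually allocated and set KEY_SIZE(k), so the start is
+ * recovered as KEY_OFFSET(k) - KEY_SIZE(k).)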
+ *
+ * May allocate fewer sectors than @sectors; KEY_SIZE(k) indicates how many
+ * sectors were actually allocated.
+ *
+ * If s->writeback is true, will not fail.
+ */
+static bool bch_alloc_sectors(struct bkey *k, unsigned sectors,
+ struct search *s)
+{
+ struct cache_set *c = s->op.c;
+ struct open_bucket *b;
+ BKEY_PADDED(key) alloc;
+ struct closure cl, *w = NULL;
+ unsigned i;
+
+ if (s->writeback) {
+ closure_init_stack(&cl);
+ w = &cl;
+ }
+
+ /*
+ * We might have to allocate a new bucket, which we can't do with a
+ * spinlock held. So if we have to allocate, we drop the lock, allocate
+ * and then retry. KEY_PTRS() indicates whether alloc points to
+ * allocated bucket(s).
+ */
+
+ bkey_init(&alloc.key);
+ spin_lock(&c->data_bucket_lock);
+
+ while (!(b = pick_data_bucket(c, k, s->task, &alloc.key))) {
+ unsigned watermark = s->op.write_prio
+ ? WATERMARK_MOVINGGC
+ : WATERMARK_NONE;
+
+ spin_unlock(&c->data_bucket_lock);
+
+ if (bch_bucket_alloc_set(c, watermark, &alloc.key, 1, w))
+ return false;
+
+ spin_lock(&c->data_bucket_lock);
+ }
+
+ /*
+ * If we had to allocate, we might race and not need to allocate the
+ * second time we call pick_data_bucket(). If we allocated a bucket but
+ * didn't use it, drop the refcount bch_bucket_alloc_set() took:
+ */
+ if (KEY_PTRS(&alloc.key))
+ __bkey_put(c, &alloc.key);
+
+ for (i = 0; i < KEY_PTRS(&b->key); i++)
+ EBUG_ON(ptr_stale(c, &b->key, i));
+
+ /* Set up the pointer to the space we're allocating: */
+
+ for (i = 0; i < KEY_PTRS(&b->key); i++)
+ k->ptr[i] = b->key.ptr[i];
+
+ sectors = min(sectors, b->sectors_free);
+
+ SET_KEY_OFFSET(k, KEY_OFFSET(k) + sectors);
+ SET_KEY_SIZE(k, sectors);
+ SET_KEY_PTRS(k, KEY_PTRS(&b->key));
+
+ /*
+ * Move b to the end of the lru, and keep track of what this bucket was
+ * last used for:
+ */
+ list_move_tail(&b->list, &c->data_buckets);
+ bkey_copy_key(&b->key, k);
+ b->last = s->task;
+
+ b->sectors_free -= sectors;
+
+ for (i = 0; i < KEY_PTRS(&b->key); i++) {
+ SET_PTR_OFFSET(&b->key, i, PTR_OFFSET(&b->key, i) + sectors);
+
+ atomic_long_add(sectors,
+ &PTR_CACHE(c, &b->key, i)->sectors_written);
+ }
+
+ if (b->sectors_free < c->sb.block_size)
+ b->sectors_free = 0;
+
+ /*
+ * k takes refcounts on the buckets it points to until it's inserted
+ * into the btree, but if we're done with this bucket we just transfer
+ * the open bucket's refcount.
+ */
+ if (b->sectors_free)
+ for (i = 0; i < KEY_PTRS(&b->key); i++)
+ atomic_inc(&PTR_BUCKET(c, &b->key, i)->pin);
+
+ spin_unlock(&c->data_bucket_lock);
+ return true;
+}
+
+static void bch_insert_data_error(struct closure *cl)
+{
+ struct btree_op *op = container_of(cl, struct btree_op, cl);
+
+ /*
+ * Our data write just errored, which means we've got a bunch of keys to
+ * insert that point to data that wasn't successfully written.
+ *
+ * We don't have to insert those keys but we still have to invalidate
+ * that region of the cache - so, if we just strip off all the pointers
+ * from the keys we'll accomplish just that.
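+ *
+ * (A key with KEY_PTRS == 0 still covers its range in the index, and
+ * inserting it invalidates whatever the cache held there - the same
+ * trick bio_invalidate() uses.)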
+ */ + + struct bkey *src = op->keys.bottom, *dst = op->keys.bottom; + + while (src != op->keys.top) { + struct bkey *n = bkey_next(src); + + SET_KEY_PTRS(src, 0); + bkey_copy(dst, src); + + dst = bkey_next(dst); + src = n; + } + + op->keys.top = dst; + + bch_journal(cl); +} + +static void bch_insert_data_endio(struct bio *bio, int error) +{ + struct closure *cl = bio->bi_private; + struct btree_op *op = container_of(cl, struct btree_op, cl); + struct search *s = container_of(op, struct search, op); + + if (error) { + /* TODO: We could try to recover from this. */ + if (s->writeback) + s->error = error; + else if (s->write) + set_closure_fn(cl, bch_insert_data_error, bcache_wq); + else + set_closure_fn(cl, NULL, NULL); + } + + bch_bbio_endio(op->c, bio, error, "writing data to cache"); +} + +static void bch_insert_data_loop(struct closure *cl) +{ + struct btree_op *op = container_of(cl, struct btree_op, cl); + struct search *s = container_of(op, struct search, op); + struct bio *bio = op->cache_bio, *n; + + if (op->skip) + return bio_invalidate(cl); + + if (atomic_sub_return(bio_sectors(bio), &op->c->sectors_to_gc) < 0) { + set_gc_sectors(op->c); + bch_queue_gc(op->c); + } + + do { + unsigned i; + struct bkey *k; + struct bio_set *split = s->d + ? s->d->bio_split : op->c->bio_split; + + /* 1 for the device pointer and 1 for the chksum */ + if (bch_keylist_realloc(&op->keys, + 1 + (op->csum ? 1 : 0), + op->c)) + continue_at(cl, bch_journal, bcache_wq); + + k = op->keys.top; + bkey_init(k); + SET_KEY_INODE(k, op->inode); + SET_KEY_OFFSET(k, bio->bi_sector); + + if (!bch_alloc_sectors(k, bio_sectors(bio), s)) + goto err; + + n = bch_bio_split(bio, KEY_SIZE(k), GFP_NOIO, split); + if (!n) { + __bkey_put(op->c, k); + continue_at(cl, bch_insert_data_loop, bcache_wq); + } + + n->bi_end_io = bch_insert_data_endio; + n->bi_private = cl; + + if (s->writeback) { + SET_KEY_DIRTY(k, true); + + for (i = 0; i < KEY_PTRS(k); i++) + SET_GC_MARK(PTR_BUCKET(op->c, k, i), + GC_MARK_DIRTY); + } + + SET_KEY_CSUM(k, op->csum); + if (KEY_CSUM(k)) + bio_csum(n, k); + + pr_debug("%s", pkey(k)); + bch_keylist_push(&op->keys); + + trace_bcache_cache_insert(n, n->bi_sector, n->bi_bdev); + n->bi_rw |= REQ_WRITE; + bch_submit_bbio(n, op->c, k, 0); + } while (n != bio); + + op->insert_data_done = true; + continue_at(cl, bch_journal, bcache_wq); +err: + /* bch_alloc_sectors() blocks if s->writeback = true */ + BUG_ON(s->writeback); + + /* + * But if it's not a writeback write we'd rather just bail out if + * there aren't any buckets ready to write to - it might take awhile and + * we might be starving btree writes for gc or something. + */ + + if (s->write) { + /* + * Writethrough write: We can't complete the write until we've + * updated the index. But we don't want to delay the write while + * we wait for buckets to be freed up, so just invalidate the + * rest of the write. + */ + op->skip = true; + return bio_invalidate(cl); + } else { + /* + * From a cache miss, we can just insert the keys for the data + * we have written or bail out if we didn't do anything. 
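+ *
+ * (Unlike bch_insert_data_error() above, we don't strip the pointers:
+ * the keys accumulated so far point at data that really was written.)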
+ */ + op->insert_data_done = true; + bio_put(bio); + + if (!bch_keylist_empty(&op->keys)) + continue_at(cl, bch_journal, bcache_wq); + else + closure_return(cl); + } +} + +/** + * bch_insert_data - stick some data in the cache + * + * This is the starting point for any data to end up in a cache device; it could + * be from a normal write, or a writeback write, or a write to a flash only + * volume - it's also used by the moving garbage collector to compact data in + * mostly empty buckets. + * + * It first writes the data to the cache, creating a list of keys to be inserted + * (if the data had to be fragmented there will be multiple keys); after the + * data is written it calls bch_journal, and after the keys have been added to + * the next journal write they're inserted into the btree. + * + * It inserts the data in op->cache_bio; bi_sector is used for the key offset, + * and op->inode is used for the key inode. + * + * If op->skip is true, instead of inserting the data it invalidates the region + * of the cache represented by op->cache_bio and op->inode. + */ +void bch_insert_data(struct closure *cl) +{ + struct btree_op *op = container_of(cl, struct btree_op, cl); + + bch_keylist_init(&op->keys); + bio_get(op->cache_bio); + bch_insert_data_loop(cl); +} + +void bch_btree_insert_async(struct closure *cl) +{ + struct btree_op *op = container_of(cl, struct btree_op, cl); + struct search *s = container_of(op, struct search, op); + + if (bch_btree_insert(op, op->c)) { + s->error = -ENOMEM; + op->insert_data_done = true; + } + + if (op->insert_data_done) { + bch_keylist_free(&op->keys); + closure_return(cl); + } else + continue_at(cl, bch_insert_data_loop, bcache_wq); +} + +/* Common code for the make_request functions */ + +static void request_endio(struct bio *bio, int error) +{ + struct closure *cl = bio->bi_private; + + if (error) { + struct search *s = container_of(cl, struct search, cl); + s->error = error; + /* Only cache read errors are recoverable */ + s->recoverable = false; + } + + bio_put(bio); + closure_put(cl); +} + +void bch_cache_read_endio(struct bio *bio, int error) +{ + struct bbio *b = container_of(bio, struct bbio, bio); + struct closure *cl = bio->bi_private; + struct search *s = container_of(cl, struct search, cl); + + /* + * If the bucket was reused while our bio was in flight, we might have + * read the wrong data. Set s->error but not error so it doesn't get + * counted against the cache device, but we'll still reread the data + * from the backing device. 
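+ *
+ * (-EINTR is just a sentinel here: request_read_error() clears s->error
+ * and rereads from the backing device, so losing the race costs us a
+ * reread, not a failed request.)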
+ */ + + if (error) + s->error = error; + else if (ptr_stale(s->op.c, &b->key, 0)) { + atomic_long_inc(&s->op.c->cache_read_races); + s->error = -EINTR; + } + + bch_bbio_endio(s->op.c, bio, error, "reading from cache"); +} + +static void bio_complete(struct search *s) +{ + if (s->orig_bio) { + int cpu, rw = bio_data_dir(s->orig_bio); + unsigned long duration = jiffies - s->start_time; + + cpu = part_stat_lock(); + part_round_stats(cpu, &s->d->disk->part0); + part_stat_add(cpu, &s->d->disk->part0, ticks[rw], duration); + part_stat_unlock(); + + trace_bcache_request_end(s, s->orig_bio); + bio_endio(s->orig_bio, s->error); + s->orig_bio = NULL; + } +} + +static void do_bio_hook(struct search *s) +{ + struct bio *bio = &s->bio.bio; + memcpy(bio, s->orig_bio, sizeof(struct bio)); + + bio->bi_end_io = request_endio; + bio->bi_private = &s->cl; + atomic_set(&bio->bi_cnt, 3); +} + +static void search_free(struct closure *cl) +{ + struct search *s = container_of(cl, struct search, cl); + bio_complete(s); + + if (s->op.cache_bio) + bio_put(s->op.cache_bio); + + if (s->unaligned_bvec) + mempool_free(s->bio.bio.bi_io_vec, s->d->unaligned_bvec); + + closure_debug_destroy(cl); + mempool_free(s, s->d->c->search); +} + +static struct search *search_alloc(struct bio *bio, struct bcache_device *d) +{ + struct bio_vec *bv; + struct search *s = mempool_alloc(d->c->search, GFP_NOIO); + memset(s, 0, offsetof(struct search, op.keys)); + + __closure_init(&s->cl, NULL); + + s->op.inode = d->id; + s->op.c = d->c; + s->d = d; + s->op.lock = -1; + s->task = current; + s->orig_bio = bio; + s->write = (bio->bi_rw & REQ_WRITE) != 0; + s->op.flush_journal = (bio->bi_rw & REQ_FLUSH) != 0; + s->op.skip = (bio->bi_rw & REQ_DISCARD) != 0; + s->recoverable = 1; + s->start_time = jiffies; + do_bio_hook(s); + + if (bio->bi_size != bio_segments(bio) * PAGE_SIZE) { + bv = mempool_alloc(d->unaligned_bvec, GFP_NOIO); + memcpy(bv, bio_iovec(bio), + sizeof(struct bio_vec) * bio_segments(bio)); + + s->bio.bio.bi_io_vec = bv; + s->unaligned_bvec = 1; + } + + return s; +} + +static void btree_read_async(struct closure *cl) +{ + struct btree_op *op = container_of(cl, struct btree_op, cl); + + int ret = btree_root(search_recurse, op->c, op); + + if (ret == -EAGAIN) + continue_at(cl, btree_read_async, bcache_wq); + + closure_return(cl); +} + +/* Cached devices */ + +static void cached_dev_bio_complete(struct closure *cl) +{ + struct search *s = container_of(cl, struct search, cl); + struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); + + search_free(cl); + cached_dev_put(dc); +} + +/* Process reads */ + +static void cached_dev_read_complete(struct closure *cl) +{ + struct search *s = container_of(cl, struct search, cl); + + if (s->op.insert_collision) + bch_mark_cache_miss_collision(s); + + if (s->op.cache_bio) { + int i; + struct bio_vec *bv; + + __bio_for_each_segment(bv, s->op.cache_bio, i, 0) + __free_page(bv->bv_page); + } + + cached_dev_bio_complete(cl); +} + +static void request_read_error(struct closure *cl) +{ + struct search *s = container_of(cl, struct search, cl); + struct bio_vec *bv; + int i; + + if (s->recoverable) { + /* The cache read failed, but we can retry from the backing + * device. 
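+ *
+ * The retried bio completes through request_endio(), and the search
+ * then finishes via cached_dev_read_complete() below; if the backing
+ * device also errors, that error is what gets reported.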
+ */ + pr_debug("recovering at sector %llu", + (uint64_t) s->orig_bio->bi_sector); + + s->error = 0; + bv = s->bio.bio.bi_io_vec; + do_bio_hook(s); + s->bio.bio.bi_io_vec = bv; + + if (!s->unaligned_bvec) + bio_for_each_segment(bv, s->orig_bio, i) + bv->bv_offset = 0, bv->bv_len = PAGE_SIZE; + else + memcpy(s->bio.bio.bi_io_vec, + bio_iovec(s->orig_bio), + sizeof(struct bio_vec) * + bio_segments(s->orig_bio)); + + /* XXX: invalidate cache */ + + trace_bcache_read_retry(&s->bio.bio); + closure_bio_submit(&s->bio.bio, &s->cl, s->d); + } + + continue_at(cl, cached_dev_read_complete, NULL); +} + +static void request_read_done(struct closure *cl) +{ + struct search *s = container_of(cl, struct search, cl); + struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); + + /* + * s->cache_bio != NULL implies that we had a cache miss; cache_bio now + * contains data ready to be inserted into the cache. + * + * First, we copy the data we just read from cache_bio's bounce buffers + * to the buffers the original bio pointed to: + */ + + if (s->op.cache_bio) { + struct bio_vec *src, *dst; + unsigned src_offset, dst_offset, bytes; + void *dst_ptr; + + bio_reset(s->op.cache_bio); + s->op.cache_bio->bi_sector = s->cache_miss->bi_sector; + s->op.cache_bio->bi_bdev = s->cache_miss->bi_bdev; + s->op.cache_bio->bi_size = s->cache_bio_sectors << 9; + bio_map(s->op.cache_bio, NULL); + + src = bio_iovec(s->op.cache_bio); + dst = bio_iovec(s->cache_miss); + src_offset = src->bv_offset; + dst_offset = dst->bv_offset; + dst_ptr = kmap(dst->bv_page); + + while (1) { + if (dst_offset == dst->bv_offset + dst->bv_len) { + kunmap(dst->bv_page); + dst++; + if (dst == bio_iovec_idx(s->cache_miss, + s->cache_miss->bi_vcnt)) + break; + + dst_offset = dst->bv_offset; + dst_ptr = kmap(dst->bv_page); + } + + if (src_offset == src->bv_offset + src->bv_len) { + src++; + if (src == bio_iovec_idx(s->op.cache_bio, + s->op.cache_bio->bi_vcnt)) + BUG(); + + src_offset = src->bv_offset; + } + + bytes = min(dst->bv_offset + dst->bv_len - dst_offset, + src->bv_offset + src->bv_len - src_offset); + + memcpy(dst_ptr + dst_offset, + page_address(src->bv_page) + src_offset, + bytes); + + src_offset += bytes; + dst_offset += bytes; + } + + bio_put(s->cache_miss); + s->cache_miss = NULL; + } + + if (verify(dc, &s->bio.bio) && s->recoverable) + bch_data_verify(s); + + bio_complete(s); + + if (s->op.cache_bio && + !test_bit(CACHE_SET_STOPPING, &s->op.c->flags)) { + s->op.type = BTREE_REPLACE; + closure_call(&s->op.cl, bch_insert_data, NULL, cl); + } + + continue_at(cl, cached_dev_read_complete, NULL); +} + +static void request_read_done_bh(struct closure *cl) +{ + struct search *s = container_of(cl, struct search, cl); + struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); + + bch_mark_cache_accounting(s, !s->cache_miss, s->op.skip); + + if (s->error) + continue_at_nobarrier(cl, request_read_error, bcache_wq); + else if (s->op.cache_bio || verify(dc, &s->bio.bio)) + continue_at_nobarrier(cl, request_read_done, bcache_wq); + else + continue_at_nobarrier(cl, cached_dev_read_complete, NULL); +} + +static int cached_dev_cache_miss(struct btree *b, struct search *s, + struct bio *bio, unsigned sectors) +{ + int ret = 0; + unsigned reada; + struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); + struct bio *miss; + + miss = bch_bio_split(bio, sectors, GFP_NOIO, s->d->bio_split); + if (!miss) + return -EAGAIN; + + if (miss == bio) + s->op.lookup_done = true; + + miss->bi_end_io = request_endio; + 
miss->bi_private = &s->cl; + + if (s->cache_miss || s->op.skip) + goto out_submit; + + if (miss != bio || + (bio->bi_rw & REQ_RAHEAD) || + (bio->bi_rw & REQ_META) || + s->op.c->gc_stats.in_use >= CUTOFF_CACHE_READA) + reada = 0; + else { + reada = min(dc->readahead >> 9, + sectors - bio_sectors(miss)); + + if (bio_end(miss) + reada > bdev_sectors(miss->bi_bdev)) + reada = bdev_sectors(miss->bi_bdev) - bio_end(miss); + } + + s->cache_bio_sectors = bio_sectors(miss) + reada; + s->op.cache_bio = bio_alloc_bioset(GFP_NOWAIT, + DIV_ROUND_UP(s->cache_bio_sectors, PAGE_SECTORS), + dc->disk.bio_split); + + if (!s->op.cache_bio) + goto out_submit; + + s->op.cache_bio->bi_sector = miss->bi_sector; + s->op.cache_bio->bi_bdev = miss->bi_bdev; + s->op.cache_bio->bi_size = s->cache_bio_sectors << 9; + + s->op.cache_bio->bi_end_io = request_endio; + s->op.cache_bio->bi_private = &s->cl; + + /* btree_search_recurse()'s btree iterator is no good anymore */ + ret = -EINTR; + if (!bch_btree_insert_check_key(b, &s->op, s->op.cache_bio)) + goto out_put; + + bio_map(s->op.cache_bio, NULL); + if (bio_alloc_pages(s->op.cache_bio, __GFP_NOWARN|GFP_NOIO)) + goto out_put; + + s->cache_miss = miss; + bio_get(s->op.cache_bio); + + trace_bcache_cache_miss(s->orig_bio); + closure_bio_submit(s->op.cache_bio, &s->cl, s->d); + + return ret; +out_put: + bio_put(s->op.cache_bio); + s->op.cache_bio = NULL; +out_submit: + closure_bio_submit(miss, &s->cl, s->d); + return ret; +} + +static void request_read(struct cached_dev *dc, struct search *s) +{ + struct closure *cl = &s->cl; + + check_should_skip(dc, s); + closure_call(&s->op.cl, btree_read_async, NULL, cl); + + continue_at(cl, request_read_done_bh, NULL); +} + +/* Process writes */ + +static void cached_dev_write_complete(struct closure *cl) +{ + struct search *s = container_of(cl, struct search, cl); + struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); + + up_read_non_owner(&dc->writeback_lock); + cached_dev_bio_complete(cl); +} + +static bool should_writeback(struct cached_dev *dc, struct bio *bio) +{ + unsigned threshold = (bio->bi_rw & REQ_SYNC) + ? 
CUTOFF_WRITEBACK_SYNC + : CUTOFF_WRITEBACK; + + return !atomic_read(&dc->disk.detaching) && + cache_mode(dc, bio) == CACHE_MODE_WRITEBACK && + dc->disk.c->gc_stats.in_use < threshold; +} + +static void request_write(struct cached_dev *dc, struct search *s) +{ + struct closure *cl = &s->cl; + struct bio *bio = &s->bio.bio; + struct bkey start, end; + start = KEY(dc->disk.id, bio->bi_sector, 0); + end = KEY(dc->disk.id, bio_end(bio), 0); + + bch_keybuf_check_overlapping(&s->op.c->moving_gc_keys, &start, &end); + + check_should_skip(dc, s); + down_read_non_owner(&dc->writeback_lock); + + if (bch_keybuf_check_overlapping(&dc->writeback_keys, &start, &end)) { + s->op.skip = false; + s->writeback = true; + } + + if (bio->bi_rw & REQ_DISCARD) + goto skip; + + if (s->op.skip) + goto skip; + + if (should_writeback(dc, s->orig_bio)) + s->writeback = true; + + if (!s->writeback) { + s->op.cache_bio = bio_clone_bioset(bio, GFP_NOIO, + dc->disk.bio_split); + + trace_bcache_writethrough(s->orig_bio); + closure_bio_submit(bio, cl, s->d); + } else { + s->op.cache_bio = bio; + trace_bcache_writeback(s->orig_bio); + bch_writeback_add(dc, bio_sectors(bio)); + } +out: + closure_call(&s->op.cl, bch_insert_data, NULL, cl); + continue_at(cl, cached_dev_write_complete, NULL); +skip: + s->op.skip = true; + s->op.cache_bio = s->orig_bio; + bio_get(s->op.cache_bio); + trace_bcache_write_skip(s->orig_bio); + + if ((bio->bi_rw & REQ_DISCARD) && + !blk_queue_discard(bdev_get_queue(dc->bdev))) + goto out; + + closure_bio_submit(bio, cl, s->d); + goto out; +} + +static void request_nodata(struct cached_dev *dc, struct search *s) +{ + struct closure *cl = &s->cl; + struct bio *bio = &s->bio.bio; + + if (bio->bi_rw & REQ_DISCARD) { + request_write(dc, s); + return; + } + + if (s->op.flush_journal) + bch_journal_meta(s->op.c, cl); + + closure_bio_submit(bio, cl, s->d); + + continue_at(cl, cached_dev_bio_complete, NULL); +} + +/* Cached devices - read & write stuff */ + +int bch_get_congested(struct cache_set *c) +{ + int i; + + if (!c->congested_read_threshold_us && + !c->congested_write_threshold_us) + return 0; + + i = (local_clock_us() - c->congested_last_us) / 1024; + if (i < 0) + return 0; + + i += atomic_read(&c->congested); + if (i >= 0) + return 0; + + i += CONGESTED_MAX; + + return i <= 0 ? 
1 : fract_exp_two(i, 6); +} + +static void add_sequential(struct task_struct *t) +{ + ewma_add(t->sequential_io_avg, + t->sequential_io, 8, 0); + + t->sequential_io = 0; +} + +static void check_should_skip(struct cached_dev *dc, struct search *s) +{ + struct hlist_head *iohash(uint64_t k) + { return &dc->io_hash[hash_64(k, RECENT_IO_BITS)]; } + + struct cache_set *c = s->op.c; + struct bio *bio = &s->bio.bio; + + long rand; + int cutoff = bch_get_congested(c); + unsigned mode = cache_mode(dc, bio); + + if (atomic_read(&dc->disk.detaching) || + c->gc_stats.in_use > CUTOFF_CACHE_ADD || + (bio->bi_rw & REQ_DISCARD)) + goto skip; + + if (mode == CACHE_MODE_NONE || + (mode == CACHE_MODE_WRITEAROUND && + (bio->bi_rw & REQ_WRITE))) + goto skip; + + if (bio->bi_sector & (c->sb.block_size - 1) || + bio_sectors(bio) & (c->sb.block_size - 1)) { + pr_debug("skipping unaligned io"); + goto skip; + } + + if (!cutoff) { + cutoff = dc->sequential_cutoff >> 9; + + if (!cutoff) + goto rescale; + + if (mode == CACHE_MODE_WRITEBACK && + (bio->bi_rw & REQ_WRITE) && + (bio->bi_rw & REQ_SYNC)) + goto rescale; + } + + if (dc->sequential_merge) { + struct io *i; + + spin_lock(&dc->io_lock); + + hlist_for_each_entry(i, iohash(bio->bi_sector), hash) + if (i->last == bio->bi_sector && + time_before(jiffies, i->jiffies)) + goto found; + + i = list_first_entry(&dc->io_lru, struct io, lru); + + add_sequential(s->task); + i->sequential = 0; +found: + if (i->sequential + bio->bi_size > i->sequential) + i->sequential += bio->bi_size; + + i->last = bio_end(bio); + i->jiffies = jiffies + msecs_to_jiffies(5000); + s->task->sequential_io = i->sequential; + + hlist_del(&i->hash); + hlist_add_head(&i->hash, iohash(i->last)); + list_move_tail(&i->lru, &dc->io_lru); + + spin_unlock(&dc->io_lock); + } else { + s->task->sequential_io = bio->bi_size; + + add_sequential(s->task); + } + + rand = get_random_int(); + cutoff -= bitmap_weight(&rand, BITS_PER_LONG); + + if (cutoff <= (int) (max(s->task->sequential_io, + s->task->sequential_io_avg) >> 9)) + goto skip; + +rescale: + bch_rescale_priorities(c, bio_sectors(bio)); + return; +skip: + bch_mark_sectors_bypassed(s, bio_sectors(bio)); + s->op.skip = true; +} + +static void cached_dev_make_request(struct request_queue *q, struct bio *bio) +{ + struct search *s; + struct bcache_device *d = bio->bi_bdev->bd_disk->private_data; + struct cached_dev *dc = container_of(d, struct cached_dev, disk); + int cpu, rw = bio_data_dir(bio); + + cpu = part_stat_lock(); + part_stat_inc(cpu, &d->disk->part0, ios[rw]); + part_stat_add(cpu, &d->disk->part0, sectors[rw], bio_sectors(bio)); + part_stat_unlock(); + + bio->bi_bdev = dc->bdev; + bio->bi_sector += BDEV_DATA_START; + + if (cached_dev_get(dc)) { + s = search_alloc(bio, d); + trace_bcache_request_start(s, bio); + + if (!bio_has_data(bio)) + request_nodata(dc, s); + else if (rw) + request_write(dc, s); + else + request_read(dc, s); + } else { + if ((bio->bi_rw & REQ_DISCARD) && + !blk_queue_discard(bdev_get_queue(dc->bdev))) + bio_endio(bio, 0); + else + bch_generic_make_request(bio, &d->bio_split_hook); + } +} + +static int cached_dev_ioctl(struct bcache_device *d, fmode_t mode, + unsigned int cmd, unsigned long arg) +{ + struct cached_dev *dc = container_of(d, struct cached_dev, disk); + return __blkdev_driver_ioctl(dc->bdev, mode, cmd, arg); +} + +static int cached_dev_congested(void *data, int bits) +{ + struct bcache_device *d = data; + struct cached_dev *dc = container_of(d, struct cached_dev, disk); + struct request_queue *q = 
bdev_get_queue(dc->bdev); + int ret = 0; + + if (bdi_congested(&q->backing_dev_info, bits)) + return 1; + + if (cached_dev_get(dc)) { + unsigned i; + struct cache *ca; + + for_each_cache(ca, d->c, i) { + q = bdev_get_queue(ca->bdev); + ret |= bdi_congested(&q->backing_dev_info, bits); + } + + cached_dev_put(dc); + } + + return ret; +} + +void bch_cached_dev_request_init(struct cached_dev *dc) +{ + struct gendisk *g = dc->disk.disk; + + g->queue->make_request_fn = cached_dev_make_request; + g->queue->backing_dev_info.congested_fn = cached_dev_congested; + dc->disk.cache_miss = cached_dev_cache_miss; + dc->disk.ioctl = cached_dev_ioctl; +} + +/* Flash backed devices */ + +static int flash_dev_cache_miss(struct btree *b, struct search *s, + struct bio *bio, unsigned sectors) +{ + /* Zero fill bio */ + + while (bio->bi_idx != bio->bi_vcnt) { + struct bio_vec *bv = bio_iovec(bio); + unsigned j = min(bv->bv_len >> 9, sectors); + + void *p = kmap(bv->bv_page); + memset(p + bv->bv_offset, 0, j << 9); + kunmap(bv->bv_page); + + bv->bv_len -= j << 9; + bv->bv_offset += j << 9; + + if (bv->bv_len) + return 0; + + bio->bi_sector += j; + bio->bi_size -= j << 9; + + bio->bi_idx++; + sectors -= j; + } + + s->op.lookup_done = true; + + return 0; +} + +static void flash_dev_make_request(struct request_queue *q, struct bio *bio) +{ + struct search *s; + struct closure *cl; + struct bcache_device *d = bio->bi_bdev->bd_disk->private_data; + int cpu, rw = bio_data_dir(bio); + + cpu = part_stat_lock(); + part_stat_inc(cpu, &d->disk->part0, ios[rw]); + part_stat_add(cpu, &d->disk->part0, sectors[rw], bio_sectors(bio)); + part_stat_unlock(); + + s = search_alloc(bio, d); + cl = &s->cl; + bio = &s->bio.bio; + + trace_bcache_request_start(s, bio); + + if (bio_has_data(bio) && !rw) { + closure_call(&s->op.cl, btree_read_async, NULL, cl); + } else if (bio_has_data(bio) || s->op.skip) { + bch_keybuf_check_overlapping(&s->op.c->moving_gc_keys, + &KEY(d->id, bio->bi_sector, 0), + &KEY(d->id, bio_end(bio), 0)); + + s->writeback = true; + s->op.cache_bio = bio; + + closure_call(&s->op.cl, bch_insert_data, NULL, cl); + } else { + /* No data - probably a cache flush */ + if (s->op.flush_journal) + bch_journal_meta(s->op.c, cl); + } + + continue_at(cl, search_free, NULL); +} + +static int flash_dev_ioctl(struct bcache_device *d, fmode_t mode, + unsigned int cmd, unsigned long arg) +{ + return -ENOTTY; +} + +static int flash_dev_congested(void *data, int bits) +{ + struct bcache_device *d = data; + struct request_queue *q; + struct cache *ca; + unsigned i; + int ret = 0; + + for_each_cache(ca, d->c, i) { + q = bdev_get_queue(ca->bdev); + ret |= bdi_congested(&q->backing_dev_info, bits); + } + + return ret; +} + +void bch_flash_dev_request_init(struct bcache_device *d) +{ + struct gendisk *g = d->disk; + + g->queue->make_request_fn = flash_dev_make_request; + g->queue->backing_dev_info.congested_fn = flash_dev_congested; + d->cache_miss = flash_dev_cache_miss; + d->ioctl = flash_dev_ioctl; +} + +void bch_request_exit(void) +{ +#ifdef CONFIG_CGROUP_BCACHE + cgroup_unload_subsys(&bcache_subsys); +#endif + if (bch_search_cache) + kmem_cache_destroy(bch_search_cache); +} + +int __init bch_request_init(void) +{ + bch_search_cache = KMEM_CACHE(search, 0); + if (!bch_search_cache) + return -ENOMEM; + +#ifdef CONFIG_CGROUP_BCACHE + cgroup_load_subsys(&bcache_subsys); + init_bch_cgroup(&bcache_default_cgroup); + + cgroup_add_cftypes(&bcache_subsys, bch_files); +#endif + return 0; +} diff --git a/drivers/md/bcache/request.h 
b/drivers/md/bcache/request.h
new file mode 100644
index 000000000000..254d9ab5707c
--- /dev/null
+++ b/drivers/md/bcache/request.h
@@ -0,0 +1,62 @@
+#ifndef _BCACHE_REQUEST_H_
+#define _BCACHE_REQUEST_H_
+
+#include
+
+struct search {
+ /* Stack frame for bio_complete */
+ struct closure cl;
+
+ struct bcache_device *d;
+ struct task_struct *task;
+
+ struct bbio bio;
+ struct bio *orig_bio;
+ struct bio *cache_miss;
+ unsigned cache_bio_sectors;
+
+ unsigned recoverable:1;
+ unsigned unaligned_bvec:1;
+
+ unsigned write:1;
+ unsigned writeback:1;
+
+ /* IO error returned to s->bio */
+ short error;
+ unsigned long start_time;
+
+ /* Anything past op->keys won't get zeroed in do_bio_hook */
+ struct btree_op op;
+};
+
+void bch_cache_read_endio(struct bio *, int);
+int bch_get_congested(struct cache_set *);
+void bch_insert_data(struct closure *cl);
+void bch_btree_insert_async(struct closure *);
+
+void bch_open_buckets_free(struct cache_set *);
+int bch_open_buckets_alloc(struct cache_set *);
+
+void bch_cached_dev_request_init(struct cached_dev *dc);
+void bch_flash_dev_request_init(struct bcache_device *d);
+
+extern struct kmem_cache *bch_search_cache, *bch_passthrough_cache;
+
+struct bch_cgroup {
+#ifdef CONFIG_CGROUP_BCACHE
+ struct cgroup_subsys_state css;
+#endif
+ /*
+ * We subtract one from the index into bch_cache_modes[], so that
+ * default == -1; this makes it so the rest match up with d->cache_mode,
+ * and we use d->cache_mode if cgrp->cache_mode < 0
+ */
+ short cache_mode;
+ bool verify;
+ struct cache_stat_collector stats;
+};
+
+struct bch_cgroup *bch_bio_to_cgroup(struct bio *bio);
+
+#endif /* _BCACHE_REQUEST_H_ */
diff --git a/drivers/md/bcache/stats.c b/drivers/md/bcache/stats.c
new file mode 100644
index 000000000000..bf6cf9518c89
--- /dev/null
+++ b/drivers/md/bcache/stats.c
@@ -0,0 +1,245 @@
+/*
+ * bcache stats code
+ *
+ * Copyright 2012 Google, Inc.
+ */
+
+#include "bcache.h"
+#include "stats.h"
+#include "btree.h"
+#include "request.h"
+#include "sysfs.h"
+
+/*
+ * We keep absolute totals of various statistics, and additionally a set of
+ * three rolling averages.
+ *
+ * Every so often, a timer goes off and rescales the rolling averages.
+ * The DAY/HOUR/FIVE_MINUTE_RESCALE constants are how many times the timer has
+ * to go off before we rescale each set of numbers; that gets us half lives of
+ * 5 minutes, one hour, and one day.
+ *
+ * accounting_delay is how often the timer goes off - 22 times in 5 minutes,
+ * and accounting_weight is what we use to rescale:
+ *
+ * pow(31 / 32, 22) ~= 1/2
+ *
+ * So that we don't have to increment each set of numbers every time we (say)
+ * get a cache hit, we increment a single atomic_t in acc->collector, and when
+ * the rescale function runs it resets the atomic counter to 0 and adds its
+ * old value to each of the exported numbers.
+ *
+ * To reduce rounding error, the numbers in struct cache_stats are all
+ * stored left shifted by 16, and scaled back in the sysfs show() function.
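+ *
+ * For example, a cache hit enters the collector as 1 and is folded into
+ * each cache_stats set as 1 << 16; show() then prints cache_hits >> 16,
+ * so repeated 31/32 rescales lose fractions of a hit, not whole hits.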
+ */ + +static const unsigned DAY_RESCALE = 288; +static const unsigned HOUR_RESCALE = 12; +static const unsigned FIVE_MINUTE_RESCALE = 1; +static const unsigned accounting_delay = (HZ * 300) / 22; +static const unsigned accounting_weight = 32; + +/* sysfs reading/writing */ + +read_attribute(cache_hits); +read_attribute(cache_misses); +read_attribute(cache_bypass_hits); +read_attribute(cache_bypass_misses); +read_attribute(cache_hit_ratio); +read_attribute(cache_readaheads); +read_attribute(cache_miss_collisions); +read_attribute(bypassed); + +SHOW(bch_stats) +{ + struct cache_stats *s = + container_of(kobj, struct cache_stats, kobj); +#define var(stat) (s->stat >> 16) + var_print(cache_hits); + var_print(cache_misses); + var_print(cache_bypass_hits); + var_print(cache_bypass_misses); + + sysfs_print(cache_hit_ratio, + DIV_SAFE(var(cache_hits) * 100, + var(cache_hits) + var(cache_misses))); + + var_print(cache_readaheads); + var_print(cache_miss_collisions); + sysfs_hprint(bypassed, var(sectors_bypassed) << 9); +#undef var + return 0; +} + +STORE(bch_stats) +{ + return size; +} + +static void bch_stats_release(struct kobject *k) +{ +} + +static struct attribute *bch_stats_files[] = { + &sysfs_cache_hits, + &sysfs_cache_misses, + &sysfs_cache_bypass_hits, + &sysfs_cache_bypass_misses, + &sysfs_cache_hit_ratio, + &sysfs_cache_readaheads, + &sysfs_cache_miss_collisions, + &sysfs_bypassed, + NULL +}; +static KTYPE(bch_stats); + +static void scale_accounting(unsigned long data); + +void bch_cache_accounting_init(struct cache_accounting *acc, struct closure *parent) +{ + kobject_init(&acc->total.kobj, &bch_stats_ktype); + kobject_init(&acc->five_minute.kobj, &bch_stats_ktype); + kobject_init(&acc->hour.kobj, &bch_stats_ktype); + kobject_init(&acc->day.kobj, &bch_stats_ktype); + + closure_init(&acc->cl, parent); + init_timer(&acc->timer); + acc->timer.expires = jiffies + accounting_delay; + acc->timer.data = (unsigned long) acc; + acc->timer.function = scale_accounting; + add_timer(&acc->timer); +} + +int bch_cache_accounting_add_kobjs(struct cache_accounting *acc, + struct kobject *parent) +{ + int ret = kobject_add(&acc->total.kobj, parent, + "stats_total"); + ret = ret ?: kobject_add(&acc->five_minute.kobj, parent, + "stats_five_minute"); + ret = ret ?: kobject_add(&acc->hour.kobj, parent, + "stats_hour"); + ret = ret ?: kobject_add(&acc->day.kobj, parent, + "stats_day"); + return ret; +} + +void bch_cache_accounting_clear(struct cache_accounting *acc) +{ + memset(&acc->total.cache_hits, + 0, + sizeof(unsigned long) * 7); +} + +void bch_cache_accounting_destroy(struct cache_accounting *acc) +{ + kobject_put(&acc->total.kobj); + kobject_put(&acc->five_minute.kobj); + kobject_put(&acc->hour.kobj); + kobject_put(&acc->day.kobj); + + atomic_set(&acc->closing, 1); + if (del_timer_sync(&acc->timer)) + closure_return(&acc->cl); +} + +/* EWMA scaling */ + +static void scale_stat(unsigned long *stat) +{ + *stat = ewma_add(*stat, 0, accounting_weight, 0); +} + +static void scale_stats(struct cache_stats *stats, unsigned long rescale_at) +{ + if (++stats->rescale == rescale_at) { + stats->rescale = 0; + scale_stat(&stats->cache_hits); + scale_stat(&stats->cache_misses); + scale_stat(&stats->cache_bypass_hits); + scale_stat(&stats->cache_bypass_misses); + scale_stat(&stats->cache_readaheads); + scale_stat(&stats->cache_miss_collisions); + scale_stat(&stats->sectors_bypassed); + } +} + +static void scale_accounting(unsigned long data) +{ + struct cache_accounting *acc = (struct cache_accounting *) data; + 
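+ /*
+ * Each tick folds the collector into the four stat sets (move_stat
+ * below), then decays them: scale_stat() computes roughly
+ *
+ * stat = stat * (accounting_weight - 1) / accounting_weight
+ *
+ * i.e. stat *= 31/32, and since the timer fires 22 times per five
+ * minutes, (31/32)^22 ~= 0.497 gives the five minute set its half
+ * life (the hour and day sets only decay every 12th and 288th tick).
+ */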
+#define move_stat(name) do { \ + unsigned t = atomic_xchg(&acc->collector.name, 0); \ + t <<= 16; \ + acc->five_minute.name += t; \ + acc->hour.name += t; \ + acc->day.name += t; \ + acc->total.name += t; \ +} while (0) + + move_stat(cache_hits); + move_stat(cache_misses); + move_stat(cache_bypass_hits); + move_stat(cache_bypass_misses); + move_stat(cache_readaheads); + move_stat(cache_miss_collisions); + move_stat(sectors_bypassed); + + scale_stats(&acc->total, 0); + scale_stats(&acc->day, DAY_RESCALE); + scale_stats(&acc->hour, HOUR_RESCALE); + scale_stats(&acc->five_minute, FIVE_MINUTE_RESCALE); + + acc->timer.expires += accounting_delay; + + if (!atomic_read(&acc->closing)) + add_timer(&acc->timer); + else + closure_return(&acc->cl); +} + +static void mark_cache_stats(struct cache_stat_collector *stats, + bool hit, bool bypass) +{ + if (!bypass) + if (hit) + atomic_inc(&stats->cache_hits); + else + atomic_inc(&stats->cache_misses); + else + if (hit) + atomic_inc(&stats->cache_bypass_hits); + else + atomic_inc(&stats->cache_bypass_misses); +} + +void bch_mark_cache_accounting(struct search *s, bool hit, bool bypass) +{ + struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); + mark_cache_stats(&dc->accounting.collector, hit, bypass); + mark_cache_stats(&s->op.c->accounting.collector, hit, bypass); +#ifdef CONFIG_CGROUP_BCACHE + mark_cache_stats(&(bch_bio_to_cgroup(s->orig_bio)->stats), hit, bypass); +#endif +} + +void bch_mark_cache_readahead(struct search *s) +{ + struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); + atomic_inc(&dc->accounting.collector.cache_readaheads); + atomic_inc(&s->op.c->accounting.collector.cache_readaheads); +} + +void bch_mark_cache_miss_collision(struct search *s) +{ + struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); + atomic_inc(&dc->accounting.collector.cache_miss_collisions); + atomic_inc(&s->op.c->accounting.collector.cache_miss_collisions); +} + +void bch_mark_sectors_bypassed(struct search *s, int sectors) +{ + struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); + atomic_add(sectors, &dc->accounting.collector.sectors_bypassed); + atomic_add(sectors, &s->op.c->accounting.collector.sectors_bypassed); +} diff --git a/drivers/md/bcache/stats.h b/drivers/md/bcache/stats.h new file mode 100644 index 000000000000..c7c7a8fd29fe --- /dev/null +++ b/drivers/md/bcache/stats.h @@ -0,0 +1,58 @@ +#ifndef _BCACHE_STATS_H_ +#define _BCACHE_STATS_H_ + +struct cache_stat_collector { + atomic_t cache_hits; + atomic_t cache_misses; + atomic_t cache_bypass_hits; + atomic_t cache_bypass_misses; + atomic_t cache_readaheads; + atomic_t cache_miss_collisions; + atomic_t sectors_bypassed; +}; + +struct cache_stats { + struct kobject kobj; + + unsigned long cache_hits; + unsigned long cache_misses; + unsigned long cache_bypass_hits; + unsigned long cache_bypass_misses; + unsigned long cache_readaheads; + unsigned long cache_miss_collisions; + unsigned long sectors_bypassed; + + unsigned rescale; +}; + +struct cache_accounting { + struct closure cl; + struct timer_list timer; + atomic_t closing; + + struct cache_stat_collector collector; + + struct cache_stats total; + struct cache_stats five_minute; + struct cache_stats hour; + struct cache_stats day; +}; + +struct search; + +void bch_cache_accounting_init(struct cache_accounting *acc, + struct closure *parent); + +int bch_cache_accounting_add_kobjs(struct cache_accounting *acc, + struct kobject *parent); + +void bch_cache_accounting_clear(struct 
cache_accounting *acc); + +void bch_cache_accounting_destroy(struct cache_accounting *acc); + +void bch_mark_cache_accounting(struct search *s, bool hit, bool bypass); +void bch_mark_cache_readahead(struct search *s); +void bch_mark_cache_miss_collision(struct search *s); +void bch_mark_sectors_bypassed(struct search *s, int sectors); + +#endif /* _BCACHE_STATS_H_ */ diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c new file mode 100644 index 000000000000..31ef47f1f3b6 --- /dev/null +++ b/drivers/md/bcache/super.c @@ -0,0 +1,1941 @@ +/* + * bcache setup/teardown code, and some metadata io - read a superblock and + * figure out what to do with it. + * + * Copyright 2010, 2011 Kent Overstreet + * Copyright 2012 Google, Inc. + */ + +#include "bcache.h" +#include "btree.h" +#include "debug.h" +#include "request.h" + +#include +#include +#include +#include +#include +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Kent Overstreet "); + +static const char bcache_magic[] = { + 0xc6, 0x85, 0x73, 0xf6, 0x4e, 0x1a, 0x45, 0xca, + 0x82, 0x65, 0xf5, 0x7f, 0x48, 0xba, 0x6d, 0x81 +}; + +static const char invalid_uuid[] = { + 0xa0, 0x3e, 0xf8, 0xed, 0x3e, 0xe1, 0xb8, 0x78, + 0xc8, 0x50, 0xfc, 0x5e, 0xcb, 0x16, 0xcd, 0x99 +}; + +/* Default is -1; we skip past it for struct cached_dev's cache mode */ +const char * const bch_cache_modes[] = { + "default", + "writethrough", + "writeback", + "writearound", + "none", + NULL +}; + +struct uuid_entry_v0 { + uint8_t uuid[16]; + uint8_t label[32]; + uint32_t first_reg; + uint32_t last_reg; + uint32_t invalidated; + uint32_t pad; +}; + +static struct kobject *bcache_kobj; +struct mutex bch_register_lock; +LIST_HEAD(bch_cache_sets); +static LIST_HEAD(uncached_devices); + +static int bcache_major, bcache_minor; +static wait_queue_head_t unregister_wait; +struct workqueue_struct *bcache_wq; + +#define BTREE_MAX_PAGES (256 * 1024 / PAGE_SIZE) + +static void bio_split_pool_free(struct bio_split_pool *p) +{ + if (p->bio_split) + bioset_free(p->bio_split); + +} + +static int bio_split_pool_init(struct bio_split_pool *p) +{ + p->bio_split = bioset_create(4, 0); + if (!p->bio_split) + return -ENOMEM; + + p->bio_split_hook = mempool_create_kmalloc_pool(4, + sizeof(struct bio_split_hook)); + if (!p->bio_split_hook) + return -ENOMEM; + + return 0; +} + +/* Superblock */ + +static const char *read_super(struct cache_sb *sb, struct block_device *bdev, + struct page **res) +{ + const char *err; + struct cache_sb *s; + struct buffer_head *bh = __bread(bdev, 1, SB_SIZE); + unsigned i; + + if (!bh) + return "IO error"; + + s = (struct cache_sb *) bh->b_data; + + sb->offset = le64_to_cpu(s->offset); + sb->version = le64_to_cpu(s->version); + + memcpy(sb->magic, s->magic, 16); + memcpy(sb->uuid, s->uuid, 16); + memcpy(sb->set_uuid, s->set_uuid, 16); + memcpy(sb->label, s->label, SB_LABEL_SIZE); + + sb->flags = le64_to_cpu(s->flags); + sb->seq = le64_to_cpu(s->seq); + + sb->nbuckets = le64_to_cpu(s->nbuckets); + sb->block_size = le16_to_cpu(s->block_size); + sb->bucket_size = le16_to_cpu(s->bucket_size); + + sb->nr_in_set = le16_to_cpu(s->nr_in_set); + sb->nr_this_dev = le16_to_cpu(s->nr_this_dev); + sb->last_mount = le32_to_cpu(s->last_mount); + + sb->first_bucket = le16_to_cpu(s->first_bucket); + sb->keys = le16_to_cpu(s->keys); + + for (i = 0; i < SB_JOURNAL_BUCKETS; i++) + sb->d[i] = le64_to_cpu(s->d[i]); + + pr_debug("read sb version %llu, flags %llu, seq %llu, journal size %u", + sb->version, sb->flags, sb->seq, sb->keys); + + err = "Not a bcache 
superblock"; + if (sb->offset != SB_SECTOR) + goto err; + + if (memcmp(sb->magic, bcache_magic, 16)) + goto err; + + err = "Too many journal buckets"; + if (sb->keys > SB_JOURNAL_BUCKETS) + goto err; + + err = "Bad checksum"; + if (s->csum != csum_set(s)) + goto err; + + err = "Bad UUID"; + if (is_zero(sb->uuid, 16)) + goto err; + + err = "Unsupported superblock version"; + if (sb->version > BCACHE_SB_VERSION) + goto err; + + err = "Bad block/bucket size"; + if (!is_power_of_2(sb->block_size) || sb->block_size > PAGE_SECTORS || + !is_power_of_2(sb->bucket_size) || sb->bucket_size < PAGE_SECTORS) + goto err; + + err = "Too many buckets"; + if (sb->nbuckets > LONG_MAX) + goto err; + + err = "Not enough buckets"; + if (sb->nbuckets < 1 << 7) + goto err; + + err = "Invalid superblock: device too small"; + if (get_capacity(bdev->bd_disk) < sb->bucket_size * sb->nbuckets) + goto err; + + if (sb->version == CACHE_BACKING_DEV) + goto out; + + err = "Bad UUID"; + if (is_zero(sb->set_uuid, 16)) + goto err; + + err = "Bad cache device number in set"; + if (!sb->nr_in_set || + sb->nr_in_set <= sb->nr_this_dev || + sb->nr_in_set > MAX_CACHES_PER_SET) + goto err; + + err = "Journal buckets not sequential"; + for (i = 0; i < sb->keys; i++) + if (sb->d[i] != sb->first_bucket + i) + goto err; + + err = "Too many journal buckets"; + if (sb->first_bucket + sb->keys > sb->nbuckets) + goto err; + + err = "Invalid superblock: first bucket comes before end of super"; + if (sb->first_bucket * sb->bucket_size < 16) + goto err; +out: + sb->last_mount = get_seconds(); + err = NULL; + + get_page(bh->b_page); + *res = bh->b_page; +err: + put_bh(bh); + return err; +} + +static void write_bdev_super_endio(struct bio *bio, int error) +{ + struct cached_dev *dc = bio->bi_private; + /* XXX: error checking */ + + closure_put(&dc->sb_write.cl); +} + +static void __write_super(struct cache_sb *sb, struct bio *bio) +{ + struct cache_sb *out = page_address(bio->bi_io_vec[0].bv_page); + unsigned i; + + bio->bi_sector = SB_SECTOR; + bio->bi_rw = REQ_SYNC|REQ_META; + bio->bi_size = SB_SIZE; + bio_map(bio, NULL); + + out->offset = cpu_to_le64(sb->offset); + out->version = cpu_to_le64(sb->version); + + memcpy(out->uuid, sb->uuid, 16); + memcpy(out->set_uuid, sb->set_uuid, 16); + memcpy(out->label, sb->label, SB_LABEL_SIZE); + + out->flags = cpu_to_le64(sb->flags); + out->seq = cpu_to_le64(sb->seq); + + out->last_mount = cpu_to_le32(sb->last_mount); + out->first_bucket = cpu_to_le16(sb->first_bucket); + out->keys = cpu_to_le16(sb->keys); + + for (i = 0; i < sb->keys; i++) + out->d[i] = cpu_to_le64(sb->d[i]); + + out->csum = csum_set(out); + + pr_debug("ver %llu, flags %llu, seq %llu", + sb->version, sb->flags, sb->seq); + + submit_bio(REQ_WRITE, bio); +} + +void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent) +{ + struct closure *cl = &dc->sb_write.cl; + struct bio *bio = &dc->sb_bio; + + closure_lock(&dc->sb_write, parent); + + bio_reset(bio); + bio->bi_bdev = dc->bdev; + bio->bi_end_io = write_bdev_super_endio; + bio->bi_private = dc; + + closure_get(cl); + __write_super(&dc->sb, bio); + + closure_return(cl); +} + +static void write_super_endio(struct bio *bio, int error) +{ + struct cache *ca = bio->bi_private; + + bch_count_io_errors(ca, error, "writing superblock"); + closure_put(&ca->set->sb_write.cl); +} + +void bcache_write_super(struct cache_set *c) +{ + struct closure *cl = &c->sb_write.cl; + struct cache *ca; + unsigned i; + + closure_lock(&c->sb_write, &c->cl); + + c->sb.seq++; + + for_each_cache(ca, 
c, i) { + struct bio *bio = &ca->sb_bio; + + ca->sb.version = BCACHE_SB_VERSION; + ca->sb.seq = c->sb.seq; + ca->sb.last_mount = c->sb.last_mount; + + SET_CACHE_SYNC(&ca->sb, CACHE_SYNC(&c->sb)); + + bio_reset(bio); + bio->bi_bdev = ca->bdev; + bio->bi_end_io = write_super_endio; + bio->bi_private = ca; + + closure_get(cl); + __write_super(&ca->sb, bio); + } + + closure_return(cl); +} + +/* UUID io */ + +static void uuid_endio(struct bio *bio, int error) +{ + struct closure *cl = bio->bi_private; + struct cache_set *c = container_of(cl, struct cache_set, uuid_write.cl); + + cache_set_err_on(error, c, "accessing uuids"); + bch_bbio_free(bio, c); + closure_put(cl); +} + +static void uuid_io(struct cache_set *c, unsigned long rw, + struct bkey *k, struct closure *parent) +{ + struct closure *cl = &c->uuid_write.cl; + struct uuid_entry *u; + unsigned i; + + BUG_ON(!parent); + closure_lock(&c->uuid_write, parent); + + for (i = 0; i < KEY_PTRS(k); i++) { + struct bio *bio = bch_bbio_alloc(c); + + bio->bi_rw = REQ_SYNC|REQ_META|rw; + bio->bi_size = KEY_SIZE(k) << 9; + + bio->bi_end_io = uuid_endio; + bio->bi_private = cl; + bio_map(bio, c->uuids); + + bch_submit_bbio(bio, c, k, i); + + if (!(rw & WRITE)) + break; + } + + pr_debug("%s UUIDs at %s", rw & REQ_WRITE ? "wrote" : "read", + pkey(&c->uuid_bucket)); + + for (u = c->uuids; u < c->uuids + c->nr_uuids; u++) + if (!is_zero(u->uuid, 16)) + pr_debug("Slot %zi: %pU: %s: 1st: %u last: %u inv: %u", + u - c->uuids, u->uuid, u->label, + u->first_reg, u->last_reg, u->invalidated); + + closure_return(cl); +} + +static char *uuid_read(struct cache_set *c, struct jset *j, struct closure *cl) +{ + struct bkey *k = &j->uuid_bucket; + + if (__bch_ptr_invalid(c, 1, k)) + return "bad uuid pointer"; + + bkey_copy(&c->uuid_bucket, k); + uuid_io(c, READ_SYNC, k, cl); + + if (j->version < BCACHE_JSET_VERSION_UUIDv1) { + struct uuid_entry_v0 *u0 = (void *) c->uuids; + struct uuid_entry *u1 = (void *) c->uuids; + int i; + + closure_sync(cl); + + /* + * Since the new uuid entry is bigger than the old, we have to + * convert starting at the highest memory address and work down + * in order to do it in place + */ + + for (i = c->nr_uuids - 1; + i >= 0; + --i) { + memcpy(u1[i].uuid, u0[i].uuid, 16); + memcpy(u1[i].label, u0[i].label, 32); + + u1[i].first_reg = u0[i].first_reg; + u1[i].last_reg = u0[i].last_reg; + u1[i].invalidated = u0[i].invalidated; + + u1[i].flags = 0; + u1[i].sectors = 0; + } + } + + return NULL; +} + +static int __uuid_write(struct cache_set *c) +{ + BKEY_PADDED(key) k; + struct closure cl; + closure_init_stack(&cl); + + lockdep_assert_held(&bch_register_lock); + + if (bch_bucket_alloc_set(c, WATERMARK_METADATA, &k.key, 1, &cl)) + return 1; + + SET_KEY_SIZE(&k.key, c->sb.bucket_size); + uuid_io(c, REQ_WRITE, &k.key, &cl); + closure_sync(&cl); + + bkey_copy(&c->uuid_bucket, &k.key); + __bkey_put(c, &k.key); + return 0; +} + +int bch_uuid_write(struct cache_set *c) +{ + int ret = __uuid_write(c); + + if (!ret) + bch_journal_meta(c, NULL); + + return ret; +} + +static struct uuid_entry *uuid_find(struct cache_set *c, const char *uuid) +{ + struct uuid_entry *u; + + for (u = c->uuids; + u < c->uuids + c->nr_uuids; u++) + if (!memcmp(u->uuid, uuid, 16)) + return u; + + return NULL; +} + +static struct uuid_entry *uuid_find_empty(struct cache_set *c) +{ + static const char zero_uuid[16] = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"; + return uuid_find(c, zero_uuid); +} + +/* + * Bucket priorities/gens: + * + * For each bucket, we store on disk its + * 8 bit gen 
+ * 16 bit priority
+ *
+ * See alloc.c for an explanation of the gen. The priority is used to implement
+ * lru (and in the future other) cache replacement policies; for most purposes
+ * it's just an opaque integer.
+ *
+ * The gens and the priorities don't have a whole lot to do with each other, and
+ * it's actually the gens that must be written out at specific times - it's no
+ * big deal if the priorities don't get written, if we lose them we just reuse
+ * buckets in suboptimal order.
+ *
+ * On disk they're stored in a packed array, and in as many buckets as are
+ * required to fit them all. The buckets we use to store them form a list; the
+ * journal header points to the first bucket, the first bucket points to the
+ * second bucket, et cetera.
+ *
+ * This code is used by the allocation code; periodically (whenever it runs out
+ * of buckets to allocate from) the allocation code will invalidate some
+ * buckets, but it can't use those buckets until their new gens are safely on
+ * disk.
+ */
+
+static void prio_endio(struct bio *bio, int error)
+{
+	struct cache *ca = bio->bi_private;
+
+	cache_set_err_on(error, ca->set, "accessing priorities");
+	bch_bbio_free(bio, ca->set);
+	closure_put(&ca->prio);
+}
+
+static void prio_io(struct cache *ca, uint64_t bucket, unsigned long rw)
+{
+	struct closure *cl = &ca->prio;
+	struct bio *bio = bch_bbio_alloc(ca->set);
+
+	closure_init_stack(cl);
+
+	bio->bi_sector	= bucket * ca->sb.bucket_size;
+	bio->bi_bdev	= ca->bdev;
+	bio->bi_rw	= REQ_SYNC|REQ_META|rw;
+	bio->bi_size	= bucket_bytes(ca);
+
+	bio->bi_end_io	= prio_endio;
+	bio->bi_private = ca;
+	bio_map(bio, ca->disk_buckets);
+
+	closure_bio_submit(bio, &ca->prio, ca);
+	closure_sync(cl);
+}
+
+#define buckets_free(c)	"free %zu, free_inc %zu, unused %zu",		\
+	fifo_used(&c->free), fifo_used(&c->free_inc), fifo_used(&c->unused)
+
+void bch_prio_write(struct cache *ca)
+{
+	int i;
+	struct bucket *b;
+	struct closure cl;
+
+	closure_init_stack(&cl);
+
+	lockdep_assert_held(&ca->set->bucket_lock);
+
+	for (b = ca->buckets;
+	     b < ca->buckets + ca->sb.nbuckets; b++)
+		b->disk_gen = b->gen;
+
+	ca->disk_buckets->seq++;
+
+	atomic_long_add(ca->sb.bucket_size * prio_buckets(ca),
+			&ca->meta_sectors_written);
+
+	pr_debug("free %zu, free_inc %zu, unused %zu", fifo_used(&ca->free),
+		 fifo_used(&ca->free_inc), fifo_used(&ca->unused));
+	blktrace_msg(ca, "Starting priorities: " buckets_free(ca));
+
+	for (i = prio_buckets(ca) - 1; i >= 0; --i) {
+		long bucket;
+		struct prio_set *p = ca->disk_buckets;
+		struct bucket_disk *d = p->data, *end = d + prios_per_bucket(ca);
+
+		for (b = ca->buckets + i * prios_per_bucket(ca);
+		     b < ca->buckets + ca->sb.nbuckets && d < end;
+		     b++, d++) {
+			d->prio = cpu_to_le16(b->prio);
+			d->gen = b->gen;
+		}
+
+		p->next_bucket	= ca->prio_buckets[i + 1];
+		p->magic	= pset_magic(ca);
+		p->csum		= crc64(&p->magic, bucket_bytes(ca) - 8);
+
+		bucket = bch_bucket_alloc(ca, WATERMARK_PRIO, &cl);
+		BUG_ON(bucket == -1);
+
+		mutex_unlock(&ca->set->bucket_lock);
+		prio_io(ca, bucket, REQ_WRITE);
+		mutex_lock(&ca->set->bucket_lock);
+
+		ca->prio_buckets[i] = bucket;
+		atomic_dec_bug(&ca->buckets[bucket].pin);
+	}
+
+	mutex_unlock(&ca->set->bucket_lock);
+
+	bch_journal_meta(ca->set, &cl);
+	closure_sync(&cl);
+
+	mutex_lock(&ca->set->bucket_lock);
+
+	ca->need_save_prio = 0;
+
+	/*
+	 * Don't want the old priorities to get garbage collected until after we
+	 * finish writing the new ones, and they're journalled
+	 */
+	for (i = 0; i < prio_buckets(ca); i++)
+		ca->prio_last_buckets[i] =
ca->prio_buckets[i]; +} + +static void prio_read(struct cache *ca, uint64_t bucket) +{ + struct prio_set *p = ca->disk_buckets; + struct bucket_disk *d = p->data + prios_per_bucket(ca), *end = d; + struct bucket *b; + unsigned bucket_nr = 0; + + for (b = ca->buckets; + b < ca->buckets + ca->sb.nbuckets; + b++, d++) { + if (d == end) { + ca->prio_buckets[bucket_nr] = bucket; + ca->prio_last_buckets[bucket_nr] = bucket; + bucket_nr++; + + prio_io(ca, bucket, READ_SYNC); + + if (p->csum != crc64(&p->magic, bucket_bytes(ca) - 8)) + pr_warn("bad csum reading priorities"); + + if (p->magic != pset_magic(ca)) + pr_warn("bad magic reading priorities"); + + bucket = p->next_bucket; + d = p->data; + } + + b->prio = le16_to_cpu(d->prio); + b->gen = b->disk_gen = b->last_gc = b->gc_gen = d->gen; + } +} + +/* Bcache device */ + +static int open_dev(struct block_device *b, fmode_t mode) +{ + struct bcache_device *d = b->bd_disk->private_data; + if (atomic_read(&d->closing)) + return -ENXIO; + + closure_get(&d->cl); + return 0; +} + +static int release_dev(struct gendisk *b, fmode_t mode) +{ + struct bcache_device *d = b->private_data; + closure_put(&d->cl); + return 0; +} + +static int ioctl_dev(struct block_device *b, fmode_t mode, + unsigned int cmd, unsigned long arg) +{ + struct bcache_device *d = b->bd_disk->private_data; + return d->ioctl(d, mode, cmd, arg); +} + +static const struct block_device_operations bcache_ops = { + .open = open_dev, + .release = release_dev, + .ioctl = ioctl_dev, + .owner = THIS_MODULE, +}; + +void bcache_device_stop(struct bcache_device *d) +{ + if (!atomic_xchg(&d->closing, 1)) + closure_queue(&d->cl); +} + +static void bcache_device_detach(struct bcache_device *d) +{ + lockdep_assert_held(&bch_register_lock); + + if (atomic_read(&d->detaching)) { + struct uuid_entry *u = d->c->uuids + d->id; + + SET_UUID_FLASH_ONLY(u, 0); + memcpy(u->uuid, invalid_uuid, 16); + u->invalidated = cpu_to_le32(get_seconds()); + bch_uuid_write(d->c); + + atomic_set(&d->detaching, 0); + } + + d->c->devices[d->id] = NULL; + closure_put(&d->c->caching); + d->c = NULL; +} + +static void bcache_device_attach(struct bcache_device *d, struct cache_set *c, + unsigned id) +{ + BUG_ON(test_bit(CACHE_SET_STOPPING, &c->flags)); + + d->id = id; + d->c = c; + c->devices[id] = d; + + closure_get(&c->caching); +} + +static void bcache_device_link(struct bcache_device *d, struct cache_set *c, + const char *name) +{ + snprintf(d->name, BCACHEDEVNAME_SIZE, + "%s%u", name, d->id); + + WARN(sysfs_create_link(&d->kobj, &c->kobj, "cache") || + sysfs_create_link(&c->kobj, &d->kobj, d->name), + "Couldn't create device <-> cache set symlinks"); +} + +static void bcache_device_free(struct bcache_device *d) +{ + lockdep_assert_held(&bch_register_lock); + + pr_info("%s stopped", d->disk->disk_name); + + if (d->c) + bcache_device_detach(d); + + if (d->disk) + del_gendisk(d->disk); + if (d->disk && d->disk->queue) + blk_cleanup_queue(d->disk->queue); + if (d->disk) + put_disk(d->disk); + + bio_split_pool_free(&d->bio_split_hook); + if (d->unaligned_bvec) + mempool_destroy(d->unaligned_bvec); + if (d->bio_split) + bioset_free(d->bio_split); + + closure_debug_destroy(&d->cl); +} + +static int bcache_device_init(struct bcache_device *d, unsigned block_size) +{ + struct request_queue *q; + + if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio))) || + !(d->unaligned_bvec = mempool_create_kmalloc_pool(1, + sizeof(struct bio_vec) * BIO_MAX_PAGES)) || + bio_split_pool_init(&d->bio_split_hook)) + + return -ENOMEM; + + 
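+	/*
+	 * (Editorial gloss, not part of the original patch: the pool sizes
+	 * above are forward-progress reserves, not capacity limits.  A
+	 * mempool guarantees that many pre-allocated elements, e.g.
+	 *
+	 *	pool = mempool_create_kmalloc_pool(1, elem_size);
+	 *	p = mempool_alloc(pool, GFP_NOIO);	never fails, may sleep
+	 *
+	 * so under memory pressure I/O is throttled rather than failed.
+	 * The single error return above also relies on the teardown path,
+	 * bcache_device_free(), NULL-checking each member, so a partially
+	 * failed init needs no unwind code here.)
+	 */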
d->disk = alloc_disk(1); + if (!d->disk) + return -ENOMEM; + + snprintf(d->disk->disk_name, DISK_NAME_LEN, "bcache%i", bcache_minor); + + d->disk->major = bcache_major; + d->disk->first_minor = bcache_minor++; + d->disk->fops = &bcache_ops; + d->disk->private_data = d; + + q = blk_alloc_queue(GFP_KERNEL); + if (!q) + return -ENOMEM; + + blk_queue_make_request(q, NULL); + d->disk->queue = q; + q->queuedata = d; + q->backing_dev_info.congested_data = d; + q->limits.max_hw_sectors = UINT_MAX; + q->limits.max_sectors = UINT_MAX; + q->limits.max_segment_size = UINT_MAX; + q->limits.max_segments = BIO_MAX_PAGES; + q->limits.max_discard_sectors = UINT_MAX; + q->limits.io_min = block_size; + q->limits.logical_block_size = block_size; + q->limits.physical_block_size = block_size; + set_bit(QUEUE_FLAG_NONROT, &d->disk->queue->queue_flags); + set_bit(QUEUE_FLAG_DISCARD, &d->disk->queue->queue_flags); + + return 0; +} + +/* Cached device */ + +static void calc_cached_dev_sectors(struct cache_set *c) +{ + uint64_t sectors = 0; + struct cached_dev *dc; + + list_for_each_entry(dc, &c->cached_devs, list) + sectors += bdev_sectors(dc->bdev); + + c->cached_dev_sectors = sectors; +} + +void bch_cached_dev_run(struct cached_dev *dc) +{ + struct bcache_device *d = &dc->disk; + + if (atomic_xchg(&dc->running, 1)) + return; + + if (!d->c && + BDEV_STATE(&dc->sb) != BDEV_STATE_NONE) { + struct closure cl; + closure_init_stack(&cl); + + SET_BDEV_STATE(&dc->sb, BDEV_STATE_STALE); + bch_write_bdev_super(dc, &cl); + closure_sync(&cl); + } + + add_disk(d->disk); +#if 0 + char *env[] = { "SYMLINK=label" , NULL }; + kobject_uevent_env(&disk_to_dev(d->disk)->kobj, KOBJ_CHANGE, env); +#endif + if (sysfs_create_link(&d->kobj, &disk_to_dev(d->disk)->kobj, "dev") || + sysfs_create_link(&disk_to_dev(d->disk)->kobj, &d->kobj, "bcache")) + pr_debug("error creating sysfs link"); +} + +static void cached_dev_detach_finish(struct work_struct *w) +{ + struct cached_dev *dc = container_of(w, struct cached_dev, detach); + char buf[BDEVNAME_SIZE]; + struct closure cl; + closure_init_stack(&cl); + + BUG_ON(!atomic_read(&dc->disk.detaching)); + BUG_ON(atomic_read(&dc->count)); + + sysfs_remove_link(&dc->disk.c->kobj, dc->disk.name); + sysfs_remove_link(&dc->disk.kobj, "cache"); + + mutex_lock(&bch_register_lock); + + memset(&dc->sb.set_uuid, 0, 16); + SET_BDEV_STATE(&dc->sb, BDEV_STATE_NONE); + + bch_write_bdev_super(dc, &cl); + closure_sync(&cl); + + bcache_device_detach(&dc->disk); + list_move(&dc->list, &uncached_devices); + + mutex_unlock(&bch_register_lock); + + pr_info("Caching disabled for %s", bdevname(dc->bdev, buf)); + + /* Drop ref we took in cached_dev_detach() */ + closure_put(&dc->disk.cl); +} + +void bch_cached_dev_detach(struct cached_dev *dc) +{ + lockdep_assert_held(&bch_register_lock); + + if (atomic_read(&dc->disk.closing)) + return; + + if (atomic_xchg(&dc->disk.detaching, 1)) + return; + + /* + * Block the device from being closed and freed until we're finished + * detaching + */ + closure_get(&dc->disk.cl); + + bch_writeback_queue(dc); + cached_dev_put(dc); +} + +int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c) +{ + uint32_t rtime = cpu_to_le32(get_seconds()); + struct uuid_entry *u; + char buf[BDEVNAME_SIZE]; + + bdevname(dc->bdev, buf); + + if (memcmp(dc->sb.set_uuid, c->sb.set_uuid, 16)) + return -ENOENT; + + if (dc->disk.c) { + pr_err("Can't attach %s: already attached", buf); + return -EINVAL; + } + + if (test_bit(CACHE_SET_STOPPING, &c->flags)) { + pr_err("Can't attach %s: shutting 
down", buf); + return -EINVAL; + } + + if (dc->sb.block_size < c->sb.block_size) { + /* Will die */ + pr_err("Couldn't attach %s: block size " + "less than set's block size", buf); + return -EINVAL; + } + + u = uuid_find(c, dc->sb.uuid); + + if (u && + (BDEV_STATE(&dc->sb) == BDEV_STATE_STALE || + BDEV_STATE(&dc->sb) == BDEV_STATE_NONE)) { + memcpy(u->uuid, invalid_uuid, 16); + u->invalidated = cpu_to_le32(get_seconds()); + u = NULL; + } + + if (!u) { + if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) { + pr_err("Couldn't find uuid for %s in set", buf); + return -ENOENT; + } + + u = uuid_find_empty(c); + if (!u) { + pr_err("Not caching %s, no room for UUID", buf); + return -EINVAL; + } + } + + /* Deadlocks since we're called via sysfs... + sysfs_remove_file(&dc->kobj, &sysfs_attach); + */ + + if (is_zero(u->uuid, 16)) { + struct closure cl; + closure_init_stack(&cl); + + memcpy(u->uuid, dc->sb.uuid, 16); + memcpy(u->label, dc->sb.label, SB_LABEL_SIZE); + u->first_reg = u->last_reg = rtime; + bch_uuid_write(c); + + memcpy(dc->sb.set_uuid, c->sb.set_uuid, 16); + SET_BDEV_STATE(&dc->sb, BDEV_STATE_CLEAN); + + bch_write_bdev_super(dc, &cl); + closure_sync(&cl); + } else { + u->last_reg = rtime; + bch_uuid_write(c); + } + + bcache_device_attach(&dc->disk, c, u - c->uuids); + bcache_device_link(&dc->disk, c, "bdev"); + list_move(&dc->list, &c->cached_devs); + calc_cached_dev_sectors(c); + + smp_wmb(); + /* + * dc->c must be set before dc->count != 0 - paired with the mb in + * cached_dev_get() + */ + atomic_set(&dc->count, 1); + + if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) { + atomic_set(&dc->has_dirty, 1); + atomic_inc(&dc->count); + bch_writeback_queue(dc); + } + + bch_cached_dev_run(dc); + + pr_info("Caching %s as %s on set %pU", + bdevname(dc->bdev, buf), dc->disk.disk->disk_name, + dc->disk.c->sb.set_uuid); + return 0; +} + +void bch_cached_dev_release(struct kobject *kobj) +{ + struct cached_dev *dc = container_of(kobj, struct cached_dev, + disk.kobj); + kfree(dc); + module_put(THIS_MODULE); +} + +static void cached_dev_free(struct closure *cl) +{ + struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl); + + cancel_delayed_work_sync(&dc->writeback_rate_update); + + mutex_lock(&bch_register_lock); + + bcache_device_free(&dc->disk); + list_del(&dc->list); + + mutex_unlock(&bch_register_lock); + + if (!IS_ERR_OR_NULL(dc->bdev)) { + blk_sync_queue(bdev_get_queue(dc->bdev)); + blkdev_put(dc->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); + } + + wake_up(&unregister_wait); + + kobject_put(&dc->disk.kobj); +} + +static void cached_dev_flush(struct closure *cl) +{ + struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl); + struct bcache_device *d = &dc->disk; + + bch_cache_accounting_destroy(&dc->accounting); + kobject_del(&d->kobj); + + continue_at(cl, cached_dev_free, system_wq); +} + +static int cached_dev_init(struct cached_dev *dc, unsigned block_size) +{ + int err; + struct io *io; + + closure_init(&dc->disk.cl, NULL); + set_closure_fn(&dc->disk.cl, cached_dev_flush, system_wq); + + __module_get(THIS_MODULE); + INIT_LIST_HEAD(&dc->list); + kobject_init(&dc->disk.kobj, &bch_cached_dev_ktype); + + bch_cache_accounting_init(&dc->accounting, &dc->disk.cl); + + err = bcache_device_init(&dc->disk, block_size); + if (err) + goto err; + + spin_lock_init(&dc->io_lock); + closure_init_unlocked(&dc->sb_write); + INIT_WORK(&dc->detach, cached_dev_detach_finish); + + dc->sequential_merge = true; + dc->sequential_cutoff = 4 << 20; + + INIT_LIST_HEAD(&dc->io_lru); + 
dc->sb_bio.bi_max_vecs = 1; + dc->sb_bio.bi_io_vec = dc->sb_bio.bi_inline_vecs; + + for (io = dc->io; io < dc->io + RECENT_IO; io++) { + list_add(&io->lru, &dc->io_lru); + hlist_add_head(&io->hash, dc->io_hash + RECENT_IO); + } + + bch_writeback_init_cached_dev(dc); + return 0; +err: + bcache_device_stop(&dc->disk); + return err; +} + +/* Cached device - bcache superblock */ + +static const char *register_bdev(struct cache_sb *sb, struct page *sb_page, + struct block_device *bdev, + struct cached_dev *dc) +{ + char name[BDEVNAME_SIZE]; + const char *err = "cannot allocate memory"; + struct gendisk *g; + struct cache_set *c; + + if (!dc || cached_dev_init(dc, sb->block_size << 9) != 0) + return err; + + memcpy(&dc->sb, sb, sizeof(struct cache_sb)); + dc->sb_bio.bi_io_vec[0].bv_page = sb_page; + dc->bdev = bdev; + dc->bdev->bd_holder = dc; + + g = dc->disk.disk; + + set_capacity(g, dc->bdev->bd_part->nr_sects - 16); + + bch_cached_dev_request_init(dc); + + err = "error creating kobject"; + if (kobject_add(&dc->disk.kobj, &part_to_dev(bdev->bd_part)->kobj, + "bcache")) + goto err; + if (bch_cache_accounting_add_kobjs(&dc->accounting, &dc->disk.kobj)) + goto err; + + list_add(&dc->list, &uncached_devices); + list_for_each_entry(c, &bch_cache_sets, list) + bch_cached_dev_attach(dc, c); + + if (BDEV_STATE(&dc->sb) == BDEV_STATE_NONE || + BDEV_STATE(&dc->sb) == BDEV_STATE_STALE) + bch_cached_dev_run(dc); + + return NULL; +err: + kobject_put(&dc->disk.kobj); + pr_notice("error opening %s: %s", bdevname(bdev, name), err); + /* + * Return NULL instead of an error because kobject_put() cleans + * everything up + */ + return NULL; +} + +/* Flash only volumes */ + +void bch_flash_dev_release(struct kobject *kobj) +{ + struct bcache_device *d = container_of(kobj, struct bcache_device, + kobj); + kfree(d); +} + +static void flash_dev_free(struct closure *cl) +{ + struct bcache_device *d = container_of(cl, struct bcache_device, cl); + bcache_device_free(d); + kobject_put(&d->kobj); +} + +static void flash_dev_flush(struct closure *cl) +{ + struct bcache_device *d = container_of(cl, struct bcache_device, cl); + + sysfs_remove_link(&d->c->kobj, d->name); + sysfs_remove_link(&d->kobj, "cache"); + kobject_del(&d->kobj); + continue_at(cl, flash_dev_free, system_wq); +} + +static int flash_dev_run(struct cache_set *c, struct uuid_entry *u) +{ + struct bcache_device *d = kzalloc(sizeof(struct bcache_device), + GFP_KERNEL); + if (!d) + return -ENOMEM; + + closure_init(&d->cl, NULL); + set_closure_fn(&d->cl, flash_dev_flush, system_wq); + + kobject_init(&d->kobj, &bch_flash_dev_ktype); + + if (bcache_device_init(d, block_bytes(c))) + goto err; + + bcache_device_attach(d, c, u - c->uuids); + set_capacity(d->disk, u->sectors); + bch_flash_dev_request_init(d); + add_disk(d->disk); + + if (kobject_add(&d->kobj, &disk_to_dev(d->disk)->kobj, "bcache")) + goto err; + + bcache_device_link(d, c, "volume"); + + return 0; +err: + kobject_put(&d->kobj); + return -ENOMEM; +} + +static int flash_devs_run(struct cache_set *c) +{ + int ret = 0; + struct uuid_entry *u; + + for (u = c->uuids; + u < c->uuids + c->nr_uuids && !ret; + u++) + if (UUID_FLASH_ONLY(u)) + ret = flash_dev_run(c, u); + + return ret; +} + +int bch_flash_dev_create(struct cache_set *c, uint64_t size) +{ + struct uuid_entry *u; + + if (test_bit(CACHE_SET_STOPPING, &c->flags)) + return -EINTR; + + u = uuid_find_empty(c); + if (!u) { + pr_err("Can't create volume, no room for UUID"); + return -EINVAL; + } + + get_random_bytes(u->uuid, 16); + memset(u->label, 0, 
32); + u->first_reg = u->last_reg = cpu_to_le32(get_seconds()); + + SET_UUID_FLASH_ONLY(u, 1); + u->sectors = size >> 9; + + bch_uuid_write(c); + + return flash_dev_run(c, u); +} + +/* Cache set */ + +__printf(2, 3) +bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...) +{ + va_list args; + + if (test_bit(CACHE_SET_STOPPING, &c->flags)) + return false; + + /* XXX: we can be called from atomic context + acquire_console_sem(); + */ + + printk(KERN_ERR "bcache: error on %pU: ", c->sb.set_uuid); + + va_start(args, fmt); + vprintk(fmt, args); + va_end(args); + + printk(", disabling caching\n"); + + bch_cache_set_unregister(c); + return true; +} + +void bch_cache_set_release(struct kobject *kobj) +{ + struct cache_set *c = container_of(kobj, struct cache_set, kobj); + kfree(c); + module_put(THIS_MODULE); +} + +static void cache_set_free(struct closure *cl) +{ + struct cache_set *c = container_of(cl, struct cache_set, cl); + struct cache *ca; + unsigned i; + + if (!IS_ERR_OR_NULL(c->debug)) + debugfs_remove(c->debug); + + bch_open_buckets_free(c); + bch_btree_cache_free(c); + bch_journal_free(c); + + for_each_cache(ca, c, i) + if (ca) + kobject_put(&ca->kobj); + + free_pages((unsigned long) c->uuids, ilog2(bucket_pages(c))); + free_pages((unsigned long) c->sort, ilog2(bucket_pages(c))); + + kfree(c->fill_iter); + if (c->bio_split) + bioset_free(c->bio_split); + if (c->bio_meta) + mempool_destroy(c->bio_meta); + if (c->search) + mempool_destroy(c->search); + kfree(c->devices); + + mutex_lock(&bch_register_lock); + list_del(&c->list); + mutex_unlock(&bch_register_lock); + + pr_info("Cache set %pU unregistered", c->sb.set_uuid); + wake_up(&unregister_wait); + + closure_debug_destroy(&c->cl); + kobject_put(&c->kobj); +} + +static void cache_set_flush(struct closure *cl) +{ + struct cache_set *c = container_of(cl, struct cache_set, caching); + struct btree *b; + + /* Shut down allocator threads */ + set_bit(CACHE_SET_STOPPING_2, &c->flags); + wake_up(&c->alloc_wait); + + bch_cache_accounting_destroy(&c->accounting); + + kobject_put(&c->internal); + kobject_del(&c->kobj); + + if (!IS_ERR_OR_NULL(c->root)) + list_add(&c->root->list, &c->btree_cache); + + /* Should skip this if we're unregistering because of an error */ + list_for_each_entry(b, &c->btree_cache, list) + if (btree_node_dirty(b)) + bch_btree_write(b, true, NULL); + + closure_return(cl); +} + +static void __cache_set_unregister(struct closure *cl) +{ + struct cache_set *c = container_of(cl, struct cache_set, caching); + struct cached_dev *dc, *t; + size_t i; + + mutex_lock(&bch_register_lock); + + if (test_bit(CACHE_SET_UNREGISTERING, &c->flags)) + list_for_each_entry_safe(dc, t, &c->cached_devs, list) + bch_cached_dev_detach(dc); + + for (i = 0; i < c->nr_uuids; i++) + if (c->devices[i] && UUID_FLASH_ONLY(&c->uuids[i])) + bcache_device_stop(c->devices[i]); + + mutex_unlock(&bch_register_lock); + + continue_at(cl, cache_set_flush, system_wq); +} + +void bch_cache_set_stop(struct cache_set *c) +{ + if (!test_and_set_bit(CACHE_SET_STOPPING, &c->flags)) + closure_queue(&c->caching); +} + +void bch_cache_set_unregister(struct cache_set *c) +{ + set_bit(CACHE_SET_UNREGISTERING, &c->flags); + bch_cache_set_stop(c); +} + +#define alloc_bucket_pages(gfp, c) \ + ((void *) __get_free_pages(__GFP_ZERO|gfp, ilog2(bucket_pages(c)))) + +struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) +{ + int iter_size; + struct cache_set *c = kzalloc(sizeof(struct cache_set), GFP_KERNEL); + if (!c) + return NULL; + + __module_get(THIS_MODULE); + 
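+	/*
+	 * (Editorial sketch, not from the patch: two stacked closures are
+	 * initialized below.  c->cl is the cache set's lifetime reference,
+	 * whose final put runs cache_set_free(); c->caching is its child
+	 * and runs __cache_set_unregister() first.  In the closure API a
+	 * child pins its parent:
+	 *
+	 *	closure_init(&child, &parent);	child holds a ref on parent
+	 *	...
+	 *	closure_return(&child);		done; ref on parent dropped
+	 *
+	 * which guarantees unregister always finishes before the final
+	 * free.)
+	 */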
closure_init(&c->cl, NULL); + set_closure_fn(&c->cl, cache_set_free, system_wq); + + closure_init(&c->caching, &c->cl); + set_closure_fn(&c->caching, __cache_set_unregister, system_wq); + + /* Maybe create continue_at_noreturn() and use it here? */ + closure_set_stopped(&c->cl); + closure_put(&c->cl); + + kobject_init(&c->kobj, &bch_cache_set_ktype); + kobject_init(&c->internal, &bch_cache_set_internal_ktype); + + bch_cache_accounting_init(&c->accounting, &c->cl); + + memcpy(c->sb.set_uuid, sb->set_uuid, 16); + c->sb.block_size = sb->block_size; + c->sb.bucket_size = sb->bucket_size; + c->sb.nr_in_set = sb->nr_in_set; + c->sb.last_mount = sb->last_mount; + c->bucket_bits = ilog2(sb->bucket_size); + c->block_bits = ilog2(sb->block_size); + c->nr_uuids = bucket_bytes(c) / sizeof(struct uuid_entry); + + c->btree_pages = c->sb.bucket_size / PAGE_SECTORS; + if (c->btree_pages > BTREE_MAX_PAGES) + c->btree_pages = max_t(int, c->btree_pages / 4, + BTREE_MAX_PAGES); + + init_waitqueue_head(&c->alloc_wait); + mutex_init(&c->bucket_lock); + mutex_init(&c->fill_lock); + mutex_init(&c->sort_lock); + spin_lock_init(&c->sort_time_lock); + closure_init_unlocked(&c->sb_write); + closure_init_unlocked(&c->uuid_write); + spin_lock_init(&c->btree_read_time_lock); + bch_moving_init_cache_set(c); + + INIT_LIST_HEAD(&c->list); + INIT_LIST_HEAD(&c->cached_devs); + INIT_LIST_HEAD(&c->btree_cache); + INIT_LIST_HEAD(&c->btree_cache_freeable); + INIT_LIST_HEAD(&c->btree_cache_freed); + INIT_LIST_HEAD(&c->data_buckets); + + c->search = mempool_create_slab_pool(32, bch_search_cache); + if (!c->search) + goto err; + + iter_size = (sb->bucket_size / sb->block_size + 1) * + sizeof(struct btree_iter_set); + + if (!(c->devices = kzalloc(c->nr_uuids * sizeof(void *), GFP_KERNEL)) || + !(c->bio_meta = mempool_create_kmalloc_pool(2, + sizeof(struct bbio) + sizeof(struct bio_vec) * + bucket_pages(c))) || + !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio))) || + !(c->fill_iter = kmalloc(iter_size, GFP_KERNEL)) || + !(c->sort = alloc_bucket_pages(GFP_KERNEL, c)) || + !(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) || + bch_journal_alloc(c) || + bch_btree_cache_alloc(c) || + bch_open_buckets_alloc(c)) + goto err; + + c->fill_iter->size = sb->bucket_size / sb->block_size; + + c->congested_read_threshold_us = 2000; + c->congested_write_threshold_us = 20000; + c->error_limit = 8 << IO_ERROR_SHIFT; + + return c; +err: + bch_cache_set_unregister(c); + return NULL; +} + +static void run_cache_set(struct cache_set *c) +{ + const char *err = "cannot allocate memory"; + struct cached_dev *dc, *t; + struct cache *ca; + unsigned i; + + struct btree_op op; + bch_btree_op_init_stack(&op); + op.lock = SHRT_MAX; + + for_each_cache(ca, c, i) + c->nbuckets += ca->sb.nbuckets; + + if (CACHE_SYNC(&c->sb)) { + LIST_HEAD(journal); + struct bkey *k; + struct jset *j; + + err = "cannot allocate memory for journal"; + if (bch_journal_read(c, &journal, &op)) + goto err; + + pr_debug("btree_journal_read() done"); + + err = "no journal entries found"; + if (list_empty(&journal)) + goto err; + + j = &list_entry(journal.prev, struct journal_replay, list)->j; + + err = "IO error reading priorities"; + for_each_cache(ca, c, i) + prio_read(ca, j->prio_bucket[ca->sb.nr_this_dev]); + + /* + * If prio_read() fails it'll call cache_set_error and we'll + * tear everything down right away, but if we perhaps checked + * sooner we could avoid journal replay. 
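+		 *
+		 * (Editorial addition, one possible shape of such a check;
+		 * prio_read() would have to return an error rather than
+		 * only flagging it via cache_set_error():
+		 *
+		 *	err = "IO error reading priorities";
+		 *	for_each_cache(ca, c, i)
+		 *		if (prio_read(ca, j->prio_bucket[ca->sb.nr_this_dev]))
+		 *			goto err;
+		 *
+		 * Later kernels reworked prio_read() along roughly these
+		 * lines.)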
+	 */
+
+		k = &j->btree_root;
+
+		err = "bad btree root";
+		if (__bch_ptr_invalid(c, j->btree_level + 1, k))
+			goto err;
+
+		err = "error reading btree root";
+		c->root = bch_btree_node_get(c, k, j->btree_level, &op);
+		if (IS_ERR_OR_NULL(c->root))
+			goto err;
+
+		list_del_init(&c->root->list);
+		rw_unlock(true, c->root);
+
+		err = uuid_read(c, j, &op.cl);
+		if (err)
+			goto err;
+
+		err = "error in recovery";
+		if (bch_btree_check(c, &op))
+			goto err;
+
+		bch_journal_mark(c, &journal);
+		bch_btree_gc_finish(c);
+		pr_debug("btree_check() done");
+
+		/*
+		 * bcache_journal_next() can't happen sooner, or
+		 * btree_gc_finish() will give spurious errors about last_gc >
+		 * gc_gen - this is a hack but oh well.
+		 */
+		bch_journal_next(&c->journal);
+
+		for_each_cache(ca, c, i)
+			closure_call(&ca->alloc, bch_allocator_thread,
+				     system_wq, &c->cl);
+
+		/*
+		 * First place it's safe to allocate: btree_check() and
+		 * btree_gc_finish() have to run before we have buckets to
+		 * allocate, and bch_bucket_alloc_set() might cause a journal
+		 * entry to be written so bcache_journal_next() has to be called
+		 * first.
+		 *
+		 * If the uuids were in the old format we have to rewrite them
+		 * before the next journal entry is written:
+		 */
+		if (j->version < BCACHE_JSET_VERSION_UUID)
+			__uuid_write(c);
+
+		bch_journal_replay(c, &journal, &op);
+	} else {
+		pr_notice("invalidating existing data");
+		/* Don't want invalidate_buckets() to queue a gc yet */
+		closure_lock(&c->gc, NULL);
+
+		for_each_cache(ca, c, i) {
+			unsigned j;
+
+			ca->sb.keys = clamp_t(int, ca->sb.nbuckets >> 7,
+					      2, SB_JOURNAL_BUCKETS);
+
+			for (j = 0; j < ca->sb.keys; j++)
+				ca->sb.d[j] = ca->sb.first_bucket + j;
+		}
+
+		bch_btree_gc_finish(c);
+
+		for_each_cache(ca, c, i)
+			closure_call(&ca->alloc, bch_allocator_thread,
+				     ca->alloc_workqueue, &c->cl);
+
+		mutex_lock(&c->bucket_lock);
+		for_each_cache(ca, c, i)
+			bch_prio_write(ca);
+		mutex_unlock(&c->bucket_lock);
+
+		wake_up(&c->alloc_wait);
+
+		err = "cannot allocate new UUID bucket";
+		if (__uuid_write(c))
+			goto err_unlock_gc;
+
+		err = "cannot allocate new btree root";
+		c->root = bch_btree_node_alloc(c, 0, &op.cl);
+		if (IS_ERR_OR_NULL(c->root))
+			goto err_unlock_gc;
+
+		bkey_copy_key(&c->root->key, &MAX_KEY);
+		bch_btree_write(c->root, true, &op);
+
+		bch_btree_set_root(c->root);
+		rw_unlock(true, c->root);
+
+		/*
+		 * We don't want to write the first journal entry until
+		 * everything is set up - fortunately journal entries won't be
+		 * written until the SET_CACHE_SYNC() here:
+		 */
+		SET_CACHE_SYNC(&c->sb, true);
+
+		bch_journal_next(&c->journal);
+		bch_journal_meta(c, &op.cl);
+
+		/* Unlock */
+		closure_set_stopped(&c->gc.cl);
+		closure_put(&c->gc.cl);
+	}
+
+	closure_sync(&op.cl);
+	c->sb.last_mount = get_seconds();
+	bcache_write_super(c);
+
+	list_for_each_entry_safe(dc, t, &uncached_devices, list)
+		bch_cached_dev_attach(dc, c);
+
+	flash_devs_run(c);
+
+	return;
+err_unlock_gc:
+	closure_set_stopped(&c->gc.cl);
+	closure_put(&c->gc.cl);
+err:
+	closure_sync(&op.cl);
+	/* XXX: test this, it's broken */
+	bch_cache_set_error(c, err);
+}
+
+static bool can_attach_cache(struct cache *ca, struct cache_set *c)
+{
+	return ca->sb.block_size	== c->sb.block_size &&
+		ca->sb.bucket_size	== c->sb.bucket_size &&
+		ca->sb.nr_in_set	== c->sb.nr_in_set;
+}
+
+static const char *register_cache_set(struct cache *ca)
+{
+	char buf[12];
+	const char *err = "cannot allocate memory";
+	struct cache_set *c;
+
+	list_for_each_entry(c, &bch_cache_sets, list)
+		if (!memcmp(c->sb.set_uuid, ca->sb.set_uuid,
16)) { + if (c->cache[ca->sb.nr_this_dev]) + return "duplicate cache set member"; + + if (!can_attach_cache(ca, c)) + return "cache sb does not match set"; + + if (!CACHE_SYNC(&ca->sb)) + SET_CACHE_SYNC(&c->sb, false); + + goto found; + } + + c = bch_cache_set_alloc(&ca->sb); + if (!c) + return err; + + err = "error creating kobject"; + if (kobject_add(&c->kobj, bcache_kobj, "%pU", c->sb.set_uuid) || + kobject_add(&c->internal, &c->kobj, "internal")) + goto err; + + if (bch_cache_accounting_add_kobjs(&c->accounting, &c->kobj)) + goto err; + + bch_debug_init_cache_set(c); + + list_add(&c->list, &bch_cache_sets); +found: + sprintf(buf, "cache%i", ca->sb.nr_this_dev); + if (sysfs_create_link(&ca->kobj, &c->kobj, "set") || + sysfs_create_link(&c->kobj, &ca->kobj, buf)) + goto err; + + if (ca->sb.seq > c->sb.seq) { + c->sb.version = ca->sb.version; + memcpy(c->sb.set_uuid, ca->sb.set_uuid, 16); + c->sb.flags = ca->sb.flags; + c->sb.seq = ca->sb.seq; + pr_debug("set version = %llu", c->sb.version); + } + + ca->set = c; + ca->set->cache[ca->sb.nr_this_dev] = ca; + c->cache_by_alloc[c->caches_loaded++] = ca; + + if (c->caches_loaded == c->sb.nr_in_set) + run_cache_set(c); + + return NULL; +err: + bch_cache_set_unregister(c); + return err; +} + +/* Cache device */ + +void bch_cache_release(struct kobject *kobj) +{ + struct cache *ca = container_of(kobj, struct cache, kobj); + + if (ca->set) + ca->set->cache[ca->sb.nr_this_dev] = NULL; + + bch_cache_allocator_exit(ca); + + bio_split_pool_free(&ca->bio_split_hook); + + if (ca->alloc_workqueue) + destroy_workqueue(ca->alloc_workqueue); + + free_pages((unsigned long) ca->disk_buckets, ilog2(bucket_pages(ca))); + kfree(ca->prio_buckets); + vfree(ca->buckets); + + free_heap(&ca->heap); + free_fifo(&ca->unused); + free_fifo(&ca->free_inc); + free_fifo(&ca->free); + + if (ca->sb_bio.bi_inline_vecs[0].bv_page) + put_page(ca->sb_bio.bi_io_vec[0].bv_page); + + if (!IS_ERR_OR_NULL(ca->bdev)) { + blk_sync_queue(bdev_get_queue(ca->bdev)); + blkdev_put(ca->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); + } + + kfree(ca); + module_put(THIS_MODULE); +} + +static int cache_alloc(struct cache_sb *sb, struct cache *ca) +{ + size_t free; + struct bucket *b; + + if (!ca) + return -ENOMEM; + + __module_get(THIS_MODULE); + kobject_init(&ca->kobj, &bch_cache_ktype); + + memcpy(&ca->sb, sb, sizeof(struct cache_sb)); + + INIT_LIST_HEAD(&ca->discards); + + bio_init(&ca->sb_bio); + ca->sb_bio.bi_max_vecs = 1; + ca->sb_bio.bi_io_vec = ca->sb_bio.bi_inline_vecs; + + bio_init(&ca->journal.bio); + ca->journal.bio.bi_max_vecs = 8; + ca->journal.bio.bi_io_vec = ca->journal.bio.bi_inline_vecs; + + free = roundup_pow_of_two(ca->sb.nbuckets) >> 9; + free = max_t(size_t, free, (prio_buckets(ca) + 8) * 2); + + if (!init_fifo(&ca->free, free, GFP_KERNEL) || + !init_fifo(&ca->free_inc, free << 2, GFP_KERNEL) || + !init_fifo(&ca->unused, free << 2, GFP_KERNEL) || + !init_heap(&ca->heap, free << 3, GFP_KERNEL) || + !(ca->buckets = vmalloc(sizeof(struct bucket) * + ca->sb.nbuckets)) || + !(ca->prio_buckets = kzalloc(sizeof(uint64_t) * prio_buckets(ca) * + 2, GFP_KERNEL)) || + !(ca->disk_buckets = alloc_bucket_pages(GFP_KERNEL, ca)) || + !(ca->alloc_workqueue = alloc_workqueue("bch_allocator", 0, 1)) || + bio_split_pool_init(&ca->bio_split_hook)) + goto err; + + ca->prio_last_buckets = ca->prio_buckets + prio_buckets(ca); + + memset(ca->buckets, 0, ca->sb.nbuckets * sizeof(struct bucket)); + for_each_bucket(b, ca) + atomic_set(&b->pin, 0); + + if (bch_cache_allocator_init(ca)) + goto err; + + 
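+	/*
+	 * (Worked example added in editing, with assumed numbers: for a
+	 * device with ca->sb.nbuckets = 131072 and prio_buckets(ca) well
+	 * under 120 - so the power-of-two term dominates - the sizing
+	 * above gives
+	 *
+	 *	free     = roundup_pow_of_two(131072) >> 9	=  256
+	 *	free_inc = free << 2				= 1024
+	 *	unused   = free << 2				= 1024
+	 *	heap     = free << 3				= 2048
+	 *
+	 * entries: the fifos scale with the bucket count so the allocator
+	 * keeps a cushion of invalidated buckets ready to hand out.)
+	 */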
return 0; +err: + kobject_put(&ca->kobj); + return -ENOMEM; +} + +static const char *register_cache(struct cache_sb *sb, struct page *sb_page, + struct block_device *bdev, struct cache *ca) +{ + char name[BDEVNAME_SIZE]; + const char *err = "cannot allocate memory"; + + if (cache_alloc(sb, ca) != 0) + return err; + + ca->sb_bio.bi_io_vec[0].bv_page = sb_page; + ca->bdev = bdev; + ca->bdev->bd_holder = ca; + + if (blk_queue_discard(bdev_get_queue(ca->bdev))) + ca->discard = CACHE_DISCARD(&ca->sb); + + err = "error creating kobject"; + if (kobject_add(&ca->kobj, &part_to_dev(bdev->bd_part)->kobj, "bcache")) + goto err; + + err = register_cache_set(ca); + if (err) + goto err; + + pr_info("registered cache device %s", bdevname(bdev, name)); + + return NULL; +err: + kobject_put(&ca->kobj); + pr_info("error opening %s: %s", bdevname(bdev, name), err); + /* Return NULL instead of an error because kobject_put() cleans + * everything up + */ + return NULL; +} + +/* Global interfaces/init */ + +static ssize_t register_bcache(struct kobject *, struct kobj_attribute *, + const char *, size_t); + +kobj_attribute_write(register, register_bcache); +kobj_attribute_write(register_quiet, register_bcache); + +static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, + const char *buffer, size_t size) +{ + ssize_t ret = size; + const char *err = "cannot allocate memory"; + char *path = NULL; + struct cache_sb *sb = NULL; + struct block_device *bdev = NULL; + struct page *sb_page = NULL; + + if (!try_module_get(THIS_MODULE)) + return -EBUSY; + + mutex_lock(&bch_register_lock); + + if (!(path = kstrndup(buffer, size, GFP_KERNEL)) || + !(sb = kmalloc(sizeof(struct cache_sb), GFP_KERNEL))) + goto err; + + err = "failed to open device"; + bdev = blkdev_get_by_path(strim(path), + FMODE_READ|FMODE_WRITE|FMODE_EXCL, + sb); + if (bdev == ERR_PTR(-EBUSY)) + err = "device busy"; + + if (IS_ERR(bdev) || + set_blocksize(bdev, 4096)) + goto err; + + err = read_super(sb, bdev, &sb_page); + if (err) + goto err_close; + + if (sb->version == CACHE_BACKING_DEV) { + struct cached_dev *dc = kzalloc(sizeof(*dc), GFP_KERNEL); + + err = register_bdev(sb, sb_page, bdev, dc); + } else { + struct cache *ca = kzalloc(sizeof(*ca), GFP_KERNEL); + + err = register_cache(sb, sb_page, bdev, ca); + } + + if (err) { + /* register_(bdev|cache) will only return an error if they + * didn't get far enough to create the kobject - if they did, + * the kobject destructor will do this cleanup. + */ + put_page(sb_page); +err_close: + blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); +err: + if (attr != &ksysfs_register_quiet) + pr_info("error opening %s: %s", path, err); + ret = -EINVAL; + } + + kfree(sb); + kfree(path); + mutex_unlock(&bch_register_lock); + module_put(THIS_MODULE); + return ret; +} + +static int bcache_reboot(struct notifier_block *n, unsigned long code, void *x) +{ + if (code == SYS_DOWN || + code == SYS_HALT || + code == SYS_POWER_OFF) { + DEFINE_WAIT(wait); + unsigned long start = jiffies; + bool stopped = false; + + struct cache_set *c, *tc; + struct cached_dev *dc, *tdc; + + mutex_lock(&bch_register_lock); + + if (list_empty(&bch_cache_sets) && + list_empty(&uncached_devices)) + goto out; + + pr_info("Stopping all devices:"); + + list_for_each_entry_safe(c, tc, &bch_cache_sets, list) + bch_cache_set_stop(c); + + list_for_each_entry_safe(dc, tdc, &uncached_devices, list) + bcache_device_stop(&dc->disk); + + /* What's a condition variable? 
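+		 *
+		 * (Editorial gloss: the hand-rolled loop below is in effect
+		 *
+		 *	wait_event_timeout(unregister_wait,
+		 *			   list_empty(&bch_cache_sets) &&
+		 *			   list_empty(&uncached_devices),
+		 *			   2 * HZ);
+		 *
+		 * written out with prepare_to_wait()/schedule_timeout()
+		 * because bch_register_lock has to be dropped around the
+		 * sleep and the condition re-checked under it.)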
*/ + while (1) { + long timeout = start + 2 * HZ - jiffies; + + stopped = list_empty(&bch_cache_sets) && + list_empty(&uncached_devices); + + if (timeout < 0 || stopped) + break; + + prepare_to_wait(&unregister_wait, &wait, + TASK_UNINTERRUPTIBLE); + + mutex_unlock(&bch_register_lock); + schedule_timeout(timeout); + mutex_lock(&bch_register_lock); + } + + finish_wait(&unregister_wait, &wait); + + if (stopped) + pr_info("All devices stopped"); + else + pr_notice("Timeout waiting for devices to be closed"); +out: + mutex_unlock(&bch_register_lock); + } + + return NOTIFY_DONE; +} + +static struct notifier_block reboot = { + .notifier_call = bcache_reboot, + .priority = INT_MAX, /* before any real devices */ +}; + +static void bcache_exit(void) +{ + bch_debug_exit(); + bch_writeback_exit(); + bch_request_exit(); + bch_btree_exit(); + if (bcache_kobj) + kobject_put(bcache_kobj); + if (bcache_wq) + destroy_workqueue(bcache_wq); + unregister_blkdev(bcache_major, "bcache"); + unregister_reboot_notifier(&reboot); +} + +static int __init bcache_init(void) +{ + static const struct attribute *files[] = { + &ksysfs_register.attr, + &ksysfs_register_quiet.attr, + NULL + }; + + mutex_init(&bch_register_lock); + init_waitqueue_head(&unregister_wait); + register_reboot_notifier(&reboot); + + bcache_major = register_blkdev(0, "bcache"); + if (bcache_major < 0) + return bcache_major; + + if (!(bcache_wq = create_workqueue("bcache")) || + !(bcache_kobj = kobject_create_and_add("bcache", fs_kobj)) || + sysfs_create_files(bcache_kobj, files) || + bch_btree_init() || + bch_request_init() || + bch_writeback_init() || + bch_debug_init(bcache_kobj)) + goto err; + + return 0; +err: + bcache_exit(); + return -ENOMEM; +} + +module_exit(bcache_exit); +module_init(bcache_init); diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c new file mode 100644 index 000000000000..5c7e77073b1f --- /dev/null +++ b/drivers/md/bcache/sysfs.c @@ -0,0 +1,817 @@ +/* + * bcache sysfs interfaces + * + * Copyright 2010, 2011 Kent Overstreet + * Copyright 2012 Google, Inc. 
+ */ + +#include "bcache.h" +#include "sysfs.h" +#include "btree.h" +#include "request.h" + +#include + +static const char * const cache_replacement_policies[] = { + "lru", + "fifo", + "random", + NULL +}; + +write_attribute(attach); +write_attribute(detach); +write_attribute(unregister); +write_attribute(stop); +write_attribute(clear_stats); +write_attribute(trigger_gc); +write_attribute(prune_cache); +write_attribute(flash_vol_create); + +read_attribute(bucket_size); +read_attribute(block_size); +read_attribute(nbuckets); +read_attribute(tree_depth); +read_attribute(root_usage_percent); +read_attribute(priority_stats); +read_attribute(btree_cache_size); +read_attribute(btree_cache_max_chain); +read_attribute(cache_available_percent); +read_attribute(written); +read_attribute(btree_written); +read_attribute(metadata_written); +read_attribute(active_journal_entries); + +sysfs_time_stats_attribute(btree_gc, sec, ms); +sysfs_time_stats_attribute(btree_split, sec, us); +sysfs_time_stats_attribute(btree_sort, ms, us); +sysfs_time_stats_attribute(btree_read, ms, us); +sysfs_time_stats_attribute(try_harder, ms, us); + +read_attribute(btree_nodes); +read_attribute(btree_used_percent); +read_attribute(average_key_size); +read_attribute(dirty_data); +read_attribute(bset_tree_stats); + +read_attribute(state); +read_attribute(cache_read_races); +read_attribute(writeback_keys_done); +read_attribute(writeback_keys_failed); +read_attribute(io_errors); +read_attribute(congested); +rw_attribute(congested_read_threshold_us); +rw_attribute(congested_write_threshold_us); + +rw_attribute(sequential_cutoff); +rw_attribute(sequential_merge); +rw_attribute(data_csum); +rw_attribute(cache_mode); +rw_attribute(writeback_metadata); +rw_attribute(writeback_running); +rw_attribute(writeback_percent); +rw_attribute(writeback_delay); +rw_attribute(writeback_rate); + +rw_attribute(writeback_rate_update_seconds); +rw_attribute(writeback_rate_d_term); +rw_attribute(writeback_rate_p_term_inverse); +rw_attribute(writeback_rate_d_smooth); +read_attribute(writeback_rate_debug); + +rw_attribute(synchronous); +rw_attribute(journal_delay_ms); +rw_attribute(discard); +rw_attribute(running); +rw_attribute(label); +rw_attribute(readahead); +rw_attribute(io_error_limit); +rw_attribute(io_error_halflife); +rw_attribute(verify); +rw_attribute(key_merging_disabled); +rw_attribute(gc_always_rewrite); +rw_attribute(freelist_percent); +rw_attribute(cache_replacement_policy); +rw_attribute(btree_shrinker_disabled); +rw_attribute(copy_gc_enabled); +rw_attribute(size); + +SHOW(__bch_cached_dev) +{ + struct cached_dev *dc = container_of(kobj, struct cached_dev, + disk.kobj); + const char *states[] = { "no cache", "clean", "dirty", "inconsistent" }; + +#define var(stat) (dc->stat) + + if (attr == &sysfs_cache_mode) + return snprint_string_list(buf, PAGE_SIZE, + bch_cache_modes + 1, + BDEV_CACHE_MODE(&dc->sb)); + + sysfs_printf(data_csum, "%i", dc->disk.data_csum); + var_printf(verify, "%i"); + var_printf(writeback_metadata, "%i"); + var_printf(writeback_running, "%i"); + var_print(writeback_delay); + var_print(writeback_percent); + sysfs_print(writeback_rate, dc->writeback_rate.rate); + + var_print(writeback_rate_update_seconds); + var_print(writeback_rate_d_term); + var_print(writeback_rate_p_term_inverse); + var_print(writeback_rate_d_smooth); + + if (attr == &sysfs_writeback_rate_debug) { + char dirty[20]; + char derivative[20]; + char target[20]; + hprint(dirty, + atomic_long_read(&dc->disk.sectors_dirty) << 9); + hprint(derivative, 
dc->writeback_rate_derivative << 9);
+		hprint(target,		dc->writeback_rate_target << 9);
+
+		return sprintf(buf,
+			       "rate:\t\t%u\n"
+			       "change:\t\t%i\n"
+			       "dirty:\t\t%s\n"
+			       "derivative:\t%s\n"
+			       "target:\t\t%s\n",
+			       dc->writeback_rate.rate,
+			       dc->writeback_rate_change,
+			       dirty, derivative, target);
+	}
+
+	sysfs_hprint(dirty_data,
+		     atomic_long_read(&dc->disk.sectors_dirty) << 9);
+
+	var_printf(sequential_merge,	"%i");
+	var_hprint(sequential_cutoff);
+	var_hprint(readahead);
+
+	sysfs_print(running,		atomic_read(&dc->running));
+	sysfs_print(state,		states[BDEV_STATE(&dc->sb)]);
+
+	if (attr == &sysfs_label) {
+		memcpy(buf, dc->sb.label, SB_LABEL_SIZE);
+		buf[SB_LABEL_SIZE] = '\0';
+		strcat(buf, "\n");
+		return strlen(buf);
+	}
+
+#undef var
+	return 0;
+}
+SHOW_LOCKED(bch_cached_dev)
+
+STORE(__cached_dev)
+{
+	struct cached_dev *dc = container_of(kobj, struct cached_dev,
+					     disk.kobj);
+	unsigned v = size;
+	struct cache_set *c;
+
+#define d_strtoul(var)		sysfs_strtoul(var, dc->var)
+#define d_strtoi_h(var)		sysfs_hatoi(var, dc->var)
+
+	sysfs_strtoul(data_csum, dc->disk.data_csum);
+	d_strtoul(verify);
+	d_strtoul(writeback_metadata);
+	d_strtoul(writeback_running);
+	d_strtoul(writeback_delay);
+	sysfs_strtoul_clamp(writeback_rate,
+			    dc->writeback_rate.rate, 1, 1000000);
+	sysfs_strtoul_clamp(writeback_percent, dc->writeback_percent, 0, 40);
+
+	d_strtoul(writeback_rate_update_seconds);
+	d_strtoul(writeback_rate_d_term);
+	sysfs_strtoul_clamp(writeback_rate_p_term_inverse,
+			    dc->writeback_rate_p_term_inverse, 1, INT_MAX);
+	d_strtoul(writeback_rate_d_smooth);
+
+	d_strtoul(sequential_merge);
+	d_strtoi_h(sequential_cutoff);
+	d_strtoi_h(readahead);
+
+	if (attr == &sysfs_clear_stats)
+		bch_cache_accounting_clear(&dc->accounting);
+
+	if (attr == &sysfs_running &&
+	    strtoul_or_return(buf))
+		bch_cached_dev_run(dc);
+
+	if (attr == &sysfs_cache_mode) {
+		ssize_t v = read_string_list(buf, bch_cache_modes + 1);
+
+		if (v < 0)
+			return v;
+
+		if ((unsigned) v != BDEV_CACHE_MODE(&dc->sb)) {
+			SET_BDEV_CACHE_MODE(&dc->sb, v);
+			bch_write_bdev_super(dc, NULL);
+		}
+	}
+
+	if (attr == &sysfs_label) {
+		memcpy(dc->sb.label, buf, SB_LABEL_SIZE);
+		bch_write_bdev_super(dc, NULL);
+		if (dc->disk.c) {
+			memcpy(dc->disk.c->uuids[dc->disk.id].label,
+			       buf, SB_LABEL_SIZE);
+			bch_uuid_write(dc->disk.c);
+		}
+	}
+
+	if (attr == &sysfs_attach) {
+		if (parse_uuid(buf, dc->sb.set_uuid) < 16)
+			return -EINVAL;
+
+		list_for_each_entry(c, &bch_cache_sets, list) {
+			v = bch_cached_dev_attach(dc, c);
+			if (!v)
+				return size;
+		}
+
+		pr_err("Can't attach %s: cache set not found", buf);
+		size = v;
+	}
+
+	if (attr == &sysfs_detach && dc->disk.c)
+		bch_cached_dev_detach(dc);
+
+	if (attr == &sysfs_stop)
+		bcache_device_stop(&dc->disk);
+
+	return size;
+}
+
+STORE(bch_cached_dev)
+{
+	struct cached_dev *dc = container_of(kobj, struct cached_dev,
+					     disk.kobj);
+
+	mutex_lock(&bch_register_lock);
+	size = __cached_dev_store(kobj, attr, buf, size);
+
+	if (attr == &sysfs_writeback_running)
+		bch_writeback_queue(dc);
+
+	if (attr == &sysfs_writeback_percent)
+		schedule_delayed_work(&dc->writeback_rate_update,
+				      dc->writeback_rate_update_seconds * HZ);
+
+	mutex_unlock(&bch_register_lock);
+	return size;
+}
+
+static struct attribute *bch_cached_dev_files[] = {
+	&sysfs_attach,
+	&sysfs_detach,
+	&sysfs_stop,
+#if 0
+	&sysfs_data_csum,
+#endif
+	&sysfs_cache_mode,
+	&sysfs_writeback_metadata,
+	&sysfs_writeback_running,
+	&sysfs_writeback_delay,
+	&sysfs_writeback_percent,
+	&sysfs_writeback_rate,
+	&sysfs_writeback_rate_update_seconds,
+	&sysfs_writeback_rate_d_term,
+	&sysfs_writeback_rate_p_term_inverse,
+	&sysfs_writeback_rate_d_smooth,
+	&sysfs_writeback_rate_debug,
+	&sysfs_dirty_data,
+	&sysfs_sequential_cutoff,
+	&sysfs_sequential_merge,
+	&sysfs_clear_stats,
+	&sysfs_running,
+	&sysfs_state,
+	&sysfs_label,
+	&sysfs_readahead,
+#ifdef CONFIG_BCACHE_DEBUG
+	&sysfs_verify,
+#endif
+	NULL
+};
+KTYPE(bch_cached_dev);
+
+SHOW(bch_flash_dev)
+{
+	struct bcache_device *d = container_of(kobj, struct bcache_device,
+					       kobj);
+	struct uuid_entry *u = &d->c->uuids[d->id];
+
+	sysfs_printf(data_csum,	"%i", d->data_csum);
+	sysfs_hprint(size,	u->sectors << 9);
+
+	if (attr == &sysfs_label) {
+		memcpy(buf, u->label, SB_LABEL_SIZE);
+		buf[SB_LABEL_SIZE] = '\0';
+		strcat(buf, "\n");
+		return strlen(buf);
+	}
+
+	return 0;
+}
+
+STORE(__bch_flash_dev)
+{
+	struct bcache_device *d = container_of(kobj, struct bcache_device,
+					       kobj);
+	struct uuid_entry *u = &d->c->uuids[d->id];
+
+	sysfs_strtoul(data_csum, d->data_csum);
+
+	if (attr == &sysfs_size) {
+		uint64_t v;
+		strtoi_h_or_return(buf, v);
+
+		u->sectors = v >> 9;
+		bch_uuid_write(d->c);
+		set_capacity(d->disk, u->sectors);
+	}
+
+	if (attr == &sysfs_label) {
+		memcpy(u->label, buf, SB_LABEL_SIZE);
+		bch_uuid_write(d->c);
+	}
+
+	if (attr == &sysfs_unregister) {
+		atomic_set(&d->detaching, 1);
+		bcache_device_stop(d);
+	}
+
+	return size;
+}
+STORE_LOCKED(bch_flash_dev)
+
+static struct attribute *bch_flash_dev_files[] = {
+	&sysfs_unregister,
+#if 0
+	&sysfs_data_csum,
+#endif
+	&sysfs_label,
+	&sysfs_size,
+	NULL
+};
+KTYPE(bch_flash_dev);
+
+SHOW(__bch_cache_set)
+{
+	unsigned root_usage(struct cache_set *c)
+	{
+		unsigned bytes = 0;
+		struct bkey *k;
+		struct btree *b;
+		struct btree_iter iter;
+
+		goto lock_root;
+
+		do {
+			rw_unlock(false, b);
+lock_root:
+			b = c->root;
+			rw_lock(false, b, b->level);
+		} while (b != c->root);
+
+		for_each_key_filter(b, k, &iter, bch_ptr_bad)
+			bytes += bkey_bytes(k);
+
+		rw_unlock(false, b);
+
+		return (bytes * 100) / btree_bytes(c);
+	}
+
+	size_t cache_size(struct cache_set *c)
+	{
+		size_t ret = 0;
+		struct btree *b;
+
+		mutex_lock(&c->bucket_lock);
+		list_for_each_entry(b, &c->btree_cache, list)
+			ret += 1 << (b->page_order + PAGE_SHIFT);
+
+		mutex_unlock(&c->bucket_lock);
+		return ret;
+	}
+
+	unsigned cache_max_chain(struct cache_set *c)
+	{
+		unsigned ret = 0;
+		struct hlist_head *h;
+
+		mutex_lock(&c->bucket_lock);
+
+		for (h = c->bucket_hash;
+		     h < c->bucket_hash + (1 << BUCKET_HASH_BITS);
+		     h++) {
+			unsigned i = 0;
+			struct hlist_node *p;
+
+			hlist_for_each(p, h)
+				i++;
+
+			ret = max(ret, i);
+		}
+
+		mutex_unlock(&c->bucket_lock);
+		return ret;
+	}
+
+	unsigned btree_used(struct cache_set *c)
+	{
+		return div64_u64(c->gc_stats.key_bytes * 100,
+				 (c->gc_stats.nodes ?: 1) * btree_bytes(c));
+	}
+
+	unsigned average_key_size(struct cache_set *c)
+	{
+		return c->gc_stats.nkeys
+			?
div64_u64(c->gc_stats.data, c->gc_stats.nkeys) + : 0; + } + + struct cache_set *c = container_of(kobj, struct cache_set, kobj); + + sysfs_print(synchronous, CACHE_SYNC(&c->sb)); + sysfs_print(journal_delay_ms, c->journal_delay_ms); + sysfs_hprint(bucket_size, bucket_bytes(c)); + sysfs_hprint(block_size, block_bytes(c)); + sysfs_print(tree_depth, c->root->level); + sysfs_print(root_usage_percent, root_usage(c)); + + sysfs_hprint(btree_cache_size, cache_size(c)); + sysfs_print(btree_cache_max_chain, cache_max_chain(c)); + sysfs_print(cache_available_percent, 100 - c->gc_stats.in_use); + + sysfs_print_time_stats(&c->btree_gc_time, btree_gc, sec, ms); + sysfs_print_time_stats(&c->btree_split_time, btree_split, sec, us); + sysfs_print_time_stats(&c->sort_time, btree_sort, ms, us); + sysfs_print_time_stats(&c->btree_read_time, btree_read, ms, us); + sysfs_print_time_stats(&c->try_harder_time, try_harder, ms, us); + + sysfs_print(btree_used_percent, btree_used(c)); + sysfs_print(btree_nodes, c->gc_stats.nodes); + sysfs_hprint(dirty_data, c->gc_stats.dirty); + sysfs_hprint(average_key_size, average_key_size(c)); + + sysfs_print(cache_read_races, + atomic_long_read(&c->cache_read_races)); + + sysfs_print(writeback_keys_done, + atomic_long_read(&c->writeback_keys_done)); + sysfs_print(writeback_keys_failed, + atomic_long_read(&c->writeback_keys_failed)); + + /* See count_io_errors for why 88 */ + sysfs_print(io_error_halflife, c->error_decay * 88); + sysfs_print(io_error_limit, c->error_limit >> IO_ERROR_SHIFT); + + sysfs_hprint(congested, + ((uint64_t) bch_get_congested(c)) << 9); + sysfs_print(congested_read_threshold_us, + c->congested_read_threshold_us); + sysfs_print(congested_write_threshold_us, + c->congested_write_threshold_us); + + sysfs_print(active_journal_entries, fifo_used(&c->journal.pin)); + sysfs_printf(verify, "%i", c->verify); + sysfs_printf(key_merging_disabled, "%i", c->key_merging_disabled); + sysfs_printf(gc_always_rewrite, "%i", c->gc_always_rewrite); + sysfs_printf(btree_shrinker_disabled, "%i", c->shrinker_disabled); + sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled); + + if (attr == &sysfs_bset_tree_stats) + return bch_bset_print_stats(c, buf); + + return 0; +} +SHOW_LOCKED(bch_cache_set) + +STORE(__bch_cache_set) +{ + struct cache_set *c = container_of(kobj, struct cache_set, kobj); + + if (attr == &sysfs_unregister) + bch_cache_set_unregister(c); + + if (attr == &sysfs_stop) + bch_cache_set_stop(c); + + if (attr == &sysfs_synchronous) { + bool sync = strtoul_or_return(buf); + + if (sync != CACHE_SYNC(&c->sb)) { + SET_CACHE_SYNC(&c->sb, sync); + bcache_write_super(c); + } + } + + if (attr == &sysfs_flash_vol_create) { + int r; + uint64_t v; + strtoi_h_or_return(buf, v); + + r = bch_flash_dev_create(c, v); + if (r) + return r; + } + + if (attr == &sysfs_clear_stats) { + atomic_long_set(&c->writeback_keys_done, 0); + atomic_long_set(&c->writeback_keys_failed, 0); + + memset(&c->gc_stats, 0, sizeof(struct gc_stat)); + bch_cache_accounting_clear(&c->accounting); + } + + if (attr == &sysfs_trigger_gc) + bch_queue_gc(c); + + if (attr == &sysfs_prune_cache) { + struct shrink_control sc; + sc.gfp_mask = GFP_KERNEL; + sc.nr_to_scan = strtoul_or_return(buf); + c->shrink.shrink(&c->shrink, &sc); + } + + sysfs_strtoul(congested_read_threshold_us, + c->congested_read_threshold_us); + sysfs_strtoul(congested_write_threshold_us, + c->congested_write_threshold_us); + + if (attr == &sysfs_io_error_limit) + c->error_limit = strtoul_or_return(buf) << IO_ERROR_SHIFT; + + /* See 
count_io_errors() for why 88 */ + if (attr == &sysfs_io_error_halflife) + c->error_decay = strtoul_or_return(buf) / 88; + + sysfs_strtoul(journal_delay_ms, c->journal_delay_ms); + sysfs_strtoul(verify, c->verify); + sysfs_strtoul(key_merging_disabled, c->key_merging_disabled); + sysfs_strtoul(gc_always_rewrite, c->gc_always_rewrite); + sysfs_strtoul(btree_shrinker_disabled, c->shrinker_disabled); + sysfs_strtoul(copy_gc_enabled, c->copy_gc_enabled); + + return size; +} +STORE_LOCKED(bch_cache_set) + +SHOW(bch_cache_set_internal) +{ + struct cache_set *c = container_of(kobj, struct cache_set, internal); + return bch_cache_set_show(&c->kobj, attr, buf); +} + +STORE(bch_cache_set_internal) +{ + struct cache_set *c = container_of(kobj, struct cache_set, internal); + return bch_cache_set_store(&c->kobj, attr, buf, size); +} + +static void bch_cache_set_internal_release(struct kobject *k) +{ +} + +static struct attribute *bch_cache_set_files[] = { + &sysfs_unregister, + &sysfs_stop, + &sysfs_synchronous, + &sysfs_journal_delay_ms, + &sysfs_flash_vol_create, + + &sysfs_bucket_size, + &sysfs_block_size, + &sysfs_tree_depth, + &sysfs_root_usage_percent, + &sysfs_btree_cache_size, + &sysfs_cache_available_percent, + + &sysfs_average_key_size, + &sysfs_dirty_data, + + &sysfs_io_error_limit, + &sysfs_io_error_halflife, + &sysfs_congested, + &sysfs_congested_read_threshold_us, + &sysfs_congested_write_threshold_us, + &sysfs_clear_stats, + NULL +}; +KTYPE(bch_cache_set); + +static struct attribute *bch_cache_set_internal_files[] = { + &sysfs_active_journal_entries, + + sysfs_time_stats_attribute_list(btree_gc, sec, ms) + sysfs_time_stats_attribute_list(btree_split, sec, us) + sysfs_time_stats_attribute_list(btree_sort, ms, us) + sysfs_time_stats_attribute_list(btree_read, ms, us) + sysfs_time_stats_attribute_list(try_harder, ms, us) + + &sysfs_btree_nodes, + &sysfs_btree_used_percent, + &sysfs_btree_cache_max_chain, + + &sysfs_bset_tree_stats, + &sysfs_cache_read_races, + &sysfs_writeback_keys_done, + &sysfs_writeback_keys_failed, + + &sysfs_trigger_gc, + &sysfs_prune_cache, +#ifdef CONFIG_BCACHE_DEBUG + &sysfs_verify, + &sysfs_key_merging_disabled, +#endif + &sysfs_gc_always_rewrite, + &sysfs_btree_shrinker_disabled, + &sysfs_copy_gc_enabled, + NULL +}; +KTYPE(bch_cache_set_internal); + +SHOW(__bch_cache) +{ + struct cache *ca = container_of(kobj, struct cache, kobj); + + sysfs_hprint(bucket_size, bucket_bytes(ca)); + sysfs_hprint(block_size, block_bytes(ca)); + sysfs_print(nbuckets, ca->sb.nbuckets); + sysfs_print(discard, ca->discard); + sysfs_hprint(written, atomic_long_read(&ca->sectors_written) << 9); + sysfs_hprint(btree_written, + atomic_long_read(&ca->btree_sectors_written) << 9); + sysfs_hprint(metadata_written, + (atomic_long_read(&ca->meta_sectors_written) + + atomic_long_read(&ca->btree_sectors_written)) << 9); + + sysfs_print(io_errors, + atomic_read(&ca->io_errors) >> IO_ERROR_SHIFT); + + sysfs_print(freelist_percent, ca->free.size * 100 / + ((size_t) ca->sb.nbuckets)); + + if (attr == &sysfs_cache_replacement_policy) + return snprint_string_list(buf, PAGE_SIZE, + cache_replacement_policies, + CACHE_REPLACEMENT(&ca->sb)); + + if (attr == &sysfs_priority_stats) { + int cmp(const void *l, const void *r) + { return *((uint16_t *) r) - *((uint16_t *) l); } + + /* Number of quantiles we compute */ + const unsigned nq = 31; + + size_t n = ca->sb.nbuckets, i, unused, btree; + uint64_t sum = 0; + uint16_t q[nq], *p, *cached; + ssize_t ret; + + cached = p = vmalloc(ca->sb.nbuckets * 
sizeof(uint16_t)); + if (!p) + return -ENOMEM; + + mutex_lock(&ca->set->bucket_lock); + for (i = ca->sb.first_bucket; i < n; i++) + p[i] = ca->buckets[i].prio; + mutex_unlock(&ca->set->bucket_lock); + + sort(p, n, sizeof(uint16_t), cmp, NULL); + + while (n && + !cached[n - 1]) + --n; + + unused = ca->sb.nbuckets - n; + + while (cached < p + n && + *cached == BTREE_PRIO) + cached++; + + btree = cached - p; + n -= btree; + + for (i = 0; i < n; i++) + sum += INITIAL_PRIO - cached[i]; + + if (n) + do_div(sum, n); + + for (i = 0; i < nq; i++) + q[i] = INITIAL_PRIO - cached[n * (i + 1) / (nq + 1)]; + + vfree(p); + + ret = snprintf(buf, PAGE_SIZE, + "Unused: %zu%%\n" + "Metadata: %zu%%\n" + "Average: %llu\n" + "Sectors per Q: %zu\n" + "Quantiles: [", + unused * 100 / (size_t) ca->sb.nbuckets, + btree * 100 / (size_t) ca->sb.nbuckets, sum, + n * ca->sb.bucket_size / (nq + 1)); + + for (i = 0; i < nq && ret < (ssize_t) PAGE_SIZE; i++) + ret += snprintf(buf + ret, PAGE_SIZE - ret, + i < nq - 1 ? "%u " : "%u]\n", q[i]); + + buf[PAGE_SIZE - 1] = '\0'; + return ret; + } + + return 0; +} +SHOW_LOCKED(bch_cache) + +STORE(__bch_cache) +{ + struct cache *ca = container_of(kobj, struct cache, kobj); + + if (attr == &sysfs_discard) { + bool v = strtoul_or_return(buf); + + if (blk_queue_discard(bdev_get_queue(ca->bdev))) + ca->discard = v; + + if (v != CACHE_DISCARD(&ca->sb)) { + SET_CACHE_DISCARD(&ca->sb, v); + bcache_write_super(ca->set); + } + } + + if (attr == &sysfs_cache_replacement_policy) { + ssize_t v = read_string_list(buf, cache_replacement_policies); + + if (v < 0) + return v; + + if ((unsigned) v != CACHE_REPLACEMENT(&ca->sb)) { + mutex_lock(&ca->set->bucket_lock); + SET_CACHE_REPLACEMENT(&ca->sb, v); + mutex_unlock(&ca->set->bucket_lock); + + bcache_write_super(ca->set); + } + } + + if (attr == &sysfs_freelist_percent) { + DECLARE_FIFO(long, free); + long i; + size_t p = strtoul_or_return(buf); + + p = clamp_t(size_t, + ((size_t) ca->sb.nbuckets * p) / 100, + roundup_pow_of_two(ca->sb.nbuckets) >> 9, + ca->sb.nbuckets / 2); + + if (!init_fifo_exact(&free, p, GFP_KERNEL)) + return -ENOMEM; + + mutex_lock(&ca->set->bucket_lock); + + fifo_move(&free, &ca->free); + fifo_swap(&free, &ca->free); + + mutex_unlock(&ca->set->bucket_lock); + + while (fifo_pop(&free, i)) + atomic_dec(&ca->buckets[i].pin); + + free_fifo(&free); + } + + if (attr == &sysfs_clear_stats) { + atomic_long_set(&ca->sectors_written, 0); + atomic_long_set(&ca->btree_sectors_written, 0); + atomic_long_set(&ca->meta_sectors_written, 0); + atomic_set(&ca->io_count, 0); + atomic_set(&ca->io_errors, 0); + } + + return size; +} +STORE_LOCKED(bch_cache) + +static struct attribute *bch_cache_files[] = { + &sysfs_bucket_size, + &sysfs_block_size, + &sysfs_nbuckets, + &sysfs_priority_stats, + &sysfs_discard, + &sysfs_written, + &sysfs_btree_written, + &sysfs_metadata_written, + &sysfs_io_errors, + &sysfs_clear_stats, + &sysfs_freelist_percent, + &sysfs_cache_replacement_policy, + NULL +}; +KTYPE(bch_cache); diff --git a/drivers/md/bcache/sysfs.h b/drivers/md/bcache/sysfs.h new file mode 100644 index 000000000000..34e4ba1184fe --- /dev/null +++ b/drivers/md/bcache/sysfs.h @@ -0,0 +1,110 @@ +#ifndef _BCACHE_SYSFS_H_ +#define _BCACHE_SYSFS_H_ + +#define KTYPE(type) \ +struct kobj_type type ## _ktype = { \ + .release = type ## _release, \ + .sysfs_ops = &((const struct sysfs_ops) { \ + .show = type ## _show, \ + .store = type ## _store \ + }), \ + .default_attrs = type ## _files \ +} + +#define SHOW(fn) \ +static ssize_t fn ## _show(struct 
kobject *kobj, struct attribute *attr,\ + char *buf) \ + +#define STORE(fn) \ +static ssize_t fn ## _store(struct kobject *kobj, struct attribute *attr,\ + const char *buf, size_t size) \ + +#define SHOW_LOCKED(fn) \ +SHOW(fn) \ +{ \ + ssize_t ret; \ + mutex_lock(&bch_register_lock); \ + ret = __ ## fn ## _show(kobj, attr, buf); \ + mutex_unlock(&bch_register_lock); \ + return ret; \ +} + +#define STORE_LOCKED(fn) \ +STORE(fn) \ +{ \ + ssize_t ret; \ + mutex_lock(&bch_register_lock); \ + ret = __ ## fn ## _store(kobj, attr, buf, size); \ + mutex_unlock(&bch_register_lock); \ + return ret; \ +} + +#define __sysfs_attribute(_name, _mode) \ + static struct attribute sysfs_##_name = \ + { .name = #_name, .mode = _mode } + +#define write_attribute(n) __sysfs_attribute(n, S_IWUSR) +#define read_attribute(n) __sysfs_attribute(n, S_IRUGO) +#define rw_attribute(n) __sysfs_attribute(n, S_IRUGO|S_IWUSR) + +#define sysfs_printf(file, fmt, ...) \ +do { \ + if (attr == &sysfs_ ## file) \ + return snprintf(buf, PAGE_SIZE, fmt "\n", __VA_ARGS__); \ +} while (0) + +#define sysfs_print(file, var) \ +do { \ + if (attr == &sysfs_ ## file) \ + return snprint(buf, PAGE_SIZE, var); \ +} while (0) + +#define sysfs_hprint(file, val) \ +do { \ + if (attr == &sysfs_ ## file) { \ + ssize_t ret = hprint(buf, val); \ + strcat(buf, "\n"); \ + return ret + 1; \ + } \ +} while (0) + +#define var_printf(_var, fmt) sysfs_printf(_var, fmt, var(_var)) +#define var_print(_var) sysfs_print(_var, var(_var)) +#define var_hprint(_var) sysfs_hprint(_var, var(_var)) + +#define sysfs_strtoul(file, var) \ +do { \ + if (attr == &sysfs_ ## file) \ + return strtoul_safe(buf, var) ?: (ssize_t) size; \ +} while (0) + +#define sysfs_strtoul_clamp(file, var, min, max) \ +do { \ + if (attr == &sysfs_ ## file) \ + return strtoul_safe_clamp(buf, var, min, max) \ + ?: (ssize_t) size; \ +} while (0) + +#define strtoul_or_return(cp) \ +({ \ + unsigned long _v; \ + int _r = kstrtoul(cp, 10, &_v); \ + if (_r) \ + return _r; \ + _v; \ +}) + +#define strtoi_h_or_return(cp, v) \ +do { \ + int _r = strtoi_h(cp, &v); \ + if (_r) \ + return _r; \ +} while (0) + +#define sysfs_hatoi(file, var) \ +do { \ + if (attr == &sysfs_ ## file) \ + return strtoi_h(buf, &var) ?: (ssize_t) size; \ +} while (0) + +#endif /* _BCACHE_SYSFS_H_ */ diff --git a/drivers/md/bcache/trace.c b/drivers/md/bcache/trace.c new file mode 100644 index 000000000000..983f9bb411bc --- /dev/null +++ b/drivers/md/bcache/trace.c @@ -0,0 +1,26 @@ +#include "bcache.h" +#include "btree.h" +#include "request.h" + +#include + +#define CREATE_TRACE_POINTS +#include + +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_request_start); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_request_end); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_passthrough); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_cache_hit); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_cache_miss); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_read_retry); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_writethrough); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_writeback); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_write_skip); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_read); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_write); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_write_dirty); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_read_dirty); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_write); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_cache_insert); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_start); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_end); diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c new file mode 100644 index 
000000000000..dcec2e4f84ad --- /dev/null +++ b/drivers/md/bcache/util.c @@ -0,0 +1,389 @@ +/* + * random utiility code, for bcache but in theory not specific to bcache + * + * Copyright 2010, 2011 Kent Overstreet + * Copyright 2012 Google, Inc. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "util.h" + +#define simple_strtoint(c, end, base) simple_strtol(c, end, base) +#define simple_strtouint(c, end, base) simple_strtoul(c, end, base) + +#define STRTO_H(name, type) \ +int name ## _h(const char *cp, type *res) \ +{ \ + int u = 0; \ + char *e; \ + type i = simple_ ## name(cp, &e, 10); \ + \ + switch (tolower(*e)) { \ + default: \ + return -EINVAL; \ + case 'y': \ + case 'z': \ + u++; \ + case 'e': \ + u++; \ + case 'p': \ + u++; \ + case 't': \ + u++; \ + case 'g': \ + u++; \ + case 'm': \ + u++; \ + case 'k': \ + u++; \ + if (e++ == cp) \ + return -EINVAL; \ + case '\n': \ + case '\0': \ + if (*e == '\n') \ + e++; \ + } \ + \ + if (*e) \ + return -EINVAL; \ + \ + while (u--) { \ + if ((type) ~0 > 0 && \ + (type) ~0 / 1024 <= i) \ + return -EINVAL; \ + if ((i > 0 && ANYSINT_MAX(type) / 1024 < i) || \ + (i < 0 && -ANYSINT_MAX(type) / 1024 > i)) \ + return -EINVAL; \ + i *= 1024; \ + } \ + \ + *res = i; \ + return 0; \ +} \ +EXPORT_SYMBOL_GPL(name ## _h); + +STRTO_H(strtoint, int) +STRTO_H(strtouint, unsigned int) +STRTO_H(strtoll, long long) +STRTO_H(strtoull, unsigned long long) + +ssize_t hprint(char *buf, int64_t v) +{ + static const char units[] = "?kMGTPEZY"; + char dec[3] = ""; + int u, t = 0; + + for (u = 0; v >= 1024 || v <= -1024; u++) { + t = v & ~(~0 << 10); + v >>= 10; + } + + if (!u) + return sprintf(buf, "%llu", v); + + if (v < 100 && v > -100) + sprintf(dec, ".%i", t / 100); + + return sprintf(buf, "%lli%s%c", v, dec, units[u]); +} +EXPORT_SYMBOL_GPL(hprint); + +ssize_t snprint_string_list(char *buf, size_t size, const char * const list[], + size_t selected) +{ + char *out = buf; + size_t i; + + for (i = 0; list[i]; i++) + out += snprintf(out, buf + size - out, + i == selected ? "[%s] " : "%s ", list[i]); + + out[-1] = '\n'; + return out - buf; +} +EXPORT_SYMBOL_GPL(snprint_string_list); + +ssize_t read_string_list(const char *buf, const char * const list[]) +{ + size_t i; + char *s, *d = kstrndup(buf, PAGE_SIZE - 1, GFP_KERNEL); + if (!d) + return -ENOMEM; + + s = strim(d); + + for (i = 0; list[i]; i++) + if (!strcmp(list[i], s)) + break; + + kfree(d); + + if (!list[i]) + return -EINVAL; + + return i; +} +EXPORT_SYMBOL_GPL(read_string_list); + +bool is_zero(const char *p, size_t n) +{ + size_t i; + + for (i = 0; i < n; i++) + if (p[i]) + return false; + return true; +} +EXPORT_SYMBOL_GPL(is_zero); + +int parse_uuid(const char *s, char *uuid) +{ + size_t i, j, x; + memset(uuid, 0, 16); + + for (i = 0, j = 0; + i < strspn(s, "-0123456789:ABCDEFabcdef") && j < 32; + i++) { + x = s[i] | 32; + + switch (x) { + case '0'...'9': + x -= '0'; + break; + case 'a'...'f': + x -= 'a' - 10; + break; + default: + continue; + } + + if (!(j & 1)) + x <<= 4; + uuid[j++ >> 1] |= x; + } + return i; +} +EXPORT_SYMBOL_GPL(parse_uuid); + +void time_stats_update(struct time_stats *stats, uint64_t start_time) +{ + uint64_t now = local_clock(); + uint64_t duration = time_after64(now, start_time) + ? now - start_time : 0; + uint64_t last = time_after64(now, stats->last) + ? 
now - stats->last : 0; + + stats->max_duration = max(stats->max_duration, duration); + + if (stats->last) { + ewma_add(stats->average_duration, duration, 8, 8); + + if (stats->average_frequency) + ewma_add(stats->average_frequency, last, 8, 8); + else + stats->average_frequency = last << 8; + } else { + stats->average_duration = duration << 8; + } + + stats->last = now ?: 1; +} +EXPORT_SYMBOL_GPL(time_stats_update); + +unsigned next_delay(struct ratelimit *d, uint64_t done) +{ + uint64_t now = local_clock(); + + d->next += div_u64(done, d->rate); + + return time_after64(d->next, now) + ? div_u64(d->next - now, NSEC_PER_SEC / HZ) + : 0; +} +EXPORT_SYMBOL_GPL(next_delay); + +void bio_map(struct bio *bio, void *base) +{ + size_t size = bio->bi_size; + struct bio_vec *bv = bio->bi_io_vec; + + BUG_ON(!bio->bi_size); + BUG_ON(bio->bi_vcnt); + + bv->bv_offset = base ? ((unsigned long) base) % PAGE_SIZE : 0; + goto start; + + for (; size; bio->bi_vcnt++, bv++) { + bv->bv_offset = 0; +start: bv->bv_len = min_t(size_t, PAGE_SIZE - bv->bv_offset, + size); + if (base) { + bv->bv_page = is_vmalloc_addr(base) + ? vmalloc_to_page(base) + : virt_to_page(base); + + base += bv->bv_len; + } + + size -= bv->bv_len; + } +} +EXPORT_SYMBOL_GPL(bio_map); + +int bio_alloc_pages(struct bio *bio, gfp_t gfp) +{ + int i; + struct bio_vec *bv; + + bio_for_each_segment(bv, bio, i) { + bv->bv_page = alloc_page(gfp); + if (!bv->bv_page) { + while (bv-- != bio->bi_io_vec + bio->bi_idx) + __free_page(bv->bv_page); + return -ENOMEM; + } + } + + return 0; +} +EXPORT_SYMBOL_GPL(bio_alloc_pages); + +/* + * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group (Any + * use permitted, subject to terms of PostgreSQL license; see.) + + * If we have a 64-bit integer type, then a 64-bit CRC looks just like the + * usual sort of implementation. (See Ross Williams' excellent introduction + * A PAINLESS GUIDE TO CRC ERROR DETECTION ALGORITHMS, available from + * ftp://ftp.rocksoft.com/papers/crc_v3.txt or several other net sites.) + * If we have no working 64-bit type, then fake it with two 32-bit registers. + * + * The present implementation is a normal (not "reflected", in Williams' + * terms) 64-bit CRC, using initial all-ones register contents and a final + * bit inversion. 
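A usage sketch of the two entry points defined below this table (buffer names here are hypothetical): feeding the data through crc64_update() in fragments matches a single crc64() call, since crc64() is just the all-ones seed, one update pass, and the final inversion.

	uint64_t c = 0xffffffffffffffffULL;	/* initial all-ones register */
	c = crc64_update(c, part1, len1);
	c = crc64_update(c, part2, len2);
	c ^= 0xffffffffffffffffULL;		/* final bit inversion */
	/* c now equals crc64(whole, len1 + len2) when part1 followed
	 * by part2 is the same bytes as whole */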
The chosen polynomial is borrowed from the DLT1 spec + * (ECMA-182, available from http://www.ecma.ch/ecma1/STAND/ECMA-182.HTM): + * + * x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 + + * x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 + + * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 + + * x^7 + x^4 + x + 1 +*/ + +static const uint64_t crc_table[256] = { + 0x0000000000000000, 0x42F0E1EBA9EA3693, 0x85E1C3D753D46D26, + 0xC711223CFA3E5BB5, 0x493366450E42ECDF, 0x0BC387AEA7A8DA4C, + 0xCCD2A5925D9681F9, 0x8E224479F47CB76A, 0x9266CC8A1C85D9BE, + 0xD0962D61B56FEF2D, 0x17870F5D4F51B498, 0x5577EEB6E6BB820B, + 0xDB55AACF12C73561, 0x99A54B24BB2D03F2, 0x5EB4691841135847, + 0x1C4488F3E8F96ED4, 0x663D78FF90E185EF, 0x24CD9914390BB37C, + 0xE3DCBB28C335E8C9, 0xA12C5AC36ADFDE5A, 0x2F0E1EBA9EA36930, + 0x6DFEFF5137495FA3, 0xAAEFDD6DCD770416, 0xE81F3C86649D3285, + 0xF45BB4758C645C51, 0xB6AB559E258E6AC2, 0x71BA77A2DFB03177, + 0x334A9649765A07E4, 0xBD68D2308226B08E, 0xFF9833DB2BCC861D, + 0x388911E7D1F2DDA8, 0x7A79F00C7818EB3B, 0xCC7AF1FF21C30BDE, + 0x8E8A101488293D4D, 0x499B3228721766F8, 0x0B6BD3C3DBFD506B, + 0x854997BA2F81E701, 0xC7B97651866BD192, 0x00A8546D7C558A27, + 0x4258B586D5BFBCB4, 0x5E1C3D753D46D260, 0x1CECDC9E94ACE4F3, + 0xDBFDFEA26E92BF46, 0x990D1F49C77889D5, 0x172F5B3033043EBF, + 0x55DFBADB9AEE082C, 0x92CE98E760D05399, 0xD03E790CC93A650A, + 0xAA478900B1228E31, 0xE8B768EB18C8B8A2, 0x2FA64AD7E2F6E317, + 0x6D56AB3C4B1CD584, 0xE374EF45BF6062EE, 0xA1840EAE168A547D, + 0x66952C92ECB40FC8, 0x2465CD79455E395B, 0x3821458AADA7578F, + 0x7AD1A461044D611C, 0xBDC0865DFE733AA9, 0xFF3067B657990C3A, + 0x711223CFA3E5BB50, 0x33E2C2240A0F8DC3, 0xF4F3E018F031D676, + 0xB60301F359DBE0E5, 0xDA050215EA6C212F, 0x98F5E3FE438617BC, + 0x5FE4C1C2B9B84C09, 0x1D14202910527A9A, 0x93366450E42ECDF0, + 0xD1C685BB4DC4FB63, 0x16D7A787B7FAA0D6, 0x5427466C1E109645, + 0x4863CE9FF6E9F891, 0x0A932F745F03CE02, 0xCD820D48A53D95B7, + 0x8F72ECA30CD7A324, 0x0150A8DAF8AB144E, 0x43A04931514122DD, + 0x84B16B0DAB7F7968, 0xC6418AE602954FFB, 0xBC387AEA7A8DA4C0, + 0xFEC89B01D3679253, 0x39D9B93D2959C9E6, 0x7B2958D680B3FF75, + 0xF50B1CAF74CF481F, 0xB7FBFD44DD257E8C, 0x70EADF78271B2539, + 0x321A3E938EF113AA, 0x2E5EB66066087D7E, 0x6CAE578BCFE24BED, + 0xABBF75B735DC1058, 0xE94F945C9C3626CB, 0x676DD025684A91A1, + 0x259D31CEC1A0A732, 0xE28C13F23B9EFC87, 0xA07CF2199274CA14, + 0x167FF3EACBAF2AF1, 0x548F120162451C62, 0x939E303D987B47D7, + 0xD16ED1D631917144, 0x5F4C95AFC5EDC62E, 0x1DBC74446C07F0BD, + 0xDAAD56789639AB08, 0x985DB7933FD39D9B, 0x84193F60D72AF34F, + 0xC6E9DE8B7EC0C5DC, 0x01F8FCB784FE9E69, 0x43081D5C2D14A8FA, + 0xCD2A5925D9681F90, 0x8FDAB8CE70822903, 0x48CB9AF28ABC72B6, + 0x0A3B7B1923564425, 0x70428B155B4EAF1E, 0x32B26AFEF2A4998D, + 0xF5A348C2089AC238, 0xB753A929A170F4AB, 0x3971ED50550C43C1, + 0x7B810CBBFCE67552, 0xBC902E8706D82EE7, 0xFE60CF6CAF321874, + 0xE224479F47CB76A0, 0xA0D4A674EE214033, 0x67C58448141F1B86, + 0x253565A3BDF52D15, 0xAB1721DA49899A7F, 0xE9E7C031E063ACEC, + 0x2EF6E20D1A5DF759, 0x6C0603E6B3B7C1CA, 0xF6FAE5C07D3274CD, + 0xB40A042BD4D8425E, 0x731B26172EE619EB, 0x31EBC7FC870C2F78, + 0xBFC9838573709812, 0xFD39626EDA9AAE81, 0x3A28405220A4F534, + 0x78D8A1B9894EC3A7, 0x649C294A61B7AD73, 0x266CC8A1C85D9BE0, + 0xE17DEA9D3263C055, 0xA38D0B769B89F6C6, 0x2DAF4F0F6FF541AC, + 0x6F5FAEE4C61F773F, 0xA84E8CD83C212C8A, 0xEABE6D3395CB1A19, + 0x90C79D3FEDD3F122, 0xD2377CD44439C7B1, 0x15265EE8BE079C04, + 0x57D6BF0317EDAA97, 0xD9F4FB7AE3911DFD, 0x9B041A914A7B2B6E, + 0x5C1538ADB04570DB, 
0x1EE5D94619AF4648, 0x02A151B5F156289C, + 0x4051B05E58BC1E0F, 0x87409262A28245BA, 0xC5B073890B687329, + 0x4B9237F0FF14C443, 0x0962D61B56FEF2D0, 0xCE73F427ACC0A965, + 0x8C8315CC052A9FF6, 0x3A80143F5CF17F13, 0x7870F5D4F51B4980, + 0xBF61D7E80F251235, 0xFD913603A6CF24A6, 0x73B3727A52B393CC, + 0x31439391FB59A55F, 0xF652B1AD0167FEEA, 0xB4A25046A88DC879, + 0xA8E6D8B54074A6AD, 0xEA16395EE99E903E, 0x2D071B6213A0CB8B, + 0x6FF7FA89BA4AFD18, 0xE1D5BEF04E364A72, 0xA3255F1BE7DC7CE1, + 0x64347D271DE22754, 0x26C49CCCB40811C7, 0x5CBD6CC0CC10FAFC, + 0x1E4D8D2B65FACC6F, 0xD95CAF179FC497DA, 0x9BAC4EFC362EA149, + 0x158E0A85C2521623, 0x577EEB6E6BB820B0, 0x906FC95291867B05, + 0xD29F28B9386C4D96, 0xCEDBA04AD0952342, 0x8C2B41A1797F15D1, + 0x4B3A639D83414E64, 0x09CA82762AAB78F7, 0x87E8C60FDED7CF9D, + 0xC51827E4773DF90E, 0x020905D88D03A2BB, 0x40F9E43324E99428, + 0x2CFFE7D5975E55E2, 0x6E0F063E3EB46371, 0xA91E2402C48A38C4, + 0xEBEEC5E96D600E57, 0x65CC8190991CB93D, 0x273C607B30F68FAE, + 0xE02D4247CAC8D41B, 0xA2DDA3AC6322E288, 0xBE992B5F8BDB8C5C, + 0xFC69CAB42231BACF, 0x3B78E888D80FE17A, 0x7988096371E5D7E9, + 0xF7AA4D1A85996083, 0xB55AACF12C735610, 0x724B8ECDD64D0DA5, + 0x30BB6F267FA73B36, 0x4AC29F2A07BFD00D, 0x08327EC1AE55E69E, + 0xCF235CFD546BBD2B, 0x8DD3BD16FD818BB8, 0x03F1F96F09FD3CD2, + 0x41011884A0170A41, 0x86103AB85A2951F4, 0xC4E0DB53F3C36767, + 0xD8A453A01B3A09B3, 0x9A54B24BB2D03F20, 0x5D45907748EE6495, + 0x1FB5719CE1045206, 0x919735E51578E56C, 0xD367D40EBC92D3FF, + 0x1476F63246AC884A, 0x568617D9EF46BED9, 0xE085162AB69D5E3C, + 0xA275F7C11F7768AF, 0x6564D5FDE549331A, 0x279434164CA30589, + 0xA9B6706FB8DFB2E3, 0xEB46918411358470, 0x2C57B3B8EB0BDFC5, + 0x6EA7525342E1E956, 0x72E3DAA0AA188782, 0x30133B4B03F2B111, + 0xF7021977F9CCEAA4, 0xB5F2F89C5026DC37, 0x3BD0BCE5A45A6B5D, + 0x79205D0E0DB05DCE, 0xBE317F32F78E067B, 0xFCC19ED95E6430E8, + 0x86B86ED5267CDBD3, 0xC4488F3E8F96ED40, 0x0359AD0275A8B6F5, + 0x41A94CE9DC428066, 0xCF8B0890283E370C, 0x8D7BE97B81D4019F, + 0x4A6ACB477BEA5A2A, 0x089A2AACD2006CB9, 0x14DEA25F3AF9026D, + 0x562E43B4931334FE, 0x913F6188692D6F4B, 0xD3CF8063C0C759D8, + 0x5DEDC41A34BBEEB2, 0x1F1D25F19D51D821, 0xD80C07CD676F8394, + 0x9AFCE626CE85B507 +}; + +uint64_t crc64_update(uint64_t crc, const void *_data, size_t len) +{ + const unsigned char *data = _data; + + while (len--) { + int i = ((int) (crc >> 56) ^ *data++) & 0xFF; + crc = crc_table[i] ^ (crc << 8); + } + + return crc; +} +EXPORT_SYMBOL(crc64_update); + +uint64_t crc64(const void *data, size_t len) +{ + uint64_t crc = 0xffffffffffffffff; + + crc = crc64_update(crc, data, len); + + return crc ^ 0xffffffffffffffff; +} +EXPORT_SYMBOL(crc64); diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h new file mode 100644 index 000000000000..56705fdcc149 --- /dev/null +++ b/drivers/md/bcache/util.h @@ -0,0 +1,589 @@ + +#ifndef _BCACHE_UTIL_H +#define _BCACHE_UTIL_H + +#include +#include +#include +#include +#include +#include + +#include "closure.h" + +#define PAGE_SECTORS (PAGE_SIZE / 512) + +struct closure; + +#include + +#ifdef CONFIG_BCACHE_EDEBUG + +#define atomic_dec_bug(v) BUG_ON(atomic_dec_return(v) < 0) +#define atomic_inc_bug(v, i) BUG_ON(atomic_inc_return(v) <= i) + +#else /* EDEBUG */ + +#define atomic_dec_bug(v) atomic_dec(v) +#define atomic_inc_bug(v, i) atomic_inc(v) + +#endif + +#define BITMASK(name, type, field, offset, size) \ +static inline uint64_t name(const type *k) \ +{ return (k->field >> offset) & ~(((uint64_t) ~0) << size); } \ + \ +static inline void SET_##name(type *k, uint64_t v) \ +{ \ + k->field &= ~(~((uint64_t) 
~0 << size) << offset); \ + k->field |= v << offset; \ +} + +#define DECLARE_HEAP(type, name) \ + struct { \ + size_t size, used; \ + type *data; \ + } name + +#define init_heap(heap, _size, gfp) \ +({ \ + size_t _bytes; \ + (heap)->used = 0; \ + (heap)->size = (_size); \ + _bytes = (heap)->size * sizeof(*(heap)->data); \ + (heap)->data = NULL; \ + if (_bytes < KMALLOC_MAX_SIZE) \ + (heap)->data = kmalloc(_bytes, (gfp)); \ + if ((!(heap)->data) && ((gfp) & GFP_KERNEL)) \ + (heap)->data = vmalloc(_bytes); \ + (heap)->data; \ +}) + +#define free_heap(heap) \ +do { \ + if (is_vmalloc_addr((heap)->data)) \ + vfree((heap)->data); \ + else \ + kfree((heap)->data); \ + (heap)->data = NULL; \ +} while (0) + +#define heap_swap(h, i, j) swap((h)->data[i], (h)->data[j]) + +#define heap_sift(h, i, cmp) \ +do { \ + size_t _r, _j = i; \ + \ + for (; _j * 2 + 1 < (h)->used; _j = _r) { \ + _r = _j * 2 + 1; \ + if (_r + 1 < (h)->used && \ + cmp((h)->data[_r], (h)->data[_r + 1])) \ + _r++; \ + \ + if (cmp((h)->data[_r], (h)->data[_j])) \ + break; \ + heap_swap(h, _r, _j); \ + } \ +} while (0) + +#define heap_sift_down(h, i, cmp) \ +do { \ + while (i) { \ + size_t p = (i - 1) / 2; \ + if (cmp((h)->data[i], (h)->data[p])) \ + break; \ + heap_swap(h, i, p); \ + i = p; \ + } \ +} while (0) + +#define heap_add(h, d, cmp) \ +({ \ + bool _r = !heap_full(h); \ + if (_r) { \ + size_t _i = (h)->used++; \ + (h)->data[_i] = d; \ + \ + heap_sift_down(h, _i, cmp); \ + heap_sift(h, _i, cmp); \ + } \ + _r; \ +}) + +#define heap_pop(h, d, cmp) \ +({ \ + bool _r = (h)->used; \ + if (_r) { \ + (d) = (h)->data[0]; \ + (h)->used--; \ + heap_swap(h, 0, (h)->used); \ + heap_sift(h, 0, cmp); \ + } \ + _r; \ +}) + +#define heap_peek(h) ((h)->size ? (h)->data[0] : NULL) + +#define heap_full(h) ((h)->used == (h)->size) + +#define DECLARE_FIFO(type, name) \ + struct { \ + size_t front, back, size, mask; \ + type *data; \ + } name + +#define fifo_for_each(c, fifo, iter) \ + for (iter = (fifo)->front; \ + c = (fifo)->data[iter], iter != (fifo)->back; \ + iter = (iter + 1) & (fifo)->mask) + +#define __init_fifo(fifo, gfp) \ +({ \ + size_t _allocated_size, _bytes; \ + BUG_ON(!(fifo)->size); \ + \ + _allocated_size = roundup_pow_of_two((fifo)->size + 1); \ + _bytes = _allocated_size * sizeof(*(fifo)->data); \ + \ + (fifo)->mask = _allocated_size - 1; \ + (fifo)->front = (fifo)->back = 0; \ + (fifo)->data = NULL; \ + \ + if (_bytes < KMALLOC_MAX_SIZE) \ + (fifo)->data = kmalloc(_bytes, (gfp)); \ + if ((!(fifo)->data) && ((gfp) & GFP_KERNEL)) \ + (fifo)->data = vmalloc(_bytes); \ + (fifo)->data; \ +}) + +#define init_fifo_exact(fifo, _size, gfp) \ +({ \ + (fifo)->size = (_size); \ + __init_fifo(fifo, gfp); \ +}) + +#define init_fifo(fifo, _size, gfp) \ +({ \ + (fifo)->size = (_size); \ + if ((fifo)->size > 4) \ + (fifo)->size = roundup_pow_of_two((fifo)->size) - 1; \ + __init_fifo(fifo, gfp); \ +}) + +#define free_fifo(fifo) \ +do { \ + if (is_vmalloc_addr((fifo)->data)) \ + vfree((fifo)->data); \ + else \ + kfree((fifo)->data); \ + (fifo)->data = NULL; \ +} while (0) + +#define fifo_used(fifo) (((fifo)->back - (fifo)->front) & (fifo)->mask) +#define fifo_free(fifo) ((fifo)->size - fifo_used(fifo)) + +#define fifo_empty(fifo) (!fifo_used(fifo)) +#define fifo_full(fifo) (!fifo_free(fifo)) + +#define fifo_front(fifo) ((fifo)->data[(fifo)->front]) +#define fifo_back(fifo) \ + ((fifo)->data[((fifo)->back - 1) & (fifo)->mask]) + +#define fifo_idx(fifo, p) (((p) - &fifo_front(fifo)) & (fifo)->mask) + +#define fifo_push_back(fifo, i) \ +({ \ + bool 
_r = !fifo_full((fifo)); \ + if (_r) { \ + (fifo)->data[(fifo)->back++] = (i); \ + (fifo)->back &= (fifo)->mask; \ + } \ + _r; \ +}) + +#define fifo_pop_front(fifo, i) \ +({ \ + bool _r = !fifo_empty((fifo)); \ + if (_r) { \ + (i) = (fifo)->data[(fifo)->front++]; \ + (fifo)->front &= (fifo)->mask; \ + } \ + _r; \ +}) + +#define fifo_push_front(fifo, i) \ +({ \ + bool _r = !fifo_full((fifo)); \ + if (_r) { \ + --(fifo)->front; \ + (fifo)->front &= (fifo)->mask; \ + (fifo)->data[(fifo)->front] = (i); \ + } \ + _r; \ +}) + +#define fifo_pop_back(fifo, i) \ +({ \ + bool _r = !fifo_empty((fifo)); \ + if (_r) { \ + --(fifo)->back; \ + (fifo)->back &= (fifo)->mask; \ + (i) = (fifo)->data[(fifo)->back] \ + } \ + _r; \ +}) + +#define fifo_push(fifo, i) fifo_push_back(fifo, (i)) +#define fifo_pop(fifo, i) fifo_pop_front(fifo, (i)) + +#define fifo_swap(l, r) \ +do { \ + swap((l)->front, (r)->front); \ + swap((l)->back, (r)->back); \ + swap((l)->size, (r)->size); \ + swap((l)->mask, (r)->mask); \ + swap((l)->data, (r)->data); \ +} while (0) + +#define fifo_move(dest, src) \ +do { \ + typeof(*((dest)->data)) _t; \ + while (!fifo_full(dest) && \ + fifo_pop(src, _t)) \ + fifo_push(dest, _t); \ +} while (0) + +/* + * Simple array based allocator - preallocates a number of elements and you can + * never allocate more than that, also has no locking. + * + * Handy because if you know you only need a fixed number of elements you don't + * have to worry about memory allocation failure, and sometimes a mempool isn't + * what you want. + * + * We treat the free elements as entries in a singly linked list, and the + * freelist as a stack - allocating and freeing push and pop off the freelist. + */ + +#define DECLARE_ARRAY_ALLOCATOR(type, name, size) \ + struct { \ + type *freelist; \ + type data[size]; \ + } name + +#define array_alloc(array) \ +({ \ + typeof((array)->freelist) _ret = (array)->freelist; \ + \ + if (_ret) \ + (array)->freelist = *((typeof((array)->freelist) *) _ret);\ + \ + _ret; \ +}) + +#define array_free(array, ptr) \ +do { \ + typeof((array)->freelist) _ptr = ptr; \ + \ + *((typeof((array)->freelist) *) _ptr) = (array)->freelist; \ + (array)->freelist = _ptr; \ +} while (0) + +#define array_allocator_init(array) \ +do { \ + typeof((array)->freelist) _i; \ + \ + BUILD_BUG_ON(sizeof((array)->data[0]) < sizeof(void *)); \ + (array)->freelist = NULL; \ + \ + for (_i = (array)->data; \ + _i < (array)->data + ARRAY_SIZE((array)->data); \ + _i++) \ + array_free(array, _i); \ +} while (0) + +#define array_freelist_empty(array) ((array)->freelist == NULL) + +#define ANYSINT_MAX(t) \ + ((((t) 1 << (sizeof(t) * 8 - 2)) - (t) 1) * (t) 2 + (t) 1) + +int strtoint_h(const char *, int *); +int strtouint_h(const char *, unsigned int *); +int strtoll_h(const char *, long long *); +int strtoull_h(const char *, unsigned long long *); + +static inline int strtol_h(const char *cp, long *res) +{ +#if BITS_PER_LONG == 32 + return strtoint_h(cp, (int *) res); +#else + return strtoll_h(cp, (long long *) res); +#endif +} + +static inline int strtoul_h(const char *cp, long *res) +{ +#if BITS_PER_LONG == 32 + return strtouint_h(cp, (unsigned int *) res); +#else + return strtoull_h(cp, (unsigned long long *) res); +#endif +} + +#define strtoi_h(cp, res) \ + (__builtin_types_compatible_p(typeof(*res), int) \ + ? strtoint_h(cp, (void *) res) \ + : __builtin_types_compatible_p(typeof(*res), long) \ + ? strtol_h(cp, (void *) res) \ + : __builtin_types_compatible_p(typeof(*res), long long) \ + ? 
strtoll_h(cp, (void *) res) \ + : __builtin_types_compatible_p(typeof(*res), unsigned int) \ + ? strtouint_h(cp, (void *) res) \ + : __builtin_types_compatible_p(typeof(*res), unsigned long) \ + ? strtoul_h(cp, (void *) res) \ + : __builtin_types_compatible_p(typeof(*res), unsigned long long)\ + ? strtoull_h(cp, (void *) res) : -EINVAL) + +#define strtoul_safe(cp, var) \ +({ \ + unsigned long _v; \ + int _r = kstrtoul(cp, 10, &_v); \ + if (!_r) \ + var = _v; \ + _r; \ +}) + +#define strtoul_safe_clamp(cp, var, min, max) \ +({ \ + unsigned long _v; \ + int _r = kstrtoul(cp, 10, &_v); \ + if (!_r) \ + var = clamp_t(typeof(var), _v, min, max); \ + _r; \ +}) + +#define snprint(buf, size, var) \ + snprintf(buf, size, \ + __builtin_types_compatible_p(typeof(var), int) \ + ? "%i\n" : \ + __builtin_types_compatible_p(typeof(var), unsigned) \ + ? "%u\n" : \ + __builtin_types_compatible_p(typeof(var), long) \ + ? "%li\n" : \ + __builtin_types_compatible_p(typeof(var), unsigned long)\ + ? "%lu\n" : \ + __builtin_types_compatible_p(typeof(var), int64_t) \ + ? "%lli\n" : \ + __builtin_types_compatible_p(typeof(var), uint64_t) \ + ? "%llu\n" : \ + __builtin_types_compatible_p(typeof(var), const char *) \ + ? "%s\n" : "%i\n", var) + +ssize_t hprint(char *buf, int64_t v); + +bool is_zero(const char *p, size_t n); +int parse_uuid(const char *s, char *uuid); + +ssize_t snprint_string_list(char *buf, size_t size, const char * const list[], + size_t selected); + +ssize_t read_string_list(const char *buf, const char * const list[]); + +struct time_stats { + /* + * all fields are in nanoseconds, averages are ewmas stored left shifted + * by 8 + */ + uint64_t max_duration; + uint64_t average_duration; + uint64_t average_frequency; + uint64_t last; +}; + +void time_stats_update(struct time_stats *stats, uint64_t time); + +#define NSEC_PER_ns 1L +#define NSEC_PER_us NSEC_PER_USEC +#define NSEC_PER_ms NSEC_PER_MSEC +#define NSEC_PER_sec NSEC_PER_SEC + +#define __print_time_stat(stats, name, stat, units) \ + sysfs_print(name ## _ ## stat ## _ ## units, \ + div_u64((stats)->stat >> 8, NSEC_PER_ ## units)) + +#define sysfs_print_time_stats(stats, name, \ + frequency_units, \ + duration_units) \ +do { \ + __print_time_stat(stats, name, \ + average_frequency, frequency_units); \ + __print_time_stat(stats, name, \ + average_duration, duration_units); \ + __print_time_stat(stats, name, \ + max_duration, duration_units); \ + \ + sysfs_print(name ## _last_ ## frequency_units, (stats)->last \ + ? 
div_s64(local_clock() - (stats)->last, \ + NSEC_PER_ ## frequency_units) \ + : -1LL); \ +} while (0) + +#define sysfs_time_stats_attribute(name, \ + frequency_units, \ + duration_units) \ +read_attribute(name ## _average_frequency_ ## frequency_units); \ +read_attribute(name ## _average_duration_ ## duration_units); \ +read_attribute(name ## _max_duration_ ## duration_units); \ +read_attribute(name ## _last_ ## frequency_units) + +#define sysfs_time_stats_attribute_list(name, \ + frequency_units, \ + duration_units) \ +&sysfs_ ## name ## _average_frequency_ ## frequency_units, \ +&sysfs_ ## name ## _average_duration_ ## duration_units, \ +&sysfs_ ## name ## _max_duration_ ## duration_units, \ +&sysfs_ ## name ## _last_ ## frequency_units, + +#define ewma_add(ewma, val, weight, factor) \ +({ \ + (ewma) *= (weight) - 1; \ + (ewma) += (val) << factor; \ + (ewma) /= (weight); \ + (ewma) >> factor; \ +}) + +struct ratelimit { + uint64_t next; + unsigned rate; +}; + +static inline void ratelimit_reset(struct ratelimit *d) +{ + d->next = local_clock(); +} + +unsigned next_delay(struct ratelimit *d, uint64_t done); + +#define __DIV_SAFE(n, d, zero) \ +({ \ + typeof(n) _n = (n); \ + typeof(d) _d = (d); \ + _d ? _n / _d : zero; \ +}) + +#define DIV_SAFE(n, d) __DIV_SAFE(n, d, 0) + +#define container_of_or_null(ptr, type, member) \ +({ \ + typeof(ptr) _ptr = ptr; \ + _ptr ? container_of(_ptr, type, member) : NULL; \ +}) + +#define RB_INSERT(root, new, member, cmp) \ +({ \ + __label__ dup; \ + struct rb_node **n = &(root)->rb_node, *parent = NULL; \ + typeof(new) this; \ + int res, ret = -1; \ + \ + while (*n) { \ + parent = *n; \ + this = container_of(*n, typeof(*(new)), member); \ + res = cmp(new, this); \ + if (!res) \ + goto dup; \ + n = res < 0 \ + ? &(*n)->rb_left \ + : &(*n)->rb_right; \ + } \ + \ + rb_link_node(&(new)->member, parent, n); \ + rb_insert_color(&(new)->member, root); \ + ret = 0; \ +dup: \ + ret; \ +}) + +#define RB_SEARCH(root, search, member, cmp) \ +({ \ + struct rb_node *n = (root)->rb_node; \ + typeof(&(search)) this, ret = NULL; \ + int res; \ + \ + while (n) { \ + this = container_of(n, typeof(search), member); \ + res = cmp(&(search), this); \ + if (!res) { \ + ret = this; \ + break; \ + } \ + n = res < 0 \ + ? 
n->rb_left \ + : n->rb_right; \ + } \ + ret; \ +}) + +#define RB_GREATER(root, search, member, cmp) \ +({ \ + struct rb_node *n = (root)->rb_node; \ + typeof(&(search)) this, ret = NULL; \ + int res; \ + \ + while (n) { \ + this = container_of(n, typeof(search), member); \ + res = cmp(&(search), this); \ + if (res < 0) { \ + ret = this; \ + n = n->rb_left; \ + } else \ + n = n->rb_right; \ + } \ + ret; \ +}) + +#define RB_FIRST(root, type, member) \ + container_of_or_null(rb_first(root), type, member) + +#define RB_LAST(root, type, member) \ + container_of_or_null(rb_last(root), type, member) + +#define RB_NEXT(ptr, member) \ + container_of_or_null(rb_next(&(ptr)->member), typeof(*ptr), member) + +#define RB_PREV(ptr, member) \ + container_of_or_null(rb_prev(&(ptr)->member), typeof(*ptr), member) + +/* Does linear interpolation between powers of two */ +static inline unsigned fract_exp_two(unsigned x, unsigned fract_bits) +{ + unsigned fract = x & ~(~0 << fract_bits); + + x >>= fract_bits; + x = 1 << x; + x += (x * fract) >> fract_bits; + + return x; +} + +#define bio_end(bio) ((bio)->bi_sector + bio_sectors(bio)) + +void bio_map(struct bio *bio, void *base); + +int bio_alloc_pages(struct bio *bio, gfp_t gfp); + +static inline sector_t bdev_sectors(struct block_device *bdev) +{ + return bdev->bd_inode->i_size >> 9; +} + +#define closure_bio_submit(bio, cl, dev) \ +do { \ + closure_get(cl); \ + bch_generic_make_request(bio, &(dev)->bio_split_hook); \ +} while (0) + +uint64_t crc64_update(uint64_t, const void *, size_t); +uint64_t crc64(const void *, size_t); + +#endif /* _BCACHE_UTIL_H */ diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c new file mode 100644 index 000000000000..a80ee5373fd8 --- /dev/null +++ b/drivers/md/bcache/writeback.c @@ -0,0 +1,414 @@ +/* + * background writeback - scan btree for dirty data and write it to the backing + * device + * + * Copyright 2010, 2011 Kent Overstreet + * Copyright 2012 Google, Inc. 
+ */ + +#include "bcache.h" +#include "btree.h" +#include "debug.h" + +static struct workqueue_struct *dirty_wq; + +static void read_dirty(struct closure *); + +struct dirty_io { + struct closure cl; + struct cached_dev *dc; + struct bio bio; +}; + +/* Rate limiting */ + +static void __update_writeback_rate(struct cached_dev *dc) +{ + struct cache_set *c = dc->disk.c; + uint64_t cache_sectors = c->nbuckets * c->sb.bucket_size; + uint64_t cache_dirty_target = + div_u64(cache_sectors * dc->writeback_percent, 100); + + int64_t target = div64_u64(cache_dirty_target * bdev_sectors(dc->bdev), + c->cached_dev_sectors); + + /* PD controller */ + + int change = 0; + int64_t error; + int64_t dirty = atomic_long_read(&dc->disk.sectors_dirty); + int64_t derivative = dirty - dc->disk.sectors_dirty_last; + + dc->disk.sectors_dirty_last = dirty; + + derivative *= dc->writeback_rate_d_term; + derivative = clamp(derivative, -dirty, dirty); + + derivative = ewma_add(dc->disk.sectors_dirty_derivative, derivative, + dc->writeback_rate_d_smooth, 0); + + /* Avoid divide by zero */ + if (!target) + goto out; + + error = div64_s64((dirty + derivative - target) << 8, target); + + change = div_s64((dc->writeback_rate.rate * error) >> 8, + dc->writeback_rate_p_term_inverse); + + /* Don't increase writeback rate if the device isn't keeping up */ + if (change > 0 && + time_after64(local_clock(), + dc->writeback_rate.next + 10 * NSEC_PER_MSEC)) + change = 0; + + dc->writeback_rate.rate = + clamp_t(int64_t, dc->writeback_rate.rate + change, + 1, NSEC_PER_MSEC); +out: + dc->writeback_rate_derivative = derivative; + dc->writeback_rate_change = change; + dc->writeback_rate_target = target; + + schedule_delayed_work(&dc->writeback_rate_update, + dc->writeback_rate_update_seconds * HZ); +} + +static void update_writeback_rate(struct work_struct *work) +{ + struct cached_dev *dc = container_of(to_delayed_work(work), + struct cached_dev, + writeback_rate_update); + + down_read(&dc->writeback_lock); + + if (atomic_read(&dc->has_dirty) && + dc->writeback_percent) + __update_writeback_rate(dc); + + up_read(&dc->writeback_lock); +} + +static unsigned writeback_delay(struct cached_dev *dc, unsigned sectors) +{ + if (atomic_read(&dc->disk.detaching) || + !dc->writeback_percent) + return 0; + + return next_delay(&dc->writeback_rate, sectors * 10000000ULL); +} + +/* Background writeback */ + +static bool dirty_pred(struct keybuf *buf, struct bkey *k) +{ + return KEY_DIRTY(k); +} + +static void dirty_init(struct keybuf_key *w) +{ + struct dirty_io *io = w->private; + struct bio *bio = &io->bio; + + bio_init(bio); + if (!io->dc->writeback_percent) + bio_set_prio(bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0)); + + bio->bi_size = KEY_SIZE(&w->key) << 9; + bio->bi_max_vecs = DIV_ROUND_UP(KEY_SIZE(&w->key), PAGE_SECTORS); + bio->bi_private = w; + bio->bi_io_vec = bio->bi_inline_vecs; + bio_map(bio, NULL); +} + +static void refill_dirty(struct closure *cl) +{ + struct cached_dev *dc = container_of(cl, struct cached_dev, + writeback.cl); + struct keybuf *buf = &dc->writeback_keys; + bool searched_from_start = false; + struct bkey end = MAX_KEY; + SET_KEY_INODE(&end, dc->disk.id); + + if (!atomic_read(&dc->disk.detaching) && + !dc->writeback_running) + closure_return(cl); + + down_write(&dc->writeback_lock); + + if (!atomic_read(&dc->has_dirty)) { + SET_BDEV_STATE(&dc->sb, BDEV_STATE_CLEAN); + bch_write_bdev_super(dc, NULL); + + up_write(&dc->writeback_lock); + closure_return(cl); + } + + if (bkey_cmp(&buf->last_scanned, &end) >= 0) { + 
buf->last_scanned = KEY(dc->disk.id, 0, 0); + searched_from_start = true; + } + + bch_refill_keybuf(dc->disk.c, buf, &end); + + if (bkey_cmp(&buf->last_scanned, &end) >= 0 && searched_from_start) { + /* Searched the entire btree - delay awhile */ + + if (RB_EMPTY_ROOT(&buf->keys)) { + atomic_set(&dc->has_dirty, 0); + cached_dev_put(dc); + } + + if (!atomic_read(&dc->disk.detaching)) + closure_delay(&dc->writeback, dc->writeback_delay * HZ); + } + + up_write(&dc->writeback_lock); + + ratelimit_reset(&dc->writeback_rate); + + /* Punt to workqueue only so we don't recurse and blow the stack */ + continue_at(cl, read_dirty, dirty_wq); +} + +void bch_writeback_queue(struct cached_dev *dc) +{ + if (closure_trylock(&dc->writeback.cl, &dc->disk.cl)) { + if (!atomic_read(&dc->disk.detaching)) + closure_delay(&dc->writeback, dc->writeback_delay * HZ); + + continue_at(&dc->writeback.cl, refill_dirty, dirty_wq); + } +} + +void bch_writeback_add(struct cached_dev *dc, unsigned sectors) +{ + atomic_long_add(sectors, &dc->disk.sectors_dirty); + + if (!atomic_read(&dc->has_dirty) && + !atomic_xchg(&dc->has_dirty, 1)) { + atomic_inc(&dc->count); + + if (BDEV_STATE(&dc->sb) != BDEV_STATE_DIRTY) { + SET_BDEV_STATE(&dc->sb, BDEV_STATE_DIRTY); + /* XXX: should do this synchronously */ + bch_write_bdev_super(dc, NULL); + } + + bch_writeback_queue(dc); + + if (dc->writeback_percent) + schedule_delayed_work(&dc->writeback_rate_update, + dc->writeback_rate_update_seconds * HZ); + } +} + +/* Background writeback - IO loop */ + +static void dirty_io_destructor(struct closure *cl) +{ + struct dirty_io *io = container_of(cl, struct dirty_io, cl); + kfree(io); +} + +static void write_dirty_finish(struct closure *cl) +{ + struct dirty_io *io = container_of(cl, struct dirty_io, cl); + struct keybuf_key *w = io->bio.bi_private; + struct cached_dev *dc = io->dc; + struct bio_vec *bv = bio_iovec_idx(&io->bio, io->bio.bi_vcnt); + + while (bv-- != io->bio.bi_io_vec) + __free_page(bv->bv_page); + + /* This is kind of a dumb way of signalling errors. */ + if (KEY_DIRTY(&w->key)) { + unsigned i; + struct btree_op op; + bch_btree_op_init_stack(&op); + + op.type = BTREE_REPLACE; + bkey_copy(&op.replace, &w->key); + + SET_KEY_DIRTY(&w->key, false); + bch_keylist_add(&op.keys, &w->key); + + for (i = 0; i < KEY_PTRS(&w->key); i++) + atomic_inc(&PTR_BUCKET(dc->disk.c, &w->key, i)->pin); + + pr_debug("clearing %s", pkey(&w->key)); + bch_btree_insert(&op, dc->disk.c); + closure_sync(&op.cl); + + atomic_long_inc(op.insert_collision + ? 
&dc->disk.c->writeback_keys_failed + : &dc->disk.c->writeback_keys_done); + } + + bch_keybuf_del(&dc->writeback_keys, w); + atomic_dec_bug(&dc->in_flight); + + closure_wake_up(&dc->writeback_wait); + + closure_return_with_destructor(cl, dirty_io_destructor); +} + +static void dirty_endio(struct bio *bio, int error) +{ + struct keybuf_key *w = bio->bi_private; + struct dirty_io *io = w->private; + + if (error) + SET_KEY_DIRTY(&w->key, false); + + closure_put(&io->cl); +} + +static void write_dirty(struct closure *cl) +{ + struct dirty_io *io = container_of(cl, struct dirty_io, cl); + struct keybuf_key *w = io->bio.bi_private; + + dirty_init(w); + io->bio.bi_rw = WRITE; + io->bio.bi_sector = KEY_START(&w->key); + io->bio.bi_bdev = io->dc->bdev; + io->bio.bi_end_io = dirty_endio; + + trace_bcache_write_dirty(&io->bio); + closure_bio_submit(&io->bio, cl, &io->dc->disk); + + continue_at(cl, write_dirty_finish, dirty_wq); +} + +static void read_dirty_endio(struct bio *bio, int error) +{ + struct keybuf_key *w = bio->bi_private; + struct dirty_io *io = w->private; + + bch_count_io_errors(PTR_CACHE(io->dc->disk.c, &w->key, 0), + error, "reading dirty data from cache"); + + dirty_endio(bio, error); +} + +static void read_dirty_submit(struct closure *cl) +{ + struct dirty_io *io = container_of(cl, struct dirty_io, cl); + + trace_bcache_read_dirty(&io->bio); + closure_bio_submit(&io->bio, cl, &io->dc->disk); + + continue_at(cl, write_dirty, dirty_wq); +} + +static void read_dirty(struct closure *cl) +{ + struct cached_dev *dc = container_of(cl, struct cached_dev, + writeback.cl); + unsigned delay = writeback_delay(dc, 0); + struct keybuf_key *w; + struct dirty_io *io; + + /* + * XXX: if we error, background writeback just spins. Should use some + * mempools. + */ + + while (1) { + w = bch_keybuf_next(&dc->writeback_keys); + if (!w) + break; + + BUG_ON(ptr_stale(dc->disk.c, &w->key, 0)); + + if (delay > 0 && + (KEY_START(&w->key) != dc->last_read || + jiffies_to_msecs(delay) > 50)) { + w->private = NULL; + + closure_delay(&dc->writeback, delay); + continue_at(cl, read_dirty, dirty_wq); + } + + dc->last_read = KEY_OFFSET(&w->key); + + io = kzalloc(sizeof(struct dirty_io) + sizeof(struct bio_vec) + * DIV_ROUND_UP(KEY_SIZE(&w->key), PAGE_SECTORS), + GFP_KERNEL); + if (!io) + goto err; + + w->private = io; + io->dc = dc; + + dirty_init(w); + io->bio.bi_sector = PTR_OFFSET(&w->key, 0); + io->bio.bi_bdev = PTR_CACHE(dc->disk.c, + &w->key, 0)->bdev; + io->bio.bi_rw = READ; + io->bio.bi_end_io = read_dirty_endio; + + if (bio_alloc_pages(&io->bio, GFP_KERNEL)) + goto err_free; + + pr_debug("%s", pkey(&w->key)); + + closure_call(&io->cl, read_dirty_submit, NULL, &dc->disk.cl); + + delay = writeback_delay(dc, KEY_SIZE(&w->key)); + + atomic_inc(&dc->in_flight); + + if (!closure_wait_event(&dc->writeback_wait, cl, + atomic_read(&dc->in_flight) < 64)) + continue_at(cl, read_dirty, dirty_wq); + } + + if (0) { +err_free: + kfree(w->private); +err: + bch_keybuf_del(&dc->writeback_keys, w); + } + + refill_dirty(cl); +} + +void bch_writeback_init_cached_dev(struct cached_dev *dc) +{ + closure_init_unlocked(&dc->writeback); + init_rwsem(&dc->writeback_lock); + + bch_keybuf_init(&dc->writeback_keys, dirty_pred); + + dc->writeback_metadata = true; + dc->writeback_running = true; + dc->writeback_percent = 10; + dc->writeback_delay = 30; + dc->writeback_rate.rate = 1024; + + dc->writeback_rate_update_seconds = 30; + dc->writeback_rate_d_term = 16; + dc->writeback_rate_p_term_inverse = 64; + dc->writeback_rate_d_smooth = 8; + 
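The defaults above feed the PD controller in __update_writeback_rate(). Stripped of the kernel fixed-point helpers, one controller step amounts to the sketch below; this is illustrative only, since the real code also scales the derivative by writeback_rate_d_term, smooths it with an EWMA, refuses to raise the rate when the device is not keeping up, and clamps the result to [1, NSEC_PER_MSEC].

	/* Illustrative restatement of the PD step; not the kernel code. */
	static int64_t writeback_rate_step(int64_t rate, int64_t dirty,
					   int64_t derivative, int64_t target)
	{
		int64_t error, change;

		if (!target)		/* avoid divide by zero */
			return rate;

		/* error in 8.8 fixed point, normalized to the dirty target */
		error = ((dirty + derivative - target) << 8) / target;

		/* writeback_rate_p_term_inverse defaults to 64, as set above */
		change = ((rate * error) >> 8) / 64;

		rate += change;
		return rate < 1 ? 1 : rate;
	}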
+ INIT_DELAYED_WORK(&dc->writeback_rate_update, update_writeback_rate); + schedule_delayed_work(&dc->writeback_rate_update, + dc->writeback_rate_update_seconds * HZ); +} + +void bch_writeback_exit(void) +{ + if (dirty_wq) + destroy_workqueue(dirty_wq); +} + +int __init bch_writeback_init(void) +{ + dirty_wq = create_singlethread_workqueue("bcache_writeback"); + if (!dirty_wq) + return -ENOMEM; + + return 0; +} diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index f204a7a9cf38..6e7ec64b69ab 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -78,3 +78,9 @@ SUBSYS(hugetlb) #endif /* */ + +#ifdef CONFIG_CGROUP_BCACHE +SUBSYS(bcache) +#endif + +/* */ diff --git a/include/linux/sched.h b/include/linux/sched.h index d35d2b6ddbfb..a8482d063bc3 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1576,6 +1576,10 @@ struct task_struct { #ifdef CONFIG_UPROBES struct uprobe_task *utask; #endif +#if defined(CONFIG_BCACHE) || defined(CONFIG_BCACHE_MODULE) + unsigned int sequential_io; + unsigned int sequential_io_avg; +#endif }; /* Future-safe accessor for struct task_struct's cpus_allowed. */ diff --git a/include/trace/events/bcache.h b/include/trace/events/bcache.h new file mode 100644 index 000000000000..3cc5a0b278c3 --- /dev/null +++ b/include/trace/events/bcache.h @@ -0,0 +1,271 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM bcache + +#if !defined(_TRACE_BCACHE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_BCACHE_H + +#include + +struct search; + +DECLARE_EVENT_CLASS(bcache_request, + + TP_PROTO(struct search *s, struct bio *bio), + + TP_ARGS(s, bio), + + TP_STRUCT__entry( + __field(dev_t, dev ) + __field(unsigned int, orig_major ) + __field(unsigned int, orig_minor ) + __field(sector_t, sector ) + __field(dev_t, orig_sector ) + __field(unsigned int, nr_sector ) + __array(char, rwbs, 6 ) + __array(char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + __entry->dev = bio->bi_bdev->bd_dev; + __entry->orig_major = s->d->disk->major; + __entry->orig_minor = s->d->disk->first_minor; + __entry->sector = bio->bi_sector; + __entry->orig_sector = bio->bi_sector - 16; + __entry->nr_sector = bio->bi_size >> 9; + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("%d,%d %s %llu + %u [%s] (from %d,%d @ %llu)", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->rwbs, + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->comm, + __entry->orig_major, __entry->orig_minor, + (unsigned long long)__entry->orig_sector) +); + +DEFINE_EVENT(bcache_request, bcache_request_start, + + TP_PROTO(struct search *s, struct bio *bio), + + TP_ARGS(s, bio) +); + +DEFINE_EVENT(bcache_request, bcache_request_end, + + TP_PROTO(struct search *s, struct bio *bio), + + TP_ARGS(s, bio) +); + +DECLARE_EVENT_CLASS(bcache_bio, + + TP_PROTO(struct bio *bio), + + TP_ARGS(bio), + + TP_STRUCT__entry( + __field(dev_t, dev ) + __field(sector_t, sector ) + __field(unsigned int, nr_sector ) + __array(char, rwbs, 6 ) + __array(char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + __entry->dev = bio->bi_bdev->bd_dev; + __entry->sector = bio->bi_sector; + __entry->nr_sector = bio->bi_size >> 9; + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("%d,%d %s %llu + %u [%s]", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->rwbs, + (unsigned long long)__entry->sector, + __entry->nr_sector, 
__entry->comm) +); + + +DEFINE_EVENT(bcache_bio, bcache_passthrough, + + TP_PROTO(struct bio *bio), + + TP_ARGS(bio) +); + +DEFINE_EVENT(bcache_bio, bcache_cache_hit, + + TP_PROTO(struct bio *bio), + + TP_ARGS(bio) +); + +DEFINE_EVENT(bcache_bio, bcache_cache_miss, + + TP_PROTO(struct bio *bio), + + TP_ARGS(bio) +); + +DEFINE_EVENT(bcache_bio, bcache_read_retry, + + TP_PROTO(struct bio *bio), + + TP_ARGS(bio) +); + +DEFINE_EVENT(bcache_bio, bcache_writethrough, + + TP_PROTO(struct bio *bio), + + TP_ARGS(bio) +); + +DEFINE_EVENT(bcache_bio, bcache_writeback, + + TP_PROTO(struct bio *bio), + + TP_ARGS(bio) +); + +DEFINE_EVENT(bcache_bio, bcache_write_skip, + + TP_PROTO(struct bio *bio), + + TP_ARGS(bio) +); + +DEFINE_EVENT(bcache_bio, bcache_btree_read, + + TP_PROTO(struct bio *bio), + + TP_ARGS(bio) +); + +DEFINE_EVENT(bcache_bio, bcache_btree_write, + + TP_PROTO(struct bio *bio), + + TP_ARGS(bio) +); + +DEFINE_EVENT(bcache_bio, bcache_write_dirty, + + TP_PROTO(struct bio *bio), + + TP_ARGS(bio) +); + +DEFINE_EVENT(bcache_bio, bcache_read_dirty, + + TP_PROTO(struct bio *bio), + + TP_ARGS(bio) +); + +DEFINE_EVENT(bcache_bio, bcache_write_moving, + + TP_PROTO(struct bio *bio), + + TP_ARGS(bio) +); + +DEFINE_EVENT(bcache_bio, bcache_read_moving, + + TP_PROTO(struct bio *bio), + + TP_ARGS(bio) +); + +DEFINE_EVENT(bcache_bio, bcache_journal_write, + + TP_PROTO(struct bio *bio), + + TP_ARGS(bio) +); + +DECLARE_EVENT_CLASS(bcache_cache_bio, + + TP_PROTO(struct bio *bio, + sector_t orig_sector, + struct block_device* orig_bdev), + + TP_ARGS(bio, orig_sector, orig_bdev), + + TP_STRUCT__entry( + __field(dev_t, dev ) + __field(dev_t, orig_dev ) + __field(sector_t, sector ) + __field(sector_t, orig_sector ) + __field(unsigned int, nr_sector ) + __array(char, rwbs, 6 ) + __array(char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + __entry->dev = bio->bi_bdev->bd_dev; + __entry->orig_dev = orig_bdev->bd_dev; + __entry->sector = bio->bi_sector; + __entry->orig_sector = orig_sector; + __entry->nr_sector = bio->bi_size >> 9; + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("%d,%d %s %llu + %u [%s] (from %d,%d %llu)", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->rwbs, + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->comm, + MAJOR(__entry->orig_dev), MINOR(__entry->orig_dev), + (unsigned long long)__entry->orig_sector) +); + +DEFINE_EVENT(bcache_cache_bio, bcache_cache_insert, + + TP_PROTO(struct bio *bio, + sector_t orig_sector, + struct block_device *orig_bdev), + + TP_ARGS(bio, orig_sector, orig_bdev) +); + +DECLARE_EVENT_CLASS(bcache_gc, + + TP_PROTO(uint8_t *uuid), + + TP_ARGS(uuid), + + TP_STRUCT__entry( + __field(uint8_t *, uuid) + ), + + TP_fast_assign( + __entry->uuid = uuid; + ), + + TP_printk("%pU", __entry->uuid) +); + + +DEFINE_EVENT(bcache_gc, bcache_gc_start, + + TP_PROTO(uint8_t *uuid), + + TP_ARGS(uuid) +); + +DEFINE_EVENT(bcache_gc, bcache_gc_end, + + TP_PROTO(uint8_t *uuid), + + TP_ARGS(uuid) +); + +#endif /* _TRACE_BCACHE_H */ + +/* This part must be outside protection */ +#include diff --git a/kernel/fork.c b/kernel/fork.c index 1766d324d5e3..7b54fb62332c 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1303,6 +1303,10 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->memcg_batch.do_batch = 0; p->memcg_batch.memcg = NULL; #endif +#ifdef CONFIG_BCACHE + p->sequential_io = 0; + p->sequential_io_avg = 0; +#endif /* Perform scheduler related setup. 
Assign this task to a CPU. */ sched_fork(p); -- cgit From 5f9296ba21b3c395e53dd84e7ff9578f97f24295 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 28 Feb 2012 18:26:31 +0530 Subject: i2c: Add bus recovery infrastructure Add i2c bus recovery infrastructure to i2c adapters as specified in the i2c protocol Rev. 03 section 3.1.16 titled "Bus clear". http://www.nxp.com/documents/user_manual/UM10204.pdf Sometimes during operation the i2c bus hangs and we need to give dummy clocks to the slave device to restart the transfer. The bus controller may have the capability to generate these clocks itself, or the platform may have gpio pins which can be toggled to generate the dummy clocks; this patch supports both. This patch also adds generic bus recovery routines, gpio or scl line based, which bus controllers can use. In addition, a controller driver may provide its own version of the bus recovery routine. This doesn't support multi-master recovery for now. Signed-off-by: Viresh Kumar [wsa: changed gpio type to int and minor reformatting] Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-core.c | 159 +++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/i2c.h | 41 +++++++++++++ 2 files changed, 200 insertions(+) (limited to 'include/linux') diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index f7dfe878a51b..0d873ba2e82e 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -27,7 +27,9 @@ #include #include +#include #include +#include #include #include #include @@ -109,6 +111,130 @@ static int i2c_device_uevent(struct device *dev, struct kobj_uevent_env *env) #define i2c_device_uevent NULL #endif /* CONFIG_HOTPLUG */ +/* i2c bus recovery routines */ +static int get_scl_gpio_value(struct i2c_adapter *adap) +{ + return gpio_get_value(adap->bus_recovery_info->scl_gpio); +} + +static void set_scl_gpio_value(struct i2c_adapter *adap, int val) +{ + gpio_set_value(adap->bus_recovery_info->scl_gpio, val); +} + +static int get_sda_gpio_value(struct i2c_adapter *adap) +{ + return gpio_get_value(adap->bus_recovery_info->sda_gpio); +} + +static int i2c_get_gpios_for_recovery(struct i2c_adapter *adap) +{ + struct i2c_bus_recovery_info *bri = adap->bus_recovery_info; + struct device *dev = &adap->dev; + int ret = 0; + + ret = gpio_request_one(bri->scl_gpio, GPIOF_OPEN_DRAIN | + GPIOF_OUT_INIT_HIGH, "i2c-scl"); + if (ret) { + dev_warn(dev, "Can't get SCL gpio: %d\n", bri->scl_gpio); + return ret; + } + + if (bri->get_sda) { + if (gpio_request_one(bri->sda_gpio, GPIOF_IN, "i2c-sda")) { + /* work without SDA polling */ + dev_warn(dev, "Can't get SDA gpio: %d. Not using SDA polling\n", + bri->sda_gpio); + bri->get_sda = NULL; + } + } + + return ret; +} + +static void i2c_put_gpios_for_recovery(struct i2c_adapter *adap) +{ + struct i2c_bus_recovery_info *bri = adap->bus_recovery_info; + + if (bri->get_sda) + gpio_free(bri->sda_gpio); + + gpio_free(bri->scl_gpio); +} + +/* + * We are generating clock pulses. ndelay() determines the duration of the clock pulses.

+ * We will generate clock with rate 100 KHz and so duration of both clock levels + * is: delay in ns = (10^6 / 100) / 2 + */ +#define RECOVERY_NDELAY 5000 +#define RECOVERY_CLK_CNT 9 + +static int i2c_generic_recovery(struct i2c_adapter *adap) +{ + struct i2c_bus_recovery_info *bri = adap->bus_recovery_info; + int i = 0, val = 1, ret = 0; + + if (bri->prepare_recovery) + bri->prepare_recovery(bri); + + /* + * By this time SCL is high, as we need to give 9 falling-rising edges + */ + while (i++ < RECOVERY_CLK_CNT * 2) { + if (val) { + /* Break if SDA is high */ + if (bri->get_sda && bri->get_sda(adap)) + break; + /* SCL shouldn't be low here */ + if (!bri->get_scl(adap)) { + dev_err(&adap->dev, + "SCL is stuck low, exit recovery\n"); + ret = -EBUSY; + break; + } + } + + val = !val; + bri->set_scl(adap, val); + ndelay(RECOVERY_NDELAY); + } + + if (bri->unprepare_recovery) + bri->unprepare_recovery(bri); + + return ret; +} + +int i2c_generic_scl_recovery(struct i2c_adapter *adap) +{ + adap->bus_recovery_info->set_scl(adap, 1); + return i2c_generic_recovery(adap); +} + +int i2c_generic_gpio_recovery(struct i2c_adapter *adap) +{ + int ret; + + ret = i2c_get_gpios_for_recovery(adap); + if (ret) + return ret; + + ret = i2c_generic_recovery(adap); + i2c_put_gpios_for_recovery(adap); + + return ret; +} + +int i2c_recover_bus(struct i2c_adapter *adap) +{ + if (!adap->bus_recovery_info) + return -EOPNOTSUPP; + + dev_dbg(&adap->dev, "Trying i2c bus recovery\n"); + return adap->bus_recovery_info->recover_bus(adap); +} + static int i2c_device_probe(struct device *dev) { struct i2c_client *client = i2c_verify_client(dev); @@ -902,6 +1028,39 @@ static int i2c_register_adapter(struct i2c_adapter *adap) "Failed to create compatibility class link\n"); #endif + /* bus recovery specific initialization */ + if (adap->bus_recovery_info) { + struct i2c_bus_recovery_info *bri = adap->bus_recovery_info; + + if (!bri->recover_bus) { + dev_err(&adap->dev, "No recover_bus() found, not using recovery\n"); + adap->bus_recovery_info = NULL; + goto exit_recovery; + } + + /* Generic GPIO recovery */ + if (bri->recover_bus == i2c_generic_gpio_recovery) { + if (!gpio_is_valid(bri->scl_gpio)) { + dev_err(&adap->dev, "Invalid SCL gpio, not using recovery\n"); + adap->bus_recovery_info = NULL; + goto exit_recovery; + } + + if (gpio_is_valid(bri->sda_gpio)) + bri->get_sda = get_sda_gpio_value; + else + bri->get_sda = NULL; + + bri->get_scl = get_scl_gpio_value; + bri->set_scl = set_scl_gpio_value; + } else if (!bri->set_scl || !bri->get_scl) { + /* Generic SCL recovery */ + dev_err(&adap->dev, "No {get|set}_gpio() found, not using recovery\n"); + adap->bus_recovery_info = NULL; + } + } + +exit_recovery: /* create pre-declared device nodes */ if (adap->nr < __i2c_first_dynamic_bus_num) i2c_scan_static_board_info(adap); diff --git a/include/linux/i2c.h b/include/linux/i2c.h index d0c4db7b4872..2eca3860b77f 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -370,6 +370,45 @@ struct i2c_algorithm { u32 (*functionality) (struct i2c_adapter *); }; +/** + * struct i2c_bus_recovery_info - I2C bus recovery information + * @recover_bus: Recover routine. Either pass driver's recover_bus() routine, or + * i2c_generic_scl_recovery() or i2c_generic_gpio_recovery(). + * @get_scl: This gets current value of SCL line. Mandatory for generic SCL + * recovery. Used internally for generic GPIO recovery. + * @set_scl: This sets/clears SCL line. Mandatory for generic SCL recovery. Used + * internally for generic GPIO recovery. 
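A usage sketch for the struct documented here, with a hypothetical driver and hypothetical GPIO numbers: an adapter driver opting into generic GPIO recovery fills in the callbacks and pins, attaches the struct to its adapter before registration, and later calls i2c_recover_bus() when a transfer finds the bus stuck.

	/* Hypothetical adapter driver wiring up generic GPIO recovery. */
	static struct i2c_bus_recovery_info foo_recovery = {
		.recover_bus	= i2c_generic_gpio_recovery,
		.scl_gpio	= 42,	/* hypothetical board wiring */
		.sda_gpio	= 43,	/* optional: enables SDA polling */
	};

	foo_adap.bus_recovery_info = &foo_recovery;
	/* registration validates recover_bus and, for the generic GPIO
	 * case, fills in get_scl/set_scl as shown above */
	ret = i2c_add_adapter(&foo_adap);

	/* later, when a transfer finds SCL/SDA stuck: */
	i2c_recover_bus(&foo_adap);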
+ * @get_sda: This gets current value of SDA line. Optional for generic SCL + * recovery. Used internally, if sda_gpio is a valid GPIO, for generic GPIO + * recovery. + * @prepare_recovery: This will be called before starting recovery. Platform may + * configure padmux here for SDA/SCL line or something else they want. + * @unprepare_recovery: This will be called after completing recovery. Platform + * may configure padmux here for SDA/SCL line or something else they want. + * @scl_gpio: gpio number of the SCL line. Only required for GPIO recovery. + * @sda_gpio: gpio number of the SDA line. Only required for GPIO recovery. + */ +struct i2c_bus_recovery_info { + int (*recover_bus)(struct i2c_adapter *); + + int (*get_scl)(struct i2c_adapter *); + void (*set_scl)(struct i2c_adapter *, int val); + int (*get_sda)(struct i2c_adapter *); + + void (*prepare_recovery)(struct i2c_bus_recovery_info *bri); + void (*unprepare_recovery)(struct i2c_bus_recovery_info *bri); + + /* gpio recovery */ + int scl_gpio; + int sda_gpio; +}; + +int i2c_recover_bus(struct i2c_adapter *adap); + +/* Generic recovery routines */ +int i2c_generic_gpio_recovery(struct i2c_adapter *adap); +int i2c_generic_scl_recovery(struct i2c_adapter *adap); + /* * i2c_adapter is the structure used to identify a physical i2c bus along * with the access algorithms necessary to access it. @@ -393,6 +432,8 @@ struct i2c_adapter { struct mutex userspace_clients_lock; struct list_head userspace_clients; + + struct i2c_bus_recovery_info *bus_recovery_info; }; #define to_i2c_adapter(d) container_of(d, struct i2c_adapter, dev) -- cgit From 49a64ac555f1dabd2b94325553187d0db6ecac16 Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Thu, 21 Mar 2013 08:08:46 +0000 Subject: i2c: tegra: assume CONFIG_OF, remove platform data Tegra only supports, and always enables, device tree. Remove all ifdefs and runtime checks for DT support from the driver. Platform data is therefore no longer required. Delete the header that defines it. 
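With platform data gone, reading the optional "clock-frequency" property reduces to the standard pattern visible in the diff below; sketched on its own, with dev standing in for the adapter's struct device:

	u32 rate;

	/* of_property_read_u32() returns non-zero when the property is
	 * missing or malformed, so the old default is preserved */
	if (of_property_read_u32(dev->of_node, "clock-frequency", &rate))
		rate = 100000;	/* standard-mode 100 kHz default */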
Signed-off-by: Stephen Warren Signed-off-by: Wolfram Sang --- arch/arm/mach-tegra/board-dt-tegra20.c | 2 -- drivers/i2c/busses/i2c-tegra.c | 26 +++++++------------------- include/linux/i2c-tegra.h | 25 ------------------------- 3 files changed, 7 insertions(+), 46 deletions(-) delete mode 100644 include/linux/i2c-tegra.h (limited to 'include/linux') diff --git a/arch/arm/mach-tegra/board-dt-tegra20.c b/arch/arm/mach-tegra/board-dt-tegra20.c index a0edf2510280..6e1c9a91848f 100644 --- a/arch/arm/mach-tegra/board-dt-tegra20.c +++ b/arch/arm/mach-tegra/board-dt-tegra20.c @@ -30,8 +30,6 @@ #include #include #include -#include -#include #include #include diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index b714776b6ddd..b60ff90adc39 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include @@ -172,7 +171,7 @@ struct tegra_i2c_dev { u8 *msg_buf; size_t msg_buf_remaining; int msg_read; - unsigned long bus_clk_rate; + u32 bus_clk_rate; bool is_suspended; }; @@ -694,7 +693,6 @@ static const struct tegra_i2c_hw_feature tegra114_i2c_hw = { .clk_divisor_std_fast_mode = 0x19, }; -#if defined(CONFIG_OF) /* Match table for of_platform binding */ static const struct of_device_id tegra_i2c_of_match[] = { { .compatible = "nvidia,tegra114-i2c", .data = &tegra114_i2c_hw, }, @@ -704,16 +702,13 @@ static const struct of_device_id tegra_i2c_of_match[] = { {}, }; MODULE_DEVICE_TABLE(of, tegra_i2c_of_match); -#endif static int tegra_i2c_probe(struct platform_device *pdev) { struct tegra_i2c_dev *i2c_dev; - struct tegra_i2c_platform_data *pdata = pdev->dev.platform_data; struct resource *res; struct clk *div_clk; struct clk *fast_clk; - const unsigned int *prop; void __iomem *base; int irq; int ret = 0; @@ -754,23 +749,16 @@ static int tegra_i2c_probe(struct platform_device *pdev) i2c_dev->cont_id = pdev->id; i2c_dev->dev = &pdev->dev; - i2c_dev->bus_clk_rate = 100000; /* default clock rate */ - if (pdata) { - i2c_dev->bus_clk_rate = pdata->bus_clk_rate; - - } else if (i2c_dev->dev->of_node) { /* if there is a device tree node ... */ - prop = of_get_property(i2c_dev->dev->of_node, - "clock-frequency", NULL); - if (prop) - i2c_dev->bus_clk_rate = be32_to_cpup(prop); - } + ret = of_property_read_u32(i2c_dev->dev->of_node, "clock-frequency", + &i2c_dev->bus_clk_rate); + if (ret) + i2c_dev->bus_clk_rate = 100000; /* default clock rate */ i2c_dev->hw = &tegra20_i2c_hw; if (pdev->dev.of_node) { const struct of_device_id *match; - match = of_match_device(of_match_ptr(tegra_i2c_of_match), - &pdev->dev); + match = of_match_device(tegra_i2c_of_match, &pdev->dev); i2c_dev->hw = match->data; i2c_dev->is_dvc = of_device_is_compatible(pdev->dev.of_node, "nvidia,tegra20-i2c-dvc"); @@ -876,7 +864,7 @@ static struct platform_driver tegra_i2c_driver = { .driver = { .name = "tegra-i2c", .owner = THIS_MODULE, - .of_match_table = of_match_ptr(tegra_i2c_of_match), + .of_match_table = tegra_i2c_of_match, .pm = TEGRA_I2C_PM, }, }; diff --git a/include/linux/i2c-tegra.h b/include/linux/i2c-tegra.h deleted file mode 100644 index 9c85da49857a..000000000000 --- a/include/linux/i2c-tegra.h +++ /dev/null @@ -1,25 +0,0 @@ -/* - * drivers/i2c/busses/i2c-tegra.c - * - * Copyright (C) 2010 Google, Inc. 
- * Author: Colin Cross - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#ifndef _LINUX_I2C_TEGRA_H -#define _LINUX_I2C_TEGRA_H - -struct tegra_i2c_platform_data { - unsigned long bus_clk_rate; -}; - -#endif /* _LINUX_I2C_TEGRA_H */ -- cgit From d4e1a692e9e85f9cbee090ea8d6158b133d32157 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Mon, 4 Mar 2013 21:30:41 +0000 Subject: ACPI: Remove acpi_device dependency in acpi_device_set_id() This patch updates the internal operations of acpi_device_set_id() to setup acpi_device_pnp without using acpi_device. There is no functional change to acpi_device_set_id() in this patch. acpi_pnp_type is added to acpi_device_pnp, so that PNPID type is self-contained within acpi_device_pnp. acpi_add_id(), acpi_bay_match(), acpi_dock_match(), acpi_ibm_smbus_match() and acpi_is_video_device() are changed to take acpi_handle as an argument, instead of acpi_device. Signed-off-by: Toshi Kani Signed-off-by: Rafael J. Wysocki --- drivers/acpi/scan.c | 69 +++++++++++++++++------------------ drivers/acpi/video_detect.c | 25 ++++++------- drivers/gpu/drm/i915/intel_opregion.c | 4 +- include/acpi/acpi_bus.h | 14 +++++-- include/linux/acpi.h | 4 +- 5 files changed, 59 insertions(+), 57 deletions(-) (limited to 'include/linux') diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index d69d77ab9c7e..f9c698d766f1 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -526,7 +526,7 @@ static int acpi_device_setup_files(struct acpi_device *dev) goto end; } - if (dev->flags.bus_address) + if (dev->pnp.type.bus_address) result = device_create_file(&dev->dev, &dev_attr_adr); if (dev->pnp.unique_id) result = device_create_file(&dev->dev, &dev_attr_uid); @@ -599,7 +599,7 @@ static void acpi_device_remove_files(struct acpi_device *dev) if (dev->pnp.unique_id) device_remove_file(&dev->dev, &dev_attr_uid); - if (dev->flags.bus_address) + if (dev->pnp.type.bus_address) device_remove_file(&dev->dev, &dev_attr_adr); device_remove_file(&dev->dev, &dev_attr_modalias); device_remove_file(&dev->dev, &dev_attr_hid); @@ -1406,19 +1406,17 @@ static void acpi_device_get_busid(struct acpi_device *device) } /* - * acpi_bay_match - see if a device is an ejectable driver bay + * acpi_bay_match - see if an acpi object is an ejectable driver bay * * If an acpi object is ejectable and has one of the ACPI ATA methods defined, * then we can safely call it an ejectable drive bay */ -static int acpi_bay_match(struct acpi_device *device){ +static int acpi_bay_match(acpi_handle handle) +{ acpi_status status; - acpi_handle handle; acpi_handle tmp; acpi_handle phandle; - handle = device->handle; - status = acpi_get_handle(handle, "_EJ0", &tmp); if (ACPI_FAILURE(status)) return -ENODEV; @@ -1442,12 +1440,12 @@ static int acpi_bay_match(struct acpi_device *device){ } /* - * acpi_dock_match - see if a device has a _DCK method + * acpi_dock_match - see if an acpi object has a _DCK method */ -static int acpi_dock_match(struct acpi_device *device) +static int acpi_dock_match(acpi_handle handle) { acpi_handle tmp; - return acpi_get_handle(device->handle, "_DCK", &tmp); + return 
acpi_get_handle(handle, "_DCK", &tmp); } const char *acpi_device_hid(struct acpi_device *device) @@ -1462,7 +1460,7 @@ const char *acpi_device_hid(struct acpi_device *device) } EXPORT_SYMBOL(acpi_device_hid); -static void acpi_add_id(struct acpi_device *device, const char *dev_id) +static void acpi_add_id(struct acpi_device_pnp *pnp, const char *dev_id) { struct acpi_hardware_id *id; @@ -1476,7 +1474,8 @@ static void acpi_add_id(struct acpi_device *device, const char *dev_id) return; } - list_add_tail(&id->list, &device->pnp.ids); + list_add_tail(&id->list, &pnp->ids); + pnp->type.hardware_id = 1; } /* @@ -1484,7 +1483,7 @@ static void acpi_add_id(struct acpi_device *device, const char *dev_id) * lacks the SMBUS01 HID and the methods do not have the necessary "_" * prefix. Work around this. */ -static int acpi_ibm_smbus_match(struct acpi_device *device) +static int acpi_ibm_smbus_match(acpi_handle handle) { acpi_handle h_dummy; struct acpi_buffer path = {ACPI_ALLOCATE_BUFFER, NULL}; @@ -1494,7 +1493,7 @@ static int acpi_ibm_smbus_match(struct acpi_device *device) return -ENODEV; /* Look for SMBS object */ - result = acpi_get_name(device->handle, ACPI_SINGLE_NAME, &path); + result = acpi_get_name(handle, ACPI_SINGLE_NAME, &path); if (result) return result; @@ -1505,9 +1504,9 @@ static int acpi_ibm_smbus_match(struct acpi_device *device) /* Does it have the necessary (but misnamed) methods? */ result = -ENODEV; - if (ACPI_SUCCESS(acpi_get_handle(device->handle, "SBI", &h_dummy)) && - ACPI_SUCCESS(acpi_get_handle(device->handle, "SBR", &h_dummy)) && - ACPI_SUCCESS(acpi_get_handle(device->handle, "SBW", &h_dummy))) + if (ACPI_SUCCESS(acpi_get_handle(handle, "SBI", &h_dummy)) && + ACPI_SUCCESS(acpi_get_handle(handle, "SBR", &h_dummy)) && + ACPI_SUCCESS(acpi_get_handle(handle, "SBW", &h_dummy))) result = 0; out: kfree(path.pointer); @@ -1524,7 +1523,7 @@ static void acpi_device_set_id(struct acpi_device *device) switch (device->device_type) { case ACPI_BUS_TYPE_DEVICE: if (ACPI_IS_ROOT_DEVICE(device)) { - acpi_add_id(device, ACPI_SYSTEM_HID); + acpi_add_id(&device->pnp, ACPI_SYSTEM_HID); break; } @@ -1535,15 +1534,15 @@ static void acpi_device_set_id(struct acpi_device *device) } if (info->valid & ACPI_VALID_HID) - acpi_add_id(device, info->hardware_id.string); + acpi_add_id(&device->pnp, info->hardware_id.string); if (info->valid & ACPI_VALID_CID) { cid_list = &info->compatible_id_list; for (i = 0; i < cid_list->count; i++) - acpi_add_id(device, cid_list->ids[i].string); + acpi_add_id(&device->pnp, cid_list->ids[i].string); } if (info->valid & ACPI_VALID_ADR) { device->pnp.bus_address = info->address; - device->flags.bus_address = 1; + device->pnp.type.bus_address = 1; } if (info->valid & ACPI_VALID_UID) device->pnp.unique_id = kstrdup(info->unique_id.string, @@ -1555,36 +1554,36 @@ static void acpi_device_set_id(struct acpi_device *device) * Some devices don't reliably have _HIDs & _CIDs, so add * synthetic HIDs to make sure drivers can find them. 
*/ - if (acpi_is_video_device(device)) - acpi_add_id(device, ACPI_VIDEO_HID); - else if (ACPI_SUCCESS(acpi_bay_match(device))) - acpi_add_id(device, ACPI_BAY_HID); - else if (ACPI_SUCCESS(acpi_dock_match(device))) - acpi_add_id(device, ACPI_DOCK_HID); - else if (!acpi_ibm_smbus_match(device)) - acpi_add_id(device, ACPI_SMBUS_IBM_HID); + if (acpi_is_video_device(device->handle)) + acpi_add_id(&device->pnp, ACPI_VIDEO_HID); + else if (ACPI_SUCCESS(acpi_bay_match(device->handle))) + acpi_add_id(&device->pnp, ACPI_BAY_HID); + else if (ACPI_SUCCESS(acpi_dock_match(device->handle))) + acpi_add_id(&device->pnp, ACPI_DOCK_HID); + else if (!acpi_ibm_smbus_match(device->handle)) + acpi_add_id(&device->pnp, ACPI_SMBUS_IBM_HID); else if (list_empty(&device->pnp.ids) && ACPI_IS_ROOT_DEVICE(device->parent)) { - acpi_add_id(device, ACPI_BUS_HID); /* \_SB, LNXSYBUS */ + acpi_add_id(&device->pnp, ACPI_BUS_HID); /* \_SB, LNXSYBUS */ strcpy(device->pnp.device_name, ACPI_BUS_DEVICE_NAME); strcpy(device->pnp.device_class, ACPI_BUS_CLASS); } break; case ACPI_BUS_TYPE_POWER: - acpi_add_id(device, ACPI_POWER_HID); + acpi_add_id(&device->pnp, ACPI_POWER_HID); break; case ACPI_BUS_TYPE_PROCESSOR: - acpi_add_id(device, ACPI_PROCESSOR_OBJECT_HID); + acpi_add_id(&device->pnp, ACPI_PROCESSOR_OBJECT_HID); break; case ACPI_BUS_TYPE_THERMAL: - acpi_add_id(device, ACPI_THERMAL_HID); + acpi_add_id(&device->pnp, ACPI_THERMAL_HID); break; case ACPI_BUS_TYPE_POWER_BUTTON: - acpi_add_id(device, ACPI_BUTTON_HID_POWERF); + acpi_add_id(&device->pnp, ACPI_BUTTON_HID_POWERF); break; case ACPI_BUS_TYPE_SLEEP_BUTTON: - acpi_add_id(device, ACPI_BUTTON_HID_SLEEPF); + acpi_add_id(&device->pnp, ACPI_BUTTON_HID_SLEEPF); break; } } diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index 4ac2593234e7..66f67626f02e 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -67,40 +67,37 @@ acpi_backlight_cap_match(acpi_handle handle, u32 level, void *context, return 0; } -/* Returns true if the device is a video device which can be handled by - * video.ko. +/* Returns true if the ACPI object is a video device which can be + * handled by video.ko. * The device will get a Linux specific CID added in scan.c to * identify the device as an ACPI graphics device * Be aware that the graphics device may not be physically present * Use acpi_video_get_capabilities() to detect general ACPI video * capabilities of present cards */ -long acpi_is_video_device(struct acpi_device *device) +long acpi_is_video_device(acpi_handle handle) { acpi_handle h_dummy; long video_caps = 0; - if (!device) - return 0; - /* Is this device able to support video switching ? */ - if (ACPI_SUCCESS(acpi_get_handle(device->handle, "_DOD", &h_dummy)) || - ACPI_SUCCESS(acpi_get_handle(device->handle, "_DOS", &h_dummy))) + if (ACPI_SUCCESS(acpi_get_handle(handle, "_DOD", &h_dummy)) || + ACPI_SUCCESS(acpi_get_handle(handle, "_DOS", &h_dummy))) video_caps |= ACPI_VIDEO_OUTPUT_SWITCHING; /* Is this device able to retrieve a video ROM ? */ - if (ACPI_SUCCESS(acpi_get_handle(device->handle, "_ROM", &h_dummy))) + if (ACPI_SUCCESS(acpi_get_handle(handle, "_ROM", &h_dummy))) video_caps |= ACPI_VIDEO_ROM_AVAILABLE; /* Is this device able to configure which video head to be POSTed ? 
*/ - if (ACPI_SUCCESS(acpi_get_handle(device->handle, "_VPO", &h_dummy)) && - ACPI_SUCCESS(acpi_get_handle(device->handle, "_GPD", &h_dummy)) && - ACPI_SUCCESS(acpi_get_handle(device->handle, "_SPD", &h_dummy))) + if (ACPI_SUCCESS(acpi_get_handle(handle, "_VPO", &h_dummy)) && + ACPI_SUCCESS(acpi_get_handle(handle, "_GPD", &h_dummy)) && + ACPI_SUCCESS(acpi_get_handle(handle, "_SPD", &h_dummy))) video_caps |= ACPI_VIDEO_DEVICE_POSTING; /* Only check for backlight functionality if one of the above hit. */ if (video_caps) - acpi_walk_namespace(ACPI_TYPE_DEVICE, device->handle, + acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, ACPI_UINT32_MAX, acpi_backlight_cap_match, NULL, &video_caps, NULL); @@ -127,7 +124,7 @@ find_video(acpi_handle handle, u32 lvl, void *context, void **rv) if (!dev) return AE_OK; pci_dev_put(dev); - *cap |= acpi_is_video_device(acpi_dev); + *cap |= acpi_is_video_device(handle); } return AE_OK; } diff --git a/drivers/gpu/drm/i915/intel_opregion.c b/drivers/gpu/drm/i915/intel_opregion.c index 4d338740f2cb..a8117e614009 100644 --- a/drivers/gpu/drm/i915/intel_opregion.c +++ b/drivers/gpu/drm/i915/intel_opregion.c @@ -350,11 +350,11 @@ static void intel_didl_outputs(struct drm_device *dev) if (!handle || acpi_bus_get_device(handle, &acpi_dev)) return; - if (acpi_is_video_device(acpi_dev)) + if (acpi_is_video_device(handle)) acpi_video_bus = acpi_dev; else { list_for_each_entry(acpi_cdev, &acpi_dev->children, node) { - if (acpi_is_video_device(acpi_cdev)) { + if (acpi_is_video_device(acpi_cdev->handle)) { acpi_video_bus = acpi_cdev; break; } diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 533ef039c5e0..3cb3da8ac9d9 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -161,7 +161,6 @@ struct acpi_device_status { struct acpi_device_flags { u32 dynamic_status:1; - u32 bus_address:1; u32 removable:1; u32 ejectable:1; u32 suprise_removal_ok:1; @@ -169,7 +168,7 @@ struct acpi_device_flags { u32 performance_manageable:1; u32 eject_pending:1; u32 match_driver:1; - u32 reserved:23; + u32 reserved:24; }; /* File System */ @@ -192,10 +191,17 @@ struct acpi_hardware_id { char *id; }; +struct acpi_pnp_type { + u32 hardware_id:1; + u32 bus_address:1; + u32 reserved:30; +}; + struct acpi_device_pnp { - acpi_bus_id bus_id; /* Object name */ + acpi_bus_id bus_id; /* Object name */ + struct acpi_pnp_type type; /* ID type */ acpi_bus_address bus_address; /* _ADR */ - char *unique_id; /* _UID */ + char *unique_id; /* _UID */ struct list_head ids; /* _HID and _CIDs */ acpi_device_name device_name; /* Driver-determined */ acpi_device_class device_class; /* " */ diff --git a/include/linux/acpi.h b/include/linux/acpi.h index bcbdd7484e58..edaf311473e5 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -204,7 +204,7 @@ extern bool wmi_has_guid(const char *guid); #if defined(CONFIG_ACPI_VIDEO) || defined(CONFIG_ACPI_VIDEO_MODULE) extern long acpi_video_get_capabilities(acpi_handle graphics_dev_handle); -extern long acpi_is_video_device(struct acpi_device *device); +extern long acpi_is_video_device(acpi_handle handle); extern void acpi_video_dmi_promote_vendor(void); extern void acpi_video_dmi_demote_vendor(void); extern int acpi_video_backlight_support(void); @@ -217,7 +217,7 @@ static inline long acpi_video_get_capabilities(acpi_handle graphics_dev_handle) return 0; } -static inline long acpi_is_video_device(struct acpi_device *device) +static inline long acpi_is_video_device(acpi_handle handle) { return 0; } -- cgit From 
c58c844187df61ef7cc103d0abb5dd6198bcfcd6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 18 Mar 2013 19:45:14 -0400 Subject: NFS: Don't accept more reads/writes if the open context recovery failed If the state recovery failed, we want to ensure that the application doesn't try to use the same file descriptor for more reads or writes. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4filelayout.c | 8 ++++++++ fs/nfs/nfs4state.c | 16 ++++++++++++++++ fs/nfs/pagelist.c | 2 ++ fs/nfs/read.c | 2 ++ fs/nfs/write.c | 2 ++ include/linux/nfs_fs.h | 1 + 6 files changed, 31 insertions(+) (limited to 'include/linux') diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 1ee5737211d7..4ba32e23eddd 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -305,6 +305,10 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data) { struct nfs_read_data *rdata = data; + if (unlikely(test_bit(NFS_CONTEXT_BAD, &rdata->args.context->flags))) { + rpc_exit(task, -EIO); + return; + } if (filelayout_reset_to_mds(rdata->header->lseg)) { dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid); filelayout_reset_read(rdata); @@ -407,6 +411,10 @@ static void filelayout_write_prepare(struct rpc_task *task, void *data) { struct nfs_write_data *wdata = data; + if (unlikely(test_bit(NFS_CONTEXT_BAD, &wdata->args.context->flags))) { + rpc_exit(task, -EIO); + return; + } if (filelayout_reset_to_mds(wdata->header->lseg)) { dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid); filelayout_reset_write(wdata); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index fec1c5bb4863..8db102c7add6 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1328,9 +1328,25 @@ void nfs_inode_find_state_and_recover(struct inode *inode, nfs4_schedule_state_manager(clp); } +static void nfs4_state_mark_open_context_bad(struct nfs4_state *state) +{ + struct inode *inode = state->inode; + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_open_context *ctx; + + spin_lock(&inode->i_lock); + list_for_each_entry(ctx, &nfsi->open_files, list) { + if (ctx->state != state) + continue; + set_bit(NFS_CONTEXT_BAD, &ctx->flags); + } + spin_unlock(&inode->i_lock); +} + static void nfs4_state_mark_recovery_failed(struct nfs4_state *state, int error) { set_bit(NFS_STATE_RECOVERY_FAILED, &state->flags); + nfs4_state_mark_open_context_bad(state); } diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index e56e846e9d2d..7f0933086b36 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -104,6 +104,8 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode, struct nfs_page *req; struct nfs_lock_context *l_ctx; + if (test_bit(NFS_CONTEXT_BAD, &ctx->flags)) + return ERR_PTR(-EBADF); /* try to allocate the request struct */ req = nfs_page_alloc(); if (req == NULL) diff --git a/fs/nfs/read.c b/fs/nfs/read.c index a5e5d9899d56..70a26c651f09 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -514,6 +514,8 @@ void nfs_read_prepare(struct rpc_task *task, void *calldata) { struct nfs_read_data *data = calldata; NFS_PROTO(data->header->inode)->read_rpc_prepare(task, data); + if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags))) + rpc_exit(task, -EIO); } static const struct rpc_call_ops nfs_read_common_ops = { diff --git a/fs/nfs/write.c b/fs/nfs/write.c index c483cc50b82e..a2c7c28049d5 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1251,6 +1251,8 @@ void nfs_write_prepare(struct rpc_task *task, void *calldata) { struct nfs_write_data *data = calldata; 
NFS_PROTO(data->header->inode)->write_rpc_prepare(task, data); + if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags))) + rpc_exit(task, -EIO); } void nfs_commit_prepare(struct rpc_task *task, void *calldata) diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 1cc25682b20b..f6b1956f3c86 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -77,6 +77,7 @@ struct nfs_open_context { unsigned long flags; #define NFS_CONTEXT_ERROR_WRITE (0) #define NFS_CONTEXT_RESEND_WRITES (1) +#define NFS_CONTEXT_BAD (2) int error; struct list_head list; -- cgit From 9b20614988199fb03580b335a28250922e902098 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 17 Mar 2013 15:52:00 -0400 Subject: NFSv4: The stateid must remain the same for replayed RPC calls If we replay a READ or WRITE call, we should not be changing the stateid. Currently, we may end up doing so, because the stateid is only selected at xdr encode time. This patch ensures that we select the stateid after we get an NFSv4.1 session slot, and that we keep that same stateid across retries. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 4 ++++ fs/nfs/nfs4filelayout.c | 14 ++++++++++---- fs/nfs/nfs4proc.c | 27 +++++++++++++++++++++++---- fs/nfs/nfs4xdr.c | 28 ++-------------------------- include/linux/nfs_xdr.h | 2 ++ 5 files changed, 41 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 9ce90135bf22..8309e98c44f9 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -234,6 +234,10 @@ extern struct rpc_clnt *nfs4_proc_lookup_mountpoint(struct inode *, struct qstr extern int nfs4_proc_secinfo(struct inode *, const struct qstr *, struct nfs4_secinfo_flavors *); extern int nfs4_release_lockowner(struct nfs4_lock_state *); extern const struct xattr_handler *nfs4_xattr_handlers[]; +extern void nfs4_set_rw_stateid(nfs4_stateid *stateid, + const struct nfs_open_context *ctx, + const struct nfs_lock_context *l_ctx, + fmode_t fmode); #if defined(CONFIG_NFS_V4_1) static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server) diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 4ba32e23eddd..22d10623f5ee 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -317,10 +317,13 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data) } rdata->read_done_cb = filelayout_read_done_cb; - nfs41_setup_sequence(rdata->ds_clp->cl_session, + if (nfs41_setup_sequence(rdata->ds_clp->cl_session, &rdata->args.seq_args, &rdata->res.seq_res, - task); + task)) + return; + nfs4_set_rw_stateid(&rdata->args.stateid, rdata->args.context, + rdata->args.lock_context, FMODE_READ); } static void filelayout_read_call_done(struct rpc_task *task, void *data) @@ -421,10 +424,13 @@ static void filelayout_write_prepare(struct rpc_task *task, void *data) rpc_exit(task, 0); return; } - nfs41_setup_sequence(wdata->ds_clp->cl_session, + if (nfs41_setup_sequence(wdata->ds_clp->cl_session, &wdata->args.seq_args, &wdata->res.seq_res, - task); + task)) + return; + nfs4_set_rw_stateid(&wdata->args.stateid, wdata->args.context, + wdata->args.lock_context, FMODE_WRITE); } static void filelayout_write_call_done(struct rpc_task *task, void *data) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c3bbb6c53d61..26176ce3d96a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3454,6 +3454,19 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, return err; } +void 
nfs4_set_rw_stateid(nfs4_stateid *stateid, + const struct nfs_open_context *ctx, + const struct nfs_lock_context *l_ctx, + fmode_t fmode) +{ + const struct nfs_lockowner *lockowner = NULL; + + if (l_ctx != NULL) + lockowner = &l_ctx->lockowner; + nfs4_select_rw_stateid(stateid, ctx->state, fmode, lockowner); +} +EXPORT_SYMBOL_GPL(nfs4_set_rw_stateid); + void __nfs4_read_done_cb(struct nfs_read_data *data) { nfs_invalidate_atime(data->header->inode); @@ -3496,10 +3509,13 @@ static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message static void nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) { - nfs4_setup_sequence(NFS_SERVER(data->header->inode), + if (nfs4_setup_sequence(NFS_SERVER(data->header->inode), &data->args.seq_args, &data->res.seq_res, - task); + task)) + return; + nfs4_set_rw_stateid(&data->args.stateid, data->args.context, + data->args.lock_context, FMODE_READ); } static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data) @@ -3560,10 +3576,13 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag static void nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data) { - nfs4_setup_sequence(NFS_SERVER(data->header->inode), + if (nfs4_setup_sequence(NFS_SERVER(data->header->inode), &data->args.seq_args, &data->res.seq_res, - task); + task)) + return; + nfs4_set_rw_stateid(&data->args.stateid, data->args.context, + data->args.lock_context, FMODE_WRITE); } static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index e3edda554ac7..9d328777b4c1 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1506,35 +1506,12 @@ static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr) encode_op_hdr(xdr, OP_PUTROOTFH, decode_putrootfh_maxsz, hdr); } -static void encode_open_stateid(struct xdr_stream *xdr, - const struct nfs_open_context *ctx, - const struct nfs_lock_context *l_ctx, - fmode_t fmode, - int zero_seqid) -{ - nfs4_stateid stateid; - - if (ctx->state != NULL) { - const struct nfs_lockowner *lockowner = NULL; - - if (l_ctx != NULL) - lockowner = &l_ctx->lockowner; - nfs4_select_rw_stateid(&stateid, ctx->state, - fmode, lockowner); - if (zero_seqid) - stateid.seqid = 0; - encode_nfs4_stateid(xdr, &stateid); - } else - encode_nfs4_stateid(xdr, &zero_stateid); -} - static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr) { __be32 *p; encode_op_hdr(xdr, OP_READ, decode_read_maxsz, hdr); - encode_open_stateid(xdr, args->context, args->lock_context, - FMODE_READ, hdr->minorversion); + encode_nfs4_stateid(xdr, &args->stateid); p = reserve_space(xdr, 12); p = xdr_encode_hyper(p, args->offset); @@ -1670,8 +1647,7 @@ static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *arg __be32 *p; encode_op_hdr(xdr, OP_WRITE, decode_write_maxsz, hdr); - encode_open_stateid(xdr, args->context, args->lock_context, - FMODE_WRITE, hdr->minorversion); + encode_nfs4_stateid(xdr, &args->stateid); p = reserve_space(xdr, 16); p = xdr_encode_hyper(p, args->offset); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 4b993d358dad..90a4aa190b43 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -486,6 +486,7 @@ struct nfs_readargs { struct nfs_fh * fh; struct nfs_open_context *context; struct nfs_lock_context *lock_context; + nfs4_stateid stateid; __u64 offset; __u32 count; unsigned 
int pgbase; @@ -507,6 +508,7 @@ struct nfs_writeargs { struct nfs_fh * fh; struct nfs_open_context *context; struct nfs_lock_context *lock_context; + nfs4_stateid stateid; __u64 offset; __u32 count; enum nfs3_stable_how stable; -- cgit From 3b66486c4c7136f8d4bbe1306d581fadc6bce4c7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 17 Mar 2013 15:31:15 -0400 Subject: NFSv4.1: Select the "most recent locking state" for read/write/setattr stateids Follow the practice described in section 8.2.2 of RFC5661: When sending a read/write or setattr stateid, set the seqid field to zero in order to signal that the NFS server should apply the most recent locking state. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 3 ++- fs/nfs/nfs4state.c | 2 ++ include/linux/nfs_fs_sb.h | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 22c80f9ed824..625a729ebcca 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6841,7 +6841,8 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { .init_caps = NFS_CAP_READDIRPLUS | NFS_CAP_ATOMIC_OPEN | NFS_CAP_CHANGE_ATTR - | NFS_CAP_POSIX_LOCK, + | NFS_CAP_POSIX_LOCK + | NFS_CAP_STATEID_NFSV41, .call_sync = nfs4_call_sync_sequence, .match_stateid = nfs41_match_stateid, .find_root_sec = nfs41_find_root_sec, diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 4e95bd72f480..685b1e953ed8 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1053,6 +1053,8 @@ int nfs4_select_rw_stateid(nfs4_stateid *dst, struct nfs4_state *state, goto out; ret = nfs4_copy_open_stateid(dst, state); out: + if (nfs_server_capable(state->inode, NFS_CAP_STATEID_NFSV41)) + dst->seqid = 0; return ret; } diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 6c6ed153a9b4..74c9e52c9338 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -197,5 +197,6 @@ struct nfs_server { #define NFS_CAP_MTIME (1U << 13) #define NFS_CAP_POSIX_LOCK (1U << 14) #define NFS_CAP_UIDGID_NOMAP (1U << 15) +#define NFS_CAP_STATEID_NFSV41 (1U << 16) #endif -- cgit From 49f9a0fafd844c32f2abada047c0b9a5ba0d6255 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 15 Mar 2013 16:44:28 -0400 Subject: NFSv4.1: Enable open-by-filehandle Sometimes, we actually _want_ to do open-by-filehandle, for instance when recovering opens after a network partition, or when called from nfs4_file_open. Enable that functionality using a new capability, NFS_CAP_ATOMIC_OPEN_V1, which is only enabled for NFSv4.1 servers that support it.
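The graceful fallback relies on clearing the capability bit and retrying when an older server rejects the new open claim type with EINVAL, as nfs4_clear_cap_atomic_open_v1() does in the diff below. A condensed sketch of that idiom, using simplified stand-ins for the real nfs_server and nfs4_exception structures:

#include <linux/errno.h>
#include <linux/types.h>

struct server { unsigned int caps; };	/* stand-in for nfs_server */
struct exception { int retry; };	/* stand-in for nfs4_exception */

#define CAP_ATOMIC_OPEN_V1 (1U << 17)

/* Returns true when the error means "server lacks the capability". */
static bool clear_cap_and_retry(struct server *server, int err,
				struct exception *exception)
{
	if (err != -EINVAL || !(server->caps & CAP_ATOMIC_OPEN_V1))
		return false;
	server->caps &= ~CAP_ATOMIC_OPEN_V1;	/* don't try it again */
	exception->retry = 1;			/* redo the operation */
	return true;
}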
Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 2 ++ fs/nfs/nfs4proc.c | 53 ++++++++++++++++++++++++++++++++++++++++------- include/linux/nfs_fs_sb.h | 1 + 3 files changed, 49 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index f23f455be42b..e093e73178b7 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1486,6 +1486,8 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags) goto no_open; if (d_mountpoint(dentry)) goto no_open; + if (NFS_SB(dentry->d_sb)->caps & NFS_CAP_ATOMIC_OPEN_V1) + goto no_open; inode = dentry->d_inode; parent = dget_parent(dentry); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 7bbb06f280fc..732b76f703d6 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -767,6 +767,35 @@ struct nfs4_opendata { int cancelled; }; +static bool nfs4_clear_cap_atomic_open_v1(struct nfs_server *server, + int err, struct nfs4_exception *exception) +{ + if (err != -EINVAL) + return false; + if (!(server->caps & NFS_CAP_ATOMIC_OPEN_V1)) + return false; + server->caps &= ~NFS_CAP_ATOMIC_OPEN_V1; + exception->retry = 1; + return true; +} + +static enum open_claim_type4 +nfs4_map_atomic_open_claim(struct nfs_server *server, + enum open_claim_type4 claim) +{ + if (server->caps & NFS_CAP_ATOMIC_OPEN_V1) + return claim; + switch (claim) { + default: + return claim; + case NFS4_OPEN_CLAIM_FH: + return NFS4_OPEN_CLAIM_NULL; + case NFS4_OPEN_CLAIM_DELEG_CUR_FH: + return NFS4_OPEN_CLAIM_DELEGATE_CUR; + case NFS4_OPEN_CLAIM_DELEG_PREV_FH: + return NFS4_OPEN_CLAIM_DELEGATE_PREV; + } +} static void nfs4_init_opendata_res(struct nfs4_opendata *p) { @@ -818,8 +847,8 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, p->o_arg.server = server; p->o_arg.bitmask = server->attr_bitmask; p->o_arg.open_bitmap = &nfs4_fattr_bitmap[0]; - p->o_arg.claim = claim; - switch (claim) { + p->o_arg.claim = nfs4_map_atomic_open_claim(server, claim); + switch (p->o_arg.claim) { case NFS4_OPEN_CLAIM_NULL: case NFS4_OPEN_CLAIM_DELEGATE_CUR: case NFS4_OPEN_CLAIM_DELEGATE_PREV: @@ -1326,6 +1355,8 @@ static int nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state int err; do { err = _nfs4_do_open_reclaim(ctx, state); + if (nfs4_clear_cap_atomic_open_v1(server, err, &exception)) + continue; if (err != -NFS4ERR_DELAY) break; nfs4_handle_exception(server, err, &exception); @@ -1741,7 +1772,7 @@ static int _nfs4_open_expired(struct nfs_open_context *ctx, struct nfs4_state *s int ret; opendata = nfs4_open_recoverdata_alloc(ctx, state, - NFS4_OPEN_CLAIM_NULL); + NFS4_OPEN_CLAIM_FH); if (IS_ERR(opendata)) return PTR_ERR(opendata); ret = nfs4_open_recover(opendata, state); @@ -1759,6 +1790,8 @@ static int nfs4_do_open_expired(struct nfs_open_context *ctx, struct nfs4_state do { err = _nfs4_open_expired(ctx, state); + if (nfs4_clear_cap_atomic_open_v1(server, err, &exception)) + continue; switch (err) { default: goto out; @@ -1926,6 +1959,7 @@ static int _nfs4_do_open(struct inode *dir, struct nfs4_state *state = NULL; struct nfs_server *server = NFS_SERVER(dir); struct nfs4_opendata *opendata; + enum open_claim_type4 claim = NFS4_OPEN_CLAIM_NULL; int status; /* Protect against reboot recovery conflicts */ @@ -1941,9 +1975,10 @@ static int _nfs4_do_open(struct inode *dir, if (dentry->d_inode != NULL) nfs4_return_incompatible_delegation(dentry->d_inode, fmode); status = -ENOMEM; + if (dentry->d_inode) + claim = NFS4_OPEN_CLAIM_FH; opendata = nfs4_opendata_alloc(dentry, sp, fmode, flags, sattr, - 
NFS4_OPEN_CLAIM_NULL, - GFP_KERNEL); + claim, GFP_KERNEL); if (opendata == NULL) goto err_put_state_owner; @@ -2001,6 +2036,7 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct rpc_cred *cred, struct nfs4_threshold **ctx_th) { + struct nfs_server *server = NFS_SERVER(dir); struct nfs4_exception exception = { }; struct nfs4_state *res; int status; @@ -2044,7 +2080,9 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, exception.retry = 1; continue; } - res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(dir), + if (nfs4_clear_cap_atomic_open_v1(server, status, &exception)) + continue; + res = ERR_PTR(nfs4_handle_exception(server, status, &exception)); } while (exception.retry); return res; @@ -6858,7 +6896,8 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { | NFS_CAP_ATOMIC_OPEN | NFS_CAP_CHANGE_ATTR | NFS_CAP_POSIX_LOCK - | NFS_CAP_STATEID_NFSV41, + | NFS_CAP_STATEID_NFSV41 + | NFS_CAP_ATOMIC_OPEN_V1, .call_sync = nfs4_call_sync_sequence, .match_stateid = nfs41_match_stateid, .find_root_sec = nfs41_find_root_sec, diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 74c9e52c9338..d8fdfdc7a8fe 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -198,5 +198,6 @@ struct nfs_server { #define NFS_CAP_POSIX_LOCK (1U << 14) #define NFS_CAP_UIDGID_NOMAP (1U << 15) #define NFS_CAP_STATEID_NFSV41 (1U << 16) +#define NFS_CAP_ATOMIC_OPEN_V1 (1U << 17) #endif -- cgit From e44b0ceee4cc2a926225e73ac1e20b9a5bb22c2d Mon Sep 17 00:00:00 2001 From: Kenneth Heitke Date: Tue, 12 Mar 2013 11:41:46 -0700 Subject: add single-wire serial bus interface (SSBI) driver SSBI is the Qualcomm single-wire serial bus interface used to connect the MSM devices to the PMIC and other devices. Since SSBI only supports a single slave, the driver gets the name of the slave device passed in from the board file through the master device's platform data. SSBI registers pretty early (postcore), so that the PMIC can come up before the board init. This is useful if the board init requires the use of gpios that are connected through the PMIC. Based on a patch by Dima Zavin that can be found at: http://android.git.kernel.org/?p=kernel/msm.git;a=commitdiff;h=eb060bac4 This patch adds PMIC Arbiter support for the MSM8660. The PMIC Arbiter is a hardware wrapper around the SSBI 2.0 controller that is designed to overcome concurrency issues and security limitations. A controller_type field is added to the platform data to specify the type of the SSBI controller (1.0, 2.0, or PMIC Arbiter). 
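For illustration, a hypothetical slave driver (not part of this patch) would reach the bus through its parent device, since the master creates the slave's platform device:

#include <linux/msm_ssbi.h>
#include <linux/platform_device.h>

/* Hypothetical SSBI client probe; 0x002 is an example register. */
static int my_pmic_probe(struct platform_device *pdev)
{
	u8 rev;
	int ret;

	/* pdev->dev.parent is the SSBI master that created us */
	ret = msm_ssbi_read(pdev->dev.parent, 0x002, &rev, sizeof(rev));
	if (ret)
		return ret;

	dev_info(&pdev->dev, "PMIC revision: 0x%02x\n", rev);
	return 0;
}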
[davidb@codeaurora.org: I've moved this driver into drivers/ssbi/ and added an include for linux/module.h so that it will compile] Signed-off-by: Kenneth Heitke Signed-off-by: David Brown Signed-off-by: Greg Kroah-Hartman --- drivers/Kconfig | 2 + drivers/Makefile | 1 + drivers/ssbi/Kconfig | 16 ++ drivers/ssbi/Makefile | 1 + drivers/ssbi/ssbi.c | 397 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/msm_ssbi.h | 49 ++++++ 6 files changed, 466 insertions(+) create mode 100644 drivers/ssbi/Kconfig create mode 100644 drivers/ssbi/Makefile create mode 100644 drivers/ssbi/ssbi.c create mode 100644 include/linux/msm_ssbi.h (limited to 'include/linux') diff --git a/drivers/Kconfig b/drivers/Kconfig index 202fa6d051b9..78a956e286e6 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -52,6 +52,8 @@ source "drivers/i2c/Kconfig" source "drivers/spi/Kconfig" +source "drivers/ssbi/Kconfig" + source "drivers/hsi/Kconfig" source "drivers/pps/Kconfig" diff --git a/drivers/Makefile b/drivers/Makefile index dce39a95fa71..778821ba3f68 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -114,6 +114,7 @@ obj-y += firmware/ obj-$(CONFIG_CRYPTO) += crypto/ obj-$(CONFIG_SUPERH) += sh/ obj-$(CONFIG_ARCH_SHMOBILE) += sh/ +obj-$(CONFIG_MSM_SSBI) += ssbi/ ifndef CONFIG_ARCH_USES_GETTIMEOFFSET obj-y += clocksource/ endif diff --git a/drivers/ssbi/Kconfig b/drivers/ssbi/Kconfig new file mode 100644 index 000000000000..b57c41bd0119 --- /dev/null +++ b/drivers/ssbi/Kconfig @@ -0,0 +1,16 @@ +# +# MSM SSBI bus support +# + +menu "Qualcomm MSM SSBI bus support" + +config MSM_SSBI + bool "Qualcomm Single-wire Serial Bus Interface (SSBI)" + help + If you say yes to this option, support will be included for the + built-in SSBI interface on Qualcomm MSM family processors. + + This is required for communicating with Qualcomm PMICs and + other devices that have the SSBI interface. + +endmenu diff --git a/drivers/ssbi/Makefile b/drivers/ssbi/Makefile new file mode 100644 index 000000000000..22e408f45d61 --- /dev/null +++ b/drivers/ssbi/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_MSM_SSBI) += ssbi.o diff --git a/drivers/ssbi/ssbi.c b/drivers/ssbi/ssbi.c new file mode 100644 index 000000000000..8b0b10d6e1de --- /dev/null +++ b/drivers/ssbi/ssbi.c @@ -0,0 +1,397 @@ +/* Copyright (c) 2009-2011, Code Aurora Forum. All rights reserved. + * Copyright (c) 2010, Google Inc. + * + * Original authors: Code Aurora Forum + * + * Author: Dima Zavin + * - Largely rewritten from original to not be an i2c driver. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#define pr_fmt(fmt) "%s: " fmt, __func__ + +#include +#include +#include +#include +#include +#include +#include +#include + +/* SSBI 2.0 controller registers */ +#define SSBI2_CMD 0x0008 +#define SSBI2_RD 0x0010 +#define SSBI2_STATUS 0x0014 +#define SSBI2_MODE2 0x001C + +/* SSBI_CMD fields */ +#define SSBI_CMD_RDWRN (1 << 24) + +/* SSBI_STATUS fields */ +#define SSBI_STATUS_RD_READY (1 << 2) +#define SSBI_STATUS_READY (1 << 1) +#define SSBI_STATUS_MCHN_BUSY (1 << 0) + +/* SSBI_MODE2 fields */ +#define SSBI_MODE2_REG_ADDR_15_8_SHFT 0x04 +#define SSBI_MODE2_REG_ADDR_15_8_MASK (0x7f << SSBI_MODE2_REG_ADDR_15_8_SHFT) + +#define SET_SSBI_MODE2_REG_ADDR_15_8(MD, AD) \ + (((MD) & 0x0F) | ((((AD) >> 8) << SSBI_MODE2_REG_ADDR_15_8_SHFT) & \ + SSBI_MODE2_REG_ADDR_15_8_MASK)) + +/* SSBI PMIC Arbiter command registers */ +#define SSBI_PA_CMD 0x0000 +#define SSBI_PA_RD_STATUS 0x0004 + +/* SSBI_PA_CMD fields */ +#define SSBI_PA_CMD_RDWRN (1 << 24) +#define SSBI_PA_CMD_ADDR_MASK 0x7fff /* REG_ADDR_7_0, REG_ADDR_8_14*/ + +/* SSBI_PA_RD_STATUS fields */ +#define SSBI_PA_RD_STATUS_TRANS_DONE (1 << 27) +#define SSBI_PA_RD_STATUS_TRANS_DENIED (1 << 26) + +#define SSBI_TIMEOUT_US 100 + +struct msm_ssbi { + struct device *dev; + struct device *slave; + void __iomem *base; + spinlock_t lock; + enum msm_ssbi_controller_type controller_type; + int (*read)(struct msm_ssbi *, u16 addr, u8 *buf, int len); + int (*write)(struct msm_ssbi *, u16 addr, u8 *buf, int len); +}; + +#define to_msm_ssbi(dev) platform_get_drvdata(to_platform_device(dev)) + +static inline u32 ssbi_readl(struct msm_ssbi *ssbi, u32 reg) +{ + return readl(ssbi->base + reg); +} + +static inline void ssbi_writel(struct msm_ssbi *ssbi, u32 val, u32 reg) +{ + writel(val, ssbi->base + reg); +} + +static int ssbi_wait_mask(struct msm_ssbi *ssbi, u32 set_mask, u32 clr_mask) +{ + u32 timeout = SSBI_TIMEOUT_US; + u32 val; + + while (timeout--) { + val = ssbi_readl(ssbi, SSBI2_STATUS); + if (((val & set_mask) == set_mask) && ((val & clr_mask) == 0)) + return 0; + udelay(1); + } + + dev_err(ssbi->dev, "%s: timeout (status %x set_mask %x clr_mask %x)\n", + __func__, ssbi_readl(ssbi, SSBI2_STATUS), set_mask, clr_mask); + return -ETIMEDOUT; +} + +static int +msm_ssbi_read_bytes(struct msm_ssbi *ssbi, u16 addr, u8 *buf, int len) +{ + u32 cmd = SSBI_CMD_RDWRN | ((addr & 0xff) << 16); + int ret = 0; + + if (ssbi->controller_type == MSM_SBI_CTRL_SSBI2) { + u32 mode2 = ssbi_readl(ssbi, SSBI2_MODE2); + mode2 = SET_SSBI_MODE2_REG_ADDR_15_8(mode2, addr); + ssbi_writel(ssbi, mode2, SSBI2_MODE2); + } + + while (len) { + ret = ssbi_wait_mask(ssbi, SSBI_STATUS_READY, 0); + if (ret) + goto err; + + ssbi_writel(ssbi, cmd, SSBI2_CMD); + ret = ssbi_wait_mask(ssbi, SSBI_STATUS_RD_READY, 0); + if (ret) + goto err; + *buf++ = ssbi_readl(ssbi, SSBI2_RD) & 0xff; + len--; + } + +err: + return ret; +} + +static int +msm_ssbi_write_bytes(struct msm_ssbi *ssbi, u16 addr, u8 *buf, int len) +{ + int ret = 0; + + if (ssbi->controller_type == MSM_SBI_CTRL_SSBI2) { + u32 mode2 = ssbi_readl(ssbi, SSBI2_MODE2); + mode2 = SET_SSBI_MODE2_REG_ADDR_15_8(mode2, addr); + ssbi_writel(ssbi, mode2, SSBI2_MODE2); + } + + while (len) { + ret = ssbi_wait_mask(ssbi, SSBI_STATUS_READY, 0); + if (ret) + goto err; + + ssbi_writel(ssbi, ((addr & 0xff) << 16) | *buf, SSBI2_CMD); + ret = ssbi_wait_mask(ssbi, 0, SSBI_STATUS_MCHN_BUSY); + if (ret) + goto err; + buf++; + len--; + } + +err: + return ret; +} + +static inline int +msm_ssbi_pa_transfer(struct msm_ssbi *ssbi, u32 cmd, u8 *data) +{ + u32 timeout 
= SSBI_TIMEOUT_US; + u32 rd_status = 0; + + ssbi_writel(ssbi, cmd, SSBI_PA_CMD); + + while (timeout--) { + rd_status = ssbi_readl(ssbi, SSBI_PA_RD_STATUS); + + if (rd_status & SSBI_PA_RD_STATUS_TRANS_DENIED) { + dev_err(ssbi->dev, "%s: transaction denied (0x%x)\n", + __func__, rd_status); + return -EPERM; + } + + if (rd_status & SSBI_PA_RD_STATUS_TRANS_DONE) { + if (data) + *data = rd_status & 0xff; + return 0; + } + udelay(1); + } + + dev_err(ssbi->dev, "%s: timeout, status 0x%x\n", __func__, rd_status); + return -ETIMEDOUT; +} + +static int +msm_ssbi_pa_read_bytes(struct msm_ssbi *ssbi, u16 addr, u8 *buf, int len) +{ + u32 cmd; + int ret = 0; + + cmd = SSBI_PA_CMD_RDWRN | (addr & SSBI_PA_CMD_ADDR_MASK) << 8; + + while (len) { + ret = msm_ssbi_pa_transfer(ssbi, cmd, buf); + if (ret) + goto err; + buf++; + len--; + } + +err: + return ret; +} + +static int +msm_ssbi_pa_write_bytes(struct msm_ssbi *ssbi, u16 addr, u8 *buf, int len) +{ + u32 cmd; + int ret = 0; + + while (len) { + cmd = (addr & SSBI_PA_CMD_ADDR_MASK) << 8 | *buf; + ret = msm_ssbi_pa_transfer(ssbi, cmd, NULL); + if (ret) + goto err; + buf++; + len--; + } + +err: + return ret; +} + +int msm_ssbi_read(struct device *dev, u16 addr, u8 *buf, int len) +{ + struct msm_ssbi *ssbi = to_msm_ssbi(dev); + unsigned long flags; + int ret; + + if (ssbi->dev != dev) + return -ENXIO; + + spin_lock_irqsave(&ssbi->lock, flags); + ret = ssbi->read(ssbi, addr, buf, len); + spin_unlock_irqrestore(&ssbi->lock, flags); + + return ret; +} +EXPORT_SYMBOL(msm_ssbi_read); + +int msm_ssbi_write(struct device *dev, u16 addr, u8 *buf, int len) +{ + struct msm_ssbi *ssbi = to_msm_ssbi(dev); + unsigned long flags; + int ret; + + if (ssbi->dev != dev) + return -ENXIO; + + spin_lock_irqsave(&ssbi->lock, flags); + ret = ssbi->write(ssbi, addr, buf, len); + spin_unlock_irqrestore(&ssbi->lock, flags); + + return ret; +} +EXPORT_SYMBOL(msm_ssbi_write); + +static int msm_ssbi_add_slave(struct msm_ssbi *ssbi, + const struct msm_ssbi_slave_info *slave) +{ + struct platform_device *slave_pdev; + int ret; + + if (ssbi->slave) { + pr_err("slave already attached??\n"); + return -EBUSY; + } + + slave_pdev = platform_device_alloc(slave->name, -1); + if (!slave_pdev) { + pr_err("cannot allocate pdev for slave '%s'", slave->name); + ret = -ENOMEM; + goto err; + } + + slave_pdev->dev.parent = ssbi->dev; + slave_pdev->dev.platform_data = slave->platform_data; + + ret = platform_device_add(slave_pdev); + if (ret) { + pr_err("cannot add slave platform device for '%s'\n", + slave->name); + goto err; + } + + ssbi->slave = &slave_pdev->dev; + return 0; + +err: + if (slave_pdev) + platform_device_put(slave_pdev); + return ret; +} + +static int msm_ssbi_probe(struct platform_device *pdev) +{ + const struct msm_ssbi_platform_data *pdata = pdev->dev.platform_data; + struct resource *mem_res; + struct msm_ssbi *ssbi; + int ret = 0; + + if (!pdata) { + pr_err("missing platform data\n"); + return -EINVAL; + } + + pr_debug("%s\n", pdata->slave.name); + + ssbi = kzalloc(sizeof(struct msm_ssbi), GFP_KERNEL); + if (!ssbi) { + pr_err("can not allocate ssbi_data\n"); + return -ENOMEM; + } + + mem_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!mem_res) { + pr_err("missing mem resource\n"); + ret = -EINVAL; + goto err_get_mem_res; + } + + ssbi->base = ioremap(mem_res->start, resource_size(mem_res)); + if (!ssbi->base) { + pr_err("ioremap of 0x%p failed\n", (void *)mem_res->start); + ret = -EINVAL; + goto err_ioremap; + } + ssbi->dev = &pdev->dev; + platform_set_drvdata(pdev, ssbi); 
+ + ssbi->controller_type = pdata->controller_type; + if (ssbi->controller_type == MSM_SBI_CTRL_PMIC_ARBITER) { + ssbi->read = msm_ssbi_pa_read_bytes; + ssbi->write = msm_ssbi_pa_write_bytes; + } else { + ssbi->read = msm_ssbi_read_bytes; + ssbi->write = msm_ssbi_write_bytes; + } + + spin_lock_init(&ssbi->lock); + + ret = msm_ssbi_add_slave(ssbi, &pdata->slave); + if (ret) + goto err_ssbi_add_slave; + + return 0; + +err_ssbi_add_slave: + platform_set_drvdata(pdev, NULL); + iounmap(ssbi->base); +err_ioremap: +err_get_mem_res: + kfree(ssbi); + return ret; +} + +static int msm_ssbi_remove(struct platform_device *pdev) +{ + struct msm_ssbi *ssbi = platform_get_drvdata(pdev); + + platform_set_drvdata(pdev, NULL); + iounmap(ssbi->base); + kfree(ssbi); + return 0; +} + +static struct platform_driver msm_ssbi_driver = { + .probe = msm_ssbi_probe, + .remove = __exit_p(msm_ssbi_remove), + .driver = { + .name = "msm_ssbi", + .owner = THIS_MODULE, + }, +}; + +static int __init msm_ssbi_init(void) +{ + return platform_driver_register(&msm_ssbi_driver); +} +postcore_initcall(msm_ssbi_init); + +static void __exit msm_ssbi_exit(void) +{ + platform_driver_unregister(&msm_ssbi_driver); +} +module_exit(msm_ssbi_exit) + +MODULE_LICENSE("GPL v2"); +MODULE_VERSION("1.0"); +MODULE_ALIAS("platform:msm_ssbi"); +MODULE_AUTHOR("Dima Zavin "); diff --git a/include/linux/msm_ssbi.h b/include/linux/msm_ssbi.h new file mode 100644 index 000000000000..cfa47df6d003 --- /dev/null +++ b/include/linux/msm_ssbi.h @@ -0,0 +1,49 @@ +/* Copyright (C) 2010 Google, Inc. + * Copyright (c) 2011, Code Aurora Forum. All rights reserved. + * Author: Dima Zavin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _LINUX_MSM_SSBI_H +#define _LINUX_MSM_SSBI_H + +#include + +struct msm_ssbi_slave_info { + const char *name; + void *platform_data; +}; + +enum msm_ssbi_controller_type { + MSM_SBI_CTRL_SSBI = 0, + MSM_SBI_CTRL_SSBI2, + MSM_SBI_CTRL_PMIC_ARBITER, +}; + +struct msm_ssbi_platform_data { + struct msm_ssbi_slave_info slave; + enum msm_ssbi_controller_type controller_type; +}; + +#ifdef CONFIG_MSM_SSBI +int msm_ssbi_write(struct device *dev, u16 addr, u8 *buf, int len); +int msm_ssbi_read(struct device *dev, u16 addr, u8 *buf, int len); +#else +static inline int msm_ssbi_write(struct device *dev, u16 addr, u8 *buf, int len) +{ + return -ENXIO; +} +static inline int msm_ssbi_read(struct device *dev, u16 addr, u8 *buf, int len) +{ + return -ENXIO; +} +#endif +#endif -- cgit From 4a6692e2ac4c6b09235a9568468dd83a380c271d Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 12 Mar 2013 11:41:49 -0700 Subject: ssbi: Allow compilation as a module The ssbi driver's read/write entry points are protected with wrappers in the case when the driver isn't enabled. These wrappers don't make any sense, since a client of the SSBI bus won't work without it. Make these just regular functions, so that the SSBI driver can be built as a module. 
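For context, the wrappers being removed follow the usual optional-dependency idiom, sketched below with a hypothetical CONFIG_FOO; the pattern only makes sense when callers can genuinely function with the subsystem compiled out, which is not the case for SSBI clients:

#include <linux/device.h>
#include <linux/errno.h>
#include <linux/types.h>

#ifdef CONFIG_FOO
int foo_read(struct device *dev, u16 addr, u8 *buf, int len);
#else
/* Stub: lets optional callers link with the subsystem disabled, but
 * hides misconfiguration from mandatory callers, which just see
 * -ENXIO at run time. */
static inline int foo_read(struct device *dev, u16 addr, u8 *buf, int len)
{
	return -ENXIO;
}
#endif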
Signed-off-by: David Brown Signed-off-by: Greg Kroah-Hartman --- drivers/ssbi/Kconfig | 2 +- include/linux/msm_ssbi.h | 11 ----------- 2 files changed, 1 insertion(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/ssbi/Kconfig b/drivers/ssbi/Kconfig index b57c41bd0119..c7bc534ddf50 100644 --- a/drivers/ssbi/Kconfig +++ b/drivers/ssbi/Kconfig @@ -5,7 +5,7 @@ menu "Qualcomm MSM SSBI bus support" config MSM_SSBI - bool "Qualcomm Single-wire Serial Bus Interface (SSBI)" + tristate "Qualcomm Single-wire Serial Bus Interface (SSBI)" help If you say yes to this option, support will be included for the built-in SSBI interface on Qualcomm MSM family processors. diff --git a/include/linux/msm_ssbi.h b/include/linux/msm_ssbi.h index cfa47df6d003..0fe245bb2940 100644 --- a/include/linux/msm_ssbi.h +++ b/include/linux/msm_ssbi.h @@ -33,17 +33,6 @@ struct msm_ssbi_platform_data { enum msm_ssbi_controller_type controller_type; }; -#ifdef CONFIG_MSM_SSBI int msm_ssbi_write(struct device *dev, u16 addr, u8 *buf, int len); int msm_ssbi_read(struct device *dev, u16 addr, u8 *buf, int len); -#else -static inline int msm_ssbi_write(struct device *dev, u16 addr, u8 *buf, int len) -{ - return -ENXIO; -} -static inline int msm_ssbi_read(struct device *dev, u16 addr, u8 *buf, int len) -{ - return -ENXIO; -} -#endif #endif -- cgit From ce44bf5b5544cbe6358abb01f039361a99b80901 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 12 Mar 2013 11:41:54 -0700 Subject: SSBI: Remove MSM_ prefix from SSBI drivers Although the SSBI bus is currently only used on MSM SoCs, it is still a bus in its own right. Remove the msm_ prefix from the driver and its symbols. Clients can now refer directly to ssbi_write() and ssbi_read(). Signed-off-by: David Brown Signed-off-by: Greg Kroah-Hartman --- drivers/Makefile | 2 +- drivers/mfd/Kconfig | 2 +- drivers/mfd/pm8921-core.c | 14 ++++---- drivers/ssbi/Kconfig | 4 +-- drivers/ssbi/Makefile | 2 +- drivers/ssbi/ssbi.c | 86 +++++++++++++++++++++++------------------------ include/linux/msm_ssbi.h | 38 --------------------- include/linux/ssbi.h | 38 +++++++++++++++++++++ 8 files changed, 93 insertions(+), 93 deletions(-) delete mode 100644 include/linux/msm_ssbi.h create mode 100644 include/linux/ssbi.h (limited to 'include/linux') diff --git a/drivers/Makefile b/drivers/Makefile index 778821ba3f68..4865ed24708a 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -114,7 +114,7 @@ obj-y += firmware/ obj-$(CONFIG_CRYPTO) += crypto/ obj-$(CONFIG_SUPERH) += sh/ obj-$(CONFIG_ARCH_SHMOBILE) += sh/ -obj-$(CONFIG_MSM_SSBI) += ssbi/ +obj-$(CONFIG_SSBI) += ssbi/ ifndef CONFIG_ARCH_USES_GETTIMEOFFSET obj-y += clocksource/ endif diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 671f5b171c73..5bfa7bb555b7 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -990,7 +990,7 @@ config MFD_PM8XXX config MFD_PM8921_CORE tristate "Qualcomm PM8921 PMIC chip" - depends on MSM_SSBI + depends on SSBI select MFD_CORE select MFD_PM8XXX help diff --git a/drivers/mfd/pm8921-core.c b/drivers/mfd/pm8921-core.c index d4b297cbd801..ecc137ffa8c3 100644 --- a/drivers/mfd/pm8921-core.c +++ b/drivers/mfd/pm8921-core.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include @@ -35,7 +35,7 @@ static int pm8921_readb(const struct device *dev, u16 addr, u8 *val) const struct pm8xxx_drvdata *pm8921_drvdata = dev_get_drvdata(dev); const struct pm8921 *pmic = pm8921_drvdata->pm_chip_data; - return msm_ssbi_read(pmic->dev->parent, addr, val, 1); +
return ssbi_read(pmic->dev->parent, addr, val, 1); } static int pm8921_writeb(const struct device *dev, u16 addr, u8 val) @@ -43,7 +43,7 @@ static int pm8921_writeb(const struct device *dev, u16 addr, u8 val) const struct pm8xxx_drvdata *pm8921_drvdata = dev_get_drvdata(dev); const struct pm8921 *pmic = pm8921_drvdata->pm_chip_data; - return msm_ssbi_write(pmic->dev->parent, addr, &val, 1); + return ssbi_write(pmic->dev->parent, addr, &val, 1); } static int pm8921_read_buf(const struct device *dev, u16 addr, u8 *buf, @@ -52,7 +52,7 @@ static int pm8921_read_buf(const struct device *dev, u16 addr, u8 *buf, const struct pm8xxx_drvdata *pm8921_drvdata = dev_get_drvdata(dev); const struct pm8921 *pmic = pm8921_drvdata->pm_chip_data; - return msm_ssbi_read(pmic->dev->parent, addr, buf, cnt); + return ssbi_read(pmic->dev->parent, addr, buf, cnt); } static int pm8921_write_buf(const struct device *dev, u16 addr, u8 *buf, @@ -61,7 +61,7 @@ static int pm8921_write_buf(const struct device *dev, u16 addr, u8 *buf, const struct pm8xxx_drvdata *pm8921_drvdata = dev_get_drvdata(dev); const struct pm8921 *pmic = pm8921_drvdata->pm_chip_data; - return msm_ssbi_write(pmic->dev->parent, addr, buf, cnt); + return ssbi_write(pmic->dev->parent, addr, buf, cnt); } static int pm8921_read_irq_stat(const struct device *dev, int irq) @@ -124,7 +124,7 @@ static int pm8921_probe(struct platform_device *pdev) } /* Read PMIC chip revision */ - rc = msm_ssbi_read(pdev->dev.parent, REG_HWREV, &val, sizeof(val)); + rc = ssbi_read(pdev->dev.parent, REG_HWREV, &val, sizeof(val)); if (rc) { pr_err("Failed to read hw rev reg %d:rc=%d\n", REG_HWREV, rc); goto err_read_rev; @@ -133,7 +133,7 @@ static int pm8921_probe(struct platform_device *pdev) rev = val; /* Read PMIC chip revision 2 */ - rc = msm_ssbi_read(pdev->dev.parent, REG_HWREV_2, &val, sizeof(val)); + rc = ssbi_read(pdev->dev.parent, REG_HWREV_2, &val, sizeof(val)); if (rc) { pr_err("Failed to read hw rev 2 reg %d:rc=%d\n", REG_HWREV_2, rc); diff --git a/drivers/ssbi/Kconfig b/drivers/ssbi/Kconfig index c7bc534ddf50..1ae4040afedd 100644 --- a/drivers/ssbi/Kconfig +++ b/drivers/ssbi/Kconfig @@ -1,10 +1,10 @@ # -# MSM SSBI bus support +# SSBI bus support # menu "Qualcomm MSM SSBI bus support" -config MSM_SSBI +config SSBI tristate "Qualcomm Single-wire Serial Bus Interface (SSBI)" help If you say yes to this option, support will be included for the diff --git a/drivers/ssbi/Makefile b/drivers/ssbi/Makefile index 22e408f45d61..38fb70c31caf 100644 --- a/drivers/ssbi/Makefile +++ b/drivers/ssbi/Makefile @@ -1 +1 @@ -obj-$(CONFIG_MSM_SSBI) += ssbi.o +obj-$(CONFIG_SSBI) += ssbi.o diff --git a/drivers/ssbi/ssbi.c b/drivers/ssbi/ssbi.c index b056a072c3b3..f32da0258a8e 100644 --- a/drivers/ssbi/ssbi.c +++ b/drivers/ssbi/ssbi.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2009-2011, Code Aurora Forum. All rights reserved. +/* Copyright (c) 2009-2013, The Linux Foundation. All rights reserved. * Copyright (c) 2010, Google Inc. 
* * Original authors: Code Aurora Forum @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include @@ -65,23 +65,23 @@ #define SSBI_TIMEOUT_US 100 -struct msm_ssbi { +struct ssbi { struct device *slave; void __iomem *base; spinlock_t lock; - enum msm_ssbi_controller_type controller_type; - int (*read)(struct msm_ssbi *, u16 addr, u8 *buf, int len); - int (*write)(struct msm_ssbi *, u16 addr, u8 *buf, int len); + enum ssbi_controller_type controller_type; + int (*read)(struct ssbi *, u16 addr, u8 *buf, int len); + int (*write)(struct ssbi *, u16 addr, u8 *buf, int len); }; -#define to_msm_ssbi(dev) platform_get_drvdata(to_platform_device(dev)) +#define to_ssbi(dev) platform_get_drvdata(to_platform_device(dev)) -static inline u32 ssbi_readl(struct msm_ssbi *ssbi, u32 reg) +static inline u32 ssbi_readl(struct ssbi *ssbi, u32 reg) { return readl(ssbi->base + reg); } -static inline void ssbi_writel(struct msm_ssbi *ssbi, u32 val, u32 reg) +static inline void ssbi_writel(struct ssbi *ssbi, u32 val, u32 reg) { writel(val, ssbi->base + reg); } @@ -95,7 +95,7 @@ static inline void ssbi_writel(struct msm_ssbi *ssbi, u32 val, u32 reg) * * As such, this wait merely spins, with a udelay. */ -static int ssbi_wait_mask(struct msm_ssbi *ssbi, u32 set_mask, u32 clr_mask) +static int ssbi_wait_mask(struct ssbi *ssbi, u32 set_mask, u32 clr_mask) { u32 timeout = SSBI_TIMEOUT_US; u32 val; @@ -111,7 +111,7 @@ static int ssbi_wait_mask(struct msm_ssbi *ssbi, u32 set_mask, u32 clr_mask) } static int -msm_ssbi_read_bytes(struct msm_ssbi *ssbi, u16 addr, u8 *buf, int len) +ssbi_read_bytes(struct ssbi *ssbi, u16 addr, u8 *buf, int len) { u32 cmd = SSBI_CMD_RDWRN | ((addr & 0xff) << 16); int ret = 0; @@ -140,7 +140,7 @@ err: } static int -msm_ssbi_write_bytes(struct msm_ssbi *ssbi, u16 addr, u8 *buf, int len) +ssbi_write_bytes(struct ssbi *ssbi, u16 addr, u8 *buf, int len) { int ret = 0; @@ -172,7 +172,7 @@ err: * busywait. 
*/ static inline int -msm_ssbi_pa_transfer(struct msm_ssbi *ssbi, u32 cmd, u8 *data) +ssbi_pa_transfer(struct ssbi *ssbi, u32 cmd, u8 *data) { u32 timeout = SSBI_TIMEOUT_US; u32 rd_status = 0; @@ -197,7 +197,7 @@ msm_ssbi_pa_transfer(struct msm_ssbi *ssbi, u32 cmd, u8 *data) } static int -msm_ssbi_pa_read_bytes(struct msm_ssbi *ssbi, u16 addr, u8 *buf, int len) +ssbi_pa_read_bytes(struct ssbi *ssbi, u16 addr, u8 *buf, int len) { u32 cmd; int ret = 0; @@ -205,7 +205,7 @@ msm_ssbi_pa_read_bytes(struct msm_ssbi *ssbi, u16 addr, u8 *buf, int len) cmd = SSBI_PA_CMD_RDWRN | (addr & SSBI_PA_CMD_ADDR_MASK) << 8; while (len) { - ret = msm_ssbi_pa_transfer(ssbi, cmd, buf); + ret = ssbi_pa_transfer(ssbi, cmd, buf); if (ret) goto err; buf++; @@ -217,14 +217,14 @@ err: } static int -msm_ssbi_pa_write_bytes(struct msm_ssbi *ssbi, u16 addr, u8 *buf, int len) +ssbi_pa_write_bytes(struct ssbi *ssbi, u16 addr, u8 *buf, int len) { u32 cmd; int ret = 0; while (len) { cmd = (addr & SSBI_PA_CMD_ADDR_MASK) << 8 | *buf; - ret = msm_ssbi_pa_transfer(ssbi, cmd, NULL); + ret = ssbi_pa_transfer(ssbi, cmd, NULL); if (ret) goto err; buf++; @@ -235,9 +235,9 @@ err: return ret; } -int msm_ssbi_read(struct device *dev, u16 addr, u8 *buf, int len) +int ssbi_read(struct device *dev, u16 addr, u8 *buf, int len) { - struct msm_ssbi *ssbi = to_msm_ssbi(dev); + struct ssbi *ssbi = to_ssbi(dev); unsigned long flags; int ret; @@ -247,11 +247,11 @@ int msm_ssbi_read(struct device *dev, u16 addr, u8 *buf, int len) return ret; } -EXPORT_SYMBOL_GPL(msm_ssbi_read); +EXPORT_SYMBOL_GPL(ssbi_read); -int msm_ssbi_write(struct device *dev, u16 addr, u8 *buf, int len) +int ssbi_write(struct device *dev, u16 addr, u8 *buf, int len) { - struct msm_ssbi *ssbi = to_msm_ssbi(dev); + struct ssbi *ssbi = to_ssbi(dev); unsigned long flags; int ret; @@ -261,17 +261,17 @@ int msm_ssbi_write(struct device *dev, u16 addr, u8 *buf, int len) return ret; } -EXPORT_SYMBOL_GPL(msm_ssbi_write); +EXPORT_SYMBOL_GPL(ssbi_write); -static int msm_ssbi_probe(struct platform_device *pdev) +static int ssbi_probe(struct platform_device *pdev) { struct device_node *np = pdev->dev.of_node; struct resource *mem_res; - struct msm_ssbi *ssbi; + struct ssbi *ssbi; int ret = 0; const char *type; - ssbi = kzalloc(sizeof(struct msm_ssbi), GFP_KERNEL); + ssbi = kzalloc(sizeof(struct ssbi), GFP_KERNEL); if (!ssbi) { pr_err("can not allocate ssbi_data\n"); return -ENOMEM; @@ -312,11 +312,11 @@ static int msm_ssbi_probe(struct platform_device *pdev) } if (ssbi->controller_type == MSM_SBI_CTRL_PMIC_ARBITER) { - ssbi->read = msm_ssbi_pa_read_bytes; - ssbi->write = msm_ssbi_pa_write_bytes; + ssbi->read = ssbi_pa_read_bytes; + ssbi->write = ssbi_pa_write_bytes; } else { - ssbi->read = msm_ssbi_read_bytes; - ssbi->write = msm_ssbi_write_bytes; + ssbi->read = ssbi_read_bytes; + ssbi->write = ssbi_write_bytes; } spin_lock_init(&ssbi->lock); @@ -336,9 +336,9 @@ err_get_mem_res: return ret; } -static int msm_ssbi_remove(struct platform_device *pdev) +static int ssbi_remove(struct platform_device *pdev) { - struct msm_ssbi *ssbi = platform_get_drvdata(pdev); + struct ssbi *ssbi = platform_get_drvdata(pdev); platform_set_drvdata(pdev, NULL); iounmap(ssbi->base); @@ -351,29 +351,29 @@ static struct of_device_id ssbi_match_table[] = { {} }; -static struct platform_driver msm_ssbi_driver = { - .probe = msm_ssbi_probe, - .remove = msm_ssbi_remove, +static struct platform_driver ssbi_driver = { + .probe = ssbi_probe, + .remove = ssbi_remove, .driver = { - .name = "msm_ssbi", + .name = "ssbi", 
.owner = THIS_MODULE, .of_match_table = ssbi_match_table, }, }; -static int __init msm_ssbi_init(void) +static int __init ssbi_init(void) { - return platform_driver_register(&msm_ssbi_driver); + return platform_driver_register(&ssbi_driver); } -module_init(msm_ssbi_init); +module_init(ssbi_init); -static void __exit msm_ssbi_exit(void) +static void __exit ssbi_exit(void) { - platform_driver_unregister(&msm_ssbi_driver); + platform_driver_unregister(&ssbi_driver); } -module_exit(msm_ssbi_exit) +module_exit(ssbi_exit) MODULE_LICENSE("GPL v2"); MODULE_VERSION("1.0"); -MODULE_ALIAS("platform:msm_ssbi"); +MODULE_ALIAS("platform:ssbi"); MODULE_AUTHOR("Dima Zavin "); diff --git a/include/linux/msm_ssbi.h b/include/linux/msm_ssbi.h deleted file mode 100644 index 0fe245bb2940..000000000000 --- a/include/linux/msm_ssbi.h +++ /dev/null @@ -1,38 +0,0 @@ -/* Copyright (C) 2010 Google, Inc. - * Copyright (c) 2011, Code Aurora Forum. All rights reserved. - * Author: Dima Zavin - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 and - * only version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#ifndef _LINUX_MSM_SSBI_H -#define _LINUX_MSM_SSBI_H - -#include - -struct msm_ssbi_slave_info { - const char *name; - void *platform_data; -}; - -enum msm_ssbi_controller_type { - MSM_SBI_CTRL_SSBI = 0, - MSM_SBI_CTRL_SSBI2, - MSM_SBI_CTRL_PMIC_ARBITER, -}; - -struct msm_ssbi_platform_data { - struct msm_ssbi_slave_info slave; - enum msm_ssbi_controller_type controller_type; -}; - -int msm_ssbi_write(struct device *dev, u16 addr, u8 *buf, int len); -int msm_ssbi_read(struct device *dev, u16 addr, u8 *buf, int len); -#endif diff --git a/include/linux/ssbi.h b/include/linux/ssbi.h new file mode 100644 index 000000000000..44ef5da21470 --- /dev/null +++ b/include/linux/ssbi.h @@ -0,0 +1,38 @@ +/* Copyright (C) 2010 Google, Inc. + * Copyright (c) 2011, Code Aurora Forum. All rights reserved. + * Author: Dima Zavin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#ifndef _LINUX_SSBI_H +#define _LINUX_SSBI_H + +#include + +struct ssbi_slave_info { + const char *name; + void *platform_data; +}; + +enum ssbi_controller_type { + MSM_SBI_CTRL_SSBI = 0, + MSM_SBI_CTRL_SSBI2, + MSM_SBI_CTRL_PMIC_ARBITER, +}; + +struct ssbi_platform_data { + struct ssbi_slave_info slave; + enum ssbi_controller_type controller_type; +}; + +int ssbi_write(struct device *dev, u16 addr, u8 *buf, int len); +int ssbi_read(struct device *dev, u16 addr, u8 *buf, int len); +#endif -- cgit From 303f0847925ece27129487a2bfc05199ab2a0b51 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 15 Mar 2013 12:08:53 +0800 Subject: USB: adds comment on suspend callback This patch adds comments on interface driver suspend callback to emphasize that the failure return value is ignored by USB core in system sleep context, so do not try to recover device for this case and let resume/reset_resume callback handle the suspend failure if needed. Also kerneldoc for usb_suspend_both() is updated with the fact. Acked-by: Alan Stern Signed-off-by: Ming Lei Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/driver.c | 11 ++++++++--- include/linux/usb.h | 7 ++++++- 2 files changed, 14 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c index d938b2b99e31..eb1d00a3543a 100644 --- a/drivers/usb/core/driver.c +++ b/drivers/usb/core/driver.c @@ -1196,9 +1196,14 @@ done: * * This is the central routine for suspending USB devices. It calls the * suspend methods for all the interface drivers in @udev and then calls - * the suspend method for @udev itself. If an error occurs at any stage, - * all the interfaces which were suspended are resumed so that they remain - * in the same state as the device. + * the suspend method for @udev itself. When the routine is called in + * autosuspend, if an error occurs at any stage, all the interfaces + * which were suspended are resumed so that they remain in the same + * state as the device, but when called from system sleep, all error + * from suspend methods of interfaces and the non-root-hub device itself + * are simply ignored, so all suspended interfaces are only resumed + * to the device's state when @udev is root-hub and its suspend method + * returns failure. * * Autosuspend requests originating from a child device or an interface * driver may be made without the protection of @udev's device lock, but diff --git a/include/linux/usb.h b/include/linux/usb.h index 52464fb2389b..8d4bc173d66a 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -976,7 +976,12 @@ struct usbdrv_wrap { * the "usbfs" filesystem. This lets devices provide ways to * expose information to user space regardless of where they * do (or don't) show up otherwise in the filesystem. - * @suspend: Called when the device is going to be suspended by the system. + * @suspend: Called when the device is going to be suspended by the + * system either from system sleep or runtime suspend context. The + * return value will be ignored in system sleep context, so do NOT + * try to continue using the device if suspend fails in this case. + * Instead, let the resume or reset-resume routine recover from + * the failure. * @resume: Called when the device is being resumed by the system. * @reset_resume: Called when the suspended device has been reset instead * of being resumed. 
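A minimal sketch of an interface driver following this contract (the example_dev type, its fields and the EXAMPLE names are hypothetical, not taken from this patch):

static int example_suspend(struct usb_interface *intf, pm_message_t message)
{
	struct example_dev *dev = usb_get_intfdata(intf);	/* hypothetical private data */

	/* Quiesce I/O unconditionally. */
	usb_kill_urb(dev->int_urb);

	/*
	 * A nonzero return aborts runtime suspend only. In system sleep
	 * the core ignores this value, so do not try to recover the
	 * device here; leave that to resume() or reset_resume().
	 */
	if (PMSG_IS_AUTO(message) && dev->tx_in_flight)
		return -EBUSY;

	return 0;
}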
-- cgit From f91a595d0b813c3a2f3b848c165e865ca4c5b8cc Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Sat, 16 Mar 2013 11:46:44 +0530 Subject: memory: emif: Handle devices which are not rated for >85C As per the JESD209-2E specification for LPDDR2, http://www.jedec.org/standards-documents/results/jesd209-2E Table 73, LPDDR2 memories come in two flavors - Standard and Extended. The Standard types can operate from -25C to +85C. However, anything beyond that, up to +105C, can only be supported by the Extended types. Unfortunately, it seems there is no info in MR0 (device info) or MR[1,2] (device feature) for run-time detection of this capability, as far as can be seen in the spec. Hence, we provide a custom_config flag to be populated by platforms which have these "extended" type memories. For the "Standard" memories, we need to consider MR4 notifications of temperature triggers >85C as equivalent to thermal shutdown events (equivalent to the spec-specified thermal shutdown events for "extended" parts). Reported-by: Richard Woodruff Signed-off-by: Nishanth Menon Signed-off-by: Lokesh Vutla Acked-by: Santosh Shilimkar Signed-off-by: Greg Kroah-Hartman --- drivers/memory/emif.c | 27 +++++++++++++++++++++++++++ include/linux/platform_data/emif_plat.h | 1 + 2 files changed, 28 insertions(+) (limited to 'include/linux') diff --git a/drivers/memory/emif.c b/drivers/memory/emif.c index 249222905c94..96add5b9ce5d 100644 --- a/drivers/memory/emif.c +++ b/drivers/memory/emif.c @@ -918,6 +918,7 @@ static irqreturn_t handle_temp_alert(void __iomem *base, struct emif_data *emif) { u32 old_temp_level; irqreturn_t ret = IRQ_HANDLED; + struct emif_custom_configs *custom_configs; spin_lock_irqsave(&emif_lock, irq_state); old_temp_level = emif->temperature_level; @@ -930,6 +931,29 @@ static irqreturn_t handle_temp_alert(void __iomem *base, struct emif_data *emif) goto out; } + custom_configs = emif->plat_data->custom_configs; + + /* + * IF we detect higher than "nominal rating" from DDR sensor + * on an unsupported DDR part, shutdown system + */ + if (custom_configs && !(custom_configs->mask & + EMIF_CUSTOM_CONFIG_EXTENDED_TEMP_PART)) { + if (emif->temperature_level >= SDRAM_TEMP_HIGH_DERATE_REFRESH) { + dev_err(emif->dev, + "%s:NOT Extended temperature capable memory."
+ "Converting MR4=0x%02x as shutdown event\n", + __func__, emif->temperature_level); + /* + * Temperature far too high - do kernel_power_off() + * from thread context + */ + emif->temperature_level = SDRAM_TEMP_VERY_HIGH_SHUTDOWN; + ret = IRQ_WAKE_THREAD; + goto out; + } + } + if (emif->temperature_level < old_temp_level || emif->temperature_level == SDRAM_TEMP_VERY_HIGH_SHUTDOWN) { /* @@ -1228,6 +1252,9 @@ static void __init_or_module of_get_custom_configs(struct device_node *np_emif, cust_cfgs->temp_alert_poll_interval_ms = *poll_intvl; } + if (of_find_property(np_emif, "extended-temp-part", &len)) + cust_cfgs->mask |= EMIF_CUSTOM_CONFIG_EXTENDED_TEMP_PART; + if (!is_custom_config_valid(cust_cfgs, emif->dev)) { devm_kfree(emif->dev, cust_cfgs); return; diff --git a/include/linux/platform_data/emif_plat.h b/include/linux/platform_data/emif_plat.h index 03378ca84061..5c19a2a647c4 100644 --- a/include/linux/platform_data/emif_plat.h +++ b/include/linux/platform_data/emif_plat.h @@ -40,6 +40,7 @@ /* Custom config requests */ #define EMIF_CUSTOM_CONFIG_LPMODE 0x00000001 #define EMIF_CUSTOM_CONFIG_TEMP_ALERT_POLL_INTERVAL 0x00000002 +#define EMIF_CUSTOM_CONFIG_EXTENDED_TEMP_PART 0x00000004 #ifndef __ASSEMBLY__ /** -- cgit From 3edce1cf813aa6a087df7730cec0e67d57288300 Mon Sep 17 00:00:00 2001 From: Bjørn Mork Date: Sun, 17 Mar 2013 21:00:06 +0100 Subject: USB: cdc-wdm: implement IOCTL_WDM_MAX_COMMAND MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Userspace applications need to know the maximum supported message size. The cdc-wdm driver translates between a character device stream and a message-based protocol. Each message is transported as a usb control message with no further encapsulation or synchronization. Each read or write on the character device should translate to exactly one usb control message to ensure that message boundaries are kept intact. That means that the userspace application must know the maximum message size supported by the device and driver, making this size a vital part of the cdc-wdm character device API. CDC WDM and CDC MBIM functions export the maximum supported message size through CDC functional descriptors. The cdc-wdm and cdc_mbim drivers will parse these descriptors and use the value chosen by the device. The only current way for a userspace application to retrieve the value is by duplicating the descriptor parsing. This is an unnecessarily complex task, and application writers are likely to postpone it, using a fixed value and adding a "todo" item. QMI functions have no way to tell the host what message size they support. The qmi_wwan driver uses a fixed value based on protocol recommendations and observed device behaviour. Userspace applications must know and hard-code the same value. This scheme will break if we ever encounter a QMI device needing a device-specific message size quirk. We are currently unable to support such a device because using a non-default size would break the implicit userspace API. The message size is currently a hidden attribute of the cdc-wdm userspace API. Retrieving it is unnecessarily complex, increasing the possibility of drivers and applications using different limits. The resulting errors are hard to debug, and can only be replicated on identical hardware. Exporting the maximum message size from the driver simplifies the task for the userspace application, and creates a unified information source independent of device and function class.
It also serves to document that the message size is part of the cdc-wdm userspace API. This proposed API extension has been presented to the authors of userspace applications and libraries using the current API: libmbim, libqmi, uqmi, oFono and ModemManager. The replies were: Aleksander Morgado: "We do really need max message size for MBIM; and as you say, it may be good to have the max message size info also for QMI, so the new ioctl seems a good addition. So +1 from my side, for what it's worth." Dan Williams: "Yeah, +1 here. I'd prefer the sysfs file, but the fact that that doesn't work for fd passing pretty much kills it." No negative replies have been received so far. Cc: Aleksander Morgado Cc: Dan Williams Signed-off-by: Bjørn Mork Acked-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- Documentation/ioctl/ioctl-number.txt | 1 + drivers/usb/class/cdc-wdm.c | 19 +++++++++++++++++++ include/linux/usb/cdc-wdm.h | 2 ++ include/uapi/linux/usb/cdc-wdm.h | 21 +++++++++++++++++++++ 4 files changed, 43 insertions(+) create mode 100644 include/uapi/linux/usb/cdc-wdm.h (limited to 'include/linux') diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt index 3210540f8bd3..237acab169dd 100644 --- a/Documentation/ioctl/ioctl-number.txt +++ b/Documentation/ioctl/ioctl-number.txt @@ -131,6 +131,7 @@ Code Seq#(hex) Include File Comments 'H' 40-4F sound/hdspm.h conflict! 'H' 40-4F sound/hdsp.h conflict! 'H' 90 sound/usb/usx2y/usb_stream.h +'H' A0 uapi/linux/usb/cdc-wdm.h 'H' C0-F0 net/bluetooth/hci.h conflict! 'H' C0-DF net/bluetooth/hidp/hidp.h conflict! 'H' C0-DF net/bluetooth/cmtp/cmtp.h conflict! diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index 122d056d96d5..8a230f0ef77c 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -13,6 +13,7 @@ */ #include #include +#include #include #include #include @@ -644,6 +645,22 @@ static int wdm_release(struct inode *inode, struct file *file) return 0; } +static long wdm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct wdm_device *desc = file->private_data; + int rv = 0; + + switch (cmd) { + case IOCTL_WDM_MAX_COMMAND: + if (copy_to_user((void __user *)arg, &desc->wMaxCommand, sizeof(desc->wMaxCommand))) + rv = -EFAULT; + break; + default: + rv = -ENOTTY; + } + return rv; +} + static const struct file_operations wdm_fops = { .owner = THIS_MODULE, .read = wdm_read, @@ -652,6 +669,8 @@ .flush = wdm_flush, .release = wdm_release, .poll = wdm_poll, + .unlocked_ioctl = wdm_ioctl, + .compat_ioctl = wdm_ioctl, .llseek = noop_llseek, }; diff --git a/include/linux/usb/cdc-wdm.h b/include/linux/usb/cdc-wdm.h index 719c332620fa..0b3f4295c025 100644 --- a/include/linux/usb/cdc-wdm.h +++ b/include/linux/usb/cdc-wdm.h @@ -11,6 +11,8 @@ #ifndef __LINUX_USB_CDC_WDM_H #define __LINUX_USB_CDC_WDM_H +#include + extern struct usb_driver *usb_cdc_wdm_register(struct usb_interface *intf, struct usb_endpoint_descriptor *ep, int bufsize, diff --git a/include/uapi/linux/usb/cdc-wdm.h b/include/uapi/linux/usb/cdc-wdm.h new file mode 100644 index 000000000000..f03134feebd6 --- /dev/null +++ b/include/uapi/linux/usb/cdc-wdm.h @@ -0,0 +1,21 @@ +/* + * USB CDC Device Management userspace API definitions + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation.
+ */ + +#ifndef _UAPI__LINUX_USB_CDC_WDM_H +#define _UAPI__LINUX_USB_CDC_WDM_H + +/* + * This IOCTL is used to retrieve the wMaxCommand for the device, + * defining the message limit for both reading and writing. + * + * For CDC WDM functions this will be the wMaxCommand field of the + * Device Management Functional Descriptor. + */ +#define IOCTL_WDM_MAX_COMMAND _IOR('H', 0xA0, __u16) + +#endif /* _UAPI__LINUX_USB_CDC_WDM_H */ -- cgit From c5116e9d8d2de324f13a91fe5afc308cd6b0ca93 Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Tue, 19 Mar 2013 16:58:58 +0100 Subject: ssb: define more board types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rafał Miłecki Signed-off-by: John W. Linville --- include/linux/ssb/ssb.h | 54 ++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 51 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h index 8b1322296fed..c64999fd1660 100644 --- a/include/linux/ssb/ssb.h +++ b/include/linux/ssb/ssb.h @@ -340,13 +340,61 @@ enum ssb_bustype { #define SSB_BOARDVENDOR_DELL 0x1028 /* Dell */ #define SSB_BOARDVENDOR_HP 0x0E11 /* HP */ /* board_type */ +#define SSB_BOARD_BCM94301CB 0x0406 +#define SSB_BOARD_BCM94301MP 0x0407 +#define SSB_BOARD_BU4309 0x040A +#define SSB_BOARD_BCM94309CB 0x040B +#define SSB_BOARD_BCM4309MP 0x040C +#define SSB_BOARD_BU4306 0x0416 #define SSB_BOARD_BCM94306MP 0x0418 #define SSB_BOARD_BCM4309G 0x0421 #define SSB_BOARD_BCM4306CB 0x0417 -#define SSB_BOARD_BCM4309MP 0x040C +#define SSB_BOARD_BCM94306PC 0x0425 /* pcmcia 3.3v 4306 card */ +#define SSB_BOARD_BCM94306CBSG 0x042B /* with SiGe PA */ +#define SSB_BOARD_PCSG94306 0x042D /* with SiGe PA */ +#define SSB_BOARD_BU4704SD 0x042E /* with sdram */ +#define SSB_BOARD_BCM94704AGR 0x042F /* dual 11a/11g Router */ +#define SSB_BOARD_BCM94308MP 0x0430 /* 11a-only minipci */ +#define SSB_BOARD_BU4318 0x0447 +#define SSB_BOARD_CB4318 0x0448 +#define SSB_BOARD_MPG4318 0x0449 #define SSB_BOARD_MP4318 0x044A -#define SSB_BOARD_BU4306 0x0416 -#define SSB_BOARD_BU4309 0x040A +#define SSB_BOARD_SD4318 0x044B +#define SSB_BOARD_BCM94306P 0x044C /* with SiGe */ +#define SSB_BOARD_BCM94303MP 0x044E +#define SSB_BOARD_BCM94306MPM 0x0450 +#define SSB_BOARD_BCM94306MPL 0x0453 +#define SSB_BOARD_PC4303 0x0454 /* pcmcia */ +#define SSB_BOARD_BCM94306MPLNA 0x0457 +#define SSB_BOARD_BCM94306MPH 0x045B +#define SSB_BOARD_BCM94306PCIV 0x045C +#define SSB_BOARD_BCM94318MPGH 0x0463 +#define SSB_BOARD_BU4311 0x0464 +#define SSB_BOARD_BCM94311MC 0x0465 +#define SSB_BOARD_BCM94311MCAG 0x0466 +/* 4321 boards */ +#define SSB_BOARD_BU4321 0x046B +#define SSB_BOARD_BU4321E 0x047C +#define SSB_BOARD_MP4321 0x046C +#define SSB_BOARD_CB2_4321 0x046D +#define SSB_BOARD_CB2_4321_AG 0x0066 +#define SSB_BOARD_MC4321 0x046E +/* 4325 boards */ +#define SSB_BOARD_BCM94325DEVBU 0x0490 +#define SSB_BOARD_BCM94325BGABU 0x0491 +#define SSB_BOARD_BCM94325SDGWB 0x0492 +#define SSB_BOARD_BCM94325SDGMDL 0x04AA +#define SSB_BOARD_BCM94325SDGMDL2 0x04C6 +#define SSB_BOARD_BCM94325SDGMDL3 0x04C9 +#define SSB_BOARD_BCM94325SDABGWBA 0x04E1 +/* 4322 boards */ +#define SSB_BOARD_BCM94322MC 0x04A4 +#define SSB_BOARD_BCM94322USB 0x04A8 /* dualband */ +#define SSB_BOARD_BCM94322HM 0x04B0 +#define SSB_BOARD_BCM94322USB2D 0x04Bf /* single band discrete front end */ +/* 4312 boards */ +#define SSB_BOARD_BU4312 0x048A +#define SSB_BOARD_BCM4312MCGSG 0x04B5 /* chip_package */ #define SSB_CHIPPACK_BCM4712S 1 /* Small 
200pin 4712 */ #define SSB_CHIPPACK_BCM4712M 2 /* Medium 225pin 4712 */ -- cgit From 3e6998574fde0ab7a3329c9229394dd80462ead2 Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Tue, 19 Mar 2013 16:58:59 +0100 Subject: bcma: define board types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using that IDs we can write workarounds for various cards Signed-off-by: Rafał Miłecki Signed-off-by: John W. Linville --- include/linux/bcma/bcma.h | 54 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index e0ce311011c0..0ab6712fd76b 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -173,6 +173,60 @@ struct bcma_host_ops { #define BCMA_CHIP_ID_BCM53572 53572 #define BCMA_PKG_ID_BCM47188 9 +/* Board types (on PCI usually equals to the subsystem dev id) */ +/* BCM4313 */ +#define BCMA_BOARD_TYPE_BCM94313BU 0X050F +#define BCMA_BOARD_TYPE_BCM94313HM 0X0510 +#define BCMA_BOARD_TYPE_BCM94313EPA 0X0511 +#define BCMA_BOARD_TYPE_BCM94313HMG 0X051C +/* BCM4716 */ +#define BCMA_BOARD_TYPE_BCM94716NR2 0X04CD +/* BCM43224 */ +#define BCMA_BOARD_TYPE_BCM943224X21 0X056E +#define BCMA_BOARD_TYPE_BCM943224X21_FCC 0X00D1 +#define BCMA_BOARD_TYPE_BCM943224X21B 0X00E9 +#define BCMA_BOARD_TYPE_BCM943224M93 0X008B +#define BCMA_BOARD_TYPE_BCM943224M93A 0X0090 +#define BCMA_BOARD_TYPE_BCM943224X16 0X0093 +#define BCMA_BOARD_TYPE_BCM94322X9 0X008D +#define BCMA_BOARD_TYPE_BCM94322M35E 0X008E +/* BCM43228 */ +#define BCMA_BOARD_TYPE_BCM943228BU8 0X0540 +#define BCMA_BOARD_TYPE_BCM943228BU9 0X0541 +#define BCMA_BOARD_TYPE_BCM943228BU 0X0542 +#define BCMA_BOARD_TYPE_BCM943227HM4L 0X0543 +#define BCMA_BOARD_TYPE_BCM943227HMB 0X0544 +#define BCMA_BOARD_TYPE_BCM943228HM4L 0X0545 +#define BCMA_BOARD_TYPE_BCM943228SD 0X0573 +/* BCM4331 */ +#define BCMA_BOARD_TYPE_BCM94331X19 0X00D6 +#define BCMA_BOARD_TYPE_BCM94331X28 0X00E4 +#define BCMA_BOARD_TYPE_BCM94331X28B 0X010E +#define BCMA_BOARD_TYPE_BCM94331PCIEBT3AX 0X00E4 +#define BCMA_BOARD_TYPE_BCM94331X12_2G 0X00EC +#define BCMA_BOARD_TYPE_BCM94331X12_5G 0X00ED +#define BCMA_BOARD_TYPE_BCM94331X29B 0X00EF +#define BCMA_BOARD_TYPE_BCM94331CSAX 0X00EF +#define BCMA_BOARD_TYPE_BCM94331X19C 0X00F5 +#define BCMA_BOARD_TYPE_BCM94331X33 0X00F4 +#define BCMA_BOARD_TYPE_BCM94331BU 0X0523 +#define BCMA_BOARD_TYPE_BCM94331S9BU 0X0524 +#define BCMA_BOARD_TYPE_BCM94331MC 0X0525 +#define BCMA_BOARD_TYPE_BCM94331MCI 0X0526 +#define BCMA_BOARD_TYPE_BCM94331PCIEBT4 0X0527 +#define BCMA_BOARD_TYPE_BCM94331HM 0X0574 +#define BCMA_BOARD_TYPE_BCM94331PCIEDUAL 0X059B +#define BCMA_BOARD_TYPE_BCM94331MCH5 0X05A9 +#define BCMA_BOARD_TYPE_BCM94331CS 0X05C6 +#define BCMA_BOARD_TYPE_BCM94331CD 0X05DA +/* BCM53572 */ +#define BCMA_BOARD_TYPE_BCM953572BU 0X058D +#define BCMA_BOARD_TYPE_BCM953572NR2 0X058E +#define BCMA_BOARD_TYPE_BCM947188NR2 0X058F +#define BCMA_BOARD_TYPE_BCM953572SDRNR2 0X0590 +/* BCM43142 */ +#define BCMA_BOARD_TYPE_BCM943142HM 0X05E0 + struct bcma_device { struct bcma_bus *bus; struct bcma_device_id id; -- cgit From 12bef78f0a806639daef58b1770be6ea19b2e94d Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Thu, 21 Mar 2013 16:26:19 +0100 Subject: ssb: fix sprom constant for ant_available_{bg,a} This was done accordingly to new specs. Signed-off-by: Hauke Mehrtens Signed-off-by: John W. 
Linville --- include/linux/ssb/ssb_regs.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ssb/ssb_regs.h b/include/linux/ssb/ssb_regs.h index 6ecfa02ddbac..3a7256955b10 100644 --- a/include/linux/ssb/ssb_regs.h +++ b/include/linux/ssb/ssb_regs.h @@ -289,11 +289,11 @@ #define SSB_SPROM4_ETHPHY_ET1A_SHIFT 5 #define SSB_SPROM4_ETHPHY_ET0M (1<<14) /* MDIO for enet0 */ #define SSB_SPROM4_ETHPHY_ET1M (1<<15) /* MDIO for enet1 */ -#define SSB_SPROM4_ANTAVAIL 0x005D /* Antenna available bitfields */ -#define SSB_SPROM4_ANTAVAIL_A 0x00FF /* A-PHY bitfield */ -#define SSB_SPROM4_ANTAVAIL_A_SHIFT 0 -#define SSB_SPROM4_ANTAVAIL_BG 0xFF00 /* B-PHY and G-PHY bitfield */ -#define SSB_SPROM4_ANTAVAIL_BG_SHIFT 8 +#define SSB_SPROM4_ANTAVAIL 0x005C /* Antenna available bitfields */ +#define SSB_SPROM4_ANTAVAIL_BG 0x00FF /* B-PHY and G-PHY bitfield */ +#define SSB_SPROM4_ANTAVAIL_BG_SHIFT 0 +#define SSB_SPROM4_ANTAVAIL_A 0xFF00 /* A-PHY bitfield */ +#define SSB_SPROM4_ANTAVAIL_A_SHIFT 8 #define SSB_SPROM4_AGAIN01 0x005E /* Antenna Gain (in dBm Q5.2) */ #define SSB_SPROM4_AGAIN0 0x00FF /* Antenna 0 */ #define SSB_SPROM4_AGAIN0_SHIFT 0 -- cgit From 0f16cfe39eeef47c91aa3c3bf2b49954d5313a58 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 21 Mar 2013 12:36:42 +0100 Subject: USB: serial: remove generic disconnect callback Remove the now empty generic disconnect callback and make the disconnect callback non-mandatory. Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/generic.c | 6 ------ drivers/usb/serial/usb-serial.c | 4 ++-- include/linux/usb/serial.h | 1 - 3 files changed, 2 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c index a6d0ac638e0a..4d421f3f8a7c 100644 --- a/drivers/usb/serial/generic.c +++ b/drivers/usb/serial/generic.c @@ -45,7 +45,6 @@ struct usb_serial_driver usb_serial_generic_device = { }, .id_table = generic_device_ids, .num_ports = 1, - .disconnect = usb_serial_generic_disconnect, .release = usb_serial_generic_release, .throttle = usb_serial_generic_throttle, .unthrottle = usb_serial_generic_unthrottle, @@ -500,11 +499,6 @@ int usb_serial_generic_resume(struct usb_serial *serial) } EXPORT_SYMBOL_GPL(usb_serial_generic_resume); -void usb_serial_generic_disconnect(struct usb_serial *serial) -{ -} -EXPORT_SYMBOL_GPL(usb_serial_generic_disconnect); - void usb_serial_generic_release(struct usb_serial *serial) { } diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c index e7f97b58e914..569b6792c218 100644 --- a/drivers/usb/serial/usb-serial.c +++ b/drivers/usb/serial/usb-serial.c @@ -1095,7 +1095,8 @@ static void usb_serial_disconnect(struct usb_interface *interface) device_del(&port->dev); } } - serial->type->disconnect(serial); + if (serial->type->disconnect) + serial->type->disconnect(serial); /* let the last holder of this object cause it to be cleaned up */ usb_serial_put(serial); @@ -1304,7 +1305,6 @@ static void fixup_generic(struct usb_serial_driver *device) set_to_generic_if_null(device, chars_in_buffer); set_to_generic_if_null(device, read_bulk_callback); set_to_generic_if_null(device, write_bulk_callback); - set_to_generic_if_null(device, disconnect); set_to_generic_if_null(device, release); set_to_generic_if_null(device, process_read_urb); set_to_generic_if_null(device, prepare_write_buffer); diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h index 
1819b59aab2a..437dfd6787f9 100644 --- a/include/linux/usb/serial.h +++ b/include/linux/usb/serial.h @@ -329,7 +329,6 @@ extern void usb_serial_generic_read_bulk_callback(struct urb *urb); extern void usb_serial_generic_write_bulk_callback(struct urb *urb); extern void usb_serial_generic_throttle(struct tty_struct *tty); extern void usb_serial_generic_unthrottle(struct tty_struct *tty); -extern void usb_serial_generic_disconnect(struct usb_serial *serial); extern void usb_serial_generic_release(struct usb_serial *serial); extern int usb_serial_generic_register(void); extern void usb_serial_generic_deregister(void); -- cgit From 79b80b8a1141ba0605e917a6fc12d44383ab29b8 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 21 Mar 2013 12:36:43 +0100 Subject: USB: serial: remove generic release callback Remove empty generic release implementation and make the release callback non-mandatory (like attach, probe and disconnect). Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/generic.c | 5 ----- drivers/usb/serial/usb-serial.c | 3 +-- include/linux/usb/serial.h | 1 - 3 files changed, 1 insertion(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c index 4d421f3f8a7c..aa71f6e72f61 100644 --- a/drivers/usb/serial/generic.c +++ b/drivers/usb/serial/generic.c @@ -45,7 +45,6 @@ struct usb_serial_driver usb_serial_generic_device = { }, .id_table = generic_device_ids, .num_ports = 1, - .release = usb_serial_generic_release, .throttle = usb_serial_generic_throttle, .unthrottle = usb_serial_generic_unthrottle, .resume = usb_serial_generic_resume, @@ -498,7 +497,3 @@ int usb_serial_generic_resume(struct usb_serial *serial) return c ? -EIO : 0; } EXPORT_SYMBOL_GPL(usb_serial_generic_resume); - -void usb_serial_generic_release(struct usb_serial *serial) -{ -} diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c index 569b6792c218..4819fd9a639a 100644 --- a/drivers/usb/serial/usb-serial.c +++ b/drivers/usb/serial/usb-serial.c @@ -137,7 +137,7 @@ static void destroy_serial(struct kref *kref) if (serial->minor != SERIAL_TTY_NO_MINOR) return_serial(serial); - if (serial->attached) + if (serial->attached && serial->type->release) serial->type->release(serial); /* Now that nothing is using the ports, they can be freed */ @@ -1305,7 +1305,6 @@ static void fixup_generic(struct usb_serial_driver *device) set_to_generic_if_null(device, chars_in_buffer); set_to_generic_if_null(device, read_bulk_callback); set_to_generic_if_null(device, write_bulk_callback); - set_to_generic_if_null(device, release); set_to_generic_if_null(device, process_read_urb); set_to_generic_if_null(device, prepare_write_buffer); } diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h index 437dfd6787f9..3f8f5e3c76d5 100644 --- a/include/linux/usb/serial.h +++ b/include/linux/usb/serial.h @@ -329,7 +329,6 @@ extern void usb_serial_generic_read_bulk_callback(struct urb *urb); extern void usb_serial_generic_write_bulk_callback(struct urb *urb); extern void usb_serial_generic_throttle(struct tty_struct *tty); extern void usb_serial_generic_unthrottle(struct tty_struct *tty); -extern void usb_serial_generic_release(struct usb_serial *serial); extern int usb_serial_generic_register(void); extern void usb_serial_generic_deregister(void); extern int usb_serial_generic_submit_read_urbs(struct usb_serial_port *port, -- cgit From 143d9d961608b737d90a813deaaf91affb41c83c Mon Sep 17 00:00:00 2001 From: Johan Hovold 
Date: Thu, 21 Mar 2013 12:36:51 +0100 Subject: USB: serial: add tiocmiwait subdriver operation Add tiocmiwait operation to struct usb_serial_driver. Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/usb-serial.c | 15 +++++++++++---- include/linux/usb/serial.h | 1 + 2 files changed, 12 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c index 0b39d013c505..ada400d6594b 100644 --- a/drivers/usb/serial/usb-serial.c +++ b/drivers/usb/serial/usb-serial.c @@ -402,10 +402,17 @@ static int serial_ioctl(struct tty_struct *tty, dev_dbg(tty->dev, "%s - cmd 0x%.4x\n", __func__, cmd); - if (port->serial->type->ioctl) - retval = port->serial->type->ioctl(tty, cmd, arg); - else - retval = -ENOIOCTLCMD; + switch (cmd) { + case TIOCMIWAIT: + if (port->serial->type->tiocmiwait) + retval = port->serial->type->tiocmiwait(tty, arg); + break; + default: + if (port->serial->type->ioctl) + retval = port->serial->type->ioctl(tty, cmd, arg); + else + retval = -ENOIOCTLCMD; + } return retval; } diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h index 3f8f5e3c76d5..9c8b53f80f48 100644 --- a/include/linux/usb/serial.h +++ b/include/linux/usb/serial.h @@ -272,6 +272,7 @@ struct usb_serial_driver { int (*tiocmget)(struct tty_struct *tty); int (*tiocmset)(struct tty_struct *tty, unsigned int set, unsigned int clear); + int (*tiocmiwait)(struct tty_struct *tty, unsigned long arg); int (*get_icount)(struct tty_struct *tty, struct serial_icounter_struct *icount); /* Called by the tty layer for port level work. There may or may not -- cgit From 980373b7918b8023be6b7df03857f494ae124d0b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 21 Mar 2013 12:36:52 +0100 Subject: USB: serial: add generic TIOCMIWAIT implementation Add generic TIOCMIWAIT implementation which correctly handles hangup, USB-device disconnect, does not rely on the deprecated sleep_on functions and hence does not suffer from the races currently affecting several usb-serial drivers. This makes it much easier to add TIOCMIWAIT support to subdrivers as the tricky details related to hangup and disconnect (e.g. atomicity, that the private port data may have been freed when woken up, and waking up processes at disconnect) have been handled once and for all. To add support to a subdriver, simply set the tiocmiwait-port-operation field, update the port icount fields and wake up any process sleeping on the tty-port modem-status-change wait queue on changes. Note that the tty-port initialised flag can be used to detect disconnected as the port will be hung up as part of disconnect (and cannot be reactivated due to the disconnected flag). However, as the tty-port implementation currently wakes up processes before calling port shutdown, the tty-hupping flag must also be checked to detect hangup for now. 
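A minimal sketch of that recipe in a subdriver (the EXAMPLE_* delta bits and names are illustrative, not taken from this patch): bump the counters under the port lock in the status-change path, wake the tty-port queue, and point the new operation at the generic helper.

static void example_update_msr(struct usb_serial_port *port, u8 msr)
{
	unsigned long flags;

	spin_lock_irqsave(&port->lock, flags);
	if (msr & EXAMPLE_MSR_DCTS)		/* hypothetical delta-CTS bit */
		port->icount.cts++;
	if (msr & EXAMPLE_MSR_DDSR)		/* hypothetical delta-DSR bit */
		port->icount.dsr++;
	spin_unlock_irqrestore(&port->lock, flags);

	/* Wake any process sleeping in TIOCMIWAIT. */
	wake_up_interruptible(&port->port.delta_msr_wait);
}

static struct usb_serial_driver example_device = {
	.driver = {
		.owner	= THIS_MODULE,
		.name	= "example",
	},
	.num_ports	= 1,
	.tiocmiwait	= usb_serial_generic_tiocmiwait,
};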
Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/generic.c | 58 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/usb/serial.h | 5 ++++ 2 files changed, 63 insertions(+) (limited to 'include/linux') diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c index aa71f6e72f61..18bc74e20fe1 100644 --- a/drivers/usb/serial/generic.c +++ b/drivers/usb/serial/generic.c @@ -418,6 +418,64 @@ void usb_serial_generic_unthrottle(struct tty_struct *tty) } EXPORT_SYMBOL_GPL(usb_serial_generic_unthrottle); +static bool usb_serial_generic_msr_changed(struct tty_struct *tty, + unsigned long arg, struct async_icount *cprev) +{ + struct usb_serial_port *port = tty->driver_data; + struct async_icount cnow; + unsigned long flags; + bool ret; + + /* + * Use tty-port initialised flag to detect all hangups including the + * one generated at USB-device disconnect. + * + * FIXME: Remove hupping check once tty_port_hangup calls shutdown + * (which clears the initialised flag) before wake up. + */ + if (test_bit(TTY_HUPPING, &tty->flags)) + return true; + if (!test_bit(ASYNCB_INITIALIZED, &port->port.flags)) + return true; + + spin_lock_irqsave(&port->lock, flags); + cnow = port->icount; /* atomic copy*/ + spin_unlock_irqrestore(&port->lock, flags); + + ret = ((arg & TIOCM_RNG) && (cnow.rng != cprev->rng)) || + ((arg & TIOCM_DSR) && (cnow.dsr != cprev->dsr)) || + ((arg & TIOCM_CD) && (cnow.dcd != cprev->dcd)) || + ((arg & TIOCM_CTS) && (cnow.cts != cprev->cts)); + + *cprev = cnow; + + return ret; +} + +int usb_serial_generic_tiocmiwait(struct tty_struct *tty, unsigned long arg) +{ + struct usb_serial_port *port = tty->driver_data; + struct async_icount cnow; + unsigned long flags; + int ret; + + spin_lock_irqsave(&port->lock, flags); + cnow = port->icount; /* atomic copy */ + spin_unlock_irqrestore(&port->lock, flags); + + ret = wait_event_interruptible(port->port.delta_msr_wait, + usb_serial_generic_msr_changed(tty, arg, &cnow)); + if (!ret) { + if (test_bit(TTY_HUPPING, &tty->flags)) + ret = -EIO; + if (!test_bit(ASYNCB_INITIALIZED, &port->port.flags)) + ret = -EIO; + } + + return ret; +} +EXPORT_SYMBOL_GPL(usb_serial_generic_tiocmiwait); + #ifdef CONFIG_MAGIC_SYSRQ int usb_serial_handle_sysrq_char(struct usb_serial_port *port, unsigned int ch) { diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h index 9c8b53f80f48..47c8d2c506c8 100644 --- a/include/linux/usb/serial.h +++ b/include/linux/usb/serial.h @@ -15,6 +15,7 @@ #include #include +#include #include #include @@ -61,6 +62,7 @@ * @bulk_out_buffers: pointers to the bulk out buffers for this port * @write_urbs: pointers to the bulk out urbs for this port * @write_urbs_free: status bitmap the for bulk out urbs + * @icount: interrupt counters * @tx_bytes: number of bytes currently in host stack queues * @bulk_out_endpointAddress: endpoint address for the bulk out pipe for this * port. 
@@ -109,6 +111,7 @@ struct usb_serial_port { unsigned long write_urbs_free; __u8 bulk_out_endpointAddress; + struct async_icount icount; int tx_bytes; unsigned long flags; @@ -330,6 +333,8 @@ extern void usb_serial_generic_read_bulk_callback(struct urb *urb); extern void usb_serial_generic_write_bulk_callback(struct urb *urb); extern void usb_serial_generic_throttle(struct tty_struct *tty); extern void usb_serial_generic_unthrottle(struct tty_struct *tty); +extern int usb_serial_generic_tiocmiwait(struct tty_struct *tty, + unsigned long arg); extern int usb_serial_generic_register(void); extern void usb_serial_generic_deregister(void); extern int usb_serial_generic_submit_read_urbs(struct usb_serial_port *port, -- cgit From befefcda4bddc52d29248931801961a72aeef28b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 21 Mar 2013 12:36:54 +0100 Subject: USB: serial: add generic get_icount implementation Add generic get_icount implementation that subdrivers relying on the port interrupt counters can use. Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/generic.c | 27 +++++++++++++++++++++++++++ include/linux/usb/serial.h | 2 ++ 2 files changed, 29 insertions(+) (limited to 'include/linux') diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c index 18bc74e20fe1..5e55761b2cb8 100644 --- a/drivers/usb/serial/generic.c +++ b/drivers/usb/serial/generic.c @@ -476,6 +476,33 @@ int usb_serial_generic_tiocmiwait(struct tty_struct *tty, unsigned long arg) } EXPORT_SYMBOL_GPL(usb_serial_generic_tiocmiwait); +int usb_serial_generic_get_icount(struct tty_struct *tty, + struct serial_icounter_struct *icount) +{ + struct usb_serial_port *port = tty->driver_data; + struct async_icount cnow; + unsigned long flags; + + spin_lock_irqsave(&port->lock, flags); + cnow = port->icount; /* atomic copy */ + spin_unlock_irqrestore(&port->lock, flags); + + icount->cts = cnow.cts; + icount->dsr = cnow.dsr; + icount->rng = cnow.rng; + icount->dcd = cnow.dcd; + icount->tx = cnow.tx; + icount->rx = cnow.rx; + icount->frame = cnow.frame; + icount->parity = cnow.parity; + icount->overrun = cnow.overrun; + icount->brk = cnow.brk; + icount->buf_overrun = cnow.buf_overrun; + + return 0; +} +EXPORT_SYMBOL_GPL(usb_serial_generic_get_icount); + #ifdef CONFIG_MAGIC_SYSRQ int usb_serial_handle_sysrq_char(struct usb_serial_port *port, unsigned int ch) { diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h index 47c8d2c506c8..c786ee7fca8f 100644 --- a/include/linux/usb/serial.h +++ b/include/linux/usb/serial.h @@ -335,6 +335,8 @@ extern void usb_serial_generic_throttle(struct tty_struct *tty); extern void usb_serial_generic_unthrottle(struct tty_struct *tty); extern int usb_serial_generic_tiocmiwait(struct tty_struct *tty, unsigned long arg); +extern int usb_serial_generic_get_icount(struct tty_struct *tty, + struct serial_icounter_struct *icount); extern int usb_serial_generic_register(void); extern void usb_serial_generic_deregister(void); extern int usb_serial_generic_submit_read_urbs(struct usb_serial_port *port, -- cgit From 53ab34dc50ad99366257d34cdb8a84f24250d611 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 21 Mar 2013 12:37:35 +0100 Subject: USB: serial: remove unused MSR-wait queue Remove the port MSR-wait queue now that all drivers have been migrated to the tty-port queue. 
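The migration mentioned above amounts to switching each subdriver from the port queue to the tty-port queue, roughly (illustrative, not taken from this patch):

-	wake_up_interruptible(&port->delta_msr_wait);
+	wake_up_interruptible(&port->port.delta_msr_wait);

with the matching change in the corresponding wait_event_interruptible() call.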
Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/serial.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h index c786ee7fca8f..b9b0f7b4e43b 100644 --- a/include/linux/usb/serial.h +++ b/include/linux/usb/serial.h @@ -68,7 +68,6 @@ * port. * @flags: usb serial port flags * @write_wait: a wait_queue_head_t used by the port. - * @delta_msr_wait: modem-status-change wait queue * @work: work queue entry for the line discipline waking up. * @throttled: nonzero if the read urb is inactive to throttle the device * @throttle_req: nonzero if the tty wants to throttle us @@ -116,7 +115,6 @@ struct usb_serial_port { unsigned long flags; wait_queue_head_t write_wait; - wait_queue_head_t delta_msr_wait; struct work_struct work; char throttled; char throttle_req; -- cgit From 1e9663c62b32f695af37fec4afc473b59f5ca9b4 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Mon, 25 Mar 2013 08:58:00 +0000 Subject: iio:trigger: Introduce iio_trigger_{set,get}_drvdata Introduce iio_trigger_{set,get}_drvdata, which allows attaching driver-specific data to a trigger. The functions wrap access to the trigger's private_data field and all current users are updated to use iio_trigger_{set,get}_drvdata instead of directly accessing the private_data field. This is the first step towards removing the private_data field from the iio_trigger struct. The following coccinelle script has been used to update the drivers: @@ struct iio_trigger *trigger; expression priv; @@ -trigger->private_data = priv +iio_trigger_set_drvdata(trigger, priv) @@ struct iio_trigger *trigger; @@ -trigger->private_data +iio_trigger_get_drvdata(trigger) Signed-off-by: Lars-Peter Clausen Signed-off-by: Jonathan Cameron --- drivers/iio/accel/st_accel_buffer.c | 2 +- drivers/iio/adc/ad_sigma_delta.c | 2 +- drivers/iio/adc/at91_adc.c | 4 ++-- .../iio/common/hid-sensors/hid-sensor-trigger.c | 4 ++-- drivers/iio/common/st_sensors/st_sensors_trigger.c | 2 +- drivers/iio/gyro/itg3200_buffer.c | 4 ++-- drivers/iio/gyro/st_gyro_buffer.c | 2 +- drivers/iio/imu/adis_trigger.c | 4 ++-- drivers/iio/imu/inv_mpu6050/inv_mpu_trigger.c | 4 ++-- drivers/staging/iio/accel/lis3l02dq_ring.c | 6 +++--- drivers/staging/iio/adc/mxs-lradc.c | 4 ++-- drivers/staging/iio/meter/ade7758_trigger.c | 6 +++--- drivers/staging/iio/trigger/iio-trig-bfin-timer.c | 8 ++++---- drivers/staging/iio/trigger/iio-trig-gpio.c | 6 +++--- .../staging/iio/trigger/iio-trig-periodic-rtc.c | 12 +++++------ drivers/staging/iio/trigger/iio-trig-sysfs.c | 4 ++-- include/linux/iio/trigger.h | 24 ++++++++++++++++++++++ 17 files changed, 61 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/drivers/iio/accel/st_accel_buffer.c b/drivers/iio/accel/st_accel_buffer.c index 6bd82c7f769c..d9b350756f90 100644 --- a/drivers/iio/accel/st_accel_buffer.c +++ b/drivers/iio/accel/st_accel_buffer.c @@ -25,7 +25,7 @@ int st_accel_trig_set_state(struct iio_trigger *trig, bool state) { - struct iio_dev *indio_dev = trig->private_data; + struct iio_dev *indio_dev = iio_trigger_get_drvdata(trig); return st_sensors_set_dataready_irq(indio_dev, state); } diff --git a/drivers/iio/adc/ad_sigma_delta.c b/drivers/iio/adc/ad_sigma_delta.c index afe6d78c8ff0..f0d6335ae087 100644 --- a/drivers/iio/adc/ad_sigma_delta.c +++ b/drivers/iio/adc/ad_sigma_delta.c @@ -470,7 +470,7 @@ static int ad_sd_probe_trigger(struct iio_dev *indio_dev) disable_irq_nosync(sigma_delta->spi->irq); }
sigma_delta->trig->dev.parent = &sigma_delta->spi->dev; - sigma_delta->trig->private_data = sigma_delta; + iio_trigger_set_drvdata(sigma_delta->trig, sigma_delta); ret = iio_trigger_register(sigma_delta->trig); if (ret) diff --git a/drivers/iio/adc/at91_adc.c b/drivers/iio/adc/at91_adc.c index 92eb6a5b9e72..6fc43c15f028 100644 --- a/drivers/iio/adc/at91_adc.c +++ b/drivers/iio/adc/at91_adc.c @@ -188,7 +188,7 @@ static u8 at91_adc_get_trigger_value_by_name(struct iio_dev *idev, static int at91_adc_configure_trigger(struct iio_trigger *trig, bool state) { - struct iio_dev *idev = trig->private_data; + struct iio_dev *idev = iio_trigger_get_drvdata(trig); struct at91_adc_state *st = iio_priv(idev); struct iio_buffer *buffer = idev->buffer; struct at91_adc_reg_desc *reg = st->registers; @@ -254,7 +254,7 @@ static struct iio_trigger *at91_adc_allocate_trigger(struct iio_dev *idev, return NULL; trig->dev.parent = idev->dev.parent; - trig->private_data = idev; + iio_trigger_set_drvdata(trig, idev); trig->ops = &at91_adc_trigger_ops; ret = iio_trigger_register(trig); diff --git a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c index 7a525a91105d..87419c41b991 100644 --- a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c +++ b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c @@ -31,7 +31,7 @@ static int hid_sensor_data_rdy_trigger_set_state(struct iio_trigger *trig, bool state) { - struct hid_sensor_common *st = trig->private_data; + struct hid_sensor_common *st = iio_trigger_get_drvdata(trig); int state_val; state_val = state ? 1 : 0; @@ -76,7 +76,7 @@ int hid_sensor_setup_trigger(struct iio_dev *indio_dev, const char *name, } trig->dev.parent = indio_dev->dev.parent; - trig->private_data = attrb; + iio_trigger_set_drvdata(trig, attrb); trig->ops = &hid_sensor_trigger_ops; ret = iio_trigger_register(trig); diff --git a/drivers/iio/common/st_sensors/st_sensors_trigger.c b/drivers/iio/common/st_sensors/st_sensors_trigger.c index 139ed030abb0..8fc3a97eb266 100644 --- a/drivers/iio/common/st_sensors/st_sensors_trigger.c +++ b/drivers/iio/common/st_sensors/st_sensors_trigger.c @@ -40,7 +40,7 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev, if (err) goto request_irq_error; - sdata->trig->private_data = indio_dev; + iio_trigger_set_drvdata(sdata->trig, indio_dev); sdata->trig->ops = trigger_ops; sdata->trig->dev.parent = sdata->dev; diff --git a/drivers/iio/gyro/itg3200_buffer.c b/drivers/iio/gyro/itg3200_buffer.c index f667d2c8c00f..6c43af9bb0a4 100644 --- a/drivers/iio/gyro/itg3200_buffer.c +++ b/drivers/iio/gyro/itg3200_buffer.c @@ -81,7 +81,7 @@ void itg3200_buffer_unconfigure(struct iio_dev *indio_dev) static int itg3200_data_rdy_trigger_set_state(struct iio_trigger *trig, bool state) { - struct iio_dev *indio_dev = trig->private_data; + struct iio_dev *indio_dev = iio_trigger_get_drvdata(trig); int ret; u8 msc; @@ -129,7 +129,7 @@ int itg3200_probe_trigger(struct iio_dev *indio_dev) st->trig->dev.parent = &st->i2c->dev; st->trig->ops = &itg3200_trigger_ops; - st->trig->private_data = indio_dev; + iio_trigger_set_drvdata(st->trig, indio_dev); ret = iio_trigger_register(st->trig); if (ret) goto error_free_irq; diff --git a/drivers/iio/gyro/st_gyro_buffer.c b/drivers/iio/gyro/st_gyro_buffer.c index da4d122ec7dc..69017c7ec302 100644 --- a/drivers/iio/gyro/st_gyro_buffer.c +++ b/drivers/iio/gyro/st_gyro_buffer.c @@ -25,7 +25,7 @@ int st_gyro_trig_set_state(struct iio_trigger *trig, bool state) { - struct iio_dev *indio_dev = 
trig->private_data; + struct iio_dev *indio_dev = iio_trigger_get_drvdata(trig); return st_sensors_set_dataready_irq(indio_dev, state); } diff --git a/drivers/iio/imu/adis_trigger.c b/drivers/iio/imu/adis_trigger.c index 5a24c9cac343..e0017c22bb9c 100644 --- a/drivers/iio/imu/adis_trigger.c +++ b/drivers/iio/imu/adis_trigger.c @@ -19,7 +19,7 @@ static int adis_data_rdy_trigger_set_state(struct iio_trigger *trig, bool state) { - struct adis *adis = trig->private_data; + struct adis *adis = iio_trigger_get_drvdata(trig); return adis_enable_irq(adis, state); } @@ -57,7 +57,7 @@ int adis_probe_trigger(struct adis *adis, struct iio_dev *indio_dev) adis->trig->dev.parent = &adis->spi->dev; adis->trig->ops = &adis_trigger_ops; - adis->trig->private_data = adis; + iio_trigger_set_drvdata(adis->trig, adis); ret = iio_trigger_register(adis->trig); indio_dev->trig = adis->trig; diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_trigger.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_trigger.c index e1d0869e0ad1..03b9372c1212 100644 --- a/drivers/iio/imu/inv_mpu6050/inv_mpu_trigger.c +++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_trigger.c @@ -103,7 +103,7 @@ static int inv_mpu6050_set_enable(struct iio_dev *indio_dev, bool enable) static int inv_mpu_data_rdy_trigger_set_state(struct iio_trigger *trig, bool state) { - return inv_mpu6050_set_enable(trig->private_data, state); + return inv_mpu6050_set_enable(iio_trigger_get_drvdata(trig), state); } static const struct iio_trigger_ops inv_mpu_trigger_ops = { @@ -130,8 +130,8 @@ int inv_mpu6050_probe_trigger(struct iio_dev *indio_dev) if (ret) goto error_free_trig; st->trig->dev.parent = &st->client->dev; - st->trig->private_data = indio_dev; st->trig->ops = &inv_mpu_trigger_ops; + iio_trigger_set_drvdata(st->trig, indio_dev); ret = iio_trigger_register(st->trig); if (ret) goto error_free_irq; diff --git a/drivers/staging/iio/accel/lis3l02dq_ring.c b/drivers/staging/iio/accel/lis3l02dq_ring.c index e676403ea3ea..5b8f0f6c9938 100644 --- a/drivers/staging/iio/accel/lis3l02dq_ring.c +++ b/drivers/staging/iio/accel/lis3l02dq_ring.c @@ -228,7 +228,7 @@ error_ret: static int lis3l02dq_data_rdy_trigger_set_state(struct iio_trigger *trig, bool state) { - struct iio_dev *indio_dev = trig->private_data; + struct iio_dev *indio_dev = iio_trigger_get_drvdata(trig); int ret = 0; u8 t; @@ -252,7 +252,7 @@ static int lis3l02dq_data_rdy_trigger_set_state(struct iio_trigger *trig, */ static int lis3l02dq_trig_try_reen(struct iio_trigger *trig) { - struct iio_dev *indio_dev = trig->private_data; + struct iio_dev *indio_dev = iio_trigger_get_drvdata(trig); struct lis3l02dq_state *st = iio_priv(indio_dev); int i; @@ -290,7 +290,7 @@ int lis3l02dq_probe_trigger(struct iio_dev *indio_dev) st->trig->dev.parent = &st->us->dev; st->trig->ops = &lis3l02dq_trigger_ops; - st->trig->private_data = indio_dev; + iio_trigger_set_drvdata(st->trig, indio_dev); ret = iio_trigger_register(st->trig); if (ret) goto error_free_trig; diff --git a/drivers/staging/iio/adc/mxs-lradc.c b/drivers/staging/iio/adc/mxs-lradc.c index 25a4359a92db..eab975d11bb2 100644 --- a/drivers/staging/iio/adc/mxs-lradc.c +++ b/drivers/staging/iio/adc/mxs-lradc.c @@ -646,7 +646,7 @@ static irqreturn_t mxs_lradc_trigger_handler(int irq, void *p) static int mxs_lradc_configure_trigger(struct iio_trigger *trig, bool state) { - struct iio_dev *iio = trig->private_data; + struct iio_dev *iio = iio_trigger_get_drvdata(trig); struct mxs_lradc *lradc = iio_priv(iio); const uint32_t st = state ? 
STMP_OFFSET_REG_SET : STMP_OFFSET_REG_CLR; @@ -670,7 +670,7 @@ static int mxs_lradc_trigger_init(struct iio_dev *iio) return -ENOMEM; trig->dev.parent = iio->dev.parent; - trig->private_data = iio; + iio_trigger_set_drvdata(trig, iio); trig->ops = &mxs_lradc_trigger_ops; ret = iio_trigger_register(trig); diff --git a/drivers/staging/iio/meter/ade7758_trigger.c b/drivers/staging/iio/meter/ade7758_trigger.c index f9c6a340092b..7a94ddd42f59 100644 --- a/drivers/staging/iio/meter/ade7758_trigger.c +++ b/drivers/staging/iio/meter/ade7758_trigger.c @@ -32,7 +32,7 @@ static irqreturn_t ade7758_data_rdy_trig_poll(int irq, void *private) static int ade7758_data_rdy_trigger_set_state(struct iio_trigger *trig, bool state) { - struct iio_dev *indio_dev = trig->private_data; + struct iio_dev *indio_dev = iio_trigger_get_drvdata(trig); dev_dbg(&indio_dev->dev, "%s (%d)\n", __func__, state); return ade7758_set_irq(&indio_dev->dev, state); @@ -44,7 +44,7 @@ static int ade7758_data_rdy_trigger_set_state(struct iio_trigger *trig, **/ static int ade7758_trig_try_reen(struct iio_trigger *trig) { - struct iio_dev *indio_dev = trig->private_data; + struct iio_dev *indio_dev = iio_trigger_get_drvdata(trig); struct ade7758_state *st = iio_priv(indio_dev); enable_irq(st->us->irq); @@ -81,7 +81,7 @@ int ade7758_probe_trigger(struct iio_dev *indio_dev) st->trig->dev.parent = &st->us->dev; st->trig->ops = &ade7758_trigger_ops; - st->trig->private_data = indio_dev; + iio_trigger_set_drvdata(st->trig, indio_dev); ret = iio_trigger_register(st->trig); /* select default trigger */ diff --git a/drivers/staging/iio/trigger/iio-trig-bfin-timer.c b/drivers/staging/iio/trigger/iio-trig-bfin-timer.c index 42798da575c0..38a158b77b1d 100644 --- a/drivers/staging/iio/trigger/iio-trig-bfin-timer.c +++ b/drivers/staging/iio/trigger/iio-trig-bfin-timer.c @@ -65,7 +65,7 @@ struct bfin_tmr_state { static int iio_bfin_tmr_set_state(struct iio_trigger *trig, bool state) { - struct bfin_tmr_state *st = trig->private_data; + struct bfin_tmr_state *st = iio_trigger_get_drvdata(trig); if (get_gptimer_period(st->t->id) == 0) return -EINVAL; @@ -82,7 +82,7 @@ static ssize_t iio_bfin_tmr_frequency_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct iio_trigger *trig = to_iio_trigger(dev); - struct bfin_tmr_state *st = trig->private_data; + struct bfin_tmr_state *st = iio_trigger_get_drvdata(trig); unsigned long val; bool enabled; int ret; @@ -125,7 +125,7 @@ static ssize_t iio_bfin_tmr_frequency_show(struct device *dev, char *buf) { struct iio_trigger *trig = to_iio_trigger(dev); - struct bfin_tmr_state *st = trig->private_data; + struct bfin_tmr_state *st = iio_trigger_get_drvdata(trig); unsigned int period = get_gptimer_period(st->t->id); unsigned long val; @@ -213,9 +213,9 @@ static int iio_bfin_tmr_trigger_probe(struct platform_device *pdev) goto out1; } - st->trig->private_data = st; st->trig->ops = &iio_bfin_tmr_trigger_ops; st->trig->dev.groups = iio_bfin_tmr_trigger_attr_groups; + iio_trigger_set_drvdata(st->trig, st); ret = iio_trigger_register(st->trig); if (ret) goto out2; diff --git a/drivers/staging/iio/trigger/iio-trig-gpio.c b/drivers/staging/iio/trigger/iio-trig-gpio.c index fcc4cb048c9a..7c593d18a910 100644 --- a/drivers/staging/iio/trigger/iio-trig-gpio.c +++ b/drivers/staging/iio/trigger/iio-trig-gpio.c @@ -83,7 +83,7 @@ static int iio_gpio_trigger_probe(struct platform_device *pdev) ret = -ENOMEM; goto error_put_trigger; } - trig->private_data = trig_info; + 
iio_trigger_set_drvdata(trig, trig_info); trig_info->irq = irq; trig->ops = &iio_gpio_trigger_ops; ret = request_irq(irq, iio_gpio_trigger_poll, @@ -121,7 +121,7 @@ error_free_completed_registrations: trig2, &iio_gpio_trigger_list, alloc_list) { - trig_info = trig->private_data; + trig_info = iio_trigger_get_drvdata(trig); free_irq(gpio_to_irq(trig_info->irq), trig); kfree(trig_info); iio_trigger_unregister(trig); @@ -140,7 +140,7 @@ static int iio_gpio_trigger_remove(struct platform_device *pdev) trig2, &iio_gpio_trigger_list, alloc_list) { - trig_info = trig->private_data; + trig_info = iio_trigger_get_drvdata(trig); iio_trigger_unregister(trig); free_irq(trig_info->irq, trig); kfree(trig_info); diff --git a/drivers/staging/iio/trigger/iio-trig-periodic-rtc.c b/drivers/staging/iio/trigger/iio-trig-periodic-rtc.c index 9102b1ba2530..79695974b1d4 100644 --- a/drivers/staging/iio/trigger/iio-trig-periodic-rtc.c +++ b/drivers/staging/iio/trigger/iio-trig-periodic-rtc.c @@ -30,7 +30,7 @@ struct iio_prtc_trigger_info { static int iio_trig_periodic_rtc_set_state(struct iio_trigger *trig, bool state) { - struct iio_prtc_trigger_info *trig_info = trig->private_data; + struct iio_prtc_trigger_info *trig_info = iio_trigger_get_drvdata(trig); if (trig_info->frequency == 0) return -EINVAL; printk(KERN_INFO "trigger frequency is %d\n", trig_info->frequency); @@ -42,7 +42,7 @@ static ssize_t iio_trig_periodic_read_freq(struct device *dev, char *buf) { struct iio_trigger *trig = to_iio_trigger(dev); - struct iio_prtc_trigger_info *trig_info = trig->private_data; + struct iio_prtc_trigger_info *trig_info = iio_trigger_get_drvdata(trig); return sprintf(buf, "%u\n", trig_info->frequency); } @@ -52,7 +52,7 @@ static ssize_t iio_trig_periodic_write_freq(struct device *dev, size_t len) { struct iio_trigger *trig = to_iio_trigger(dev); - struct iio_prtc_trigger_info *trig_info = trig->private_data; + struct iio_prtc_trigger_info *trig_info = iio_trigger_get_drvdata(trig); unsigned long val; int ret; @@ -124,7 +124,7 @@ static int iio_trig_periodic_rtc_probe(struct platform_device *dev) ret = -ENOMEM; goto error_put_trigger_and_remove_from_list; } - trig->private_data = trig_info; + iio_trigger_set_drvdata(trig, trig_info); trig->ops = &iio_prtc_trigger_ops; /* RTC access */ trig_info->rtc @@ -158,7 +158,7 @@ error_free_completed_registrations: trig2, &iio_prtc_trigger_list, alloc_list) { - trig_info = trig->private_data; + trig_info = iio_trigger_get_drvdata(trig); rtc_irq_unregister(trig_info->rtc, &trig_info->task); rtc_class_close(trig_info->rtc); kfree(trig_info); @@ -176,7 +176,7 @@ static int iio_trig_periodic_rtc_remove(struct platform_device *dev) trig2, &iio_prtc_trigger_list, alloc_list) { - trig_info = trig->private_data; + trig_info = iio_trigger_get_drvdata(trig); rtc_irq_unregister(trig_info->rtc, &trig_info->task); rtc_class_close(trig_info->rtc); kfree(trig_info); diff --git a/drivers/staging/iio/trigger/iio-trig-sysfs.c b/drivers/staging/iio/trigger/iio-trig-sysfs.c index 3bac97224bf4..b727bde8b7fe 100644 --- a/drivers/staging/iio/trigger/iio-trig-sysfs.c +++ b/drivers/staging/iio/trigger/iio-trig-sysfs.c @@ -103,7 +103,7 @@ static ssize_t iio_sysfs_trigger_poll(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct iio_trigger *trig = to_iio_trigger(dev); - struct iio_sysfs_trig *sysfs_trig = trig->private_data; + struct iio_sysfs_trig *sysfs_trig = iio_trigger_get_drvdata(trig); irq_work_queue(&sysfs_trig->work); @@ -160,7 +160,7 @@ static int 
iio_sysfs_trigger_probe(int id) t->trig->dev.groups = iio_sysfs_trigger_attr_groups; t->trig->ops = &iio_sysfs_trigger_ops; t->trig->dev.parent = &iio_sysfs_trig_dev; - t->trig->private_data = t; + iio_trigger_set_drvdata(t->trig, t); init_irq_work(&t->work, iio_sysfs_trigger_work); diff --git a/include/linux/iio/trigger.h b/include/linux/iio/trigger.h index c66e0a96f6e8..b81948aba1d6 100644 --- a/include/linux/iio/trigger.h +++ b/include/linux/iio/trigger.h @@ -91,6 +91,30 @@ static inline void iio_trigger_get(struct iio_trigger *trig) __module_get(trig->ops->owner); } +/** + * iio_device_set_drvdata() - Set trigger driver data + * @trig: IIO trigger structure + * @data: Driver specific data + * + * Allows to attach an arbitrary pointer to an IIO trigger, which can later be + * retrieved by iio_trigger_get_drvdata(). + */ +static inline void iio_trigger_set_drvdata(struct iio_trigger *trig, void *data) +{ + trig->private_data = data; +} + +/** + * iio_trigger_get_drvdata() - Get trigger driver data + * @trig: IIO trigger structure + * + * Returns the data previously set with iio_trigger_set_drvdata() + */ +static inline void *iio_trigger_get_drvdata(struct iio_trigger *trig) +{ + return trig->private_data; +} + /** * iio_trigger_register() - register a trigger with the IIO core * @trig_info: trigger to be registered -- cgit From 5034bfc976928b447cb6decd311d35161107a72f Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Mon, 25 Mar 2013 08:58:00 +0000 Subject: iio:trigger: Use dev_{set,get}_drvdata for private data management Use dev_{set,get}_drvdata for managing private data attached to a trigger instead of using a custom field in the iio_trigger struct. Signed-off-by: Lars-Peter Clausen Signed-off-by: Jonathan Cameron --- drivers/staging/iio/Documentation/trigger.txt | 3 --- include/linux/iio/trigger.h | 6 ++---- 2 files changed, 2 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/staging/iio/Documentation/trigger.txt b/drivers/staging/iio/Documentation/trigger.txt index 75cc37ff1ed0..64e2e08fb4d0 100644 --- a/drivers/staging/iio/Documentation/trigger.txt +++ b/drivers/staging/iio/Documentation/trigger.txt @@ -10,9 +10,6 @@ struct iio_trig *trig = iio_trigger_alloc("", ...); allocates a trigger structure. The key elements to then fill in within a driver are: -trig->private_data - Device specific private data. - trig->owner Typically set to THIS_MODULE. Used to ensure correct ownership of core allocated resources. 
diff --git a/include/linux/iio/trigger.h b/include/linux/iio/trigger.h index b81948aba1d6..3869c525b052 100644 --- a/include/linux/iio/trigger.h +++ b/include/linux/iio/trigger.h @@ -44,7 +44,6 @@ struct iio_trigger_ops { * @id: [INTERN] unique id number * @name: [DRIVER] unique name * @dev: [DRIVER] associated device (if relevant) - * @private_data: [DRIVER] device specific data * @list: [INTERN] used in maintenance of global trigger list * @alloc_list: [DRIVER] used for driver specific trigger list * @use_count: use count for the trigger @@ -60,7 +59,6 @@ struct iio_trigger { const char *name; struct device dev; - void *private_data; struct list_head list; struct list_head alloc_list; int use_count; @@ -101,7 +99,7 @@ static inline void iio_trigger_get(struct iio_trigger *trig) */ static inline void iio_trigger_set_drvdata(struct iio_trigger *trig, void *data) { - trig->private_data = data; + dev_set_drvdata(&trig->dev, data); } /** @@ -112,7 +110,7 @@ static inline void iio_trigger_set_drvdata(struct iio_trigger *trig, void *data) */ static inline void *iio_trigger_get_drvdata(struct iio_trigger *trig) { - return trig->private_data; + return dev_get_drvdata(&trig->dev); } /** -- cgit From 57df8106932b57427df1eaaa13871857f75b1194 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Fri, 8 Feb 2013 14:52:06 +0800 Subject: Thermal: exynos: fix cooling state translation Signed-off-by: Zhang Rui Tested-by: Amit Daniel kachhap --- drivers/thermal/cpu_cooling.c | 11 +++++++++++ drivers/thermal/exynos_thermal.c | 24 ++---------------------- include/linux/cpu_cooling.h | 7 +++++++ include/linux/thermal.h | 5 ++++- 4 files changed, 24 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index 9e208d300647..e03891b03c9b 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -196,6 +196,17 @@ static int get_property(unsigned int cpu, unsigned long input, return -EINVAL; } +unsigned long cpufreq_cooling_get_level(unsigned int cpu, unsigned int freq) +{ + unsigned int val; + + if (get_property(cpu, (unsigned long)freq, &val, GET_LEVEL)) + return THERMAL_CSTATE_INVALID; + return (unsigned long)val; +} + +EXPORT_SYMBOL(cpufreq_cooling_get_level); + /** * get_cpu_frequency - get the absolute value of frequency from level. * @cpu: cpu for which frequency is fetched. 
diff --git a/drivers/thermal/exynos_thermal.c b/drivers/thermal/exynos_thermal.c index 46568c078dee..541257888c3e 100644 --- a/drivers/thermal/exynos_thermal.c +++ b/drivers/thermal/exynos_thermal.c @@ -242,26 +242,6 @@ static int exynos_get_crit_temp(struct thermal_zone_device *thermal, return ret; } -static int exynos_get_frequency_level(unsigned int cpu, unsigned int freq) -{ - int i = 0, ret = -EINVAL; - struct cpufreq_frequency_table *table = NULL; -#ifdef CONFIG_CPU_FREQ - table = cpufreq_frequency_get_table(cpu); -#endif - if (!table) - return ret; - - while (table[i].frequency != CPUFREQ_TABLE_END) { - if (table[i].frequency == CPUFREQ_ENTRY_INVALID) - continue; - if (table[i].frequency == freq) - return i; - i++; - } - return ret; -} - /* Bind callback functions for thermal zone */ static int exynos_bind(struct thermal_zone_device *thermal, struct thermal_cooling_device *cdev) @@ -288,8 +268,8 @@ static int exynos_bind(struct thermal_zone_device *thermal, /* Bind the thermal zone to the cpufreq cooling device */ for (i = 0; i < tab_size; i++) { clip_data = (struct freq_clip_table *)&(tab_ptr[i]); - level = exynos_get_frequency_level(0, clip_data->freq_clip_max); - if (level < 0) + level = cpufreq_cooling_get_level(0, clip_data->freq_clip_max); + if (level == THERMAL_CSTATE_INVALID) return 0; switch (GET_ZONE(i)) { case MONITOR_ZONE: diff --git a/include/linux/cpu_cooling.h b/include/linux/cpu_cooling.h index 40b4ef54cc7d..bc479b1e0fd9 100644 --- a/include/linux/cpu_cooling.h +++ b/include/linux/cpu_cooling.h @@ -42,6 +42,8 @@ struct thermal_cooling_device *cpufreq_cooling_register( * @cdev: thermal cooling device pointer. */ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev); + +unsigned long cpufreq_cooling_get_level(unsigned int cpu, unsigned int freq); #else /* !CONFIG_CPU_THERMAL */ static inline struct thermal_cooling_device *cpufreq_cooling_register( const struct cpumask *clip_cpus) @@ -53,6 +55,11 @@ static inline void cpufreq_cooling_unregister( { return; } +static inline unsigned long cpufreq_cooling_get_level(unsigned int cpu, + unsigned int freq) +{ + return THERMAL_CSTATE_INVALID; +} #endif /* CONFIG_CPU_THERMAL */ #endif /* __CPU_COOLING_H__ */ diff --git a/include/linux/thermal.h b/include/linux/thermal.h index f0bd7f90a90d..5a3b428daaab 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -33,8 +33,11 @@ #define THERMAL_MAX_TRIPS 12 #define THERMAL_NAME_LENGTH 20 +/* invalid cooling state */ +#define THERMAL_CSTATE_INVALID -1UL + /* No upper/lower limit requirement */ -#define THERMAL_NO_LIMIT -1UL +#define THERMAL_NO_LIMIT THERMAL_CSTATE_INVALID /* Unit conversion macros */ #define KELVIN_TO_CELSIUS(t) (long)(((long)t-2732 >= 0) ? \ -- cgit From f8b587055a793c7719f0d4f41b7b4aeeef43aa2d Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Wed, 20 Mar 2013 21:38:07 +0000 Subject: thermal: Fix compiler warning The following warning is obtained when CONFIG_NET is not defined: In file included from drivers/thermal/mvebu_thermal.c:27:0: include/linux/thermal.h:254:12: warning: 'thermal_generate_netlink_event' defined but not used [-Wunused-function] This patch fixes the warning by properly inlining thermal_generate_netlink_event().
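For reference, the resulting shape of the header (reconstructed from the hunk below) is the standard config-gated stub pattern: a plain static stub defined in a header triggers -Wunused-function in every file that includes the header without calling it, whereas a static inline stub stays silent and normally generates no code at all:

	#ifdef CONFIG_NET
	extern int thermal_generate_netlink_event(struct thermal_zone_device *tz,
						  enum events event);
	#else
	/* 'static' alone warns in files that never call the stub;
	 * 'static inline' does not, and compiles away entirely. */
	static inline int thermal_generate_netlink_event(struct thermal_zone_device *tz,
							 enum events event)
	{
		return 0;
	}
	#endif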
Signed-off-by: Ezequiel Garcia Signed-off-by: Zhang Rui --- include/linux/thermal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index f0bd7f90a90d..fd7b8f3e6f42 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -251,7 +251,7 @@ void thermal_unregister_governor(struct thermal_governor *); extern int thermal_generate_netlink_event(struct thermal_zone_device *tz, enum events event); #else -static int thermal_generate_netlink_event(struct thermal_zone_device *tz, +static inline int thermal_generate_netlink_event(struct thermal_zone_device *tz, enum events event) { return 0; -- cgit From c0f4dfd4f90f1667d234d21f15153ea09a2eaa66 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 28 Dec 2012 11:30:36 -0800 Subject: rcu: Make RCU_FAST_NO_HZ take advantage of numbered callbacks Because RCU callbacks are now associated with the number of the grace period that they must wait for, CPUs can now take advance callbacks corresponding to grace periods that ended while a given CPU was in dyntick-idle mode. This eliminates the need to try forcing the RCU state machine while entering idle, thus reducing the CPU intensiveness of RCU_FAST_NO_HZ, which should increase its energy efficiency. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- Documentation/kernel-parameters.txt | 28 ++- include/linux/rcupdate.h | 1 + init/Kconfig | 17 +- kernel/rcutree.c | 28 +-- kernel/rcutree.h | 12 +- kernel/rcutree_plugin.h | 374 ++++++++++-------------------------- kernel/rcutree_trace.c | 2 - 7 files changed, 149 insertions(+), 313 deletions(-) (limited to 'include/linux') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index a17ba16c8fc8..22303b2e74bc 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2490,6 +2490,17 @@ bytes respectively. Such letter suffixes can also be entirely omitted. leaf rcu_node structure. Useful for very large systems. + rcutree.jiffies_till_first_fqs= [KNL,BOOT] + Set delay from grace-period initialization to + first attempt to force quiescent states. + Units are jiffies, minimum value is zero, + and maximum value is HZ. + + rcutree.jiffies_till_next_fqs= [KNL,BOOT] + Set delay between subsequent attempts to force + quiescent states. Units are jiffies, minimum + value is one, and maximum value is HZ. + rcutree.qhimark= [KNL,BOOT] Set threshold of queued RCU callbacks over which batch limiting is disabled. @@ -2504,16 +2515,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted. rcutree.rcu_cpu_stall_timeout= [KNL,BOOT] Set timeout for RCU CPU stall warning messages. - rcutree.jiffies_till_first_fqs= [KNL,BOOT] - Set delay from grace-period initialization to - first attempt to force quiescent states. - Units are jiffies, minimum value is zero, - and maximum value is HZ. + rcutree.rcu_idle_gp_delay= [KNL,BOOT] + Set wakeup interval for idle CPUs that have + RCU callbacks (RCU_FAST_NO_HZ=y). - rcutree.jiffies_till_next_fqs= [KNL,BOOT] - Set delay between subsequent attempts to force - quiescent states. Units are jiffies, minimum - value is one, and maximum value is HZ. + rcutree.rcu_idle_lazy_gp_delay= [KNL,BOOT] + Set wakeup interval for idle CPUs that have + only "lazy" RCU callbacks (RCU_FAST_NO_HZ=y). + Lazy RCU callbacks are those which RCU can + prove do nothing more than free memory. 
rcutorture.fqs_duration= [KNL,BOOT] Set duration of force_quiescent_state bursts. diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index b758ce17b309..9ed2c9a4de45 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -80,6 +80,7 @@ extern void do_trace_rcu_torture_read(char *rcutorturename, #define UINT_CMP_LT(a, b) (UINT_MAX / 2 < (a) - (b)) #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) #define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) +#define ulong2long(a) (*(long *)(&(a))) /* Exported common interfaces */ diff --git a/init/Kconfig b/init/Kconfig index 717584064a7e..a3a2304fa6d2 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -582,13 +582,16 @@ config RCU_FAST_NO_HZ depends on NO_HZ && SMP default n help - This option causes RCU to attempt to accelerate grace periods in - order to allow CPUs to enter dynticks-idle state more quickly. - On the other hand, this option increases the overhead of the - dynticks-idle checking, thus degrading scheduling latency. - - Say Y if energy efficiency is critically important, and you don't - care about real-time response. + This option permits CPUs to enter dynticks-idle state even if + they have RCU callbacks queued, and prevents RCU from waking + these CPUs up more than roughly once every four jiffies (by + default, you can adjust this using the rcutree.rcu_idle_gp_delay + parameter), thus improving energy efficiency. On the other + hand, this option increases the duration of RCU grace periods, + for example, slowing down synchronize_rcu(). + + Say Y if energy efficiency is critically important, and you + don't care about increased grace-period durations. Say N if you are unsure. diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 2015bce749f9..7b1d7769872a 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -2640,19 +2640,27 @@ static int rcu_pending(int cpu) } /* - * Check to see if any future RCU-related work will need to be done - * by the current CPU, even if none need be done immediately, returning - * 1 if so. + * Return true if the specified CPU has any callback. If all_lazy is + * non-NULL, store an indication of whether all callbacks are lazy. + * (If there are no callbacks, all of them are deemed to be lazy.) */ -static int rcu_cpu_has_callbacks(int cpu) +static int rcu_cpu_has_callbacks(int cpu, bool *all_lazy) { + bool al = true; + bool hc = false; + struct rcu_data *rdp; struct rcu_state *rsp; - /* RCU callbacks either ready or pending? */ - for_each_rcu_flavor(rsp) - if (per_cpu_ptr(rsp->rda, cpu)->nxtlist) - return 1; - return 0; + for_each_rcu_flavor(rsp) { + rdp = per_cpu_ptr(rsp->rda, cpu); + if (rdp->qlen != rdp->qlen_lazy) + al = false; + if (rdp->nxtlist) + hc = true; + } + if (all_lazy) + *all_lazy = al; + return hc; } /* @@ -2871,7 +2879,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; atomic_set(&rdp->dynticks->dynticks, (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1); - rcu_prepare_for_idle_init(cpu); raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ /* Add CPU to rcu_node bitmasks. 
*/ @@ -2945,7 +2952,6 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, */ for_each_rcu_flavor(rsp) rcu_cleanup_dying_cpu(rsp); - rcu_cleanup_after_idle(cpu); break; case CPU_DEAD: case CPU_DEAD_FROZEN: diff --git a/kernel/rcutree.h b/kernel/rcutree.h index b6c2335efbdf..96a27f922e92 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -88,18 +88,13 @@ struct rcu_dynticks { int dynticks_nmi_nesting; /* Track NMI nesting level. */ atomic_t dynticks; /* Even value for idle, else odd. */ #ifdef CONFIG_RCU_FAST_NO_HZ - int dyntick_drain; /* Prepare-for-idle state variable. */ - unsigned long dyntick_holdoff; - /* No retries for the jiffy of failure. */ - struct timer_list idle_gp_timer; - /* Wake up CPU sleeping with callbacks. */ - unsigned long idle_gp_timer_expires; - /* When to wake up CPU (for repost). */ - bool idle_first_pass; /* First pass of attempt to go idle? */ + bool all_lazy; /* Are all CPU's CBs lazy? */ unsigned long nonlazy_posted; /* # times non-lazy CBs posted to CPU. */ unsigned long nonlazy_posted_snap; /* idle-period nonlazy_posted snapshot. */ + unsigned long last_accelerate; + /* Last jiffy CBs were accelerated. */ int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */ #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */ }; @@ -521,7 +516,6 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, struct rcu_node *rnp); #endif /* #ifdef CONFIG_RCU_BOOST */ static void __cpuinit rcu_prepare_kthreads(int cpu); -static void rcu_prepare_for_idle_init(int cpu); static void rcu_cleanup_after_idle(int cpu); static void rcu_prepare_for_idle(int cpu); static void rcu_idle_count_callbacks_posted(void); diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 28185ad18df3..d318f9f18be5 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -1543,14 +1543,7 @@ static void __cpuinit rcu_prepare_kthreads(int cpu) int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) { *delta_jiffies = ULONG_MAX; - return rcu_cpu_has_callbacks(cpu); -} - -/* - * Because we do not have RCU_FAST_NO_HZ, don't bother initializing for it. - */ -static void rcu_prepare_for_idle_init(int cpu) -{ + return rcu_cpu_has_callbacks(cpu, NULL); } /* @@ -1587,16 +1580,6 @@ static void rcu_idle_count_callbacks_posted(void) * * The following three proprocessor symbols control this state machine: * - * RCU_IDLE_FLUSHES gives the maximum number of times that we will attempt - * to satisfy RCU. Beyond this point, it is better to incur a periodic - * scheduling-clock interrupt than to loop through the state machine - * at full power. - * RCU_IDLE_OPT_FLUSHES gives the number of RCU_IDLE_FLUSHES that are - * optional if RCU does not need anything immediately from this - * CPU, even if this CPU still has RCU callbacks queued. The first - * times through the state machine are mandatory: we need to give - * the state machine a chance to communicate a quiescent state - * to the RCU core. * RCU_IDLE_GP_DELAY gives the number of jiffies that a CPU is permitted * to sleep in dyntick-idle mode with RCU callbacks pending. This * is sized to be roughly one RCU grace period. Those energy-efficiency @@ -1612,15 +1595,9 @@ static void rcu_idle_count_callbacks_posted(void) * adjustment, they can be converted into kernel config parameters, though * making the state machine smarter might be a better option. */ -#define RCU_IDLE_FLUSHES 5 /* Number of dyntick-idle tries. */ -#define RCU_IDLE_OPT_FLUSHES 3 /* Optional dyntick-idle tries. 
*/ #define RCU_IDLE_GP_DELAY 4 /* Roughly one grace period. */ #define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */ -static int rcu_idle_flushes = RCU_IDLE_FLUSHES; -module_param(rcu_idle_flushes, int, 0644); -static int rcu_idle_opt_flushes = RCU_IDLE_OPT_FLUSHES; -module_param(rcu_idle_opt_flushes, int, 0644); static int rcu_idle_gp_delay = RCU_IDLE_GP_DELAY; module_param(rcu_idle_gp_delay, int, 0644); static int rcu_idle_lazy_gp_delay = RCU_IDLE_LAZY_GP_DELAY; @@ -1629,178 +1606,97 @@ module_param(rcu_idle_lazy_gp_delay, int, 0644); extern int tick_nohz_enabled; /* - * Does the specified flavor of RCU have non-lazy callbacks pending on - * the specified CPU? Both RCU flavor and CPU are specified by the - * rcu_data structure. - */ -static bool __rcu_cpu_has_nonlazy_callbacks(struct rcu_data *rdp) -{ - return rdp->qlen != rdp->qlen_lazy; -} - -#ifdef CONFIG_TREE_PREEMPT_RCU - -/* - * Are there non-lazy RCU-preempt callbacks? (There cannot be if there - * is no RCU-preempt in the kernel.) + * Try to advance callbacks for all flavors of RCU on the current CPU. + * Afterwards, if there are any callbacks ready for immediate invocation, + * return true. */ -static bool rcu_preempt_cpu_has_nonlazy_callbacks(int cpu) +static bool rcu_try_advance_all_cbs(void) { - struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu); - - return __rcu_cpu_has_nonlazy_callbacks(rdp); -} - -#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ + bool cbs_ready = false; + struct rcu_data *rdp; + struct rcu_node *rnp; + struct rcu_state *rsp; -static bool rcu_preempt_cpu_has_nonlazy_callbacks(int cpu) -{ - return 0; -} + for_each_rcu_flavor(rsp) { + rdp = this_cpu_ptr(rsp->rda); + rnp = rdp->mynode; -#endif /* else #ifdef CONFIG_TREE_PREEMPT_RCU */ + /* + * Don't bother checking unless a grace period has + * completed since we last checked and there are + * callbacks not yet ready to invoke. + */ + if (rdp->completed != rnp->completed && + rdp->nxttail[RCU_DONE_TAIL] != rdp->nxttail[RCU_NEXT_TAIL]) + rcu_process_gp_end(rsp, rdp); -/* - * Does any flavor of RCU have non-lazy callbacks on the specified CPU? - */ -static bool rcu_cpu_has_nonlazy_callbacks(int cpu) -{ - return __rcu_cpu_has_nonlazy_callbacks(&per_cpu(rcu_sched_data, cpu)) || - __rcu_cpu_has_nonlazy_callbacks(&per_cpu(rcu_bh_data, cpu)) || - rcu_preempt_cpu_has_nonlazy_callbacks(cpu); + if (cpu_has_callbacks_ready_to_invoke(rdp)) + cbs_ready = true; + } + return cbs_ready; } /* - * Allow the CPU to enter dyntick-idle mode if either: (1) There are no - * callbacks on this CPU, (2) this CPU has not yet attempted to enter - * dyntick-idle mode, or (3) this CPU is in the process of attempting to - * enter dyntick-idle mode. Otherwise, if we have recently tried and failed - * to enter dyntick-idle mode, we refuse to try to enter it. After all, - * it is better to incur scheduling-clock interrupts than to spin - * continuously for the same time duration! + * Allow the CPU to enter dyntick-idle mode unless it has callbacks ready + * to invoke. If the CPU has callbacks, try to advance them. Tell the + * caller to set the timeout based on whether or not there are non-lazy + * callbacks. * - * The delta_jiffies argument is used to store the time when RCU is - * going to need the CPU again if it still has callbacks. The reason - * for this is that rcu_prepare_for_idle() might need to post a timer, - * but if so, it will do so after tick_nohz_stop_sched_tick() has set - * the wakeup time for this CPU. 
This means that RCU's timer can be - * delayed until the wakeup time, which defeats the purpose of posting - * a timer. + * The caller must have disabled interrupts. */ -int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) +int rcu_needs_cpu(int cpu, unsigned long *dj) { struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); - /* Flag a new idle sojourn to the idle-entry state machine. */ - rdtp->idle_first_pass = 1; + /* Snapshot to detect later posting of non-lazy callback. */ + rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted; + /* If no callbacks, RCU doesn't need the CPU. */ - if (!rcu_cpu_has_callbacks(cpu)) { - *delta_jiffies = ULONG_MAX; + if (!rcu_cpu_has_callbacks(cpu, &rdtp->all_lazy)) { + *dj = ULONG_MAX; return 0; } - if (rdtp->dyntick_holdoff == jiffies) { - /* RCU recently tried and failed, so don't try again. */ - *delta_jiffies = 1; + + /* Attempt to advance callbacks. */ + if (rcu_try_advance_all_cbs()) { + /* Some ready to invoke, so initiate later invocation. */ + invoke_rcu_core(); return 1; } - /* Set up for the possibility that RCU will post a timer. */ - if (rcu_cpu_has_nonlazy_callbacks(cpu)) { - *delta_jiffies = round_up(rcu_idle_gp_delay + jiffies, - rcu_idle_gp_delay) - jiffies; + rdtp->last_accelerate = jiffies; + + /* Request timer delay depending on laziness, and round. */ + if (rdtp->all_lazy) { + *dj = round_up(rcu_idle_gp_delay + jiffies, + rcu_idle_gp_delay) - jiffies; } else { - *delta_jiffies = jiffies + rcu_idle_lazy_gp_delay; - *delta_jiffies = round_jiffies(*delta_jiffies) - jiffies; + *dj = round_jiffies(rcu_idle_lazy_gp_delay + jiffies) - jiffies; } return 0; } /* - * Handler for smp_call_function_single(). The only point of this - * handler is to wake the CPU up, so the handler does only tracing. - */ -void rcu_idle_demigrate(void *unused) -{ - trace_rcu_prep_idle("Demigrate"); -} - -/* - * Timer handler used to force CPU to start pushing its remaining RCU - * callbacks in the case where it entered dyntick-idle mode with callbacks - * pending. The hander doesn't really need to do anything because the - * real work is done upon re-entry to idle, or by the next scheduling-clock - * interrupt should idle not be re-entered. - * - * One special case: the timer gets migrated without awakening the CPU - * on which the timer was scheduled on. In this case, we must wake up - * that CPU. We do so with smp_call_function_single(). - */ -static void rcu_idle_gp_timer_func(unsigned long cpu_in) -{ - int cpu = (int)cpu_in; - - trace_rcu_prep_idle("Timer"); - if (cpu != smp_processor_id()) - smp_call_function_single(cpu, rcu_idle_demigrate, NULL, 0); - else - WARN_ON_ONCE(1); /* Getting here can hang the system... */ -} - -/* - * Initialize the timer used to pull CPUs out of dyntick-idle mode. - */ -static void rcu_prepare_for_idle_init(int cpu) -{ - struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); - - rdtp->dyntick_holdoff = jiffies - 1; - setup_timer(&rdtp->idle_gp_timer, rcu_idle_gp_timer_func, cpu); - rdtp->idle_gp_timer_expires = jiffies - 1; - rdtp->idle_first_pass = 1; -} - -/* - * Clean up for exit from idle. Because we are exiting from idle, there - * is no longer any point to ->idle_gp_timer, so cancel it. This will - * do nothing if this timer is not active, so just cancel it unconditionally. 
- */ -static void rcu_cleanup_after_idle(int cpu) -{ - struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); - - del_timer(&rdtp->idle_gp_timer); - trace_rcu_prep_idle("Cleanup after idle"); - rdtp->tick_nohz_enabled_snap = ACCESS_ONCE(tick_nohz_enabled); -} - -/* - * Check to see if any RCU-related work can be done by the current CPU, - * and if so, schedule a softirq to get it done. This function is part - * of the RCU implementation; it is -not- an exported member of the RCU API. - * - * The idea is for the current CPU to clear out all work required by the - * RCU core for the current grace period, so that this CPU can be permitted - * to enter dyntick-idle mode. In some cases, it will need to be awakened - * at the end of the grace period by whatever CPU ends the grace period. - * This allows CPUs to go dyntick-idle more quickly, and to reduce the - * number of wakeups by a modest integer factor. - * - * Because it is not legal to invoke rcu_process_callbacks() with irqs - * disabled, we do one pass of force_quiescent_state(), then do a - * invoke_rcu_core() to cause rcu_process_callbacks() to be invoked - * later. The ->dyntick_drain field controls the sequencing. + * Prepare a CPU for idle from an RCU perspective. The first major task + * is to sense whether nohz mode has been enabled or disabled via sysfs. + * The second major task is to check to see if a non-lazy callback has + * arrived at a CPU that previously had only lazy callbacks. The third + * major task is to accelerate (that is, assign grace-period numbers to) + * any recently arrived callbacks. * * The caller must have disabled interrupts. */ static void rcu_prepare_for_idle(int cpu) { - struct timer_list *tp; + struct rcu_data *rdp; struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); + struct rcu_node *rnp; + struct rcu_state *rsp; int tne; /* Handle nohz enablement switches conservatively. */ tne = ACCESS_ONCE(tick_nohz_enabled); if (tne != rdtp->tick_nohz_enabled_snap) { - if (rcu_cpu_has_callbacks(cpu)) + if (rcu_cpu_has_callbacks(cpu, NULL)) invoke_rcu_core(); /* force nohz to see update. */ rdtp->tick_nohz_enabled_snap = tne; return; @@ -1808,125 +1704,56 @@ static void rcu_prepare_for_idle(int cpu) if (!tne) return; - /* Adaptive-tick mode, where usermode execution is idle to RCU. */ - if (!is_idle_task(current)) { - rdtp->dyntick_holdoff = jiffies - 1; - if (rcu_cpu_has_nonlazy_callbacks(cpu)) { - trace_rcu_prep_idle("User dyntick with callbacks"); - rdtp->idle_gp_timer_expires = - round_up(jiffies + rcu_idle_gp_delay, - rcu_idle_gp_delay); - } else if (rcu_cpu_has_callbacks(cpu)) { - rdtp->idle_gp_timer_expires = - round_jiffies(jiffies + rcu_idle_lazy_gp_delay); - trace_rcu_prep_idle("User dyntick with lazy callbacks"); - } else { - return; - } - tp = &rdtp->idle_gp_timer; - mod_timer_pinned(tp, rdtp->idle_gp_timer_expires); + /* If this is a no-CBs CPU, no callbacks, just return. */ + if (is_nocb_cpu(cpu)) return; - } /* - * If this is an idle re-entry, for example, due to use of - * RCU_NONIDLE() or the new idle-loop tracing API within the idle - * loop, then don't take any state-machine actions, unless the - * momentary exit from idle queued additional non-lazy callbacks. - * Instead, repost the ->idle_gp_timer if this CPU has callbacks - * pending. + * If a non-lazy callback arrived at a CPU having only lazy + * callbacks, invoke RCU core for the side-effect of recalculating + * idle duration on re-entry to idle. 
*/ - if (!rdtp->idle_first_pass && - (rdtp->nonlazy_posted == rdtp->nonlazy_posted_snap)) { - if (rcu_cpu_has_callbacks(cpu)) { - tp = &rdtp->idle_gp_timer; - mod_timer_pinned(tp, rdtp->idle_gp_timer_expires); - } + if (rdtp->all_lazy && + rdtp->nonlazy_posted != rdtp->nonlazy_posted_snap) { + invoke_rcu_core(); return; } - rdtp->idle_first_pass = 0; - rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted - 1; /* - * If there are no callbacks on this CPU, enter dyntick-idle mode. - * Also reset state to avoid prejudicing later attempts. + * If we have not yet accelerated this jiffy, accelerate all + * callbacks on this CPU. */ - if (!rcu_cpu_has_callbacks(cpu)) { - rdtp->dyntick_holdoff = jiffies - 1; - rdtp->dyntick_drain = 0; - trace_rcu_prep_idle("No callbacks"); + if (rdtp->last_accelerate == jiffies) return; + rdtp->last_accelerate = jiffies; + for_each_rcu_flavor(rsp) { + rdp = per_cpu_ptr(rsp->rda, cpu); + if (!*rdp->nxttail[RCU_DONE_TAIL]) + continue; + rnp = rdp->mynode; + raw_spin_lock(&rnp->lock); /* irqs already disabled. */ + rcu_accelerate_cbs(rsp, rnp, rdp); + raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ } +} - /* - * If in holdoff mode, just return. We will presumably have - * refrained from disabling the scheduling-clock tick. - */ - if (rdtp->dyntick_holdoff == jiffies) { - trace_rcu_prep_idle("In holdoff"); - return; - } +/* + * Clean up for exit from idle. Attempt to advance callbacks based on + * any grace periods that elapsed while the CPU was idle, and if any + * callbacks are now ready to invoke, initiate invocation. + */ +static void rcu_cleanup_after_idle(int cpu) +{ + struct rcu_data *rdp; + struct rcu_state *rsp; - /* Check and update the ->dyntick_drain sequencing. */ - if (rdtp->dyntick_drain <= 0) { - /* First time through, initialize the counter. */ - rdtp->dyntick_drain = rcu_idle_flushes; - } else if (rdtp->dyntick_drain <= rcu_idle_opt_flushes && - !rcu_pending(cpu) && - !local_softirq_pending()) { - /* Can we go dyntick-idle despite still having callbacks? */ - rdtp->dyntick_drain = 0; - rdtp->dyntick_holdoff = jiffies; - if (rcu_cpu_has_nonlazy_callbacks(cpu)) { - trace_rcu_prep_idle("Dyntick with callbacks"); - rdtp->idle_gp_timer_expires = - round_up(jiffies + rcu_idle_gp_delay, - rcu_idle_gp_delay); - } else { - rdtp->idle_gp_timer_expires = - round_jiffies(jiffies + rcu_idle_lazy_gp_delay); - trace_rcu_prep_idle("Dyntick with lazy callbacks"); - } - tp = &rdtp->idle_gp_timer; - mod_timer_pinned(tp, rdtp->idle_gp_timer_expires); - rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted; - return; /* Nothing more to do immediately. */ - } else if (--(rdtp->dyntick_drain) <= 0) { - /* We have hit the limit, so time to give up. */ - rdtp->dyntick_holdoff = jiffies; - trace_rcu_prep_idle("Begin holdoff"); - invoke_rcu_core(); /* Force the CPU out of dyntick-idle. */ + if (is_nocb_cpu(cpu)) return; - } - - /* - * Do one step of pushing the remaining RCU callbacks through - * the RCU core state machine. - */ -#ifdef CONFIG_TREE_PREEMPT_RCU - if (per_cpu(rcu_preempt_data, cpu).nxtlist) { - rcu_preempt_qs(cpu); - force_quiescent_state(&rcu_preempt_state); - } -#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ - if (per_cpu(rcu_sched_data, cpu).nxtlist) { - rcu_sched_qs(cpu); - force_quiescent_state(&rcu_sched_state); - } - if (per_cpu(rcu_bh_data, cpu).nxtlist) { - rcu_bh_qs(cpu); - force_quiescent_state(&rcu_bh_state); - } - - /* - * If RCU callbacks are still pending, RCU still needs this CPU. - * So try forcing the callbacks through the grace period. 
- */ - if (rcu_cpu_has_callbacks(cpu)) { - trace_rcu_prep_idle("More callbacks"); - invoke_rcu_core(); - } else { - trace_rcu_prep_idle("Callbacks drained"); + rcu_try_advance_all_cbs(); + for_each_rcu_flavor(rsp) { + rdp = per_cpu_ptr(rsp->rda, cpu); + if (cpu_has_callbacks_ready_to_invoke(rdp)) + invoke_rcu_core(); } } @@ -2034,16 +1861,13 @@ early_initcall(rcu_register_oom_notifier); static void print_cpu_stall_fast_no_hz(char *cp, int cpu) { struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); - struct timer_list *tltp = &rdtp->idle_gp_timer; - char c; + unsigned long nlpd = rdtp->nonlazy_posted - rdtp->nonlazy_posted_snap; - c = rdtp->dyntick_holdoff == jiffies ? 'H' : '.'; - if (timer_pending(tltp)) - sprintf(cp, "drain=%d %c timer=%lu", - rdtp->dyntick_drain, c, tltp->expires - jiffies); - else - sprintf(cp, "drain=%d %c timer not pending", - rdtp->dyntick_drain, c); + sprintf(cp, "last_accelerate: %04lx/%04lx, nonlazy_posted: %ld, %c%c", + rdtp->last_accelerate & 0xffff, jiffies & 0xffff, + ulong2long(nlpd), + rdtp->all_lazy ? 'L' : '.', + rdtp->tick_nohz_enabled_snap ? '.' : 'D'); } #else /* #ifdef CONFIG_RCU_FAST_NO_HZ */ diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index 0d095dcaa670..49099e81c87b 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c @@ -46,8 +46,6 @@ #define RCU_TREE_NONCORE #include "rcutree.h" -#define ulong2long(a) (*(long *)(&(a))) - static int r_open(struct inode *inode, struct file *file, const struct seq_operations *op) { -- cgit From f607e31ce3963327f749b56c65dfec2642aa623c Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 22 Feb 2013 18:36:53 +0000 Subject: ASoC: arizona: Fix interaction between headphone outputs and identification Running HPDET while the headphone outputs are enabled can disrupt the operation of HPDET. In order to avoid this, HPDET needs to disable the headphone outputs, and ASoC must not enable them while HPDET is running. Do the ASoC side of this by storing the enable state in the core driver structure and only writing it to the device when a flag indicates that the accessory-detection side is in a state where the headphone output stage may be enabled.
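The accessory-detection (HPDET) side is not part of this diff; a rough sketch of how it is expected to use the two new fields might look like the following (function names are hypothetical, the ARIZONA_OUT1L_ENA/ARIZONA_OUT1R_ENA masks are assumed to be the counterparts of the _SHIFT constants used in the codec drivers below, and the locking the real code needs is omitted):

	/* Hypothetical sketch: force the HP outputs off around HPDET, then
	 * replay the enable state that ASoC cached in arizona->hp_ena. */
	static void arizona_hpdet_start(struct arizona *arizona)
	{
		arizona->hpdet_magic = true;	/* arizona_hp_ev() now writes 0 */
		regmap_update_bits(arizona->regmap, ARIZONA_OUTPUT_ENABLES_1,
				   ARIZONA_OUT1L_ENA | ARIZONA_OUT1R_ENA, 0);
	}

	static void arizona_hpdet_stop(struct arizona *arizona)
	{
		arizona->hpdet_magic = false;
		regmap_update_bits(arizona->regmap, ARIZONA_OUTPUT_ENABLES_1,
				   ARIZONA_OUT1L_ENA | ARIZONA_OUT1R_ENA,
				   arizona->hp_ena);	/* restore cached state */
	}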
Signed-off-by: Mark Brown --- include/linux/mfd/arizona/core.h | 3 +++ sound/soc/codecs/arizona.c | 33 +++++++++++++++++++++++++++++++++ sound/soc/codecs/arizona.h | 3 +++ sound/soc/codecs/wm5102.c | 8 ++++---- sound/soc/codecs/wm5110.c | 8 ++++---- 5 files changed, 47 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/arizona/core.h b/include/linux/mfd/arizona/core.h index a710255528d7..cc281368dc55 100644 --- a/include/linux/mfd/arizona/core.h +++ b/include/linux/mfd/arizona/core.h @@ -100,6 +100,9 @@ struct arizona { struct regmap_irq_chip_data *aod_irq_chip; struct regmap_irq_chip_data *irq_chip; + bool hpdet_magic; + unsigned int hp_ena; + struct mutex clk_lock; int clk32k_ref; diff --git a/sound/soc/codecs/arizona.c b/sound/soc/codecs/arizona.c index ac948a671ea6..e7d34711412c 100644 --- a/sound/soc/codecs/arizona.c +++ b/sound/soc/codecs/arizona.c @@ -364,6 +364,39 @@ int arizona_out_ev(struct snd_soc_dapm_widget *w, } EXPORT_SYMBOL_GPL(arizona_out_ev); +int arizona_hp_ev(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, + int event) +{ + struct arizona_priv *priv = snd_soc_codec_get_drvdata(w->codec); + unsigned int mask = 1 << w->shift; + unsigned int val; + + switch (event) { + case SND_SOC_DAPM_POST_PMU: + val = mask; + break; + case SND_SOC_DAPM_PRE_PMD: + val = 0; + break; + default: + return -EINVAL; + } + + /* Store the desired state for the HP outputs */ + priv->arizona->hp_ena &= ~mask; + priv->arizona->hp_ena |= val; + + /* Force off if HPDET magic is active */ + if (priv->arizona->hpdet_magic) + val = 0; + + snd_soc_update_bits(w->codec, ARIZONA_OUTPUT_ENABLES_1, mask, val); + + return arizona_out_ev(w, kcontrol, event); +} +EXPORT_SYMBOL_GPL(arizona_hp_ev); + static unsigned int arizona_sysclk_48k_rates[] = { 6144000, 12288000, diff --git a/sound/soc/codecs/arizona.h b/sound/soc/codecs/arizona.h index 116372c91f5d..13dd2916b721 100644 --- a/sound/soc/codecs/arizona.h +++ b/sound/soc/codecs/arizona.h @@ -184,6 +184,9 @@ extern int arizona_in_ev(struct snd_soc_dapm_widget *w, extern int arizona_out_ev(struct snd_soc_dapm_widget *w, struct snd_kcontrol *kcontrol, int event); +extern int arizona_hp_ev(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, + int event); extern int arizona_set_sysclk(struct snd_soc_codec *codec, int clk_id, int source, unsigned int freq, int dir); diff --git a/sound/soc/codecs/wm5102.c b/sound/soc/codecs/wm5102.c index b82bbf584146..2657aad3f8b1 100644 --- a/sound/soc/codecs/wm5102.c +++ b/sound/soc/codecs/wm5102.c @@ -1131,11 +1131,11 @@ ARIZONA_DSP_WIDGETS(DSP1, "DSP1"), SND_SOC_DAPM_VALUE_MUX("AEC Loopback", ARIZONA_DAC_AEC_CONTROL_1, ARIZONA_AEC_LOOPBACK_ENA, 0, &wm5102_aec_loopback_mux), -SND_SOC_DAPM_PGA_E("OUT1L", ARIZONA_OUTPUT_ENABLES_1, - ARIZONA_OUT1L_ENA_SHIFT, 0, NULL, 0, arizona_out_ev, +SND_SOC_DAPM_PGA_E("OUT1L", SND_SOC_NOPM, + ARIZONA_OUT1L_ENA_SHIFT, 0, NULL, 0, arizona_hp_ev, SND_SOC_DAPM_PRE_PMD | SND_SOC_DAPM_POST_PMU), -SND_SOC_DAPM_PGA_E("OUT1R", ARIZONA_OUTPUT_ENABLES_1, - ARIZONA_OUT1R_ENA_SHIFT, 0, NULL, 0, arizona_out_ev, +SND_SOC_DAPM_PGA_E("OUT1R", SND_SOC_NOPM, + ARIZONA_OUT1R_ENA_SHIFT, 0, NULL, 0, arizona_hp_ev, SND_SOC_DAPM_PRE_PMD | SND_SOC_DAPM_POST_PMU), SND_SOC_DAPM_PGA_E("OUT2L", ARIZONA_OUTPUT_ENABLES_1, ARIZONA_OUT2L_ENA_SHIFT, 0, NULL, 0, arizona_out_ev, diff --git a/sound/soc/codecs/wm5110.c b/sound/soc/codecs/wm5110.c index cdeb301da1f6..7841b42a819c 100644 --- a/sound/soc/codecs/wm5110.c +++ b/sound/soc/codecs/wm5110.c @@ -551,11 +551,11 
@@ SND_SOC_DAPM_AIF_IN("AIF3RX1", NULL, 0, SND_SOC_DAPM_AIF_IN("AIF3RX2", NULL, 0, ARIZONA_AIF3_RX_ENABLES, ARIZONA_AIF3RX2_ENA_SHIFT, 0), -SND_SOC_DAPM_PGA_E("OUT1L", ARIZONA_OUTPUT_ENABLES_1, - ARIZONA_OUT1L_ENA_SHIFT, 0, NULL, 0, arizona_out_ev, +SND_SOC_DAPM_PGA_E("OUT1L", SND_SOC_NOPM, + ARIZONA_OUT1L_ENA_SHIFT, 0, NULL, 0, arizona_hp_ev, SND_SOC_DAPM_PRE_PMD | SND_SOC_DAPM_POST_PMU), -SND_SOC_DAPM_PGA_E("OUT1R", ARIZONA_OUTPUT_ENABLES_1, - ARIZONA_OUT1R_ENA_SHIFT, 0, NULL, 0, arizona_out_ev, +SND_SOC_DAPM_PGA_E("OUT1R", SND_SOC_NOPM, + ARIZONA_OUT1R_ENA_SHIFT, 0, NULL, 0, arizona_hp_ev, SND_SOC_DAPM_PRE_PMD | SND_SOC_DAPM_POST_PMU), SND_SOC_DAPM_PGA_E("OUT2L", ARIZONA_OUTPUT_ENABLES_1, ARIZONA_OUT2L_ENA_SHIFT, 0, NULL, 0, arizona_out_ev, -- cgit From de88cbb7b244f3bcd61d49fd6dec35c19192545a Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 18 Jan 2013 15:31:37 +0000 Subject: arm: Move chained_irq_(enter|exit) to a generic file These functions have been introduced by commit 10a8c383 (irq: introduce entry and exit functions for chained handlers) in asm/mach/irq.h. This patch moves them to linux/irqchip/chained_irq.h so that generic irqchip drivers do not rely on architecture specific header files. Signed-off-by: Catalin Marinas Tested-by: Marc Zyngier Cc: Russell King Cc: Thomas Gleixner Cc: Rob Herring --- arch/arm/include/asm/mach/irq.h | 31 ----------------- arch/arm/mach-at91/gpio.c | 3 +- arch/arm/mach-exynos/common.c | 1 + arch/arm/mach-s3c24xx/irq.c | 1 + arch/arm/plat-samsung/irq-vic-timer.c | 3 +- arch/arm/plat-samsung/s5p-irq-gpioint.c | 3 +- drivers/gpio/gpio-msm-v2.c | 3 +- drivers/gpio/gpio-mxc.c | 2 +- drivers/gpio/gpio-omap.c | 3 +- drivers/gpio/gpio-pl061.c | 2 +- drivers/gpio/gpio-pxa.c | 3 +- drivers/gpio/gpio-tegra.c | 3 +- drivers/irqchip/exynos-combiner.c | 1 + drivers/irqchip/irq-gic.c | 1 + drivers/pinctrl/pinctrl-at91.c | 3 +- drivers/pinctrl/pinctrl-exynos.c | 3 +- drivers/pinctrl/pinctrl-nomadik.c | 2 +- drivers/pinctrl/pinctrl-sirf.c | 2 +- drivers/pinctrl/spear/pinctrl-plgpio.c | 2 +- drivers/staging/imx-drm/ipu-v3/ipu-common.c | 2 +- include/linux/irqchip/chained_irq.h | 52 +++++++++++++++++++++++++++++ 21 files changed, 71 insertions(+), 55 deletions(-) create mode 100644 include/linux/irqchip/chained_irq.h (limited to 'include/linux') diff --git a/arch/arm/include/asm/mach/irq.h b/arch/arm/include/asm/mach/irq.h index 749d5052fbb7..2092ee1e1300 100644 --- a/arch/arm/include/asm/mach/irq.h +++ b/arch/arm/include/asm/mach/irq.h @@ -30,35 +30,4 @@ do { \ raw_spin_unlock(&desc->lock); \ } while(0) -#ifndef __ASSEMBLY__ -/* - * Entry/exit functions for chained handlers where the primary IRQ chip - * may implement either fasteoi or level-trigger flow control. - */ -static inline void chained_irq_enter(struct irq_chip *chip, - struct irq_desc *desc) -{ - /* FastEOI controllers require no action on entry. 
*/ - if (chip->irq_eoi) - return; - - if (chip->irq_mask_ack) { - chip->irq_mask_ack(&desc->irq_data); - } else { - chip->irq_mask(&desc->irq_data); - if (chip->irq_ack) - chip->irq_ack(&desc->irq_data); - } -} - -static inline void chained_irq_exit(struct irq_chip *chip, - struct irq_desc *desc) -{ - if (chip->irq_eoi) - chip->irq_eoi(&desc->irq_data); - else - chip->irq_unmask(&desc->irq_data); -} -#endif - #endif diff --git a/arch/arm/mach-at91/gpio.c b/arch/arm/mach-at91/gpio.c index c5d7e1e9d757..a5afcf76550e 100644 --- a/arch/arm/mach-at91/gpio.c +++ b/arch/arm/mach-at91/gpio.c @@ -22,10 +22,9 @@ #include #include #include +#include #include -#include - #include #include diff --git a/arch/arm/mach-exynos/common.c b/arch/arm/mach-exynos/common.c index d63d399c7bae..7bc0f9aa8b33 100644 --- a/arch/arm/mach-exynos/common.c +++ b/arch/arm/mach-exynos/common.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm/mach-s3c24xx/irq.c b/arch/arm/mach-s3c24xx/irq.c index cb9f5e011e73..b6fac28a0034 100644 --- a/arch/arm/mach-s3c24xx/irq.c +++ b/arch/arm/mach-s3c24xx/irq.c @@ -25,6 +25,7 @@ #include #include #include +#include #include diff --git a/arch/arm/plat-samsung/irq-vic-timer.c b/arch/arm/plat-samsung/irq-vic-timer.c index f980cf3d2baa..5d205e74e495 100644 --- a/arch/arm/plat-samsung/irq-vic-timer.c +++ b/arch/arm/plat-samsung/irq-vic-timer.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -23,8 +24,6 @@ #include #include -#include - static void s3c_irq_demux_vic_timer(unsigned int irq, struct irq_desc *desc) { struct irq_chip *chip = irq_get_chip(irq); diff --git a/arch/arm/plat-samsung/s5p-irq-gpioint.c b/arch/arm/plat-samsung/s5p-irq-gpioint.c index bae56131a50a..fafdb059043a 100644 --- a/arch/arm/plat-samsung/s5p-irq-gpioint.c +++ b/arch/arm/plat-samsung/s5p-irq-gpioint.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -22,8 +23,6 @@ #include #include -#include - #define GPIO_BASE(chip) ((void __iomem *)((unsigned long)((chip)->base) & 0xFFFFF000u)) #define CON_OFFSET 0x700 diff --git a/drivers/gpio/gpio-msm-v2.c b/drivers/gpio/gpio-msm-v2.c index 55a7e7769af6..dd2eddeb1e0c 100644 --- a/drivers/gpio/gpio-msm-v2.c +++ b/drivers/gpio/gpio-msm-v2.c @@ -23,13 +23,12 @@ #include #include #include +#include #include #include #include #include -#include - #include #include diff --git a/drivers/gpio/gpio-mxc.c b/drivers/gpio/gpio-mxc.c index 7877335c4cc8..7176743915d3 100644 --- a/drivers/gpio/gpio-mxc.c +++ b/drivers/gpio/gpio-mxc.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -32,7 +33,6 @@ #include #include #include -#include enum mxc_gpio_hwtype { IMX1_GPIO, /* runs on i.mx1 */ diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c index 159f5c57eb45..a612ea1c53cb 100644 --- a/drivers/gpio/gpio-omap.c +++ b/drivers/gpio/gpio-omap.c @@ -25,11 +25,10 @@ #include #include #include +#include #include #include -#include - #define OFF_MODE 1 static LIST_HEAD(omap_gpio_list); diff --git a/drivers/gpio/gpio-pl061.c b/drivers/gpio/gpio-pl061.c index b820869ca93c..29763361d13c 100644 --- a/drivers/gpio/gpio-pl061.c +++ b/drivers/gpio/gpio-pl061.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -23,7 +24,6 @@ #include #include #include -#include #define GPIODIR 0x400 #define GPIOIS 0x404 diff --git a/drivers/gpio/gpio-pxa.c b/drivers/gpio/gpio-pxa.c index 9cc108d2b770..7523b6d108d0 100644 --- 
a/drivers/gpio/gpio-pxa.c +++ b/drivers/gpio/gpio-pxa.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -26,8 +27,6 @@ #include #include -#include - #include /* diff --git a/drivers/gpio/gpio-tegra.c b/drivers/gpio/gpio-tegra.c index 414ad912232f..8e2155548888 100644 --- a/drivers/gpio/gpio-tegra.c +++ b/drivers/gpio/gpio-tegra.c @@ -27,11 +27,10 @@ #include #include #include +#include #include #include -#include - #define GPIO_BANK(x) ((x) >> 5) #define GPIO_PORT(x) (((x) >> 3) & 0x3) #define GPIO_BIT(x) ((x) & 0x7) diff --git a/drivers/irqchip/exynos-combiner.c b/drivers/irqchip/exynos-combiner.c index 04d86a9803f4..6a5201351507 100644 --- a/drivers/irqchip/exynos-combiner.c +++ b/drivers/irqchip/exynos-combiner.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index a32e0d5aa45f..0b1c0af646de 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include diff --git a/drivers/pinctrl/pinctrl-at91.c b/drivers/pinctrl/pinctrl-at91.c index 75933a6aa828..5cbadc9ad2e8 100644 --- a/drivers/pinctrl/pinctrl-at91.c +++ b/drivers/pinctrl/pinctrl-at91.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -27,8 +28,6 @@ /* Since we request GPIOs from ourself */ #include -#include - #include #include diff --git a/drivers/pinctrl/pinctrl-exynos.c b/drivers/pinctrl/pinctrl-exynos.c index 538b9ddaadf7..7265e551dddb 100644 --- a/drivers/pinctrl/pinctrl-exynos.c +++ b/drivers/pinctrl/pinctrl-exynos.c @@ -23,13 +23,12 @@ #include #include #include +#include #include #include #include #include -#include - #include "pinctrl-samsung.h" #include "pinctrl-exynos.h" diff --git a/drivers/pinctrl/pinctrl-nomadik.c b/drivers/pinctrl/pinctrl-nomadik.c index 36d20293de5c..93eba9715e62 100644 --- a/drivers/pinctrl/pinctrl-nomadik.c +++ b/drivers/pinctrl/pinctrl-nomadik.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -33,7 +34,6 @@ /* Since we request GPIOs from ourself */ #include #include -#include #include "pinctrl-nomadik.h" #include "core.h" diff --git a/drivers/pinctrl/pinctrl-sirf.c b/drivers/pinctrl/pinctrl-sirf.c index d02498b30c6e..ab26b4b669d5 100644 --- a/drivers/pinctrl/pinctrl-sirf.c +++ b/drivers/pinctrl/pinctrl-sirf.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -25,7 +26,6 @@ #include #include #include -#include #define DRIVER_NAME "pinmux-sirf" diff --git a/drivers/pinctrl/spear/pinctrl-plgpio.c b/drivers/pinctrl/spear/pinctrl-plgpio.c index 295b349a05cf..a4908ecd74fb 100644 --- a/drivers/pinctrl/spear/pinctrl-plgpio.c +++ b/drivers/pinctrl/spear/pinctrl-plgpio.c @@ -15,12 +15,12 @@ #include #include #include +#include #include #include #include #include #include -#include #define MAX_GPIO_PER_REG 32 #define PIN_OFFSET(pin) (pin % MAX_GPIO_PER_REG) diff --git a/drivers/staging/imx-drm/ipu-v3/ipu-common.c b/drivers/staging/imx-drm/ipu-v3/ipu-common.c index 366f259e3756..6efe4e1b499f 100644 --- a/drivers/staging/imx-drm/ipu-v3/ipu-common.c +++ b/drivers/staging/imx-drm/ipu-v3/ipu-common.c @@ -25,8 +25,8 @@ #include #include #include +#include #include -#include #include "imx-ipu-v3.h" #include "ipu-prv.h" diff --git a/include/linux/irqchip/chained_irq.h b/include/linux/irqchip/chained_irq.h new file mode 100644 index 000000000000..adf4c30f3af6 --- /dev/null +++ 
b/include/linux/irqchip/chained_irq.h @@ -0,0 +1,52 @@ +/* + * Chained IRQ handlers support. + * + * Copyright (C) 2011 ARM Ltd. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#ifndef __IRQCHIP_CHAINED_IRQ_H +#define __IRQCHIP_CHAINED_IRQ_H + +#include + +/* + * Entry/exit functions for chained handlers where the primary IRQ chip + * may implement either fasteoi or level-trigger flow control. + */ +static inline void chained_irq_enter(struct irq_chip *chip, + struct irq_desc *desc) +{ + /* FastEOI controllers require no action on entry. */ + if (chip->irq_eoi) + return; + + if (chip->irq_mask_ack) { + chip->irq_mask_ack(&desc->irq_data); + } else { + chip->irq_mask(&desc->irq_data); + if (chip->irq_ack) + chip->irq_ack(&desc->irq_data); + } +} + +static inline void chained_irq_exit(struct irq_chip *chip, + struct irq_desc *desc) +{ + if (chip->irq_eoi) + chip->irq_eoi(&desc->irq_data); + else + chip->irq_unmask(&desc->irq_data); +} + +#endif /* __IRQCHIP_CHAINED_IRQ_H */ -- cgit From c0114709ed85a5693eb74acdfa03d94f7f12e5b8 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 14 Jan 2013 18:05:37 +0000 Subject: irqchip: gic: Perform the gic_secondary_init() call via CPU notifier All the calls to gic_secondary_init() pass 0 as the first argument. Since this function is called on each CPU when starting, it can be done in a platform-independent way via a CPU notifier registered by the GIC code. 
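In the pre-hotplug-state-machine kernels of this era, that notifier pattern looks roughly like the sketch below (generic names; the GIC's actual notifier appears in the diff further down). CPU_STARTING is delivered on the incoming CPU itself, early in bringup with interrupts still disabled, which is exactly when per-CPU interrupt-controller state must be programmed:

	#include <linux/cpu.h>
	#include <linux/notifier.h>

	static int __cpuinit my_starting_notifier(struct notifier_block *nfb,
						  unsigned long action, void *hcpu)
	{
		/* Mask off CPU_TASKS_FROZEN so suspend/resume is covered too. */
		if ((action & ~CPU_TASKS_FROZEN) == CPU_STARTING)
			my_percpu_irqchip_init();	/* hypothetical per-CPU hook */
		return NOTIFY_OK;
	}

	static struct notifier_block __cpuinitdata my_cpu_notifier = {
		.notifier_call	= my_starting_notifier,
		.priority	= 100,	/* run ahead of lower-priority notifiers */
	};

	static int __init my_driver_init(void)
	{
		register_cpu_notifier(&my_cpu_notifier);
		return 0;
	}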
Signed-off-by: Catalin Marinas Acked-by: Stephen Warren Acked-by: Viresh Kumar Acked-by: Santosh Shilimkar Acked-by: Rob Herring Acked-by: Simon Horman Tested-by: Simon Horman Acked-by: Srinidhi Kasagar Tested-by: Dinh Nguyen Acked-by: Nicolas Pitre Tested-by: Marc Zyngier Cc: Russell King Cc: Thomas Gleixner Cc: Kukjin Kim Cc: Sascha Hauer Cc: David Brown Cc: Bryan Huntsman Cc: Tony Lindgren Cc: Magnus Damm Cc: Shiraz Hashim Cc: Linus Walleij Cc: Will Deacon Cc: Kukjin Kim Cc: Barry Song --- arch/arm/mach-exynos/platsmp.c | 8 -------- arch/arm/mach-highbank/platsmp.c | 7 ------- arch/arm/mach-imx/platsmp.c | 12 ------------ arch/arm/mach-msm/platsmp.c | 8 -------- arch/arm/mach-omap2/omap-smp.c | 7 ------- arch/arm/mach-prima2/platsmp.c | 8 -------- arch/arm/mach-shmobile/smp-emev2.c | 7 ------- arch/arm/mach-shmobile/smp-r8a7779.c | 7 ------- arch/arm/mach-shmobile/smp-sh73a0.c | 7 ------- arch/arm/mach-socfpga/platsmp.c | 12 ------------ arch/arm/mach-spear13xx/platsmp.c | 8 -------- arch/arm/mach-tegra/platsmp.c | 8 -------- arch/arm/mach-ux500/platsmp.c | 8 -------- arch/arm/mach-virt/platsmp.c | 8 -------- arch/arm/plat-versatile/platsmp.c | 8 -------- drivers/irqchip/irq-gic.c | 28 +++++++++++++++++++++------- include/linux/irqchip/arm-gic.h | 1 - 17 files changed, 21 insertions(+), 131 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-exynos/platsmp.c b/arch/arm/mach-exynos/platsmp.c index 60f7c5be057d..95e04bd5813f 100644 --- a/arch/arm/mach-exynos/platsmp.c +++ b/arch/arm/mach-exynos/platsmp.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include @@ -75,13 +74,6 @@ static DEFINE_SPINLOCK(boot_lock); static void __cpuinit exynos_secondary_init(unsigned int cpu) { - /* - * if any interrupts are already enabled for the primary - * core (e.g. timer irq), then they will not have been enabled - * for us: do so - */ - gic_secondary_init(0); - /* * let the primary processor know we're out of the * pen, then head off into the C entry point diff --git a/arch/arm/mach-highbank/platsmp.c b/arch/arm/mach-highbank/platsmp.c index 8797a7001720..a984573e0d02 100644 --- a/arch/arm/mach-highbank/platsmp.c +++ b/arch/arm/mach-highbank/platsmp.c @@ -17,7 +17,6 @@ #include #include #include -#include #include @@ -25,11 +24,6 @@ extern void secondary_startup(void); -static void __cpuinit highbank_secondary_init(unsigned int cpu) -{ - gic_secondary_init(0); -} - static int __cpuinit highbank_boot_secondary(unsigned int cpu, struct task_struct *idle) { highbank_set_cpu_jump(cpu, secondary_startup); @@ -67,7 +61,6 @@ static void __init highbank_smp_prepare_cpus(unsigned int max_cpus) struct smp_operations highbank_smp_ops __initdata = { .smp_init_cpus = highbank_smp_init_cpus, .smp_prepare_cpus = highbank_smp_prepare_cpus, - .smp_secondary_init = highbank_secondary_init, .smp_boot_secondary = highbank_boot_secondary, #ifdef CONFIG_HOTPLUG_CPU .cpu_die = highbank_cpu_die, diff --git a/arch/arm/mach-imx/platsmp.c b/arch/arm/mach-imx/platsmp.c index 7c0b03f67b05..77e9a25ed0f6 100644 --- a/arch/arm/mach-imx/platsmp.c +++ b/arch/arm/mach-imx/platsmp.c @@ -12,7 +12,6 @@ #include #include -#include #include #include #include @@ -52,16 +51,6 @@ void imx_scu_standby_enable(void) writel_relaxed(val, scu_base); } -static void __cpuinit imx_secondary_init(unsigned int cpu) -{ - /* - * if any interrupts are already enabled for the primary - * core (e.g. 
timer irq), then they will not have been enabled - * for us: do so - */ - gic_secondary_init(0); -} - static int __cpuinit imx_boot_secondary(unsigned int cpu, struct task_struct *idle) { imx_set_cpu_jump(cpu, v7_secondary_startup); @@ -96,7 +85,6 @@ static void __init imx_smp_prepare_cpus(unsigned int max_cpus) struct smp_operations imx_smp_ops __initdata = { .smp_init_cpus = imx_smp_init_cpus, .smp_prepare_cpus = imx_smp_prepare_cpus, - .smp_secondary_init = imx_secondary_init, .smp_boot_secondary = imx_boot_secondary, #ifdef CONFIG_HOTPLUG_CPU .cpu_die = imx_cpu_die, diff --git a/arch/arm/mach-msm/platsmp.c b/arch/arm/mach-msm/platsmp.c index 42932865416a..00cdb0a5dac8 100644 --- a/arch/arm/mach-msm/platsmp.c +++ b/arch/arm/mach-msm/platsmp.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include @@ -41,13 +40,6 @@ static inline int get_core_count(void) static void __cpuinit msm_secondary_init(unsigned int cpu) { - /* - * if any interrupts are already enabled for the primary - * core (e.g. timer irq), then they will not have been enabled - * for us: do so - */ - gic_secondary_init(0); - /* * let the primary processor know we're out of the * pen, then head off into the C entry point diff --git a/arch/arm/mach-omap2/omap-smp.c b/arch/arm/mach-omap2/omap-smp.c index d9727218dd0a..e7a449758ab5 100644 --- a/arch/arm/mach-omap2/omap-smp.c +++ b/arch/arm/mach-omap2/omap-smp.c @@ -66,13 +66,6 @@ static void __cpuinit omap4_secondary_init(unsigned int cpu) omap_secure_dispatcher(OMAP4_PPA_CPU_ACTRL_SMP_INDEX, 4, 0, 0, 0, 0, 0); - /* - * If any interrupts are already enabled for the primary - * core (e.g. timer irq), then they will not have been enabled - * for us: do so - */ - gic_secondary_init(0); - /* * Synchronise with the boot thread. */ diff --git a/arch/arm/mach-prima2/platsmp.c b/arch/arm/mach-prima2/platsmp.c index 4b788310f6a6..c7c92e78f0cf 100644 --- a/arch/arm/mach-prima2/platsmp.c +++ b/arch/arm/mach-prima2/platsmp.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include @@ -48,13 +47,6 @@ void __init sirfsoc_map_scu(void) static void __cpuinit sirfsoc_secondary_init(unsigned int cpu) { - /* - * if any interrupts are already enabled for the primary - * core (e.g. 
timer irq), then they will not have been enabled - * for us: do so - */ - gic_secondary_init(0); - /* * let the primary processor know we're out of the * pen, then head off into the C entry point diff --git a/arch/arm/mach-shmobile/smp-emev2.c b/arch/arm/mach-shmobile/smp-emev2.c index 953eb1f9388d..384e27dd3601 100644 --- a/arch/arm/mach-shmobile/smp-emev2.c +++ b/arch/arm/mach-shmobile/smp-emev2.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include @@ -85,11 +84,6 @@ static int __maybe_unused emev2_cpu_kill(unsigned int cpu) } -static void __cpuinit emev2_secondary_init(unsigned int cpu) -{ - gic_secondary_init(0); -} - static int __cpuinit emev2_boot_secondary(unsigned int cpu, struct task_struct *idle) { cpu = cpu_logical_map(cpu); @@ -124,7 +118,6 @@ static void __init emev2_smp_init_cpus(void) struct smp_operations emev2_smp_ops __initdata = { .smp_init_cpus = emev2_smp_init_cpus, .smp_prepare_cpus = emev2_smp_prepare_cpus, - .smp_secondary_init = emev2_secondary_init, .smp_boot_secondary = emev2_boot_secondary, #ifdef CONFIG_HOTPLUG_CPU .cpu_kill = emev2_cpu_kill, diff --git a/arch/arm/mach-shmobile/smp-r8a7779.c b/arch/arm/mach-shmobile/smp-r8a7779.c index 3a4acf23edcf..994906560edd 100644 --- a/arch/arm/mach-shmobile/smp-r8a7779.c +++ b/arch/arm/mach-shmobile/smp-r8a7779.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include @@ -132,11 +131,6 @@ static int __maybe_unused r8a7779_cpu_kill(unsigned int cpu) } -static void __cpuinit r8a7779_secondary_init(unsigned int cpu) -{ - gic_secondary_init(0); -} - static int __cpuinit r8a7779_boot_secondary(unsigned int cpu, struct task_struct *idle) { struct r8a7779_pm_ch *ch = NULL; @@ -186,7 +180,6 @@ static void __init r8a7779_smp_init_cpus(void) struct smp_operations r8a7779_smp_ops __initdata = { .smp_init_cpus = r8a7779_smp_init_cpus, .smp_prepare_cpus = r8a7779_smp_prepare_cpus, - .smp_secondary_init = r8a7779_secondary_init, .smp_boot_secondary = r8a7779_boot_secondary, #ifdef CONFIG_HOTPLUG_CPU .cpu_kill = r8a7779_cpu_kill, diff --git a/arch/arm/mach-shmobile/smp-sh73a0.c b/arch/arm/mach-shmobile/smp-sh73a0.c index acb46a94ccdf..d0f9aca22477 100644 --- a/arch/arm/mach-shmobile/smp-sh73a0.c +++ b/arch/arm/mach-shmobile/smp-sh73a0.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include @@ -59,11 +58,6 @@ static unsigned int __init sh73a0_get_core_count(void) return scu_get_core_count(scu_base); } -static void __cpuinit sh73a0_secondary_init(unsigned int cpu) -{ - gic_secondary_init(0); -} - static int __cpuinit sh73a0_boot_secondary(unsigned int cpu, struct task_struct *idle) { cpu = cpu_logical_map(cpu); @@ -138,7 +132,6 @@ static void sh73a0_cpu_die(unsigned int cpu) struct smp_operations sh73a0_smp_ops __initdata = { .smp_init_cpus = sh73a0_smp_init_cpus, .smp_prepare_cpus = sh73a0_smp_prepare_cpus, - .smp_secondary_init = sh73a0_secondary_init, .smp_boot_secondary = sh73a0_boot_secondary, #ifdef CONFIG_HOTPLUG_CPU .cpu_kill = sh73a0_cpu_kill, diff --git a/arch/arm/mach-socfpga/platsmp.c b/arch/arm/mach-socfpga/platsmp.c index 84c60fa8daa0..ca14d1d5ac7f 100644 --- a/arch/arm/mach-socfpga/platsmp.c +++ b/arch/arm/mach-socfpga/platsmp.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include @@ -33,16 +32,6 @@ extern void __iomem *sys_manager_base_addr; extern void __iomem *rst_manager_base_addr; -static void __cpuinit socfpga_secondary_init(unsigned int cpu) -{ - /* - * if any interrupts are already enabled for the primary - * 
core (e.g. timer irq), then they will not have been enabled - * for us: do so - */ - gic_secondary_init(0); -} - static int __cpuinit socfpga_boot_secondary(unsigned int cpu, struct task_struct *idle) { int trampoline_size = &secondary_trampoline_end - &secondary_trampoline; @@ -109,7 +98,6 @@ static void socfpga_cpu_die(unsigned int cpu) struct smp_operations socfpga_smp_ops __initdata = { .smp_init_cpus = socfpga_smp_init_cpus, .smp_prepare_cpus = socfpga_smp_prepare_cpus, - .smp_secondary_init = socfpga_secondary_init, .smp_boot_secondary = socfpga_boot_secondary, #ifdef CONFIG_HOTPLUG_CPU .cpu_die = socfpga_cpu_die, diff --git a/arch/arm/mach-spear13xx/platsmp.c b/arch/arm/mach-spear13xx/platsmp.c index af4ade61cd95..551c69c9a228 100644 --- a/arch/arm/mach-spear13xx/platsmp.c +++ b/arch/arm/mach-spear13xx/platsmp.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -27,13 +26,6 @@ static void __iomem *scu_base = IOMEM(VA_SCU_BASE); static void __cpuinit spear13xx_secondary_init(unsigned int cpu) { - /* - * if any interrupts are already enabled for the primary - * core (e.g. timer irq), then they will not have been enabled - * for us: do so - */ - gic_secondary_init(0); - /* * let the primary processor know we're out of the * pen, then head off into the C entry point diff --git a/arch/arm/mach-tegra/platsmp.c b/arch/arm/mach-tegra/platsmp.c index 2c6b3d55213b..9348d3c496a9 100644 --- a/arch/arm/mach-tegra/platsmp.c +++ b/arch/arm/mach-tegra/platsmp.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include @@ -44,13 +43,6 @@ static cpumask_t tegra_cpu_init_mask; static void __cpuinit tegra_secondary_init(unsigned int cpu) { - /* - * if any interrupts are already enabled for the primary - * core (e.g. timer irq), then they will not have been enabled - * for us: do so - */ - gic_secondary_init(0); - cpumask_set_cpu(cpu, &tegra_cpu_init_mask); } diff --git a/arch/arm/mach-ux500/platsmp.c b/arch/arm/mach-ux500/platsmp.c index 18f7af339dc9..152b1309b9af 100644 --- a/arch/arm/mach-ux500/platsmp.c +++ b/arch/arm/mach-ux500/platsmp.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include @@ -57,13 +56,6 @@ static DEFINE_SPINLOCK(boot_lock); static void __cpuinit ux500_secondary_init(unsigned int cpu) { - /* - * if any interrupts are already enabled for the primary - * core (e.g. 
timer irq), then they will not have been enabled - * for us: do so - */ - gic_secondary_init(0); - /* * let the primary processor know we're out of the * pen, then head off into the C entry point diff --git a/arch/arm/mach-virt/platsmp.c b/arch/arm/mach-virt/platsmp.c index 8badaabe70a1..f4143f5bfa5b 100644 --- a/arch/arm/mach-virt/platsmp.c +++ b/arch/arm/mach-virt/platsmp.c @@ -21,8 +21,6 @@ #include #include -#include - #include #include @@ -45,14 +43,8 @@ static int __cpuinit virt_boot_secondary(unsigned int cpu, return -ENODEV; } -static void __cpuinit virt_secondary_init(unsigned int cpu) -{ - gic_secondary_init(0); -} - struct smp_operations __initdata virt_smp_ops = { .smp_init_cpus = virt_smp_init_cpus, .smp_prepare_cpus = virt_smp_prepare_cpus, - .smp_secondary_init = virt_secondary_init, .smp_boot_secondary = virt_boot_secondary, }; diff --git a/arch/arm/plat-versatile/platsmp.c b/arch/arm/plat-versatile/platsmp.c index f2ac15561778..1e1b2d769748 100644 --- a/arch/arm/plat-versatile/platsmp.c +++ b/arch/arm/plat-versatile/platsmp.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include @@ -36,13 +35,6 @@ static DEFINE_SPINLOCK(boot_lock); void __cpuinit versatile_secondary_init(unsigned int cpu) { - /* - * if any interrupts are already enabled for the primary - * core (e.g. timer irq), then they will not have been enabled - * for us: do so - */ - gic_secondary_init(0); - /* * let the primary processor know we're out of the * pen, then head off into the C entry point diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 974f77c887b8..add1fd84fc4b 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -699,6 +700,25 @@ static int gic_irq_domain_xlate(struct irq_domain *d, return 0; } +#ifdef CONFIG_SMP +static int __cpuinit gic_secondary_init(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + if (action == CPU_STARTING) + gic_cpu_init(&gic_data[0]); + return NOTIFY_OK; +} + +/* + * Notifier for enabling the GIC CPU interface. Set an arbitrarily high + * priority because the GIC needs to be up before the ARM generic timers. 
+ */ +static struct notifier_block __cpuinitdata gic_cpu_notifier = { + .notifier_call = gic_secondary_init, + .priority = 100, +}; +#endif + const struct irq_domain_ops gic_irq_domain_ops = { .map = gic_irq_domain_map, .xlate = gic_irq_domain_xlate, @@ -789,6 +809,7 @@ void __init gic_init_bases(unsigned int gic_nr, int irq_start, #ifdef CONFIG_SMP set_smp_cross_call(gic_raise_softirq); + register_cpu_notifier(&gic_cpu_notifier); #endif set_handle_irq(gic_handle_irq); @@ -799,13 +820,6 @@ void __init gic_init_bases(unsigned int gic_nr, int irq_start, gic_pm_init(gic); } -void __cpuinit gic_secondary_init(unsigned int gic_nr) -{ - BUG_ON(gic_nr >= MAX_GIC_NR); - - gic_cpu_init(&gic_data[gic_nr]); -} - #ifdef CONFIG_OF static int gic_cnt __initdata = 0; diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h index 3fd8e4290a1c..3e203eb23cc7 100644 --- a/include/linux/irqchip/arm-gic.h +++ b/include/linux/irqchip/arm-gic.h @@ -65,7 +65,6 @@ extern struct irq_chip gic_arch_extn; void gic_init_bases(unsigned int, int, void __iomem *, void __iomem *, u32 offset, struct device_node *); -void gic_secondary_init(unsigned int); void gic_cascade_irq(unsigned int gic_nr, unsigned int irq); static inline void gic_init(unsigned int nr, int start, -- cgit From 6752c8db8e0cfedb44ba62806dd15b383ed64000 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki / 吉藤英明 Date: Mon, 25 Mar 2013 08:26:16 +0000 Subject: firewire net, ipv4 arp: Extend hardware address and remove driver-level packet inspection. Inspection of the upper layer protocol is considered harmful, especially if it is ARP or another stateful upper layer protocol; the driver cannot (and should not) have full state of them. The IPv4 over FireWire module used to inspect ARP (both in the sending path and in the receiving path), and record the peer's GUID, max packet size, max speed and fifo address. This patch removes such inspection by extending our "hardware address" definition to include other information as well: max packet size, max speed and fifo. By doing this, the neighbour module in the networking subsystem can cache them. Note: As we have started ignoring sspd and max_rec in ARP/NDP, that information will not be used in the driver when sending. When a packet is being sent, the IP layer fills our pseudo header with the extended "hardware address", including GUID and fifo. The driver can look up the node-id (the real but rather volatile low-level address) by GUID, and then the module can send the packet to the wire using the parameters provided in the extended hardware address. This approach is realistic because IP over IEEE1394 (RFC2734) and IPv6 over IEEE1394 (RFC3146) share the same "hardware address" format in their address resolution protocols. Here, the extended "hardware address" is defined as follows: union fwnet_hwaddr { u8 u[16]; struct { __be64 uniq_id; /* EUI-64 */ u8 max_rec; /* max packet size */ u8 sspd; /* max speed */ __be16 fifo_hi; /* hi 16bits of FIFO addr */ __be32 fifo_lo; /* lo 32bits of FIFO addr */ } __packed uc; }; Note that the hardware address is declared as a union, so that we can map a full IP address into it when implementing MCAP (Multicast Channel Allocation Protocol) for IPv6, but the IP and ARP subsystems do not need to know this format in detail. One difference between the original ARP (RFC826) and 1394 ARP (RFC2734) is that 1394 ARP Request/Reply messages do not contain the target hardware address field (aka ar$tha). This difference is handled in the ARP subsystem. CC: Stephan Gatzka Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S.
Miller --- drivers/firewire/net.c | 153 +++++++++---------------------------------------- include/linux/if_arp.h | 12 +++- include/net/firewire.h | 25 ++++++++ net/ipv4/arp.c | 27 +++++++-- 4 files changed, 82 insertions(+), 135 deletions(-) create mode 100644 include/net/firewire.h (limited to 'include/linux') diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c index cb8aa865ff88..790017eb5051 100644 --- a/drivers/firewire/net.c +++ b/drivers/firewire/net.c @@ -28,6 +28,7 @@ #include #include +#include /* rx limits */ #define FWNET_MAX_FRAGMENTS 30 /* arbitrary, > TX queue depth */ @@ -57,33 +58,6 @@ #define RFC2374_HDR_LASTFRAG 2 /* last fragment */ #define RFC2374_HDR_INTFRAG 3 /* interior fragment */ -#define RFC2734_HW_ADDR_LEN 16 - -struct rfc2734_arp { - __be16 hw_type; /* 0x0018 */ - __be16 proto_type; /* 0x0806 */ - u8 hw_addr_len; /* 16 */ - u8 ip_addr_len; /* 4 */ - __be16 opcode; /* ARP Opcode */ - /* Above is exactly the same format as struct arphdr */ - - __be64 s_uniq_id; /* Sender's 64bit EUI */ - u8 max_rec; /* Sender's max packet size */ - u8 sspd; /* Sender's max speed */ - __be16 fifo_hi; /* hi 16bits of sender's FIFO addr */ - __be32 fifo_lo; /* lo 32bits of sender's FIFO addr */ - __be32 sip; /* Sender's IP Address */ - __be32 tip; /* IP Address of requested hw addr */ -} __packed; - -/* This header format is specific to this driver implementation. */ -#define FWNET_ALEN 8 -#define FWNET_HLEN 10 -struct fwnet_header { - u8 h_dest[FWNET_ALEN]; /* destination address */ - __be16 h_proto; /* packet type ID field */ -} __packed; - static bool fwnet_hwaddr_is_multicast(u8 *ha) { return !!(*ha & 1); @@ -196,8 +170,6 @@ struct fwnet_peer { struct list_head peer_link; struct fwnet_device *dev; u64 guid; - u64 fifo; - __be32 ip; /* guarded by dev->lock */ struct list_head pd_list; /* received partial datagrams */ @@ -226,6 +198,15 @@ struct fwnet_packet_task { u8 enqueued; }; +/* + * Get fifo address embedded in hwaddr + */ +static __u64 fwnet_hwaddr_fifo(union fwnet_hwaddr *ha) +{ + return (u64)get_unaligned_be16(&ha->uc.fifo_hi) << 32 + | get_unaligned_be32(&ha->uc.fifo_lo); +} + /* * saddr == NULL means use device source address. * daddr == NULL means leave destination address (eg unresolved arp). @@ -518,7 +499,6 @@ static int fwnet_finish_incoming_packet(struct net_device *net, bool is_broadcast, u16 ether_type) { struct fwnet_device *dev; - static const __be64 broadcast_hw = cpu_to_be64(~0ULL); int status; __be64 guid; @@ -537,76 +517,11 @@ static int fwnet_finish_incoming_packet(struct net_device *net, /* * Parse the encapsulation header. This actually does the job of - * converting to an ethernet frame header, as well as arp - * conversion if needed. ARP conversion is easier in this - * direction, since we are using ethernet as our backend. - */ - /* - * If this is an ARP packet, convert it. First, we want to make - * use of some of the fields, since they tell us a little bit - * about the sending machine. + * converting to an ethernet-like pseudo frame header. 
*/ - if (ether_type == ETH_P_ARP) { - struct rfc2734_arp *arp1394; - struct arphdr *arp; - unsigned char *arp_ptr; - u64 fifo_addr; - u64 peer_guid; - struct fwnet_peer *peer; - unsigned long flags; - - arp1394 = (struct rfc2734_arp *)skb->data; - arp = (struct arphdr *)skb->data; - arp_ptr = (unsigned char *)(arp + 1); - peer_guid = get_unaligned_be64(&arp1394->s_uniq_id); - fifo_addr = (u64)get_unaligned_be16(&arp1394->fifo_hi) << 32 - | get_unaligned_be32(&arp1394->fifo_lo); - - spin_lock_irqsave(&dev->lock, flags); - peer = fwnet_peer_find_by_guid(dev, peer_guid); - if (peer) { - peer->fifo = fifo_addr; - peer->ip = arp1394->sip; - } - spin_unlock_irqrestore(&dev->lock, flags); - - if (!peer) { - dev_notice(&net->dev, - "no peer for ARP packet from %016llx\n", - (unsigned long long)peer_guid); - goto no_peer; - } - - /* - * Now that we're done with the 1394 specific stuff, we'll - * need to alter some of the data. Believe it or not, all - * that needs to be done is sender_IP_address needs to be - * moved, the destination hardware address get stuffed - * in and the hardware address length set to 8. - * - * IMPORTANT: The code below overwrites 1394 specific data - * needed above so keep the munging of the data for the - * higher level IP stack last. - */ - - arp->ar_hln = 8; - /* skip over sender unique id */ - arp_ptr += arp->ar_hln; - /* move sender IP addr */ - put_unaligned(arp1394->sip, (u32 *)arp_ptr); - /* skip over sender IP addr */ - arp_ptr += arp->ar_pln; - - if (arp->ar_op == htons(ARPOP_REQUEST)) - memset(arp_ptr, 0, sizeof(u64)); - else - memcpy(arp_ptr, net->dev_addr, sizeof(u64)); - } - - /* Now add the ethernet header. */ guid = cpu_to_be64(dev->card->guid); if (dev_hard_header(skb, net, ether_type, - is_broadcast ? &broadcast_hw : &guid, + is_broadcast ? 
net->broadcast : net->dev_addr, NULL, skb->len) >= 0) { struct fwnet_header *eth; u16 *rawp; @@ -649,7 +564,6 @@ static int fwnet_finish_incoming_packet(struct net_device *net, return 0; - no_peer: err: net->stats.rx_errors++; net->stats.rx_dropped++; @@ -1355,11 +1269,12 @@ static netdev_tx_t fwnet_tx(struct sk_buff *skb, struct net_device *net) ptask->dest_node = IEEE1394_ALL_NODES; ptask->speed = SCODE_100; } else { - __be64 guid = get_unaligned((__be64 *)hdr_buf.h_dest); + union fwnet_hwaddr *ha = (union fwnet_hwaddr *)hdr_buf.h_dest; + __be64 guid = get_unaligned(&ha->uc.uniq_id); u8 generation; peer = fwnet_peer_find_by_guid(dev, be64_to_cpu(guid)); - if (!peer || peer->fifo == FWNET_NO_FIFO_ADDR) + if (!peer) goto fail; generation = peer->generation; @@ -1367,32 +1282,12 @@ static netdev_tx_t fwnet_tx(struct sk_buff *skb, struct net_device *net) max_payload = peer->max_payload; datagram_label_ptr = &peer->datagram_label; - ptask->fifo_addr = peer->fifo; + ptask->fifo_addr = fwnet_hwaddr_fifo(ha); ptask->generation = generation; ptask->dest_node = dest_node; ptask->speed = peer->speed; } - /* If this is an ARP packet, convert it */ - if (proto == htons(ETH_P_ARP)) { - struct arphdr *arp = (struct arphdr *)skb->data; - unsigned char *arp_ptr = (unsigned char *)(arp + 1); - struct rfc2734_arp *arp1394 = (struct rfc2734_arp *)skb->data; - __be32 ipaddr; - - ipaddr = get_unaligned((__be32 *)(arp_ptr + FWNET_ALEN)); - - arp1394->hw_addr_len = RFC2734_HW_ADDR_LEN; - arp1394->max_rec = dev->card->max_receive; - arp1394->sspd = dev->card->link_speed; - - put_unaligned_be16(dev->local_fifo >> 32, - &arp1394->fifo_hi); - put_unaligned_be32(dev->local_fifo & 0xffffffff, - &arp1394->fifo_lo); - put_unaligned(ipaddr, &arp1394->sip); - } - ptask->hdr.w0 = 0; ptask->hdr.w1 = 0; ptask->skb = skb; @@ -1507,8 +1402,6 @@ static int fwnet_add_peer(struct fwnet_device *dev, peer->dev = dev; peer->guid = (u64)device->config_rom[3] << 32 | device->config_rom[4]; - peer->fifo = FWNET_NO_FIFO_ADDR; - peer->ip = 0; INIT_LIST_HEAD(&peer->pd_list); peer->pdg_size = 0; peer->datagram_label = 0; @@ -1538,6 +1431,7 @@ static int fwnet_probe(struct device *_dev) struct fwnet_device *dev; unsigned max_mtu; int ret; + union fwnet_hwaddr *ha; mutex_lock(&fwnet_device_mutex); @@ -1582,8 +1476,15 @@ static int fwnet_probe(struct device *_dev) net->mtu = min(1500U, max_mtu); /* Set our hardware address while we're at it */ - put_unaligned_be64(card->guid, net->dev_addr); - put_unaligned_be64(~0ULL, net->broadcast); + ha = (union fwnet_hwaddr *)net->dev_addr; + put_unaligned_be64(card->guid, &ha->uc.uniq_id); + ha->uc.max_rec = dev->card->max_receive; + ha->uc.sspd = dev->card->link_speed; + put_unaligned_be16(dev->local_fifo >> 32, &ha->uc.fifo_hi); + put_unaligned_be32(dev->local_fifo & 0xffffffff, &ha->uc.fifo_lo); + + memset(net->broadcast, -1, net->addr_len); + ret = register_netdev(net); if (ret) goto out; @@ -1632,8 +1533,6 @@ static int fwnet_remove(struct device *_dev) mutex_lock(&fwnet_device_mutex); net = dev->netdev; - if (net && peer->ip) - arp_invalidate(net, peer->ip); fwnet_remove_peer(peer, dev); diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h index 89b4614a4722..f563907ed776 100644 --- a/include/linux/if_arp.h +++ b/include/linux/if_arp.h @@ -33,7 +33,15 @@ static inline struct arphdr *arp_hdr(const struct sk_buff *skb) static inline int arp_hdr_len(struct net_device *dev) { - /* ARP header, plus 2 device addresses, plus 2 IP addresses. 
*/ - return sizeof(struct arphdr) + (dev->addr_len + sizeof(u32)) * 2; + switch (dev->type) { +#if IS_ENABLED(CONFIG_FIREWIRE_NET) + case ARPHRD_IEEE1394: + /* ARP header, device address and 2 IP addresses */ + return sizeof(struct arphdr) + dev->addr_len + sizeof(u32) * 2; +#endif + default: + /* ARP header, plus 2 device addresses, plus 2 IP addresses. */ + return sizeof(struct arphdr) + (dev->addr_len + sizeof(u32)) * 2; + } } #endif /* _LINUX_IF_ARP_H */ diff --git a/include/net/firewire.h b/include/net/firewire.h new file mode 100644 index 000000000000..31bcbfe7a220 --- /dev/null +++ b/include/net/firewire.h @@ -0,0 +1,25 @@ +#ifndef _NET_FIREWIRE_H +#define _NET_FIREWIRE_H + +/* Pseudo L2 address */ +#define FWNET_ALEN 16 +union fwnet_hwaddr { + u8 u[FWNET_ALEN]; + /* "Hardware address" defined in RFC2734/RF3146 */ + struct { + __be64 uniq_id; /* EUI-64 */ + u8 max_rec; /* max packet size */ + u8 sspd; /* max speed */ + __be16 fifo_hi; /* hi 16bits of FIFO addr */ + __be32 fifo_lo; /* lo 32bits of FIFO addr */ + } __packed uc; +}; + +/* Pseudo L2 Header */ +#define FWNET_HLEN 18 +struct fwnet_header { + u8 h_dest[FWNET_ALEN]; /* destination address */ + __be16 h_proto; /* packet type ID field */ +} __packed; + +#endif diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index fea4929f6200..247ec1951c35 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -654,11 +654,19 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, arp_ptr += dev->addr_len; memcpy(arp_ptr, &src_ip, 4); arp_ptr += 4; - if (target_hw != NULL) - memcpy(arp_ptr, target_hw, dev->addr_len); - else - memset(arp_ptr, 0, dev->addr_len); - arp_ptr += dev->addr_len; + + switch (dev->type) { +#if IS_ENABLED(CONFIG_FIREWIRE_NET) + case ARPHRD_IEEE1394: + break; +#endif + default: + if (target_hw != NULL) + memcpy(arp_ptr, target_hw, dev->addr_len); + else + memset(arp_ptr, 0, dev->addr_len); + arp_ptr += dev->addr_len; + } memcpy(arp_ptr, &dest_ip, 4); return skb; @@ -781,7 +789,14 @@ static int arp_process(struct sk_buff *skb) arp_ptr += dev->addr_len; memcpy(&sip, arp_ptr, 4); arp_ptr += 4; - arp_ptr += dev->addr_len; + switch (dev_type) { +#if IS_ENABLED(CONFIG_FIREWIRE_NET) + case ARPHRD_IEEE1394: + break; +#endif + default: + arp_ptr += dev->addr_len; + } memcpy(&tip, arp_ptr, 4); /* * Check for bad requests for 127.x.x.x and requests for multicast -- cgit From 0e5e4f0e56aca0df1d5648db0be9028bd573b25c Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 9 Nov 2012 16:33:05 -0700 Subject: NVMe: Add discard support for capable devices This adds discard support to block queues if the nvme device is capable of deallocating blocks as indicated by the controller's optional command support. A discard flagged bio request will submit an NVMe deallocate Data Set Management command for the requested blocks. 
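As a rough illustration of the conversion this patch performs, the sketch below simply mirrors the arithmetic of nvme_submit_discard() in the diff; the standalone helper name is hypothetical and only shows how a discard bio maps onto a single DSM deallocate range (nlb counts namespace logical blocks, slba converts 512-byte sectors to namespace blocks):

/* minimal sketch, assuming the kernel types from this patch; not part of the commit */
static void fill_dsm_range(struct nvme_dsm_range *range,
			   struct bio *bio, struct nvme_ns *ns)
{
	/* no context attributes requested */
	range->cattr = cpu_to_le32(0);
	/* discard length, in logical blocks of the namespace */
	range->nlb = cpu_to_le32(bio->bi_size >> ns->lba_shift);
	/* starting LBA: 512-byte sectors shifted to the namespace block size */
	range->slba = cpu_to_le64(bio->bi_sector >> (ns->lba_shift - 9));
}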
Signed-off-by: Keith Busch Signed-off-by: Matthew Wilcox --- drivers/block/nvme.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++- include/linux/nvme.h | 32 ++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c index 9dcefe40380b..26e266072079 100644 --- a/drivers/block/nvme.c +++ b/drivers/block/nvme.c @@ -80,6 +80,7 @@ struct nvme_dev { char model[40]; char firmware_rev[8]; u32 max_hw_sectors; + u16 oncs; }; /* @@ -510,6 +511,44 @@ static int nvme_map_bio(struct device *dev, struct nvme_iod *iod, return length; } +/* + * We reuse the small pool to allocate the 16-byte range here as it is not + * worth having a special pool for these or additional cases to handle freeing + * the iod. + */ +static int nvme_submit_discard(struct nvme_queue *nvmeq, struct nvme_ns *ns, + struct bio *bio, struct nvme_iod *iod, int cmdid) +{ + struct nvme_dsm_range *range; + struct nvme_command *cmnd = &nvmeq->sq_cmds[nvmeq->sq_tail]; + + range = dma_pool_alloc(nvmeq->dev->prp_small_pool, GFP_ATOMIC, + &iod->first_dma); + if (!range) + return -ENOMEM; + + iod_list(iod)[0] = (__le64 *)range; + iod->npages = 0; + + range->cattr = cpu_to_le32(0); + range->nlb = cpu_to_le32(bio->bi_size >> ns->lba_shift); + range->slba = cpu_to_le64(bio->bi_sector >> (ns->lba_shift - 9)); + + memset(cmnd, 0, sizeof(*cmnd)); + cmnd->dsm.opcode = nvme_cmd_dsm; + cmnd->dsm.command_id = cmdid; + cmnd->dsm.nsid = cpu_to_le32(ns->ns_id); + cmnd->dsm.prp1 = cpu_to_le64(iod->first_dma); + cmnd->dsm.nr = 0; + cmnd->dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD); + + if (++nvmeq->sq_tail == nvmeq->q_depth) + nvmeq->sq_tail = 0; + writel(nvmeq->sq_tail, nvmeq->q_db); + + return 0; +} + static int nvme_submit_flush(struct nvme_queue *nvmeq, struct nvme_ns *ns, int cmdid) { @@ -567,6 +606,12 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns, if (unlikely(cmdid < 0)) goto free_iod; + if (bio->bi_rw & REQ_DISCARD) { + result = nvme_submit_discard(nvmeq, ns, bio, iod, cmdid); + if (result) + goto free_cmdid; + return result; + } if ((bio->bi_rw & REQ_FLUSH) && !psegs) return nvme_submit_flush(nvmeq, ns, cmdid); @@ -1347,6 +1392,16 @@ static void nvme_put_ns_idx(int index) spin_unlock(&dev_list_lock); } +static void nvme_config_discard(struct nvme_ns *ns) +{ + u32 logical_block_size = queue_logical_block_size(ns->queue); + ns->queue->limits.discard_zeroes_data = 0; + ns->queue->limits.discard_alignment = logical_block_size; + ns->queue->limits.discard_granularity = logical_block_size; + ns->queue->limits.max_discard_sectors = 0xffffffff; + queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue); +} + static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, int nsid, struct nvme_id_ns *id, struct nvme_lba_range_type *rt) { @@ -1366,7 +1421,6 @@ static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, int nsid, ns->queue->queue_flags = QUEUE_FLAG_DEFAULT; queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, ns->queue); queue_flag_set_unlocked(QUEUE_FLAG_NONROT, ns->queue); -/* queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue); */ blk_queue_make_request(ns->queue, nvme_make_request); ns->dev = dev; ns->queue->queuedata = ns; @@ -1392,6 +1446,9 @@ static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, int nsid, sprintf(disk->disk_name, "nvme%dn%d", dev->instance, nsid); set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9)); + if (dev->oncs & NVME_CTRL_ONCS_DSM) + nvme_config_discard(ns); + 
return ns; out_free_queue: @@ -1520,6 +1577,7 @@ static int nvme_dev_add(struct nvme_dev *dev) ctrl = mem; nn = le32_to_cpup(&ctrl->nn); + dev->oncs = le16_to_cpup(&ctrl->oncs); memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn)); memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn)); memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr)); diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 4fa3b0b9b071..bde44c1fd213 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -107,6 +107,12 @@ struct nvme_id_ctrl { __u8 vs[1024]; }; +enum { + NVME_CTRL_ONCS_COMPARE = 1 << 0, + NVME_CTRL_ONCS_WRITE_UNCORRECTABLE = 1 << 1, + NVME_CTRL_ONCS_DSM = 1 << 2, +}; + struct nvme_lbaf { __le16 ms; __u8 ds; @@ -246,6 +252,31 @@ enum { NVME_RW_DSM_COMPRESSED = 1 << 7, }; +struct nvme_dsm_cmd { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __u64 rsvd2[2]; + __le64 prp1; + __le64 prp2; + __le32 nr; + __le32 attributes; + __u32 rsvd12[4]; +}; + +enum { + NVME_DSMGMT_IDR = 1 << 0, + NVME_DSMGMT_IDW = 1 << 1, + NVME_DSMGMT_AD = 1 << 2, +}; + +struct nvme_dsm_range { + __le32 cattr; + __le32 nlb; + __le64 slba; +}; + /* Admin commands */ enum nvme_admin_opcode { @@ -372,6 +403,7 @@ struct nvme_command { struct nvme_create_sq create_sq; struct nvme_delete_queue delete_queue; struct nvme_download_firmware dlfw; + struct nvme_dsm_cmd dsm; }; }; -- cgit From ece70094f6ab2107d4313fa1802b13dab0234ac5 Mon Sep 17 00:00:00 2001 From: Prashant Gaikwad Date: Wed, 20 Mar 2013 17:30:34 +0530 Subject: clk: Add composite clock type Not all clocks need to be decomposed into basic clock types, but they may still want to use the functionality provided by those basic types instead of duplicating it. For example, the Tegra SoC has ~100 clocks which can be decomposed into Mux -> Div -> Gate clock types, bringing the clock count to ~300. Also, a parent change operation cannot be performed on a gate clock, which forces a driver to use the mux clock if it wants to change the parent. Instead, aggregate the functionality of the basic clock types into one clock and just use this clock for all operations. This clock type re-uses the functionality of the basic clock types and is not limited to them; any hardware-specific implementation can be used as well. Signed-off-by: Prashant Gaikwad Signed-off-by: Mike Turquette --- drivers/clk/Makefile | 1 + drivers/clk/clk-composite.c | 201 +++++++++++++++++++++++++++++++++++++++ include/linux/clk-provider.h | 31 +++++++ 3 files changed, 233 insertions(+) create mode 100644 drivers/clk/clk-composite.c (limited to 'include/linux') diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile index 1c22f9dc721d..41cb123a2d02 100644 --- a/drivers/clk/Makefile +++ b/drivers/clk/Makefile @@ -7,6 +7,7 @@ obj-$(CONFIG_COMMON_CLK) += clk-fixed-factor.o obj-$(CONFIG_COMMON_CLK) += clk-fixed-rate.o obj-$(CONFIG_COMMON_CLK) += clk-gate.o obj-$(CONFIG_COMMON_CLK) += clk-mux.o +obj-$(CONFIG_COMMON_CLK) += clk-composite.o # SoCs specific obj-$(CONFIG_ARCH_BCM2835) += clk-bcm2835.o diff --git a/drivers/clk/clk-composite.c b/drivers/clk/clk-composite.c new file mode 100644 index 000000000000..097dee4fd209 --- /dev/null +++ b/drivers/clk/clk-composite.c @@ -0,0 +1,201 @@ +/* + * Copyright (c) 2013 NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation.
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include + +#define to_clk_composite(_hw) container_of(_hw, struct clk_composite, hw) + +static u8 clk_composite_get_parent(struct clk_hw *hw) +{ + struct clk_composite *composite = to_clk_composite(hw); + const struct clk_ops *mux_ops = composite->mux_ops; + struct clk_hw *mux_hw = composite->mux_hw; + + mux_hw->clk = hw->clk; + + return mux_ops->get_parent(mux_hw); +} + +static int clk_composite_set_parent(struct clk_hw *hw, u8 index) +{ + struct clk_composite *composite = to_clk_composite(hw); + const struct clk_ops *mux_ops = composite->mux_ops; + struct clk_hw *mux_hw = composite->mux_hw; + + mux_hw->clk = hw->clk; + + return mux_ops->set_parent(mux_hw, index); +} + +static unsigned long clk_composite_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct clk_composite *composite = to_clk_composite(hw); + const struct clk_ops *div_ops = composite->div_ops; + struct clk_hw *div_hw = composite->div_hw; + + div_hw->clk = hw->clk; + + return div_ops->recalc_rate(div_hw, parent_rate); +} + +static long clk_composite_round_rate(struct clk_hw *hw, unsigned long rate, + unsigned long *prate) +{ + struct clk_composite *composite = to_clk_composite(hw); + const struct clk_ops *div_ops = composite->div_ops; + struct clk_hw *div_hw = composite->div_hw; + + div_hw->clk = hw->clk; + + return div_ops->round_rate(div_hw, rate, prate); +} + +static int clk_composite_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + struct clk_composite *composite = to_clk_composite(hw); + const struct clk_ops *div_ops = composite->div_ops; + struct clk_hw *div_hw = composite->div_hw; + + div_hw->clk = hw->clk; + + return div_ops->set_rate(div_hw, rate, parent_rate); +} + +static int clk_composite_is_enabled(struct clk_hw *hw) +{ + struct clk_composite *composite = to_clk_composite(hw); + const struct clk_ops *gate_ops = composite->gate_ops; + struct clk_hw *gate_hw = composite->gate_hw; + + gate_hw->clk = hw->clk; + + return gate_ops->is_enabled(gate_hw); +} + +static int clk_composite_enable(struct clk_hw *hw) +{ + struct clk_composite *composite = to_clk_composite(hw); + const struct clk_ops *gate_ops = composite->gate_ops; + struct clk_hw *gate_hw = composite->gate_hw; + + gate_hw->clk = hw->clk; + + return gate_ops->enable(gate_hw); +} + +static void clk_composite_disable(struct clk_hw *hw) +{ + struct clk_composite *composite = to_clk_composite(hw); + const struct clk_ops *gate_ops = composite->gate_ops; + struct clk_hw *gate_hw = composite->gate_hw; + + gate_hw->clk = hw->clk; + + gate_ops->disable(gate_hw); +} + +struct clk *clk_register_composite(struct device *dev, const char *name, + const char **parent_names, int num_parents, + struct clk_hw *mux_hw, const struct clk_ops *mux_ops, + struct clk_hw *div_hw, const struct clk_ops *div_ops, + struct clk_hw *gate_hw, const struct clk_ops *gate_ops, + unsigned long flags) +{ + struct clk *clk; + struct clk_init_data init; + struct clk_composite *composite; + struct clk_ops *clk_composite_ops; + + composite = kzalloc(sizeof(*composite), GFP_KERNEL); + if (!composite) { + pr_err("%s: could not allocate composite clk\n", 
__func__); + return ERR_PTR(-ENOMEM); + } + + init.name = name; + init.flags = flags | CLK_IS_BASIC; + init.parent_names = parent_names; + init.num_parents = num_parents; + + clk_composite_ops = &composite->ops; + + if (mux_hw && mux_ops) { + if (!mux_ops->get_parent || !mux_ops->set_parent) { + clk = ERR_PTR(-EINVAL); + goto err; + } + + composite->mux_hw = mux_hw; + composite->mux_ops = mux_ops; + clk_composite_ops->get_parent = clk_composite_get_parent; + clk_composite_ops->set_parent = clk_composite_set_parent; + } + + if (div_hw && div_ops) { + if (!div_ops->recalc_rate || !div_ops->round_rate || + !div_ops->set_rate) { + clk = ERR_PTR(-EINVAL); + goto err; + } + + composite->div_hw = div_hw; + composite->div_ops = div_ops; + clk_composite_ops->recalc_rate = clk_composite_recalc_rate; + clk_composite_ops->round_rate = clk_composite_round_rate; + clk_composite_ops->set_rate = clk_composite_set_rate; + } + + if (gate_hw && gate_ops) { + if (!gate_ops->is_enabled || !gate_ops->enable || + !gate_ops->disable) { + clk = ERR_PTR(-EINVAL); + goto err; + } + + composite->gate_hw = gate_hw; + composite->gate_ops = gate_ops; + clk_composite_ops->is_enabled = clk_composite_is_enabled; + clk_composite_ops->enable = clk_composite_enable; + clk_composite_ops->disable = clk_composite_disable; + } + + init.ops = clk_composite_ops; + composite->hw.init = &init; + + clk = clk_register(dev, &composite->hw); + if (IS_ERR(clk)) + goto err; + + if (composite->mux_hw) + composite->mux_hw->clk = clk; + + if (composite->div_hw) + composite->div_hw->clk = clk; + + if (composite->gate_hw) + composite->gate_hw->clk = clk; + + return clk; + +err: + kfree(composite); + return clk; +} diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 63ba3b740794..1f0352802794 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -342,6 +342,37 @@ struct clk *clk_register_fixed_factor(struct device *dev, const char *name, const char *parent_name, unsigned long flags, unsigned int mult, unsigned int div); +/** + * struct clk_composite - aggregate clock of mux, divider and gate clocks + * + * @hw: handle between common and hardware-specific interfaces + * @mux_hw: handle between composite and hardware-specific mux clock + * @div_hw: handle between composite and hardware-specific divider clock + * @gate_hw: handle between composite and hardware-specific gate clock + * @mux_ops: clock ops for mux + * @div_ops: clock ops for divider + * @gate_ops: clock ops for gate + */ +struct clk_composite { + struct clk_hw hw; + struct clk_ops ops; + + struct clk_hw *mux_hw; + struct clk_hw *div_hw; + struct clk_hw *gate_hw; + + const struct clk_ops *mux_ops; + const struct clk_ops *div_ops; + const struct clk_ops *gate_ops; +}; + +struct clk *clk_register_composite(struct device *dev, const char *name, + const char **parent_names, int num_parents, + struct clk_hw *mux_hw, const struct clk_ops *mux_ops, + struct clk_hw *div_hw, const struct clk_ops *div_ops, + struct clk_hw *gate_hw, const struct clk_ops *gate_ops, + unsigned long flags); + /** * clk_register - allocate a new clock, register it and return an opaque cookie * @dev: device that is registering this clock -- cgit From 3a54aaa0a3ddb2cf2ec1b94a94024e9a8a8af962 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 24 Jan 2013 16:10:26 +0100 Subject: perf/x86: Improve sysfs event mapping with event string This patch extends Jiri's changes to make generic event mappings visible via sysfs.
The patch extends the mechanism to non-generic events by allowing the mappings to be hardcoded in strings. This mechanism will be used by the PEBS-LL patch later on. Signed-off-by: Stephane Eranian Cc: peterz@infradead.org Cc: ak@linux.intel.com Cc: acme@redhat.com Cc: jolsa@redhat.com Cc: namhyung.kim@lge.com Link: http://lkml.kernel.org/r/1359040242-8269-3-git-send-email-eranian@google.com Signed-off-by: Ingo Molnar [ fixed up conflict with 2663960 "perf: Make EVENT_ATTR global" ] Signed-off-by: Arnaldo Carvalho de Melo --- arch/x86/kernel/cpu/perf_event.c | 20 ++++++++++++-------- arch/x86/kernel/cpu/perf_event.h | 17 +++++++++++++++++ include/linux/perf_event.h | 1 + 3 files changed, 30 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index c886dc8c63f8..6e8ab0427041 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1316,9 +1316,16 @@ static struct attribute_group x86_pmu_format_group = { */ static void __init filter_events(struct attribute **attrs) { + struct device_attribute *d; + struct perf_pmu_events_attr *pmu_attr; int i, j; for (i = 0; attrs[i]; i++) { + d = (struct device_attribute *)attrs[i]; + pmu_attr = container_of(d, struct perf_pmu_events_attr, attr); + /* str trumps id */ + if (pmu_attr->event_str) + continue; if (x86_pmu.event_map(i)) continue; @@ -1361,17 +1368,14 @@ static ssize_t events_sysfs_show(struct device *dev, struct device_attribute *at { struct perf_pmu_events_attr *pmu_attr = \ container_of(attr, struct perf_pmu_events_attr, attr); - u64 config = x86_pmu.event_map(pmu_attr->id); - return x86_pmu.events_sysfs_show(page, config); -} -#define EVENT_VAR(_id) event_attr_##_id -#define EVENT_PTR(_id) &event_attr_##_id.attr.attr + /* string trumps id */ + if (pmu_attr->event_str) + return sprintf(page, "%s", pmu_attr->event_str); -#define EVENT_ATTR(_name, _id) \ - PMU_EVENT_ATTR(_name, EVENT_VAR(_id), PERF_COUNT_HW_##_id, \ - events_sysfs_show) + return x86_pmu.events_sysfs_show(page, config); +} EVENT_ATTR(cpu-cycles, CPU_CYCLES ); EVENT_ATTR(instructions, INSTRUCTIONS ); diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 95152c12a8d9..b1518eed5f99 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -422,6 +422,23 @@ do { \ #define ERF_NO_HT_SHARING 1 #define ERF_HAS_RSP_1 2 +#define EVENT_VAR(_id) event_attr_##_id +#define EVENT_PTR(_id) &event_attr_##_id.attr.attr + +#define EVENT_ATTR(_name, _id) \ +static struct perf_pmu_events_attr EVENT_VAR(_id) = { \ + .attr = __ATTR(_name, 0444, events_sysfs_show, NULL), \ + .id = PERF_COUNT_HW_##_id, \ + .event_str = NULL, \ +}; + +#define EVENT_ATTR_STR(_name, v, str) \ +static struct perf_pmu_events_attr event_attr_##v = { \ + .attr = __ATTR(_name, 0444, events_sysfs_show, NULL), \ + .id = 0, \ + .event_str = str, \ +}; + extern struct x86_pmu x86_pmu __read_mostly; DECLARE_PER_CPU(struct cpu_hw_events, cpu_hw_events); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 8737e1cee8b2..1c592114c437 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -809,6 +809,7 @@ do { \ struct perf_pmu_events_attr { struct device_attribute attr; u64 id; + const char *event_str; }; #define PMU_EVENT_ATTR(_name, _var, _id, _show) \ -- cgit From 85467136cdcc674f30beb0e5b79f048fe3a6a76f Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Wed, 27 Feb 2013 17:06:45 -0700 Subject: PCI: Add 
PCI_BUS_NUM() and PCI_DEVID() interfaces PCI defines PCI_DEVFN(), PCI_SLOT(), and PCI_FUNC() interfaces; however, it doesn't have interfaces to return the PCI bus number and PCI device id. Drivers (AMD IOMMU and AER) implement module-specific definitions for PCI_BUS(), and the AMD IOMMU driver also has a module-specific interface to calculate the PCI device id from bus number and devfn. Add PCI_BUS_NUM and PCI_DEVID interfaces to return the PCI bus number and PCI device id, respectively, to avoid the need for duplicate definitions in other modules. The AER driver code and the AMD IOMMU driver define PCI_BUS. The AMD IOMMU driver defines an interface to calculate the device id from a bus number and devfn pair. PCI_DEVFN(), PCI_SLOT(), and PCI_FUNC() interfaces are exported to user-space via uapi/linux/pci.h. However, in the interest of keeping the new interfaces kernel-only and not exporting them to user-space unnecessarily, they are added to linux/pci.h instead. Signed-off-by: Shuah Khan Signed-off-by: Bjorn Helgaas Acked-by: Joerg Roedel --- include/linux/pci.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 2461033a7987..849a336e149c 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -35,6 +35,21 @@ /* Include the ID list */ #include +/* + * The PCI interface treats multi-function devices as independent + * devices. The slot/function address of each device is encoded + * in a single byte as follows: + * + * 7:3 = slot + * 2:0 = function + * PCI_DEVFN(), PCI_SLOT(), and PCI_FUNC() are defined in uapi/linux/pci.h + * In the interest of not exposing interfaces to user-space unnecessarily, + * the following kernel-only defines are being added here. + */ +#define PCI_DEVID(bus, devfn) ((((u16)bus) << 8) | devfn) +/* return bus from PCI devid = ((u16)bus_number) << 8) | devfn */ +#define PCI_BUS_NUM(x) (((x) >> 8) & 0xff) + /* pci_slot represents a physical slot */ struct pci_slot { struct pci_bus *bus; /* The bus this slot is on */ -- cgit From 13c3b0fcc8e33ba49f252378f6e7290b146042af Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Mon, 4 Mar 2013 18:40:57 -0700 Subject: NVMe: Move structures & definitions to header file nvme-scsi.c uses several data structures and definitions that were previously private to nvme-core.c. Move the definitions to nvme.h, protected by __KERNEL__. Signed-off-by: Vishal Verma Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 55 ------------------------------------------- include/linux/nvme.h | 60 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 55 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 26e266072079..1f98040cf677 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -46,7 +46,6 @@ #define SQ_SIZE(depth) (depth * sizeof(struct nvme_command)) #define CQ_SIZE(depth) (depth * sizeof(struct nvme_completion)) #define NVME_MINORS 64 -#define NVME_IO_TIMEOUT (5 * HZ) #define ADMIN_TIMEOUT (60 * HZ) static int nvme_major; @@ -59,44 +58,6 @@ static DEFINE_SPINLOCK(dev_list_lock); static LIST_HEAD(dev_list); static struct task_struct *nvme_thread; -/* - * Represents an NVM Express device. Each nvme_dev is a PCI function.
- */ -struct nvme_dev { - struct list_head node; - struct nvme_queue **queues; - u32 __iomem *dbs; - struct pci_dev *pci_dev; - struct dma_pool *prp_page_pool; - struct dma_pool *prp_small_pool; - int instance; - int queue_count; - int db_stride; - u32 ctrl_config; - struct msix_entry *entry; - struct nvme_bar __iomem *bar; - struct list_head namespaces; - char serial[20]; - char model[40]; - char firmware_rev[8]; - u32 max_hw_sectors; - u16 oncs; -}; - -/* - * An NVM Express namespace is equivalent to a SCSI LUN - */ -struct nvme_ns { - struct list_head list; - - struct nvme_dev *dev; - struct request_queue *queue; - struct gendisk *disk; - - int ns_id; - int lba_shift; -}; - /* * An NVM Express queue. Each device has at least two (one for admin * commands and one for I/O commands). @@ -295,22 +256,6 @@ static int nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd) return 0; } -/* - * The nvme_iod describes the data in an I/O, including the list of PRP - * entries. You can't see it in this data structure because C doesn't let - * me express that. Use nvme_alloc_iod to ensure there's enough space - * allocated to store the PRP list. - */ -struct nvme_iod { - void *private; /* For the use of the submitter of the I/O */ - int npages; /* In the PRP list. 0 means small pool in use */ - int offset; /* Of PRP list */ - int nents; /* Used in scatterlist */ - int length; /* Of data, in bytes */ - dma_addr_t first_dma; - struct scatterlist sg[0]; -}; - static __le64 **iod_list(struct nvme_iod *iod) { return ((void *)iod) + iod->offset; diff --git a/include/linux/nvme.h b/include/linux/nvme.h index bde44c1fd213..6f899add14ab 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -493,4 +493,64 @@ struct nvme_admin_cmd { #define NVME_IOCTL_ADMIN_CMD _IOWR('N', 0x41, struct nvme_admin_cmd) #define NVME_IOCTL_SUBMIT_IO _IOW('N', 0x42, struct nvme_user_io) +#ifdef __KERNEL__ +#include + +#define NVME_IO_TIMEOUT (5 * HZ) + +/* + * Represents an NVM Express device. Each nvme_dev is a PCI function. + */ +struct nvme_dev { + struct list_head node; + struct nvme_queue **queues; + u32 __iomem *dbs; + struct pci_dev *pci_dev; + struct dma_pool *prp_page_pool; + struct dma_pool *prp_small_pool; + int instance; + int queue_count; + int db_stride; + u32 ctrl_config; + struct msix_entry *entry; + struct nvme_bar __iomem *bar; + struct list_head namespaces; + char serial[20]; + char model[40]; + char firmware_rev[8]; + u32 max_hw_sectors; + u16 oncs; +}; + +/* + * An NVM Express namespace is equivalent to a SCSI LUN + */ +struct nvme_ns { + struct list_head list; + + struct nvme_dev *dev; + struct request_queue *queue; + struct gendisk *disk; + + int ns_id; + int lba_shift; +}; + +/* + * The nvme_iod describes the data in an I/O, including the list of PRP + * entries. You can't see it in this data structure because C doesn't let + * me express that. Use nvme_alloc_iod to ensure there's enough space + * allocated to store the PRP list. + */ +struct nvme_iod { + void *private; /* For the use of the submitter of the I/O */ + int npages; /* In the PRP list. 
0 means small pool in use */ + int offset; /* Of PRP list */ + int nents; /* Used in scatterlist */ + int length; /* Of data, in bytes */ + dma_addr_t first_dma; + struct scatterlist sg[0]; +}; +#endif + #endif /* _LINUX_NVME_H */ -- cgit From f8ebf8409abfdaeeb8c847381629a2a8b8e3d816 Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Wed, 27 Mar 2013 07:13:41 -0400 Subject: NVMe: Add definitions for format command The SCSI emulation has the ability to send format commands, so we need to add the definition of the command. Also add a missing error code. Signed-off-by: Vishal Verma Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 1 + include/linux/nvme.h | 12 ++++++++++++ 2 files changed, 13 insertions(+) (limited to 'include/linux') diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 1f98040cf677..d0cfb85d5582 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -93,6 +93,7 @@ static inline void _nvme_check_size(void) BUILD_BUG_ON(sizeof(struct nvme_create_sq) != 64); BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64); BUILD_BUG_ON(sizeof(struct nvme_features) != 64); + BUILD_BUG_ON(sizeof(struct nvme_format_cmd) != 64); BUILD_BUG_ON(sizeof(struct nvme_command) != 64); BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != 4096); BUILD_BUG_ON(sizeof(struct nvme_id_ns) != 4096); diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 6f899add14ab..f1974cab60cf 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -393,6 +393,16 @@ struct nvme_download_firmware { __u32 rsvd12[4]; }; +struct nvme_format_cmd { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __u64 rsvd2[4]; + __le32 cdw10; + __u32 rsvd11[5]; +}; + struct nvme_command { union { struct nvme_common_command common; @@ -403,6 +413,7 @@ struct nvme_command { struct nvme_create_sq create_sq; struct nvme_delete_queue delete_queue; struct nvme_download_firmware dlfw; + struct nvme_format_cmd format; struct nvme_dsm_cmd dsm; }; }; @@ -420,6 +431,7 @@ enum { NVME_SC_FUSED_FAIL = 0x9, NVME_SC_FUSED_MISSING = 0xa, NVME_SC_INVALID_NS = 0xb, + NVME_SC_CMD_SEQ_ERROR = 0xc, NVME_SC_LBA_RANGE = 0x80, NVME_SC_CAP_EXCEEDED = 0x81, NVME_SC_NS_NOT_READY = 0x82, -- cgit From e05a4f4fc9ddf7a8633c368786a115b3111d36fd Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Mon, 25 Mar 2013 21:12:27 +0100 Subject: Remove spurious _H suffixes from ifdef comments Signed-off-by: Paul Bolle Signed-off-by: Jiri Kosina --- include/linux/evm.h | 2 +- include/linux/ima.h | 4 ++-- include/linux/integrity.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/evm.h b/include/linux/evm.h index 9fc13a760928..1fcb88ca88de 100644 --- a/include/linux/evm.h +++ b/include/linux/evm.h @@ -96,5 +96,5 @@ static inline int evm_inode_init_security(struct inode *inode, return 0; } -#endif /* CONFIG_EVM_H */ +#endif /* CONFIG_EVM */ #endif /* LINUX_EVM_H */ diff --git a/include/linux/ima.h b/include/linux/ima.h index 86c361e947b9..1b7f268cddce 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -46,7 +46,7 @@ static inline int ima_module_check(struct file *file) return 0; } -#endif /* CONFIG_IMA_H */ +#endif /* CONFIG_IMA */ #ifdef CONFIG_IMA_APPRAISE extern void ima_inode_post_setattr(struct dentry *dentry); @@ -72,5 +72,5 @@ static inline int ima_inode_removexattr(struct dentry *dentry, { return 0; } -#endif /* CONFIG_IMA_APPRAISE_H */ +#endif /* CONFIG_IMA_APPRAISE */ #endif /* _LINUX_IMA_H */ diff --git a/include/linux/integrity.h 
b/include/linux/integrity.h index 66c5fe9550a5..83222cebd47b 100644 --- a/include/linux/integrity.h +++ b/include/linux/integrity.h @@ -36,5 +36,5 @@ static inline void integrity_inode_free(struct inode *inode) { return; } -#endif /* CONFIG_INTEGRITY_H */ +#endif /* CONFIG_INTEGRITY */ #endif /* _LINUX_INTEGRITY_H */ -- cgit From 4f22decf9b6329acfe59091c5cba6b378b9b31db Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Fri, 22 Mar 2013 18:38:28 +0100 Subject: HID: input: don't register unmapped input devices There is no need to register an input device containing no events. This allows drivers using the quirk MULTI_INPUT to register one input device per report actually used. For backward compatibility, we need to add a quirk to request this behavior. Signed-off-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-input.c | 77 +++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/hid.h | 1 + 2 files changed, 78 insertions(+) (limited to 'include/linux') diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 21b196c394b1..945b8158ec4c 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -1198,6 +1198,67 @@ static struct hid_input *hidinput_allocate(struct hid_device *hid) return hidinput; } +static bool hidinput_has_been_populated(struct hid_input *hidinput) +{ + int i; + unsigned long r = 0; + + for (i = 0; i < BITS_TO_LONGS(EV_CNT); i++) + r |= hidinput->input->evbit[i]; + + for (i = 0; i < BITS_TO_LONGS(KEY_CNT); i++) + r |= hidinput->input->keybit[i]; + + for (i = 0; i < BITS_TO_LONGS(REL_CNT); i++) + r |= hidinput->input->relbit[i]; + + for (i = 0; i < BITS_TO_LONGS(ABS_CNT); i++) + r |= hidinput->input->absbit[i]; + + for (i = 0; i < BITS_TO_LONGS(MSC_CNT); i++) + r |= hidinput->input->mscbit[i]; + + for (i = 0; i < BITS_TO_LONGS(LED_CNT); i++) + r |= hidinput->input->ledbit[i]; + + for (i = 0; i < BITS_TO_LONGS(SND_CNT); i++) + r |= hidinput->input->sndbit[i]; + + for (i = 0; i < BITS_TO_LONGS(FF_CNT); i++) + r |= hidinput->input->ffbit[i]; + + for (i = 0; i < BITS_TO_LONGS(SW_CNT); i++) + r |= hidinput->input->swbit[i]; + + return !!r; +} + +static void hidinput_cleanup_hidinput(struct hid_device *hid, + struct hid_input *hidinput) +{ + struct hid_report *report; + int i, k; + + list_del(&hidinput->list); + input_free_device(hidinput->input); + + for (k = HID_INPUT_REPORT; k <= HID_OUTPUT_REPORT; k++) { + if (k == HID_OUTPUT_REPORT && + hid->quirks & HID_QUIRK_SKIP_OUTPUT_REPORTS) + continue; + + list_for_each_entry(report, &hid->report_enum[k].report_list, + list) { + + for (i = 0; i < report->maxfield; i++) + if (report->field[i]->hidinput == hidinput) + report->field[i]->hidinput = NULL; + } + } + + kfree(hidinput); +} + /* * Register the input device; print a message.
* Configure the input layer interface @@ -1249,6 +1310,10 @@ int hidinput_connect(struct hid_device *hid, unsigned int force) hidinput_configure_usage(hidinput, report->field[i], report->field[i]->usage + j); + if ((hid->quirks & HID_QUIRK_NO_EMPTY_INPUT) && + !hidinput_has_been_populated(hidinput)) + continue; + if (hid->quirks & HID_QUIRK_MULTI_INPUT) { /* This will leave hidinput NULL, so that it * allocates another one if we have more inputs on @@ -1265,6 +1330,18 @@ int hidinput_connect(struct hid_device *hid, unsigned int force) } } + if (hidinput && (hid->quirks & HID_QUIRK_NO_EMPTY_INPUT) && + !hidinput_has_been_populated(hidinput)) { + /* no need to register an input device not populated */ + hidinput_cleanup_hidinput(hid, hidinput); + hidinput = NULL; + } + + if (list_empty(&hid->inputs)) { + hid_err(hid, "No inputs registered, leaving\n"); + goto out_unwind; + } + if (hidinput) { if (drv->input_configured) drv->input_configured(hid, hidinput); diff --git a/include/linux/hid.h b/include/linux/hid.h index 863744c38ddc..fffa06bc4880 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -282,6 +282,7 @@ struct hid_item { #define HID_QUIRK_BADPAD 0x00000020 #define HID_QUIRK_MULTI_INPUT 0x00000040 #define HID_QUIRK_HIDINPUT_FORCE 0x00000080 +#define HID_QUIRK_NO_EMPTY_INPUT 0x00000100 #define HID_QUIRK_SKIP_OUTPUT_REPORTS 0x00010000 #define HID_QUIRK_FULLSPEED_INTERVAL 0x10000000 #define HID_QUIRK_NO_INIT_REPORTS 0x20000000 -- cgit From 221ad7f2df7c54b3f05471a3599ea7368366aaeb Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 26 Mar 2013 21:24:20 +0000 Subject: regmap: core: Provide regmap_can_raw_write() operation Mainly useful internally but exported since this is a public API that's being checked for. Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 15 ++++++++++++--- include/linux/regmap.h | 1 + 2 files changed, 13 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 9174c9d45a16..9ab1e1fedbc9 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -1097,6 +1097,17 @@ int _regmap_raw_write(struct regmap *map, unsigned int reg, return ret; } +/** + * regmap_can_raw_write - Test if regmap_raw_write() is supported + * + * @map: Map to check. 
+ */ +bool regmap_can_raw_write(struct regmap *map) +{ + return map->bus && map->format.format_val && map->format.format_reg; +} +EXPORT_SYMBOL_GPL(regmap_can_raw_write); + static int _regmap_bus_formatted_write(void *context, unsigned int reg, unsigned int val) { @@ -1220,12 +1231,10 @@ int regmap_raw_write(struct regmap *map, unsigned int reg, { int ret; - if (!map->bus) + if (!regmap_can_raw_write(map)) return -EINVAL; if (val_len % map->format.val_bytes) return -EINVAL; - if (reg % map->reg_stride) - return -EINVAL; map->lock(map->lock_arg); diff --git a/include/linux/regmap.h b/include/linux/regmap.h index bf77dfdabef9..02d84e24b7c2 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -389,6 +389,7 @@ int regmap_update_bits_check(struct regmap *map, unsigned int reg, bool *change); int regmap_get_val_bytes(struct regmap *map); int regmap_async_complete(struct regmap *map); +bool regmap_can_raw_write(struct regmap *map); int regcache_sync(struct regmap *map); int regcache_sync_region(struct regmap *map, unsigned int min, -- cgit From 8761e31c227f9751327196f170eba2b519eab48f Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Tue, 26 Mar 2013 10:30:44 -0700 Subject: mmzone: correct "pags" to "pages" in comment. Signed-off-by: Cody P Schafer Signed-off-by: Jiri Kosina --- include/linux/mmzone.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index ede274957e05..2570216b844a 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -450,7 +450,7 @@ struct zone { * * present_pages is physical pages existing within the zone, which * is calculated as: - * present_pages = spanned_pages - absent_pages(pags in holes); + * present_pages = spanned_pages - absent_pages(pages in holes); * * managed_pages is present pages managed by the buddy system, which * is calculated as (reserved_pages includes pages allocated by the -- cgit From e874a6697710f52fa8ab29487a99034d5d96fdcc Mon Sep 17 00:00:00 2001 From: Emilio López Date: Mon, 25 Feb 2013 11:44:26 -0300 Subject: clk: arm: sunxi: Add a new clock driver for sunxi SOCs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit implements the base CPU clocks for sunxi devices. It has been tested using a slightly modified cpufreq driver from the linux-sunxi 3.0 tree. Additionally, document the new bindings introduced by this patch. 
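Before the measurements, a quick sanity check of the factor formula implemented by clk-factors.c in this series, rate = (parent_rate * n * (k + 1) >> p) / (m + 1); the sketch below is a minimal user-space program, and the factor values are illustrative assumptions rather than values read from hardware:

#include <stdio.h>

/* same arithmetic as clk_factors_recalc_rate() later in this patch */
static unsigned long factors_rate(unsigned long parent_rate, unsigned int n,
				  unsigned int k, unsigned int m, unsigned int p)
{
	return (parent_rate * n * (k + 1) >> p) / (m + 1);
}

int main(void)
{
	/* e.g. n = 42, k = m = p = 0 takes the 24 MHz oscillator to the
	 * 1008 MHz PLL1 rate seen in the clk_summary output below */
	printf("%lu\n", factors_rate(24000000UL, 42, 0, 0, 0));
	return 0;
}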
Idling: / # cat /sys/kernel/debug/clk/clk_summary clock enable_cnt prepare_cnt rate --------------------------------------------------------------------- osc32k 0 0 32768 osc24M_fixed 0 0 24000000 osc24M 0 0 24000000 apb1_mux 0 0 24000000 apb1 0 0 24000000 pll1 0 0 60000000 cpu 0 0 60000000 axi 0 0 60000000 ahb 0 0 60000000 apb0 0 0 30000000 dummy 0 0 0 After "yes >/dev/null &": / # cat /sys/kernel/debug/clk/clk_summary clock enable_cnt prepare_cnt rate --------------------------------------------------------------------- osc32k 0 0 32768 osc24M_fixed 0 0 24000000 osc24M 0 0 24000000 apb1_mux 0 0 24000000 apb1 0 0 24000000 pll1 0 0 1008000000 cpu 0 0 1008000000 axi 0 0 336000000 ahb 0 0 168000000 apb0 0 0 84000000 dummy 0 0 0 Signed-off-by: Emilio López Acked-by: Maxime Ripard Signed-off-by: Mike Turquette --- Documentation/devicetree/bindings/clock/sunxi.txt | 44 +++ drivers/clk/Makefile | 1 + drivers/clk/sunxi/Makefile | 5 + drivers/clk/sunxi/clk-factors.c | 180 +++++++++++ drivers/clk/sunxi/clk-factors.h | 27 ++ drivers/clk/sunxi/clk-sunxi.c | 362 ++++++++++++++++++++++ drivers/clocksource/sunxi_timer.c | 4 +- include/linux/clk/sunxi.h | 22 ++ 8 files changed, 643 insertions(+), 2 deletions(-) create mode 100644 Documentation/devicetree/bindings/clock/sunxi.txt create mode 100644 drivers/clk/sunxi/Makefile create mode 100644 drivers/clk/sunxi/clk-factors.c create mode 100644 drivers/clk/sunxi/clk-factors.h create mode 100644 drivers/clk/sunxi/clk-sunxi.c create mode 100644 include/linux/clk/sunxi.h (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/clock/sunxi.txt b/Documentation/devicetree/bindings/clock/sunxi.txt new file mode 100644 index 000000000000..b23cfbdbcd6d --- /dev/null +++ b/Documentation/devicetree/bindings/clock/sunxi.txt @@ -0,0 +1,44 @@ +Device Tree Clock bindings for arch-sunxi + +This binding uses the common clock binding[1]. + +[1] Documentation/devicetree/bindings/clock/clock-bindings.txt + +Required properties: +- compatible : shall be one of the following: + "allwinner,sunxi-osc-clk" - for a gatable oscillator + "allwinner,sunxi-pll1-clk" - for the main PLL clock + "allwinner,sunxi-cpu-clk" - for the CPU multiplexer clock + "allwinner,sunxi-axi-clk" - for the sunxi AXI clock + "allwinner,sunxi-ahb-clk" - for the sunxi AHB clock + "allwinner,sunxi-apb0-clk" - for the sunxi APB0 clock + "allwinner,sunxi-apb1-clk" - for the sunxi APB1 clock + "allwinner,sunxi-apb1-mux-clk" - for the sunxi APB1 clock muxing + +Required properties for all clocks: +- reg : shall be the control register address for the clock. +- clocks : shall be the input parent clock(s) phandle for the clock +- #clock-cells : from common clock binding; shall be set to 0. 
+ +For example: + +osc24M: osc24M@01c20050 { + #clock-cells = <0>; + compatible = "allwinner,sunxi-osc-clk"; + reg = <0x01c20050 0x4>; + clocks = <&osc24M_fixed>; +}; + +pll1: pll1@01c20000 { + #clock-cells = <0>; + compatible = "allwinner,sunxi-pll1-clk"; + reg = <0x01c20000 0x4>; + clocks = <&osc24M>; +}; + +cpu: cpu@01c20054 { + #clock-cells = <0>; + compatible = "allwinner,sunxi-cpu-clk"; + reg = <0x01c20054 0x4>; + clocks = <&osc32k>, <&osc24M>, <&pll1>; +}; diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile index 41cb123a2d02..79e98e416724 100644 --- a/drivers/clk/Makefile +++ b/drivers/clk/Makefile @@ -24,6 +24,7 @@ ifeq ($(CONFIG_COMMON_CLK), y) obj-$(CONFIG_ARCH_MMP) += mmp/ endif obj-$(CONFIG_MACH_LOONGSON1) += clk-ls1x.o +obj-$(CONFIG_ARCH_SUNXI) += sunxi/ obj-$(CONFIG_ARCH_U8500) += ux500/ obj-$(CONFIG_ARCH_VT8500) += clk-vt8500.o obj-$(CONFIG_ARCH_ZYNQ) += clk-zynq.o diff --git a/drivers/clk/sunxi/Makefile b/drivers/clk/sunxi/Makefile new file mode 100644 index 000000000000..b5bac917612c --- /dev/null +++ b/drivers/clk/sunxi/Makefile @@ -0,0 +1,5 @@ +# +# Makefile for sunxi specific clk +# + +obj-y += clk-sunxi.o clk-factors.o diff --git a/drivers/clk/sunxi/clk-factors.c b/drivers/clk/sunxi/clk-factors.c new file mode 100644 index 000000000000..88523f91d9b7 --- /dev/null +++ b/drivers/clk/sunxi/clk-factors.c @@ -0,0 +1,180 @@ +/* + * Copyright (C) 2013 Emilio López + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Adjustable factor-based clock implementation + */ + +#include +#include +#include +#include +#include +#include + +#include + +#include "clk-factors.h" + +/* + * DOC: basic adjustable factor-based clock that cannot gate + * + * Traits of this clock: + * prepare - clk_prepare only ensures that parents are prepared + * enable - clk_enable only ensures that parents are enabled + * rate - rate is adjustable. + * clk->rate = (parent->rate * N * (K + 1) >> P) / (M + 1) + * parent - fixed parent. 
No clk_set_parent support + */ + +struct clk_factors { + struct clk_hw hw; + void __iomem *reg; + struct clk_factors_config *config; + void (*get_factors) (u32 *rate, u32 parent, u8 *n, u8 *k, u8 *m, u8 *p); + spinlock_t *lock; +}; + +#define to_clk_factors(_hw) container_of(_hw, struct clk_factors, hw) + +#define SETMASK(len, pos) (((-1U) >> (31-len)) << (pos)) +#define CLRMASK(len, pos) (~(SETMASK(len, pos))) +#define FACTOR_GET(bit, len, reg) (((reg) & SETMASK(len, bit)) >> (bit)) + +#define FACTOR_SET(bit, len, reg, val) \ + (((reg) & CLRMASK(len, bit)) | (val << (bit))) + +static unsigned long clk_factors_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + u8 n = 1, k = 0, p = 0, m = 0; + u32 reg; + unsigned long rate; + struct clk_factors *factors = to_clk_factors(hw); + struct clk_factors_config *config = factors->config; + + /* Fetch the register value */ + reg = readl(factors->reg); + + /* Get each individual factor if applicable */ + if (config->nwidth != SUNXI_FACTORS_NOT_APPLICABLE) + n = FACTOR_GET(config->nshift, config->nwidth, reg); + if (config->kwidth != SUNXI_FACTORS_NOT_APPLICABLE) + k = FACTOR_GET(config->kshift, config->kwidth, reg); + if (config->mwidth != SUNXI_FACTORS_NOT_APPLICABLE) + m = FACTOR_GET(config->mshift, config->mwidth, reg); + if (config->pwidth != SUNXI_FACTORS_NOT_APPLICABLE) + p = FACTOR_GET(config->pshift, config->pwidth, reg); + + /* Calculate the rate */ + rate = (parent_rate * n * (k + 1) >> p) / (m + 1); + + return rate; +} + +static long clk_factors_round_rate(struct clk_hw *hw, unsigned long rate, + unsigned long *parent_rate) +{ + struct clk_factors *factors = to_clk_factors(hw); + factors->get_factors((u32 *)&rate, (u32)*parent_rate, + NULL, NULL, NULL, NULL); + + return rate; +} + +static int clk_factors_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + u8 n, k, m, p; + u32 reg; + struct clk_factors *factors = to_clk_factors(hw); + struct clk_factors_config *config = factors->config; + unsigned long flags = 0; + + factors->get_factors((u32 *)&rate, (u32)parent_rate, &n, &k, &m, &p); + + if (factors->lock) + spin_lock_irqsave(factors->lock, flags); + + /* Fetch the register value */ + reg = readl(factors->reg); + + /* Set up the new factors - macros do not do anything if width is 0 */ + reg = FACTOR_SET(config->nshift, config->nwidth, reg, n); + reg = FACTOR_SET(config->kshift, config->kwidth, reg, k); + reg = FACTOR_SET(config->mshift, config->mwidth, reg, m); + reg = FACTOR_SET(config->pshift, config->pwidth, reg, p); + + /* Apply them now */ + writel(reg, factors->reg); + + /* delay 500us so pll stabilizes */ + __delay((rate >> 20) * 500 / 2); + + if (factors->lock) + spin_unlock_irqrestore(factors->lock, flags); + + return 0; +} + +static const struct clk_ops clk_factors_ops = { + .recalc_rate = clk_factors_recalc_rate, + .round_rate = clk_factors_round_rate, + .set_rate = clk_factors_set_rate, +}; + +/** + * clk_register_factors - register a factors clock with + * the clock framework + * @dev: device registering this clock + * @name: name of this clock + * @parent_name: name of clock's parent + * @flags: framework-specific flags + * @reg: register address to adjust factors + * @config: shift and width of factors n, k, m and p + * @get_factors: function to calculate the factors for a given frequency + * @lock: shared register lock for this clock + */ +struct clk *clk_register_factors(struct device *dev, const char *name, + const char *parent_name, + unsigned long flags, void __iomem *reg, + 
struct clk_factors_config *config, + void (*get_factors)(u32 *rate, u32 parent, + u8 *n, u8 *k, u8 *m, u8 *p), + spinlock_t *lock) +{ + struct clk_factors *factors; + struct clk *clk; + struct clk_init_data init; + + /* allocate the factors */ + factors = kzalloc(sizeof(struct clk_factors), GFP_KERNEL); + if (!factors) { + pr_err("%s: could not allocate factors clk\n", __func__); + return ERR_PTR(-ENOMEM); + } + + init.name = name; + init.ops = &clk_factors_ops; + init.flags = flags; + init.parent_names = (parent_name ? &parent_name : NULL); + init.num_parents = (parent_name ? 1 : 0); + + /* struct clk_factors assignments */ + factors->reg = reg; + factors->config = config; + factors->lock = lock; + factors->hw.init = &init; + factors->get_factors = get_factors; + + /* register the clock */ + clk = clk_register(dev, &factors->hw); + + if (IS_ERR(clk)) + kfree(factors); + + return clk; +} diff --git a/drivers/clk/sunxi/clk-factors.h b/drivers/clk/sunxi/clk-factors.h new file mode 100644 index 000000000000..f49851cc4380 --- /dev/null +++ b/drivers/clk/sunxi/clk-factors.h @@ -0,0 +1,27 @@ +#ifndef __MACH_SUNXI_CLK_FACTORS_H +#define __MACH_SUNXI_CLK_FACTORS_H + +#include +#include + +#define SUNXI_FACTORS_NOT_APPLICABLE (0) + +struct clk_factors_config { + u8 nshift; + u8 nwidth; + u8 kshift; + u8 kwidth; + u8 mshift; + u8 mwidth; + u8 pshift; + u8 pwidth; +}; + +struct clk *clk_register_factors(struct device *dev, const char *name, + const char *parent_name, + unsigned long flags, void __iomem *reg, + struct clk_factors_config *config, + void (*get_factors) (u32 *rate, u32 parent_rate, + u8 *n, u8 *k, u8 *m, u8 *p), + spinlock_t *lock); +#endif diff --git a/drivers/clk/sunxi/clk-sunxi.c b/drivers/clk/sunxi/clk-sunxi.c new file mode 100644 index 000000000000..d4ad1c22859e --- /dev/null +++ b/drivers/clk/sunxi/clk-sunxi.c @@ -0,0 +1,362 @@ +/* + * Copyright 2013 Emilio López + * + * Emilio López + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include +#include +#include +#include +#include + +#include "clk-factors.h" + +static DEFINE_SPINLOCK(clk_lock); + +/** + * sunxi_osc_clk_setup() - Setup function for gatable oscillator + */ + +#define SUNXI_OSC24M_GATE 0 + +static void __init sunxi_osc_clk_setup(struct device_node *node) +{ + struct clk *clk; + const char *clk_name = node->name; + const char *parent; + void *reg; + + reg = of_iomap(node, 0); + + parent = of_clk_get_parent_name(node, 0); + + clk = clk_register_gate(NULL, clk_name, parent, CLK_IGNORE_UNUSED, + reg, SUNXI_OSC24M_GATE, 0, &clk_lock); + + if (clk) { + of_clk_add_provider(node, of_clk_src_simple_get, clk); + clk_register_clkdev(clk, clk_name, NULL); + } +} + + + +/** + * sunxi_get_pll1_factors() - calculates n, k, m, p factors for PLL1 + * PLL1 rate is calculated as follows + * rate = (parent_rate * n * (k + 1) >> p) / (m + 1); + * parent_rate is always 24Mhz + */ + +static void sunxi_get_pll1_factors(u32 *freq, u32 parent_rate, + u8 *n, u8 *k, u8 *m, u8 *p) +{ + u8 div; + + /* Normalize value to a 6M multiple */ + div = *freq / 6000000; + *freq = 6000000 * div; + + /* we were called to round the frequency, we can now return */ + if (n == NULL) + return; + + /* m is always zero for pll1 */ + *m = 0; + + /* k is 1 only on these cases */ + if (*freq >= 768000000 || *freq == 42000000 || *freq == 54000000) + *k = 1; + else + *k = 0; + + /* p will be 3 for divs under 10 */ + if (div < 10) + *p = 3; + + /* p will be 2 for divs between 10 - 20 and odd divs under 32 */ + else if (div < 20 || (div < 32 && (div & 1))) + *p = 2; + + /* p will be 1 for even divs under 32, divs under 40 and odd pairs + * of divs between 40-62 */ + else if (div < 40 || (div < 64 && (div & 2))) + *p = 1; + + /* any other entries have p = 0 */ + else + *p = 0; + + /* calculate a suitable n based on k and p */ + div <<= *p; + div /= (*k + 1); + *n = div / 4; +} + + + +/** + * sunxi_get_apb1_factors() - calculates m, p factors for APB1 + * APB1 rate is calculated as follows + * rate = (parent_rate >> p) / (m + 1); + */ + +static void sunxi_get_apb1_factors(u32 *freq, u32 parent_rate, + u8 *n, u8 *k, u8 *m, u8 *p) +{ + u8 calcm, calcp; + + if (parent_rate < *freq) + *freq = parent_rate; + + parent_rate = (parent_rate + (*freq - 1)) / *freq; + + /* Invalid rate! 
*/ + if (parent_rate > 32) + return; + + if (parent_rate <= 4) + calcp = 0; + else if (parent_rate <= 8) + calcp = 1; + else if (parent_rate <= 16) + calcp = 2; + else + calcp = 3; + + calcm = (parent_rate >> calcp) - 1; + + *freq = (parent_rate >> calcp) / (calcm + 1); + + /* we were called to round the frequency, we can now return */ + if (n == NULL) + return; + + *m = calcm; + *p = calcp; +} + + + +/** + * sunxi_factors_clk_setup() - Setup function for factor clocks + */ + +struct factors_data { + struct clk_factors_config *table; + void (*getter) (u32 *rate, u32 parent_rate, u8 *n, u8 *k, u8 *m, u8 *p); +}; + +static struct clk_factors_config pll1_config = { + .nshift = 8, + .nwidth = 5, + .kshift = 4, + .kwidth = 2, + .mshift = 0, + .mwidth = 2, + .pshift = 16, + .pwidth = 2, +}; + +static struct clk_factors_config apb1_config = { + .mshift = 0, + .mwidth = 5, + .pshift = 16, + .pwidth = 2, +}; + +static const __initconst struct factors_data pll1_data = { + .table = &pll1_config, + .getter = sunxi_get_pll1_factors, +}; + +static const __initconst struct factors_data apb1_data = { + .table = &apb1_config, + .getter = sunxi_get_apb1_factors, +}; + +static void __init sunxi_factors_clk_setup(struct device_node *node, + struct factors_data *data) +{ + struct clk *clk; + const char *clk_name = node->name; + const char *parent; + void *reg; + + reg = of_iomap(node, 0); + + parent = of_clk_get_parent_name(node, 0); + + clk = clk_register_factors(NULL, clk_name, parent, CLK_IGNORE_UNUSED, + reg, data->table, data->getter, &clk_lock); + + if (clk) { + of_clk_add_provider(node, of_clk_src_simple_get, clk); + clk_register_clkdev(clk, clk_name, NULL); + } +} + + + +/** + * sunxi_mux_clk_setup() - Setup function for muxes + */ + +#define SUNXI_MUX_GATE_WIDTH 2 + +struct mux_data { + u8 shift; +}; + +static const __initconst struct mux_data cpu_data = { + .shift = 16, +}; + +static const __initconst struct mux_data apb1_mux_data = { + .shift = 24, +}; + +static void __init sunxi_mux_clk_setup(struct device_node *node, + struct mux_data *data) +{ + struct clk *clk; + const char *clk_name = node->name; + const char **parents = kmalloc(sizeof(char *) * 5, GFP_KERNEL); + void *reg; + int i = 0; + + reg = of_iomap(node, 0); + + while (i < 5 && (parents[i] = of_clk_get_parent_name(node, i)) != NULL) + i++; + + clk = clk_register_mux(NULL, clk_name, parents, i, 0, reg, + data->shift, SUNXI_MUX_GATE_WIDTH, + 0, &clk_lock); + + if (clk) { + of_clk_add_provider(node, of_clk_src_simple_get, clk); + clk_register_clkdev(clk, clk_name, NULL); + } +} + + + +/** + * sunxi_divider_clk_setup() - Setup function for simple divider clocks + */ + +#define SUNXI_DIVISOR_WIDTH 2 + +struct div_data { + u8 shift; + u8 pow; +}; + +static const __initconst struct div_data axi_data = { + .shift = 0, + .pow = 0, +}; + +static const __initconst struct div_data ahb_data = { + .shift = 4, + .pow = 1, +}; + +static const __initconst struct div_data apb0_data = { + .shift = 8, + .pow = 1, +}; + +static void __init sunxi_divider_clk_setup(struct device_node *node, + struct div_data *data) +{ + struct clk *clk; + const char *clk_name = node->name; + const char *clk_parent; + void *reg; + + reg = of_iomap(node, 0); + + clk_parent = of_clk_get_parent_name(node, 0); + + clk = clk_register_divider(NULL, clk_name, clk_parent, 0, + reg, data->shift, SUNXI_DIVISOR_WIDTH, + data->pow ? 
CLK_DIVIDER_POWER_OF_TWO : 0, + &clk_lock); + if (clk) { + of_clk_add_provider(node, of_clk_src_simple_get, clk); + clk_register_clkdev(clk, clk_name, NULL); + } +} + + +/* Matches for of_clk_init */ +static const __initconst struct of_device_id clk_match[] = { + {.compatible = "fixed-clock", .data = of_fixed_clk_setup,}, + {.compatible = "allwinner,sunxi-osc-clk", .data = sunxi_osc_clk_setup,}, + {} +}; + +/* Matches for factors clocks */ +static const __initconst struct of_device_id clk_factors_match[] = { + {.compatible = "allwinner,sunxi-pll1-clk", .data = &pll1_data,}, + {.compatible = "allwinner,sunxi-apb1-clk", .data = &apb1_data,}, + {} +}; + +/* Matches for divider clocks */ +static const __initconst struct of_device_id clk_div_match[] = { + {.compatible = "allwinner,sunxi-axi-clk", .data = &axi_data,}, + {.compatible = "allwinner,sunxi-ahb-clk", .data = &ahb_data,}, + {.compatible = "allwinner,sunxi-apb0-clk", .data = &apb0_data,}, + {} +}; + +/* Matches for mux clocks */ +static const __initconst struct of_device_id clk_mux_match[] = { + {.compatible = "allwinner,sunxi-cpu-clk", .data = &cpu_data,}, + {.compatible = "allwinner,sunxi-apb1-mux-clk", .data = &apb1_mux_data,}, + {} +}; + +static void __init of_sunxi_table_clock_setup(const struct of_device_id *clk_match, + void *function) +{ + struct device_node *np; + const struct div_data *data; + const struct of_device_id *match; + void (*setup_function)(struct device_node *, const void *) = function; + + for_each_matching_node(np, clk_match) { + match = of_match_node(clk_match, np); + data = match->data; + setup_function(np, data); + } +} + +void __init sunxi_init_clocks(void) +{ + /* Register all the simple sunxi clocks on DT */ + of_clk_init(clk_match); + + /* Register factor clocks */ + of_sunxi_table_clock_setup(clk_factors_match, sunxi_factors_clk_setup); + + /* Register divider clocks */ + of_sunxi_table_clock_setup(clk_div_match, sunxi_divider_clk_setup); + + /* Register mux clocks */ + of_sunxi_table_clock_setup(clk_mux_match, sunxi_mux_clk_setup); +} diff --git a/drivers/clocksource/sunxi_timer.c b/drivers/clocksource/sunxi_timer.c index 4086b9167159..0ce85e29769b 100644 --- a/drivers/clocksource/sunxi_timer.c +++ b/drivers/clocksource/sunxi_timer.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #define TIMER_CTL_REG 0x00 #define TIMER_CTL_ENABLE (1 << 0) @@ -123,7 +123,7 @@ void __init sunxi_timer_init(void) if (irq <= 0) panic("Can't parse IRQ"); - of_clk_init(NULL); + sunxi_init_clocks(); clk = of_clk_get(node, 0); if (IS_ERR(clk)) diff --git a/include/linux/clk/sunxi.h b/include/linux/clk/sunxi.h new file mode 100644 index 000000000000..e074fdd5a236 --- /dev/null +++ b/include/linux/clk/sunxi.h @@ -0,0 +1,22 @@ +/* + * Copyright 2012 Maxime Ripard + * + * Maxime Ripard + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#ifndef __LINUX_CLK_SUNXI_H_ +#define __LINUX_CLK_SUNXI_H_ + +void __init sunxi_init_clocks(void); + +#endif -- cgit From 2db76d7c3c6db93058f983c8240f7c7c25e87ee6 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 26 Mar 2013 15:14:18 +0200 Subject: lib/scatterlist: sg_page_iter: support sg lists w/o backing pages The i915 driver uses sg lists for memory without backing 'struct page' pages, similarly to other IO memory regions, setting only the DMA address for these. It does this, so that it can program the HW MMU tables in a uniform way both for sg lists with and without backing pages. Without a valid page pointer we can't call nth_page to get the current page in __sg_page_iter_next, so add a helper that relevant users can call separately. Also add a helper to get the DMA address of the current page (idea from Daniel). Convert all places in i915, to use the new API. Signed-off-by: Imre Deak Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_cache.c | 2 +- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_gem.c | 8 ++++---- drivers/gpu/drm/i915/i915_gem_dmabuf.c | 2 +- drivers/gpu/drm/i915/i915_gem_gtt.c | 6 ++---- drivers/gpu/drm/i915/i915_gem_tiling.c | 4 ++-- include/linux/scatterlist.h | 28 +++++++++++++++++++++++----- lib/scatterlist.c | 4 +--- 8 files changed, 35 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpu/drm/drm_cache.c b/drivers/gpu/drm/drm_cache.c index bc8edbeca3fd..bb8f58012189 100644 --- a/drivers/gpu/drm/drm_cache.c +++ b/drivers/gpu/drm/drm_cache.c @@ -109,7 +109,7 @@ drm_clflush_sg(struct sg_table *st) mb(); for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) - drm_clflush_page(sg_iter.page); + drm_clflush_page(sg_page_iter_page(&sg_iter)); mb(); return; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 1d091ea12fad..f69538508d8c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1543,7 +1543,7 @@ static inline struct page *i915_gem_object_get_page(struct drm_i915_gem_object * struct sg_page_iter sg_iter; for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, n) - return sg_iter.page; + return sg_page_iter_page(&sg_iter); return NULL; } diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index a1123a32dc27..911bd40ef513 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -442,7 +442,7 @@ i915_gem_shmem_pread(struct drm_device *dev, for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, offset >> PAGE_SHIFT) { - struct page *page = sg_iter.page; + struct page *page = sg_page_iter_page(&sg_iter); if (remain <= 0) break; @@ -765,7 +765,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev, for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, offset >> PAGE_SHIFT) { - struct page *page = sg_iter.page; + struct page *page = sg_page_iter_page(&sg_iter); int partial_cacheline_write; if (remain <= 0) @@ -1647,7 +1647,7 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) obj->dirty = 0; for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) { - struct page *page = sg_iter.page; + struct page *page = sg_page_iter_page(&sg_iter); if (obj->dirty) set_page_dirty(page); @@ -1827,7 +1827,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) err_pages: sg_mark_end(sg); for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) - page_cache_release(sg_iter.page); + page_cache_release(sg_page_iter_page(&sg_iter)); 
sg_free_table(st); kfree(st); return PTR_ERR(page); diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c index 898615d2d5e2..c6dfc1466e3a 100644 --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c @@ -130,7 +130,7 @@ static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf) i = 0; for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0); - pages[i++] = sg_iter.page; + pages[i++] = sg_page_iter_page(&sg_iter); obj->dma_buf_vmapping = vmap(pages, i, 0, PAGE_KERNEL); drm_free_large(pages); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 4cbae7bbb833..24a23b31b55f 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -123,8 +123,7 @@ static void gen6_ppgtt_insert_entries(struct i915_hw_ppgtt *ppgtt, for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) { dma_addr_t page_addr; - page_addr = sg_dma_address(sg_iter.sg) + - (sg_iter.sg_pgoffset << PAGE_SHIFT); + page_addr = sg_page_iter_dma_address(&sg_iter); pt_vaddr[act_pte] = gen6_pte_encode(ppgtt->dev, page_addr, cache_level); if (++act_pte == I915_PPGTT_PT_ENTRIES) { @@ -424,8 +423,7 @@ static void gen6_ggtt_insert_entries(struct drm_device *dev, dma_addr_t addr; for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) { - addr = sg_dma_address(sg_iter.sg) + - (sg_iter.sg_pgoffset << PAGE_SHIFT); + addr = sg_page_iter_dma_address(&sg_iter); iowrite32(gen6_pte_encode(dev, addr, level), >t_entries[i]); i++; } diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index f799708bcb85..c807eb93755b 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -481,7 +481,7 @@ i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj) i = 0; for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) { - struct page *page = sg_iter.page; + struct page *page = sg_page_iter_page(&sg_iter); char new_bit_17 = page_to_phys(page) >> 17; if ((new_bit_17 & 0x1) != (test_bit(i, obj->bit_17) != 0)) { @@ -511,7 +511,7 @@ i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj) i = 0; for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) { - if (page_to_phys(sg_iter.page) & (1 << 17)) + if (page_to_phys(sg_page_iter_page(&sg_iter)) & (1 << 17)) __set_bit(i, obj->bit_17); else __clear_bit(i, obj->bit_17); diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 2d8bdaef9611..e96b9546c4c6 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -235,13 +235,13 @@ size_t sg_copy_to_buffer(struct scatterlist *sgl, unsigned int nents, * sg page iterator * * Iterates over sg entries page-by-page. On each successful iteration, - * @piter->page points to the current page, @piter->sg to the sg holding this - * page and @piter->sg_pgoffset to the page's page offset within the sg. The - * iteration will stop either when a maximum number of sg entries was reached - * or a terminating sg (sg_last(sg) == true) was reached. + * you can call sg_page_iter_page(@piter) and sg_page_iter_dma_address(@piter) + * to get the current page and its dma address. @piter->sg will point to the + * sg holding this page and @piter->sg_pgoffset to the page's page offset + * within the sg. The iteration will stop either when a maximum number of sg + * entries was reached or a terminating sg (sg_last(sg) == true) was reached. 
*/ struct sg_page_iter { - struct page *page; /* current page */ struct scatterlist *sg; /* sg holding the page */ unsigned int sg_pgoffset; /* page offset within the sg */ @@ -255,6 +255,24 @@ bool __sg_page_iter_next(struct sg_page_iter *piter); void __sg_page_iter_start(struct sg_page_iter *piter, struct scatterlist *sglist, unsigned int nents, unsigned long pgoffset); +/** + * sg_page_iter_page - get the current page held by the page iterator + * @piter: page iterator holding the page + */ +static inline struct page *sg_page_iter_page(struct sg_page_iter *piter) +{ + return nth_page(sg_page(piter->sg), piter->sg_pgoffset); +} + +/** + * sg_page_iter_dma_address - get the dma address of the current page held by + * the page iterator. + * @piter: page iterator holding the page + */ +static inline dma_addr_t sg_page_iter_dma_address(struct sg_page_iter *piter) +{ + return sg_dma_address(piter->sg) + (piter->sg_pgoffset << PAGE_SHIFT); +} /** * for_each_sg_page - iterate over the pages of the given sg list diff --git a/lib/scatterlist.c b/lib/scatterlist.c index b83c144d731f..a1cf8cae60e7 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -401,7 +401,6 @@ void __sg_page_iter_start(struct sg_page_iter *piter, piter->__pg_advance = 0; piter->__nents = nents; - piter->page = NULL; piter->sg = sglist; piter->sg_pgoffset = pgoffset; } @@ -426,7 +425,6 @@ bool __sg_page_iter_next(struct sg_page_iter *piter) if (!--piter->__nents || !piter->sg) return false; } - piter->page = nth_page(sg_page(piter->sg), piter->sg_pgoffset); return true; } @@ -496,7 +494,7 @@ bool sg_miter_next(struct sg_mapping_iter *miter) miter->__remaining = min_t(unsigned long, miter->__remaining, PAGE_SIZE - miter->__offset); } - miter->page = miter->piter.page; + miter->page = sg_page_iter_page(&miter->piter); miter->consumed = miter->length = miter->__remaining; if (miter->__flags & SG_MITER_ATOMIC) -- cgit From 5203cd28db6dc05c3618a602cf4cf81203d00257 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 26 Mar 2013 23:11:21 +0000 Subject: net: core: introduce skb_probe_transport_header() Sometimes, we need probe and set the transport header for packets (e.g from untrusted source). This patch introduces a new helper skb_probe_transport_header() which tries to probe and set the l4 header through skb_flow_dissect(), if not just set the transport header to the hint passed by caller. Cc: Eric Dumazet Signed-off-by: Jason Wang Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 497412165b1c..fa88b966cb8e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -32,6 +32,7 @@ #include #include #include +#include /* Don't change this without changing skb_csum_unnecessary! 
*/ #define CHECKSUM_NONE 0 @@ -1559,6 +1560,19 @@ static inline void skb_set_transport_header(struct sk_buff *skb, skb->transport_header += offset; } +static inline void skb_probe_transport_header(struct sk_buff *skb, + const int offset_hint) +{ + struct flow_keys keys; + + if (skb_transport_header_was_set(skb)) + return; + else if (skb_flow_dissect(skb, &keys)) + skb_set_transport_header(skb, keys.thoff); + else + skb_set_transport_header(skb, offset_hint); +} + static inline unsigned char *skb_network_header(const struct sk_buff *skb) { return skb->head + skb->network_header; -- cgit From d6b688cf2f7ca3e168acc73597f4d7102ae663fa Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Wed, 27 Mar 2013 17:23:10 +0100 Subject: bcma: handle more devices in bcma_pmu_get_alp_clock() Add some more chip IDs to bcma_pmu_get_alp_clock() Signed-off-by: Hauke Mehrtens Signed-off-by: John W. Linville --- drivers/bcma/driver_chipcommon_pmu.c | 24 ++++++++++++++++++++---- include/linux/bcma/bcma_driver_chipcommon.h | 1 + 2 files changed, 21 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/bcma/driver_chipcommon_pmu.c b/drivers/bcma/driver_chipcommon_pmu.c index 7e88fffaf3f5..edca73af3cc0 100644 --- a/drivers/bcma/driver_chipcommon_pmu.c +++ b/drivers/bcma/driver_chipcommon_pmu.c @@ -174,19 +174,35 @@ u32 bcma_pmu_get_alp_clock(struct bcma_drv_cc *cc) struct bcma_bus *bus = cc->core->bus; switch (bus->chipinfo.id) { + case BCMA_CHIP_ID_BCM4313: + case BCMA_CHIP_ID_BCM43224: + case BCMA_CHIP_ID_BCM43225: + case BCMA_CHIP_ID_BCM43227: + case BCMA_CHIP_ID_BCM43228: + case BCMA_CHIP_ID_BCM4331: + case BCMA_CHIP_ID_BCM43421: + case BCMA_CHIP_ID_BCM43428: + case BCMA_CHIP_ID_BCM43431: case BCMA_CHIP_ID_BCM4716: - case BCMA_CHIP_ID_BCM4748: case BCMA_CHIP_ID_BCM47162: - case BCMA_CHIP_ID_BCM4313: - case BCMA_CHIP_ID_BCM5357: + case BCMA_CHIP_ID_BCM4748: case BCMA_CHIP_ID_BCM4749: + case BCMA_CHIP_ID_BCM5357: case BCMA_CHIP_ID_BCM53572: + case BCMA_CHIP_ID_BCM6362: /* always 20Mhz */ return 20000 * 1000; - case BCMA_CHIP_ID_BCM5356: case BCMA_CHIP_ID_BCM4706: + case BCMA_CHIP_ID_BCM5356: /* always 25Mhz */ return 25000 * 1000; + case BCMA_CHIP_ID_BCM43460: + case BCMA_CHIP_ID_BCM4352: + case BCMA_CHIP_ID_BCM4360: + if (cc->status & BCMA_CC_CHIPST_4360_XTAL_40MZ) + return 40000 * 1000; + else + return 20000 * 1000; default: bcma_warn(bus, "No ALP clock specified for %04X device, pmu rev. %d, using default %d Hz\n", bus->chipinfo.id, cc->pmu.rev, BCMA_CC_PMU_ALP_CLOCK); diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h index 8390c474f69a..1db4c6de372e 100644 --- a/include/linux/bcma/bcma_driver_chipcommon.h +++ b/include/linux/bcma/bcma_driver_chipcommon.h @@ -104,6 +104,7 @@ #define BCMA_CC_CHIPST_4706_MIPS_BENDIAN BIT(3) /* 0: little, 1: big endian */ #define BCMA_CC_CHIPST_4706_PCIE1_DISABLE BIT(5) /* PCIE1 enable strap pin */ #define BCMA_CC_CHIPST_5357_NAND_BOOT BIT(4) /* NAND boot, valid for CC rev 38 and/or BCM5357 */ +#define BCMA_CC_CHIPST_4360_XTAL_40MZ 0x00000001 #define BCMA_CC_JCMD 0x0030 /* Rev >= 10 only */ #define BCMA_CC_JCMD_START 0x80000000 #define BCMA_CC_JCMD_BUSY 0x80000000 -- cgit From 6951618b4b0bb022429ab17d49f2fa3650f21cb4 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Wed, 27 Mar 2013 17:23:11 +0100 Subject: bcma: export bcma_chipco_get_alp_clock() This function will be used by brcmsmac. Signed-off-by: Hauke Mehrtens Signed-off-by: John W. 
Linville --- drivers/bcma/driver_chipcommon.c | 3 ++- include/linux/bcma/bcma_driver_chipcommon.h | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/bcma/driver_chipcommon.c b/drivers/bcma/driver_chipcommon.c index 28fa50ad87be..88db0cb7bf19 100644 --- a/drivers/bcma/driver_chipcommon.c +++ b/drivers/bcma/driver_chipcommon.c @@ -25,13 +25,14 @@ static inline u32 bcma_cc_write32_masked(struct bcma_drv_cc *cc, u16 offset, return value; } -static u32 bcma_chipco_get_alp_clock(struct bcma_drv_cc *cc) +u32 bcma_chipco_get_alp_clock(struct bcma_drv_cc *cc) { if (cc->capabilities & BCMA_CC_CAP_PMU) return bcma_pmu_get_alp_clock(cc); return 20000000; } +EXPORT_SYMBOL_GPL(bcma_chipco_get_alp_clock); static u32 bcma_chipco_watchdog_get_max_timer(struct bcma_drv_cc *cc) { diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h index 1db4c6de372e..453fcc914683 100644 --- a/include/linux/bcma/bcma_driver_chipcommon.h +++ b/include/linux/bcma/bcma_driver_chipcommon.h @@ -608,6 +608,8 @@ void bcma_chipco_bcm4331_ext_pa_lines_ctl(struct bcma_drv_cc *cc, bool enable); extern u32 bcma_chipco_watchdog_timer_set(struct bcma_drv_cc *cc, u32 ticks); +extern u32 bcma_chipco_get_alp_clock(struct bcma_drv_cc *cc); + void bcma_chipco_irq_mask(struct bcma_drv_cc *cc, u32 mask, u32 value); u32 bcma_chipco_irq_status(struct bcma_drv_cc *cc, u32 mask); -- cgit From 43a5911b3dcec81add87d833cd8c7ddaaa205a47 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 21 Mar 2013 15:59:15 +0000 Subject: regulator: ab8500: Clean out SoC registers Clean out initialisation that is handled by SoC. Regulator settings for Vpll (partly), Vsmps1, Vsmps2, Vsmps3 (partly), Vrf1, Varm, Vape, Vbb, Vmod are cleaned out. They should not be touched by the kernel. We also update many of the initialisation values to be more in-line with the current development efforts of ST-Ericsson internal engineers. 
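The pattern of the cleanup above is worth spelling out: every reg_init entry pairs a register with a mask of the bits the driver is allowed to write, so shrinking a mask (0xff -> 0xe8 and so on) is what guarantees the kernel never touches the SoC/PRCMU-owned bits, regardless of what value platform data supplies. A minimal sketch of that masked-update idiom, with hypothetical read_reg()/write_reg() accessors standing in for the real abx500 register I/O (not code from this patch):

#include <linux/types.h>

/* Hypothetical accessors standing in for the abx500 register I/O. */
extern u8 read_reg(u8 bank, u8 addr);
extern void write_reg(u8 bank, u8 addr, u8 val);

/* Apply an init value while touching only the bits named in @mask. */
static void apply_reg_init(u8 bank, u8 addr, u8 mask, u8 value)
{
	u8 reg = read_reg(bank, addr);

	reg &= ~mask;		/* clear only the bits the driver owns */
	reg |= value & mask;	/* set requested bits; leave the rest alone */
	write_reg(bank, addr, reg);
}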
Signed-off-by: Lee Jones Signed-off-by: Mark Brown --- arch/arm/mach-ux500/board-mop500-regulators.c | 28 ++------ drivers/regulator/ab8500.c | 98 +++++++-------------------- include/linux/regulator/ab8500.h | 12 ---- 3 files changed, 28 insertions(+), 110 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-ux500/board-mop500-regulators.c b/arch/arm/mach-ux500/board-mop500-regulators.c index c962d1d726bb..3d899c51bbac 100644 --- a/arch/arm/mach-ux500/board-mop500-regulators.c +++ b/arch/arm/mach-ux500/board-mop500-regulators.c @@ -139,10 +139,9 @@ static struct regulator_consumer_supply ab8500_vana_consumers[] = { static struct ab8500_regulator_reg_init ab8500_reg_init[] = { /* * VanaRequestCtrl = HP/LP depending on VxRequest - * VpllRequestCtrl = HP/LP depending on VxRequest * VextSupply1RequestCtrl = HP/LP depending on VxRequest */ - INIT_REGULATOR_REGISTER(AB8500_REGUREQUESTCTRL2, 0xfc, 0x00), + INIT_REGULATOR_REGISTER(AB8500_REGUREQUESTCTRL2, 0xf0, 0x00), /* * VextSupply2RequestCtrl = HP/LP depending on VxRequest * VextSupply3RequestCtrl = HP/LP depending on VxRequest @@ -156,16 +155,12 @@ static struct ab8500_regulator_reg_init ab8500_reg_init[] = { */ INIT_REGULATOR_REGISTER(AB8500_REGUREQUESTCTRL4, 0x07, 0x00), /* - * Vsmps1SysClkReq1HPValid = enabled - * Vsmps2SysClkReq1HPValid = enabled - * Vsmps3SysClkReq1HPValid = enabled * VanaSysClkReq1HPValid = disabled - * VpllSysClkReq1HPValid = enabled * Vaux1SysClkReq1HPValid = disabled * Vaux2SysClkReq1HPValid = disabled * Vaux3SysClkReq1HPValid = disabled */ - INIT_REGULATOR_REGISTER(AB8500_REGUSYSCLKREQ1HPVALID1, 0xff, 0x17), + INIT_REGULATOR_REGISTER(AB8500_REGUSYSCLKREQ1HPVALID1, 0xe8, 0x00), /* * VextSupply1SysClkReq1HPValid = disabled * VextSupply2SysClkReq1HPValid = disabled @@ -252,17 +247,7 @@ static struct ab8500_regulator_reg_init ab8500_reg_init[] = { */ INIT_REGULATOR_REGISTER(AB8500_REGUCTRL1VAMIC, 0x03, 0x00), /* - * Vsmps1Regu = HW control - * Vsmps1SelCtrl = Vsmps1 voltage defined by Vsmsp1Sel2 - */ - INIT_REGULATOR_REGISTER(AB8500_VSMPS1REGU, 0x0f, 0x06), - /* - * Vsmps2Regu = HW control - * Vsmps2SelCtrl = Vsmps2 voltage defined by Vsmsp2Sel2 - */ - INIT_REGULATOR_REGISTER(AB8500_VSMPS2REGU, 0x0f, 0x06), - /* - * VPll = Hw controlled + * VPll = Hw controlled (NOTE! 
PRCMU bits) * VanaRegu = force off */ INIT_REGULATOR_REGISTER(AB8500_VPLLVANAREGU, 0x0f, 0x02), @@ -285,14 +270,9 @@ static struct ab8500_regulator_reg_init ab8500_reg_init[] = { */ INIT_REGULATOR_REGISTER(AB8500_VAUX12REGU, 0x0f, 0x01), /* - * Vrf1Regu = HW control * Vaux3Regu = force off */ - INIT_REGULATOR_REGISTER(AB8500_VRF1VAUX3REGU, 0x0f, 0x08), - /* - * Vsmps1 = 1.15V - */ - INIT_REGULATOR_REGISTER(AB8500_VSMPS1SEL1, 0x3f, 0x24), + INIT_REGULATOR_REGISTER(AB8500_VRF1VAUX3REGU, 0x03, 0x00), /* * Vaux1Sel = 2.5 V */ diff --git a/drivers/regulator/ab8500.c b/drivers/regulator/ab8500.c index f1453a66a0fd..919d9fa9605e 100644 --- a/drivers/regulator/ab8500.c +++ b/drivers/regulator/ab8500.c @@ -613,19 +613,10 @@ struct ab8500_reg_init { static struct ab8500_reg_init ab8500_reg_init[] = { /* - * 0x03, VarmRequestCtrl - * 0x0c, VapeRequestCtrl - * 0x30, Vsmps1RequestCtrl - * 0xc0, Vsmps2RequestCtrl - */ - REG_INIT(AB8500_REGUREQUESTCTRL1, 0x03, 0x03, 0xff), - /* - * 0x03, Vsmps3RequestCtrl - * 0x0c, VpllRequestCtrl * 0x30, VanaRequestCtrl * 0xc0, VextSupply1RequestCtrl */ - REG_INIT(AB8500_REGUREQUESTCTRL2, 0x03, 0x04, 0xff), + REG_INIT(AB8500_REGUREQUESTCTRL2, 0x03, 0x04, 0xf0), /* * 0x03, VextSupply2RequestCtrl * 0x0c, VextSupply3RequestCtrl @@ -639,91 +630,74 @@ static struct ab8500_reg_init ab8500_reg_init[] = { */ REG_INIT(AB8500_REGUREQUESTCTRL4, 0x03, 0x06, 0x07), /* - * 0x01, Vsmps1SysClkReq1HPValid - * 0x02, Vsmps2SysClkReq1HPValid - * 0x04, Vsmps3SysClkReq1HPValid * 0x08, VanaSysClkReq1HPValid - * 0x10, VpllSysClkReq1HPValid * 0x20, Vaux1SysClkReq1HPValid * 0x40, Vaux2SysClkReq1HPValid * 0x80, Vaux3SysClkReq1HPValid */ - REG_INIT(AB8500_REGUSYSCLKREQ1HPVALID1, 0x03, 0x07, 0xff), + REG_INIT(AB8500_REGUSYSCLKREQ1HPVALID1, 0x03, 0x07, 0xe8), /* - * 0x01, VapeSysClkReq1HPValid - * 0x02, VarmSysClkReq1HPValid - * 0x04, VbbSysClkReq1HPValid - * 0x08, VmodSysClkReq1HPValid * 0x10, VextSupply1SysClkReq1HPValid * 0x20, VextSupply2SysClkReq1HPValid * 0x40, VextSupply3SysClkReq1HPValid */ - REG_INIT(AB8500_REGUSYSCLKREQ1HPVALID2, 0x03, 0x08, 0x7f), + REG_INIT(AB8500_REGUSYSCLKREQ1HPVALID2, 0x03, 0x08, 0x70), /* - * 0x01, Vsmps1HwHPReq1Valid - * 0x02, Vsmps2HwHPReq1Valid - * 0x04, Vsmps3HwHPReq1Valid * 0x08, VanaHwHPReq1Valid - * 0x10, VpllHwHPReq1Valid * 0x20, Vaux1HwHPReq1Valid * 0x40, Vaux2HwHPReq1Valid * 0x80, Vaux3HwHPReq1Valid */ - REG_INIT(AB8500_REGUHWHPREQ1VALID1, 0x03, 0x09, 0xff), + REG_INIT(AB8500_REGUHWHPREQ1VALID1, 0x03, 0x09, 0xe8), /* * 0x01, VextSupply1HwHPReq1Valid * 0x02, VextSupply2HwHPReq1Valid * 0x04, VextSupply3HwHPReq1Valid - * 0x08, VmodHwHPReq1Valid */ - REG_INIT(AB8500_REGUHWHPREQ1VALID2, 0x03, 0x0a, 0x0f), + REG_INIT(AB8500_REGUHWHPREQ1VALID2, 0x03, 0x0a, 0x07), /* - * 0x01, Vsmps1HwHPReq2Valid - * 0x02, Vsmps2HwHPReq2Valid - * 0x03, Vsmps3HwHPReq2Valid * 0x08, VanaHwHPReq2Valid - * 0x10, VpllHwHPReq2Valid * 0x20, Vaux1HwHPReq2Valid * 0x40, Vaux2HwHPReq2Valid * 0x80, Vaux3HwHPReq2Valid */ - REG_INIT(AB8500_REGUHWHPREQ2VALID1, 0x03, 0x0b, 0xff), + REG_INIT(AB8500_REGUHWHPREQ2VALID1, 0x03, 0x0b, 0xe8), /* * 0x01, VextSupply1HwHPReq2Valid * 0x02, VextSupply2HwHPReq2Valid * 0x04, VextSupply3HwHPReq2Valid - * 0x08, VmodHwHPReq2Valid */ - REG_INIT(AB8500_REGUHWHPREQ2VALID2, 0x03, 0x0c, 0x0f), + REG_INIT(AB8500_REGUHWHPREQ2VALID2, 0x03, 0x0c, 0x07), /* - * 0x01, VapeSwHPReqValid - * 0x02, VarmSwHPReqValid - * 0x04, Vsmps1SwHPReqValid - * 0x08, Vsmps2SwHPReqValid - * 0x10, Vsmps3SwHPReqValid * 0x20, VanaSwHPReqValid - * 0x40, VpllSwHPReqValid * 0x80, Vaux1SwHPReqValid */ 
- REG_INIT(AB8500_REGUSWHPREQVALID1, 0x03, 0x0d, 0xff), + REG_INIT(AB8500_REGUSWHPREQVALID1, 0x03, 0x0d, 0xa0), /* * 0x01, Vaux2SwHPReqValid * 0x02, Vaux3SwHPReqValid * 0x04, VextSupply1SwHPReqValid * 0x08, VextSupply2SwHPReqValid * 0x10, VextSupply3SwHPReqValid - * 0x20, VmodSwHPReqValid */ - REG_INIT(AB8500_REGUSWHPREQVALID2, 0x03, 0x0e, 0x3f), + REG_INIT(AB8500_REGUSWHPREQVALID2, 0x03, 0x0e, 0x1f), /* * 0x02, SysClkReq2Valid1 - * ... + * 0x04, SysClkReq3Valid1 + * 0x08, SysClkReq4Valid1 + * 0x10, SysClkReq5Valid1 + * 0x20, SysClkReq6Valid1 + * 0x40, SysClkReq7Valid1 * 0x80, SysClkReq8Valid1 */ REG_INIT(AB8500_REGUSYSCLKREQVALID1, 0x03, 0x0f, 0xfe), /* * 0x02, SysClkReq2Valid2 - * ... + * 0x04, SysClkReq3Valid2 + * 0x08, SysClkReq4Valid2 + * 0x10, SysClkReq5Valid2 + * 0x20, SysClkReq6Valid2 + * 0x40, SysClkReq7Valid2 * 0x80, SysClkReq8Valid2 */ REG_INIT(AB8500_REGUSYSCLKREQVALID2, 0x03, 0x10, 0xfe), @@ -748,21 +722,7 @@ static struct ab8500_reg_init ab8500_reg_init[] = { */ REG_INIT(AB8500_REGUCTRL1VAMIC, 0x03, 0x84, 0x03), /* - * 0x03, Vsmps1Regu - * 0x0c, Vsmps1SelCtrl - * 0x10, Vsmps1AutoMode - * 0x20, Vsmps1PWMMode - */ - REG_INIT(AB8500_VSMPS1REGU, 0x04, 0x03, 0x3f), - /* - * 0x03, Vsmps2Regu - * 0x0c, Vsmps2SelCtrl - * 0x10, Vsmps2AutoMode - * 0x20, Vsmps2PWMMode - */ - REG_INIT(AB8500_VSMPS2REGU, 0x04, 0x04, 0x3f), - /* - * 0x03, VpllRegu + * 0x03, VpllRegu (NOTE! PRCMU register bits) * 0x0c, VanaRegu */ REG_INIT(AB8500_VPLLVANAREGU, 0x04, 0x06, 0x0f), @@ -785,14 +745,9 @@ static struct ab8500_reg_init ab8500_reg_init[] = { */ REG_INIT(AB8500_VAUX12REGU, 0x04, 0x09, 0x0f), /* - * 0x0c, Vrf1Regu * 0x03, Vaux3Regu */ - REG_INIT(AB8500_VRF1VAUX3REGU, 0x04, 0x0a, 0x0f), - /* - * 0x3f, Vsmps1Sel1 - */ - REG_INIT(AB8500_VSMPS1SEL1, 0x04, 0x13, 0x3f), + REG_INIT(AB8500_VRF1VAUX3REGU, 0x04, 0x0a, 0x03), /* * 0x0f, Vaux1Sel */ @@ -803,16 +758,13 @@ static struct ab8500_reg_init ab8500_reg_init[] = { REG_INIT(AB8500_VAUX2SEL, 0x04, 0x20, 0x0f), /* * 0x07, Vaux3Sel - * 0x30, Vrf1Sel */ - REG_INIT(AB8500_VRF1VAUX3SEL, 0x04, 0x21, 0x37), + REG_INIT(AB8500_VRF1VAUX3SEL, 0x04, 0x21, 0x07), /* * 0x01, VextSupply12LP */ REG_INIT(AB8500_REGUCTRL2SPARE, 0x04, 0x22, 0x01), /* - * 0x01, VpllDisch - * 0x02, Vrf1Disch * 0x04, Vaux1Disch * 0x08, Vaux2Disch * 0x10, Vaux3Disch @@ -820,15 +772,13 @@ static struct ab8500_reg_init ab8500_reg_init[] = { * 0x40, VTVoutDisch * 0x80, VaudioDisch */ - REG_INIT(AB8500_REGUCTRLDISCH, 0x04, 0x43, 0xff), + REG_INIT(AB8500_REGUCTRLDISCH, 0x04, 0x43, 0xfc), /* - * 0x01, VsimDisch * 0x02, VanaDisch * 0x04, VdmicPullDownEna - * 0x08, VpllPullDownEna * 0x10, VdmicDisch */ - REG_INIT(AB8500_REGUCTRLDISCH2, 0x04, 0x44, 0x1f), + REG_INIT(AB8500_REGUCTRLDISCH2, 0x04, 0x44, 0x16), }; static int ab8500_regulator_init_registers(struct platform_device *pdev, diff --git a/include/linux/regulator/ab8500.h b/include/linux/regulator/ab8500.h index 3a8e02687f7b..26792ff360be 100644 --- a/include/linux/regulator/ab8500.h +++ b/include/linux/regulator/ab8500.h @@ -61,7 +61,6 @@ struct ab8500_regulator_reg_init { /* AB8500 registers */ enum ab8500_regulator_reg { - AB8500_REGUREQUESTCTRL1, AB8500_REGUREQUESTCTRL2, AB8500_REGUREQUESTCTRL3, AB8500_REGUREQUESTCTRL4, @@ -78,22 +77,11 @@ enum ab8500_regulator_reg { AB8500_REGUMISC1, AB8500_VAUDIOSUPPLY, AB8500_REGUCTRL1VAMIC, - AB8500_VSMPS1REGU, - AB8500_VSMPS2REGU, - AB8500_VSMPS3REGU, /* NOTE! 
PRCMU register */ AB8500_VPLLVANAREGU, AB8500_VREFDDR, AB8500_EXTSUPPLYREGU, AB8500_VAUX12REGU, AB8500_VRF1VAUX3REGU, - AB8500_VSMPS1SEL1, - AB8500_VSMPS1SEL2, - AB8500_VSMPS1SEL3, - AB8500_VSMPS2SEL1, - AB8500_VSMPS2SEL2, - AB8500_VSMPS2SEL3, - AB8500_VSMPS3SEL1, /* NOTE! PRCMU register */ - AB8500_VSMPS3SEL2, /* NOTE! PRCMU register */ AB8500_VAUX1SEL, AB8500_VAUX2SEL, AB8500_VRF1VAUX3SEL, -- cgit From fbbdb8f096e0e5d8244e1ffa46e364146ab9a440 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Wed, 27 Mar 2013 16:46:06 +0000 Subject: net: fix compile error of implicit declaration of skb_probe_transport_header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The commit 40893fd(net: switch to use skb_probe_transport_header()) involes a new error accidently. When NET_SKBUFF_DATA_USES_OFFSE is not enabled, below compile error happens: CC net/packet/af_packet.o net/packet/af_packet.c: In function ‘packet_sendmsg_spkt’: net/packet/af_packet.c:1516:2: error: implicit declaration of function ‘skb_probe_transport_header’ [-Werror=implicit-function-declaration] cc1: some warnings being treated as errors make[2]: *** [net/packet/af_packet.o] Error 1 make[1]: *** [net/packet] Error 2 make: *** [net] Error 2 As it seems skb_probe_transport_header() is not related to NET_SKBUFF_DATA_USES_OFFSE, we should move the definition of skb_probe_transport_header() out of scope of NET_SKBUFF_DATA_USES_OFFSE macro. Cc: Jason Wang Cc: Eric Dumazet Signed-off-by: Ying Xue Acked-by: Jason Wang Signed-off-by: David S. Miller --- include/linux/skbuff.h | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index fa88b966cb8e..878e0ee81068 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1560,19 +1560,6 @@ static inline void skb_set_transport_header(struct sk_buff *skb, skb->transport_header += offset; } -static inline void skb_probe_transport_header(struct sk_buff *skb, - const int offset_hint) -{ - struct flow_keys keys; - - if (skb_transport_header_was_set(skb)) - return; - else if (skb_flow_dissect(skb, &keys)) - skb_set_transport_header(skb, keys.thoff); - else - skb_set_transport_header(skb, offset_hint); -} - static inline unsigned char *skb_network_header(const struct sk_buff *skb) { return skb->head + skb->network_header; @@ -1716,6 +1703,19 @@ static inline void skb_set_mac_header(struct sk_buff *skb, const int offset) } #endif /* NET_SKBUFF_DATA_USES_OFFSET */ +static inline void skb_probe_transport_header(struct sk_buff *skb, + const int offset_hint) +{ + struct flow_keys keys; + + if (skb_transport_header_was_set(skb)) + return; + else if (skb_flow_dissect(skb, &keys)) + skb_set_transport_header(skb, keys.thoff); + else + skb_set_transport_header(skb, offset_hint); +} + static inline void skb_mac_header_rebuild(struct sk_buff *skb) { if (skb_mac_header_was_set(skb)) { -- cgit From f3d4039242af92a9d93dee2fd9ae47066b20ca29 Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Wed, 27 Mar 2013 10:52:28 +0000 Subject: tokenring: delete last holdout of CONFIG_TR Tokenring support was deleted in v3.5. One last holdout of the macro CONFIG_TR escaped that fate. Until now. Signed-off-by: Paul Bolle Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 56e3e0665272..1dbb02c98946 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -144,8 +144,6 @@ static inline bool dev_xmit_complete(int rc) # else # define LL_MAX_HEADER 96 # endif -#elif IS_ENABLED(CONFIG_TR) -# define LL_MAX_HEADER 48 #else # define LL_MAX_HEADER 32 #endif -- cgit From e5c5d22e8dcf7c2d430336cbf8e180bd38e8daf1 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 28 Mar 2013 13:38:25 +0900 Subject: net: add ETH_P_802_3_MIN Add a new constant ETH_P_802_3_MIN, the minimum ethernet type for an 802.3 frame. Frames with a lower value in the ethernet type field are Ethernet II. Also update all the users of this value that David Miller and I could find to use the new constant. Also correct a bug in util.c. The comparison with ETH_P_802_3_MIN should be >= not >. As suggested by Jesse Gross. Compile tested only. Cc: David Miller Cc: Jesse Gross Cc: Karsten Keil Cc: John W. Linville Cc: Johannes Berg Cc: Bart De Schuymer Cc: Stephen Hemminger Cc: Patrick McHardy Cc: Marcel Holtmann Cc: Gustavo Padovan Cc: Johan Hedberg Cc: linux-bluetooth@vger.kernel.org Cc: netfilter-devel@vger.kernel.org Cc: bridge@lists.linux-foundation.org Cc: linux-wireless@vger.kernel.org Cc: linux1394-devel@lists.sourceforge.net Cc: linux-media@vger.kernel.org Cc: netdev@vger.kernel.org Cc: dev@openvswitch.org Acked-by: Mauro Carvalho Chehab Acked-by: Stefan Richter Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- drivers/firewire/net.c | 2 +- drivers/isdn/i4l/isdn_net.c | 2 +- drivers/media/dvb-core/dvb_net.c | 10 +++++----- drivers/net/ethernet/sun/niu.c | 2 +- drivers/net/plip/plip.c | 2 +- drivers/net/wireless/ray_cs.c | 2 +- include/linux/if_vlan.h | 2 +- include/uapi/linux/if_ether.h | 3 +++ net/atm/lec.h | 2 +- net/bluetooth/bnep/netdev.c | 2 +- net/bridge/netfilter/ebtables.c | 2 +- net/ethernet/eth.c | 2 +- net/mac80211/tx.c | 2 +- net/openvswitch/datapath.c | 2 +- net/openvswitch/flow.c | 6 +++--- net/wireless/util.c | 2 +- 16 files changed, 24 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c index 56796330a162..4d565365e476 100644 --- a/drivers/firewire/net.c +++ b/drivers/firewire/net.c @@ -547,7 +547,7 @@ static int fwnet_finish_incoming_packet(struct net_device *net, if (memcmp(eth->h_dest, net->dev_addr, net->addr_len)) skb->pkt_type = PACKET_OTHERHOST; } - if (ntohs(eth->h_proto) >= 1536) { + if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN) { protocol = eth->h_proto; } else { rawp = (u16 *)skb->data; diff --git a/drivers/isdn/i4l/isdn_net.c b/drivers/isdn/i4l/isdn_net.c index babc621a07fb..88d657dff474 100644 --- a/drivers/isdn/i4l/isdn_net.c +++ b/drivers/isdn/i4l/isdn_net.c @@ -1385,7 +1385,7 @@ isdn_net_type_trans(struct sk_buff *skb, struct net_device *dev) if (memcmp(eth->h_dest, dev->dev_addr, ETH_ALEN)) skb->pkt_type = PACKET_OTHERHOST; } - if (ntohs(eth->h_proto) >= 1536) + if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN) return eth->h_proto; rawp = skb->data; diff --git a/drivers/media/dvb-core/dvb_net.c b/drivers/media/dvb-core/dvb_net.c index 44225b186f6d..83a23afb13ab 100644 --- a/drivers/media/dvb-core/dvb_net.c +++ b/drivers/media/dvb-core/dvb_net.c @@ -185,7 +185,7 @@ static __be16 dvb_net_eth_type_trans(struct sk_buff *skb, skb->pkt_type=PACKET_MULTICAST; } - if (ntohs(eth->h_proto) >= 1536) + if 
(ntohs(eth->h_proto) >= ETH_P_802_3_MIN) return eth->h_proto; rawp = skb->data; @@ -228,9 +228,9 @@ static int ule_test_sndu( struct dvb_net_priv *p ) static int ule_bridged_sndu( struct dvb_net_priv *p ) { struct ethhdr *hdr = (struct ethhdr*) p->ule_next_hdr; - if(ntohs(hdr->h_proto) < 1536) { + if(ntohs(hdr->h_proto) < ETH_P_802_3_MIN) { int framelen = p->ule_sndu_len - ((p->ule_next_hdr+sizeof(struct ethhdr)) - p->ule_skb->data); - /* A frame Type < 1536 for a bridged frame, introduces a LLC Length field. */ + /* A frame Type < ETH_P_802_3_MIN for a bridged frame, introduces a LLC Length field. */ if(framelen != ntohs(hdr->h_proto)) { return -1; } @@ -320,7 +320,7 @@ static int handle_ule_extensions( struct dvb_net_priv *p ) (int) p->ule_sndu_type, l, total_ext_len); #endif - } while (p->ule_sndu_type < 1536); + } while (p->ule_sndu_type < ETH_P_802_3_MIN); return total_ext_len; } @@ -712,7 +712,7 @@ static void dvb_net_ule( struct net_device *dev, const u8 *buf, size_t buf_len ) } /* Handle ULE Extension Headers. */ - if (priv->ule_sndu_type < 1536) { + if (priv->ule_sndu_type < ETH_P_802_3_MIN) { /* There is an extension header. Handle it accordingly. */ int l = handle_ule_extensions(priv); if (l < 0) { diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index e4c1c88e4c2a..95cff98d8a34 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -6618,7 +6618,7 @@ static u64 niu_compute_tx_flags(struct sk_buff *skb, struct ethhdr *ehdr, (len << TXHDR_LEN_SHIFT) | ((l3off / 2) << TXHDR_L3START_SHIFT) | (ihl << TXHDR_IHL_SHIFT) | - ((eth_proto_inner < 1536) ? TXHDR_LLC : 0) | + ((eth_proto_inner < ETH_P_802_3_MIN) ? TXHDR_LLC : 0) | ((eth_proto == ETH_P_8021Q) ? TXHDR_VLAN : 0) | (ipv6 ? TXHDR_IP_VER : 0) | csum_bits); diff --git a/drivers/net/plip/plip.c b/drivers/net/plip/plip.c index bed62d9c53c8..1f7bef90b467 100644 --- a/drivers/net/plip/plip.c +++ b/drivers/net/plip/plip.c @@ -560,7 +560,7 @@ static __be16 plip_type_trans(struct sk_buff *skb, struct net_device *dev) * so don't forget to remove it. 
*/ - if (ntohs(eth->h_proto) >= 1536) + if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN) return eth->h_proto; rawp = skb->data; diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c index 4775b5d172d5..ebada812b3a5 100644 --- a/drivers/net/wireless/ray_cs.c +++ b/drivers/net/wireless/ray_cs.c @@ -953,7 +953,7 @@ static int translate_frame(ray_dev_t *local, struct tx_msg __iomem *ptx, unsigned char *data, int len) { __be16 proto = ((struct ethhdr *)data)->h_proto; - if (ntohs(proto) >= 1536) { /* DIX II ethernet frame */ + if (ntohs(proto) >= ETH_P_802_3_MIN) { /* DIX II ethernet frame */ pr_debug("ray_cs translate_frame DIX II\n"); /* Copy LLC header to card buffer */ memcpy_toio(&ptx->var, eth2_llc, sizeof(eth2_llc)); diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 218a3b686d90..70962f3fdb79 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -339,7 +339,7 @@ static inline void vlan_set_encap_proto(struct sk_buff *skb, */ proto = vhdr->h_vlan_encapsulated_proto; - if (ntohs(proto) >= 1536) { + if (ntohs(proto) >= ETH_P_802_3_MIN) { skb->protocol = proto; return; } diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index 798032d01112..ade07f1c491a 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -94,6 +94,9 @@ #define ETH_P_EDSA 0xDADA /* Ethertype DSA [ NOT AN OFFICIALLY REGISTERED ID ] */ #define ETH_P_AF_IUCV 0xFBFB /* IBM af_iucv [ NOT AN OFFICIALLY REGISTERED ID ] */ +#define ETH_P_802_3_MIN 0x0600 /* If the value in the ethernet type is less than this value + * then the frame is Ethernet II. Else it is 802.3 */ + /* * Non DIX types. Won't clash for 1500 types. */ diff --git a/net/atm/lec.h b/net/atm/lec.h index a86aff9a3c04..4149db1b7885 100644 --- a/net/atm/lec.h +++ b/net/atm/lec.h @@ -58,7 +58,7 @@ struct lane2_ops { * field in h_type field. Data follows immediately after header. * 2. LLC Data frames whose total length, including LLC field and data, * but not padding required to meet the minimum data frame length, - * is less than 1536(0x0600) MUST be encoded by placing that length + * is less than ETH_P_802_3_MIN MUST be encoded by placing that length * in the h_type field. The LLC field follows header immediately. * 3. LLC data frames longer than this maximum MUST be encoded by placing * the value 0 in the h_type field. 
diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c index e58c8b32589c..4b488ec26105 100644 --- a/net/bluetooth/bnep/netdev.c +++ b/net/bluetooth/bnep/netdev.c @@ -136,7 +136,7 @@ static u16 bnep_net_eth_proto(struct sk_buff *skb) struct ethhdr *eh = (void *) skb->data; u16 proto = ntohs(eh->h_proto); - if (proto >= 1536) + if (proto >= ETH_P_802_3_MIN) return proto; if (get_unaligned((__be16 *) skb->data) == htons(0xFFFF)) diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 8d493c91a562..3d110c4fc787 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -138,7 +138,7 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb, ethproto = h->h_proto; if (e->bitmask & EBT_802_3) { - if (FWINV2(ntohs(ethproto) >= 1536, EBT_IPROTO)) + if (FWINV2(ntohs(ethproto) >= ETH_P_802_3_MIN, EBT_IPROTO)) return 1; } else if (!(e->bitmask & EBT_NOPROTO) && FWINV2(e->ethproto != ethproto, EBT_IPROTO)) diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index a36c85eab5b4..5359560926bc 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -195,7 +195,7 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) if (netdev_uses_trailer_tags(dev)) return htons(ETH_P_TRAILER); - if (ntohs(eth->h_proto) >= 1536) + if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN) return eth->h_proto; /* diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 8914d2d2881a..4e8a86163fc7 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2085,7 +2085,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, encaps_data = bridge_tunnel_header; encaps_len = sizeof(bridge_tunnel_header); skip_header_bytes -= 2; - } else if (ethertype >= 0x600) { + } else if (ethertype >= ETH_P_802_3_MIN) { encaps_data = rfc1042_header; encaps_len = sizeof(rfc1042_header); skip_header_bytes -= 2; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index d61cd9971808..8759265a3e46 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -681,7 +681,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) /* Normally, setting the skb 'protocol' field would be handled by a * call to eth_type_trans(), but it assumes there's a sending * device, which we may not have. 
*/ - if (ntohs(eth->h_proto) >= 1536) + if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN) packet->protocol = eth->h_proto; else packet->protocol = htons(ETH_P_802_2); diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index fe0e4215c73d..332486839347 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -466,7 +466,7 @@ static __be16 parse_ethertype(struct sk_buff *skb) proto = *(__be16 *) skb->data; __skb_pull(skb, sizeof(__be16)); - if (ntohs(proto) >= 1536) + if (ntohs(proto) >= ETH_P_802_3_MIN) return proto; if (skb->len < sizeof(struct llc_snap_hdr)) @@ -483,7 +483,7 @@ static __be16 parse_ethertype(struct sk_buff *skb) __skb_pull(skb, sizeof(struct llc_snap_hdr)); - if (ntohs(llc->ethertype) >= 1536) + if (ntohs(llc->ethertype) >= ETH_P_802_3_MIN) return llc->ethertype; return htons(ETH_P_802_2); @@ -1038,7 +1038,7 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) { swkey->eth.type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); - if (ntohs(swkey->eth.type) < 1536) + if (ntohs(swkey->eth.type) < ETH_P_802_3_MIN) return -EINVAL; attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); } else { diff --git a/net/wireless/util.c b/net/wireless/util.c index 37a56ee1e1ed..6cbac99ae03d 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -511,7 +511,7 @@ int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr, encaps_data = bridge_tunnel_header; encaps_len = sizeof(bridge_tunnel_header); skip_header_bytes -= 2; - } else if (ethertype > 0x600) { + } else if (ethertype >= ETH_P_802_3_MIN) { encaps_data = rfc1042_header; encaps_len = sizeof(rfc1042_header); skip_header_bytes -= 2; -- cgit From 3d5a96582303e28c48699f3faaf920ef7d43e6f2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 19 Mar 2013 15:38:50 +0100 Subject: clocksource: make CLOCKSOURCE_OF_DECLARE type safe This ensures that a function pointer passed into CLOCKSOURCE_OF_DECLARE takes the same arguments that we use for calling that function later. Also fix the extraneous semicolon at end of the CLOCKSOURCE_OF_DECLARE definition. 
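The type safety comes from a standard C idiom: comparing a function pointer against NULL cast to the expected pointer type, as in the .data = (fn == (clocksource_of_init_fn)NULL) ? fn : fn initializer above, makes the compiler check that fn has the declared signature, while the ?: expression still evaluates to fn itself, so nothing changes at run time. A self-contained illustration of the idiom (all names below are invented for the example):

#include <stddef.h>

struct device_node;
typedef void (*init_fn)(struct device_node *);

#define CHECKED_FN(fn) ((fn) == (init_fn)NULL ? (fn) : (fn))

static void good_init(struct device_node *np) { (void)np; }
static void bad_init(int *p) { (void)p; }

int main(void)
{
	init_fn f = CHECKED_FN(good_init);	/* accepted: signature matches */
	/*
	 * init_fn g = CHECKED_FN(bad_init);
	 * rejected: "comparison of distinct pointer types", which the
	 * compiler diagnoses (an error in a -Werror build).
	 */
	(void)f;
	(void)bad_init;
	return 0;
}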
Signed-off-by: Arnd Bergmann Acked-by: Rob Herring --- drivers/clocksource/clksrc-of.c | 3 ++- drivers/clocksource/vt8500_timer.c | 2 +- include/linux/clocksource.h | 11 +++++++++-- 3 files changed, 12 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/clocksource/clksrc-of.c b/drivers/clocksource/clksrc-of.c index 3ef11fba781c..37f5325bec95 100644 --- a/drivers/clocksource/clksrc-of.c +++ b/drivers/clocksource/clksrc-of.c @@ -16,6 +16,7 @@ #include #include +#include extern struct of_device_id __clksrc_of_table[]; @@ -26,7 +27,7 @@ void __init clocksource_of_init(void) { struct device_node *np; const struct of_device_id *match; - void (*init_func)(struct device_node *); + clocksource_of_init_fn init_func; for_each_matching_node_and_match(np, __clksrc_of_table, &match) { init_func = match->data; diff --git a/drivers/clocksource/vt8500_timer.c b/drivers/clocksource/vt8500_timer.c index 242255285597..64f553f04fa4 100644 --- a/drivers/clocksource/vt8500_timer.c +++ b/drivers/clocksource/vt8500_timer.c @@ -165,4 +165,4 @@ static void __init vt8500_timer_init(struct device_node *np) 4, 0xf0000000); } -CLOCKSOURCE_OF_DECLARE(vt8500, "via,vt8500-timer", vt8500_timer_init) +CLOCKSOURCE_OF_DECLARE(vt8500, "via,vt8500-timer", vt8500_timer_init); diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 08ed5e19d8c6..192d6d1771ee 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -332,16 +332,23 @@ extern int clocksource_mmio_init(void __iomem *, const char *, extern int clocksource_i8253_init(void); +struct device_node; +typedef void(*clocksource_of_init_fn)(struct device_node *); #ifdef CONFIG_CLKSRC_OF extern void clocksource_of_init(void); #define CLOCKSOURCE_OF_DECLARE(name, compat, fn) \ static const struct of_device_id __clksrc_of_table_##name \ __used __section(__clksrc_of_table) \ - = { .compatible = compat, .data = fn }; + = { .compatible = compat, \ + .data = (fn == (clocksource_of_init_fn)NULL) ? fn : fn } #else static inline void clocksource_of_init(void) {} -#define CLOCKSOURCE_OF_DECLARE(name, compat, fn) +#define CLOCKSOURCE_OF_DECLARE(name, compat, fn) \ + static const struct of_device_id __clksrc_of_table_##name \ + __attribute__((unused)) \ + = { .compatible = compat, \ + .data = (fn == (clocksource_of_init_fn)NULL) ? fn : fn } #endif #endif /* _LINUX_CLOCKSOURCE_H */ -- cgit From b949be5857a4033e00fed67b707774f52619ce60 Mon Sep 17 00:00:00 2001 From: George Spelvin Date: Wed, 27 Mar 2013 14:08:33 +0100 Subject: idr: document exit conditions on idr_for_each_entry better And some manual common subexpression elimination which may help the compiler produce smaller code. Signed-off-by: George Spelvin Signed-off-by: Philipp Reisner Signed-off-by: Jens Axboe --- include/linux/idr.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/idr.h b/include/linux/idr.h index 2640c7e99e51..6ece0583362a 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -122,11 +122,13 @@ static inline void *idr_find(struct idr *idr, int id) * @idp: idr handle * @entry: the type * to use as cursor * @id: id entry's key + * + * @entry and @id do not need to be initialized before the loop, and + * after normal terminatinon @entry is left with the value NULL. This + * is convenient for a "not found" value. 
*/ -#define idr_for_each_entry(idp, entry, id) \ - for (id = 0, entry = (typeof(entry))idr_get_next((idp), &(id)); \ - entry != NULL; \ - ++id, entry = (typeof(entry))idr_get_next((idp), &(id))) +#define idr_for_each_entry(idp, entry, id) \ + for (id = 0; ((entry) = idr_get_next(idp, &(id))) != NULL; ++id) /* * Don't use the following functions. These exist only to suppress -- cgit From 2bd5ed5d6713594eb2b4d234d01217d506279c7d Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 27 Mar 2013 14:08:40 +0100 Subject: drbd: Fix disconnect to keep the peer disk state if connection breaks during operation The issue was that if the connection broke while we did the graceful state change to C_DISCONNECTING (C_TEARDOWN), we returned a success code (SS_CW_NO_NEED) from the state engine. As a result, we failed to call the fence-peer script in such a case. Fixed that by introducing a new error code (SS_OUTDATE_WO_CONN). This one should never reach back into user space. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_nl.c | 7 +++++-- drivers/block/drbd/drbd_state.c | 14 +++++++------- drivers/block/drbd/drbd_strings.c | 1 + include/linux/drbd.h | 3 ++- 4 files changed, 15 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 56bafdcd943e..39e9a91a8f31 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2198,8 +2198,11 @@ static enum drbd_state_rv conn_try_disconnect(struct drbd_tconn *tconn, bool for return SS_SUCCESS; case SS_PRIMARY_NOP: /* Our state checking code wants to see the peer outdated. */ - rv = conn_request_state(tconn, NS2(conn, C_DISCONNECTING, - pdsk, D_OUTDATED), CS_VERBOSE); + rv = conn_request_state(tconn, NS2(conn, C_DISCONNECTING, pdsk, D_OUTDATED), 0); + + if (rv == SS_OUTDATE_WO_CONN) /* lost connection before graceful disconnect succeeded */ + rv = conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_VERBOSE); + break; case SS_CW_FAILED_BY_PEER: /* The peer probably wants to see us outdated. */ diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 22e259f34370..90c5be2b1d30 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -642,6 +642,10 @@ is_valid_soft_transition(union drbd_state os, union drbd_state ns, struct drbd_t && os.conn < C_WF_REPORT_PARAMS) rv = SS_NEED_CONNECTION; /* No NetworkFailure -> SyncTarget etc... */ + if (ns.conn == C_DISCONNECTING && ns.pdsk == D_OUTDATED && + os.conn < C_CONNECTED && os.pdsk > D_OUTDATED) + rv = SS_OUTDATE_WO_CONN; + return rv; } @@ -1748,13 +1752,9 @@ _conn_rq_cond(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state if (test_and_clear_bit(CONN_WD_ST_CHG_FAIL, &tconn->flags)) return SS_CW_FAILED_BY_PEER; - rv = tconn->cstate != C_WF_REPORT_PARAMS ? SS_CW_NO_NEED : SS_UNKNOWN_ERROR; - - if (rv == SS_UNKNOWN_ERROR) - rv = conn_is_valid_transition(tconn, mask, val, 0); - - if (rv == SS_SUCCESS) - rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail.
*/ + rv = conn_is_valid_transition(tconn, mask, val, 0); + if (rv == SS_SUCCESS && tconn->cstate == C_WF_REPORT_PARAMS) + rv = SS_UNKNOWN_ERROR; /* continue waiting */ return rv; } diff --git a/drivers/block/drbd/drbd_strings.c b/drivers/block/drbd/drbd_strings.c index 9a664bd27404..58e08ff2b2ce 100644 --- a/drivers/block/drbd/drbd_strings.c +++ b/drivers/block/drbd/drbd_strings.c @@ -89,6 +89,7 @@ static const char *drbd_state_sw_errors[] = { [-SS_LOWER_THAN_OUTDATED] = "Disk state is lower than outdated", [-SS_IN_TRANSIENT_STATE] = "In transient state, retry after next state change", [-SS_CONCURRENT_ST_CHG] = "Concurrent state changes detected and aborted", + [-SS_OUTDATE_WO_CONN] = "Need a connection for a graceful disconnect/outdate peer", [-SS_O_VOL_PEER_PRI] = "Other vol primary on peer not allowed by config", }; diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 0c5a18ec322c..316330705fd7 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -319,7 +319,8 @@ enum drbd_state_rv { SS_IN_TRANSIENT_STATE = -18, /* Retry after the next state change */ SS_CONCURRENT_ST_CHG = -19, /* Concurrent cluster side state change! */ SS_O_VOL_PEER_PRI = -20, - SS_AFTER_LAST_ERROR = -21, /* Keep this at bottom */ + SS_OUTDATE_WO_CONN = -21, + SS_AFTER_LAST_ERROR = -22, /* Keep this at bottom */ }; /* from drbd_strings.c */ -- cgit From 3990e04df085e0561ab34f84731dc5929585c526 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 27 Mar 2013 14:08:48 +0100 Subject: drbd: use sched_setscheduler() It went unnoticed for some time that assigning to current->policy is no longer sufficient to set a real-time priority for a kernel thread. Reported-by: Charlie Suffin Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_receiver.c | 6 ++++-- include/linux/drbd.h | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index a75c0b134856..0f449bbf0edf 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -5257,9 +5257,11 @@ int drbd_asender(struct drbd_thread *thi) bool ping_timeout_active = false; struct net_conf *nc; int ping_timeo, tcp_cork, ping_int; + struct sched_param param = { .sched_priority = 2 }; - current->policy = SCHED_RR; /* Make this a realtime task! */ - current->rt_priority = 2; /* more important than all other tasks */ + rv = sched_setscheduler(current, SCHED_RR, &param); + if (rv < 0) + conn_err(tconn, "drbd_asender: ERROR set priority, ret=%d\n", rv); while (get_t_state(thi) == RUNNING) { drbd_thread_current_set_cpu(thi); diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 316330705fd7..1b4d4ee1168f 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -52,7 +52,7 @@ #endif extern const char *drbd_buildtag(void); -#define REL_VERSION "8.4.2" +#define REL_VERSION "8.4.3" #define API_VERSION 1 #define PRO_VERSION_MIN 86 #define PRO_VERSION_MAX 101 -- cgit From 84ebc10294a3d7be4c66f51070b7aedbaa24de9b Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 27 Mar 2013 16:14:46 -0400 Subject: USB: remove CONFIG_USB_SUSPEND option This patch (as1675) removes the CONFIG_USB_SUSPEND option, essentially replacing it everywhere with CONFIG_PM_RUNTIME (except for one place in hub.c, where it is replaced with CONFIG_PM because the code needs to be used in both runtime and system PM). The net result is code shrinkage and simplification.
There's very little point in keeping CONFIG_USB_SUSPEND because almost everybody enables it. The few that don't will find that the usbcore module has gotten somewhat bigger and they will have to take active measures if they want to prevent hubs from being runtime suspended. Signed-off-by: Alan Stern CC: Peter Chen Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/Kconfig | 16 ---------------- drivers/usb/core/driver.c | 4 ++-- drivers/usb/core/hcd.c | 10 +++++----- drivers/usb/core/hub.c | 42 +++++++----------------------------------- drivers/usb/core/port.c | 4 ++-- drivers/usb/core/sysfs.c | 4 ++-- drivers/usb/core/usb.c | 4 ++-- drivers/usb/core/usb.h | 2 +- drivers/usb/host/ehci-pci.c | 12 +----------- drivers/usb/host/ohci-hub.c | 6 ------ drivers/usb/host/sl811-hcd.c | 2 +- drivers/usb/host/u132-hcd.c | 9 +++++---- drivers/usb/host/xhci-hub.c | 2 +- drivers/usb/host/xhci.c | 4 ++-- include/linux/usb.h | 2 +- include/linux/usb/hcd.h | 6 +++--- 16 files changed, 35 insertions(+), 94 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/core/Kconfig b/drivers/usb/core/Kconfig index f70c1a1694ad..175701a2dae4 100644 --- a/drivers/usb/core/Kconfig +++ b/drivers/usb/core/Kconfig @@ -38,22 +38,6 @@ config USB_DYNAMIC_MINORS If you are unsure about this, say N here. -config USB_SUSPEND - bool "USB runtime power management (autosuspend) and wakeup" - depends on USB && PM_RUNTIME - help - If you say Y here, you can use driver calls or the sysfs - "power/control" file to enable or disable autosuspend for - individual USB peripherals (see - Documentation/usb/power-management.txt for more details). - - Also, USB "remote wakeup" signaling is supported, whereby some - USB devices (like keyboards and network adapters) can wake up - their parent hub. That wakeup cascades up the USB tree, and - could wake the system from states like suspend-to-RAM. - - If you are unsure about this, say N here. 
- config USB_OTG bool "OTG support" depends on USB diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c index eb1d00a3543a..84d2b0585810 100644 --- a/drivers/usb/core/driver.c +++ b/drivers/usb/core/driver.c @@ -1412,7 +1412,7 @@ int usb_resume(struct device *dev, pm_message_t msg) #endif /* CONFIG_PM */ -#ifdef CONFIG_USB_SUSPEND +#ifdef CONFIG_PM_RUNTIME /** * usb_enable_autosuspend - allow a USB device to be autosuspended @@ -1780,7 +1780,7 @@ int usb_set_usb2_hardware_lpm(struct usb_device *udev, int enable) return ret; } -#endif /* CONFIG_USB_SUSPEND */ +#endif /* CONFIG_PM_RUNTIME */ struct bus_type usb_bus_type = { .name = "usb", diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index f9ec44cbb82f..d53547d2e4c7 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -2125,7 +2125,7 @@ int hcd_bus_resume(struct usb_device *rhdev, pm_message_t msg) #endif /* CONFIG_PM */ -#ifdef CONFIG_USB_SUSPEND +#ifdef CONFIG_PM_RUNTIME /* Workqueue routine for root-hub remote wakeup */ static void hcd_resume_work(struct work_struct *work) @@ -2160,7 +2160,7 @@ void usb_hcd_resume_root_hub (struct usb_hcd *hcd) } EXPORT_SYMBOL_GPL(usb_hcd_resume_root_hub); -#endif /* CONFIG_USB_SUSPEND */ +#endif /* CONFIG_PM_RUNTIME */ /*-------------------------------------------------------------------------*/ @@ -2336,7 +2336,7 @@ struct usb_hcd *usb_create_shared_hcd(const struct hc_driver *driver, init_timer(&hcd->rh_timer); hcd->rh_timer.function = rh_timer_func; hcd->rh_timer.data = (unsigned long) hcd; -#ifdef CONFIG_USB_SUSPEND +#ifdef CONFIG_PM_RUNTIME INIT_WORK(&hcd->wakeup_work, hcd_resume_work); #endif @@ -2590,7 +2590,7 @@ error_create_attr_group: hcd->rh_registered = 0; spin_unlock_irq(&hcd_root_hub_lock); -#ifdef CONFIG_USB_SUSPEND +#ifdef CONFIG_PM_RUNTIME cancel_work_sync(&hcd->wakeup_work); #endif mutex_lock(&usb_bus_list_lock); @@ -2645,7 +2645,7 @@ void usb_remove_hcd(struct usb_hcd *hcd) hcd->rh_registered = 0; spin_unlock_irq (&hcd_root_hub_lock); -#ifdef CONFIG_USB_SUSPEND +#ifdef CONFIG_PM_RUNTIME cancel_work_sync(&hcd->wakeup_work); #endif diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 443d5cc9330b..feef9351463d 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -2827,7 +2827,7 @@ void usb_enable_ltm(struct usb_device *udev) } EXPORT_SYMBOL_GPL(usb_enable_ltm); -#ifdef CONFIG_USB_SUSPEND +#ifdef CONFIG_PM /* * usb_disable_function_remotewakeup - disable usb3.0 * device's function remote wakeup @@ -2886,7 +2886,7 @@ static int usb_disable_function_remotewakeup(struct usb_device *udev) * Linux (2.6) currently has NO mechanisms to initiate that: no khubd * timer, no SRP, no requests through sysfs. * - * If CONFIG_USB_SUSPEND isn't enabled, non-SuperSpeed devices really get + * If Runtime PM isn't enabled or used, non-SuperSpeed devices really get * suspended only when their bus goes into global suspend (i.e., the root * hub is suspended). Nevertheless, we change @udev->state to * USB_STATE_SUSPENDED as this is the device's "logical" state. The actual @@ -3247,6 +3247,10 @@ int usb_port_resume(struct usb_device *udev, pm_message_t msg) return status; } +#endif /* CONFIG_PM */ + +#ifdef CONFIG_PM_RUNTIME + /* caller has locked udev */ int usb_remote_wakeup(struct usb_device *udev) { @@ -3263,38 +3267,6 @@ int usb_remote_wakeup(struct usb_device *udev) return status; } -#else /* CONFIG_USB_SUSPEND */ - -/* When CONFIG_USB_SUSPEND isn't set, we never suspend or resume any ports. 
*/ - -int usb_port_suspend(struct usb_device *udev, pm_message_t msg) -{ - return 0; -} - -/* However we may need to do a reset-resume */ - -int usb_port_resume(struct usb_device *udev, pm_message_t msg) -{ - struct usb_hub *hub = usb_hub_to_struct_hub(udev->parent); - int port1 = udev->portnum; - int status; - u16 portchange, portstatus; - - status = hub_port_status(hub, port1, &portstatus, &portchange); - status = check_port_resume_type(udev, - hub, port1, status, portchange, portstatus); - - if (status) { - dev_dbg(&udev->dev, "can't resume, status %d\n", status); - hub_port_logical_disconnect(hub, port1); - } else if (udev->reset_resume) { - dev_dbg(&udev->dev, "reset-resume\n"); - status = usb_reset_and_verify_device(udev); - } - return status; -} - #endif static int check_ports_changed(struct usb_hub *hub) @@ -4356,7 +4328,7 @@ static void hub_port_connect_change(struct usb_hub *hub, int port1, if (portstatus & USB_PORT_STAT_ENABLE) { status = 0; /* Nothing to do */ -#ifdef CONFIG_USB_SUSPEND +#ifdef CONFIG_PM_RUNTIME } else if (udev->state == USB_STATE_SUSPENDED && udev->persist_enabled) { /* For a suspended device, treat this as a diff --git a/drivers/usb/core/port.c b/drivers/usb/core/port.c index 797f9d514732..06c4894bf181 100644 --- a/drivers/usb/core/port.c +++ b/drivers/usb/core/port.c @@ -71,7 +71,7 @@ static void usb_port_device_release(struct device *dev) kfree(port_dev); } -#ifdef CONFIG_USB_SUSPEND +#ifdef CONFIG_PM_RUNTIME static int usb_port_runtime_resume(struct device *dev) { struct usb_port *port_dev = to_usb_port(dev); @@ -139,7 +139,7 @@ static int usb_port_runtime_suspend(struct device *dev) #endif static const struct dev_pm_ops usb_port_pm_ops = { -#ifdef CONFIG_USB_SUSPEND +#ifdef CONFIG_PM_RUNTIME .runtime_suspend = usb_port_runtime_suspend, .runtime_resume = usb_port_runtime_resume, .runtime_idle = pm_generic_runtime_idle, diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c index 3f81a3dc6867..aa38db44818a 100644 --- a/drivers/usb/core/sysfs.c +++ b/drivers/usb/core/sysfs.c @@ -338,7 +338,7 @@ static void remove_persist_attributes(struct device *dev) #endif /* CONFIG_PM */ -#ifdef CONFIG_USB_SUSPEND +#ifdef CONFIG_PM_RUNTIME static ssize_t show_connected_duration(struct device *dev, struct device_attribute *attr, @@ -544,7 +544,7 @@ static void remove_power_attributes(struct device *dev) #define add_power_attributes(dev) 0 #define remove_power_attributes(dev) do {} while (0) -#endif /* CONFIG_USB_SUSPEND */ +#endif /* CONFIG_PM_RUNTIME */ /* Descriptor fields */ diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c index f81b92572735..03eb7ae8fc1a 100644 --- a/drivers/usb/core/usb.c +++ b/drivers/usb/core/usb.c @@ -49,7 +49,7 @@ const char *usbcore_name = "usbcore"; static bool nousb; /* Disable USB when built into kernel image */ -#ifdef CONFIG_USB_SUSPEND +#ifdef CONFIG_PM_RUNTIME static int usb_autosuspend_delay = 2; /* Default delay value, * in seconds */ module_param_named(autosuspend, usb_autosuspend_delay, int, 0644); @@ -307,7 +307,7 @@ static const struct dev_pm_ops usb_device_pm_ops = { .thaw = usb_dev_thaw, .poweroff = usb_dev_poweroff, .restore = usb_dev_restore, -#ifdef CONFIG_USB_SUSPEND +#ifdef CONFIG_PM_RUNTIME .runtime_suspend = usb_runtime_suspend, .runtime_resume = usb_runtime_resume, .runtime_idle = usb_runtime_idle, diff --git a/drivers/usb/core/usb.h b/drivers/usb/core/usb.h index a7f20bde0e5e..823857767a16 100644 --- a/drivers/usb/core/usb.h +++ b/drivers/usb/core/usb.h @@ -93,7 +93,7 @@ static inline int 
usb_port_resume(struct usb_device *udev, pm_message_t msg) #endif -#ifdef CONFIG_USB_SUSPEND +#ifdef CONFIG_PM_RUNTIME extern void usb_autosuspend_device(struct usb_device *udev); extern int usb_autoresume_device(struct usb_device *udev); diff --git a/drivers/usb/host/ehci-pci.c b/drivers/usb/host/ehci-pci.c index 170b9399e09f..a573d5ff9adc 100644 --- a/drivers/usb/host/ehci-pci.c +++ b/drivers/usb/host/ehci-pci.c @@ -292,17 +292,7 @@ static int ehci_pci_setup(struct usb_hcd *hcd) } } -#ifdef CONFIG_USB_SUSPEND - /* REVISIT: the controller works fine for wakeup iff the root hub - * itself is "globally" suspended, but usbcore currently doesn't - * understand such things. - * - * System suspend currently expects to be able to suspend the entire - * device tree, device-at-a-time. If we failed selective suspend - * reports, system suspend would fail; so the root hub code must claim - * success. That's lying to usbcore, and it matters for runtime - * PM scenarios with selective suspend and remote wakeup... - */ +#ifdef CONFIG_PM_RUNTIME if (ehci->no_selective_suspend && device_can_wakeup(&pdev->dev)) ehci_warn(ehci, "selective suspend/wakeup unavailable\n"); #endif diff --git a/drivers/usb/host/ohci-hub.c b/drivers/usb/host/ohci-hub.c index db09dae7b557..60ff4220e8b4 100644 --- a/drivers/usb/host/ohci-hub.c +++ b/drivers/usb/host/ohci-hub.c @@ -580,14 +580,8 @@ static int ohci_start_port_reset (struct usb_hcd *hcd, unsigned port) /* See usb 7.1.7.5: root hubs must issue at least 50 msec reset signaling, * not necessarily continuous ... to guard against resume signaling. - * The short timeout is safe for non-root hubs, and is backward-compatible - * with earlier Linux hosts. */ -#ifdef CONFIG_USB_SUSPEND #define PORT_RESET_MSEC 50 -#else -#define PORT_RESET_MSEC 10 -#endif /* this timer value might be vendor-specific ... */ #define PORT_RESET_HW_MSEC 10 diff --git a/drivers/usb/host/sl811-hcd.c b/drivers/usb/host/sl811-hcd.c index d62f0404baaa..15ed7e8d887f 100644 --- a/drivers/usb/host/sl811-hcd.c +++ b/drivers/usb/host/sl811-hcd.c @@ -1755,7 +1755,7 @@ sl811h_probe(struct platform_device *dev) /* for this device there's no useful distinction between the controller * and its root hub, except that the root hub only gets direct PM calls - * when CONFIG_USB_SUSPEND is enabled. + * when CONFIG_PM_RUNTIME is enabled. */ static int diff --git a/drivers/usb/host/u132-hcd.c b/drivers/usb/host/u132-hcd.c index 5efdffe32365..5c124bf5d018 100644 --- a/drivers/usb/host/u132-hcd.c +++ b/drivers/usb/host/u132-hcd.c @@ -3141,10 +3141,11 @@ static int u132_probe(struct platform_device *pdev) #ifdef CONFIG_PM -/* for this device there's no useful distinction between the controller -* and its root hub, except that the root hub only gets direct PM calls -* when CONFIG_USB_SUSPEND is enabled. -*/ +/* + * for this device there's no useful distinction between the controller + * and its root hub, except that the root hub only gets direct PM calls + * when CONFIG_PM_RUNTIME is enabled. 
+ */ static int u132_suspend(struct platform_device *pdev, pm_message_t state) { struct usb_hcd *hcd = platform_get_drvdata(pdev); diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index 68914429482f..187a3ec1069a 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -1075,7 +1075,7 @@ int xhci_bus_suspend(struct usb_hcd *hcd) set_bit(port_index, &bus_state->bus_suspended); } /* USB core sets remote wake mask for USB 3.0 hubs, - * including the USB 3.0 roothub, but only if CONFIG_USB_SUSPEND + * including the USB 3.0 roothub, but only if CONFIG_PM_RUNTIME * is enabled, so also enable remote wake here. */ if (hcd->self.root_hub->do_remote_wakeup) { diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 53b8f89a0b1c..5156b720a53a 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -3801,7 +3801,7 @@ int xhci_find_raw_port_number(struct usb_hcd *hcd, int port1) return raw_port; } -#ifdef CONFIG_USB_SUSPEND +#ifdef CONFIG_PM_RUNTIME /* BESL to HIRD Encoding array for USB2 LPM */ static int xhci_besl_encoding[16] = {125, 150, 200, 300, 400, 500, 1000, 2000, @@ -4051,7 +4051,7 @@ int xhci_update_device(struct usb_hcd *hcd, struct usb_device *udev) return 0; } -#endif /* CONFIG_USB_SUSPEND */ +#endif /* CONFIG_PM_RUNTIME */ /*---------------------- USB 3.0 Link PM functions ------------------------*/ diff --git a/include/linux/usb.h b/include/linux/usb.h index 8d4bc173d66a..a0bee5a28d1a 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -617,7 +617,7 @@ static inline bool usb_acpi_power_manageable(struct usb_device *hdev, int index) #endif /* USB autosuspend and autoresume */ -#ifdef CONFIG_USB_SUSPEND +#ifdef CONFIG_PM_RUNTIME extern void usb_enable_autosuspend(struct usb_device *udev); extern void usb_disable_autosuspend(struct usb_device *udev); diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 59694b5e5e90..f5f5c7dfda90 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -84,7 +84,7 @@ struct usb_hcd { struct timer_list rh_timer; /* drives root-hub polling */ struct urb *status_urb; /* the current status urb */ -#ifdef CONFIG_USB_SUSPEND +#ifdef CONFIG_PM_RUNTIME struct work_struct wakeup_work; /* for remote wakeup */ #endif @@ -593,14 +593,14 @@ extern int hcd_bus_suspend(struct usb_device *rhdev, pm_message_t msg); extern int hcd_bus_resume(struct usb_device *rhdev, pm_message_t msg); #endif /* CONFIG_PM */ -#ifdef CONFIG_USB_SUSPEND +#ifdef CONFIG_PM_RUNTIME extern void usb_hcd_resume_root_hub(struct usb_hcd *hcd); #else static inline void usb_hcd_resume_root_hub(struct usb_hcd *hcd) { return; } -#endif /* CONFIG_USB_SUSPEND */ +#endif /* CONFIG_PM_RUNTIME */ /*-------------------------------------------------------------------------*/ -- cgit From 5d0f6131a79adfa1fb51309c5f81a2a4ef879dd4 Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Mon, 4 Mar 2013 18:40:58 -0700 Subject: NVMe: Add nvme-scsi.c Translates SCSI commands in SG_IO ioctl to NVMe commands. Uses the scsi-nvme translation spec from nvmexpress.org as reference. 
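User space reaches this translation layer through the existing SG_IO ioctl on the NVMe block device node, so standard sg-based tools keep working. A minimal sketch of such a caller (the device path is an assumption; the driver turns this INQUIRY into NVMe Identify commands, as the translation code below shows):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <scsi/sg.h>

int main(void)
{
	unsigned char cdb[6] = { 0x12, 0, 0, 0, 96, 0 };	/* INQUIRY, alloc len 96 */
	unsigned char buf[96], sense[32];
	struct sg_io_hdr io;
	int fd = open("/dev/nvme0n1", O_RDONLY);		/* assumed namespace node */

	if (fd < 0)
		return 1;

	memset(&io, 0, sizeof(io));
	io.interface_id = 'S';
	io.cmd_len = sizeof(cdb);
	io.cmdp = cdb;
	io.dxfer_direction = SG_DXFER_FROM_DEV;
	io.dxfer_len = sizeof(buf);
	io.dxferp = buf;
	io.mx_sb_len = sizeof(sense);
	io.sbp = sense;
	io.timeout = 5000;					/* milliseconds */

	/* The driver answers with data built from NVMe Identify results. */
	if (ioctl(fd, SG_IO, &io) == 0 && io.status == 0)
		printf("vendor %.8s model %.16s\n",
		       (char *)&buf[8], (char *)&buf[16]);

	close(fd);
	return 0;
}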
Signed-off-by: Vishal Verma Signed-off-by: Matthew Wilcox --- drivers/block/Makefile | 2 +- drivers/block/nvme-core.c | 37 +- drivers/block/nvme-scsi.c | 2941 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/nvme.h | 35 + 4 files changed, 2997 insertions(+), 18 deletions(-) create mode 100644 drivers/block/nvme-scsi.c (limited to 'include/linux') diff --git a/drivers/block/Makefile b/drivers/block/Makefile index 2a41c86d3ad9..ca07399a8d99 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -42,5 +42,5 @@ obj-$(CONFIG_BLK_DEV_PCIESSD_MTIP32XX) += mtip32xx/ obj-$(CONFIG_BLK_DEV_RSXX) += rsxx/ -nvme-y := nvme-core.o +nvme-y := nvme-core.o nvme-scsi.o swim_mod-y := swim.o swim_asm.o diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index d0cfb85d5582..a89f7dbefba0 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -39,7 +39,7 @@ #include #include #include - +#include #include #define NVME_Q_DEPTH 1024 @@ -224,12 +224,12 @@ static void *cancel_cmdid(struct nvme_queue *nvmeq, int cmdid, return ctx; } -static struct nvme_queue *get_nvmeq(struct nvme_dev *dev) +struct nvme_queue *get_nvmeq(struct nvme_dev *dev) { return dev->queues[get_cpu() + 1]; } -static void put_nvmeq(struct nvme_queue *nvmeq) +void put_nvmeq(struct nvme_queue *nvmeq) { put_cpu(); } @@ -290,7 +290,7 @@ nvme_alloc_iod(unsigned nseg, unsigned nbytes, gfp_t gfp) return iod; } -static void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod) +void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod) { const int last_prp = PAGE_SIZE / 8 - 1; int i; @@ -339,9 +339,8 @@ static void bio_completion(struct nvme_dev *dev, void *ctx, } /* length is in bytes. gfp flags indicates whether we may sleep. */ -static int nvme_setup_prps(struct nvme_dev *dev, - struct nvme_common_command *cmd, struct nvme_iod *iod, - int total_len, gfp_t gfp) +int nvme_setup_prps(struct nvme_dev *dev, struct nvme_common_command *cmd, + struct nvme_iod *iod, int total_len, gfp_t gfp) { struct dma_pool *pool; int length = total_len; @@ -512,7 +511,7 @@ static int nvme_submit_flush(struct nvme_queue *nvmeq, struct nvme_ns *ns, return 0; } -static int nvme_submit_flush_data(struct nvme_queue *nvmeq, struct nvme_ns *ns) +int nvme_submit_flush_data(struct nvme_queue *nvmeq, struct nvme_ns *ns) { int cmdid = alloc_cmdid(nvmeq, (void *)CMD_CTX_FLUSH, special_completion, NVME_IO_TIMEOUT); @@ -715,8 +714,8 @@ static void sync_completion(struct nvme_dev *dev, void *ctx, * Returns 0 on success. 
If the result is negative, it's a Linux error code; * if the result is positive, it's an NVM Express status code */ -static int nvme_submit_sync_cmd(struct nvme_queue *nvmeq, - struct nvme_command *cmd, u32 *result, unsigned timeout) +int nvme_submit_sync_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd, + u32 *result, unsigned timeout) { int cmdid; struct sync_cmd_info cmdinfo; @@ -745,7 +744,7 @@ static int nvme_submit_sync_cmd(struct nvme_queue *nvmeq, return cmdinfo.status; } -static int nvme_submit_admin_cmd(struct nvme_dev *dev, struct nvme_command *cmd, +int nvme_submit_admin_cmd(struct nvme_dev *dev, struct nvme_command *cmd, u32 *result) { return nvme_submit_sync_cmd(dev->queues[0], cmd, result, ADMIN_TIMEOUT); @@ -818,7 +817,7 @@ static int adapter_delete_sq(struct nvme_dev *dev, u16 sqid) return adapter_delete_queue(dev, nvme_admin_delete_sq, sqid); } -static int nvme_identify(struct nvme_dev *dev, unsigned nsid, unsigned cns, +int nvme_identify(struct nvme_dev *dev, unsigned nsid, unsigned cns, dma_addr_t dma_addr) { struct nvme_command c; @@ -832,7 +831,7 @@ static int nvme_identify(struct nvme_dev *dev, unsigned nsid, unsigned cns, return nvme_submit_admin_cmd(dev, &c, NULL); } -static int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid, +int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid, dma_addr_t dma_addr, u32 *result) { struct nvme_command c; @@ -846,8 +845,8 @@ static int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid, return nvme_submit_admin_cmd(dev, &c, result); } -static int nvme_set_features(struct nvme_dev *dev, unsigned fid, - unsigned dword11, dma_addr_t dma_addr, u32 *result) +int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11, + dma_addr_t dma_addr, u32 *result) { struct nvme_command c; @@ -1065,7 +1064,7 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) return result; } -static struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write, +struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write, unsigned long addr, unsigned length) { int i, err, count, nents, offset; @@ -1121,7 +1120,7 @@ static struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write, return ERR_PTR(err); } -static void nvme_unmap_user_pages(struct nvme_dev *dev, int write, +void nvme_unmap_user_pages(struct nvme_dev *dev, int write, struct nvme_iod *iod) { int i; @@ -1257,6 +1256,10 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, return nvme_user_admin_cmd(ns->dev, (void __user *)arg); case NVME_IOCTL_SUBMIT_IO: return nvme_submit_io(ns, (void __user *)arg); + case SG_GET_VERSION_NUM: + return nvme_sg_get_version_num((void __user *)arg); + case SG_IO: + return nvme_sg_io(ns, (void __user *)arg); default: return -ENOTTY; } diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c new file mode 100644 index 000000000000..483af3585c92 --- /dev/null +++ b/drivers/block/nvme-scsi.c @@ -0,0 +1,2941 @@ +/* + * NVM Express device driver + * Copyright (c) 2011, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ + +/* + * Refer to the SCSI-NVMe Translation spec for details on how + * each command is translated. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +static int sg_version_num = 30534; /* 2 digits for each component */ + +#define SNTI_TRANSLATION_SUCCESS 0 +#define SNTI_INTERNAL_ERROR 1 + +/* VPD Page Codes */ +#define VPD_SUPPORTED_PAGES 0x00 +#define VPD_SERIAL_NUMBER 0x80 +#define VPD_DEVICE_IDENTIFIERS 0x83 +#define VPD_EXTENDED_INQUIRY 0x86 +#define VPD_BLOCK_DEV_CHARACTERISTICS 0xB1 + +/* CDB offsets */ +#define REPORT_LUNS_CDB_ALLOC_LENGTH_OFFSET 6 +#define REPORT_LUNS_SR_OFFSET 2 +#define READ_CAP_16_CDB_ALLOC_LENGTH_OFFSET 10 +#define REQUEST_SENSE_CDB_ALLOC_LENGTH_OFFSET 4 +#define REQUEST_SENSE_DESC_OFFSET 1 +#define REQUEST_SENSE_DESC_MASK 0x01 +#define DESCRIPTOR_FORMAT_SENSE_DATA_TYPE 1 +#define INQUIRY_EVPD_BYTE_OFFSET 1 +#define INQUIRY_PAGE_CODE_BYTE_OFFSET 2 +#define INQUIRY_EVPD_BIT_MASK 1 +#define INQUIRY_CDB_ALLOCATION_LENGTH_OFFSET 3 +#define START_STOP_UNIT_CDB_IMMED_OFFSET 1 +#define START_STOP_UNIT_CDB_IMMED_MASK 0x1 +#define START_STOP_UNIT_CDB_POWER_COND_MOD_OFFSET 3 +#define START_STOP_UNIT_CDB_POWER_COND_MOD_MASK 0xF +#define START_STOP_UNIT_CDB_POWER_COND_OFFSET 4 +#define START_STOP_UNIT_CDB_POWER_COND_MASK 0xF0 +#define START_STOP_UNIT_CDB_NO_FLUSH_OFFSET 4 +#define START_STOP_UNIT_CDB_NO_FLUSH_MASK 0x4 +#define START_STOP_UNIT_CDB_START_OFFSET 4 +#define START_STOP_UNIT_CDB_START_MASK 0x1 +#define WRITE_BUFFER_CDB_MODE_OFFSET 1 +#define WRITE_BUFFER_CDB_MODE_MASK 0x1F +#define WRITE_BUFFER_CDB_BUFFER_ID_OFFSET 2 +#define WRITE_BUFFER_CDB_BUFFER_OFFSET_OFFSET 3 +#define WRITE_BUFFER_CDB_PARM_LIST_LENGTH_OFFSET 6 +#define FORMAT_UNIT_CDB_FORMAT_PROT_INFO_OFFSET 1 +#define FORMAT_UNIT_CDB_FORMAT_PROT_INFO_MASK 0xC0 +#define FORMAT_UNIT_CDB_FORMAT_PROT_INFO_SHIFT 6 +#define FORMAT_UNIT_CDB_LONG_LIST_OFFSET 1 +#define FORMAT_UNIT_CDB_LONG_LIST_MASK 0x20 +#define FORMAT_UNIT_CDB_FORMAT_DATA_OFFSET 1 +#define FORMAT_UNIT_CDB_FORMAT_DATA_MASK 0x10 +#define FORMAT_UNIT_SHORT_PARM_LIST_LEN 4 +#define FORMAT_UNIT_LONG_PARM_LIST_LEN 8 +#define FORMAT_UNIT_PROT_INT_OFFSET 3 +#define FORMAT_UNIT_PROT_FIELD_USAGE_OFFSET 0 +#define FORMAT_UNIT_PROT_FIELD_USAGE_MASK 0x07 + +/* Misc. 
defines */ +#define NIBBLE_SHIFT 4 +#define FIXED_SENSE_DATA 0x70 +#define DESC_FORMAT_SENSE_DATA 0x72 +#define FIXED_SENSE_DATA_ADD_LENGTH 10 +#define LUN_ENTRY_SIZE 8 +#define LUN_DATA_HEADER_SIZE 8 +#define ALL_LUNS_RETURNED 0x02 +#define ALL_WELL_KNOWN_LUNS_RETURNED 0x01 +#define RESTRICTED_LUNS_RETURNED 0x00 +#define NVME_POWER_STATE_START_VALID 0x00 +#define NVME_POWER_STATE_ACTIVE 0x01 +#define NVME_POWER_STATE_IDLE 0x02 +#define NVME_POWER_STATE_STANDBY 0x03 +#define NVME_POWER_STATE_LU_CONTROL 0x07 +#define POWER_STATE_0 0 +#define POWER_STATE_1 1 +#define POWER_STATE_2 2 +#define POWER_STATE_3 3 +#define DOWNLOAD_SAVE_ACTIVATE 0x05 +#define DOWNLOAD_SAVE_DEFER_ACTIVATE 0x0E +#define ACTIVATE_DEFERRED_MICROCODE 0x0F +#define FORMAT_UNIT_IMMED_MASK 0x2 +#define FORMAT_UNIT_IMMED_OFFSET 1 +#define KELVIN_TEMP_FACTOR 273 +#define FIXED_FMT_SENSE_DATA_SIZE 18 +#define DESC_FMT_SENSE_DATA_SIZE 8 + +/* SCSI/NVMe defines and bit masks */ +#define INQ_STANDARD_INQUIRY_PAGE 0x00 +#define INQ_SUPPORTED_VPD_PAGES_PAGE 0x00 +#define INQ_UNIT_SERIAL_NUMBER_PAGE 0x80 +#define INQ_DEVICE_IDENTIFICATION_PAGE 0x83 +#define INQ_EXTENDED_INQUIRY_DATA_PAGE 0x86 +#define INQ_BDEV_CHARACTERISTICS_PAGE 0xB1 +#define INQ_SERIAL_NUMBER_LENGTH 0x14 +#define INQ_NUM_SUPPORTED_VPD_PAGES 5 +#define VERSION_SPC_4 0x06 +#define ACA_UNSUPPORTED 0 +#define STANDARD_INQUIRY_LENGTH 36 +#define ADDITIONAL_STD_INQ_LENGTH 31 +#define EXTENDED_INQUIRY_DATA_PAGE_LENGTH 0x3C +#define RESERVED_FIELD 0 + +/* SCSI READ/WRITE Defines */ +#define IO_CDB_WP_MASK 0xE0 +#define IO_CDB_WP_SHIFT 5 +#define IO_CDB_FUA_MASK 0x8 +#define IO_6_CDB_LBA_OFFSET 0 +#define IO_6_CDB_LBA_MASK 0x001FFFFF +#define IO_6_CDB_TX_LEN_OFFSET 4 +#define IO_6_DEFAULT_TX_LEN 256 +#define IO_10_CDB_LBA_OFFSET 2 +#define IO_10_CDB_TX_LEN_OFFSET 7 +#define IO_10_CDB_WP_OFFSET 1 +#define IO_10_CDB_FUA_OFFSET 1 +#define IO_12_CDB_LBA_OFFSET 2 +#define IO_12_CDB_TX_LEN_OFFSET 6 +#define IO_12_CDB_WP_OFFSET 1 +#define IO_12_CDB_FUA_OFFSET 1 +#define IO_16_CDB_FUA_OFFSET 1 +#define IO_16_CDB_WP_OFFSET 1 +#define IO_16_CDB_LBA_OFFSET 2 +#define IO_16_CDB_TX_LEN_OFFSET 10 + +/* Mode Sense/Select defines */ +#define MODE_PAGE_INFO_EXCEP 0x1C +#define MODE_PAGE_CACHING 0x08 +#define MODE_PAGE_CONTROL 0x0A +#define MODE_PAGE_POWER_CONDITION 0x1A +#define MODE_PAGE_RETURN_ALL 0x3F +#define MODE_PAGE_BLK_DES_LEN 0x08 +#define MODE_PAGE_LLBAA_BLK_DES_LEN 0x10 +#define MODE_PAGE_CACHING_LEN 0x14 +#define MODE_PAGE_CONTROL_LEN 0x0C +#define MODE_PAGE_POW_CND_LEN 0x28 +#define MODE_PAGE_INF_EXC_LEN 0x0C +#define MODE_PAGE_ALL_LEN 0x54 +#define MODE_SENSE6_MPH_SIZE 4 +#define MODE_SENSE6_ALLOC_LEN_OFFSET 4 +#define MODE_SENSE_PAGE_CONTROL_OFFSET 2 +#define MODE_SENSE_PAGE_CONTROL_MASK 0xC0 +#define MODE_SENSE_PAGE_CODE_OFFSET 2 +#define MODE_SENSE_PAGE_CODE_MASK 0x3F +#define MODE_SENSE_LLBAA_OFFSET 1 +#define MODE_SENSE_LLBAA_MASK 0x10 +#define MODE_SENSE_LLBAA_SHIFT 4 +#define MODE_SENSE_DBD_OFFSET 1 +#define MODE_SENSE_DBD_MASK 8 +#define MODE_SENSE_DBD_SHIFT 3 +#define MODE_SENSE10_MPH_SIZE 8 +#define MODE_SENSE10_ALLOC_LEN_OFFSET 7 +#define MODE_SELECT_CDB_PAGE_FORMAT_OFFSET 1 +#define MODE_SELECT_CDB_SAVE_PAGES_OFFSET 1 +#define MODE_SELECT_6_CDB_PARAM_LIST_LENGTH_OFFSET 4 +#define MODE_SELECT_10_CDB_PARAM_LIST_LENGTH_OFFSET 7 +#define MODE_SELECT_CDB_PAGE_FORMAT_MASK 0x10 +#define MODE_SELECT_CDB_SAVE_PAGES_MASK 0x1 +#define MODE_SELECT_6_BD_OFFSET 3 +#define MODE_SELECT_10_BD_OFFSET 6 +#define MODE_SELECT_10_LLBAA_OFFSET 4 +#define MODE_SELECT_10_LLBAA_MASK 1 
+#define MODE_SELECT_6_MPH_SIZE 4 +#define MODE_SELECT_10_MPH_SIZE 8 +#define CACHING_MODE_PAGE_WCE_MASK 0x04 +#define MODE_SENSE_BLK_DESC_ENABLED 0 +#define MODE_SENSE_BLK_DESC_COUNT 1 +#define MODE_SELECT_PAGE_CODE_MASK 0x3F +#define SHORT_DESC_BLOCK 8 +#define LONG_DESC_BLOCK 16 +#define MODE_PAGE_POW_CND_LEN_FIELD 0x26 +#define MODE_PAGE_INF_EXC_LEN_FIELD 0x0A +#define MODE_PAGE_CACHING_LEN_FIELD 0x12 +#define MODE_PAGE_CONTROL_LEN_FIELD 0x0A +#define MODE_SENSE_PC_CURRENT_VALUES 0 + +/* Log Sense defines */ +#define LOG_PAGE_SUPPORTED_LOG_PAGES_PAGE 0x00 +#define LOG_PAGE_SUPPORTED_LOG_PAGES_LENGTH 0x07 +#define LOG_PAGE_INFORMATIONAL_EXCEPTIONS_PAGE 0x2F +#define LOG_PAGE_TEMPERATURE_PAGE 0x0D +#define LOG_SENSE_CDB_SP_OFFSET 1 +#define LOG_SENSE_CDB_SP_NOT_ENABLED 0 +#define LOG_SENSE_CDB_PC_OFFSET 2 +#define LOG_SENSE_CDB_PC_MASK 0xC0 +#define LOG_SENSE_CDB_PC_SHIFT 6 +#define LOG_SENSE_CDB_PC_CUMULATIVE_VALUES 1 +#define LOG_SENSE_CDB_PAGE_CODE_MASK 0x3F +#define LOG_SENSE_CDB_ALLOC_LENGTH_OFFSET 7 +#define REMAINING_INFO_EXCP_PAGE_LENGTH 0x8 +#define LOG_INFO_EXCP_PAGE_LENGTH 0xC +#define REMAINING_TEMP_PAGE_LENGTH 0xC +#define LOG_TEMP_PAGE_LENGTH 0x10 +#define LOG_TEMP_UNKNOWN 0xFF +#define SUPPORTED_LOG_PAGES_PAGE_LENGTH 0x3 + +/* Read Capacity defines */ +#define READ_CAP_10_RESP_SIZE 8 +#define READ_CAP_16_RESP_SIZE 32 + +/* NVMe Namespace and Command Defines */ +#define NVME_GET_SMART_LOG_PAGE 0x02 +#define NVME_GET_FEAT_TEMP_THRESH 0x04 +#define BYTES_TO_DWORDS 4 +#define NVME_MAX_FIRMWARE_SLOT 7 + +/* Report LUNs defines */ +#define REPORT_LUNS_FIRST_LUN_OFFSET 8 + +/* SCSI ADDITIONAL SENSE Codes */ + +#define SCSI_ASC_NO_SENSE 0x00 +#define SCSI_ASC_PERIPHERAL_DEV_WRITE_FAULT 0x03 +#define SCSI_ASC_LUN_NOT_READY 0x04 +#define SCSI_ASC_WARNING 0x0B +#define SCSI_ASC_LOG_BLOCK_GUARD_CHECK_FAILED 0x10 +#define SCSI_ASC_LOG_BLOCK_APPTAG_CHECK_FAILED 0x10 +#define SCSI_ASC_LOG_BLOCK_REFTAG_CHECK_FAILED 0x10 +#define SCSI_ASC_UNRECOVERED_READ_ERROR 0x11 +#define SCSI_ASC_MISCOMPARE_DURING_VERIFY 0x1D +#define SCSI_ASC_ACCESS_DENIED_INVALID_LUN_ID 0x20 +#define SCSI_ASC_ILLEGAL_COMMAND 0x20 +#define SCSI_ASC_ILLEGAL_BLOCK 0x21 +#define SCSI_ASC_INVALID_CDB 0x24 +#define SCSI_ASC_INVALID_LUN 0x25 +#define SCSI_ASC_INVALID_PARAMETER 0x26 +#define SCSI_ASC_FORMAT_COMMAND_FAILED 0x31 +#define SCSI_ASC_INTERNAL_TARGET_FAILURE 0x44 + +/* SCSI ADDITIONAL SENSE Code Qualifiers */ + +#define SCSI_ASCQ_CAUSE_NOT_REPORTABLE 0x00 +#define SCSI_ASCQ_FORMAT_COMMAND_FAILED 0x01 +#define SCSI_ASCQ_LOG_BLOCK_GUARD_CHECK_FAILED 0x01 +#define SCSI_ASCQ_LOG_BLOCK_APPTAG_CHECK_FAILED 0x02 +#define SCSI_ASCQ_LOG_BLOCK_REFTAG_CHECK_FAILED 0x03 +#define SCSI_ASCQ_FORMAT_IN_PROGRESS 0x04 +#define SCSI_ASCQ_POWER_LOSS_EXPECTED 0x08 +#define SCSI_ASCQ_INVALID_LUN_ID 0x09 + +/** + * DEVICE_SPECIFIC_PARAMETER in mode parameter header (see sbc2r16) to + * enable DPOFUA support type 0x10 value. 
+ */ +#define DEVICE_SPECIFIC_PARAMETER 0 +#define VPD_ID_DESCRIPTOR_LENGTH sizeof(VPD_IDENTIFICATION_DESCRIPTOR) + +/* MACROs to extract information from CDBs */ + +#define GET_OPCODE(cdb) cdb[0] + +#define GET_U8_FROM_CDB(cdb, index) (cdb[index] << 0) + +#define GET_U16_FROM_CDB(cdb, index) ((cdb[index] << 8) | (cdb[index + 1] << 0)) + +#define GET_U24_FROM_CDB(cdb, index) ((cdb[index] << 16) | \ +(cdb[index + 1] << 8) | \ +(cdb[index + 2] << 0)) + +#define GET_U32_FROM_CDB(cdb, index) ((cdb[index] << 24) | \ +(cdb[index + 1] << 16) | \ +(cdb[index + 2] << 8) | \ +(cdb[index + 3] << 0)) + +#define GET_U64_FROM_CDB(cdb, index) ((((u64)cdb[index]) << 56) | \ +(((u64)cdb[index + 1]) << 48) | \ +(((u64)cdb[index + 2]) << 40) | \ +(((u64)cdb[index + 3]) << 32) | \ +(((u64)cdb[index + 4]) << 24) | \ +(((u64)cdb[index + 5]) << 16) | \ +(((u64)cdb[index + 6]) << 8) | \ +(((u64)cdb[index + 7]) << 0)) + +/* Inquiry Helper Macros */ +#define GET_INQ_EVPD_BIT(cdb) \ +((GET_U8_FROM_CDB(cdb, INQUIRY_EVPD_BYTE_OFFSET) & \ +INQUIRY_EVPD_BIT_MASK) ? 1 : 0) + +#define GET_INQ_PAGE_CODE(cdb) \ +(GET_U8_FROM_CDB(cdb, INQUIRY_PAGE_CODE_BYTE_OFFSET)) + +#define GET_INQ_ALLOC_LENGTH(cdb) \ +(GET_U16_FROM_CDB(cdb, INQUIRY_CDB_ALLOCATION_LENGTH_OFFSET)) + +/* Report LUNs Helper Macros */ +#define GET_REPORT_LUNS_ALLOC_LENGTH(cdb) \ +(GET_U32_FROM_CDB(cdb, REPORT_LUNS_CDB_ALLOC_LENGTH_OFFSET)) + +/* Read Capacity Helper Macros */ +#define GET_READ_CAP_16_ALLOC_LENGTH(cdb) \ +(GET_U32_FROM_CDB(cdb, READ_CAP_16_CDB_ALLOC_LENGTH_OFFSET)) + +#define IS_READ_CAP_16(cdb) \ +((cdb[0] == SERVICE_ACTION_IN && cdb[1] == SAI_READ_CAPACITY_16) ? 1 : 0) + +/* Request Sense Helper Macros */ +#define GET_REQUEST_SENSE_ALLOC_LENGTH(cdb) \ +(GET_U8_FROM_CDB(cdb, REQUEST_SENSE_CDB_ALLOC_LENGTH_OFFSET)) + +/* Mode Sense Helper Macros */ +#define GET_MODE_SENSE_DBD(cdb) \ +((GET_U8_FROM_CDB(cdb, MODE_SENSE_DBD_OFFSET) & MODE_SENSE_DBD_MASK) >> \ +MODE_SENSE_DBD_SHIFT) + +#define GET_MODE_SENSE_LLBAA(cdb) \ +((GET_U8_FROM_CDB(cdb, MODE_SENSE_LLBAA_OFFSET) & \ +MODE_SENSE_LLBAA_MASK) >> MODE_SENSE_LLBAA_SHIFT) + +#define GET_MODE_SENSE_MPH_SIZE(cdb10) \ +(cdb10 ? MODE_SENSE10_MPH_SIZE : MODE_SENSE6_MPH_SIZE) + + +/* Struct to gather data that needs to be extracted from a SCSI CDB. + Not conforming to any particular CDB variant, but compatible with all. 
*/ + +struct nvme_trans_io_cdb { + u8 fua; + u8 prot_info; + u64 lba; + u32 xfer_len; +}; + + +/* Internal Helper Functions */ + + +/* Copy data to userspace memory */ + +static int nvme_trans_copy_to_user(struct sg_io_hdr *hdr, void *from, + unsigned long n) +{ + int res = SNTI_TRANSLATION_SUCCESS; + unsigned long not_copied; + int i; + void *index = from; + size_t remaining = n; + size_t xfer_len; + + if (hdr->iovec_count > 0) { + struct sg_iovec *sgl = hdr->dxferp; + + for (i = 0; i < hdr->iovec_count; i++) { + xfer_len = min(remaining, sgl[i].iov_len); + not_copied = copy_to_user(__user sgl[i].iov_base, index, + xfer_len); + if (not_copied) { + res = -EFAULT; + break; + } + index += xfer_len; + remaining -= xfer_len; + if (remaining == 0) + break; + } + return res; + } + not_copied = copy_to_user(__user hdr->dxferp, from, n); + if (not_copied) + res = -EFAULT; + return res; +} + +/* Copy data from userspace memory */ + +static int nvme_trans_copy_from_user(struct sg_io_hdr *hdr, void *to, + unsigned long n) +{ + int res = SNTI_TRANSLATION_SUCCESS; + unsigned long not_copied; + int i; + void *index = to; + size_t remaining = n; + size_t xfer_len; + + if (hdr->iovec_count > 0) { + struct sg_iovec *sgl = hdr->dxferp; + + for (i = 0; i < hdr->iovec_count; i++) { + xfer_len = min(remaining, sgl[i].iov_len); + not_copied = copy_from_user(index, + __user sgl[i].iov_base, xfer_len); + if (not_copied) { + res = -EFAULT; + break; + } + index += xfer_len; + remaining -= xfer_len; + if (remaining == 0) + break; + } + return res; + } + + not_copied = copy_from_user(to, __user hdr->dxferp, n); + if (not_copied) + res = -EFAULT; + return res; +} + +/* Status/Sense Buffer Writeback */ + +static int nvme_trans_completion(struct sg_io_hdr *hdr, u8 status, u8 sense_key, + u8 asc, u8 ascq) +{ + int res = SNTI_TRANSLATION_SUCCESS; + u8 xfer_len; + u8 resp[DESC_FMT_SENSE_DATA_SIZE]; + + if (scsi_status_is_good(status)) { + hdr->status = SAM_STAT_GOOD; + hdr->masked_status = GOOD; + hdr->host_status = DID_OK; + hdr->driver_status = DRIVER_OK; + hdr->sb_len_wr = 0; + } else { + hdr->status = status; + hdr->masked_status = status >> 1; + hdr->host_status = DID_OK; + hdr->driver_status = DRIVER_OK; + + memset(resp, 0, DESC_FMT_SENSE_DATA_SIZE); + resp[0] = DESC_FORMAT_SENSE_DATA; + resp[1] = sense_key; + resp[2] = asc; + resp[3] = ascq; + + xfer_len = min_t(u8, hdr->mx_sb_len, DESC_FMT_SENSE_DATA_SIZE); + hdr->sb_len_wr = xfer_len; + if (copy_to_user(__user hdr->sbp, resp, xfer_len) > 0) + res = -EFAULT; + } + + return res; +} + +static int nvme_trans_status_code(struct sg_io_hdr *hdr, int nvme_sc) +{ + u8 status, sense_key, asc, ascq; + int res = SNTI_TRANSLATION_SUCCESS; + + /* For non-nvme (Linux) errors, simply return the error code */ + if (nvme_sc < 0) + return nvme_sc; + + /* Mask DNR, More, and reserved fields */ + nvme_sc &= 0x7FF; + + switch (nvme_sc) { + /* Generic Command Status */ + case NVME_SC_SUCCESS: + status = SAM_STAT_GOOD; + sense_key = NO_SENSE; + asc = SCSI_ASC_NO_SENSE; + ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + break; + case NVME_SC_INVALID_OPCODE: + status = SAM_STAT_CHECK_CONDITION; + sense_key = ILLEGAL_REQUEST; + asc = SCSI_ASC_ILLEGAL_COMMAND; + ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + break; + case NVME_SC_INVALID_FIELD: + status = SAM_STAT_CHECK_CONDITION; + sense_key = ILLEGAL_REQUEST; + asc = SCSI_ASC_INVALID_CDB; + ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + break; + case NVME_SC_DATA_XFER_ERROR: + status = SAM_STAT_CHECK_CONDITION; + sense_key = MEDIUM_ERROR; + asc = 
SCSI_ASC_NO_SENSE; + ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + break; + case NVME_SC_POWER_LOSS: + status = SAM_STAT_TASK_ABORTED; + sense_key = ABORTED_COMMAND; + asc = SCSI_ASC_WARNING; + ascq = SCSI_ASCQ_POWER_LOSS_EXPECTED; + break; + case NVME_SC_INTERNAL: + status = SAM_STAT_CHECK_CONDITION; + sense_key = HARDWARE_ERROR; + asc = SCSI_ASC_INTERNAL_TARGET_FAILURE; + ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + break; + case NVME_SC_ABORT_REQ: + status = SAM_STAT_TASK_ABORTED; + sense_key = ABORTED_COMMAND; + asc = SCSI_ASC_NO_SENSE; + ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + break; + case NVME_SC_ABORT_QUEUE: + status = SAM_STAT_TASK_ABORTED; + sense_key = ABORTED_COMMAND; + asc = SCSI_ASC_NO_SENSE; + ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + break; + case NVME_SC_FUSED_FAIL: + status = SAM_STAT_TASK_ABORTED; + sense_key = ABORTED_COMMAND; + asc = SCSI_ASC_NO_SENSE; + ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + break; + case NVME_SC_FUSED_MISSING: + status = SAM_STAT_TASK_ABORTED; + sense_key = ABORTED_COMMAND; + asc = SCSI_ASC_NO_SENSE; + ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + break; + case NVME_SC_INVALID_NS: + status = SAM_STAT_CHECK_CONDITION; + sense_key = ILLEGAL_REQUEST; + asc = SCSI_ASC_ACCESS_DENIED_INVALID_LUN_ID; + ascq = SCSI_ASCQ_INVALID_LUN_ID; + break; + case NVME_SC_LBA_RANGE: + status = SAM_STAT_CHECK_CONDITION; + sense_key = ILLEGAL_REQUEST; + asc = SCSI_ASC_ILLEGAL_BLOCK; + ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + break; + case NVME_SC_CAP_EXCEEDED: + status = SAM_STAT_CHECK_CONDITION; + sense_key = MEDIUM_ERROR; + asc = SCSI_ASC_NO_SENSE; + ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + break; + case NVME_SC_NS_NOT_READY: + status = SAM_STAT_CHECK_CONDITION; + sense_key = NOT_READY; + asc = SCSI_ASC_LUN_NOT_READY; + ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + break; + + /* Command Specific Status */ + case NVME_SC_INVALID_FORMAT: + status = SAM_STAT_CHECK_CONDITION; + sense_key = ILLEGAL_REQUEST; + asc = SCSI_ASC_FORMAT_COMMAND_FAILED; + ascq = SCSI_ASCQ_FORMAT_COMMAND_FAILED; + break; + case NVME_SC_BAD_ATTRIBUTES: + status = SAM_STAT_CHECK_CONDITION; + sense_key = ILLEGAL_REQUEST; + asc = SCSI_ASC_INVALID_CDB; + ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + break; + + /* Media Errors */ + case NVME_SC_WRITE_FAULT: + status = SAM_STAT_CHECK_CONDITION; + sense_key = MEDIUM_ERROR; + asc = SCSI_ASC_PERIPHERAL_DEV_WRITE_FAULT; + ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + break; + case NVME_SC_READ_ERROR: + status = SAM_STAT_CHECK_CONDITION; + sense_key = MEDIUM_ERROR; + asc = SCSI_ASC_UNRECOVERED_READ_ERROR; + ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + break; + case NVME_SC_GUARD_CHECK: + status = SAM_STAT_CHECK_CONDITION; + sense_key = MEDIUM_ERROR; + asc = SCSI_ASC_LOG_BLOCK_GUARD_CHECK_FAILED; + ascq = SCSI_ASCQ_LOG_BLOCK_GUARD_CHECK_FAILED; + break; + case NVME_SC_APPTAG_CHECK: + status = SAM_STAT_CHECK_CONDITION; + sense_key = MEDIUM_ERROR; + asc = SCSI_ASC_LOG_BLOCK_APPTAG_CHECK_FAILED; + ascq = SCSI_ASCQ_LOG_BLOCK_APPTAG_CHECK_FAILED; + break; + case NVME_SC_REFTAG_CHECK: + status = SAM_STAT_CHECK_CONDITION; + sense_key = MEDIUM_ERROR; + asc = SCSI_ASC_LOG_BLOCK_REFTAG_CHECK_FAILED; + ascq = SCSI_ASCQ_LOG_BLOCK_REFTAG_CHECK_FAILED; + break; + case NVME_SC_COMPARE_FAILED: + status = SAM_STAT_CHECK_CONDITION; + sense_key = MISCOMPARE; + asc = SCSI_ASC_MISCOMPARE_DURING_VERIFY; + ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + break; + case NVME_SC_ACCESS_DENIED: + status = SAM_STAT_CHECK_CONDITION; + sense_key = ILLEGAL_REQUEST; + asc = SCSI_ASC_ACCESS_DENIED_INVALID_LUN_ID; + ascq = 
SCSI_ASCQ_INVALID_LUN_ID; + break; + + /* Unspecified/Default */ + case NVME_SC_CMDID_CONFLICT: + case NVME_SC_CMD_SEQ_ERROR: + case NVME_SC_CQ_INVALID: + case NVME_SC_QID_INVALID: + case NVME_SC_QUEUE_SIZE: + case NVME_SC_ABORT_LIMIT: + case NVME_SC_ABORT_MISSING: + case NVME_SC_ASYNC_LIMIT: + case NVME_SC_FIRMWARE_SLOT: + case NVME_SC_FIRMWARE_IMAGE: + case NVME_SC_INVALID_VECTOR: + case NVME_SC_INVALID_LOG_PAGE: + default: + status = SAM_STAT_CHECK_CONDITION; + sense_key = ILLEGAL_REQUEST; + asc = SCSI_ASC_NO_SENSE; + ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + break; + } + + res = nvme_trans_completion(hdr, status, sense_key, asc, ascq); + + return res; +} + +/* INQUIRY Helper Functions */ + +static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns, + struct sg_io_hdr *hdr, u8 *inq_response, + int alloc_len) +{ + struct nvme_dev *dev = ns->dev; + dma_addr_t dma_addr; + void *mem; + struct nvme_id_ns *id_ns; + int res = SNTI_TRANSLATION_SUCCESS; + int nvme_sc; + int xfer_len; + u8 resp_data_format = 0x02; + u8 protect; + u8 cmdque = 0x01 << 1; + + mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), + &dma_addr, GFP_KERNEL); + if (mem == NULL) { + res = -ENOMEM; + goto out_dma; + } + + /* nvme ns identify - use DPS value for PROTECT field */ + nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr); + res = nvme_trans_status_code(hdr, nvme_sc); + /* + * If nvme_sc was -ve, res will be -ve here. + * If nvme_sc was +ve, the status would have been translated, and res + * can only be 0 or -ve. + * - If 0 && nvme_sc > 0, then go into next if where res gets nvme_sc + * - If -ve, return because it's a Linux error. + */ + if (res) + goto out_free; + if (nvme_sc) { + res = nvme_sc; + goto out_free; + } + id_ns = mem; + (id_ns->dps) ? (protect = 0x01) : (protect = 0); + + memset(inq_response, 0, STANDARD_INQUIRY_LENGTH); + inq_response[2] = VERSION_SPC_4; + inq_response[3] = resp_data_format; /*normaca=0 | hisup=0 */ + inq_response[4] = ADDITIONAL_STD_INQ_LENGTH; + inq_response[5] = protect; /* sccs=0 | acc=0 | tpgs=0 | pc3=0 */ + inq_response[7] = cmdque; /* wbus16=0 | sync=0 | vs=0 */ + strncpy(&inq_response[8], "NVMe ", 8); + strncpy(&inq_response[16], dev->model, 16); + strncpy(&inq_response[32], dev->firmware_rev, 4); + + xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH); + res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len); + + out_free: + dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), mem, + dma_addr); + out_dma: + return res; +} + +static int nvme_trans_supported_vpd_pages(struct nvme_ns *ns, + struct sg_io_hdr *hdr, u8 *inq_response, + int alloc_len) +{ + int res = SNTI_TRANSLATION_SUCCESS; + int xfer_len; + + memset(inq_response, 0, STANDARD_INQUIRY_LENGTH); + inq_response[1] = INQ_SUPPORTED_VPD_PAGES_PAGE; /* Page Code */ + inq_response[3] = INQ_NUM_SUPPORTED_VPD_PAGES; /* Page Length */ + inq_response[4] = INQ_SUPPORTED_VPD_PAGES_PAGE; + inq_response[5] = INQ_UNIT_SERIAL_NUMBER_PAGE; + inq_response[6] = INQ_DEVICE_IDENTIFICATION_PAGE; + inq_response[7] = INQ_EXTENDED_INQUIRY_DATA_PAGE; + inq_response[8] = INQ_BDEV_CHARACTERISTICS_PAGE; + + xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH); + res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len); + + return res; +} + +static int nvme_trans_unit_serial_page(struct nvme_ns *ns, + struct sg_io_hdr *hdr, u8 *inq_response, + int alloc_len) +{ + struct nvme_dev *dev = ns->dev; + int res = SNTI_TRANSLATION_SUCCESS; + int xfer_len; + + memset(inq_response, 0, STANDARD_INQUIRY_LENGTH); +
inq_response[1] = INQ_UNIT_SERIAL_NUMBER_PAGE; /* Page Code */ + inq_response[3] = INQ_SERIAL_NUMBER_LENGTH; /* Page Length */ + strncpy(&inq_response[4], dev->serial, INQ_SERIAL_NUMBER_LENGTH); + + xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH); + res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len); + + return res; +} + +static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, + u8 *inq_response, int alloc_len) +{ + struct nvme_dev *dev = ns->dev; + dma_addr_t dma_addr; + void *mem; + struct nvme_id_ctrl *id_ctrl; + int res = SNTI_TRANSLATION_SUCCESS; + int nvme_sc; + u8 ieee[4]; + int xfer_len; + u32 tmp_id = cpu_to_be64(ns->ns_id); + + mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), + &dma_addr, GFP_KERNEL); + if (mem == NULL) { + res = -ENOMEM; + goto out_dma; + } + + /* nvme controller identify */ + nvme_sc = nvme_identify(dev, 0, 1, dma_addr); + res = nvme_trans_status_code(hdr, nvme_sc); + if (res) + goto out_free; + if (nvme_sc) { + res = nvme_sc; + goto out_free; + } + id_ctrl = mem; + + /* Since SCSI tried to save 4 bits... [SPC-4(r34) Table 591] */ + ieee[0] = id_ctrl->ieee[0] << 4; + ieee[1] = id_ctrl->ieee[0] >> 4 | id_ctrl->ieee[1] << 4; + ieee[2] = id_ctrl->ieee[1] >> 4 | id_ctrl->ieee[2] << 4; + ieee[3] = id_ctrl->ieee[2] >> 4; + + memset(inq_response, 0, STANDARD_INQUIRY_LENGTH); + inq_response[1] = INQ_DEVICE_IDENTIFICATION_PAGE; /* Page Code */ + inq_response[3] = 20; /* Page Length */ + /* Designation Descriptor start */ + inq_response[4] = 0x01; /* Proto ID=0h | Code set=1h */ + inq_response[5] = 0x03; /* PIV=0b | Asso=00b | Designator Type=3h */ + inq_response[6] = 0x00; /* Rsvd */ + inq_response[7] = 16; /* Designator Length */ + /* Designator start */ + inq_response[8] = 0x60 | ieee[3]; /* NAA=6h | IEEE ID MSB, High nibble*/ + inq_response[9] = ieee[2]; /* IEEE ID */ + inq_response[10] = ieee[1]; /* IEEE ID */ + inq_response[11] = ieee[0]; /* IEEE ID| Vendor Specific ID... 
*/ + inq_response[12] = (dev->pci_dev->vendor & 0xFF00) >> 8; + inq_response[13] = (dev->pci_dev->vendor & 0x00FF); + inq_response[14] = dev->serial[0]; + inq_response[15] = dev->serial[1]; + inq_response[16] = dev->model[0]; + inq_response[17] = dev->model[1]; + memcpy(&inq_response[18], &tmp_id, sizeof(u32)); + /* Last 2 bytes are zero */ + + xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH); + res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len); + + out_free: + dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), mem, + dma_addr); + out_dma: + return res; +} + +static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, + int alloc_len) +{ + u8 *inq_response; + int res = SNTI_TRANSLATION_SUCCESS; + int nvme_sc; + struct nvme_dev *dev = ns->dev; + dma_addr_t dma_addr; + void *mem; + struct nvme_id_ctrl *id_ctrl; + struct nvme_id_ns *id_ns; + int xfer_len; + u8 microcode = 0x80; + u8 spt; + u8 spt_lut[8] = {0, 0, 2, 1, 4, 6, 5, 7}; + u8 grd_chk, app_chk, ref_chk, protect; + u8 uask_sup = 0x20; + u8 v_sup; + u8 luiclr = 0x01; + + inq_response = kmalloc(EXTENDED_INQUIRY_DATA_PAGE_LENGTH, GFP_KERNEL); + if (inq_response == NULL) { + res = -ENOMEM; + goto out_mem; + } + + mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), + &dma_addr, GFP_KERNEL); + if (mem == NULL) { + res = -ENOMEM; + goto out_dma; + } + + /* nvme ns identify */ + nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr); + res = nvme_trans_status_code(hdr, nvme_sc); + if (res) + goto out_free; + if (nvme_sc) { + res = nvme_sc; + goto out_free; + } + id_ns = mem; + spt = spt_lut[(id_ns->dpc) & 0x07] << 3; + (id_ns->dps) ? (protect = 0x01) : (protect = 0); + grd_chk = protect << 2; + app_chk = protect << 1; + ref_chk = protect; + + /* nvme controller identify */ + nvme_sc = nvme_identify(dev, 0, 1, dma_addr); + res = nvme_trans_status_code(hdr, nvme_sc); + if (res) + goto out_free; + if (nvme_sc) { + res = nvme_sc; + goto out_free; + } + id_ctrl = mem; + v_sup = id_ctrl->vwc; + + memset(inq_response, 0, EXTENDED_INQUIRY_DATA_PAGE_LENGTH); + inq_response[1] = INQ_EXTENDED_INQUIRY_DATA_PAGE; /* Page Code */ + inq_response[2] = 0x00; /* Page Length MSB */ + inq_response[3] = 0x3C; /* Page Length LSB */ + inq_response[4] = microcode | spt | grd_chk | app_chk | ref_chk; + inq_response[5] = uask_sup; + inq_response[6] = v_sup; + inq_response[7] = luiclr; + inq_response[8] = 0; + inq_response[9] = 0; + + xfer_len = min(alloc_len, EXTENDED_INQUIRY_DATA_PAGE_LENGTH); + res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len); + + out_free: + dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), mem, + dma_addr); + out_dma: + kfree(inq_response); + out_mem: + return res; +} + +static int nvme_trans_bdev_char_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, + int alloc_len) +{ + u8 *inq_response; + int res = SNTI_TRANSLATION_SUCCESS; + int xfer_len; + + inq_response = kmalloc(EXTENDED_INQUIRY_DATA_PAGE_LENGTH, GFP_KERNEL); + if (inq_response == NULL) { + res = -ENOMEM; + goto out_mem; + } + + memset(inq_response, 0, EXTENDED_INQUIRY_DATA_PAGE_LENGTH); + inq_response[1] = INQ_BDEV_CHARACTERISTICS_PAGE; /* Page Code */ + inq_response[2] = 0x00; /* Page Length MSB */ + inq_response[3] = 0x3C; /* Page Length LSB */ + inq_response[4] = 0x00; /* Medium Rotation Rate MSB */ + inq_response[5] = 0x01; /* Medium Rotation Rate LSB */ + inq_response[6] = 0x00; /* Form Factor */ + + xfer_len = min(alloc_len, EXTENDED_INQUIRY_DATA_PAGE_LENGTH); + res = nvme_trans_copy_to_user(hdr, 
inq_response, xfer_len); + + kfree(inq_response); + out_mem: + return res; +} + +/* LOG SENSE Helper Functions */ + +static int nvme_trans_log_supp_pages(struct nvme_ns *ns, struct sg_io_hdr *hdr, + int alloc_len) +{ + int res = SNTI_TRANSLATION_SUCCESS; + int xfer_len; + u8 *log_response; + + log_response = kmalloc(LOG_PAGE_SUPPORTED_LOG_PAGES_LENGTH, GFP_KERNEL); + if (log_response == NULL) { + res = -ENOMEM; + goto out_mem; + } + memset(log_response, 0, LOG_PAGE_SUPPORTED_LOG_PAGES_LENGTH); + + log_response[0] = LOG_PAGE_SUPPORTED_LOG_PAGES_PAGE; + /* Subpage=0x00, Page Length MSB=0 */ + log_response[3] = SUPPORTED_LOG_PAGES_PAGE_LENGTH; + log_response[4] = LOG_PAGE_SUPPORTED_LOG_PAGES_PAGE; + log_response[5] = LOG_PAGE_INFORMATIONAL_EXCEPTIONS_PAGE; + log_response[6] = LOG_PAGE_TEMPERATURE_PAGE; + + xfer_len = min(alloc_len, LOG_PAGE_SUPPORTED_LOG_PAGES_LENGTH); + res = nvme_trans_copy_to_user(hdr, log_response, xfer_len); + + kfree(log_response); + out_mem: + return res; +} + +static int nvme_trans_log_info_exceptions(struct nvme_ns *ns, + struct sg_io_hdr *hdr, int alloc_len) +{ + int res = SNTI_TRANSLATION_SUCCESS; + int xfer_len; + u8 *log_response; + struct nvme_command c; + struct nvme_dev *dev = ns->dev; + struct nvme_smart_log *smart_log; + dma_addr_t dma_addr; + void *mem; + u8 temp_c; + u16 temp_k; + + log_response = kmalloc(LOG_INFO_EXCP_PAGE_LENGTH, GFP_KERNEL); + if (log_response == NULL) { + res = -ENOMEM; + goto out_mem; + } + memset(log_response, 0, LOG_INFO_EXCP_PAGE_LENGTH); + + mem = dma_alloc_coherent(&dev->pci_dev->dev, + sizeof(struct nvme_smart_log), + &dma_addr, GFP_KERNEL); + if (mem == NULL) { + res = -ENOMEM; + goto out_dma; + } + + /* Get SMART Log Page */ + memset(&c, 0, sizeof(c)); + c.common.opcode = nvme_admin_get_log_page; + c.common.nsid = cpu_to_le32(0xFFFFFFFF); + c.common.prp1 = cpu_to_le64(dma_addr); + c.common.cdw10[0] = cpu_to_le32(((sizeof(struct nvme_smart_log) / + BYTES_TO_DWORDS) << 16) | NVME_GET_SMART_LOG_PAGE); + res = nvme_submit_admin_cmd(dev, &c, NULL); + if (res != NVME_SC_SUCCESS) { + temp_c = LOG_TEMP_UNKNOWN; + } else { + smart_log = mem; + temp_k = (smart_log->temperature[1] << 8) + + (smart_log->temperature[0]); + temp_c = temp_k - KELVIN_TEMP_FACTOR; + } + + log_response[0] = LOG_PAGE_INFORMATIONAL_EXCEPTIONS_PAGE; + /* Subpage=0x00, Page Length MSB=0 */ + log_response[3] = REMAINING_INFO_EXCP_PAGE_LENGTH; + /* Informational Exceptions Log Parameter 1 Start */ + /* Parameter Code=0x0000 bytes 4,5 */ + log_response[6] = 0x23; /* DU=0, TSD=1, ETC=0, TMC=0, FMT_AND_LNK=11b */ + log_response[7] = 0x04; /* PARAMETER LENGTH */ + /* Add sense Code and qualifier = 0x00 each */ + /* Use Temperature from NVMe Get Log Page, convert to C from K */ + log_response[10] = temp_c; + + xfer_len = min(alloc_len, LOG_INFO_EXCP_PAGE_LENGTH); + res = nvme_trans_copy_to_user(hdr, log_response, xfer_len); + + dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_smart_log), + mem, dma_addr); + out_dma: + kfree(log_response); + out_mem: + return res; +} + +static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr, + int alloc_len) +{ + int res = SNTI_TRANSLATION_SUCCESS; + int xfer_len; + u8 *log_response; + struct nvme_command c; + struct nvme_dev *dev = ns->dev; + struct nvme_smart_log *smart_log; + dma_addr_t dma_addr; + void *mem; + u32 feature_resp; + u8 temp_c_cur, temp_c_thresh; + u16 temp_k; + + log_response = kmalloc(LOG_TEMP_PAGE_LENGTH, GFP_KERNEL); + if (log_response == NULL) { + res = -ENOMEM; + goto out_mem; + } + 
memset(log_response, 0, LOG_TEMP_PAGE_LENGTH); + + mem = dma_alloc_coherent(&dev->pci_dev->dev, + sizeof(struct nvme_smart_log), + &dma_addr, GFP_KERNEL); + if (mem == NULL) { + res = -ENOMEM; + goto out_dma; + } + + /* Get SMART Log Page */ + memset(&c, 0, sizeof(c)); + c.common.opcode = nvme_admin_get_log_page; + c.common.nsid = cpu_to_le32(0xFFFFFFFF); + c.common.prp1 = cpu_to_le64(dma_addr); + c.common.cdw10[0] = cpu_to_le32(((sizeof(struct nvme_smart_log) / + BYTES_TO_DWORDS) << 16) | NVME_GET_SMART_LOG_PAGE); + res = nvme_submit_admin_cmd(dev, &c, NULL); + if (res != NVME_SC_SUCCESS) { + temp_c_cur = LOG_TEMP_UNKNOWN; + } else { + smart_log = mem; + temp_k = (smart_log->temperature[1] << 8) + + (smart_log->temperature[0]); + temp_c_cur = temp_k - KELVIN_TEMP_FACTOR; + } + + /* Get Features for Temp Threshold */ + res = nvme_get_features(dev, NVME_FEAT_TEMP_THRESH, 0, 0, + &feature_resp); + if (res != NVME_SC_SUCCESS) + temp_c_thresh = LOG_TEMP_UNKNOWN; + else + temp_c_thresh = (feature_resp & 0xFFFF) - KELVIN_TEMP_FACTOR; + + log_response[0] = LOG_PAGE_TEMPERATURE_PAGE; + /* Subpage=0x00, Page Length MSB=0 */ + log_response[3] = REMAINING_TEMP_PAGE_LENGTH; + /* Temperature Log Parameter 1 (Temperature) Start */ + /* Parameter Code = 0x0000 */ + log_response[6] = 0x01; /* Format and Linking = 01b */ + log_response[7] = 0x02; /* Parameter Length */ + /* Use Temperature from NVMe Get Log Page, convert to C from K */ + log_response[9] = temp_c_cur; + /* Temperature Log Parameter 2 (Reference Temperature) Start */ + log_response[11] = 0x01; /* Parameter Code = 0x0001 */ + log_response[12] = 0x01; /* Format and Linking = 01b */ + log_response[13] = 0x02; /* Parameter Length */ + /* Use Temperature Thresh from NVMe Get Log Page, convert to C from K */ + log_response[15] = temp_c_thresh; + + xfer_len = min(alloc_len, LOG_TEMP_PAGE_LENGTH); + res = nvme_trans_copy_to_user(hdr, log_response, xfer_len); + + dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_smart_log), + mem, dma_addr); + out_dma: + kfree(log_response); + out_mem: + return res; +} + +/* MODE SENSE Helper Functions */ + +static int nvme_trans_fill_mode_parm_hdr(u8 *resp, int len, u8 cdb10, u8 llbaa, + u16 mode_data_length, u16 blk_desc_len) +{ + /* Quick check to make sure I don't stomp on my own memory... 
*/ + if ((cdb10 && len < 8) || (!cdb10 && len < 4)) + return SNTI_INTERNAL_ERROR; + + if (cdb10) { + resp[0] = (mode_data_length & 0xFF00) >> 8; + resp[1] = (mode_data_length & 0x00FF); + /* resp[2] and [3] are zero */ + resp[4] = llbaa; + resp[5] = RESERVED_FIELD; + resp[6] = (blk_desc_len & 0xFF00) >> 8; + resp[7] = (blk_desc_len & 0x00FF); + } else { + resp[0] = (mode_data_length & 0x00FF); + /* resp[1] and [2] are zero */ + resp[3] = (blk_desc_len & 0x00FF); + } + + return SNTI_TRANSLATION_SUCCESS; +} + +static int nvme_trans_fill_blk_desc(struct nvme_ns *ns, struct sg_io_hdr *hdr, + u8 *resp, int len, u8 llbaa) +{ + int res = SNTI_TRANSLATION_SUCCESS; + int nvme_sc; + struct nvme_dev *dev = ns->dev; + dma_addr_t dma_addr; + void *mem; + struct nvme_id_ns *id_ns; + u8 flbas; + u32 lba_length; + + if (llbaa == 0 && len < MODE_PAGE_BLK_DES_LEN) + return SNTI_INTERNAL_ERROR; + else if (llbaa > 0 && len < MODE_PAGE_LLBAA_BLK_DES_LEN) + return SNTI_INTERNAL_ERROR; + + mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), + &dma_addr, GFP_KERNEL); + if (mem == NULL) { + res = -ENOMEM; + goto out; + } + + /* nvme ns identify */ + nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr); + res = nvme_trans_status_code(hdr, nvme_sc); + if (res) + goto out_dma; + if (nvme_sc) { + res = nvme_sc; + goto out_dma; + } + id_ns = mem; + flbas = (id_ns->flbas) & 0x0F; + lba_length = (1 << (id_ns->lbaf[flbas].ds)); + + if (llbaa == 0) { + u32 tmp_cap = cpu_to_be32(id_ns->ncap); + /* Byte 4 is reserved */ + u32 tmp_len = cpu_to_be32(lba_length) & 0x00FFFFFF; + + memcpy(resp, &tmp_cap, sizeof(u32)); + memcpy(&resp[4], &tmp_len, sizeof(u32)); + } else { + u64 tmp_cap = cpu_to_be64(id_ns->ncap); + u32 tmp_len = cpu_to_be32(lba_length); + + memcpy(resp, &tmp_cap, sizeof(u64)); + /* Bytes 8, 9, 10, 11 are reserved */ + memcpy(&resp[12], &tmp_len, sizeof(u32)); + } + + out_dma: + dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), mem, + dma_addr); + out: + return res; +} + +static int nvme_trans_fill_control_page(struct nvme_ns *ns, + struct sg_io_hdr *hdr, u8 *resp, + int len) +{ + if (len < MODE_PAGE_CONTROL_LEN) + return SNTI_INTERNAL_ERROR; + + resp[0] = MODE_PAGE_CONTROL; + resp[1] = MODE_PAGE_CONTROL_LEN_FIELD; + resp[2] = 0x0E; /* TST=000b, TMF_ONLY=0, DPICZ=1, + * D_SENSE=1, GLTSD=1, RLEC=0 */ + resp[3] = 0x12; /* Q_ALGO_MODIFIER=1h, NUAR=0, QERR=01b */ + /* Byte 4: VS=0, RAC=0, UA_INT=0, SWP=0 */ + resp[5] = 0x40; /* ATO=0, TAS=1, ATMPE=0, RWWP=0, AUTOLOAD=0 */ + /* resp[6] and [7] are obsolete, thus zero */ + resp[8] = 0xFF; /* Busy timeout period = 0xffff */ + resp[9] = 0xFF; + /* Bytes 10,11: Extended selftest completion time = 0x0000 */ + + return SNTI_TRANSLATION_SUCCESS; +} + +static int nvme_trans_fill_caching_page(struct nvme_ns *ns, + struct sg_io_hdr *hdr, + u8 *resp, int len) +{ + int res = SNTI_TRANSLATION_SUCCESS; + int nvme_sc; + struct nvme_dev *dev = ns->dev; + u32 feature_resp; + u8 vwc; + + if (len < MODE_PAGE_CACHING_LEN) + return SNTI_INTERNAL_ERROR; + + nvme_sc = nvme_get_features(dev, NVME_FEAT_VOLATILE_WC, 0, 0, + &feature_resp); + res = nvme_trans_status_code(hdr, nvme_sc); + if (res) + goto out; + if (nvme_sc) { + res = nvme_sc; + goto out; + } + vwc = feature_resp & 0x00000001; + + resp[0] = MODE_PAGE_CACHING; + resp[1] = MODE_PAGE_CACHING_LEN_FIELD; + resp[2] = vwc << 2; + + out: + return res; +} + +static int nvme_trans_fill_pow_cnd_page(struct nvme_ns *ns, + struct sg_io_hdr *hdr, u8 *resp, + int len) +{ + int res = SNTI_TRANSLATION_SUCCESS; + + 
if (len < MODE_PAGE_POW_CND_LEN) + return SNTI_INTERNAL_ERROR; + + resp[0] = MODE_PAGE_POWER_CONDITION; + resp[1] = MODE_PAGE_POW_CND_LEN_FIELD; + /* All other bytes are zero */ + + return res; +} + +static int nvme_trans_fill_inf_exc_page(struct nvme_ns *ns, + struct sg_io_hdr *hdr, u8 *resp, + int len) +{ + int res = SNTI_TRANSLATION_SUCCESS; + + if (len < MODE_PAGE_INF_EXC_LEN) + return SNTI_INTERNAL_ERROR; + + resp[0] = MODE_PAGE_INFO_EXCEP; + resp[1] = MODE_PAGE_INF_EXC_LEN_FIELD; + resp[2] = 0x88; + /* All other bytes are zero */ + + return res; +} + +static int nvme_trans_fill_all_pages(struct nvme_ns *ns, struct sg_io_hdr *hdr, + u8 *resp, int len) +{ + int res = SNTI_TRANSLATION_SUCCESS; + u16 mode_pages_offset_1 = 0; + u16 mode_pages_offset_2, mode_pages_offset_3, mode_pages_offset_4; + + mode_pages_offset_2 = mode_pages_offset_1 + MODE_PAGE_CACHING_LEN; + mode_pages_offset_3 = mode_pages_offset_2 + MODE_PAGE_CONTROL_LEN; + mode_pages_offset_4 = mode_pages_offset_3 + MODE_PAGE_POW_CND_LEN; + + res = nvme_trans_fill_caching_page(ns, hdr, &resp[mode_pages_offset_1], + MODE_PAGE_CACHING_LEN); + if (res != SNTI_TRANSLATION_SUCCESS) + goto out; + res = nvme_trans_fill_control_page(ns, hdr, &resp[mode_pages_offset_2], + MODE_PAGE_CONTROL_LEN); + if (res != SNTI_TRANSLATION_SUCCESS) + goto out; + res = nvme_trans_fill_pow_cnd_page(ns, hdr, &resp[mode_pages_offset_3], + MODE_PAGE_POW_CND_LEN); + if (res != SNTI_TRANSLATION_SUCCESS) + goto out; + res = nvme_trans_fill_inf_exc_page(ns, hdr, &resp[mode_pages_offset_4], + MODE_PAGE_INF_EXC_LEN); + if (res != SNTI_TRANSLATION_SUCCESS) + goto out; + + out: + return res; +} + +static inline int nvme_trans_get_blk_desc_len(u8 dbd, u8 llbaa) +{ + if (dbd == MODE_SENSE_BLK_DESC_ENABLED) { + /* SPC-4: len = 8 x Num_of_descriptors if llbaa = 0, 16x if 1 */ + return 8 * (llbaa + 1) * MODE_SENSE_BLK_DESC_COUNT; + } else { + return 0; + } +} + +static int nvme_trans_mode_page_create(struct nvme_ns *ns, + struct sg_io_hdr *hdr, u8 *cmd, + u16 alloc_len, u8 cdb10, + int (*mode_page_fill_func) + (struct nvme_ns *, + struct sg_io_hdr *hdr, u8 *, int), + u16 mode_pages_tot_len) +{ + int res = SNTI_TRANSLATION_SUCCESS; + int xfer_len; + u8 *response; + u8 dbd, llbaa; + u16 resp_size; + int mph_size; + u16 mode_pages_offset_1; + u16 blk_desc_len, blk_desc_offset, mode_data_length; + + dbd = GET_MODE_SENSE_DBD(cmd); + llbaa = GET_MODE_SENSE_LLBAA(cmd); + mph_size = GET_MODE_SENSE_MPH_SIZE(cdb10); + blk_desc_len = nvme_trans_get_blk_desc_len(dbd, llbaa); + + resp_size = mph_size + blk_desc_len + mode_pages_tot_len; + /* Refer spc4r34 Table 440 for calculation of Mode data Length field */ + mode_data_length = 3 + (3 * cdb10) + blk_desc_len + mode_pages_tot_len; + + blk_desc_offset = mph_size; + mode_pages_offset_1 = blk_desc_offset + blk_desc_len; + + response = kmalloc(resp_size, GFP_KERNEL); + if (response == NULL) { + res = -ENOMEM; + goto out_mem; + } + memset(response, 0, resp_size); + + res = nvme_trans_fill_mode_parm_hdr(&response[0], mph_size, cdb10, + llbaa, mode_data_length, blk_desc_len); + if (res != SNTI_TRANSLATION_SUCCESS) + goto out_free; + if (blk_desc_len > 0) { + res = nvme_trans_fill_blk_desc(ns, hdr, + &response[blk_desc_offset], + blk_desc_len, llbaa); + if (res != SNTI_TRANSLATION_SUCCESS) + goto out_free; + } + res = mode_page_fill_func(ns, hdr, &response[mode_pages_offset_1], + mode_pages_tot_len); + if (res != SNTI_TRANSLATION_SUCCESS) + goto out_free; + + xfer_len = min(alloc_len, resp_size); + res = nvme_trans_copy_to_user(hdr, 
response, xfer_len); + + out_free: + kfree(response); + out_mem: + return res; +} + +/* Read Capacity Helper Functions */ + +static void nvme_trans_fill_read_cap(u8 *response, struct nvme_id_ns *id_ns, + u8 cdb16) +{ + u8 flbas; + u32 lba_length; + u64 rlba; + u8 prot_en; + u8 p_type_lut[4] = {0, 0, 1, 2}; + u64 tmp_rlba; + u32 tmp_rlba_32; + u32 tmp_len; + + flbas = (id_ns->flbas) & 0x0F; + lba_length = (1 << (id_ns->lbaf[flbas].ds)); + rlba = le64_to_cpup(&id_ns->nsze) - 1; + (id_ns->dps) ? (prot_en = 0x01) : (prot_en = 0); + + if (!cdb16) { + if (rlba > 0xFFFFFFFF) + rlba = 0xFFFFFFFF; + tmp_rlba_32 = cpu_to_be32(rlba); + tmp_len = cpu_to_be32(lba_length); + memcpy(response, &tmp_rlba_32, sizeof(u32)); + memcpy(&response[4], &tmp_len, sizeof(u32)); + } else { + tmp_rlba = cpu_to_be64(rlba); + tmp_len = cpu_to_be32(lba_length); + memcpy(response, &tmp_rlba, sizeof(u64)); + memcpy(&response[8], &tmp_len, sizeof(u32)); + response[12] = (p_type_lut[id_ns->dps & 0x3] << 1) | prot_en; + /* P_I_Exponent = 0x0 | LBPPBE = 0x0 */ + /* LBPME = 0 | LBPRZ = 0 | LALBA = 0x00 */ + /* Bytes 16-31 - Reserved */ + } +} + +/* Start Stop Unit Helper Functions */ + +static int nvme_trans_power_state(struct nvme_ns *ns, struct sg_io_hdr *hdr, + u8 pc, u8 pcmod, u8 start) +{ + int res = SNTI_TRANSLATION_SUCCESS; + int nvme_sc; + struct nvme_dev *dev = ns->dev; + dma_addr_t dma_addr; + void *mem; + struct nvme_id_ctrl *id_ctrl; + int lowest_pow_st; /* max npss = lowest power consumption */ + unsigned ps_desired = 0; + + /* NVMe Controller Identify */ + mem = dma_alloc_coherent(&dev->pci_dev->dev, + sizeof(struct nvme_id_ctrl), + &dma_addr, GFP_KERNEL); + if (mem == NULL) { + res = -ENOMEM; + goto out; + } + nvme_sc = nvme_identify(dev, 0, 1, dma_addr); + res = nvme_trans_status_code(hdr, nvme_sc); + if (res) + goto out_dma; + if (nvme_sc) { + res = nvme_sc; + goto out_dma; + } + id_ctrl = mem; + lowest_pow_st = id_ctrl->npss - 1; + + switch (pc) { + case NVME_POWER_STATE_START_VALID: + /* Action unspecified if POWER CONDITION MODIFIER != 0 */ + if (pcmod == 0 && start == 0x1) + ps_desired = POWER_STATE_0; + if (pcmod == 0 && start == 0x0) + ps_desired = lowest_pow_st; + break; + case NVME_POWER_STATE_ACTIVE: + /* Action unspecified if POWER CONDITION MODIFIER != 0 */ + if (pcmod == 0) + ps_desired = POWER_STATE_0; + break; + case NVME_POWER_STATE_IDLE: + /* Action unspecified if POWER CONDITION MODIFIER != [0,1,2] */ + /* min of desired state and (lps-1) because lps is STOP */ + if (pcmod == 0x0) + ps_desired = min(POWER_STATE_1, (lowest_pow_st - 1)); + else if (pcmod == 0x1) + ps_desired = min(POWER_STATE_2, (lowest_pow_st - 1)); + else if (pcmod == 0x2) + ps_desired = min(POWER_STATE_3, (lowest_pow_st - 1)); + break; + case NVME_POWER_STATE_STANDBY: + /* Action unspecified if POWER CONDITION MODIFIER != [0,1] */ + if (pcmod == 0x0) + ps_desired = max(0, (lowest_pow_st - 2)); + else if (pcmod == 0x1) + ps_desired = max(0, (lowest_pow_st - 1)); + break; + case NVME_POWER_STATE_LU_CONTROL: + default: + res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, + ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB, + SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + break; + } + nvme_sc = nvme_set_features(dev, NVME_FEAT_POWER_MGMT, ps_desired, 0, + NULL); + res = nvme_trans_status_code(hdr, nvme_sc); + if (res) + goto out_dma; + if (nvme_sc) + res = nvme_sc; + out_dma: + dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ctrl), mem, + dma_addr); + out: + return res; +} + +/* Write Buffer Helper Functions */ +/* Also using this for 
Format Unit, with hdr passed as NULL and buffer_id = 0 */
+
+static int nvme_trans_send_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
+					u8 opcode, u32 tot_len, u32 offset,
+					u8 buffer_id)
+{
+	int res = SNTI_TRANSLATION_SUCCESS;
+	int nvme_sc;
+	struct nvme_dev *dev = ns->dev;
+	struct nvme_command c;
+	struct nvme_iod *iod = NULL;
+	unsigned length;
+
+	memset(&c, 0, sizeof(c));
+	c.common.opcode = opcode;
+	if (opcode == nvme_admin_download_fw) {
+		if (hdr->iovec_count > 0) {
+			/* Assuming SGL is not allowed for this command */
+			res = nvme_trans_completion(hdr,
+						SAM_STAT_CHECK_CONDITION,
+						ILLEGAL_REQUEST,
+						SCSI_ASC_INVALID_CDB,
+						SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
+			goto out;
+		}
+		iod = nvme_map_user_pages(dev, DMA_TO_DEVICE,
+				(unsigned long)hdr->dxferp, tot_len);
+		if (IS_ERR(iod)) {
+			res = PTR_ERR(iod);
+			goto out;
+		}
+		length = nvme_setup_prps(dev, &c.common, iod, tot_len,
+								GFP_KERNEL);
+		if (length != tot_len) {
+			res = -ENOMEM;
+			goto out_unmap;
+		}
+
+		c.dlfw.numd = (tot_len/BYTES_TO_DWORDS) - 1;
+		c.dlfw.offset = offset/BYTES_TO_DWORDS;
+	} else if (opcode == nvme_admin_activate_fw) {
+		c.common.cdw10[0] = buffer_id;
+		/* AA=01b Replace & activate at reset */
+		c.common.cdw10[0] |= 0x00000008;
+	}
+
+	nvme_sc = nvme_submit_admin_cmd(dev, &c, NULL);
+	res = nvme_trans_status_code(hdr, nvme_sc);
+	if (res)
+		goto out_unmap;
+	if (nvme_sc)
+		res = nvme_sc;
+
+ out_unmap:
+	if (opcode == nvme_admin_download_fw) {
+		nvme_unmap_user_pages(dev, DMA_TO_DEVICE, iod);
+		nvme_free_iod(dev, iod);
+	}
+ out:
+	return res;
+}
+
+/* Mode Select Helper Functions */
+
+static inline void nvme_trans_modesel_get_bd_len(u8 *parm_list, u8 cdb10,
+						u16 *bd_len, u8 *llbaa)
+{
+	if (cdb10) {
+		/* 10 Byte CDB */
+		*bd_len = (parm_list[MODE_SELECT_10_BD_OFFSET] << 8) +
+			parm_list[MODE_SELECT_10_BD_OFFSET + 1];
+		/* LLBAA is a single flag bit; isolate it with a bitwise AND */
+		*llbaa = parm_list[MODE_SELECT_10_LLBAA_OFFSET] &
+				MODE_SELECT_10_LLBAA_MASK;
+	} else {
+		/* 6 Byte CDB */
+		*bd_len = parm_list[MODE_SELECT_6_BD_OFFSET];
+	}
+}
+
+static void nvme_trans_modesel_save_bd(struct nvme_ns *ns, u8 *parm_list,
+					u16 idx, u16 bd_len, u8 llbaa)
+{
+	u16 bd_num;
+
+	bd_num = bd_len / ((llbaa == 0) ?
+			SHORT_DESC_BLOCK : LONG_DESC_BLOCK);
+	/* Store block descriptor info if a FORMAT UNIT comes later */
+	/* TODO Saving 1st BD info; what to do if multiple BD received? */
+	if (llbaa == 0) {
+		/* Standard Block Descriptor - spc4r34 7.5.5.1 */
+		ns->mode_select_num_blocks =
+				(parm_list[idx + 1] << 16) +
+				(parm_list[idx + 2] << 8) +
+				(parm_list[idx + 3]);
+
+		ns->mode_select_block_len =
+				(parm_list[idx + 5] << 16) +
+				(parm_list[idx + 6] << 8) +
+				(parm_list[idx + 7]);
+	} else {
+		/* Long LBA Block Descriptor - sbc3r27 6.4.2.3 */
+		ns->mode_select_num_blocks =
+				(((u64)parm_list[idx + 0]) << 56) +
+				(((u64)parm_list[idx + 1]) << 48) +
+				(((u64)parm_list[idx + 2]) << 40) +
+				(((u64)parm_list[idx + 3]) << 32) +
+				(((u64)parm_list[idx + 4]) << 24) +
+				(((u64)parm_list[idx + 5]) << 16) +
+				(((u64)parm_list[idx + 6]) << 8) +
+				((u64)parm_list[idx + 7]);
+
+		ns->mode_select_block_len =
+				(parm_list[idx + 12] << 24) +
+				(parm_list[idx + 13] << 16) +
+				(parm_list[idx + 14] << 8) +
+				(parm_list[idx + 15]);
+	}
+}
+
+static u16 nvme_trans_modesel_get_mp(struct nvme_ns *ns, struct sg_io_hdr *hdr,
+					u8 *mode_page, u8 page_code)
+{
+	int res = SNTI_TRANSLATION_SUCCESS;
+	int nvme_sc;
+	struct nvme_dev *dev = ns->dev;
+	unsigned dword11;
+
+	switch (page_code) {
+	case MODE_PAGE_CACHING:
+		dword11 = ((mode_page[2] & CACHING_MODE_PAGE_WCE_MASK) ?
1 : 0); + nvme_sc = nvme_set_features(dev, NVME_FEAT_VOLATILE_WC, dword11, + 0, NULL); + res = nvme_trans_status_code(hdr, nvme_sc); + if (res) + break; + if (nvme_sc) { + res = nvme_sc; + break; + } + break; + case MODE_PAGE_CONTROL: + break; + case MODE_PAGE_POWER_CONDITION: + /* Verify the OS is not trying to set timers */ + if ((mode_page[2] & 0x01) != 0 || (mode_page[3] & 0x0F) != 0) { + res = nvme_trans_completion(hdr, + SAM_STAT_CHECK_CONDITION, + ILLEGAL_REQUEST, + SCSI_ASC_INVALID_PARAMETER, + SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + if (!res) + res = SNTI_INTERNAL_ERROR; + break; + } + break; + default: + res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, + ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB, + SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + if (!res) + res = SNTI_INTERNAL_ERROR; + break; + } + + return res; +} + +static int nvme_trans_modesel_data(struct nvme_ns *ns, struct sg_io_hdr *hdr, + u8 *cmd, u16 parm_list_len, u8 pf, + u8 sp, u8 cdb10) +{ + int res = SNTI_TRANSLATION_SUCCESS; + u8 *parm_list; + u16 bd_len; + u8 llbaa = 0; + u16 index, saved_index; + u8 page_code; + u16 mp_size; + + /* Get parm list from data-in/out buffer */ + parm_list = kmalloc(parm_list_len, GFP_KERNEL); + if (parm_list == NULL) { + res = -ENOMEM; + goto out; + } + + res = nvme_trans_copy_from_user(hdr, parm_list, parm_list_len); + if (res != SNTI_TRANSLATION_SUCCESS) + goto out_mem; + + nvme_trans_modesel_get_bd_len(parm_list, cdb10, &bd_len, &llbaa); + index = (cdb10) ? (MODE_SELECT_10_MPH_SIZE) : (MODE_SELECT_6_MPH_SIZE); + + if (bd_len != 0) { + /* Block Descriptors present, parse */ + nvme_trans_modesel_save_bd(ns, parm_list, index, bd_len, llbaa); + index += bd_len; + } + saved_index = index; + + /* Multiple mode pages may be present; iterate through all */ + /* In 1st Iteration, don't do NVME Command, only check for CDB errors */ + do { + page_code = parm_list[index] & MODE_SELECT_PAGE_CODE_MASK; + mp_size = parm_list[index + 1] + 2; + if ((page_code != MODE_PAGE_CACHING) && + (page_code != MODE_PAGE_CONTROL) && + (page_code != MODE_PAGE_POWER_CONDITION)) { + res = nvme_trans_completion(hdr, + SAM_STAT_CHECK_CONDITION, + ILLEGAL_REQUEST, + SCSI_ASC_INVALID_CDB, + SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + goto out_mem; + } + index += mp_size; + } while (index < parm_list_len); + + /* In 2nd Iteration, do the NVME Commands */ + index = saved_index; + do { + page_code = parm_list[index] & MODE_SELECT_PAGE_CODE_MASK; + mp_size = parm_list[index + 1] + 2; + res = nvme_trans_modesel_get_mp(ns, hdr, &parm_list[index], + page_code); + if (res != SNTI_TRANSLATION_SUCCESS) + break; + index += mp_size; + } while (index < parm_list_len); + + out_mem: + kfree(parm_list); + out: + return res; +} + +/* Format Unit Helper Functions */ + +static int nvme_trans_fmt_set_blk_size_count(struct nvme_ns *ns, + struct sg_io_hdr *hdr) +{ + int res = SNTI_TRANSLATION_SUCCESS; + int nvme_sc; + struct nvme_dev *dev = ns->dev; + dma_addr_t dma_addr; + void *mem; + struct nvme_id_ns *id_ns; + u8 flbas; + + /* + * SCSI Expects a MODE SELECT would have been issued prior to + * a FORMAT UNIT, and the block size and number would be used + * from the block descriptor in it. If a MODE SELECT had not + * been issued, FORMAT shall use the current values for both. 
+ */ + + if (ns->mode_select_num_blocks == 0 || ns->mode_select_block_len == 0) { + mem = dma_alloc_coherent(&dev->pci_dev->dev, + sizeof(struct nvme_id_ns), &dma_addr, GFP_KERNEL); + if (mem == NULL) { + res = -ENOMEM; + goto out; + } + /* nvme ns identify */ + nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr); + res = nvme_trans_status_code(hdr, nvme_sc); + if (res) + goto out_dma; + if (nvme_sc) { + res = nvme_sc; + goto out_dma; + } + id_ns = mem; + + if (ns->mode_select_num_blocks == 0) + ns->mode_select_num_blocks = id_ns->ncap; + if (ns->mode_select_block_len == 0) { + flbas = (id_ns->flbas) & 0x0F; + ns->mode_select_block_len = + (1 << (id_ns->lbaf[flbas].ds)); + } + out_dma: + dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), + mem, dma_addr); + } + out: + return res; +} + +static int nvme_trans_fmt_get_parm_header(struct sg_io_hdr *hdr, u8 len, + u8 format_prot_info, u8 *nvme_pf_code) +{ + int res = SNTI_TRANSLATION_SUCCESS; + u8 *parm_list; + u8 pf_usage, pf_code; + + parm_list = kmalloc(len, GFP_KERNEL); + if (parm_list == NULL) { + res = -ENOMEM; + goto out; + } + res = nvme_trans_copy_from_user(hdr, parm_list, len); + if (res != SNTI_TRANSLATION_SUCCESS) + goto out_mem; + + if ((parm_list[FORMAT_UNIT_IMMED_OFFSET] & + FORMAT_UNIT_IMMED_MASK) != 0) { + res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, + ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB, + SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + goto out_mem; + } + + if (len == FORMAT_UNIT_LONG_PARM_LIST_LEN && + (parm_list[FORMAT_UNIT_PROT_INT_OFFSET] & 0x0F) != 0) { + res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, + ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB, + SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + goto out_mem; + } + pf_usage = parm_list[FORMAT_UNIT_PROT_FIELD_USAGE_OFFSET] & + FORMAT_UNIT_PROT_FIELD_USAGE_MASK; + pf_code = (pf_usage << 2) | format_prot_info; + switch (pf_code) { + case 0: + *nvme_pf_code = 0; + break; + case 2: + *nvme_pf_code = 1; + break; + case 3: + *nvme_pf_code = 2; + break; + case 7: + *nvme_pf_code = 3; + break; + default: + res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, + ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB, + SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + break; + } + + out_mem: + kfree(parm_list); + out: + return res; +} + +static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr, + u8 prot_info) +{ + int res = SNTI_TRANSLATION_SUCCESS; + int nvme_sc; + struct nvme_dev *dev = ns->dev; + dma_addr_t dma_addr; + void *mem; + struct nvme_id_ns *id_ns; + u8 i; + u8 flbas, nlbaf; + u8 selected_lbaf = 0xFF; + u32 cdw10 = 0; + struct nvme_command c; + + /* Loop thru LBAF's in id_ns to match reqd lbaf, put in cdw10 */ + mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), + &dma_addr, GFP_KERNEL); + if (mem == NULL) { + res = -ENOMEM; + goto out; + } + /* nvme ns identify */ + nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr); + res = nvme_trans_status_code(hdr, nvme_sc); + if (res) + goto out_dma; + if (nvme_sc) { + res = nvme_sc; + goto out_dma; + } + id_ns = mem; + flbas = (id_ns->flbas) & 0x0F; + nlbaf = id_ns->nlbaf; + + for (i = 0; i < nlbaf; i++) { + if (ns->mode_select_block_len == (1 << (id_ns->lbaf[i].ds))) { + selected_lbaf = i; + break; + } + } + if (selected_lbaf > 0x0F) { + res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, + ILLEGAL_REQUEST, SCSI_ASC_INVALID_PARAMETER, + SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + } + if (ns->mode_select_num_blocks != id_ns->ncap) { + res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, + 
ILLEGAL_REQUEST, SCSI_ASC_INVALID_PARAMETER,
+					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
+	}
+
+	cdw10 |= prot_info << 5;
+	cdw10 |= selected_lbaf & 0x0F;
+	memset(&c, 0, sizeof(c));
+	c.format.opcode = nvme_admin_format_nvm;
+	c.format.nsid = cpu_to_le32(ns->ns_id);
+	c.format.cdw10 = cpu_to_le32(cdw10);
+
+	nvme_sc = nvme_submit_admin_cmd(dev, &c, NULL);
+	res = nvme_trans_status_code(hdr, nvme_sc);
+	if (res)
+		goto out_dma;
+	if (nvme_sc)
+		res = nvme_sc;
+
+ out_dma:
+	dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), mem,
+			  dma_addr);
+ out:
+	return res;
+}
+
+/* Read/Write Helper Functions */
+
+static inline void nvme_trans_get_io_cdb6(u8 *cmd,
+					struct nvme_trans_io_cdb *cdb_info)
+{
+	cdb_info->fua = 0;
+	cdb_info->prot_info = 0;
+	cdb_info->lba = GET_U32_FROM_CDB(cmd, IO_6_CDB_LBA_OFFSET) &
+					IO_6_CDB_LBA_MASK;
+	cdb_info->xfer_len = GET_U8_FROM_CDB(cmd, IO_6_CDB_TX_LEN_OFFSET);
+
+	/* sbc3r27 sec 5.32 - TRANSFER LEN of 0 implies a 256 Block transfer */
+	if (cdb_info->xfer_len == 0)
+		cdb_info->xfer_len = IO_6_DEFAULT_TX_LEN;
+}
+
+static inline void nvme_trans_get_io_cdb10(u8 *cmd,
+					struct nvme_trans_io_cdb *cdb_info)
+{
+	cdb_info->fua = GET_U8_FROM_CDB(cmd, IO_10_CDB_FUA_OFFSET) &
+					IO_CDB_FUA_MASK;
+	/* mask first, then shift: >> binds tighter than & in C */
+	cdb_info->prot_info = (GET_U8_FROM_CDB(cmd, IO_10_CDB_WP_OFFSET) &
+					IO_CDB_WP_MASK) >> IO_CDB_WP_SHIFT;
+	cdb_info->lba = GET_U32_FROM_CDB(cmd, IO_10_CDB_LBA_OFFSET);
+	cdb_info->xfer_len = GET_U16_FROM_CDB(cmd, IO_10_CDB_TX_LEN_OFFSET);
+}
+
+static inline void nvme_trans_get_io_cdb12(u8 *cmd,
+					struct nvme_trans_io_cdb *cdb_info)
+{
+	cdb_info->fua = GET_U8_FROM_CDB(cmd, IO_12_CDB_FUA_OFFSET) &
+					IO_CDB_FUA_MASK;
+	cdb_info->prot_info = (GET_U8_FROM_CDB(cmd, IO_12_CDB_WP_OFFSET) &
+					IO_CDB_WP_MASK) >> IO_CDB_WP_SHIFT;
+	cdb_info->lba = GET_U32_FROM_CDB(cmd, IO_12_CDB_LBA_OFFSET);
+	cdb_info->xfer_len = GET_U32_FROM_CDB(cmd, IO_12_CDB_TX_LEN_OFFSET);
+}
+
+static inline void nvme_trans_get_io_cdb16(u8 *cmd,
+					struct nvme_trans_io_cdb *cdb_info)
+{
+	cdb_info->fua = GET_U8_FROM_CDB(cmd, IO_16_CDB_FUA_OFFSET) &
+					IO_CDB_FUA_MASK;
+	cdb_info->prot_info = (GET_U8_FROM_CDB(cmd, IO_16_CDB_WP_OFFSET) &
+					IO_CDB_WP_MASK) >> IO_CDB_WP_SHIFT;
+	cdb_info->lba = GET_U64_FROM_CDB(cmd, IO_16_CDB_LBA_OFFSET);
+	cdb_info->xfer_len = GET_U32_FROM_CDB(cmd, IO_16_CDB_TX_LEN_OFFSET);
+}
+
+static inline u32 nvme_trans_io_get_num_cmds(struct sg_io_hdr *hdr,
+					struct nvme_trans_io_cdb *cdb_info,
+					u32 max_blocks)
+{
+	/* If using iovecs, send one nvme command per vector */
+	if (hdr->iovec_count > 0)
+		return hdr->iovec_count;
+	else if (cdb_info->xfer_len > max_blocks)
+		return ((cdb_info->xfer_len - 1) / max_blocks) + 1;
+	else
+		return 1;
+}
+
+static u16 nvme_trans_io_get_control(struct nvme_ns *ns,
+					struct nvme_trans_io_cdb *cdb_info)
+{
+	u16 control = 0;
+
+	/* When Protection information support is added, implement here */
+
+	if (cdb_info->fua > 0)
+		control |= NVME_RW_FUA;
+
+	return control;
+}
+
+static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr,
+				struct nvme_trans_io_cdb *cdb_info, u8 is_write)
+{
+	int res = SNTI_TRANSLATION_SUCCESS;
+	int nvme_sc;
+	struct nvme_dev *dev = ns->dev;
+	struct nvme_queue *nvmeq;	/* acquired per command in the loop */
+	u32 num_cmds;
+	struct nvme_iod *iod;
+	u64 unit_len;
+	u64 unit_num_blocks;	/* Number of blocks to xfer in each nvme cmd */
+	u32 retcode;
+	u32 i = 0;
+	u64 nvme_offset = 0;
+	void *next_mapping_addr;
+	struct nvme_command c;
+	u8 opcode = (is_write ?
nvme_cmd_write : nvme_cmd_read); + u16 control; + u32 max_blocks = (dev->max_hw_sectors << 9) >> ns->lba_shift; + + num_cmds = nvme_trans_io_get_num_cmds(hdr, cdb_info, max_blocks); + + /* + * This loop handles two cases. + * First, when an SGL is used in the form of an iovec list: + * - Use iov_base as the next mapping address for the nvme command_id + * - Use iov_len as the data transfer length for the command. + * Second, when we have a single buffer + * - If larger than max_blocks, split into chunks, offset + * each nvme command accordingly. + */ + for (i = 0; i < num_cmds; i++) { + memset(&c, 0, sizeof(c)); + if (hdr->iovec_count > 0) { + struct sg_iovec *sgl = hdr->dxferp; + + unit_len = sgl[i].iov_len; + unit_num_blocks = unit_len >> ns->lba_shift; + next_mapping_addr = sgl[i].iov_base; + } else { + unit_num_blocks = min((u64)max_blocks, + (cdb_info->xfer_len - nvme_offset)); + unit_len = unit_num_blocks << ns->lba_shift; + next_mapping_addr = hdr->dxferp + + ((1 << ns->lba_shift) * nvme_offset); + } + + c.rw.opcode = opcode; + c.rw.nsid = cpu_to_le32(ns->ns_id); + c.rw.slba = cpu_to_le64(cdb_info->lba + nvme_offset); + c.rw.length = cpu_to_le16(unit_num_blocks - 1); + control = nvme_trans_io_get_control(ns, cdb_info); + c.rw.control = cpu_to_le16(control); + + iod = nvme_map_user_pages(dev, + (is_write) ? DMA_TO_DEVICE : DMA_FROM_DEVICE, + (unsigned long)next_mapping_addr, unit_len); + if (IS_ERR(iod)) { + res = PTR_ERR(iod); + goto out; + } + retcode = nvme_setup_prps(dev, &c.common, iod, unit_len, + GFP_KERNEL); + if (retcode != unit_len) { + nvme_unmap_user_pages(dev, + (is_write) ? DMA_TO_DEVICE : DMA_FROM_DEVICE, + iod); + nvme_free_iod(dev, iod); + res = -ENOMEM; + goto out; + } + + nvme_offset += unit_num_blocks; + + nvmeq = get_nvmeq(dev); + /* + * Since nvme_submit_sync_cmd sleeps, we can't keep + * preemption disabled. We may be preempted at any + * point, and be rescheduled to a different CPU. That + * will cause cacheline bouncing, but no additional + * races since q_lock already protects against other + * CPUs. + */ + put_nvmeq(nvmeq); + nvme_sc = nvme_submit_sync_cmd(nvmeq, &c, NULL, + NVME_IO_TIMEOUT); + if (nvme_sc != NVME_SC_SUCCESS) { + nvme_unmap_user_pages(dev, + (is_write) ? DMA_TO_DEVICE : DMA_FROM_DEVICE, + iod); + nvme_free_iod(dev, iod); + res = nvme_trans_status_code(hdr, nvme_sc); + goto out; + } + nvme_unmap_user_pages(dev, + (is_write) ? 
DMA_TO_DEVICE : DMA_FROM_DEVICE, + iod); + nvme_free_iod(dev, iod); + } + res = nvme_trans_status_code(hdr, NVME_SC_SUCCESS); + + out: + return res; +} + + +/* SCSI Command Translation Functions */ + +static int nvme_trans_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 is_write, + u8 *cmd) +{ + int res = SNTI_TRANSLATION_SUCCESS; + struct nvme_trans_io_cdb cdb_info; + u8 opcode = cmd[0]; + u64 xfer_bytes; + u64 sum_iov_len = 0; + struct sg_iovec *sgl; + int i; + + /* Extract Fields from CDB */ + switch (opcode) { + case WRITE_6: + case READ_6: + nvme_trans_get_io_cdb6(cmd, &cdb_info); + break; + case WRITE_10: + case READ_10: + nvme_trans_get_io_cdb10(cmd, &cdb_info); + break; + case WRITE_12: + case READ_12: + nvme_trans_get_io_cdb12(cmd, &cdb_info); + break; + case WRITE_16: + case READ_16: + nvme_trans_get_io_cdb16(cmd, &cdb_info); + break; + default: + /* Will never really reach here */ + res = SNTI_INTERNAL_ERROR; + goto out; + } + + /* Calculate total length of transfer (in bytes) */ + if (hdr->iovec_count > 0) { + sgl = hdr->dxferp; + for (i = 0; i < hdr->iovec_count; i++) { + sum_iov_len += sgl[i].iov_len; + /* IO vector sizes should be multiples of block size */ + if (sgl[i].iov_len % (1 << ns->lba_shift) != 0) { + res = nvme_trans_completion(hdr, + SAM_STAT_CHECK_CONDITION, + ILLEGAL_REQUEST, + SCSI_ASC_INVALID_PARAMETER, + SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + goto out; + } + } + } else { + sum_iov_len = hdr->dxfer_len; + } + + /* As Per sg ioctl howto, if the lengths differ, use the lower one */ + xfer_bytes = min(((u64)hdr->dxfer_len), sum_iov_len); + + /* If block count and actual data buffer size dont match, error out */ + if (xfer_bytes != (cdb_info.xfer_len << ns->lba_shift)) { + res = -EINVAL; + goto out; + } + + /* Check for 0 length transfer - it is not illegal */ + if (cdb_info.xfer_len == 0) + goto out; + + /* Send NVMe IO Command(s) */ + res = nvme_trans_do_nvme_io(ns, hdr, &cdb_info, is_write); + if (res != SNTI_TRANSLATION_SUCCESS) + goto out; + + out: + return res; +} + +static int nvme_trans_inquiry(struct nvme_ns *ns, struct sg_io_hdr *hdr, + u8 *cmd) +{ + int res = SNTI_TRANSLATION_SUCCESS; + u8 evpd; + u8 page_code; + int alloc_len; + u8 *inq_response; + + evpd = GET_INQ_EVPD_BIT(cmd); + page_code = GET_INQ_PAGE_CODE(cmd); + alloc_len = GET_INQ_ALLOC_LENGTH(cmd); + + inq_response = kmalloc(STANDARD_INQUIRY_LENGTH, GFP_KERNEL); + if (inq_response == NULL) { + res = -ENOMEM; + goto out_mem; + } + + if (evpd == 0) { + if (page_code == INQ_STANDARD_INQUIRY_PAGE) { + res = nvme_trans_standard_inquiry_page(ns, hdr, + inq_response, alloc_len); + } else { + res = nvme_trans_completion(hdr, + SAM_STAT_CHECK_CONDITION, + ILLEGAL_REQUEST, + SCSI_ASC_INVALID_CDB, + SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + } + } else { + switch (page_code) { + case VPD_SUPPORTED_PAGES: + res = nvme_trans_supported_vpd_pages(ns, hdr, + inq_response, alloc_len); + break; + case VPD_SERIAL_NUMBER: + res = nvme_trans_unit_serial_page(ns, hdr, inq_response, + alloc_len); + break; + case VPD_DEVICE_IDENTIFIERS: + res = nvme_trans_device_id_page(ns, hdr, inq_response, + alloc_len); + break; + case VPD_EXTENDED_INQUIRY: + res = nvme_trans_ext_inq_page(ns, hdr, alloc_len); + break; + case VPD_BLOCK_DEV_CHARACTERISTICS: + res = nvme_trans_bdev_char_page(ns, hdr, alloc_len); + break; + default: + res = nvme_trans_completion(hdr, + SAM_STAT_CHECK_CONDITION, + ILLEGAL_REQUEST, + SCSI_ASC_INVALID_CDB, + SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + break; + } + } + kfree(inq_response); + out_mem: + return res; +} + 
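[Editor's aside: all of the INQUIRY translations above are reached from user
space through the SG_IO ioctl serviced by nvme_sg_io() further down. The
following is a minimal sketch of such a caller, assuming the NVMe block
device node routes SG_IO to nvme_sg_io() (as the driver's ioctl handler is
expected to), that the caller holds CAP_SYS_ADMIN as nvme_sg_io() requires,
and that /dev/nvme0n1 is just an illustrative device path.]

	/* user-space sketch: issue a standard INQUIRY through SG_IO */
	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <scsi/sg.h>

	int main(void)
	{
		unsigned char cdb[6] = { 0x12, 0, 0, 0, 96, 0 }; /* INQUIRY */
		unsigned char buf[96];
		struct sg_io_hdr hdr;
		int fd = open("/dev/nvme0n1", O_RDONLY);

		if (fd < 0)
			return 1;
		memset(&hdr, 0, sizeof(hdr));
		hdr.interface_id = 'S';		/* checked by nvme_sg_io() */
		hdr.cmd_len = sizeof(cdb);
		hdr.cmdp = cdb;
		hdr.dxfer_direction = SG_DXFER_FROM_DEV;
		hdr.dxferp = buf;
		hdr.dxfer_len = sizeof(buf);
		hdr.timeout = 5000;		/* milliseconds */
		if (ioctl(fd, SG_IO, &hdr) == 0)
			printf("vendor %.8s model %.16s\n",
			       (char *)&buf[8], (char *)&buf[16]);
		return 0;
	}

[The vendor/model offsets follow the standard INQUIRY data layout that the
standard inquiry translation fills in.]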
+static int nvme_trans_log_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
+					u8 *cmd)
+{
+	int res = SNTI_TRANSLATION_SUCCESS;
+	u16 alloc_len;
+	u8 sp;
+	u8 pc;
+	u8 page_code;
+
+	sp = GET_U8_FROM_CDB(cmd, LOG_SENSE_CDB_SP_OFFSET);
+	if (sp != LOG_SENSE_CDB_SP_NOT_ENABLED) {
+		res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
+				ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
+				SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
+		goto out;
+	}
+	pc = GET_U8_FROM_CDB(cmd, LOG_SENSE_CDB_PC_OFFSET);
+	page_code = pc & LOG_SENSE_CDB_PAGE_CODE_MASK;
+	pc = (pc & LOG_SENSE_CDB_PC_MASK) >> LOG_SENSE_CDB_PC_SHIFT;
+	if (pc != LOG_SENSE_CDB_PC_CUMULATIVE_VALUES) {
+		res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
+				ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
+				SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
+		goto out;
+	}
+	alloc_len = GET_U16_FROM_CDB(cmd, LOG_SENSE_CDB_ALLOC_LENGTH_OFFSET);
+	switch (page_code) {
+	case LOG_PAGE_SUPPORTED_LOG_PAGES_PAGE:
+		res = nvme_trans_log_supp_pages(ns, hdr, alloc_len);
+		break;
+	case LOG_PAGE_INFORMATIONAL_EXCEPTIONS_PAGE:
+		res = nvme_trans_log_info_exceptions(ns, hdr, alloc_len);
+		break;
+	case LOG_PAGE_TEMPERATURE_PAGE:
+		res = nvme_trans_log_temperature(ns, hdr, alloc_len);
+		break;
+	default:
+		res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
+				ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
+				SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
+		break;
+	}
+
+ out:
+	return res;
+}
+
+static int nvme_trans_mode_select(struct nvme_ns *ns, struct sg_io_hdr *hdr,
+					u8 *cmd)
+{
+	int res = SNTI_TRANSLATION_SUCCESS;
+	u8 cdb10 = 0;
+	u16 parm_list_len;
+	u8 page_format;
+	u8 save_pages;
+
+	page_format = GET_U8_FROM_CDB(cmd, MODE_SELECT_CDB_PAGE_FORMAT_OFFSET);
+	page_format &= MODE_SELECT_CDB_PAGE_FORMAT_MASK;
+
+	save_pages = GET_U8_FROM_CDB(cmd, MODE_SELECT_CDB_SAVE_PAGES_OFFSET);
+	save_pages &= MODE_SELECT_CDB_SAVE_PAGES_MASK;
+
+	if (GET_OPCODE(cmd) == MODE_SELECT) {
+		parm_list_len = GET_U8_FROM_CDB(cmd,
+				MODE_SELECT_6_CDB_PARAM_LIST_LENGTH_OFFSET);
+	} else {
+		parm_list_len = GET_U16_FROM_CDB(cmd,
+				MODE_SELECT_10_CDB_PARAM_LIST_LENGTH_OFFSET);
+		cdb10 = 1;
+	}
+
+	if (parm_list_len != 0) {
+		/*
+		 * According to SPC-4 r24, a parameter list length field of 0
+		 * shall not be considered an error
+		 */
+		res = nvme_trans_modesel_data(ns, hdr, cmd, parm_list_len,
+						page_format, save_pages, cdb10);
+	}
+
+	return res;
+}
+
+static int nvme_trans_mode_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
+					u8 *cmd)
+{
+	int res = SNTI_TRANSLATION_SUCCESS;
+	u16 alloc_len;
+	u8 cdb10 = 0;
+	u8 page_code;
+	u8 pc;
+
+	if (GET_OPCODE(cmd) == MODE_SENSE) {
+		alloc_len = GET_U8_FROM_CDB(cmd, MODE_SENSE6_ALLOC_LEN_OFFSET);
+	} else {
+		alloc_len = GET_U16_FROM_CDB(cmd,
+						MODE_SENSE10_ALLOC_LEN_OFFSET);
+		cdb10 = 1;
+	}
+
+	pc = GET_U8_FROM_CDB(cmd, MODE_SENSE_PAGE_CONTROL_OFFSET) &
+						MODE_SENSE_PAGE_CONTROL_MASK;
+	if (pc != MODE_SENSE_PC_CURRENT_VALUES) {
+		res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
+				ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
+				SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
+		goto out;
+	}
+
+	page_code = GET_U8_FROM_CDB(cmd, MODE_SENSE_PAGE_CODE_OFFSET) &
+					MODE_SENSE_PAGE_CODE_MASK;
+	switch (page_code) {
+	case MODE_PAGE_CACHING:
+		res = nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len,
+						cdb10,
+						&nvme_trans_fill_caching_page,
+						MODE_PAGE_CACHING_LEN);
+		break;
+	case MODE_PAGE_CONTROL:
+		res = nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len,
+						cdb10,
+						&nvme_trans_fill_control_page,
+						MODE_PAGE_CONTROL_LEN);
+		break;
+	case MODE_PAGE_POWER_CONDITION:
+		res =
nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len, + cdb10, + &nvme_trans_fill_pow_cnd_page, + MODE_PAGE_POW_CND_LEN); + break; + case MODE_PAGE_INFO_EXCEP: + res = nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len, + cdb10, + &nvme_trans_fill_inf_exc_page, + MODE_PAGE_INF_EXC_LEN); + break; + case MODE_PAGE_RETURN_ALL: + res = nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len, + cdb10, + &nvme_trans_fill_all_pages, + MODE_PAGE_ALL_LEN); + break; + default: + res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, + ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB, + SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + break; + } + + out: + return res; +} + +static int nvme_trans_read_capacity(struct nvme_ns *ns, struct sg_io_hdr *hdr, + u8 *cmd) +{ + int res = SNTI_TRANSLATION_SUCCESS; + int nvme_sc; + u32 alloc_len = READ_CAP_10_RESP_SIZE; + u32 resp_size = READ_CAP_10_RESP_SIZE; + u32 xfer_len; + u8 cdb16; + struct nvme_dev *dev = ns->dev; + dma_addr_t dma_addr; + void *mem; + struct nvme_id_ns *id_ns; + u8 *response; + + cdb16 = IS_READ_CAP_16(cmd); + if (cdb16) { + alloc_len = GET_READ_CAP_16_ALLOC_LENGTH(cmd); + resp_size = READ_CAP_16_RESP_SIZE; + } + + mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), + &dma_addr, GFP_KERNEL); + if (mem == NULL) { + res = -ENOMEM; + goto out; + } + /* nvme ns identify */ + nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr); + res = nvme_trans_status_code(hdr, nvme_sc); + if (res) + goto out_dma; + if (nvme_sc) { + res = nvme_sc; + goto out_dma; + } + id_ns = mem; + + response = kmalloc(resp_size, GFP_KERNEL); + if (response == NULL) { + res = -ENOMEM; + goto out_dma; + } + memset(response, 0, resp_size); + nvme_trans_fill_read_cap(response, id_ns, cdb16); + + xfer_len = min(alloc_len, resp_size); + res = nvme_trans_copy_to_user(hdr, response, xfer_len); + + kfree(response); + out_dma: + dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), mem, + dma_addr); + out: + return res; +} + +static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr, + u8 *cmd) +{ + int res = SNTI_TRANSLATION_SUCCESS; + int nvme_sc; + u32 alloc_len, xfer_len, resp_size; + u8 select_report; + u8 *response; + struct nvme_dev *dev = ns->dev; + dma_addr_t dma_addr; + void *mem; + struct nvme_id_ctrl *id_ctrl; + u32 ll_length, lun_id; + u8 lun_id_offset = REPORT_LUNS_FIRST_LUN_OFFSET; + u32 tmp_len; + + alloc_len = GET_REPORT_LUNS_ALLOC_LENGTH(cmd); + select_report = GET_U8_FROM_CDB(cmd, REPORT_LUNS_SR_OFFSET); + + if ((select_report != ALL_LUNS_RETURNED) && + (select_report != ALL_WELL_KNOWN_LUNS_RETURNED) && + (select_report != RESTRICTED_LUNS_RETURNED)) { + res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, + ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB, + SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + goto out; + } else { + /* NVMe Controller Identify */ + mem = dma_alloc_coherent(&dev->pci_dev->dev, + sizeof(struct nvme_id_ctrl), + &dma_addr, GFP_KERNEL); + if (mem == NULL) { + res = -ENOMEM; + goto out; + } + nvme_sc = nvme_identify(dev, 0, 1, dma_addr); + res = nvme_trans_status_code(hdr, nvme_sc); + if (res) + goto out_dma; + if (nvme_sc) { + res = nvme_sc; + goto out_dma; + } + id_ctrl = mem; + ll_length = id_ctrl->nn * LUN_ENTRY_SIZE; + resp_size = ll_length + LUN_DATA_HEADER_SIZE; + + if (alloc_len < resp_size) { + res = nvme_trans_completion(hdr, + SAM_STAT_CHECK_CONDITION, + ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB, + SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + goto out_dma; + } + + response = kmalloc(resp_size, GFP_KERNEL); + if (response == NULL) { + res 
= -ENOMEM; + goto out_dma; + } + memset(response, 0, resp_size); + + /* The first LUN ID will always be 0 per the SAM spec */ + for (lun_id = 0; lun_id < id_ctrl->nn; lun_id++) { + /* + * Set the LUN Id and then increment to the next LUN + * location in the parameter data. + */ + u64 tmp_id = cpu_to_be64(lun_id); + memcpy(&response[lun_id_offset], &tmp_id, sizeof(u64)); + lun_id_offset += LUN_ENTRY_SIZE; + } + tmp_len = cpu_to_be32(ll_length); + memcpy(response, &tmp_len, sizeof(u32)); + } + + xfer_len = min(alloc_len, resp_size); + res = nvme_trans_copy_to_user(hdr, response, xfer_len); + + kfree(response); + out_dma: + dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ctrl), mem, + dma_addr); + out: + return res; +} + +static int nvme_trans_request_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr, + u8 *cmd) +{ + int res = SNTI_TRANSLATION_SUCCESS; + u8 alloc_len, xfer_len, resp_size; + u8 desc_format; + u8 *response; + + alloc_len = GET_REQUEST_SENSE_ALLOC_LENGTH(cmd); + desc_format = GET_U8_FROM_CDB(cmd, REQUEST_SENSE_DESC_OFFSET); + desc_format &= REQUEST_SENSE_DESC_MASK; + + resp_size = ((desc_format) ? (DESC_FMT_SENSE_DATA_SIZE) : + (FIXED_FMT_SENSE_DATA_SIZE)); + response = kmalloc(resp_size, GFP_KERNEL); + if (response == NULL) { + res = -ENOMEM; + goto out; + } + memset(response, 0, resp_size); + + if (desc_format == DESCRIPTOR_FORMAT_SENSE_DATA_TYPE) { + /* Descriptor Format Sense Data */ + response[0] = DESC_FORMAT_SENSE_DATA; + response[1] = NO_SENSE; + /* TODO How is LOW POWER CONDITION ON handled? (byte 2) */ + response[2] = SCSI_ASC_NO_SENSE; + response[3] = SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + /* SDAT_OVFL = 0 | Additional Sense Length = 0 */ + } else { + /* Fixed Format Sense Data */ + response[0] = FIXED_SENSE_DATA; + /* Byte 1 = Obsolete */ + response[2] = NO_SENSE; /* FM, EOM, ILI, SDAT_OVFL = 0 */ + /* Bytes 3-6 - Information - set to zero */ + response[7] = FIXED_SENSE_DATA_ADD_LENGTH; + /* Bytes 8-11 - Cmd Specific Information - set to zero */ + response[12] = SCSI_ASC_NO_SENSE; + response[13] = SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + /* Byte 14 = Field Replaceable Unit Code = 0 */ + /* Bytes 15-17 - SKSV=0; Sense Key Specific = 0 */ + } + + xfer_len = min(alloc_len, resp_size); + res = nvme_trans_copy_to_user(hdr, response, xfer_len); + + kfree(response); + out: + return res; +} + +static int nvme_trans_security_protocol(struct nvme_ns *ns, + struct sg_io_hdr *hdr, + u8 *cmd) +{ + return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, + ILLEGAL_REQUEST, SCSI_ASC_ILLEGAL_COMMAND, + SCSI_ASCQ_CAUSE_NOT_REPORTABLE); +} + +static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr, + u8 *cmd) +{ + int res = SNTI_TRANSLATION_SUCCESS; + int nvme_sc; + struct nvme_queue *nvmeq = get_nvmeq(ns->dev); + u8 immed, pcmod, pc, no_flush, start; + + immed = GET_U8_FROM_CDB(cmd, START_STOP_UNIT_CDB_IMMED_OFFSET); + pcmod = GET_U8_FROM_CDB(cmd, START_STOP_UNIT_CDB_POWER_COND_MOD_OFFSET); + pc = GET_U8_FROM_CDB(cmd, START_STOP_UNIT_CDB_POWER_COND_OFFSET); + no_flush = GET_U8_FROM_CDB(cmd, START_STOP_UNIT_CDB_NO_FLUSH_OFFSET); + start = GET_U8_FROM_CDB(cmd, START_STOP_UNIT_CDB_START_OFFSET); + + immed &= START_STOP_UNIT_CDB_IMMED_MASK; + pcmod &= START_STOP_UNIT_CDB_POWER_COND_MOD_MASK; + pc = (pc & START_STOP_UNIT_CDB_POWER_COND_MASK) >> NIBBLE_SHIFT; + no_flush &= START_STOP_UNIT_CDB_NO_FLUSH_MASK; + start &= START_STOP_UNIT_CDB_START_MASK; + + if (immed != 0) { + res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, + ILLEGAL_REQUEST, 
SCSI_ASC_INVALID_CDB, + SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + } else { + if (no_flush == 0) { + /* Issue NVME FLUSH command prior to START STOP UNIT */ + nvme_sc = nvme_submit_flush_data(nvmeq, ns); + put_nvmeq(nvmeq); + res = nvme_trans_status_code(hdr, nvme_sc); + if (res) + goto out; + if (nvme_sc) { + res = nvme_sc; + goto out; + } + } + /* Setup the expected power state transition */ + res = nvme_trans_power_state(ns, hdr, pc, pcmod, start); + } + + out: + return res; +} + +static int nvme_trans_synchronize_cache(struct nvme_ns *ns, + struct sg_io_hdr *hdr, u8 *cmd) +{ + int res = SNTI_TRANSLATION_SUCCESS; + int nvme_sc; + struct nvme_queue *nvmeq = get_nvmeq(ns->dev); + put_nvmeq(nvmeq); + nvme_sc = nvme_submit_flush_data(nvmeq, ns); + res = nvme_trans_status_code(hdr, nvme_sc); + if (res) + goto out; + if (nvme_sc) + res = nvme_sc; + + out: + return res; +} + +static int nvme_trans_format_unit(struct nvme_ns *ns, struct sg_io_hdr *hdr, + u8 *cmd) +{ + int res = SNTI_TRANSLATION_SUCCESS; + u8 parm_hdr_len = 0; + u8 nvme_pf_code = 0; + u8 format_prot_info, long_list, format_data; + + format_prot_info = GET_U8_FROM_CDB(cmd, + FORMAT_UNIT_CDB_FORMAT_PROT_INFO_OFFSET); + long_list = GET_U8_FROM_CDB(cmd, FORMAT_UNIT_CDB_LONG_LIST_OFFSET); + format_data = GET_U8_FROM_CDB(cmd, FORMAT_UNIT_CDB_FORMAT_DATA_OFFSET); + + format_prot_info = (format_prot_info & + FORMAT_UNIT_CDB_FORMAT_PROT_INFO_MASK) >> + FORMAT_UNIT_CDB_FORMAT_PROT_INFO_SHIFT; + long_list &= FORMAT_UNIT_CDB_LONG_LIST_MASK; + format_data &= FORMAT_UNIT_CDB_FORMAT_DATA_MASK; + + if (format_data != 0) { + if (format_prot_info != 0) { + if (long_list == 0) + parm_hdr_len = FORMAT_UNIT_SHORT_PARM_LIST_LEN; + else + parm_hdr_len = FORMAT_UNIT_LONG_PARM_LIST_LEN; + } + } else if (format_data == 0 && format_prot_info != 0) { + res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, + ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB, + SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + goto out; + } + + /* Get parm header from data-in/out buffer */ + /* + * According to the translation spec, the only fields in the parameter + * list we are concerned with are in the header. So allocate only that. 
+ */ + if (parm_hdr_len > 0) { + res = nvme_trans_fmt_get_parm_header(hdr, parm_hdr_len, + format_prot_info, &nvme_pf_code); + if (res != SNTI_TRANSLATION_SUCCESS) + goto out; + } + + /* Attempt to activate any previously downloaded firmware image */ + res = nvme_trans_send_fw_cmd(ns, hdr, nvme_admin_activate_fw, 0, 0, 0); + + /* Determine Block size and count and send format command */ + res = nvme_trans_fmt_set_blk_size_count(ns, hdr); + if (res != SNTI_TRANSLATION_SUCCESS) + goto out; + + res = nvme_trans_fmt_send_cmd(ns, hdr, nvme_pf_code); + + out: + return res; +} + +static int nvme_trans_test_unit_ready(struct nvme_ns *ns, + struct sg_io_hdr *hdr, + u8 *cmd) +{ + int res = SNTI_TRANSLATION_SUCCESS; + struct nvme_dev *dev = ns->dev; + + if (!(readl(&dev->bar->csts) & NVME_CSTS_RDY)) + res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, + NOT_READY, SCSI_ASC_LUN_NOT_READY, + SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + else + res = nvme_trans_completion(hdr, SAM_STAT_GOOD, NO_SENSE, 0, 0); + + return res; +} + +static int nvme_trans_write_buffer(struct nvme_ns *ns, struct sg_io_hdr *hdr, + u8 *cmd) +{ + int res = SNTI_TRANSLATION_SUCCESS; + u32 buffer_offset, parm_list_length; + u8 buffer_id, mode; + + parm_list_length = + GET_U24_FROM_CDB(cmd, WRITE_BUFFER_CDB_PARM_LIST_LENGTH_OFFSET); + if (parm_list_length % BYTES_TO_DWORDS != 0) { + /* NVMe expects Firmware file to be a whole number of DWORDS */ + res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, + ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB, + SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + goto out; + } + buffer_id = GET_U8_FROM_CDB(cmd, WRITE_BUFFER_CDB_BUFFER_ID_OFFSET); + if (buffer_id > NVME_MAX_FIRMWARE_SLOT) { + res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, + ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB, + SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + goto out; + } + mode = GET_U8_FROM_CDB(cmd, WRITE_BUFFER_CDB_MODE_OFFSET) & + WRITE_BUFFER_CDB_MODE_MASK; + buffer_offset = + GET_U24_FROM_CDB(cmd, WRITE_BUFFER_CDB_BUFFER_OFFSET_OFFSET); + + switch (mode) { + case DOWNLOAD_SAVE_ACTIVATE: + res = nvme_trans_send_fw_cmd(ns, hdr, nvme_admin_download_fw, + parm_list_length, buffer_offset, + buffer_id); + if (res != SNTI_TRANSLATION_SUCCESS) + goto out; + res = nvme_trans_send_fw_cmd(ns, hdr, nvme_admin_activate_fw, + parm_list_length, buffer_offset, + buffer_id); + break; + case DOWNLOAD_SAVE_DEFER_ACTIVATE: + res = nvme_trans_send_fw_cmd(ns, hdr, nvme_admin_download_fw, + parm_list_length, buffer_offset, + buffer_id); + break; + case ACTIVATE_DEFERRED_MICROCODE: + res = nvme_trans_send_fw_cmd(ns, hdr, nvme_admin_activate_fw, + parm_list_length, buffer_offset, + buffer_id); + break; + default: + res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, + ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB, + SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + break; + } + + out: + return res; +} + +static int nvme_scsi_translate(struct nvme_ns *ns, struct sg_io_hdr *hdr) +{ + u8 cmd[BLK_MAX_CDB]; + int retcode; + unsigned int opcode; + + if (hdr->cmdp == NULL) + return -EMSGSIZE; + if (copy_from_user(cmd, hdr->cmdp, hdr->cmd_len)) + return -EFAULT; + + opcode = cmd[0]; + + switch (opcode) { + case READ_6: + case READ_10: + case READ_12: + case READ_16: + retcode = nvme_trans_io(ns, hdr, 0, cmd); + break; + case WRITE_6: + case WRITE_10: + case WRITE_12: + case WRITE_16: + retcode = nvme_trans_io(ns, hdr, 1, cmd); + break; + case INQUIRY: + retcode = nvme_trans_inquiry(ns, hdr, cmd); + break; + case LOG_SENSE: + retcode = nvme_trans_log_sense(ns, hdr, cmd); + break; + case 
MODE_SELECT:
+	case MODE_SELECT_10:
+		retcode = nvme_trans_mode_select(ns, hdr, cmd);
+		break;
+	case MODE_SENSE:
+	case MODE_SENSE_10:
+		retcode = nvme_trans_mode_sense(ns, hdr, cmd);
+		break;
+	case READ_CAPACITY:
+		retcode = nvme_trans_read_capacity(ns, hdr, cmd);
+		break;
+	case SERVICE_ACTION_IN:
+		if (IS_READ_CAP_16(cmd))
+			retcode = nvme_trans_read_capacity(ns, hdr, cmd);
+		else
+			goto out;
+		break;
+	case REPORT_LUNS:
+		retcode = nvme_trans_report_luns(ns, hdr, cmd);
+		break;
+	case REQUEST_SENSE:
+		retcode = nvme_trans_request_sense(ns, hdr, cmd);
+		break;
+	case SECURITY_PROTOCOL_IN:
+	case SECURITY_PROTOCOL_OUT:
+		retcode = nvme_trans_security_protocol(ns, hdr, cmd);
+		break;
+	case START_STOP:
+		retcode = nvme_trans_start_stop(ns, hdr, cmd);
+		break;
+	case SYNCHRONIZE_CACHE:
+		retcode = nvme_trans_synchronize_cache(ns, hdr, cmd);
+		break;
+	case FORMAT_UNIT:
+		retcode = nvme_trans_format_unit(ns, hdr, cmd);
+		break;
+	case TEST_UNIT_READY:
+		retcode = nvme_trans_test_unit_ready(ns, hdr, cmd);
+		break;
+	case WRITE_BUFFER:
+		retcode = nvme_trans_write_buffer(ns, hdr, cmd);
+		break;
+	default:
+ out:
+		retcode = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
+				ILLEGAL_REQUEST, SCSI_ASC_ILLEGAL_COMMAND,
+				SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
+		break;
+	}
+	return retcode;
+}
+
+int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr)
+{
+	struct sg_io_hdr hdr;
+	int retcode;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+	if (copy_from_user(&hdr, u_hdr, sizeof(hdr)))
+		return -EFAULT;
+	if (hdr.interface_id != 'S')
+		return -EINVAL;
+	if (hdr.cmd_len > BLK_MAX_CDB)
+		return -EINVAL;
+
+	retcode = nvme_scsi_translate(ns, &hdr);
+	if (retcode < 0)
+		return retcode;
+	if (retcode > 0)
+		retcode = SNTI_TRANSLATION_SUCCESS;
+	if (copy_to_user(u_hdr, &hdr, sizeof(sg_io_hdr_t)) > 0)
+		return -EFAULT;
+
+	return retcode;
+}
+
+int nvme_sg_get_version_num(int __user *ip)
+{
+	return put_user(sg_version_num, ip);
+}

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index f1974cab60cf..aa575033dbe7 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -546,6 +546,8 @@ struct nvme_ns {
 
 	int ns_id;
 	int lba_shift;
+	u64 mode_select_num_blocks;
+	u32 mode_select_block_len;
 };
 
 /*
@@ -563,6 +565,39 @@ struct nvme_iod {
 	dma_addr_t first_dma;
 	struct scatterlist sg[0];
 };
+
+/**
+ * nvme_free_iod - frees an nvme_iod
+ * @dev: The device that the I/O was submitted to
+ * @iod: The memory to free
+ */
+void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod);
+
+int nvme_setup_prps(struct nvme_dev *dev, struct nvme_common_command *cmd,
+			struct nvme_iod *iod, int total_len, gfp_t gfp);
+struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write,
+			unsigned long addr, unsigned length);
+void nvme_unmap_user_pages(struct nvme_dev *dev, int write,
+			struct nvme_iod *iod);
+struct nvme_queue *get_nvmeq(struct nvme_dev *dev);
+void put_nvmeq(struct nvme_queue *nvmeq);
+int nvme_submit_sync_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd,
+						u32 *result, unsigned timeout);
+int nvme_submit_flush_data(struct nvme_queue *nvmeq, struct nvme_ns *ns);
+int nvme_submit_admin_cmd(struct nvme_dev *, struct nvme_command *,
+							u32 *result);
+int nvme_identify(struct nvme_dev *, unsigned nsid, unsigned cns,
+							dma_addr_t dma_addr);
+int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid,
+			dma_addr_t dma_addr, u32 *result);
+int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11,
+			dma_addr_t dma_addr, u32 *result);
+
+struct sg_io_hdr;
+
+int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr);
+int nvme_sg_get_version_num(int __user *ip);
+
 #endif
 
 #endif /* _LINUX_NVME_H */
--
cgit

From fddddb52a6c4e2438f4514ed979183653ca0732a Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni
Date: Thu, 21 Mar 2013 17:59:14 +0100
Subject: bus: introduce a Marvell EBU MBus driver

The Marvell EBU SoCs have a configurable physical address space
layout: the physical ranges of memory used to address PCI(e)
interfaces, NOR flashes, SRAM and various other types of memory are
configurable by software, through a mechanism of so-called 'address
decoding windows'.

This new driver mvebu-mbus consolidates the existing code to address
the configuration of these memory ranges, which is spread across
mach-mvebu, mach-orion5x, mach-mv78xx0, mach-dove and mach-kirkwood.
Following patches convert each Marvell EBU SoC family to use this
driver, therefore removing the old code that was configuring the
address decoding windows.

It is worth mentioning that the MVEBU_MBUS Kconfig option is
intentionally added as a blind option. The new driver implements and
exports the mv_mbus_dram_info() function, which is used by various
Marvell drivers throughout the tree to get access to window
configuration parameters that they require. This function is also
implemented in arch/arm/plat-orion/addr-map.c, which ultimately gets
removed at the end of this patch series. So, in order to preserve
bisectability, we want to ensure that *either* this new driver, *or*
the legacy code in plat-orion/addr-map.c gets compiled in. By making
MVEBU_MBUS a blind option, we are sure that only a platform that does
'select MVEBU_MBUS' will get this new driver compiled in. Therefore,
throughout the next patches that convert the Marvell
sub-architectures one after the other to this new driver, we add the
'select MVEBU_MBUS' and also remove plat-orion/addr-map.c from the
build for that specific sub-architecture. This ensures that
bisectability is preserved.

Earlier versions of this driver had a DT binding, but since those
were not yet agreed upon, they were removed. The driver still uses
of_device_id to find the SoC specific details according to the
string passed to mvebu_mbus_init(). The plan is to re-introduce a
proper DT binding as a followup set of patches.

Signed-off-by: Thomas Petazzoni
Acked-by: Arnd Bergmann
Signed-off-by: Jason Cooper
---
 drivers/bus/Kconfig      |   7 +
 drivers/bus/Makefile     |   1 +
 drivers/bus/mvebu-mbus.c | 867 +++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mbus.h     |  24 +-
 4 files changed, 898 insertions(+), 1 deletion(-)
 create mode 100644 drivers/bus/mvebu-mbus.c
(limited to 'include/linux')

diff --git a/drivers/bus/Kconfig b/drivers/bus/Kconfig
index 0f51ed687dc8..b05ecab915c4 100644
--- a/drivers/bus/Kconfig
+++ b/drivers/bus/Kconfig
@@ -4,6 +4,13 @@
 
 menu "Bus devices"
 
+config MVEBU_MBUS
+	bool
+	depends on PLAT_ORION
+	help
+	  Driver needed for the MBus configuration on Marvell EBU SoCs
+	  (Kirkwood, Dove, Orion5x, MV78XX0 and Armada 370/XP).
+
 config OMAP_OCP2SCP
 	tristate "OMAP OCP2SCP DRIVER"
 	depends on ARCH_OMAP2PLUS

diff --git a/drivers/bus/Makefile b/drivers/bus/Makefile
index 45d997c85453..3c7b53c12091 100644
--- a/drivers/bus/Makefile
+++ b/drivers/bus/Makefile
@@ -2,6 +2,7 @@
 # Makefile for the bus drivers.
 #
 
+obj-$(CONFIG_MVEBU_MBUS)	+= mvebu-mbus.o
 obj-$(CONFIG_OMAP_OCP2SCP)	+= omap-ocp2scp.o
 
 # Interconnect bus driver for OMAP SoCs.
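[Editor's aside: before the driver itself, a sketch of how the exported
mv_mbus_dram_info() is typically consumed. The mbus_dram_target_info and
mbus_dram_window fields come from include/linux/mbus.h; the register
offsets (the 0x10/0x14 pair and the 8-byte stride) are invented for
illustration and do not belong to any real device.]

	#include <linux/io.h>
	#include <linux/mbus.h>

	/* Hypothetical peripheral: program one base/control register pair
	 * per SDRAM chip select so the device's DMA engine can reach DRAM. */
	static void example_setup_dram_windows(void __iomem *regs)
	{
		const struct mbus_dram_target_info *dram = mv_mbus_dram_info();
		int i;

		for (i = 0; i < dram->num_cs; i++) {
			const struct mbus_dram_window *cs = dram->cs + i;

			/* window base: top 16 bits of the CS physical base */
			writel(cs->base & 0xffff0000, regs + 0x10 + i * 8);
			/* window control: size, attribute, target, enable */
			writel(((cs->size - 1) & 0xffff0000) |
			       (cs->mbus_attr << 8) |
			       (dram->mbus_dram_target_id << 4) | 1,
			       regs + 0x14 + i * 8);
		}
	}

[This mirrors the pattern used by Marvell network and DMA drivers with the
same structure, which is why the commit message above insists that exactly
one provider of mv_mbus_dram_info() be compiled in at any point in the
series.]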
diff --git a/drivers/bus/mvebu-mbus.c b/drivers/bus/mvebu-mbus.c
new file mode 100644
index 000000000000..586d03e29e9e
--- /dev/null
+++ b/drivers/bus/mvebu-mbus.c
@@ -0,0 +1,867 @@
+/*
+ * Address map functions for Marvell EBU SoCs (Kirkwood, Armada
+ * 370/XP, Dove, Orion5x and MV78xx0)
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ *
+ * The Marvell EBU SoCs have a configurable physical address space:
+ * the physical address at which certain devices (PCIe, NOR, NAND,
+ * etc.) sit can be configured. The configuration takes place through
+ * two sets of registers:
+ *
+ * - One to configure the access of the CPU to the devices. Depending
+ *   on the families, there are between 8 and 20 configurable windows;
+ *   each can be used to create a physical memory window that maps to
+ *   a specific device. Devices are identified by a tuple (target,
+ *   attribute).
+ *
+ * - One to configure the access of the CPU to the SDRAM. There are
+ *   either 2 (for Dove) or 4 (for other families) windows to map the
+ *   SDRAM into the physical address space.
+ *
+ * This driver:
+ *
+ * - Reads out the SDRAM address decoding windows at initialization
+ *   time, and fills the mvebu_mbus_dram_info structure with this
+ *   information. The exported function mv_mbus_dram_info() allows
+ *   device drivers to get the information about the SDRAM address
+ *   decoding windows. This is needed because devices also have their
+ *   own windows (configured through registers that are part of each
+ *   device's register space), and therefore the drivers for Marvell
+ *   devices have to configure those device -> SDRAM windows to ensure
+ *   that DMA works properly.
+ *
+ * - Provides an API for platform code or device drivers to
+ *   dynamically add or remove address decoding windows for the CPU ->
+ *   device accesses. This API is mvebu_mbus_add_window(),
+ *   mvebu_mbus_add_window_remap_flags() and
+ *   mvebu_mbus_del_window(). Since the (target, attribute) values
+ *   differ from one SoC family to another, the API uses a 'const char
+ *   *' string to identify devices, and this driver is responsible for
+ *   knowing the mapping between the name of a device and its
+ *   corresponding (target, attribute) in the current SoC family.
+ *
+ * - Provides a debugfs interface in /sys/kernel/debug/mvebu-mbus/ to
+ *   see the list of CPU -> SDRAM windows and their configuration
+ *   (file 'sdram') and the list of CPU -> devices windows and their
+ *   configuration (file 'devices').
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/mbus.h>
+#include <linux/io.h>
+#include <linux/ioport.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/debugfs.h>
+
+/*
+ * DDR target is the same on all platforms.
+ */ +#define TARGET_DDR 0 + +/* + * CPU Address Decode Windows registers + */ +#define WIN_CTRL_OFF 0x0000 +#define WIN_CTRL_ENABLE BIT(0) +#define WIN_CTRL_TGT_MASK 0xf0 +#define WIN_CTRL_TGT_SHIFT 4 +#define WIN_CTRL_ATTR_MASK 0xff00 +#define WIN_CTRL_ATTR_SHIFT 8 +#define WIN_CTRL_SIZE_MASK 0xffff0000 +#define WIN_CTRL_SIZE_SHIFT 16 +#define WIN_BASE_OFF 0x0004 +#define WIN_BASE_LOW 0xffff0000 +#define WIN_BASE_HIGH 0xf +#define WIN_REMAP_LO_OFF 0x0008 +#define WIN_REMAP_LOW 0xffff0000 +#define WIN_REMAP_HI_OFF 0x000c + +#define ATTR_HW_COHERENCY (0x1 << 4) + +#define DDR_BASE_CS_OFF(n) (0x0000 + ((n) << 3)) +#define DDR_BASE_CS_HIGH_MASK 0xf +#define DDR_BASE_CS_LOW_MASK 0xff000000 +#define DDR_SIZE_CS_OFF(n) (0x0004 + ((n) << 3)) +#define DDR_SIZE_ENABLED BIT(0) +#define DDR_SIZE_CS_MASK 0x1c +#define DDR_SIZE_CS_SHIFT 2 +#define DDR_SIZE_MASK 0xff000000 + +#define DOVE_DDR_BASE_CS_OFF(n) ((n) << 4) + +struct mvebu_mbus_mapping { + const char *name; + u8 target; + u8 attr; + u8 attrmask; +}; + +/* + * Masks used for the 'attrmask' field of mvebu_mbus_mapping. They + * allow to get the real attribute value, discarding the special bits + * used to select a PCI MEM region or a PCI WA region. This allows the + * debugfs code to reverse-match the name of a device from its + * target/attr values. + * + * For all devices except PCI, all bits of 'attr' must be + * considered. For most SoCs, only bit 3 should be ignored (it allows + * to select between PCI MEM and PCI I/O). On Orion5x however, there + * is the special bit 5 to select a PCI WA region. + */ +#define MAPDEF_NOMASK 0xff +#define MAPDEF_PCIMASK 0xf7 +#define MAPDEF_ORIONPCIMASK 0xd7 + +/* Macro used to define one mvebu_mbus_mapping entry */ +#define MAPDEF(__n, __t, __a, __m) \ + { .name = __n, .target = __t, .attr = __a, .attrmask = __m } + +struct mvebu_mbus_state; + +struct mvebu_mbus_soc_data { + unsigned int num_wins; + unsigned int num_remappable_wins; + unsigned int (*win_cfg_offset)(const int win); + void (*setup_cpu_target)(struct mvebu_mbus_state *s); + int (*show_cpu_target)(struct mvebu_mbus_state *s, + struct seq_file *seq, void *v); + const struct mvebu_mbus_mapping *map; +}; + +struct mvebu_mbus_state { + void __iomem *mbuswins_base; + void __iomem *sdramwins_base; + struct dentry *debugfs_root; + struct dentry *debugfs_sdram; + struct dentry *debugfs_devs; + const struct mvebu_mbus_soc_data *soc; + int hw_io_coherency; +}; + +static struct mvebu_mbus_state mbus_state; + +static struct mbus_dram_target_info mvebu_mbus_dram_info; +const struct mbus_dram_target_info *mv_mbus_dram_info(void) +{ + return &mvebu_mbus_dram_info; +} +EXPORT_SYMBOL_GPL(mv_mbus_dram_info); + +/* + * Functions to manipulate the address decoding windows + */ + +static void mvebu_mbus_read_window(struct mvebu_mbus_state *mbus, + int win, int *enabled, u64 *base, + u32 *size, u8 *target, u8 *attr, + u64 *remap) +{ + void __iomem *addr = mbus->mbuswins_base + + mbus->soc->win_cfg_offset(win); + u32 basereg = readl(addr + WIN_BASE_OFF); + u32 ctrlreg = readl(addr + WIN_CTRL_OFF); + + if (!(ctrlreg & WIN_CTRL_ENABLE)) { + *enabled = 0; + return; + } + + *enabled = 1; + *base = ((u64)basereg & WIN_BASE_HIGH) << 32; + *base |= (basereg & WIN_BASE_LOW); + *size = (ctrlreg | ~WIN_CTRL_SIZE_MASK) + 1; + + if (target) + *target = (ctrlreg & WIN_CTRL_TGT_MASK) >> WIN_CTRL_TGT_SHIFT; + + if (attr) + *attr = (ctrlreg & WIN_CTRL_ATTR_MASK) >> WIN_CTRL_ATTR_SHIFT; + + if (remap) { + if (win < mbus->soc->num_remappable_wins) { + u32 remap_low = readl(addr + 
WIN_REMAP_LO_OFF); + u32 remap_hi = readl(addr + WIN_REMAP_HI_OFF); + *remap = ((u64)remap_hi << 32) | remap_low; + } else + *remap = 0; + } +} + +static void mvebu_mbus_disable_window(struct mvebu_mbus_state *mbus, + int win) +{ + void __iomem *addr; + + addr = mbus->mbuswins_base + mbus->soc->win_cfg_offset(win); + + writel(0, addr + WIN_BASE_OFF); + writel(0, addr + WIN_CTRL_OFF); + if (win < mbus->soc->num_remappable_wins) { + writel(0, addr + WIN_REMAP_LO_OFF); + writel(0, addr + WIN_REMAP_HI_OFF); + } +} + +/* Checks whether the given window number is available */ +static int mvebu_mbus_window_is_free(struct mvebu_mbus_state *mbus, + const int win) +{ + void __iomem *addr = mbus->mbuswins_base + + mbus->soc->win_cfg_offset(win); + u32 ctrl = readl(addr + WIN_CTRL_OFF); + return !(ctrl & WIN_CTRL_ENABLE); +} + +/* + * Checks whether the given (base, base+size) area doesn't overlap an + * existing region + */ +static int mvebu_mbus_window_conflicts(struct mvebu_mbus_state *mbus, + phys_addr_t base, size_t size, + u8 target, u8 attr) +{ + u64 end = (u64)base + size; + int win; + + for (win = 0; win < mbus->soc->num_wins; win++) { + u64 wbase, wend; + u32 wsize; + u8 wtarget, wattr; + int enabled; + + mvebu_mbus_read_window(mbus, win, + &enabled, &wbase, &wsize, + &wtarget, &wattr, NULL); + + if (!enabled) + continue; + + wend = wbase + wsize; + + /* + * Check if the current window overlaps with the + * proposed physical range + */ + if ((u64)base < wend && end > wbase) + return 0; + + /* + * Check if target/attribute conflicts + */ + if (target == wtarget && attr == wattr) + return 0; + } + + return 1; +} + +static int mvebu_mbus_find_window(struct mvebu_mbus_state *mbus, + phys_addr_t base, size_t size) +{ + int win; + + for (win = 0; win < mbus->soc->num_wins; win++) { + u64 wbase; + u32 wsize; + int enabled; + + mvebu_mbus_read_window(mbus, win, + &enabled, &wbase, &wsize, + NULL, NULL, NULL); + + if (!enabled) + continue; + + if (base == wbase && size == wsize) + return win; + } + + return -ENODEV; +} + +static int mvebu_mbus_setup_window(struct mvebu_mbus_state *mbus, + int win, phys_addr_t base, size_t size, + phys_addr_t remap, u8 target, + u8 attr) +{ + void __iomem *addr = mbus->mbuswins_base + + mbus->soc->win_cfg_offset(win); + u32 ctrl, remap_addr; + + ctrl = ((size - 1) & WIN_CTRL_SIZE_MASK) | + (attr << WIN_CTRL_ATTR_SHIFT) | + (target << WIN_CTRL_TGT_SHIFT) | + WIN_CTRL_ENABLE; + + writel(base & WIN_BASE_LOW, addr + WIN_BASE_OFF); + writel(ctrl, addr + WIN_CTRL_OFF); + if (win < mbus->soc->num_remappable_wins) { + if (remap == MVEBU_MBUS_NO_REMAP) + remap_addr = base; + else + remap_addr = remap; + writel(remap_addr & WIN_REMAP_LOW, addr + WIN_REMAP_LO_OFF); + writel(0, addr + WIN_REMAP_HI_OFF); + } + + return 0; +} + +static int mvebu_mbus_alloc_window(struct mvebu_mbus_state *mbus, + phys_addr_t base, size_t size, + phys_addr_t remap, u8 target, + u8 attr) +{ + int win; + + if (remap == MVEBU_MBUS_NO_REMAP) { + for (win = mbus->soc->num_remappable_wins; + win < mbus->soc->num_wins; win++) + if (mvebu_mbus_window_is_free(mbus, win)) + return mvebu_mbus_setup_window(mbus, win, base, + size, remap, + target, attr); + } + + + for (win = 0; win < mbus->soc->num_wins; win++) + if (mvebu_mbus_window_is_free(mbus, win)) + return mvebu_mbus_setup_window(mbus, win, base, size, + remap, target, attr); + + return -ENOMEM; +} + +/* + * Debugfs debugging + */ + +/* Common function used for Dove, Kirkwood, Armada 370/XP and Orion 5x */ +static int mvebu_sdram_debug_show_orion(struct 
mvebu_mbus_state *mbus, + struct seq_file *seq, void *v) +{ + int i; + + for (i = 0; i < 4; i++) { + u32 basereg = readl(mbus->sdramwins_base + DDR_BASE_CS_OFF(i)); + u32 sizereg = readl(mbus->sdramwins_base + DDR_SIZE_CS_OFF(i)); + u64 base; + u32 size; + + if (!(sizereg & DDR_SIZE_ENABLED)) { + seq_printf(seq, "[%d] disabled\n", i); + continue; + } + + base = ((u64)basereg & DDR_BASE_CS_HIGH_MASK) << 32; + base |= basereg & DDR_BASE_CS_LOW_MASK; + size = (sizereg | ~DDR_SIZE_MASK); + + seq_printf(seq, "[%d] %016llx - %016llx : cs%d\n", + i, (unsigned long long)base, + (unsigned long long)base + size + 1, + (sizereg & DDR_SIZE_CS_MASK) >> DDR_SIZE_CS_SHIFT); + } + + return 0; +} + +/* Special function for Dove */ +static int mvebu_sdram_debug_show_dove(struct mvebu_mbus_state *mbus, + struct seq_file *seq, void *v) +{ + int i; + + for (i = 0; i < 2; i++) { + u32 map = readl(mbus->sdramwins_base + DOVE_DDR_BASE_CS_OFF(i)); + u64 base; + u32 size; + + if (!(map & 1)) { + seq_printf(seq, "[%d] disabled\n", i); + continue; + } + + base = map & 0xff800000; + size = 0x100000 << (((map & 0x000f0000) >> 16) - 4); + + seq_printf(seq, "[%d] %016llx - %016llx : cs%d\n", + i, (unsigned long long)base, + (unsigned long long)base + size, i); + } + + return 0; +} + +static int mvebu_sdram_debug_show(struct seq_file *seq, void *v) +{ + struct mvebu_mbus_state *mbus = &mbus_state; + return mbus->soc->show_cpu_target(mbus, seq, v); +} + +static int mvebu_sdram_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, mvebu_sdram_debug_show, inode->i_private); +} + +static const struct file_operations mvebu_sdram_debug_fops = { + .open = mvebu_sdram_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int mvebu_devs_debug_show(struct seq_file *seq, void *v) +{ + struct mvebu_mbus_state *mbus = &mbus_state; + int win; + + for (win = 0; win < mbus->soc->num_wins; win++) { + u64 wbase, wremap; + u32 wsize; + u8 wtarget, wattr; + int enabled, i; + const char *name; + + mvebu_mbus_read_window(mbus, win, + &enabled, &wbase, &wsize, + &wtarget, &wattr, &wremap); + + if (!enabled) { + seq_printf(seq, "[%02d] disabled\n", win); + continue; + } + + + for (i = 0; mbus->soc->map[i].name; i++) + if (mbus->soc->map[i].target == wtarget && + mbus->soc->map[i].attr == + (wattr & mbus->soc->map[i].attrmask)) + break; + + name = mbus->soc->map[i].name ?: "unknown"; + + seq_printf(seq, "[%02d] %016llx - %016llx : %s", + win, (unsigned long long)wbase, + (unsigned long long)(wbase + wsize), name); + + if (win < mbus->soc->num_remappable_wins) { + seq_printf(seq, " (remap %016llx)\n", + (unsigned long long)wremap); + } else + seq_printf(seq, "\n"); + } + + return 0; +} + +static int mvebu_devs_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, mvebu_devs_debug_show, inode->i_private); +} + +static const struct file_operations mvebu_devs_debug_fops = { + .open = mvebu_devs_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +/* + * SoC-specific functions and definitions + */ + +static unsigned int orion_mbus_win_offset(int win) +{ + return win << 4; +} + +static unsigned int armada_370_xp_mbus_win_offset(int win) +{ + /* The register layout is a bit annoying and the below code + * tries to cope with it. 
+ * - At offset 0x0, there are the registers for the first 8 + * windows, with 4 registers of 32 bits per window (ctrl, + * base, remap low, remap high) + * - Then at offset 0x80, there is a hole of 0x10 bytes for + * the internal registers base address and internal units + * sync barrier register. + * - Then at offset 0x90, there the registers for 12 + * windows, with only 2 registers of 32 bits per window + * (ctrl, base). + */ + if (win < 8) + return win << 4; + else + return 0x90 + ((win - 8) << 3); +} + +static unsigned int mv78xx0_mbus_win_offset(int win) +{ + if (win < 8) + return win << 4; + else + return 0x900 + ((win - 8) << 4); +} + +static void __init +mvebu_mbus_default_setup_cpu_target(struct mvebu_mbus_state *mbus) +{ + int i; + int cs; + + mvebu_mbus_dram_info.mbus_dram_target_id = TARGET_DDR; + + for (i = 0, cs = 0; i < 4; i++) { + u32 base = readl(mbus->sdramwins_base + DDR_BASE_CS_OFF(i)); + u32 size = readl(mbus->sdramwins_base + DDR_SIZE_CS_OFF(i)); + + /* + * We only take care of entries for which the chip + * select is enabled, and that don't have high base + * address bits set (devices can only access the first + * 32 bits of the memory). + */ + if ((size & DDR_SIZE_ENABLED) && + !(base & DDR_BASE_CS_HIGH_MASK)) { + struct mbus_dram_window *w; + + w = &mvebu_mbus_dram_info.cs[cs++]; + w->cs_index = i; + w->mbus_attr = 0xf & ~(1 << i); + if (mbus->hw_io_coherency) + w->mbus_attr |= ATTR_HW_COHERENCY; + w->base = base & DDR_BASE_CS_LOW_MASK; + w->size = (size | ~DDR_SIZE_MASK) + 1; + } + } + mvebu_mbus_dram_info.num_cs = cs; +} + +static void __init +mvebu_mbus_dove_setup_cpu_target(struct mvebu_mbus_state *mbus) +{ + int i; + int cs; + + mvebu_mbus_dram_info.mbus_dram_target_id = TARGET_DDR; + + for (i = 0, cs = 0; i < 2; i++) { + u32 map = readl(mbus->sdramwins_base + DOVE_DDR_BASE_CS_OFF(i)); + + /* + * Chip select enabled? 
+ */ + if (map & 1) { + struct mbus_dram_window *w; + + w = &mvebu_mbus_dram_info.cs[cs++]; + w->cs_index = i; + w->mbus_attr = 0; /* CS address decoding done inside */ + /* the DDR controller, no need to */ + /* provide attributes */ + w->base = map & 0xff800000; + w->size = 0x100000 << (((map & 0x000f0000) >> 16) - 4); + } + } + + mvebu_mbus_dram_info.num_cs = cs; +} + +static const struct mvebu_mbus_mapping armada_370_map[] = { + MAPDEF("bootrom", 1, 0xe0, MAPDEF_NOMASK), + MAPDEF("devbus-boot", 1, 0x2f, MAPDEF_NOMASK), + MAPDEF("devbus-cs0", 1, 0x3e, MAPDEF_NOMASK), + MAPDEF("devbus-cs1", 1, 0x3d, MAPDEF_NOMASK), + MAPDEF("devbus-cs2", 1, 0x3b, MAPDEF_NOMASK), + MAPDEF("devbus-cs3", 1, 0x37, MAPDEF_NOMASK), + MAPDEF("pcie0.0", 4, 0xe0, MAPDEF_PCIMASK), + MAPDEF("pcie1.0", 8, 0xe0, MAPDEF_PCIMASK), + {}, +}; + +static const struct mvebu_mbus_soc_data armada_370_mbus_data = { + .num_wins = 20, + .num_remappable_wins = 8, + .win_cfg_offset = armada_370_xp_mbus_win_offset, + .setup_cpu_target = mvebu_mbus_default_setup_cpu_target, + .show_cpu_target = mvebu_sdram_debug_show_orion, + .map = armada_370_map, +}; + +static const struct mvebu_mbus_mapping armada_xp_map[] = { + MAPDEF("bootrom", 1, 0x1d, MAPDEF_NOMASK), + MAPDEF("devbus-boot", 1, 0x2f, MAPDEF_NOMASK), + MAPDEF("devbus-cs0", 1, 0x3e, MAPDEF_NOMASK), + MAPDEF("devbus-cs1", 1, 0x3d, MAPDEF_NOMASK), + MAPDEF("devbus-cs2", 1, 0x3b, MAPDEF_NOMASK), + MAPDEF("devbus-cs3", 1, 0x37, MAPDEF_NOMASK), + MAPDEF("pcie0.0", 4, 0xe0, MAPDEF_PCIMASK), + MAPDEF("pcie0.1", 4, 0xd0, MAPDEF_PCIMASK), + MAPDEF("pcie0.2", 4, 0xb0, MAPDEF_PCIMASK), + MAPDEF("pcie0.3", 4, 0x70, MAPDEF_PCIMASK), + MAPDEF("pcie1.0", 8, 0xe0, MAPDEF_PCIMASK), + MAPDEF("pcie1.1", 8, 0xd0, MAPDEF_PCIMASK), + MAPDEF("pcie1.2", 8, 0xb0, MAPDEF_PCIMASK), + MAPDEF("pcie1.3", 8, 0x70, MAPDEF_PCIMASK), + MAPDEF("pcie2.0", 4, 0xf0, MAPDEF_PCIMASK), + MAPDEF("pcie3.0", 8, 0xf0, MAPDEF_PCIMASK), + {}, +}; + +static const struct mvebu_mbus_soc_data armada_xp_mbus_data = { + .num_wins = 20, + .num_remappable_wins = 8, + .win_cfg_offset = armada_370_xp_mbus_win_offset, + .setup_cpu_target = mvebu_mbus_default_setup_cpu_target, + .show_cpu_target = mvebu_sdram_debug_show_orion, + .map = armada_xp_map, +}; + +static const struct mvebu_mbus_mapping kirkwood_map[] = { + MAPDEF("pcie0.0", 4, 0xe0, MAPDEF_PCIMASK), + MAPDEF("pcie1.0", 8, 0xe0, MAPDEF_PCIMASK), + MAPDEF("sram", 3, 0x01, MAPDEF_NOMASK), + MAPDEF("nand", 1, 0x2f, MAPDEF_NOMASK), + {}, +}; + +static const struct mvebu_mbus_soc_data kirkwood_mbus_data = { + .num_wins = 8, + .num_remappable_wins = 4, + .win_cfg_offset = orion_mbus_win_offset, + .setup_cpu_target = mvebu_mbus_default_setup_cpu_target, + .show_cpu_target = mvebu_sdram_debug_show_orion, + .map = kirkwood_map, +}; + +static const struct mvebu_mbus_mapping dove_map[] = { + MAPDEF("pcie0.0", 0x4, 0xe0, MAPDEF_PCIMASK), + MAPDEF("pcie1.0", 0x8, 0xe0, MAPDEF_PCIMASK), + MAPDEF("cesa", 0x3, 0x01, MAPDEF_NOMASK), + MAPDEF("bootrom", 0x1, 0xfd, MAPDEF_NOMASK), + MAPDEF("scratchpad", 0xd, 0x0, MAPDEF_NOMASK), + {}, +}; + +static const struct mvebu_mbus_soc_data dove_mbus_data = { + .num_wins = 8, + .num_remappable_wins = 4, + .win_cfg_offset = orion_mbus_win_offset, + .setup_cpu_target = mvebu_mbus_dove_setup_cpu_target, + .show_cpu_target = mvebu_sdram_debug_show_dove, + .map = dove_map, +}; + +static const struct mvebu_mbus_mapping orion5x_map[] = { + MAPDEF("pcie0.0", 4, 0x51, MAPDEF_ORIONPCIMASK), + MAPDEF("pci0.0", 3, 0x51, MAPDEF_ORIONPCIMASK), + MAPDEF("devbus-boot", 1, 
0x0f, MAPDEF_NOMASK), + MAPDEF("devbus-cs0", 1, 0x1e, MAPDEF_NOMASK), + MAPDEF("devbus-cs1", 1, 0x1d, MAPDEF_NOMASK), + MAPDEF("devbus-cs2", 1, 0x1b, MAPDEF_NOMASK), + MAPDEF("sram", 0, 0x00, MAPDEF_NOMASK), + {}, +}; + +/* + * Some variants of Orion5x have 4 remappable windows, some other have + * only two of them. + */ +static const struct mvebu_mbus_soc_data orion5x_4win_mbus_data = { + .num_wins = 8, + .num_remappable_wins = 4, + .win_cfg_offset = orion_mbus_win_offset, + .setup_cpu_target = mvebu_mbus_default_setup_cpu_target, + .show_cpu_target = mvebu_sdram_debug_show_orion, + .map = orion5x_map, +}; + +static const struct mvebu_mbus_soc_data orion5x_2win_mbus_data = { + .num_wins = 8, + .num_remappable_wins = 2, + .win_cfg_offset = orion_mbus_win_offset, + .setup_cpu_target = mvebu_mbus_default_setup_cpu_target, + .show_cpu_target = mvebu_sdram_debug_show_orion, + .map = orion5x_map, +}; + +static const struct mvebu_mbus_mapping mv78xx0_map[] = { + MAPDEF("pcie0.0", 4, 0xe0, MAPDEF_PCIMASK), + MAPDEF("pcie0.1", 4, 0xd0, MAPDEF_PCIMASK), + MAPDEF("pcie0.2", 4, 0xb0, MAPDEF_PCIMASK), + MAPDEF("pcie0.3", 4, 0x70, MAPDEF_PCIMASK), + MAPDEF("pcie1.0", 8, 0xe0, MAPDEF_PCIMASK), + MAPDEF("pcie1.1", 8, 0xd0, MAPDEF_PCIMASK), + MAPDEF("pcie1.2", 8, 0xb0, MAPDEF_PCIMASK), + MAPDEF("pcie1.3", 8, 0x70, MAPDEF_PCIMASK), + MAPDEF("pcie2.0", 4, 0xf0, MAPDEF_PCIMASK), + MAPDEF("pcie3.0", 8, 0xf0, MAPDEF_PCIMASK), + {}, +}; + +static const struct mvebu_mbus_soc_data mv78xx0_mbus_data = { + .num_wins = 14, + .num_remappable_wins = 8, + .win_cfg_offset = mv78xx0_mbus_win_offset, + .setup_cpu_target = mvebu_mbus_default_setup_cpu_target, + .show_cpu_target = mvebu_sdram_debug_show_orion, + .map = mv78xx0_map, +}; + +/* + * The driver doesn't yet have a DT binding because the details of + * this DT binding still need to be sorted out. However, as a + * preparation, we already use of_device_id to match a SoC description + * string against the SoC specific details of this driver. 
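 + * For example, passing the string "marvell,armadaxp-mbus" to + * mvebu_mbus_init() selects the armada_xp_mbus_data entry below.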
+ */ +static const struct of_device_id of_mvebu_mbus_ids[] = { + { .compatible = "marvell,armada370-mbus", + .data = &armada_370_mbus_data, }, + { .compatible = "marvell,armadaxp-mbus", + .data = &armada_xp_mbus_data, }, + { .compatible = "marvell,kirkwood-mbus", + .data = &kirkwood_mbus_data, }, + { .compatible = "marvell,dove-mbus", + .data = &dove_mbus_data, }, + { .compatible = "marvell,orion5x-88f5281-mbus", + .data = &orion5x_4win_mbus_data, }, + { .compatible = "marvell,orion5x-88f5182-mbus", + .data = &orion5x_2win_mbus_data, }, + { .compatible = "marvell,orion5x-88f5181-mbus", + .data = &orion5x_2win_mbus_data, }, + { .compatible = "marvell,orion5x-88f6183-mbus", + .data = &orion5x_4win_mbus_data, }, + { .compatible = "marvell,mv78xx0-mbus", + .data = &mv78xx0_mbus_data, }, + { }, +}; + +/* + * Public API of the driver + */ +int mvebu_mbus_add_window_remap_flags(const char *devname, phys_addr_t base, + size_t size, phys_addr_t remap, + unsigned int flags) +{ + struct mvebu_mbus_state *s = &mbus_state; + u8 target, attr; + int i; + + if (!s->soc->map) + return -ENODEV; + + for (i = 0; s->soc->map[i].name; i++) + if (!strcmp(s->soc->map[i].name, devname)) + break; + + if (!s->soc->map[i].name) { + pr_err("mvebu-mbus: unknown device '%s'\n", devname); + return -ENODEV; + } + + target = s->soc->map[i].target; + attr = s->soc->map[i].attr; + + if (flags == MVEBU_MBUS_PCI_MEM) + attr |= 0x8; + else if (flags == MVEBU_MBUS_PCI_WA) + attr |= 0x28; + + if (!mvebu_mbus_window_conflicts(s, base, size, target, attr)) { + pr_err("mvebu-mbus: cannot add window '%s', conflicts with another window\n", + devname); + return -EINVAL; + } + + return mvebu_mbus_alloc_window(s, base, size, remap, target, attr); + +} + +int mvebu_mbus_add_window(const char *devname, phys_addr_t base, size_t size) +{ + return mvebu_mbus_add_window_remap_flags(devname, base, size, + MVEBU_MBUS_NO_REMAP, 0); +} + +int mvebu_mbus_del_window(phys_addr_t base, size_t size) +{ + int win; + + win = mvebu_mbus_find_window(&mbus_state, base, size); + if (win < 0) + return win; + + mvebu_mbus_disable_window(&mbus_state, win); + return 0; +} + +static __init int mvebu_mbus_debugfs_init(void) +{ + struct mvebu_mbus_state *s = &mbus_state; + + /* + * If no base has been initialized, doesn't make sense to + * register the debugfs entries. We may be on a multiplatform + * kernel that isn't running a Marvell EBU SoC. 
+ */ + if (!s->mbuswins_base) + return 0; + + s->debugfs_root = debugfs_create_dir("mvebu-mbus", NULL); + if (s->debugfs_root) { + s->debugfs_sdram = debugfs_create_file("sdram", S_IRUGO, + s->debugfs_root, NULL, + &mvebu_sdram_debug_fops); + s->debugfs_devs = debugfs_create_file("devices", S_IRUGO, + s->debugfs_root, NULL, + &mvebu_devs_debug_fops); + } + + return 0; +} +fs_initcall(mvebu_mbus_debugfs_init); + +int __init mvebu_mbus_init(const char *soc, phys_addr_t mbuswins_phys_base, + size_t mbuswins_size, + phys_addr_t sdramwins_phys_base, + size_t sdramwins_size) +{ + struct mvebu_mbus_state *mbus = &mbus_state; + const struct of_device_id *of_id; + int win; + + for (of_id = of_mvebu_mbus_ids; of_id->compatible; of_id++) + if (!strcmp(of_id->compatible, soc)) + break; + + if (!of_id->compatible) { + pr_err("mvebu-mbus: could not find a matching SoC family\n"); + return -ENODEV; + } + + mbus->soc = of_id->data; + + mbus->mbuswins_base = ioremap(mbuswins_phys_base, mbuswins_size); + if (!mbus->mbuswins_base) + return -ENOMEM; + + mbus->sdramwins_base = ioremap(sdramwins_phys_base, sdramwins_size); + if (!mbus->sdramwins_base) { + iounmap(mbus_state.mbuswins_base); + return -ENOMEM; + } + + for (win = 0; win < mbus->soc->num_wins; win++) + mvebu_mbus_disable_window(mbus, win); + + mbus->soc->setup_cpu_target(mbus); + + return 0; +} diff --git a/include/linux/mbus.h b/include/linux/mbus.h index efa1a6d7aca8..462eb9791012 100644 --- a/include/linux/mbus.h +++ b/include/linux/mbus.h @@ -32,6 +32,17 @@ struct mbus_dram_target_info } cs[4]; }; +/* Flags for PCI/PCIe address decoding regions */ +#define MVEBU_MBUS_PCI_IO 0x1 +#define MVEBU_MBUS_PCI_MEM 0x2 +#define MVEBU_MBUS_PCI_WA 0x3 + +/* + * Magic value indicating that we don't need a remapping-capable + * address decoding window. + */ +#define MVEBU_MBUS_NO_REMAP (0xffffffff) + /* * The Marvell mbus is to be found only on SOCs from the Orion family * at the moment. Provide a dummy stub for other architectures. @@ -44,4 +55,15 @@ static inline const struct mbus_dram_target_info *mv_mbus_dram_info(void) return NULL; } #endif -#endif + +int mvebu_mbus_add_window_remap_flags(const char *devname, phys_addr_t base, + size_t size, phys_addr_t remap, + unsigned int flags); +int mvebu_mbus_add_window(const char *devname, phys_addr_t base, + size_t size); +int mvebu_mbus_del_window(phys_addr_t base, size_t size); +int mvebu_mbus_init(const char *soc, phys_addr_t mbus_phys_base, + size_t mbus_size, phys_addr_t sdram_phys_base, + size_t sdram_size); + +#endif /* __LINUX_MBUS_H */ -- cgit From e5354107e14755991da82e0d2a4791db92908d9d Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Wed, 27 Mar 2013 17:29:53 +0200 Subject: mei: bus: Initial MEI Client bus type implementation The MEI client bus will present some of the MEI clients as devices for other standard subsystems. Implement the probe, remove, match and device addition routines, along with the sysfs and uevent ones. mei_cl_device_id is also added to mod_devicetable.h. A mei-client-bus.txt document describing the rationale and the API usage is also added, while ABI/testing/sysfs-bus-mei describes the modalias ABI.
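As an illustration (a sketch only, not part of this patch: the helper and the "nfc" device name are hypothetical placeholders), the MEI core side would be expected to expose a client on the new bus roughly like this:

/* Hypothetical core-side registration of an ME client device */
static int mei_expose_client(struct mei_device *dev, uuid_le uuid)
{
	static char name[] = "nfc";	/* placeholder device name */
	struct mei_cl_device *cldev;

	cldev = mei_cl_add_device(dev, uuid, name);
	if (!cldev)	/* mei_cl_add_device() returns NULL on failure */
		return -ENOMEM;

	return 0;
}

Drivers then match against that device name through their id_table, as described in the documentation added below.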
Signed-off-by: Samuel Ortiz Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- Documentation/ABI/testing/sysfs-bus-mei | 7 + Documentation/misc-devices/mei/mei-client-bus.txt | 135 +++++++++++++++++ drivers/misc/mei/Makefile | 1 + drivers/misc/mei/bus.c | 172 ++++++++++++++++++++++ drivers/misc/mei/mei_dev.h | 26 ++++ include/linux/mei_cl_bus.h | 20 +++ include/linux/mod_devicetable.h | 9 ++ scripts/mod/devicetable-offsets.c | 3 + scripts/mod/file2alias.c | 12 ++ 9 files changed, 385 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-bus-mei create mode 100644 Documentation/misc-devices/mei/mei-client-bus.txt create mode 100644 drivers/misc/mei/bus.c create mode 100644 include/linux/mei_cl_bus.h (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-bus-mei b/Documentation/ABI/testing/sysfs-bus-mei new file mode 100644 index 000000000000..2066f0bbd453 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-bus-mei @@ -0,0 +1,7 @@ +What: /sys/bus/mei/devices/.../modalias +Date: March 2013 +KernelVersion: 3.10 +Contact: Samuel Ortiz + linux-mei@linux.intel.com +Description: Stores the same MODALIAS value emitted by uevent + Format: mei: diff --git a/Documentation/misc-devices/mei/mei-client-bus.txt b/Documentation/misc-devices/mei/mei-client-bus.txt new file mode 100644 index 000000000000..9dc5ebf94eb1 --- /dev/null +++ b/Documentation/misc-devices/mei/mei-client-bus.txt @@ -0,0 +1,135 @@ +Intel(R) Management Engine (ME) Client bus API +=============================================== + + +Rationale +========= +The MEI misc character device is useful for dedicated applications to send and receive +data to the many FW appliances found in Intel's ME from user space. +However, for some of the ME functionalities it makes sense to leverage the existing software +stack and expose them through existing kernel subsystems. + +In order to plug seamlessly into the kernel device driver model we add a kernel virtual +bus abstraction on top of the MEI driver. This allows implementing Linux kernel drivers +for the various MEI features as stand-alone entities found in their respective subsystems. +Existing device drivers can even potentially be re-used by adding an MEI CL bus layer to +the existing code. + + +MEI CL bus API +============== +A driver implementation for an MEI Client is very similar to existing bus +based device drivers. The driver registers itself as an MEI CL bus driver through +the mei_cl_driver structure: + +struct mei_cl_driver { + struct device_driver driver; + const char *name; + + const struct mei_cl_device_id *id_table; + + int (*probe)(struct mei_cl_device *dev, const struct mei_cl_device_id *id); + int (*remove)(struct mei_cl_device *dev); +}; + +struct mei_cl_device_id { + char name[MEI_CL_NAME_SIZE]; + kernel_ulong_t driver_info; +}; + +The mei_cl_device_id structure allows the driver to bind itself against a device name. + +To actually register a driver on the ME Client bus one must call the mei_cl_driver_register() +API. This is typically called at module init time. + +Once registered on the ME Client bus, a driver will typically try to do some I/O on +this bus and this should be done through the mei_cl_send() and mei_cl_recv() +routines. The latter is synchronous (blocks and sleeps until data shows up). +In order for drivers to be notified of pending events waiting for them (e.g. +an Rx event) they can register an event handler through the +mei_cl_register_event_cb() routine.
Currently only the MEI_CL_EVENT_RX event +will trigger an event handler call and the driver implementation is supposed +to call mei_cl_recv() from the event handler in order to fetch the pending +received buffers. + + +Example ======= +As a theoretical example let's pretend the ME comes with a "contact" NFC IP. +The driver init and exit routines for this device would look like: + +#define CONTACT_DRIVER_NAME "contact" + +static struct mei_cl_device_id contact_mei_cl_tbl[] = { + { CONTACT_DRIVER_NAME, }, + + /* required last entry */ + { } +}; +MODULE_DEVICE_TABLE(mei_cl, contact_mei_cl_tbl); + +static struct mei_cl_driver contact_driver = { + .id_table = contact_mei_cl_tbl, + .name = CONTACT_DRIVER_NAME, + + .probe = contact_probe, + .remove = contact_remove, +}; + +static int __init contact_init(void) +{ + int r; + + r = mei_cl_driver_register(&contact_driver); + if (r) { + pr_err(CONTACT_DRIVER_NAME ": driver registration failed\n"); + return r; + } + + return 0; +} + +static void __exit contact_exit(void) +{ + mei_cl_driver_unregister(&contact_driver); +} + +module_init(contact_init); +module_exit(contact_exit); + +And the driver's simplified probe routine would look like this: + +int contact_probe(struct mei_cl_device *dev, const struct mei_cl_device_id *id) +{ + struct contact_driver *contact; + + [...] + mei_cl_register_event_cb(dev, contact_event_cb, contact); + + return 0; + } + +In the probe routine the driver basically registers an ME bus event handler +which is as close as it can get to registering a threaded IRQ handler. +The handler implementation will typically call some I/O routine depending on +the pending events: + +#define MAX_NFC_PAYLOAD 128 + +static void contact_event_cb(struct mei_cl_device *dev, u32 events, + void *context) +{ + struct contact_driver *contact = context; + + if (events & BIT(MEI_CL_EVENT_RX)) { + u8 payload[MAX_NFC_PAYLOAD]; + int payload_size; + + payload_size = mei_cl_recv(dev, payload, MAX_NFC_PAYLOAD); + if (payload_size <= 0) + return; + + /* Hook to the NFC subsystem */ + nfc_hci_recv_frame(contact->hdev, payload, payload_size); + } +} diff --git a/drivers/misc/mei/Makefile b/drivers/misc/mei/Makefile index 2c336d087749..1b29f7ccac49 100644 --- a/drivers/misc/mei/Makefile +++ b/drivers/misc/mei/Makefile @@ -10,6 +10,7 @@ mei-objs += client.o mei-objs += main.o mei-objs += amthif.o mei-objs += wd.o +mei-objs += bus.o obj-$(CONFIG_INTEL_MEI_ME) += mei-me.o mei-me-objs := pci-me.o diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c new file mode 100644 index 000000000000..78c876af2676 --- /dev/null +++ b/drivers/misc/mei/bus.c @@ -0,0 +1,172 @@ +/* + * Intel Management Engine Interface (Intel MEI) Linux driver + * Copyright (c) 2012-2013, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details.
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mei_dev.h" + +#define to_mei_cl_driver(d) container_of(d, struct mei_cl_driver, driver) +#define to_mei_cl_device(d) container_of(d, struct mei_cl_device, dev) + +static int mei_cl_device_match(struct device *dev, struct device_driver *drv) +{ + struct mei_cl_device *device = to_mei_cl_device(dev); + struct mei_cl_driver *driver = to_mei_cl_driver(drv); + const struct mei_cl_device_id *id; + + if (!device) + return 0; + + if (!driver || !driver->id_table) + return 0; + + id = driver->id_table; + + while (id->name[0]) { + if (!strcmp(dev_name(dev), id->name)) + return 1; + + id++; + } + + return 0; +} + +static int mei_cl_device_probe(struct device *dev) +{ + struct mei_cl_device *device = to_mei_cl_device(dev); + struct mei_cl_driver *driver; + struct mei_cl_device_id id; + + if (!device) + return 0; + + driver = to_mei_cl_driver(dev->driver); + if (!driver || !driver->probe) + return -ENODEV; + + dev_dbg(dev, "Device probe\n"); + + strncpy(id.name, dev_name(dev), MEI_CL_NAME_SIZE); + + return driver->probe(device, &id); +} + +static int mei_cl_device_remove(struct device *dev) +{ + struct mei_cl_device *device = to_mei_cl_device(dev); + struct mei_cl_driver *driver; + + if (!device || !dev->driver) + return 0; + + driver = to_mei_cl_driver(dev->driver); + if (!driver->remove) { + dev->driver = NULL; + + return 0; + } + + return driver->remove(device); +} + +static ssize_t modalias_show(struct device *dev, struct device_attribute *a, + char *buf) +{ + int len; + + len = snprintf(buf, PAGE_SIZE, "mei:%s\n", dev_name(dev)); + + return (len >= PAGE_SIZE) ? (PAGE_SIZE - 1) : len; +} + +static struct device_attribute mei_cl_dev_attrs[] = { + __ATTR_RO(modalias), + __ATTR_NULL, +}; + +static int mei_cl_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + if (add_uevent_var(env, "MODALIAS=mei:%s", dev_name(dev))) + return -ENOMEM; + + return 0; +} + +static struct bus_type mei_cl_bus_type = { + .name = "mei", + .dev_attrs = mei_cl_dev_attrs, + .match = mei_cl_device_match, + .probe = mei_cl_device_probe, + .remove = mei_cl_device_remove, + .uevent = mei_cl_uevent, +}; + +static void mei_cl_dev_release(struct device *dev) +{ + kfree(to_mei_cl_device(dev)); +} + +static struct device_type mei_cl_device_type = { + .release = mei_cl_dev_release, +}; + +struct mei_cl_device *mei_cl_add_device(struct mei_device *mei_device, + uuid_le uuid, char *name) +{ + struct mei_cl_device *device; + int status; + + device = kzalloc(sizeof(struct mei_cl_device), GFP_KERNEL); + if (!device) + return NULL; + + device->dev.parent = &mei_device->pdev->dev; + device->dev.bus = &mei_cl_bus_type; + device->dev.type = &mei_cl_device_type; + + dev_set_name(&device->dev, "%s", name); + + status = device_register(&device->dev); + if (status) + goto out_err; + + dev_dbg(&device->dev, "client %s registered\n", name); + + return device; + +out_err: + dev_err(device->dev.parent, "Failed to register MEI client\n"); + + kfree(device); + + return NULL; +} +EXPORT_SYMBOL_GPL(mei_cl_add_device); + +void mei_cl_remove_device(struct mei_cl_device *device) +{ + device_unregister(&device->dev); +} +EXPORT_SYMBOL_GPL(mei_cl_remove_device); diff --git a/drivers/misc/mei/mei_dev.h b/drivers/misc/mei/mei_dev.h index b5d66076de3d..7abb705ddf3f 100644 --- a/drivers/misc/mei/mei_dev.h +++ b/drivers/misc/mei/mei_dev.h @@ -21,6 +21,7 @@ #include #include #include +#include #include "hw.h" #include "hw-me-regs.h" @@ 
-262,6 +263,31 @@ struct mei_hw_ops { unsigned char *buf, unsigned long len); }; +/* MEI bus API*/ +struct mei_cl_device *mei_cl_add_device(struct mei_device *dev, + uuid_le uuid, char *name); +void mei_cl_remove_device(struct mei_cl_device *device); + +/** + * struct mei_cl_device - MEI device handle + * An mei_cl_device pointer is returned from mei_add_device() + * and links MEI bus clients to their actual ME host client pointer. + * Drivers for MEI devices will get an mei_cl_device pointer + * when being probed and shall use it for doing ME bus I/O. + * + * @dev: linux driver model device pointer + * @uuid: me client uuid + * @cl: mei client + * @priv_data: client private data + */ +struct mei_cl_device { + struct device dev; + + struct mei_cl *cl; + + void *priv_data; +}; + /** * struct mei_device - MEI private device struct diff --git a/include/linux/mei_cl_bus.h b/include/linux/mei_cl_bus.h new file mode 100644 index 000000000000..4e7351de7eca --- /dev/null +++ b/include/linux/mei_cl_bus.h @@ -0,0 +1,20 @@ +#ifndef _LINUX_MEI_CL_BUS_H +#define _LINUX_MEI_CL_BUS_H + +#include +#include + +struct mei_cl_device; + +struct mei_cl_driver { + struct device_driver driver; + const char *name; + + const struct mei_cl_device_id *id_table; + + int (*probe)(struct mei_cl_device *dev, + const struct mei_cl_device_id *id); + int (*remove)(struct mei_cl_device *dev); +}; + +#endif /* _LINUX_MEI_CL_BUS_H */ diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 779cf7c4a3d1..b508016fb76d 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -9,6 +9,7 @@ #ifdef __KERNEL__ #include +#include typedef unsigned long kernel_ulong_t; #endif @@ -568,4 +569,12 @@ struct ipack_device_id { __u32 device; /* Device ID or IPACK_ANY_ID */ }; +#define MEI_CL_MODULE_PREFIX "mei:" +#define MEI_CL_NAME_SIZE 32 + +struct mei_cl_device_id { + char name[MEI_CL_NAME_SIZE]; + kernel_ulong_t driver_info; +}; + #endif /* LINUX_MOD_DEVICETABLE_H */ diff --git a/scripts/mod/devicetable-offsets.c b/scripts/mod/devicetable-offsets.c index b45260bfeaa0..e66d4d258e1a 100644 --- a/scripts/mod/devicetable-offsets.c +++ b/scripts/mod/devicetable-offsets.c @@ -174,5 +174,8 @@ int main(void) DEVID_FIELD(x86_cpu_id, model); DEVID_FIELD(x86_cpu_id, vendor); + DEVID(mei_cl_device_id); + DEVID_FIELD(mei_cl_device_id, name); + return 0; } diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index 771ac17f635d..45f9a3377dcd 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -1133,6 +1133,18 @@ static int do_x86cpu_entry(const char *filename, void *symval, } ADD_TO_DEVTABLE("x86cpu", x86_cpu_id, do_x86cpu_entry); +/* Looks like: mei:S */ +static int do_mei_entry(const char *filename, void *symval, + char *alias) +{ + DEF_FIELD_ADDR(symval, mei_cl_device_id, name); + + sprintf(alias, MEI_CL_MODULE_PREFIX "%s", *name); + + return 1; +} +ADD_TO_DEVTABLE("mei", mei_cl_device_id, do_mei_entry); + /* Does namelen bytes of name exactly match the symbol? 
*/ static bool sym_is(const char *name, unsigned namelen, const char *symbol) { -- cgit From 333e4ee0781bd0b5938da263c4bb7ab66a0d1b57 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Wed, 27 Mar 2013 17:29:54 +0200 Subject: mei: bus: Implement driver registration Signed-off-by: Samuel Ortiz Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/bus.c | 26 ++++++++++++++++++++++++++ include/linux/mei_cl_bus.h | 7 +++++++ 2 files changed, 33 insertions(+) (limited to 'include/linux') diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c index 78c876af2676..d16b3c3e1b38 100644 --- a/drivers/misc/mei/bus.c +++ b/drivers/misc/mei/bus.c @@ -170,3 +170,29 @@ void mei_cl_remove_device(struct mei_cl_device *device) device_unregister(&device->dev); } EXPORT_SYMBOL_GPL(mei_cl_remove_device); + +int __mei_cl_driver_register(struct mei_cl_driver *driver, struct module *owner) +{ + int err; + + driver->driver.name = driver->name; + driver->driver.owner = owner; + driver->driver.bus = &mei_cl_bus_type; + + err = driver_register(&driver->driver); + if (err) + return err; + + pr_debug("mei: driver [%s] registered\n", driver->driver.name); + + return 0; +} +EXPORT_SYMBOL_GPL(__mei_cl_driver_register); + +void mei_cl_driver_unregister(struct mei_cl_driver *driver) +{ + driver_unregister(&driver->driver); + + pr_debug("mei: driver [%s] unregistered\n", driver->driver.name); +} +EXPORT_SYMBOL_GPL(mei_cl_driver_unregister); diff --git a/include/linux/mei_cl_bus.h b/include/linux/mei_cl_bus.h index 4e7351de7eca..ba2aa3b66f30 100644 --- a/include/linux/mei_cl_bus.h +++ b/include/linux/mei_cl_bus.h @@ -17,4 +17,11 @@ struct mei_cl_driver { int (*remove)(struct mei_cl_device *dev); }; +int __mei_cl_driver_register(struct mei_cl_driver *driver, + struct module *owner); +#define mei_cl_driver_register(driver) \ + __mei_cl_driver_register(driver, THIS_MODULE) + +void mei_cl_driver_unregister(struct mei_cl_driver *driver); + #endif /* _LINUX_MEI_CL_BUS_H */ -- cgit From 3e8332952dedd2c17bb497e3909e3b6fbac10ce7 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Wed, 27 Mar 2013 17:29:55 +0200 Subject: mei: bus: Initial implementation for I/O routines Signed-off-by: Samuel Ortiz Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/bus.c | 226 +++++++++++++++++++++++++++++++++++++++++++++ drivers/misc/mei/mei_dev.h | 30 ++++++ include/linux/mei_cl_bus.h | 11 +++ 3 files changed, 267 insertions(+) (limited to 'include/linux') diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c index d16b3c3e1b38..16c7fff50549 100644 --- a/drivers/misc/mei/bus.c +++ b/drivers/misc/mei/bus.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -25,6 +26,8 @@ #include #include "mei_dev.h" +#include "hw-me.h" +#include "client.h" #define to_mei_cl_driver(d) container_of(d, struct mei_cl_driver, driver) #define to_mei_cl_device(d) container_of(d, struct mei_cl_device, dev) @@ -81,6 +84,11 @@ static int mei_cl_device_remove(struct device *dev) if (!device || !dev->driver) return 0; + if (device->event_cb) { + device->event_cb = NULL; + cancel_work_sync(&device->event_work); + } + driver = to_mei_cl_driver(dev->driver); if (!driver->remove) { dev->driver = NULL; @@ -196,3 +204,221 @@ void mei_cl_driver_unregister(struct mei_cl_driver *driver) pr_debug("mei: driver [%s] unregistered\n", driver->driver.name); } EXPORT_SYMBOL_GPL(mei_cl_driver_unregister); + +int __mei_cl_send(struct mei_cl *cl, u8 *buf, size_t length) +{ + struct mei_device 
*dev; + struct mei_msg_hdr mei_hdr; + struct mei_cl_cb *cb; + int me_cl_id, err; + + if (WARN_ON(!cl || !cl->dev)) + return -ENODEV; + + if (cl->state != MEI_FILE_CONNECTED) + return -ENODEV; + + cb = mei_io_cb_init(cl, NULL); + if (!cb) + return -ENOMEM; + + err = mei_io_cb_alloc_req_buf(cb, length); + if (err < 0) { + mei_io_cb_free(cb); + return err; + } + + memcpy(cb->request_buffer.data, buf, length); + cb->fop_type = MEI_FOP_WRITE; + + dev = cl->dev; + + mutex_lock(&dev->device_lock); + + /* Check if we have an ME client device */ + me_cl_id = mei_me_cl_by_id(dev, cl->me_client_id); + if (me_cl_id == dev->me_clients_num) { + err = -ENODEV; + goto out_err; + } + + if (length > dev->me_clients[me_cl_id].props.max_msg_length) { + err = -EINVAL; + goto out_err; + } + + err = mei_cl_flow_ctrl_creds(cl); + if (err < 0) + goto out_err; + + /* Host buffer is not ready, we queue the request */ + if (err == 0 || !dev->hbuf_is_ready) { + cb->buf_idx = 0; + mei_hdr.msg_complete = 0; + cl->writing_state = MEI_WRITING; + list_add_tail(&cb->list, &dev->write_list.list); + + mutex_unlock(&dev->device_lock); + + return length; + } + + dev->hbuf_is_ready = false; + + /* Check for a maximum length */ + if (length > mei_hbuf_max_len(dev)) { + mei_hdr.length = mei_hbuf_max_len(dev); + mei_hdr.msg_complete = 0; + } else { + mei_hdr.length = length; + mei_hdr.msg_complete = 1; + } + + mei_hdr.host_addr = cl->host_client_id; + mei_hdr.me_addr = cl->me_client_id; + mei_hdr.reserved = 0; + + if (mei_write_message(dev, &mei_hdr, buf)) { + err = -EIO; + goto out_err; + } + + cl->writing_state = MEI_WRITING; + cb->buf_idx = mei_hdr.length; + + if (!mei_hdr.msg_complete) { + list_add_tail(&cb->list, &dev->write_list.list); + } else { + if (mei_cl_flow_ctrl_reduce(cl)) { + err = -EIO; + goto out_err; + } + + list_add_tail(&cb->list, &dev->write_waiting_list.list); + } + + mutex_unlock(&dev->device_lock); + + return mei_hdr.length; + +out_err: + mutex_unlock(&dev->device_lock); + mei_io_cb_free(cb); + + return err; +} + +int __mei_cl_recv(struct mei_cl *cl, u8 *buf, size_t length) +{ + struct mei_device *dev; + struct mei_cl_cb *cb; + size_t r_length; + int err; + + if (WARN_ON(!cl || !cl->dev)) + return -ENODEV; + + dev = cl->dev; + + mutex_lock(&dev->device_lock); + + if (!cl->read_cb) { + err = mei_cl_read_start(cl); + if (err < 0) { + mutex_unlock(&dev->device_lock); + return err; + } + } + + if (cl->reading_state != MEI_READ_COMPLETE && + !waitqueue_active(&cl->rx_wait)) { + mutex_unlock(&dev->device_lock); + + if (wait_event_interruptible(cl->rx_wait, + (MEI_READ_COMPLETE == cl->reading_state))) { + if (signal_pending(current)) + return -EINTR; + return -ERESTARTSYS; + } + + mutex_lock(&dev->device_lock); + } + + cb = cl->read_cb; + + if (cl->reading_state != MEI_READ_COMPLETE) { + r_length = 0; + goto out; + } + + r_length = min_t(size_t, length, cb->buf_idx); + + memcpy(buf, cb->response_buffer.data, r_length); + + mei_io_cb_free(cb); + cl->reading_state = MEI_IDLE; + cl->read_cb = NULL; + +out: + mutex_unlock(&dev->device_lock); + + return r_length; +} + +int mei_cl_send(struct mei_cl_device *device, u8 *buf, size_t length) +{ + struct mei_cl *cl = NULL; + + /* TODO: hook between mei_bus_client and mei_cl */ + + if (device->ops && device->ops->send) + return device->ops->send(device, buf, length); + + return __mei_cl_send(cl, buf, length); +} +EXPORT_SYMBOL_GPL(mei_cl_send); + +int mei_cl_recv(struct mei_cl_device *device, u8 *buf, size_t length) +{ + struct mei_cl *cl = NULL; + + /* TODO: hook between 
mei_bus_client and mei_cl */ + + if (device->ops && device->ops->recv) + return device->ops->recv(device, buf, length); + + return __mei_cl_recv(cl, buf, length); +} +EXPORT_SYMBOL_GPL(mei_cl_recv); + +static void mei_bus_event_work(struct work_struct *work) +{ + struct mei_cl_device *device; + + device = container_of(work, struct mei_cl_device, event_work); + + if (device->event_cb) + device->event_cb(device, device->events, device->event_context); + + device->events = 0; + + /* Prepare for the next read */ + mei_cl_read_start(device->cl); +} + +int mei_cl_register_event_cb(struct mei_cl_device *device, + mei_cl_event_cb_t event_cb, void *context) +{ + if (device->event_cb) + return -EALREADY; + + device->events = 0; + device->event_cb = event_cb; + device->event_context = context; + INIT_WORK(&device->event_work, mei_bus_event_work); + + mei_cl_read_start(device->cl); + + return 0; +} +EXPORT_SYMBOL_GPL(mei_cl_register_event_cb); diff --git a/drivers/misc/mei/mei_dev.h b/drivers/misc/mei/mei_dev.h index 7abb705ddf3f..cde5687039f3 100644 --- a/drivers/misc/mei/mei_dev.h +++ b/drivers/misc/mei/mei_dev.h @@ -268,6 +268,25 @@ struct mei_cl_device *mei_cl_add_device(struct mei_device *dev, uuid_le uuid, char *name); void mei_cl_remove_device(struct mei_cl_device *device); +int __mei_cl_send(struct mei_cl *cl, u8 *buf, size_t length); +int __mei_cl_recv(struct mei_cl *cl, u8 *buf, size_t length); + +/** + * struct mei_cl_transport_ops - MEI CL device transport ops + * This structure allows ME host clients to implement technology + * specific transport layers. + * + * @send: Tx hook for the device. This allows ME host clients to trap + * the device driver buffers before actually physically + * pushing it to the ME. + * @recv: Rx hook for the device. This allows ME host clients to trap the + * ME buffers before forwarding them to the device driver. + */ +struct mei_cl_transport_ops { + int (*send)(struct mei_cl_device *device, u8 *buf, size_t length); + int (*recv)(struct mei_cl_device *device, u8 *buf, size_t length); +}; + /** * struct mei_cl_device - MEI device handle * An mei_cl_device pointer is returned from mei_add_device() @@ -278,6 +297,10 @@ void mei_cl_remove_device(struct mei_cl_device *device); * @dev: linux driver model device pointer * @uuid: me client uuid * @cl: mei client + * @ops: ME transport ops + * @event_cb: Drivers register this callback to get asynchronous ME + * events (e.g. Rx buffer pending) notifications. + * @events: Events bitmask sent to the driver. 
* @priv_data: client private data */ struct mei_cl_device { @@ -285,6 +308,13 @@ struct mei_cl_device { struct mei_cl *cl; + const struct mei_cl_transport_ops *ops; + + struct work_struct event_work; + mei_cl_event_cb_t event_cb; + void *event_context; + unsigned long events; + void *priv_data; }; diff --git a/include/linux/mei_cl_bus.h b/include/linux/mei_cl_bus.h index ba2aa3b66f30..d9958c3960a2 100644 --- a/include/linux/mei_cl_bus.h +++ b/include/linux/mei_cl_bus.h @@ -24,4 +24,15 @@ int __mei_cl_driver_register(struct mei_cl_driver *driver, void mei_cl_driver_unregister(struct mei_cl_driver *driver); +int mei_cl_send(struct mei_cl_device *device, u8 *buf, size_t length); +int mei_cl_recv(struct mei_cl_device *device, u8 *buf, size_t length); + +typedef void (*mei_cl_event_cb_t)(struct mei_cl_device *device, + u32 events, void *context); +int mei_cl_register_event_cb(struct mei_cl_device *device, + mei_cl_event_cb_t read_cb, void *context); + +#define MEI_CL_EVENT_RX 0 +#define MEI_CL_EVENT_TX 1 + #endif /* _LINUX_MEI_CL_BUS_H */ -- cgit From aa6aef216f8aea1a00b56aafc29b8745237a9b62 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Wed, 27 Mar 2013 17:29:59 +0200 Subject: mei: bus: Implement bus driver data setter/getter MEI drivers should be able to carry their private data around. Signed-off-by: Samuel Ortiz Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/bus.c | 12 ++++++++++++ include/linux/mei_cl_bus.h | 3 +++ 2 files changed, 15 insertions(+) (limited to 'include/linux') diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c index 2b4b5b3f639f..8dbcb1516dc6 100644 --- a/drivers/misc/mei/bus.c +++ b/drivers/misc/mei/bus.c @@ -461,6 +461,18 @@ int mei_cl_register_event_cb(struct mei_cl_device *device, } EXPORT_SYMBOL_GPL(mei_cl_register_event_cb); +void *mei_cl_get_drvdata(const struct mei_cl_device *device) +{ + return dev_get_drvdata(&device->dev); +} +EXPORT_SYMBOL_GPL(mei_cl_get_drvdata); + +void mei_cl_set_drvdata(struct mei_cl_device *device, void *data) +{ + dev_set_drvdata(&device->dev, data); +} +EXPORT_SYMBOL_GPL(mei_cl_set_drvdata); + void mei_cl_bus_rx_event(struct mei_cl *cl) { struct mei_cl_device *device = cl->device; diff --git a/include/linux/mei_cl_bus.h b/include/linux/mei_cl_bus.h index d9958c3960a2..1bece18825ba 100644 --- a/include/linux/mei_cl_bus.h +++ b/include/linux/mei_cl_bus.h @@ -35,4 +35,7 @@ int mei_cl_register_event_cb(struct mei_cl_device *device, #define MEI_CL_EVENT_RX 0 #define MEI_CL_EVENT_TX 1 +void *mei_cl_get_drvdata(const struct mei_cl_device *device); +void mei_cl_set_drvdata(struct mei_cl_device *device, void *data); + #endif /* _LINUX_MEI_CL_BUS_H */ -- cgit From 6ae07f27ab202069bd567967a0099070eb7f77d5 Mon Sep 17 00:00:00 2001 From: Fabio Porcedda Date: Tue, 26 Mar 2013 10:35:17 +0100 Subject: driver core: platform_device.h: fix checkpatch errors and warnings Signed-off-by: Fabio Porcedda Signed-off-by: Greg Kroah-Hartman --- include/linux/platform_device.h | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index c082c71f7225..9abf1db6aea6 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -20,12 +20,12 @@ struct mfd_cell; struct platform_device { - const char * name; + const char *name; int id; bool id_auto; struct device dev; u32 num_resources; - struct resource * resource; + struct resource *resource; const struct 
platform_device_id *id_entry; @@ -47,9 +47,12 @@ extern struct bus_type platform_bus_type; extern struct device platform_bus; extern void arch_setup_pdev_archdata(struct platform_device *); -extern struct resource *platform_get_resource(struct platform_device *, unsigned int, unsigned int); +extern struct resource *platform_get_resource(struct platform_device *, + unsigned int, unsigned int); extern int platform_get_irq(struct platform_device *, unsigned int); -extern struct resource *platform_get_resource_byname(struct platform_device *, unsigned int, const char *); +extern struct resource *platform_get_resource_byname(struct platform_device *, + unsigned int, + const char *); extern int platform_get_irq_byname(struct platform_device *, const char *); extern int platform_add_devices(struct platform_device **, int); @@ -161,7 +164,8 @@ extern struct platform_device *platform_device_alloc(const char *name, int id); extern int platform_device_add_resources(struct platform_device *pdev, const struct resource *res, unsigned int num); -extern int platform_device_add_data(struct platform_device *pdev, const void *data, size_t size); +extern int platform_device_add_data(struct platform_device *pdev, + const void *data, size_t size); extern int platform_device_add(struct platform_device *pdev); extern void platform_device_del(struct platform_device *pdev); extern void platform_device_put(struct platform_device *pdev); @@ -190,7 +194,8 @@ static inline void *platform_get_drvdata(const struct platform_device *pdev) return dev_get_drvdata(&pdev->dev); } -static inline void platform_set_drvdata(struct platform_device *pdev, void *data) +static inline void platform_set_drvdata(struct platform_device *pdev, + void *data) { dev_set_drvdata(&pdev->dev, data); } @@ -222,10 +227,10 @@ static void __exit __platform_driver##_exit(void) \ } \ module_exit(__platform_driver##_exit); -extern struct platform_device *platform_create_bundle(struct platform_driver *driver, - int (*probe)(struct platform_device *), - struct resource *res, unsigned int n_res, - const void *data, size_t size); +extern struct platform_device *platform_create_bundle( + struct platform_driver *driver, int (*probe)(struct platform_device *), + struct resource *res, unsigned int n_res, + const void *data, size_t size); /* early platform driver interface */ struct early_platform_driver { -- cgit From cb06ff102e2d79a82cf780aa5e6947b2e0529ac0 Mon Sep 17 00:00:00 2001 From: Chanho Min Date: Wed, 27 Mar 2013 18:38:11 +0900 Subject: ARM: PL011: Add support for Rx DMA buffer polling. In DMA support, the received data is not pushed to the tty until the DMA buffer is filled. But some high-rate chips such as BT expect a fast response and data should be pushed immediately. In order to fix this issue, we suggest using a timer to poll the DMA buffer. In our tests, no data loss occurred at high baud rates compared with interrupt-driven operation (we tested at 3 Mbps). We change: - We add a timer for polling. If we set poll_timer to 10, then every 10 ms the timer handler checks the residue in the DMA buffer and transfers data to the tty. Also, last_residue is updated for the next polling. - poll_timeout is used to bound the timer's system cost. If poll_timeout is set to 3000 and no data is received for 3 seconds, we deactivate the poll timer and the driver falls back to interrupt-driven mode. When data is received again in the FIFO and a UART irq occurs, we switch back to DMA mode and start polling.
- We use consistent DMA mappings by default, to avoid the frequent cache operations of the timer function. - pl011_dma_rx_chars is modified. The pending size is recalculated because data can already have been taken by polling. - The polling time is adjusted if DMA rx poll is enabled but no rate is specified. The ideal polling interval to push one character per interval is the reciprocal of 'baud rate / 10 line bits per character / 1000 ms per sec'. But that is very aggressive for the system. Experimentally, '10000000 / baud' is suitable to receive dozens of characters. The poll rate can also be specified statically via dma_rx_poll_rate in the platform data. Changes compared to v1: - Use of consistent DMA mappings. - Added dma_rx_poll_rate in platform data to specify the polling interval. - Added dma_rx_poll_timeout in platform data to specify the polling timeout. Changes compared to v2: - Use of consistent DMA mappings by default. - Added dma_rx_poll_enable in platform data to adjust the polling time according to the baud rate. - Removed an unnecessary lock from the polling function. Signed-off-by: Chanho Min Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/amba-pl011.c | 157 +++++++++++++++++++++++++++++++++++----- include/linux/amba/serial.h | 3 + 2 files changed, 141 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index 3ea5408fcbeb..b031abf43a7a 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -29,6 +29,7 @@ * and hooked into this driver. */ + #if defined(CONFIG_SERIAL_AMBA_PL011_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ) #define SUPPORT_SYSRQ #endif @@ -117,6 +118,12 @@ struct pl011_dmarx_data { struct pl011_sgbuf sgbuf_b; dma_cookie_t cookie; bool running; + struct timer_list timer; + unsigned int last_residue; + unsigned long last_jiffies; + bool auto_poll_rate; + unsigned int poll_rate; + unsigned int poll_timeout; }; struct pl011_dmatx_data { @@ -223,16 +230,18 @@ static int pl011_fifo_to_tty(struct uart_amba_port *uap) static int pl011_sgbuf_init(struct dma_chan *chan, struct pl011_sgbuf *sg, enum dma_data_direction dir) { - sg->buf = kmalloc(PL011_DMA_BUFFER_SIZE, GFP_KERNEL); + dma_addr_t dma_addr; + + sg->buf = dma_alloc_coherent(chan->device->dev, + PL011_DMA_BUFFER_SIZE, &dma_addr, GFP_KERNEL); if (!sg->buf) return -ENOMEM; - sg_init_one(&sg->sg, sg->buf, PL011_DMA_BUFFER_SIZE); + sg_init_table(&sg->sg, 1); + sg_set_page(&sg->sg, phys_to_page(dma_addr), + PL011_DMA_BUFFER_SIZE, offset_in_page(dma_addr)); + sg_dma_address(&sg->sg) = dma_addr; - if (dma_map_sg(chan->device->dev, &sg->sg, 1, dir) != 1) { - kfree(sg->buf); - return -EINVAL; - } return 0; } @@ -240,8 +249,9 @@ static void pl011_sgbuf_free(struct dma_chan *chan, struct pl011_sgbuf *sg, enum dma_data_direction dir) { if (sg->buf) { - dma_unmap_sg(chan->device->dev, &sg->sg, 1, dir); - kfree(sg->buf); + dma_free_coherent(chan->device->dev, + PL011_DMA_BUFFER_SIZE, sg->buf, + sg_dma_address(&sg->sg)); } } @@ -300,6 +310,29 @@ static void pl011_dma_probe_initcall(struct uart_amba_port *uap) dmaengine_slave_config(chan, &rx_conf); uap->dmarx.chan = chan; + if (plat->dma_rx_poll_enable) { + /* Set poll rate if specified. */ + if (plat->dma_rx_poll_rate) { + uap->dmarx.auto_poll_rate = false; + uap->dmarx.poll_rate = plat->dma_rx_poll_rate; + } else { + /* + * The poll rate defaults to 100 ms if not + * specified. This will be adjusted with + * the baud rate at set_termios.
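 + * As a worked example of that adjustment: at + * 115200 baud the poll rate becomes + * DIV_ROUND_UP(10000000, 115200) = 87 ms.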
+ */ + uap->dmarx.auto_poll_rate = true; + uap->dmarx.poll_rate = 100; + } + /* 3 secs defaults poll_timeout if not specified. */ + if (plat->dma_rx_poll_timeout) + uap->dmarx.poll_timeout = + plat->dma_rx_poll_timeout; + else + uap->dmarx.poll_timeout = 3000; + } else + uap->dmarx.auto_poll_rate = false; + dev_info(uap->port.dev, "DMA channel RX %s\n", dma_chan_name(uap->dmarx.chan)); } @@ -701,24 +734,30 @@ static void pl011_dma_rx_chars(struct uart_amba_port *uap, struct tty_port *port = &uap->port.state->port; struct pl011_sgbuf *sgbuf = use_buf_b ? &uap->dmarx.sgbuf_b : &uap->dmarx.sgbuf_a; - struct device *dev = uap->dmarx.chan->device->dev; int dma_count = 0; u32 fifotaken = 0; /* only used for vdbg() */ - /* Pick everything from the DMA first */ + struct pl011_dmarx_data *dmarx = &uap->dmarx; + int dmataken = 0; + + if (uap->dmarx.poll_rate) { + /* The data can be taken by polling */ + dmataken = sgbuf->sg.length - dmarx->last_residue; + /* Recalculate the pending size */ + if (pending >= dmataken) + pending -= dmataken; + } + + /* Pick the remain data from the DMA */ if (pending) { - /* Sync in buffer */ - dma_sync_sg_for_cpu(dev, &sgbuf->sg, 1, DMA_FROM_DEVICE); /* * First take all chars in the DMA pipe, then look in the FIFO. * Note that tty_insert_flip_buf() tries to take as many chars * as it can. */ - dma_count = tty_insert_flip_string(port, sgbuf->buf, pending); - - /* Return buffer to device */ - dma_sync_sg_for_device(dev, &sgbuf->sg, 1, DMA_FROM_DEVICE); + dma_count = tty_insert_flip_string(port, sgbuf->buf + dmataken, + pending); uap->port.icount.rx += dma_count; if (dma_count < pending) @@ -726,6 +765,10 @@ static void pl011_dma_rx_chars(struct uart_amba_port *uap, "couldn't insert all characters (TTY is full?)\n"); } + /* Reset the last_residue for Rx DMA poll */ + if (uap->dmarx.poll_rate) + dmarx->last_residue = sgbuf->sg.length; + /* * Only continue with trying to read the FIFO if all DMA chars have * been taken first. @@ -865,6 +908,57 @@ static inline void pl011_dma_rx_stop(struct uart_amba_port *uap) writew(uap->dmacr, uap->port.membase + UART011_DMACR); } +/* + * Timer handler for Rx DMA polling. + * Every polling, It checks the residue in the dma buffer and transfer + * data to the tty. Also, last_residue is updated for the next polling. + */ +static void pl011_dma_rx_poll(unsigned long args) +{ + struct uart_amba_port *uap = (struct uart_amba_port *)args; + struct tty_port *port = &uap->port.state->port; + struct pl011_dmarx_data *dmarx = &uap->dmarx; + struct dma_chan *rxchan = uap->dmarx.chan; + unsigned long flags = 0; + unsigned int dmataken = 0; + unsigned int size = 0; + struct pl011_sgbuf *sgbuf; + int dma_count; + struct dma_tx_state state; + + sgbuf = dmarx->use_buf_b ? &uap->dmarx.sgbuf_b : &uap->dmarx.sgbuf_a; + rxchan->device->device_tx_status(rxchan, dmarx->cookie, &state); + if (likely(state.residue < dmarx->last_residue)) { + dmataken = sgbuf->sg.length - dmarx->last_residue; + size = dmarx->last_residue - state.residue; + dma_count = tty_insert_flip_string(port, sgbuf->buf + dmataken, + size); + if (dma_count == size) + dmarx->last_residue = state.residue; + dmarx->last_jiffies = jiffies; + } + tty_flip_buffer_push(port); + + /* + * If no data is received in poll_timeout, the driver will fall back + * to interrupt mode. We will retrigger DMA at the first interrupt. 
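 + * With the default poll_timeout of 3000 ms, this means + * polling stops after three seconds without received data.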
+ */ + if (jiffies_to_msecs(jiffies - dmarx->last_jiffies) + > uap->dmarx.poll_timeout) { + + spin_lock_irqsave(&uap->port.lock, flags); + pl011_dma_rx_stop(uap); + spin_unlock_irqrestore(&uap->port.lock, flags); + + uap->dmarx.running = false; + dmaengine_terminate_all(rxchan); + del_timer(&uap->dmarx.timer); + } else { + mod_timer(&uap->dmarx.timer, + jiffies + msecs_to_jiffies(uap->dmarx.poll_rate)); + } +} + static void pl011_dma_startup(struct uart_amba_port *uap) { int ret; @@ -927,6 +1021,16 @@ skip_rx: if (pl011_dma_rx_trigger_dma(uap)) dev_dbg(uap->port.dev, "could not trigger initial " "RX DMA job, fall back to interrupt mode\n"); + if (uap->dmarx.poll_rate) { + init_timer(&(uap->dmarx.timer)); + uap->dmarx.timer.function = pl011_dma_rx_poll; + uap->dmarx.timer.data = (unsigned long)uap; + mod_timer(&uap->dmarx.timer, + jiffies + + msecs_to_jiffies(uap->dmarx.poll_rate)); + uap->dmarx.last_residue = PL011_DMA_BUFFER_SIZE; + uap->dmarx.last_jiffies = jiffies; + } } } @@ -962,6 +1066,8 @@ static void pl011_dma_shutdown(struct uart_amba_port *uap) /* Clean up the RX DMA */ pl011_sgbuf_free(uap->dmarx.chan, &uap->dmarx.sgbuf_a, DMA_FROM_DEVICE); pl011_sgbuf_free(uap->dmarx.chan, &uap->dmarx.sgbuf_b, DMA_FROM_DEVICE); + if (uap->dmarx.poll_rate) + del_timer_sync(&uap->dmarx.timer); uap->using_rx_dma = false; } } @@ -976,7 +1082,6 @@ static inline bool pl011_dma_rx_running(struct uart_amba_port *uap) return uap->using_rx_dma && uap->dmarx.running; } - #else /* Blank functions if the DMA engine is not available */ static inline void pl011_dma_probe(struct uart_amba_port *uap) @@ -1088,8 +1193,18 @@ static void pl011_rx_chars(struct uart_amba_port *uap) dev_dbg(uap->port.dev, "could not trigger RX DMA job " "fall back to interrupt mode again\n"); uap->im |= UART011_RXIM; - } else + } else { uap->im &= ~UART011_RXIM; + /* Start Rx DMA poll */ + if (uap->dmarx.poll_rate) { + uap->dmarx.last_jiffies = jiffies; + uap->dmarx.last_residue = PL011_DMA_BUFFER_SIZE; + mod_timer(&uap->dmarx.timer, + jiffies + + msecs_to_jiffies(uap->dmarx.poll_rate)); + } + } + writew(uap->im, uap->port.membase + UART011_IMSC); } spin_lock(&uap->port.lock); @@ -1164,7 +1279,6 @@ static irqreturn_t pl011_int(int irq, void *dev_id) unsigned int dummy_read; spin_lock_irqsave(&uap->port.lock, flags); - status = readw(uap->port.membase + UART011_MIS); if (status) { do { @@ -1551,6 +1665,11 @@ pl011_set_termios(struct uart_port *port, struct ktermios *termios, */ baud = uart_get_baud_rate(port, termios, old, 0, port->uartclk / clkdiv); + /* + * Adjust RX DMA polling rate with baud rate if not specified. + */ + if (uap->dmarx.auto_poll_rate) + uap->dmarx.poll_rate = DIV_ROUND_UP(10000000, baud); if (baud > port->uartclk/16) quot = DIV_ROUND_CLOSEST(port->uartclk * 8, baud); diff --git a/include/linux/amba/serial.h b/include/linux/amba/serial.h index f612c783170f..62d9303c2837 100644 --- a/include/linux/amba/serial.h +++ b/include/linux/amba/serial.h @@ -203,6 +203,9 @@ struct amba_pl011_data { bool (*dma_filter)(struct dma_chan *chan, void *filter_param); void *dma_rx_param; void *dma_tx_param; + bool dma_rx_poll_enable; + unsigned int dma_rx_poll_rate; + unsigned int dma_rx_poll_timeout; void (*init) (void); void (*exit) (void); }; -- cgit From d1a820011b2fbc11d5af80d1a961fe66c613fa4b Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 28 Mar 2013 16:11:01 +0000 Subject: regulator: ab8500-ext: New driver to control external regulators The ABx500 is capable of controlling three external regulator supplies. 
Most commonly on and off are supported, but if an external regulator chipset or power supply supports high-power and low-power mode settings, we can control those too. Signed-off-by: Lee Jones Signed-off-by: Mark Brown --- drivers/regulator/Makefile | 2 +- drivers/regulator/ab8500-ext.c | 394 +++++++++++++++++++++++++++++++++++++++ drivers/regulator/ab8500.c | 12 +- include/linux/regulator/ab8500.h | 28 +++ 4 files changed, 434 insertions(+), 2 deletions(-) create mode 100644 drivers/regulator/ab8500-ext.c (limited to 'include/linux') diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile index 6e8250382def..47a34ff88f98 100644 --- a/drivers/regulator/Makefile +++ b/drivers/regulator/Makefile @@ -12,7 +12,7 @@ obj-$(CONFIG_REGULATOR_USERSPACE_CONSUMER) += userspace-consumer.o obj-$(CONFIG_REGULATOR_88PM8607) += 88pm8607.o obj-$(CONFIG_REGULATOR_AAT2870) += aat2870-regulator.o obj-$(CONFIG_REGULATOR_AB3100) += ab3100.o -obj-$(CONFIG_REGULATOR_AB8500) += ab8500.o +obj-$(CONFIG_REGULATOR_AB8500) += ab8500.o ab8500-ext.o obj-$(CONFIG_REGULATOR_AD5398) += ad5398.o obj-$(CONFIG_REGULATOR_ANATOP) += anatop-regulator.o obj-$(CONFIG_REGULATOR_ARIZONA) += arizona-micsupp.o arizona-ldo1.o diff --git a/drivers/regulator/ab8500-ext.c b/drivers/regulator/ab8500-ext.c new file mode 100644 index 000000000000..95008dec5190 --- /dev/null +++ b/drivers/regulator/ab8500-ext.c @@ -0,0 +1,394 @@ +/* + * Copyright (C) ST-Ericsson SA 2010 + * + * License Terms: GNU General Public License v2 + * + * Authors: Bengt Jonsson + * + * This file is based on drivers/regulator/ab8500.c + * + * AB8500 external regulators + * + * ab8500-ext supports the following regulators: + * - VextSupply3 + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/** + * struct ab8500_ext_regulator_info - ab8500 regulator information + * @dev: device pointer + * @desc: regulator description + * @rdev: regulator device + * @is_enabled: status of regulator (on/off) + * @update_bank: bank to control on/off + * @update_reg: register to control on/off + * @update_mask: mask to enable/disable and set mode of regulator + * @update_val: bits holding the regulator current mode + * @update_val_en: bits to set EN pin active (LPn pin deactive) + * normally this means high power mode + * @update_val_en_lp: bits to set EN pin active and LPn pin active + * normally this means low power mode + * @delay: startup delay in ms + */ +struct ab8500_ext_regulator_info { + struct device *dev; + struct regulator_desc desc; + struct regulator_dev *rdev; + bool is_enabled; + u8 update_bank; + u8 update_reg; + u8 update_mask; + u8 update_val; + u8 update_val_en; + u8 update_val_en_lp; +}; + +static int ab8500_ext_regulator_enable(struct regulator_dev *rdev) +{ + int ret; + struct ab8500_ext_regulator_info *info = rdev_get_drvdata(rdev); + + if (info == NULL) { + dev_err(rdev_get_dev(rdev), "regulator info null pointer\n"); + return -EINVAL; + } + + ret = abx500_mask_and_set_register_interruptible(info->dev, + info->update_bank, info->update_reg, + info->update_mask, info->update_val); + if (ret < 0) + dev_err(rdev_get_dev(info->rdev), + "couldn't set enable bits for regulator\n"); + + info->is_enabled = true; + + dev_dbg(rdev_get_dev(rdev), "%s-enable (bank, reg, mask, value):" + " 0x%02x, 0x%02x, 0x%02x, 0x%02x\n", + info->desc.name, info->update_bank, info->update_reg, + info->update_mask, info->update_val); + + return ret; +} + +static int ab8500_ext_regulator_disable(struct regulator_dev *rdev) +{ 
+ int ret; + struct ab8500_ext_regulator_info *info = rdev_get_drvdata(rdev); + + if (info == NULL) { + dev_err(rdev_get_dev(rdev), "regulator info null pointer\n"); + return -EINVAL; + } + + ret = abx500_mask_and_set_register_interruptible(info->dev, + info->update_bank, info->update_reg, + info->update_mask, 0x0); + if (ret < 0) + dev_err(rdev_get_dev(info->rdev), + "couldn't set disable bits for regulator\n"); + + info->is_enabled = false; + + dev_dbg(rdev_get_dev(rdev), "%s-disable (bank, reg, mask, value):" + " 0x%02x, 0x%02x, 0x%02x, 0x%02x\n", + info->desc.name, info->update_bank, info->update_reg, + info->update_mask, 0x0); + + return ret; +} + +static int ab8500_ext_regulator_is_enabled(struct regulator_dev *rdev) +{ + int ret; + struct ab8500_ext_regulator_info *info = rdev_get_drvdata(rdev); + u8 regval; + + if (info == NULL) { + dev_err(rdev_get_dev(rdev), "regulator info null pointer\n"); + return -EINVAL; + } + + ret = abx500_get_register_interruptible(info->dev, + info->update_bank, info->update_reg, ®val); + if (ret < 0) { + dev_err(rdev_get_dev(rdev), + "couldn't read 0x%x register\n", info->update_reg); + return ret; + } + + dev_dbg(rdev_get_dev(rdev), "%s-is_enabled (bank, reg, mask, value):" + " 0x%02x, 0x%02x, 0x%02x, 0x%02x\n", + info->desc.name, info->update_bank, info->update_reg, + info->update_mask, regval); + + if (regval & info->update_mask) + info->is_enabled = true; + else + info->is_enabled = false; + + return info->is_enabled; +} + +static int ab8500_ext_regulator_set_mode(struct regulator_dev *rdev, + unsigned int mode) +{ + int ret = 0; + struct ab8500_ext_regulator_info *info = rdev_get_drvdata(rdev); + + if (info == NULL) { + dev_err(rdev_get_dev(rdev), "regulator info null pointer\n"); + return -EINVAL; + } + + switch (mode) { + case REGULATOR_MODE_NORMAL: + info->update_val = info->update_val_hp; + break; + case REGULATOR_MODE_IDLE: + info->update_val = info->update_val_lp; + break; + + default: + return -EINVAL; + } + + if (info->is_enabled) { + u8 regval; + + ret = enable(info, ®val); + if (ret < 0) + dev_err(rdev_get_dev(rdev), + "Could not set regulator mode.\n"); + + dev_dbg(rdev_get_dev(rdev), + "%s-set_mode (bank, reg, mask, value): " + "0x%x, 0x%x, 0x%x, 0x%x\n", + info->desc.name, info->update_bank, info->update_reg, + info->update_mask, regval); + } + + return ret; +} + +static unsigned int ab8500_ext_regulator_get_mode(struct regulator_dev *rdev) +{ + struct ab8500_ext_regulator_info *info = rdev_get_drvdata(rdev); + int ret; + + if (info == NULL) { + dev_err(rdev_get_dev(rdev), "regulator info null pointer\n"); + return -EINVAL; + } + + if (info->update_val == info->update_val_hp) + ret = REGULATOR_MODE_NORMAL; + else if (info->update_val == info->update_val_lp) + ret = REGULATOR_MODE_IDLE; + else + ret = -EINVAL; + + return ret; +} + +static int ab8500_ext_fixed_get_voltage(struct regulator_dev *rdev) +{ + struct regulation_constraints *regu_constraints = rdev->constraints; + + if (regu_constraints == NULL) { + dev_err(rdev_get_dev(rdev), "regulator constraints null pointer\n"); + return -EINVAL; + } + if (regu_constraints->min_uV && regu_constraints->max_uV) { + if (regu_constraints->min_uV == regu_constraints->max_uV) + return regu_constraints->min_uV; + } + return -EINVAL; +} + +static int ab8500_ext_list_voltage(struct regulator_dev *rdev, + unsigned selector) +{ + struct regulation_constraints *regu_constraints = rdev->constraints; + + if (regu_constraints == NULL) { + dev_err(rdev_get_dev(rdev), "regulator constraints null 
pointer\n"); + return -EINVAL; + } + /* return the uV for the fixed regulators */ + if (regu_constraints->min_uV && regu_constraints->max_uV) { + if (regu_constraints->min_uV == regu_constraints->max_uV) + return regu_constraints->min_uV; + } + return -EINVAL; +} + +static struct regulator_ops ab8500_ext_regulator_ops = { + .enable = ab8500_ext_regulator_enable, + .disable = ab8500_ext_regulator_disable, + .is_enabled = ab8500_ext_regulator_is_enabled, + .set_mode = ab8500_ext_regulator_set_mode, + .get_mode = ab8500_ext_regulator_get_mode, + .get_voltage = ab8500_ext_fixed_get_voltage, + .list_voltage = ab8500_ext_list_voltage, +}; + + +static struct ab8500_ext_regulator_info + ab8500_ext_regulator_info[AB8500_NUM_EXT_REGULATORS] = { + [AB8500_EXT_SUPPLY1] = { + .desc = { + .name = "VEXTSUPPLY1", + .ops = &ab8500_ext_regulator_ops, + .type = REGULATOR_VOLTAGE, + .id = AB8500_EXT_SUPPLY1, + .owner = THIS_MODULE, + .n_voltages = 1, + }, + .update_bank = 0x04, + .update_reg = 0x08, + .update_mask = 0x03, + .update_val = 0x01, + .update_val_hp = 0x01, + .update_val_lp = 0x03, + .update_val_hw = 0x02, + }, + [AB8500_EXT_SUPPLY2] = { + .desc = { + .name = "VEXTSUPPLY2", + .ops = &ab8500_ext_regulator_ops, + .type = REGULATOR_VOLTAGE, + .id = AB8500_EXT_SUPPLY2, + .owner = THIS_MODULE, + .n_voltages = 1, + }, + .update_bank = 0x04, + .update_reg = 0x08, + .update_mask = 0x0c, + .update_val = 0x04, + .update_val_hp = 0x04, + .update_val_lp = 0x0c, + .update_val_hw = 0x08, + }, + [AB8500_EXT_SUPPLY3] = { + .desc = { + .name = "VEXTSUPPLY3", + .ops = &ab8500_ext_regulator_ops, + .type = REGULATOR_VOLTAGE, + .id = AB8500_EXT_SUPPLY3, + .owner = THIS_MODULE, + .n_voltages = 1, + }, + .update_bank = 0x04, + .update_reg = 0x08, + .update_mask = 0x30, + .update_val = 0x10, + .update_val_en = 0x10, + .update_val_en_lp = 0x30, + }, +}; + +int ab8500_ext_regulator_init(struct platform_device *pdev) +{ + struct ab8500 *ab8500 = dev_get_drvdata(pdev->dev.parent); + struct ab8500_platform_data *ppdata; + struct ab8500_regulator_platform_data *pdata; + struct regulator_config config = { }; + int i, err; + + if (!ab8500) { + dev_err(&pdev->dev, "null mfd parent\n"); + return -EINVAL; + } + ppdata = dev_get_platdata(ab8500->dev); + if (!ppdata) { + dev_err(&pdev->dev, "null parent pdata\n"); + return -EINVAL; + } + + pdata = ppdata->regulator; + if (!pdata) { + dev_err(&pdev->dev, "null pdata\n"); + return -EINVAL; + } + + /* make sure the platform data has the correct size */ + if (pdata->num_ext_regulator != ARRAY_SIZE(ab8500_ext_regulator_info)) { + dev_err(&pdev->dev, "Configuration error: size mismatch.\n"); + return -EINVAL; + } + + /* check for AB8500 2.x */ + if (abx500_get_chip_id(&pdev->dev) < 0x30) { + struct ab8500_ext_regulator_info *info; + + /* VextSupply3LPn is inverted on AB8500 2.x */ + info = &ab8500_ext_regulator_info[AB8500_EXT_SUPPLY3]; + info->update_val = 0x30; + info->update_val_en = 0x30; + info->update_val_en_lp = 0x10; + } + + /* register all regulators */ + for (i = 0; i < ARRAY_SIZE(ab8500_ext_regulator_info); i++) { + struct ab8500_ext_regulator_info *info = NULL; + + /* assign per-regulator data */ + info = &ab8500_ext_regulator_info[i]; + info->dev = &pdev->dev; + + config.dev = &pdev->dev; + config.init_data = &pdata->ext_regulator[i]; + config.driver_data = info; + + /* register regulator with framework */ + info->rdev = regulator_register(&info->desc, &config); + + if (IS_ERR(info->rdev)) { + err = PTR_ERR(info->rdev); + dev_err(&pdev->dev, "failed to register regulator %s\n", 
+ info->desc.name); + /* when we fail, un-register all earlier regulators */ + while (--i >= 0) { + info = &ab8500_ext_regulator_info[i]; + regulator_unregister(info->rdev); + } + return err; + } + + dev_dbg(rdev_get_dev(info->rdev), + "%s-probed\n", info->desc.name); + } + + return 0; +} + +int ab8500_ext_regulator_exit(struct platform_device *pdev) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ab8500_ext_regulator_info); i++) { + struct ab8500_ext_regulator_info *info = NULL; + info = &ab8500_ext_regulator_info[i]; + + dev_vdbg(rdev_get_dev(info->rdev), + "%s-remove\n", info->desc.name); + + regulator_unregister(info->rdev); + } + + return 0; +} + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Bengt Jonsson "); +MODULE_DESCRIPTION("AB8500 external regulator driver"); +MODULE_ALIAS("platform:ab8500-ext-regulator"); diff --git a/drivers/regulator/ab8500.c b/drivers/regulator/ab8500.c index 3a1896655557..49746884923d 100644 --- a/drivers/regulator/ab8500.c +++ b/drivers/regulator/ab8500.c @@ -947,6 +947,11 @@ static int ab8500_regulator_probe(struct platform_device *pdev) return err; } + /* register external regulators (before Vaux1, 2 and 3) */ + err = ab8500_ext_regulator_init(pdev); + if (err) + return err; + /* register all regulators */ for (i = 0; i < ARRAY_SIZE(ab8500_regulator_info); i++) { err = ab8500_regulator_register(pdev, &pdata->regulator[i], i, NULL); @@ -959,7 +964,7 @@ static int ab8500_regulator_probe(struct platform_device *pdev) static int ab8500_regulator_remove(struct platform_device *pdev) { - int i; + int i, err; for (i = 0; i < ARRAY_SIZE(ab8500_regulator_info); i++) { struct ab8500_regulator_info *info = NULL; @@ -971,6 +976,11 @@ static int ab8500_regulator_remove(struct platform_device *pdev) regulator_unregister(info->regulator); } + /* remove external regulators (after Vaux1, 2 and 3) */ + err = ab8500_ext_regulator_exit(pdev); + if (err) + return err; + return 0; } diff --git a/include/linux/regulator/ab8500.h b/include/linux/regulator/ab8500.h index 26792ff360be..4e92e5b879a5 100644 --- a/include/linux/regulator/ab8500.h +++ b/include/linux/regulator/ab8500.h @@ -10,6 +10,8 @@ #ifndef __LINUX_MFD_AB8500_REGULATOR_H #define __LINUX_MFD_AB8500_REGULATOR_H +#include + /* AB8500 regulators */ enum ab8500_regulator_id { AB8500_LDO_AUX1, @@ -140,11 +142,37 @@ enum ab9540_regulator_reg { AB9540_NUM_REGULATOR_REGISTERS, }; +/* AB8500 external regulators */ +enum ab8500_ext_regulator_id { + AB8500_EXT_SUPPLY1, + AB8500_EXT_SUPPLY2, + AB8500_EXT_SUPPLY3, + AB8500_NUM_EXT_REGULATORS, +}; + +/* AB8500 regulator platform data */ struct ab8500_regulator_platform_data { int num_reg_init; struct ab8500_regulator_reg_init *reg_init; int num_regulator; struct regulator_init_data *regulator; + int num_ext_regulator; + struct regulator_init_data *ext_regulator; }; +/* AB8500 external regulator functions (internal) */ +#ifdef CONFIG_REGULATOR_AB8500_EXT +int ab8500_ext_regulator_init(struct platform_device *pdev); +int ab8500_ext_regulator_exit(struct platform_device *pdev); +#else +inline int ab8500_ext_regulator_init(struct platform_device *pdev) +{ + return 0; +} +inline int ab8500_ext_regulator_exit(struct platform_device *pdev) +{ + return 0; +} +#endif + #endif -- cgit From 18bc2b39307b45527efc6c84836953c7a8f2181e Mon Sep 17 00:00:00 2001 From: Bengt Jonsson Date: Thu, 28 Mar 2013 16:11:06 +0000 Subject: regulator: ab8500-ext: Add HW request support Support for HW request is added in the external regulator driver. 
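As a rough sketch of how a board might opt in (the init_data array layout and constraint names below are invented for illustration; only struct ab8500_ext_regulator_cfg and its hwreq flag come from this patch):

static struct ab8500_ext_regulator_cfg vext3_cfg = {
	.hwreq = true,	/* let the HW (SysClkReq) logic drive the supply when SW is idle */
};

static struct regulator_init_data board_ext_regulators[AB8500_NUM_EXT_REGULATORS] = {
	[AB8500_EXT_SUPPLY3] = {
		.constraints = {
			.name = "vext-supply-3",	/* hypothetical name */
			.valid_ops_mask = REGULATOR_CHANGE_STATUS,
		},
		/* the driver dereferences this as an ab8500_ext_regulator_cfg */
		.driver_data = &vext3_cfg,
	},
	/* VextSupply1/2 entries omitted */
};
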
A flag in the board configuration can be set to let HW control the regulator when there is no SW request. This means that the regulator will be put in high power mode when there is a SW request and in HW-request mode otherwise. Signed-off-by: Bengt Jonsson Signed-off-by: Lee Jones Reviewed-by: Mattias NILSSON Reviewed-by: Jonas ABERG Signed-off-by: Mark Brown --- drivers/regulator/ab8500-ext.c | 98 +++++++++++++++++++++++++++++----------- include/linux/regulator/ab8500.h | 4 ++ 2 files changed, 75 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/drivers/regulator/ab8500-ext.c b/drivers/regulator/ab8500-ext.c index 95008dec5190..21b9bfb0fc5e 100644 --- a/drivers/regulator/ab8500-ext.c +++ b/drivers/regulator/ab8500-ext.c @@ -28,80 +28,121 @@ * @dev: device pointer * @desc: regulator description * @rdev: regulator device + * @cfg: regulator configuration (extension of regulator FW configuration) * @is_enabled: status of regulator (on/off) * @update_bank: bank to control on/off * @update_reg: register to control on/off * @update_mask: mask to enable/disable and set mode of regulator * @update_val: bits holding the regulator current mode - * @update_val_en: bits to set EN pin active (LPn pin deactive) + * @update_val_hp: bits to set EN pin active (LPn pin deactive) * normally this means high power mode - * @update_val_en_lp: bits to set EN pin active and LPn pin active - * normally this means low power mode - * @delay: startup delay in ms + * @update_val_lp: bits to set EN pin active and LPn pin active + * normally this means low power mode + * @update_val_hw: bits to set regulator pins in HW control + * SysClkReq pins and logic will choose mode */ struct ab8500_ext_regulator_info { struct device *dev; struct regulator_desc desc; struct regulator_dev *rdev; + struct ab8500_ext_regulator_cfg *cfg; bool is_enabled; u8 update_bank; u8 update_reg; u8 update_mask; u8 update_val; - u8 update_val_en; - u8 update_val_en_lp; + u8 update_val_hp; + u8 update_val_lp; + u8 update_val_hw; }; -static int ab8500_ext_regulator_enable(struct regulator_dev *rdev) +static int enable(struct ab8500_ext_regulator_info *info, u8 *regval) { int ret; - struct ab8500_ext_regulator_info *info = rdev_get_drvdata(rdev); - if (info == NULL) { - dev_err(rdev_get_dev(rdev), "regulator info null pointer\n"); - return -EINVAL; - } + *regval = info->update_val; + + /* + * To satisfy both HW high power request and SW request, the regulator + * must be on in high power. 
+ */ + if (info->cfg && info->cfg->hwreq) + *regval = info->update_val_hp; ret = abx500_mask_and_set_register_interruptible(info->dev, info->update_bank, info->update_reg, - info->update_mask, info->update_val); + info->update_mask, *regval); if (ret < 0) dev_err(rdev_get_dev(info->rdev), "couldn't set enable bits for regulator\n"); info->is_enabled = true; - dev_dbg(rdev_get_dev(rdev), "%s-enable (bank, reg, mask, value):" - " 0x%02x, 0x%02x, 0x%02x, 0x%02x\n", - info->desc.name, info->update_bank, info->update_reg, - info->update_mask, info->update_val); - return ret; } -static int ab8500_ext_regulator_disable(struct regulator_dev *rdev) +static int ab8500_ext_regulator_enable(struct regulator_dev *rdev) { int ret; struct ab8500_ext_regulator_info *info = rdev_get_drvdata(rdev); + u8 regval; if (info == NULL) { dev_err(rdev_get_dev(rdev), "regulator info null pointer\n"); return -EINVAL; } + ret = enable(info, ®val); + + dev_dbg(rdev_get_dev(rdev), "%s-enable (bank, reg, mask, value):" + " 0x%02x, 0x%02x, 0x%02x, 0x%02x\n", + info->desc.name, info->update_bank, info->update_reg, + info->update_mask, regval); + + return ret; +} + +static int disable(struct ab8500_ext_regulator_info *info, u8 *regval) +{ + int ret; + + *regval = 0x0; + + /* + * Set the regulator in HW request mode if configured + */ + if (info->cfg && info->cfg->hwreq) + *regval = info->update_val_hw; + ret = abx500_mask_and_set_register_interruptible(info->dev, info->update_bank, info->update_reg, - info->update_mask, 0x0); + info->update_mask, *regval); if (ret < 0) dev_err(rdev_get_dev(info->rdev), "couldn't set disable bits for regulator\n"); info->is_enabled = false; + return ret; +} + +static int ab8500_ext_regulator_disable(struct regulator_dev *rdev) +{ + int ret; + struct ab8500_ext_regulator_info *info = rdev_get_drvdata(rdev); + u8 regval; + + if (info == NULL) { + dev_err(rdev_get_dev(rdev), "regulator info null pointer\n"); + return -EINVAL; + } + + ret = disable(info, ®val); + dev_dbg(rdev_get_dev(rdev), "%s-disable (bank, reg, mask, value):" " 0x%02x, 0x%02x, 0x%02x, 0x%02x\n", info->desc.name, info->update_bank, info->update_reg, - info->update_mask, 0x0); + info->update_mask, regval); return ret; } @@ -130,7 +171,8 @@ static int ab8500_ext_regulator_is_enabled(struct regulator_dev *rdev) info->desc.name, info->update_bank, info->update_reg, info->update_mask, regval); - if (regval & info->update_mask) + if (((regval & info->update_mask) == info->update_val_lp) || + ((regval & info->update_mask) == info->update_val_hp)) info->is_enabled = true; else info->is_enabled = false; @@ -241,7 +283,6 @@ static struct regulator_ops ab8500_ext_regulator_ops = { .list_voltage = ab8500_ext_list_voltage, }; - static struct ab8500_ext_regulator_info ab8500_ext_regulator_info[AB8500_NUM_EXT_REGULATORS] = { [AB8500_EXT_SUPPLY1] = { @@ -291,8 +332,9 @@ static struct ab8500_ext_regulator_info .update_reg = 0x08, .update_mask = 0x30, .update_val = 0x10, - .update_val_en = 0x10, - .update_val_en_lp = 0x30, + .update_val_hp = 0x10, + .update_val_lp = 0x30, + .update_val_hw = 0x20, }, }; @@ -333,8 +375,8 @@ int ab8500_ext_regulator_init(struct platform_device *pdev) /* VextSupply3LPn is inverted on AB8500 2.x */ info = &ab8500_ext_regulator_info[AB8500_EXT_SUPPLY3]; info->update_val = 0x30; - info->update_val_en = 0x30; - info->update_val_en_lp = 0x10; + info->update_val_hp = 0x30; + info->update_val_lp = 0x10; } /* register all regulators */ @@ -344,6 +386,8 @@ int ab8500_ext_regulator_init(struct platform_device *pdev) /* 
assign per-regulator data */ info = &ab8500_ext_regulator_info[i]; info->dev = &pdev->dev; + info->cfg = (struct ab8500_ext_regulator_cfg *) + pdata->ext_regulator[i].driver_data; config.dev = &pdev->dev; config.init_data = &pdata->ext_regulator[i]; diff --git a/include/linux/regulator/ab8500.h b/include/linux/regulator/ab8500.h index 4e92e5b879a5..cf496e93c0cf 100644 --- a/include/linux/regulator/ab8500.h +++ b/include/linux/regulator/ab8500.h @@ -143,6 +143,10 @@ enum ab9540_regulator_reg { }; /* AB8500 external regulators */ +struct ab8500_ext_regulator_cfg { + bool hwreq; /* requires hw mode or high power mode */ +}; + enum ab8500_ext_regulator_id { AB8500_EXT_SUPPLY1, AB8500_EXT_SUPPLY2, -- cgit From 41a06aa738ad889cf96f56024ddf84ecf4a18a6f Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 28 Mar 2013 16:11:08 +0000 Subject: regulator: ab8500: Remove USB regulator The USB regulator is controlled by hardware. The software support was only needed for early hardware (ED) which is no longer supported. Signed-off-by: Bengt Jonsson Signed-off-by: Lee Jones Signed-off-by: Mark Brown --- drivers/regulator/ab8500.c | 20 -------------------- include/linux/regulator/ab8500.h | 1 - 2 files changed, 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/regulator/ab8500.c b/drivers/regulator/ab8500.c index 49746884923d..4d88a604efd1 100644 --- a/drivers/regulator/ab8500.c +++ b/drivers/regulator/ab8500.c @@ -488,25 +488,6 @@ static struct ab8500_regulator_info .update_val_idle = 0x82, .update_val_normal = 0x02, }, - - /* - * Regulators with fixed voltage and normal mode - */ - [AB8500_LDO_USB] = { - .desc = { - .name = "LDO-USB", - .ops = &ab8500_regulator_ops, - .type = REGULATOR_VOLTAGE, - .id = AB8500_LDO_USB, - .owner = THIS_MODULE, - .n_voltages = 1, - .min_uV = 3300000, - .enable_time = 150, - }, - .update_bank = 0x03, - .update_reg = 0x82, - .update_mask = 0x03, - }, [AB8500_LDO_AUDIO] = { .desc = { .name = "LDO-AUDIO", @@ -862,7 +843,6 @@ static struct of_regulator_match ab8500_regulator_matches[] = { { .name = "ab8500_ldo_aux3", .driver_data = (void *) AB8500_LDO_AUX3, }, { .name = "ab8500_ldo_intcore", .driver_data = (void *) AB8500_LDO_INTCORE, }, { .name = "ab8500_ldo_tvout", .driver_data = (void *) AB8500_LDO_TVOUT, }, - { .name = "ab8500_ldo_usb", .driver_data = (void *) AB8500_LDO_USB, }, { .name = "ab8500_ldo_audio", .driver_data = (void *) AB8500_LDO_AUDIO, }, { .name = "ab8500_ldo_anamic1", .driver_data = (void *) AB8500_LDO_ANAMIC1, }, { .name = "ab8500_ldo_amamic2", .driver_data = (void *) AB8500_LDO_ANAMIC2, }, diff --git a/include/linux/regulator/ab8500.h b/include/linux/regulator/ab8500.h index cf496e93c0cf..b86e089195ea 100644 --- a/include/linux/regulator/ab8500.h +++ b/include/linux/regulator/ab8500.h @@ -19,7 +19,6 @@ enum ab8500_regulator_id { AB8500_LDO_AUX3, AB8500_LDO_INTCORE, AB8500_LDO_TVOUT, - AB8500_LDO_USB, AB8500_LDO_AUDIO, AB8500_LDO_ANAMIC1, AB8500_LDO_ANAMIC2, -- cgit From da0b0c47dcfd92317e2ece4c3434e1f82b55cf8a Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 28 Mar 2013 16:11:09 +0000 Subject: regulator: ab8500: Init debug from regulator driver The purpose of this patch is to guarantee that ab8500-debug will record the regulator registers before they are modified by the ab8500 regulator driver. 
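In outline, the probe path after this patch records state first (a simplified sketch, not the literal hunk; the two placeholder calls stand in for the existing register-init loop and regulator registration, and their error handling is elided):

static int ab8500_regulator_probe(struct platform_device *pdev)
{
	int err;

	/* Snapshot the regulator registers before anything writes to them. */
	err = ab8500_regulator_debug_init(pdev);
	if (err)
		return err;

	apply_reg_init(pdev);		/* placeholder: board register init */
	register_all_regulators(pdev);	/* placeholder: regulator_register loop */

	return 0;
}
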
Signed-off-by: Lee Jones Signed-off-by: Mark Brown --- drivers/regulator/ab8500.c | 10 ++++++++++ include/linux/regulator/ab8500.h | 14 ++++++++++++++ 2 files changed, 24 insertions(+) (limited to 'include/linux') diff --git a/drivers/regulator/ab8500.c b/drivers/regulator/ab8500.c index 4d88a604efd1..bf34c4cd6631 100644 --- a/drivers/regulator/ab8500.c +++ b/drivers/regulator/ab8500.c @@ -911,6 +911,11 @@ static int ab8500_regulator_probe(struct platform_device *pdev) return -EINVAL; } + /* initialize debug (initial state is recorded with this call) */ + err = ab8500_regulator_debug_init(pdev); + if (err) + return err; + /* initialize registers */ for (i = 0; i < pdata->num_reg_init; i++) { int id, mask, value; @@ -961,6 +966,11 @@ static int ab8500_regulator_remove(struct platform_device *pdev) if (err) return err; + /* remove regulator debug */ + err = ab8500_regulator_debug_exit(pdev); + if (err) + return err; + return 0; } diff --git a/include/linux/regulator/ab8500.h b/include/linux/regulator/ab8500.h index b86e089195ea..592a3f3994c0 100644 --- a/include/linux/regulator/ab8500.h +++ b/include/linux/regulator/ab8500.h @@ -178,4 +178,18 @@ inline int ab8500_ext_regulator_exit(struct platform_device *pdev) } #endif +#ifdef CONFIG_REGULATOR_AB8500_DEBUG +int ab8500_regulator_debug_init(struct platform_device *pdev); +int ab8500_regulator_debug_exit(struct platform_device *pdev); +#else +static inline int ab8500_regulator_debug_init(struct platform_device *pdev) +{ + return 0; +} +static inline int ab8500_regulator_debug_exit(struct platform_device *pdev) +{ + return 0; +} +#endif + #endif -- cgit From a3f109bd793dfe5c611220ca5ab6c72f1aed479e Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Thu, 28 Mar 2013 11:51:31 +0000 Subject: sh_eth: add R-Car support for real Commit d0418bb7123f44b23d69ac349eec7daf9103472f (net: sh_eth: Add eth support for R8A7779 device) was a failed attempt to add support for one of the members of the R-Car SoC family, for three reasons: it treated R8A7779 the same as SH7724, except for a quite dirty hack that added the ECMR_ELB bit to the mask in sh_eth_set_rate() without removing the ECMR_RTM bit (even though that bit is reserved on R-Car Ether); it didn't add a new register offset array, even though the closest mapping, SH_ETH_REG_FAST_SH4, differs by 0x200 from the offsets all the R-Car Ether registers have; and some of the registers in that old mapping don't exist on R-Car Ether at all (because of this, SH7724's 'sh_eth_my_cpu_data' structure is not adequate for R-Car either). Fix all these shortcomings, restoring the SH7724 related section to its pristine state... Signed-off-by: Sergei Shtylyov Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/sh_eth.c | 107 +++++++++++++++++++++++++++++--- include/linux/sh_eth.h | 1 + 2 files changed, 100 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 13abe917cbdf..da604059b148 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -2,7 +2,8 @@ * SuperH Ethernet device driver * * Copyright (C) 2006-2012 Nobuhiro Iwamatsu - * Copyright (C) 2008-2012 Renesas Solutions Corp. + * Copyright (C) 2008-2013 Renesas Solutions Corp. + * Copyright (C) 2013 Cogent Embedded, Inc. 
* * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -147,6 +148,51 @@ static const u16 sh_eth_offset_gigabit[SH_ETH_MAX_REGISTER_OFFSET] = { [FWALCR1] = 0x00b4, }; +static const u16 sh_eth_offset_fast_rcar[SH_ETH_MAX_REGISTER_OFFSET] = { + [ECMR] = 0x0300, + [RFLR] = 0x0308, + [ECSR] = 0x0310, + [ECSIPR] = 0x0318, + [PIR] = 0x0320, + [PSR] = 0x0328, + [RDMLR] = 0x0340, + [IPGR] = 0x0350, + [APR] = 0x0354, + [MPR] = 0x0358, + [RFCF] = 0x0360, + [TPAUSER] = 0x0364, + [TPAUSECR] = 0x0368, + [MAHR] = 0x03c0, + [MALR] = 0x03c8, + [TROCR] = 0x03d0, + [CDCR] = 0x03d4, + [LCCR] = 0x03d8, + [CNDCR] = 0x03dc, + [CEFCR] = 0x03e4, + [FRECR] = 0x03e8, + [TSFRCR] = 0x03ec, + [TLFRCR] = 0x03f0, + [RFCR] = 0x03f4, + [MAFCR] = 0x03f8, + + [EDMR] = 0x0200, + [EDTRR] = 0x0208, + [EDRRR] = 0x0210, + [TDLAR] = 0x0218, + [RDLAR] = 0x0220, + [EESR] = 0x0228, + [EESIPR] = 0x0230, + [TRSCER] = 0x0238, + [RMFCR] = 0x0240, + [TFTR] = 0x0248, + [FDR] = 0x0250, + [RMCR] = 0x0258, + [TFUCR] = 0x0264, + [RFOCR] = 0x0268, + [FCFTR] = 0x0270, + [TRIMD] = 0x027c, +}; + static const u16 sh_eth_offset_fast_sh4[SH_ETH_MAX_REGISTER_OFFSET] = { [ECMR] = 0x0100, [RFLR] = 0x0108, @@ -296,7 +342,7 @@ static void sh_eth_select_mii(struct net_device *ndev) #endif /* There is CPU dependent code */ -#if defined(CONFIG_CPU_SUBTYPE_SH7724) || defined(CONFIG_ARCH_R8A7779) +#if defined(CONFIG_ARCH_R8A7779) #define SH_ETH_RESET_DEFAULT 1 static void sh_eth_set_duplex(struct net_device *ndev) { @@ -311,18 +357,60 @@ static void sh_eth_set_duplex(struct net_device *ndev) static void sh_eth_set_rate(struct net_device *ndev) { struct sh_eth_private *mdp = netdev_priv(ndev); - unsigned int bits = ECMR_RTM; -#if defined(CONFIG_ARCH_R8A7779) - bits |= ECMR_ELB; -#endif + switch (mdp->speed) { + case 10: /* 10BASE */ + sh_eth_write(ndev, sh_eth_read(ndev, ECMR) & ~ECMR_ELB, ECMR); + break; + case 100:/* 100BASE */ + sh_eth_write(ndev, sh_eth_read(ndev, ECMR) | ECMR_ELB, ECMR); + break; + default: + break; + } +} + +/* R8A7779 */ +static struct sh_eth_cpu_data sh_eth_my_cpu_data = { + .set_duplex = sh_eth_set_duplex, + .set_rate = sh_eth_set_rate, + + .ecsr_value = ECSR_PSRTO | ECSR_LCHNG | ECSR_ICD, + .ecsipr_value = ECSIPR_PSRTOIP | ECSIPR_LCHNGIP | ECSIPR_ICDIP, + .eesipr_value = 0x01ff009f, + + .tx_check = EESR_FTC | EESR_CND | EESR_DLC | EESR_CD | EESR_RTO, + .eesr_err_check = EESR_TWB | EESR_TABT | EESR_RABT | EESR_RDE | + EESR_RFRMER | EESR_TFE | EESR_TDE | EESR_ECI, + .tx_error_check = EESR_TWB | EESR_TABT | EESR_TDE | EESR_TFE, + + .apr = 1, + .mpr = 1, + .tpauser = 1, + .hw_swap = 1, +}; +#elif defined(CONFIG_CPU_SUBTYPE_SH7724) +#define SH_ETH_RESET_DEFAULT 1 +static void sh_eth_set_duplex(struct net_device *ndev) +{ + struct sh_eth_private *mdp = netdev_priv(ndev); + + if (mdp->duplex) /* Full */ + sh_eth_write(ndev, sh_eth_read(ndev, ECMR) | ECMR_DM, ECMR); + else /* Half */ + sh_eth_write(ndev, sh_eth_read(ndev, ECMR) & ~ECMR_DM, ECMR); +} + +static void sh_eth_set_rate(struct net_device *ndev) +{ + struct sh_eth_private *mdp = netdev_priv(ndev); switch (mdp->speed) { case 10: /* 10BASE */ - sh_eth_write(ndev, sh_eth_read(ndev, ECMR) & ~bits, ECMR); + sh_eth_write(ndev, sh_eth_read(ndev, ECMR) & ~ECMR_RTM, ECMR); break; case 100:/* 100BASE */ - sh_eth_write(ndev, sh_eth_read(ndev, ECMR) | bits, ECMR); + sh_eth_write(ndev, sh_eth_read(ndev, ECMR) | ECMR_RTM, ECMR); break; default: break; @@ -2521,6 +2609,9 @@ static const u16 
*sh_eth_get_register_offset(int register_type) case SH_ETH_REG_GIGABIT: reg_offset = sh_eth_offset_gigabit; break; + case SH_ETH_REG_FAST_RCAR: + reg_offset = sh_eth_offset_fast_rcar; + break; case SH_ETH_REG_FAST_SH4: reg_offset = sh_eth_offset_fast_sh4; break; diff --git a/include/linux/sh_eth.h b/include/linux/sh_eth.h index b17d765ded84..fc305713fc6d 100644 --- a/include/linux/sh_eth.h +++ b/include/linux/sh_eth.h @@ -6,6 +6,7 @@ enum {EDMAC_LITTLE_ENDIAN, EDMAC_BIG_ENDIAN}; enum { SH_ETH_REG_GIGABIT, + SH_ETH_REG_FAST_RCAR, SH_ETH_REG_FAST_SH4, SH_ETH_REG_FAST_SH3_SH2 }; -- cgit From 72f4dc117b57e05120aaac6e218b8abc09a5c350 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 16 Mar 2013 15:54:25 -0400 Subject: NFS: Remove unneeded forward declaration I've built with NFSv4 enabled and disabled. This forward declaration does not seem to be required. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 90a4aa190b43..c1ca1f3f4935 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -14,9 +14,6 @@ #define NFS_DEF_FILE_IO_SIZE (4096U) #define NFS_MIN_FILE_IO_SIZE (1024U) -/* Forward declaration for NFS v3 */ -struct nfs4_secinfo_flavors; - struct nfs4_string { unsigned int len; char *data; -- cgit From fb15b26f8ba3ff629a052faf3f4a4744585ca2dc Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 16 Mar 2013 15:54:34 -0400 Subject: SUNRPC: Define rpcsec_gss_info structure The NFSv4 SECINFO procedure returns a list of security flavors. Any GSS flavor also has a GSS tuple containing an OID, a quality-of- protection value, and a service value, which specifies a particular GSS pseudoflavor. For simplicity and efficiency, I'd like to return each GSS tuple from the NFSv4 SECINFO XDR decoder and pass it straight into the RPC client. Define a data structure that is visible to both the NFS client and the RPC client. Take structure and field names from the relevant standards to avoid confusion. 
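For reference, the structure as introduced below in <linux/sunrpc/gss_api.h>, together with a hypothetical helper showing the intended decoder-side flow (the helper itself is illustrative and not part of the patch):

/* Definitions from this patch: */
struct rpcsec_gss_oid {
	unsigned int	len;
	u8		data[GSS_OID_MAX_LEN];
};

struct rpcsec_gss_info {
	struct rpcsec_gss_oid	oid;
	u32			qop;
	u32			service;
};

/* Hypothetical helper a decoder could use to fill in one GSS tuple: */
static int fill_gss_tuple(struct rpcsec_gss_info *info,
			  const u8 *oid, unsigned int oid_len,
			  u32 qop, u32 service)
{
	if (oid_len > GSS_OID_MAX_LEN)
		return -EINVAL;		/* mirrors the XDR decoder's bound check */
	memcpy(info->oid.data, oid, oid_len);
	info->oid.len = oid_len;
	info->qop = qop;
	info->service = service;
	return 0;
}
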
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs4namespace.c | 12 ++++++------ fs/nfs/nfs4xdr.c | 21 ++++++++++++--------- include/linux/nfs_xdr.h | 21 +++++---------------- include/linux/sunrpc/gss_api.h | 14 ++++++++++++-- net/sunrpc/auth_gss/gss_krb5_mech.c | 2 +- 5 files changed, 36 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index 0dd766079e1c..88231c92317c 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -138,23 +138,23 @@ rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors) { struct gss_api_mech *mech; struct xdr_netobj oid; - int i; + unsigned int i; rpc_authflavor_t pseudoflavor = RPC_AUTH_UNIX; for (i = 0; i < flavors->num_flavors; i++) { - struct nfs4_secinfo_flavor *flavor; - flavor = &flavors->flavors[i]; + struct nfs4_secinfo4 *flavor = &flavors->flavors[i]; if (flavor->flavor == RPC_AUTH_NULL || flavor->flavor == RPC_AUTH_UNIX) { pseudoflavor = flavor->flavor; break; } else if (flavor->flavor == RPC_AUTH_GSS) { - oid.len = flavor->gss.sec_oid4.len; - oid.data = flavor->gss.sec_oid4.data; + oid.len = flavor->flavor_info.oid.len; + oid.data = flavor->flavor_info.oid.data; mech = gss_mech_get_by_OID(&oid); if (!mech) continue; - pseudoflavor = gss_svc_to_pseudoflavor(mech, flavor->gss.service); + pseudoflavor = gss_svc_to_pseudoflavor(mech, + flavor->flavor_info.service); gss_mech_put(mech); break; } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 0b744895b9e1..a38fd179c34f 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -5205,27 +5205,30 @@ static int decode_delegreturn(struct xdr_stream *xdr) return decode_op_hdr(xdr, OP_DELEGRETURN); } -static int decode_secinfo_gss(struct xdr_stream *xdr, struct nfs4_secinfo_flavor *flavor) +static int decode_secinfo_gss(struct xdr_stream *xdr, + struct nfs4_secinfo4 *flavor) { + u32 oid_len; __be32 *p; p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) goto out_overflow; - flavor->gss.sec_oid4.len = be32_to_cpup(p); - if (flavor->gss.sec_oid4.len > GSS_OID_MAX_LEN) + oid_len = be32_to_cpup(p); + if (oid_len > GSS_OID_MAX_LEN) goto out_err; - p = xdr_inline_decode(xdr, flavor->gss.sec_oid4.len); + p = xdr_inline_decode(xdr, oid_len); if (unlikely(!p)) goto out_overflow; - memcpy(flavor->gss.sec_oid4.data, p, flavor->gss.sec_oid4.len); + memcpy(flavor->flavor_info.oid.data, p, oid_len); + flavor->flavor_info.oid.len = oid_len; p = xdr_inline_decode(xdr, 8); if (unlikely(!p)) goto out_overflow; - flavor->gss.qop4 = be32_to_cpup(p++); - flavor->gss.service = be32_to_cpup(p); + flavor->flavor_info.qop = be32_to_cpup(p++); + flavor->flavor_info.service = be32_to_cpup(p); return 0; @@ -5238,10 +5241,10 @@ out_err: static int decode_secinfo_common(struct xdr_stream *xdr, struct nfs4_secinfo_res *res) { - struct nfs4_secinfo_flavor *sec_flavor; + struct nfs4_secinfo4 *sec_flavor; + unsigned int i, num_flavors; int status; __be32 *p; - int i, num_flavors; p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index c1ca1f3f4935..b759467741eb 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1049,25 +1049,14 @@ struct nfs4_fs_locations_res { struct nfs4_fs_locations *fs_locations; }; -struct nfs4_secinfo_oid { - unsigned int len; - char data[GSS_OID_MAX_LEN]; -}; - -struct nfs4_secinfo_gss { - struct nfs4_secinfo_oid sec_oid4; - unsigned int qop4; - unsigned int service; -}; - -struct nfs4_secinfo_flavor { - unsigned int 
flavor; - struct nfs4_secinfo_gss gss; +struct nfs4_secinfo4 { + u32 flavor; + struct rpcsec_gss_info flavor_info; }; struct nfs4_secinfo_flavors { - unsigned int num_flavors; - struct nfs4_secinfo_flavor flavors[0]; + unsigned int num_flavors; + struct nfs4_secinfo4 flavors[0]; }; struct nfs4_secinfo_arg { diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h index a19e2547ae6a..98950e5a8877 100644 --- a/include/linux/sunrpc/gss_api.h +++ b/include/linux/sunrpc/gss_api.h @@ -25,10 +25,20 @@ struct gss_ctx { #define GSS_C_NO_BUFFER ((struct xdr_netobj) 0) #define GSS_C_NO_CONTEXT ((struct gss_ctx *) 0) -#define GSS_C_NULL_OID ((struct xdr_netobj) 0) /*XXX arbitrary length - is this set somewhere? */ #define GSS_OID_MAX_LEN 32 +struct rpcsec_gss_oid { + unsigned int len; + u8 data[GSS_OID_MAX_LEN]; +}; + +/* From RFC 3530 */ +struct rpcsec_gss_info { + struct rpcsec_gss_oid oid; + u32 qop; + u32 service; +}; /* gss-api prototypes; note that these are somewhat simplified versions of * the prototypes specified in RFC 2744. */ @@ -76,7 +86,7 @@ struct pf_desc { struct gss_api_mech { struct list_head gm_list; struct module *gm_owner; - struct xdr_netobj gm_oid; + struct rpcsec_gss_oid gm_oid; char *gm_name; const struct gss_api_ops *gm_ops; /* pseudoflavors supported by this mechanism: */ diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index d3611f11a8df..61d36ce3b366 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -754,7 +754,7 @@ MODULE_ALIAS("rpc-auth-gss-390005"); static struct gss_api_mech gss_kerberos_mech = { .gm_name = "krb5", .gm_owner = THIS_MODULE, - .gm_oid = {9, (void *)"\x2a\x86\x48\x86\xf7\x12\x01\x02\x02"}, + .gm_oid = { 9, "\x2a\x86\x48\x86\xf7\x12\x01\x02\x02" }, .gm_ops = &gss_kerberos_ops, .gm_pf_num = ARRAY_SIZE(gss_kerberos_pfs), .gm_pfs = gss_kerberos_pfs, -- cgit From 9568c5e9a61de49f67f524404a27a1014a8d7f1e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 16 Mar 2013 15:54:43 -0400 Subject: SUNRPC: Introduce rpcauth_get_pseudoflavor() A SECINFO reply may contain flavors whose kernel module is not yet loaded by the client's kernel. A new RPC client API, called rpcauth_get_pseudoflavor(), is introduced to do proper checking for support of a security flavor. When this API is invoked, the RPC client now tries to load the module for each flavor first before performing the "is this supported?" check. This means if a module is available on the client, but has not been loaded yet, it will be loaded and registered automatically when the SECINFO reply is processed. The new API can take a full GSS tuple (OID, QoP, and service). Previously only the OID and service were considered. nfs_find_best_sec() is updated to verify all flavors requested in a SECINFO reply, including AUTH_NULL and AUTH_UNIX. Previously these two flavors were simply assumed to be supported without consulting the RPC client. Note that the replaced version of nfs_find_best_sec() can return RPC_AUTH_MAXFLAVOR if the server returns a recognized OID but an unsupported "service" value. nfs_find_best_sec() now returns RPC_AUTH_UNIX in this case. 
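Condensed, the selection logic this enables looks like the following (a sketch of the nfs_find_best_sec() rewrite shown in the diff below; the real code additionally switches on the flavor number before calling into the RPC client):

static rpc_authflavor_t pick_sec_flavor(struct nfs4_secinfo_flavors *flavors)
{
	rpc_authflavor_t pseudoflavor;
	unsigned int i;

	for (i = 0; i < flavors->num_flavors; i++) {
		struct nfs4_secinfo4 *s = &flavors->flavors[i];

		/* May load "rpc-auth-<flavor>" on demand, then verify it. */
		pseudoflavor = rpcauth_get_pseudoflavor(s->flavor,
							&s->flavor_info);
		if (pseudoflavor != RPC_AUTH_MAXFLAVOR)
			return pseudoflavor;	/* first supported flavor wins */
	}
	return RPC_AUTH_UNIX;	/* fallback when nothing matches locally */
}
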
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs4namespace.c | 41 ++++++++++++++++++++--------------- include/linux/sunrpc/auth.h | 5 +++++ include/linux/sunrpc/gss_api.h | 5 ++--- net/sunrpc/auth.c | 35 ++++++++++++++++++++++++++++++ net/sunrpc/auth_gss/auth_gss.c | 1 + net/sunrpc/auth_gss/gss_mech_switch.c | 28 +++++++++++++++++++----- 6 files changed, 89 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index 88231c92317c..cdb0b41a4810 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -134,33 +134,38 @@ static size_t nfs_parse_server_name(char *string, size_t len, return ret; } +/** + * nfs_find_best_sec - Find a security mechanism supported locally + * @flavors: List of security tuples returned by SECINFO procedure + * + * Return the pseudoflavor of the first security mechanism in + * "flavors" that is locally supported. Return RPC_AUTH_UNIX if + * no matching flavor is found in the array. The "flavors" array + * is searched in the order returned from the server, per RFC 3530 + * recommendation. + */ rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors) { - struct gss_api_mech *mech; - struct xdr_netobj oid; + rpc_authflavor_t pseudoflavor; + struct nfs4_secinfo4 *secinfo; unsigned int i; - rpc_authflavor_t pseudoflavor = RPC_AUTH_UNIX; for (i = 0; i < flavors->num_flavors; i++) { - struct nfs4_secinfo4 *flavor = &flavors->flavors[i]; - - if (flavor->flavor == RPC_AUTH_NULL || flavor->flavor == RPC_AUTH_UNIX) { - pseudoflavor = flavor->flavor; - break; - } else if (flavor->flavor == RPC_AUTH_GSS) { - oid.len = flavor->flavor_info.oid.len; - oid.data = flavor->flavor_info.oid.data; - mech = gss_mech_get_by_OID(&oid); - if (!mech) - continue; - pseudoflavor = gss_svc_to_pseudoflavor(mech, - flavor->flavor_info.service); - gss_mech_put(mech); + secinfo = &flavors->flavors[i]; + + switch (secinfo->flavor) { + case RPC_AUTH_NULL: + case RPC_AUTH_UNIX: + case RPC_AUTH_GSS: + pseudoflavor = rpcauth_get_pseudoflavor(secinfo->flavor, + &secinfo->flavor_info); + if (pseudoflavor != RPC_AUTH_MAXFLAVOR) + return pseudoflavor; break; } } - return pseudoflavor; + return RPC_AUTH_UNIX; } static rpc_authflavor_t nfs4_negotiate_security(struct inode *inode, struct qstr *name) diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 58fda1c3c783..6851da4cb416 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -22,6 +22,8 @@ /* size of the nodename buffer */ #define UNX_MAXNODENAME 32 +struct rpcsec_gss_info; + /* Work around the lack of a VFS credential */ struct auth_cred { kuid_t uid; @@ -103,6 +105,7 @@ struct rpc_authops { int (*pipes_create)(struct rpc_auth *); void (*pipes_destroy)(struct rpc_auth *); int (*list_pseudoflavors)(rpc_authflavor_t *, int); + rpc_authflavor_t (*info2flavor)(struct rpcsec_gss_info *); }; struct rpc_credops { @@ -137,6 +140,8 @@ int rpcauth_register(const struct rpc_authops *); int rpcauth_unregister(const struct rpc_authops *); struct rpc_auth * rpcauth_create(rpc_authflavor_t, struct rpc_clnt *); void rpcauth_release(struct rpc_auth *); +rpc_authflavor_t rpcauth_get_pseudoflavor(rpc_authflavor_t, + struct rpcsec_gss_info *); int rpcauth_list_flavors(rpc_authflavor_t *, int); struct rpc_cred * rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred *, int); void rpcauth_init_cred(struct rpc_cred *, const struct auth_cred *, struct rpc_auth *, const struct rpc_credops *); diff --git 
a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h index 98950e5a8877..aba7687ca884 100644 --- a/include/linux/sunrpc/gss_api.h +++ b/include/linux/sunrpc/gss_api.h @@ -127,9 +127,8 @@ struct gss_api_ops { int gss_mech_register(struct gss_api_mech *); void gss_mech_unregister(struct gss_api_mech *); -/* returns a mechanism descriptor given an OID, and increments the mechanism's - * reference count. */ -struct gss_api_mech * gss_mech_get_by_OID(struct xdr_netobj *); +/* Given a GSS security tuple, look up a pseudoflavor */ +rpc_authflavor_t gss_mech_info2flavor(struct rpcsec_gss_info *); /* Returns a reference to a mechanism, given a name like "krb5" etc. */ struct gss_api_mech *gss_mech_get_by_name(const char *); diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index f5294047df77..9b81be8d9946 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -123,6 +123,41 @@ rpcauth_unregister(const struct rpc_authops *ops) } EXPORT_SYMBOL_GPL(rpcauth_unregister); +/** + * rpcauth_get_pseudoflavor - check if security flavor is supported + * @flavor: a security flavor + * @info: a GSS mech OID, quality of protection, and service value + * + * Verifies that an appropriate kernel module is available or already loaded. + * Returns an equivalent pseudoflavor, or RPC_AUTH_MAXFLAVOR if "flavor" is + * not supported locally. + */ +rpc_authflavor_t +rpcauth_get_pseudoflavor(rpc_authflavor_t flavor, struct rpcsec_gss_info *info) +{ + const struct rpc_authops *ops; + rpc_authflavor_t pseudoflavor; + + ops = auth_flavors[flavor]; + if (ops == NULL) + request_module("rpc-auth-%u", flavor); + spin_lock(&rpc_authflavor_lock); + ops = auth_flavors[flavor]; + if (ops == NULL || !try_module_get(ops->owner)) { + spin_unlock(&rpc_authflavor_lock); + return RPC_AUTH_MAXFLAVOR; + } + spin_unlock(&rpc_authflavor_lock); + + pseudoflavor = flavor; + if (ops->info2flavor != NULL) + pseudoflavor = ops->info2flavor(info); + + module_put(ops->owner); + return pseudoflavor; +} +EXPORT_SYMBOL_GPL(rpcauth_get_pseudoflavor); + /** * rpcauth_list_flavors - discover registered flavors and pseudoflavors * @array: array to fill in diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 282dfb14db05..a7420076ef39 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1641,6 +1641,7 @@ static const struct rpc_authops authgss_ops = { .pipes_create = gss_pipes_dentries_create, .pipes_destroy = gss_pipes_dentries_destroy, .list_pseudoflavors = gss_mech_list_pseudoflavors, + .info2flavor = gss_mech_info2flavor, }; static const struct rpc_credops gss_credops = { diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index f0f4eee63a35..4db66f5f490e 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -171,8 +171,7 @@ struct gss_api_mech * gss_mech_get_by_name(const char *name) } EXPORT_SYMBOL_GPL(gss_mech_get_by_name); -struct gss_api_mech * -gss_mech_get_by_OID(struct xdr_netobj *obj) +static struct gss_api_mech *gss_mech_get_by_OID(struct rpcsec_gss_oid *obj) { struct gss_api_mech *pos, *gm = NULL; @@ -188,11 +187,8 @@ gss_mech_get_by_OID(struct xdr_netobj *obj) } spin_unlock(®istered_mechs_lock); return gm; - } -EXPORT_SYMBOL_GPL(gss_mech_get_by_OID); - static inline int mech_supports_pseudoflavor(struct gss_api_mech *gm, u32 pseudoflavor) { @@ -282,6 +278,28 @@ gss_svc_to_pseudoflavor(struct gss_api_mech *gm, u32 service) } EXPORT_SYMBOL_GPL(gss_svc_to_pseudoflavor); +/** + * 
gss_mech_info2flavor - look up a pseudoflavor given a GSS tuple + * @info: a GSS mech OID, quality of protection, and service value + * + * Returns a matching pseudoflavor, or RPC_AUTH_MAXFLAVOR if the tuple is + * not supported. + */ +rpc_authflavor_t gss_mech_info2flavor(struct rpcsec_gss_info *info) +{ + rpc_authflavor_t pseudoflavor; + struct gss_api_mech *gm; + + gm = gss_mech_get_by_OID(&info->oid); + if (gm == NULL) + return RPC_AUTH_MAXFLAVOR; + + pseudoflavor = gss_svc_to_pseudoflavor(gm, info->service); + + gss_mech_put(gm); + return pseudoflavor; +} + u32 gss_pseudoflavor_to_service(struct gss_api_mech *gm, u32 pseudoflavor) { -- cgit From 83523d083a045a2069e5f3443d2e4f810a6e6d9a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 16 Mar 2013 15:55:01 -0400 Subject: SUNRPC: Consider qop when looking up pseudoflavors The NFSv4 SECINFO operation returns a list of security flavors that the server supports for a particular share. An NFSv4 client is supposed to pick a pseudoflavor it supports that corresponds to one of the flavors returned by the server. GSS flavors in this list have a GSS tuple that identify a specific GSS pseudoflavor. Currently our client ignores the GSS tuple's "qop" value. A matching pseudoflavor is chosen based only on the OID and service value. So far this omission has not had much effect on Linux. The NFSv4 protocol currently supports only one qop value: GSS_C_QOP_DEFAULT, also known as zero. However, if an NFSv4 server happens to return something other than zero in the qop field, our client won't notice. This could cause the client to behave in incorrect ways that could have security implications. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/gss_api.h | 5 ++++- net/sunrpc/auth_gss/gss_krb5_mech.c | 3 +++ net/sunrpc/auth_gss/gss_mech_switch.c | 20 ++++++++++++++------ net/sunrpc/auth_gss/svcauth_gss.c | 4 +++- 4 files changed, 24 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h index aba7687ca884..96e5a81a54d7 100644 --- a/include/linux/sunrpc/gss_api.h +++ b/include/linux/sunrpc/gss_api.h @@ -25,6 +25,7 @@ struct gss_ctx { #define GSS_C_NO_BUFFER ((struct xdr_netobj) 0) #define GSS_C_NO_CONTEXT ((struct gss_ctx *) 0) +#define GSS_C_QOP_DEFAULT (0) /*XXX arbitrary length - is this set somewhere? 
*/ #define GSS_OID_MAX_LEN 32 @@ -68,12 +69,14 @@ u32 gss_unwrap( u32 gss_delete_sec_context( struct gss_ctx **ctx_id); -u32 gss_svc_to_pseudoflavor(struct gss_api_mech *, u32 service); +rpc_authflavor_t gss_svc_to_pseudoflavor(struct gss_api_mech *, u32 qop, + u32 service); u32 gss_pseudoflavor_to_service(struct gss_api_mech *, u32 pseudoflavor); char *gss_service_to_auth_domain_name(struct gss_api_mech *, u32 service); struct pf_desc { u32 pseudoflavor; + u32 qop; u32 service; char *name; char *auth_domain_name; diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index b822ec5cdc58..33255ff889c0 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -729,16 +729,19 @@ static const struct gss_api_ops gss_kerberos_ops = { static struct pf_desc gss_kerberos_pfs[] = { [0] = { .pseudoflavor = RPC_AUTH_GSS_KRB5, + .qop = GSS_C_QOP_DEFAULT, .service = RPC_GSS_SVC_NONE, .name = "krb5", }, [1] = { .pseudoflavor = RPC_AUTH_GSS_KRB5I, + .qop = GSS_C_QOP_DEFAULT, .service = RPC_GSS_SVC_INTEGRITY, .name = "krb5i", }, [2] = { .pseudoflavor = RPC_AUTH_GSS_KRB5P, + .qop = GSS_C_QOP_DEFAULT, .service = RPC_GSS_SVC_PRIVACY, .name = "krb5p", }, diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index 92a72404e6d5..81fb6f3e2424 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -271,19 +271,27 @@ int gss_mech_list_pseudoflavors(rpc_authflavor_t *array_ptr, int size) return i; } -u32 -gss_svc_to_pseudoflavor(struct gss_api_mech *gm, u32 service) +/** + * gss_svc_to_pseudoflavor - map a GSS service number to a pseudoflavor + * @gm: GSS mechanism handle + * @qop: GSS quality-of-protection value + * @service: GSS service value + * + * Returns a matching security flavor, or RPC_AUTH_MAXFLAVOR if none is found. + */ +rpc_authflavor_t gss_svc_to_pseudoflavor(struct gss_api_mech *gm, u32 qop, + u32 service) { int i; for (i = 0; i < gm->gm_pf_num; i++) { - if (gm->gm_pfs[i].service == service) { + if (gm->gm_pfs[i].qop == qop && + gm->gm_pfs[i].service == service) { return gm->gm_pfs[i].pseudoflavor; } } - return RPC_AUTH_MAXFLAVOR; /* illegal value */ + return RPC_AUTH_MAXFLAVOR; } -EXPORT_SYMBOL_GPL(gss_svc_to_pseudoflavor); /** * gss_mech_info2flavor - look up a pseudoflavor given a GSS tuple @@ -301,7 +309,7 @@ rpc_authflavor_t gss_mech_info2flavor(struct rpcsec_gss_info *info) if (gm == NULL) return RPC_AUTH_MAXFLAVOR; - pseudoflavor = gss_svc_to_pseudoflavor(gm, info->service); + pseudoflavor = gss_svc_to_pseudoflavor(gm, info->qop, info->service); gss_mech_put(gm); return pseudoflavor; diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index f7d34e7b6f81..74f6d30f5ded 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1216,7 +1216,9 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) svcdata->rsci = rsci; cache_get(&rsci->h); rqstp->rq_cred.cr_flavor = gss_svc_to_pseudoflavor( - rsci->mechctx->mech_type, gc->gc_svc); + rsci->mechctx->mech_type, + GSS_C_QOP_DEFAULT, + gc->gc_svc); ret = SVC_OK; goto out; } -- cgit From a77c806fb9d097bb7733b64207cf52fc2c6438bb Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 16 Mar 2013 15:55:10 -0400 Subject: SUNRPC: Refactor nfsd4_do_encode_secinfo() Clean up. This matches a similar API for the client side, and keeps ULP fingers out the of the GSS mech switch. Signed-off-by: Chuck Lever Acked-by: J. 
Bruce Fields Signed-off-by: Trond Myklebust --- fs/nfsd/nfs4xdr.c | 24 +++++++++++------------- include/linux/sunrpc/auth.h | 4 ++++ include/linux/sunrpc/gss_api.h | 3 +++ net/sunrpc/auth.c | 35 +++++++++++++++++++++++++++++++++++ net/sunrpc/auth_gss/auth_gss.c | 1 + net/sunrpc/auth_gss/gss_mech_switch.c | 35 +++++++++++++++++++++++++++++++++-- 6 files changed, 87 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 01168865dd37..2a2745615b42 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3138,10 +3138,9 @@ nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ static __be32 nfsd4_do_encode_secinfo(struct nfsd4_compoundres *resp, - __be32 nfserr,struct svc_export *exp) + __be32 nfserr, struct svc_export *exp) { - int i = 0; - u32 nflavs; + u32 i, nflavs; struct exp_flavor_info *flavs; struct exp_flavor_info def_flavs[2]; __be32 *p; @@ -3172,30 +3171,29 @@ nfsd4_do_encode_secinfo(struct nfsd4_compoundres *resp, WRITE32(nflavs); ADJUST_ARGS(); for (i = 0; i < nflavs; i++) { - u32 flav = flavs[i].pseudoflavor; - struct gss_api_mech *gm = gss_mech_get_by_pseudoflavor(flav); + struct rpcsec_gss_info info; - if (gm) { + if (rpcauth_get_gssinfo(flavs[i].pseudoflavor, &info) == 0) { RESERVE_SPACE(4); WRITE32(RPC_AUTH_GSS); ADJUST_ARGS(); - RESERVE_SPACE(4 + gm->gm_oid.len); - WRITE32(gm->gm_oid.len); - WRITEMEM(gm->gm_oid.data, gm->gm_oid.len); + RESERVE_SPACE(4 + info.oid.len); + WRITE32(info.oid.len); + WRITEMEM(info.oid.data, info.oid.len); ADJUST_ARGS(); RESERVE_SPACE(4); - WRITE32(0); /* qop */ + WRITE32(info.qop); ADJUST_ARGS(); RESERVE_SPACE(4); - WRITE32(gss_pseudoflavor_to_service(gm, flav)); + WRITE32(info.service); ADJUST_ARGS(); - gss_mech_put(gm); } else { RESERVE_SPACE(4); - WRITE32(flav); + WRITE32(flavs[i].pseudoflavor); ADJUST_ARGS(); } } + out: if (exp) exp_put(exp); diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 6851da4cb416..0dd00f4f6810 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -106,6 +106,8 @@ struct rpc_authops { void (*pipes_destroy)(struct rpc_auth *); int (*list_pseudoflavors)(rpc_authflavor_t *, int); rpc_authflavor_t (*info2flavor)(struct rpcsec_gss_info *); + int (*flavor2info)(rpc_authflavor_t, + struct rpcsec_gss_info *); }; struct rpc_credops { @@ -142,6 +144,8 @@ struct rpc_auth * rpcauth_create(rpc_authflavor_t, struct rpc_clnt *); void rpcauth_release(struct rpc_auth *); rpc_authflavor_t rpcauth_get_pseudoflavor(rpc_authflavor_t, struct rpcsec_gss_info *); +int rpcauth_get_gssinfo(rpc_authflavor_t, + struct rpcsec_gss_info *); int rpcauth_list_flavors(rpc_authflavor_t *, int); struct rpc_cred * rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred *, int); void rpcauth_init_cred(struct rpc_cred *, const struct auth_cred *, struct rpc_auth *, const struct rpc_credops *); diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h index 96e5a81a54d7..fca23380e667 100644 --- a/include/linux/sunrpc/gss_api.h +++ b/include/linux/sunrpc/gss_api.h @@ -133,6 +133,9 @@ void gss_mech_unregister(struct gss_api_mech *); /* Given a GSS security tuple, look up a pseudoflavor */ rpc_authflavor_t gss_mech_info2flavor(struct rpcsec_gss_info *); +/* Given a pseudoflavor, look up a GSS security tuple */ +int gss_mech_flavor2info(rpc_authflavor_t, struct rpcsec_gss_info *); + /* Returns a reference to a mechanism, given a name like "krb5" etc. 
*/ struct gss_api_mech *gss_mech_get_by_name(const char *); diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 9b81be8d9946..2bc0cc2196e0 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -158,6 +158,41 @@ rpcauth_get_pseudoflavor(rpc_authflavor_t flavor, struct rpcsec_gss_info *info) } EXPORT_SYMBOL_GPL(rpcauth_get_pseudoflavor); +/** + * rpcauth_get_gssinfo - find GSS tuple matching a GSS pseudoflavor + * @pseudoflavor: GSS pseudoflavor to match + * @info: rpcsec_gss_info structure to fill in + * + * Returns zero and fills in "info" if pseudoflavor matches a + * supported mechanism. + */ +int +rpcauth_get_gssinfo(rpc_authflavor_t pseudoflavor, struct rpcsec_gss_info *info) +{ + rpc_authflavor_t flavor = pseudoflavor_to_flavor(pseudoflavor); + const struct rpc_authops *ops; + int result; + + ops = auth_flavors[flavor]; + if (ops == NULL) + request_module("rpc-auth-%u", flavor); + spin_lock(&rpc_authflavor_lock); + ops = auth_flavors[flavor]; + if (ops == NULL || !try_module_get(ops->owner)) { + spin_unlock(&rpc_authflavor_lock); + return -ENOENT; + } + spin_unlock(&rpc_authflavor_lock); + + result = -ENOENT; + if (ops->flavor2info != NULL) + result = ops->flavor2info(pseudoflavor, info); + + module_put(ops->owner); + return result; +} +EXPORT_SYMBOL_GPL(rpcauth_get_gssinfo); + /** * rpcauth_list_flavors - discover registered flavors and pseudoflavors * @array: array to fill in diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index a7420076ef39..51415b07174e 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1642,6 +1642,7 @@ static const struct rpc_authops authgss_ops = { .pipes_destroy = gss_pipes_dentries_destroy, .list_pseudoflavors = gss_mech_list_pseudoflavors, .info2flavor = gss_mech_info2flavor, + .flavor2info = gss_mech_flavor2info, }; static const struct rpc_credops gss_credops = { diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index 81fb6f3e2424..deaa7ae81cdf 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -240,8 +240,6 @@ gss_mech_get_by_pseudoflavor(u32 pseudoflavor) return gm; } -EXPORT_SYMBOL_GPL(gss_mech_get_by_pseudoflavor); - /** * gss_mech_list_pseudoflavors - Discover registered GSS pseudoflavors * @array: array to fill in @@ -315,6 +313,39 @@ rpc_authflavor_t gss_mech_info2flavor(struct rpcsec_gss_info *info) return pseudoflavor; } +/** + * gss_mech_flavor2info - look up a GSS tuple for a given pseudoflavor + * @pseudoflavor: GSS pseudoflavor to match + * @info: rpcsec_gss_info structure to fill in + * + * Returns zero and fills in "info" if pseudoflavor matches a + * supported mechanism. Otherwise a negative errno is returned. 
+ */ +int gss_mech_flavor2info(rpc_authflavor_t pseudoflavor, + struct rpcsec_gss_info *info) +{ + struct gss_api_mech *gm; + int i; + + gm = gss_mech_get_by_pseudoflavor(pseudoflavor); + if (gm == NULL) + return -ENOENT; + + for (i = 0; i < gm->gm_pf_num; i++) { + if (gm->gm_pfs[i].pseudoflavor == pseudoflavor) { + memcpy(info->oid.data, gm->gm_oid.data, gm->gm_oid.len); + info->oid.len = gm->gm_oid.len; + info->qop = gm->gm_pfs[i].qop; + info->service = gm->gm_pfs[i].service; + gss_mech_put(gm); + return 0; + } + } + + gss_mech_put(gm); + return -ENOENT; +} + u32 gss_pseudoflavor_to_service(struct gss_api_mech *gm, u32 pseudoflavor) { -- cgit From 6599c0acae10e929b5315821c1d064cd13fe7648 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 16 Mar 2013 15:55:19 -0400 Subject: SUNRPC: Make gss_mech_get() static gss_mech_get() is no longer used outside of gss_mech_switch.c. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/gss_api.h | 3 --- net/sunrpc/auth_gss/gss_mech_switch.c | 5 +---- 2 files changed, 1 insertion(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h index fca23380e667..f32b7a47e13f 100644 --- a/include/linux/sunrpc/gss_api.h +++ b/include/linux/sunrpc/gss_api.h @@ -145,9 +145,6 @@ struct gss_api_mech *gss_mech_get_by_pseudoflavor(u32); /* Fill in an array with a list of supported pseudoflavors */ int gss_mech_list_pseudoflavors(rpc_authflavor_t *, int); -/* Just increments the mechanism's reference count and returns its input: */ -struct gss_api_mech * gss_mech_get(struct gss_api_mech *); - /* For every successful gss_mech_get or gss_mech_get_by_* call there must be a * corresponding call to gss_mech_put. */ void gss_mech_put(struct gss_api_mech *); diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index deaa7ae81cdf..89416522ef79 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -132,15 +132,12 @@ gss_mech_unregister(struct gss_api_mech *gm) EXPORT_SYMBOL_GPL(gss_mech_unregister); -struct gss_api_mech * -gss_mech_get(struct gss_api_mech *gm) +static struct gss_api_mech *gss_mech_get(struct gss_api_mech *gm) { __module_get(gm->gm_owner); return gm; } -EXPORT_SYMBOL_GPL(gss_mech_get); - static struct gss_api_mech * _gss_mech_get_by_name(const char *name) { -- cgit From e4bcda28344cc4762c57ad7333f0472a39e83479 Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Fri, 29 Mar 2013 17:38:18 -0600 Subject: ARM: tegra: move to This is required so that code such as Tegra's PCIe and clock drivers can still access this header file once Tegra is converted to multiplatform, and no longer exists. 
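For illustration, a minimal sketch of a consumer of the relocated header (the wrapper function, device argument and "pex" clock name are hypothetical; the powergate calls and the clk-disabled calling convention are taken from the declarations in the new include/linux/tegra-powergate.h shown below):

#include <linux/clk.h>
#include <linux/device.h>
#include <linux/err.h>
#include <linux/tegra-powergate.h>

static int example_pcie_power_up(struct device *dev)
{
	struct clk *clk;
	int err;

	clk = clk_get(dev, "pex");	/* hypothetical clock name */
	if (IS_ERR(clk))
		return PTR_ERR(clk);

	/* Must be called with clk disabled; returns with clk enabled. */
	err = tegra_powergate_sequence_power_up(TEGRA_POWERGATE_PCIE, clk);
	if (err)
		clk_put(clk);
	return err;
}

Passing the clock to the helper lets the powergate code keep the partition clock running while the reset is released, which is why the caller hands it over disabled and gets it back enabled.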
Signed-off-by: Stephen Warren --- arch/arm/mach-tegra/board.h | 1 + arch/arm/mach-tegra/common.c | 2 -- arch/arm/mach-tegra/include/mach/powergate.h | 54 ---------------------------- arch/arm/mach-tegra/pcie.c | 3 +- arch/arm/mach-tegra/powergate.c | 3 +- drivers/clk/tegra/clk-tegra30.c | 3 +- include/linux/tegra-powergate.h | 49 +++++++++++++++++++++++++ 7 files changed, 53 insertions(+), 62 deletions(-) delete mode 100644 arch/arm/mach-tegra/include/mach/powergate.h create mode 100644 include/linux/tegra-powergate.h (limited to 'include/linux') diff --git a/arch/arm/mach-tegra/board.h b/arch/arm/mach-tegra/board.h index 60431de585ca..1787327fae3a 100644 --- a/arch/arm/mach-tegra/board.h +++ b/arch/arm/mach-tegra/board.h @@ -40,6 +40,7 @@ int tegra_clk_debugfs_init(void); static inline int tegra_clk_debugfs_init(void) { return 0; } #endif +int __init tegra_powergate_init(void); #if defined(CONFIG_ARCH_TEGRA_2x_SOC) && defined(CONFIG_DEBUG_FS) int __init tegra_powergate_debugfs_init(void); #else diff --git a/arch/arm/mach-tegra/common.c b/arch/arm/mach-tegra/common.c index f0315c95c76d..7cc75636adc6 100644 --- a/arch/arm/mach-tegra/common.c +++ b/arch/arm/mach-tegra/common.c @@ -27,8 +27,6 @@ #include -#include - #include "board.h" #include "common.h" #include "fuse.h" diff --git a/arch/arm/mach-tegra/include/mach/powergate.h b/arch/arm/mach-tegra/include/mach/powergate.h deleted file mode 100644 index 06763fe7529d..000000000000 --- a/arch/arm/mach-tegra/include/mach/powergate.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * drivers/regulator/tegra-regulator.c - * - * Copyright (c) 2010 Google, Inc - * - * Author: - * Colin Cross - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - */ - -#ifndef _MACH_TEGRA_POWERGATE_H_ -#define _MACH_TEGRA_POWERGATE_H_ - -struct clk; - -#define TEGRA_POWERGATE_CPU 0 -#define TEGRA_POWERGATE_3D 1 -#define TEGRA_POWERGATE_VENC 2 -#define TEGRA_POWERGATE_PCIE 3 -#define TEGRA_POWERGATE_VDEC 4 -#define TEGRA_POWERGATE_L2 5 -#define TEGRA_POWERGATE_MPE 6 -#define TEGRA_POWERGATE_HEG 7 -#define TEGRA_POWERGATE_SATA 8 -#define TEGRA_POWERGATE_CPU1 9 -#define TEGRA_POWERGATE_CPU2 10 -#define TEGRA_POWERGATE_CPU3 11 -#define TEGRA_POWERGATE_CELP 12 -#define TEGRA_POWERGATE_3D1 13 - -#define TEGRA_POWERGATE_CPU0 TEGRA_POWERGATE_CPU -#define TEGRA_POWERGATE_3D0 TEGRA_POWERGATE_3D - -int __init tegra_powergate_init(void); - -int tegra_cpu_powergate_id(int cpuid); -int tegra_powergate_is_powered(int id); -int tegra_powergate_power_on(int id); -int tegra_powergate_power_off(int id); -int tegra_powergate_remove_clamping(int id); - -/* Must be called with clk disabled, and returns with clk enabled */ -int tegra_powergate_sequence_power_up(int id, struct clk *clk); - -#endif /* _MACH_TEGRA_POWERGATE_H_ */ diff --git a/arch/arm/mach-tegra/pcie.c b/arch/arm/mach-tegra/pcie.c index b60165f1ca02..46144a19a7e7 100644 --- a/arch/arm/mach-tegra/pcie.c +++ b/arch/arm/mach-tegra/pcie.c @@ -34,12 +34,11 @@ #include #include #include +#include #include #include -#include - #include "board.h" #include "iomap.h" diff --git a/arch/arm/mach-tegra/powergate.c b/arch/arm/mach-tegra/powergate.c index c6bc8f85759c..585d2974a3c1 100644 --- a/arch/arm/mach-tegra/powergate.c +++ b/arch/arm/mach-tegra/powergate.c @@ -27,8 +27,7 @@ #include #include #include - -#include +#include #include "fuse.h" #include "iomap.h" diff --git a/drivers/clk/tegra/clk-tegra30.c b/drivers/clk/tegra/clk-tegra30.c index 32c61cb6d0bb..84584e529a7d 100644 --- a/drivers/clk/tegra/clk-tegra30.c +++ b/drivers/clk/tegra/clk-tegra30.c @@ -22,8 +22,7 @@ #include #include #include - -#include +#include #include "clk.h" diff --git a/include/linux/tegra-powergate.h b/include/linux/tegra-powergate.h new file mode 100644 index 000000000000..55c29a8d5015 --- /dev/null +++ b/include/linux/tegra-powergate.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2010 Google, Inc + * + * Author: + * Colin Cross + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#ifndef _MACH_TEGRA_POWERGATE_H_ +#define _MACH_TEGRA_POWERGATE_H_ + +struct clk; + +#define TEGRA_POWERGATE_CPU 0 +#define TEGRA_POWERGATE_3D 1 +#define TEGRA_POWERGATE_VENC 2 +#define TEGRA_POWERGATE_PCIE 3 +#define TEGRA_POWERGATE_VDEC 4 +#define TEGRA_POWERGATE_L2 5 +#define TEGRA_POWERGATE_MPE 6 +#define TEGRA_POWERGATE_HEG 7 +#define TEGRA_POWERGATE_SATA 8 +#define TEGRA_POWERGATE_CPU1 9 +#define TEGRA_POWERGATE_CPU2 10 +#define TEGRA_POWERGATE_CPU3 11 +#define TEGRA_POWERGATE_CELP 12 +#define TEGRA_POWERGATE_3D1 13 + +#define TEGRA_POWERGATE_CPU0 TEGRA_POWERGATE_CPU +#define TEGRA_POWERGATE_3D0 TEGRA_POWERGATE_3D + +int tegra_powergate_is_powered(int id); +int tegra_powergate_power_on(int id); +int tegra_powergate_power_off(int id); +int tegra_powergate_remove_clamping(int id); + +/* Must be called with clk disabled, and returns with clk enabled */ +int tegra_powergate_sequence_power_up(int id, struct clk *clk); + +#endif /* _MACH_TEGRA_POWERGATE_H_ */ -- cgit From 14b57a10553b5b768f77b247e6dd285c65816064 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 29 Mar 2013 14:46:51 +0100 Subject: openvswitch: Use ETH_ALEN to define ethernet addresses Signed-off-by: Thomas Graf Signed-off-by: Jesse Gross --- include/linux/openvswitch.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h index 67d6c7b03581..8b9d7217eddc 100644 --- a/include/linux/openvswitch.h +++ b/include/linux/openvswitch.h @@ -20,6 +20,7 @@ #define _LINUX_OPENVSWITCH_H 1 #include +#include /** * struct ovs_header - header for OVS Generic Netlink messages. @@ -269,8 +270,8 @@ enum ovs_frag_type { #define OVS_FRAG_TYPE_MAX (__OVS_FRAG_TYPE_MAX - 1) struct ovs_key_ethernet { - __u8 eth_src[6]; - __u8 eth_dst[6]; + __u8 eth_src[ETH_ALEN]; + __u8 eth_dst[ETH_ALEN]; }; struct ovs_key_ipv4 { @@ -316,14 +317,14 @@ struct ovs_key_arp { __be32 arp_sip; __be32 arp_tip; __be16 arp_op; - __u8 arp_sha[6]; - __u8 arp_tha[6]; + __u8 arp_sha[ETH_ALEN]; + __u8 arp_tha[ETH_ALEN]; }; struct ovs_key_nd { __u32 nd_target[4]; - __u8 nd_sll[6]; - __u8 nd_tll[6]; + __u8 nd_sll[ETH_ALEN]; + __u8 nd_tll[ETH_ALEN]; }; /** -- cgit From 22e3880a76bb9a0c4fa5c8fefdc8697a36a4dae1 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 29 Mar 2013 14:46:52 +0100 Subject: openvswitch: Expose to userspace It contains the public netlink interface bits required by userspace to make use of the interface. Signed-off-by: Thomas Graf Signed-off-by: Jesse Gross --- include/linux/openvswitch.h | 433 +------------------------------------ include/uapi/linux/Kbuild | 1 + include/uapi/linux/openvswitch.h | 456 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 458 insertions(+), 432 deletions(-) create mode 100644 include/uapi/linux/openvswitch.h (limited to 'include/linux') diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h index 8b9d7217eddc..e6b240b6196c 100644 --- a/include/linux/openvswitch.h +++ b/include/linux/openvswitch.h @@ -19,437 +19,6 @@ #ifndef _LINUX_OPENVSWITCH_H #define _LINUX_OPENVSWITCH_H 1 -#include -#include - -/** - * struct ovs_header - header for OVS Generic Netlink messages. - * @dp_ifindex: ifindex of local port for datapath (0 to make a request not - * specific to a datapath). - * - * Attributes following the header are specific to a particular OVS Generic - * Netlink family, but all of the OVS families use this header. - */ - -struct ovs_header { - int dp_ifindex; -}; - -/* Datapaths. 
*/ - -#define OVS_DATAPATH_FAMILY "ovs_datapath" -#define OVS_DATAPATH_MCGROUP "ovs_datapath" -#define OVS_DATAPATH_VERSION 0x1 - -enum ovs_datapath_cmd { - OVS_DP_CMD_UNSPEC, - OVS_DP_CMD_NEW, - OVS_DP_CMD_DEL, - OVS_DP_CMD_GET, - OVS_DP_CMD_SET -}; - -/** - * enum ovs_datapath_attr - attributes for %OVS_DP_* commands. - * @OVS_DP_ATTR_NAME: Name of the network device that serves as the "local - * port". This is the name of the network device whose dp_ifindex is given in - * the &struct ovs_header. Always present in notifications. Required in - * %OVS_DP_NEW requests. May be used as an alternative to specifying - * dp_ifindex in other requests (with a dp_ifindex of 0). - * @OVS_DP_ATTR_UPCALL_PID: The Netlink socket in userspace that is initially - * set on the datapath port (for OVS_ACTION_ATTR_MISS). Only valid on - * %OVS_DP_CMD_NEW requests. A value of zero indicates that upcalls should - * not be sent. - * @OVS_DP_ATTR_STATS: Statistics about packets that have passed through the - * datapath. Always present in notifications. - * - * These attributes follow the &struct ovs_header within the Generic Netlink - * payload for %OVS_DP_* commands. - */ -enum ovs_datapath_attr { - OVS_DP_ATTR_UNSPEC, - OVS_DP_ATTR_NAME, /* name of dp_ifindex netdev */ - OVS_DP_ATTR_UPCALL_PID, /* Netlink PID to receive upcalls */ - OVS_DP_ATTR_STATS, /* struct ovs_dp_stats */ - __OVS_DP_ATTR_MAX -}; - -#define OVS_DP_ATTR_MAX (__OVS_DP_ATTR_MAX - 1) - -struct ovs_dp_stats { - __u64 n_hit; /* Number of flow table matches. */ - __u64 n_missed; /* Number of flow table misses. */ - __u64 n_lost; /* Number of misses not sent to userspace. */ - __u64 n_flows; /* Number of flows present */ -}; - -struct ovs_vport_stats { - __u64 rx_packets; /* total packets received */ - __u64 tx_packets; /* total packets transmitted */ - __u64 rx_bytes; /* total bytes received */ - __u64 tx_bytes; /* total bytes transmitted */ - __u64 rx_errors; /* bad packets received */ - __u64 tx_errors; /* packet transmit problems */ - __u64 rx_dropped; /* no space in linux buffers */ - __u64 tx_dropped; /* no space available in linux */ -}; - -/* Fixed logical ports. */ -#define OVSP_LOCAL ((__u32)0) - -/* Packet transfer. */ - -#define OVS_PACKET_FAMILY "ovs_packet" -#define OVS_PACKET_VERSION 0x1 - -enum ovs_packet_cmd { - OVS_PACKET_CMD_UNSPEC, - - /* Kernel-to-user notifications. */ - OVS_PACKET_CMD_MISS, /* Flow table miss. */ - OVS_PACKET_CMD_ACTION, /* OVS_ACTION_ATTR_USERSPACE action. */ - - /* Userspace commands. */ - OVS_PACKET_CMD_EXECUTE /* Apply actions to a packet. */ -}; - -/** - * enum ovs_packet_attr - attributes for %OVS_PACKET_* commands. - * @OVS_PACKET_ATTR_PACKET: Present for all notifications. Contains the entire - * packet as received, from the start of the Ethernet header onward. For - * %OVS_PACKET_CMD_ACTION, %OVS_PACKET_ATTR_PACKET reflects changes made by - * actions preceding %OVS_ACTION_ATTR_USERSPACE, but %OVS_PACKET_ATTR_KEY is - * the flow key extracted from the packet as originally received. - * @OVS_PACKET_ATTR_KEY: Present for all notifications. Contains the flow key - * extracted from the packet as nested %OVS_KEY_ATTR_* attributes. This allows - * userspace to adapt its flow setup strategy by comparing its notion of the - * flow key against the kernel's. - * @OVS_PACKET_ATTR_ACTIONS: Contains actions for the packet. Used - * for %OVS_PACKET_CMD_EXECUTE. It has nested %OVS_ACTION_ATTR_* attributes. 
- * @OVS_PACKET_ATTR_USERDATA: Present for an %OVS_PACKET_CMD_ACTION - * notification if the %OVS_ACTION_ATTR_USERSPACE action specified an - * %OVS_USERSPACE_ATTR_USERDATA attribute, with the same length and content - * specified there. - * - * These attributes follow the &struct ovs_header within the Generic Netlink - * payload for %OVS_PACKET_* commands. - */ -enum ovs_packet_attr { - OVS_PACKET_ATTR_UNSPEC, - OVS_PACKET_ATTR_PACKET, /* Packet data. */ - OVS_PACKET_ATTR_KEY, /* Nested OVS_KEY_ATTR_* attributes. */ - OVS_PACKET_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */ - OVS_PACKET_ATTR_USERDATA, /* OVS_ACTION_ATTR_USERSPACE arg. */ - __OVS_PACKET_ATTR_MAX -}; - -#define OVS_PACKET_ATTR_MAX (__OVS_PACKET_ATTR_MAX - 1) - -/* Virtual ports. */ - -#define OVS_VPORT_FAMILY "ovs_vport" -#define OVS_VPORT_MCGROUP "ovs_vport" -#define OVS_VPORT_VERSION 0x1 - -enum ovs_vport_cmd { - OVS_VPORT_CMD_UNSPEC, - OVS_VPORT_CMD_NEW, - OVS_VPORT_CMD_DEL, - OVS_VPORT_CMD_GET, - OVS_VPORT_CMD_SET -}; - -enum ovs_vport_type { - OVS_VPORT_TYPE_UNSPEC, - OVS_VPORT_TYPE_NETDEV, /* network device */ - OVS_VPORT_TYPE_INTERNAL, /* network device implemented by datapath */ - __OVS_VPORT_TYPE_MAX -}; - -#define OVS_VPORT_TYPE_MAX (__OVS_VPORT_TYPE_MAX - 1) - -/** - * enum ovs_vport_attr - attributes for %OVS_VPORT_* commands. - * @OVS_VPORT_ATTR_PORT_NO: 32-bit port number within datapath. - * @OVS_VPORT_ATTR_TYPE: 32-bit %OVS_VPORT_TYPE_* constant describing the type - * of vport. - * @OVS_VPORT_ATTR_NAME: Name of vport. For a vport based on a network device - * this is the name of the network device. Maximum length %IFNAMSIZ-1 bytes - * plus a null terminator. - * @OVS_VPORT_ATTR_OPTIONS: Vport-specific configuration information. - * @OVS_VPORT_ATTR_UPCALL_PID: The Netlink socket in userspace that - * OVS_PACKET_CMD_MISS upcalls will be directed to for packets received on - * this port. A value of zero indicates that upcalls should not be sent. - * @OVS_VPORT_ATTR_STATS: A &struct ovs_vport_stats giving statistics for - * packets sent or received through the vport. - * - * These attributes follow the &struct ovs_header within the Generic Netlink - * payload for %OVS_VPORT_* commands. - * - * For %OVS_VPORT_CMD_NEW requests, the %OVS_VPORT_ATTR_TYPE and - * %OVS_VPORT_ATTR_NAME attributes are required. %OVS_VPORT_ATTR_PORT_NO is - * optional; if not specified a free port number is automatically selected. - * Whether %OVS_VPORT_ATTR_OPTIONS is required or optional depends on the type - * of vport. - * and other attributes are ignored. - * - * For other requests, if %OVS_VPORT_ATTR_NAME is specified then it is used to - * look up the vport to operate on; otherwise dp_idx from the &struct - * ovs_header plus %OVS_VPORT_ATTR_PORT_NO determine the vport. - */ -enum ovs_vport_attr { - OVS_VPORT_ATTR_UNSPEC, - OVS_VPORT_ATTR_PORT_NO, /* u32 port number within datapath */ - OVS_VPORT_ATTR_TYPE, /* u32 OVS_VPORT_TYPE_* constant. */ - OVS_VPORT_ATTR_NAME, /* string name, up to IFNAMSIZ bytes long */ - OVS_VPORT_ATTR_OPTIONS, /* nested attributes, varies by vport type */ - OVS_VPORT_ATTR_UPCALL_PID, /* u32 Netlink PID to receive upcalls */ - OVS_VPORT_ATTR_STATS, /* struct ovs_vport_stats */ - __OVS_VPORT_ATTR_MAX -}; - -#define OVS_VPORT_ATTR_MAX (__OVS_VPORT_ATTR_MAX - 1) - -/* Flows. 
*/ - -#define OVS_FLOW_FAMILY "ovs_flow" -#define OVS_FLOW_MCGROUP "ovs_flow" -#define OVS_FLOW_VERSION 0x1 - -enum ovs_flow_cmd { - OVS_FLOW_CMD_UNSPEC, - OVS_FLOW_CMD_NEW, - OVS_FLOW_CMD_DEL, - OVS_FLOW_CMD_GET, - OVS_FLOW_CMD_SET -}; - -struct ovs_flow_stats { - __u64 n_packets; /* Number of matched packets. */ - __u64 n_bytes; /* Number of matched bytes. */ -}; - -enum ovs_key_attr { - OVS_KEY_ATTR_UNSPEC, - OVS_KEY_ATTR_ENCAP, /* Nested set of encapsulated attributes. */ - OVS_KEY_ATTR_PRIORITY, /* u32 skb->priority */ - OVS_KEY_ATTR_IN_PORT, /* u32 OVS dp port number */ - OVS_KEY_ATTR_ETHERNET, /* struct ovs_key_ethernet */ - OVS_KEY_ATTR_VLAN, /* be16 VLAN TCI */ - OVS_KEY_ATTR_ETHERTYPE, /* be16 Ethernet type */ - OVS_KEY_ATTR_IPV4, /* struct ovs_key_ipv4 */ - OVS_KEY_ATTR_IPV6, /* struct ovs_key_ipv6 */ - OVS_KEY_ATTR_TCP, /* struct ovs_key_tcp */ - OVS_KEY_ATTR_UDP, /* struct ovs_key_udp */ - OVS_KEY_ATTR_ICMP, /* struct ovs_key_icmp */ - OVS_KEY_ATTR_ICMPV6, /* struct ovs_key_icmpv6 */ - OVS_KEY_ATTR_ARP, /* struct ovs_key_arp */ - OVS_KEY_ATTR_ND, /* struct ovs_key_nd */ - OVS_KEY_ATTR_SKB_MARK, /* u32 skb mark */ - __OVS_KEY_ATTR_MAX -}; - -#define OVS_KEY_ATTR_MAX (__OVS_KEY_ATTR_MAX - 1) - -/** - * enum ovs_frag_type - IPv4 and IPv6 fragment type - * @OVS_FRAG_TYPE_NONE: Packet is not a fragment. - * @OVS_FRAG_TYPE_FIRST: Packet is a fragment with offset 0. - * @OVS_FRAG_TYPE_LATER: Packet is a fragment with nonzero offset. - * - * Used as the @ipv4_frag in &struct ovs_key_ipv4 and as @ipv6_frag &struct - * ovs_key_ipv6. - */ -enum ovs_frag_type { - OVS_FRAG_TYPE_NONE, - OVS_FRAG_TYPE_FIRST, - OVS_FRAG_TYPE_LATER, - __OVS_FRAG_TYPE_MAX -}; - -#define OVS_FRAG_TYPE_MAX (__OVS_FRAG_TYPE_MAX - 1) - -struct ovs_key_ethernet { - __u8 eth_src[ETH_ALEN]; - __u8 eth_dst[ETH_ALEN]; -}; - -struct ovs_key_ipv4 { - __be32 ipv4_src; - __be32 ipv4_dst; - __u8 ipv4_proto; - __u8 ipv4_tos; - __u8 ipv4_ttl; - __u8 ipv4_frag; /* One of OVS_FRAG_TYPE_*. */ -}; - -struct ovs_key_ipv6 { - __be32 ipv6_src[4]; - __be32 ipv6_dst[4]; - __be32 ipv6_label; /* 20-bits in least-significant bits. */ - __u8 ipv6_proto; - __u8 ipv6_tclass; - __u8 ipv6_hlimit; - __u8 ipv6_frag; /* One of OVS_FRAG_TYPE_*. */ -}; - -struct ovs_key_tcp { - __be16 tcp_src; - __be16 tcp_dst; -}; - -struct ovs_key_udp { - __be16 udp_src; - __be16 udp_dst; -}; - -struct ovs_key_icmp { - __u8 icmp_type; - __u8 icmp_code; -}; - -struct ovs_key_icmpv6 { - __u8 icmpv6_type; - __u8 icmpv6_code; -}; - -struct ovs_key_arp { - __be32 arp_sip; - __be32 arp_tip; - __be16 arp_op; - __u8 arp_sha[ETH_ALEN]; - __u8 arp_tha[ETH_ALEN]; -}; - -struct ovs_key_nd { - __u32 nd_target[4]; - __u8 nd_sll[ETH_ALEN]; - __u8 nd_tll[ETH_ALEN]; -}; - -/** - * enum ovs_flow_attr - attributes for %OVS_FLOW_* commands. - * @OVS_FLOW_ATTR_KEY: Nested %OVS_KEY_ATTR_* attributes specifying the flow - * key. Always present in notifications. Required for all requests (except - * dumps). - * @OVS_FLOW_ATTR_ACTIONS: Nested %OVS_ACTION_ATTR_* attributes specifying - * the actions to take for packets that match the key. Always present in - * notifications. Required for %OVS_FLOW_CMD_NEW requests, optional for - * %OVS_FLOW_CMD_SET requests. - * @OVS_FLOW_ATTR_STATS: &struct ovs_flow_stats giving statistics for this - * flow. Present in notifications if the stats would be nonzero. Ignored in - * requests. - * @OVS_FLOW_ATTR_TCP_FLAGS: An 8-bit value giving the OR'd value of all of the - * TCP flags seen on packets in this flow. 
Only present in notifications for - * TCP flows, and only if it would be nonzero. Ignored in requests. - * @OVS_FLOW_ATTR_USED: A 64-bit integer giving the time, in milliseconds on - * the system monotonic clock, at which a packet was last processed for this - * flow. Only present in notifications if a packet has been processed for this - * flow. Ignored in requests. - * @OVS_FLOW_ATTR_CLEAR: If present in a %OVS_FLOW_CMD_SET request, clears the - * last-used time, accumulated TCP flags, and statistics for this flow. - * Otherwise ignored in requests. Never present in notifications. - * - * These attributes follow the &struct ovs_header within the Generic Netlink - * payload for %OVS_FLOW_* commands. - */ -enum ovs_flow_attr { - OVS_FLOW_ATTR_UNSPEC, - OVS_FLOW_ATTR_KEY, /* Sequence of OVS_KEY_ATTR_* attributes. */ - OVS_FLOW_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */ - OVS_FLOW_ATTR_STATS, /* struct ovs_flow_stats. */ - OVS_FLOW_ATTR_TCP_FLAGS, /* 8-bit OR'd TCP flags. */ - OVS_FLOW_ATTR_USED, /* u64 msecs last used in monotonic time. */ - OVS_FLOW_ATTR_CLEAR, /* Flag to clear stats, tcp_flags, used. */ - __OVS_FLOW_ATTR_MAX -}; - -#define OVS_FLOW_ATTR_MAX (__OVS_FLOW_ATTR_MAX - 1) - -/** - * enum ovs_sample_attr - Attributes for %OVS_ACTION_ATTR_SAMPLE action. - * @OVS_SAMPLE_ATTR_PROBABILITY: 32-bit fraction of packets to sample with - * @OVS_ACTION_ATTR_SAMPLE. A value of 0 samples no packets, a value of - * %UINT32_MAX samples all packets and intermediate values sample intermediate - * fractions of packets. - * @OVS_SAMPLE_ATTR_ACTIONS: Set of actions to execute in sampling event. - * Actions are passed as nested attributes. - * - * Executes the specified actions with the given probability on a per-packet - * basis. - */ -enum ovs_sample_attr { - OVS_SAMPLE_ATTR_UNSPEC, - OVS_SAMPLE_ATTR_PROBABILITY, /* u32 number */ - OVS_SAMPLE_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */ - __OVS_SAMPLE_ATTR_MAX, -}; - -#define OVS_SAMPLE_ATTR_MAX (__OVS_SAMPLE_ATTR_MAX - 1) - -/** - * enum ovs_userspace_attr - Attributes for %OVS_ACTION_ATTR_USERSPACE action. - * @OVS_USERSPACE_ATTR_PID: u32 Netlink PID to which the %OVS_PACKET_CMD_ACTION - * message should be sent. Required. - * @OVS_USERSPACE_ATTR_USERDATA: If present, its variable-length argument is - * copied to the %OVS_PACKET_CMD_ACTION message as %OVS_PACKET_ATTR_USERDATA. - */ -enum ovs_userspace_attr { - OVS_USERSPACE_ATTR_UNSPEC, - OVS_USERSPACE_ATTR_PID, /* u32 Netlink PID to receive upcalls. */ - OVS_USERSPACE_ATTR_USERDATA, /* Optional user-specified cookie. */ - __OVS_USERSPACE_ATTR_MAX -}; - -#define OVS_USERSPACE_ATTR_MAX (__OVS_USERSPACE_ATTR_MAX - 1) - -/** - * struct ovs_action_push_vlan - %OVS_ACTION_ATTR_PUSH_VLAN action argument. - * @vlan_tpid: Tag protocol identifier (TPID) to push. - * @vlan_tci: Tag control identifier (TCI) to push. The CFI bit must be set - * (but it will not be set in the 802.1Q header that is pushed). - * - * The @vlan_tpid value is typically %ETH_P_8021Q. The only acceptable TPID - * values are those that the kernel module also parses as 802.1Q headers, to - * prevent %OVS_ACTION_ATTR_PUSH_VLAN followed by %OVS_ACTION_ATTR_POP_VLAN - * from having surprising results. - */ -struct ovs_action_push_vlan { - __be16 vlan_tpid; /* 802.1Q TPID. */ - __be16 vlan_tci; /* 802.1Q TCI (VLAN ID and priority). */ -}; - -/** - * enum ovs_action_attr - Action types. - * - * @OVS_ACTION_ATTR_OUTPUT: Output packet to port. 
- * @OVS_ACTION_ATTR_USERSPACE: Send packet to userspace according to nested - * %OVS_USERSPACE_ATTR_* attributes. - * @OVS_ACTION_ATTR_SET: Replaces the contents of an existing header. The - * single nested %OVS_KEY_ATTR_* attribute specifies a header to modify and its - * value. - * @OVS_ACTION_ATTR_PUSH_VLAN: Push a new outermost 802.1Q header onto the - * packet. - * @OVS_ACTION_ATTR_POP_VLAN: Pop the outermost 802.1Q header off the packet. - * @OVS_ACTION_ATTR_SAMPLE: Probabilitically executes actions, as specified in - * the nested %OVS_SAMPLE_ATTR_* attributes. - * - * Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all - * fields within a header are modifiable, e.g. the IPv4 protocol and fragment - * type may not be changed. - */ - -enum ovs_action_attr { - OVS_ACTION_ATTR_UNSPEC, - OVS_ACTION_ATTR_OUTPUT, /* u32 port number. */ - OVS_ACTION_ATTR_USERSPACE, /* Nested OVS_USERSPACE_ATTR_*. */ - OVS_ACTION_ATTR_SET, /* One nested OVS_KEY_ATTR_*. */ - OVS_ACTION_ATTR_PUSH_VLAN, /* struct ovs_action_push_vlan. */ - OVS_ACTION_ATTR_POP_VLAN, /* No argument. */ - OVS_ACTION_ATTR_SAMPLE, /* Nested OVS_SAMPLE_ATTR_*. */ - __OVS_ACTION_ATTR_MAX -}; - -#define OVS_ACTION_ATTR_MAX (__OVS_ACTION_ATTR_MAX - 1) +#include #endif /* _LINUX_OPENVSWITCH_H */ diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index 5c8a1d25e21c..d8fbc6aeac86 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -285,6 +285,7 @@ header-y += nvram.h header-y += omap3isp.h header-y += omapfb.h header-y += oom.h +header-y += openvswitch.h header-y += packet_diag.h header-y += param.h header-y += parport.h diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h new file mode 100644 index 000000000000..405918dd7b3f --- /dev/null +++ b/include/uapi/linux/openvswitch.h @@ -0,0 +1,456 @@ + +/* + * Copyright (c) 2007-2011 Nicira Networks. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#ifndef _UAPI__LINUX_OPENVSWITCH_H +#define _UAPI__LINUX_OPENVSWITCH_H 1 + +#include +#include + +/** + * struct ovs_header - header for OVS Generic Netlink messages. + * @dp_ifindex: ifindex of local port for datapath (0 to make a request not + * specific to a datapath). + * + * Attributes following the header are specific to a particular OVS Generic + * Netlink family, but all of the OVS families use this header. + */ + +struct ovs_header { + int dp_ifindex; +}; + +/* Datapaths. */ + +#define OVS_DATAPATH_FAMILY "ovs_datapath" +#define OVS_DATAPATH_MCGROUP "ovs_datapath" +#define OVS_DATAPATH_VERSION 0x1 + +enum ovs_datapath_cmd { + OVS_DP_CMD_UNSPEC, + OVS_DP_CMD_NEW, + OVS_DP_CMD_DEL, + OVS_DP_CMD_GET, + OVS_DP_CMD_SET +}; + +/** + * enum ovs_datapath_attr - attributes for %OVS_DP_* commands. + * @OVS_DP_ATTR_NAME: Name of the network device that serves as the "local + * port". 
This is the name of the network device whose dp_ifindex is given in + * the &struct ovs_header. Always present in notifications. Required in + * %OVS_DP_NEW requests. May be used as an alternative to specifying + * dp_ifindex in other requests (with a dp_ifindex of 0). + * @OVS_DP_ATTR_UPCALL_PID: The Netlink socket in userspace that is initially + * set on the datapath port (for OVS_ACTION_ATTR_MISS). Only valid on + * %OVS_DP_CMD_NEW requests. A value of zero indicates that upcalls should + * not be sent. + * @OVS_DP_ATTR_STATS: Statistics about packets that have passed through the + * datapath. Always present in notifications. + * + * These attributes follow the &struct ovs_header within the Generic Netlink + * payload for %OVS_DP_* commands. + */ +enum ovs_datapath_attr { + OVS_DP_ATTR_UNSPEC, + OVS_DP_ATTR_NAME, /* name of dp_ifindex netdev */ + OVS_DP_ATTR_UPCALL_PID, /* Netlink PID to receive upcalls */ + OVS_DP_ATTR_STATS, /* struct ovs_dp_stats */ + __OVS_DP_ATTR_MAX +}; + +#define OVS_DP_ATTR_MAX (__OVS_DP_ATTR_MAX - 1) + +struct ovs_dp_stats { + __u64 n_hit; /* Number of flow table matches. */ + __u64 n_missed; /* Number of flow table misses. */ + __u64 n_lost; /* Number of misses not sent to userspace. */ + __u64 n_flows; /* Number of flows present */ +}; + +struct ovs_vport_stats { + __u64 rx_packets; /* total packets received */ + __u64 tx_packets; /* total packets transmitted */ + __u64 rx_bytes; /* total bytes received */ + __u64 tx_bytes; /* total bytes transmitted */ + __u64 rx_errors; /* bad packets received */ + __u64 tx_errors; /* packet transmit problems */ + __u64 rx_dropped; /* no space in linux buffers */ + __u64 tx_dropped; /* no space available in linux */ +}; + +/* Fixed logical ports. */ +#define OVSP_LOCAL ((__u32)0) + +/* Packet transfer. */ + +#define OVS_PACKET_FAMILY "ovs_packet" +#define OVS_PACKET_VERSION 0x1 + +enum ovs_packet_cmd { + OVS_PACKET_CMD_UNSPEC, + + /* Kernel-to-user notifications. */ + OVS_PACKET_CMD_MISS, /* Flow table miss. */ + OVS_PACKET_CMD_ACTION, /* OVS_ACTION_ATTR_USERSPACE action. */ + + /* Userspace commands. */ + OVS_PACKET_CMD_EXECUTE /* Apply actions to a packet. */ +}; + +/** + * enum ovs_packet_attr - attributes for %OVS_PACKET_* commands. + * @OVS_PACKET_ATTR_PACKET: Present for all notifications. Contains the entire + * packet as received, from the start of the Ethernet header onward. For + * %OVS_PACKET_CMD_ACTION, %OVS_PACKET_ATTR_PACKET reflects changes made by + * actions preceding %OVS_ACTION_ATTR_USERSPACE, but %OVS_PACKET_ATTR_KEY is + * the flow key extracted from the packet as originally received. + * @OVS_PACKET_ATTR_KEY: Present for all notifications. Contains the flow key + * extracted from the packet as nested %OVS_KEY_ATTR_* attributes. This allows + * userspace to adapt its flow setup strategy by comparing its notion of the + * flow key against the kernel's. + * @OVS_PACKET_ATTR_ACTIONS: Contains actions for the packet. Used + * for %OVS_PACKET_CMD_EXECUTE. It has nested %OVS_ACTION_ATTR_* attributes. + * @OVS_PACKET_ATTR_USERDATA: Present for an %OVS_PACKET_CMD_ACTION + * notification if the %OVS_ACTION_ATTR_USERSPACE action specified an + * %OVS_USERSPACE_ATTR_USERDATA attribute, with the same length and content + * specified there. + * + * These attributes follow the &struct ovs_header within the Generic Netlink + * payload for %OVS_PACKET_* commands. + */ +enum ovs_packet_attr { + OVS_PACKET_ATTR_UNSPEC, + OVS_PACKET_ATTR_PACKET, /* Packet data. 
*/ + OVS_PACKET_ATTR_KEY, /* Nested OVS_KEY_ATTR_* attributes. */ + OVS_PACKET_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */ + OVS_PACKET_ATTR_USERDATA, /* OVS_ACTION_ATTR_USERSPACE arg. */ + __OVS_PACKET_ATTR_MAX +}; + +#define OVS_PACKET_ATTR_MAX (__OVS_PACKET_ATTR_MAX - 1) + +/* Virtual ports. */ + +#define OVS_VPORT_FAMILY "ovs_vport" +#define OVS_VPORT_MCGROUP "ovs_vport" +#define OVS_VPORT_VERSION 0x1 + +enum ovs_vport_cmd { + OVS_VPORT_CMD_UNSPEC, + OVS_VPORT_CMD_NEW, + OVS_VPORT_CMD_DEL, + OVS_VPORT_CMD_GET, + OVS_VPORT_CMD_SET +}; + +enum ovs_vport_type { + OVS_VPORT_TYPE_UNSPEC, + OVS_VPORT_TYPE_NETDEV, /* network device */ + OVS_VPORT_TYPE_INTERNAL, /* network device implemented by datapath */ + __OVS_VPORT_TYPE_MAX +}; + +#define OVS_VPORT_TYPE_MAX (__OVS_VPORT_TYPE_MAX - 1) + +/** + * enum ovs_vport_attr - attributes for %OVS_VPORT_* commands. + * @OVS_VPORT_ATTR_PORT_NO: 32-bit port number within datapath. + * @OVS_VPORT_ATTR_TYPE: 32-bit %OVS_VPORT_TYPE_* constant describing the type + * of vport. + * @OVS_VPORT_ATTR_NAME: Name of vport. For a vport based on a network device + * this is the name of the network device. Maximum length %IFNAMSIZ-1 bytes + * plus a null terminator. + * @OVS_VPORT_ATTR_OPTIONS: Vport-specific configuration information. + * @OVS_VPORT_ATTR_UPCALL_PID: The Netlink socket in userspace that + * OVS_PACKET_CMD_MISS upcalls will be directed to for packets received on + * this port. A value of zero indicates that upcalls should not be sent. + * @OVS_VPORT_ATTR_STATS: A &struct ovs_vport_stats giving statistics for + * packets sent or received through the vport. + * + * These attributes follow the &struct ovs_header within the Generic Netlink + * payload for %OVS_VPORT_* commands. + * + * For %OVS_VPORT_CMD_NEW requests, the %OVS_VPORT_ATTR_TYPE and + * %OVS_VPORT_ATTR_NAME attributes are required. %OVS_VPORT_ATTR_PORT_NO is + * optional; if not specified a free port number is automatically selected. + * Whether %OVS_VPORT_ATTR_OPTIONS is required or optional depends on the type + * of vport. + * and other attributes are ignored. + * + * For other requests, if %OVS_VPORT_ATTR_NAME is specified then it is used to + * look up the vport to operate on; otherwise dp_idx from the &struct + * ovs_header plus %OVS_VPORT_ATTR_PORT_NO determine the vport. + */ +enum ovs_vport_attr { + OVS_VPORT_ATTR_UNSPEC, + OVS_VPORT_ATTR_PORT_NO, /* u32 port number within datapath */ + OVS_VPORT_ATTR_TYPE, /* u32 OVS_VPORT_TYPE_* constant. */ + OVS_VPORT_ATTR_NAME, /* string name, up to IFNAMSIZ bytes long */ + OVS_VPORT_ATTR_OPTIONS, /* nested attributes, varies by vport type */ + OVS_VPORT_ATTR_UPCALL_PID, /* u32 Netlink PID to receive upcalls */ + OVS_VPORT_ATTR_STATS, /* struct ovs_vport_stats */ + __OVS_VPORT_ATTR_MAX +}; + +#define OVS_VPORT_ATTR_MAX (__OVS_VPORT_ATTR_MAX - 1) + +/* Flows. */ + +#define OVS_FLOW_FAMILY "ovs_flow" +#define OVS_FLOW_MCGROUP "ovs_flow" +#define OVS_FLOW_VERSION 0x1 + +enum ovs_flow_cmd { + OVS_FLOW_CMD_UNSPEC, + OVS_FLOW_CMD_NEW, + OVS_FLOW_CMD_DEL, + OVS_FLOW_CMD_GET, + OVS_FLOW_CMD_SET +}; + +struct ovs_flow_stats { + __u64 n_packets; /* Number of matched packets. */ + __u64 n_bytes; /* Number of matched bytes. */ +}; + +enum ovs_key_attr { + OVS_KEY_ATTR_UNSPEC, + OVS_KEY_ATTR_ENCAP, /* Nested set of encapsulated attributes. 
*/ + OVS_KEY_ATTR_PRIORITY, /* u32 skb->priority */ + OVS_KEY_ATTR_IN_PORT, /* u32 OVS dp port number */ + OVS_KEY_ATTR_ETHERNET, /* struct ovs_key_ethernet */ + OVS_KEY_ATTR_VLAN, /* be16 VLAN TCI */ + OVS_KEY_ATTR_ETHERTYPE, /* be16 Ethernet type */ + OVS_KEY_ATTR_IPV4, /* struct ovs_key_ipv4 */ + OVS_KEY_ATTR_IPV6, /* struct ovs_key_ipv6 */ + OVS_KEY_ATTR_TCP, /* struct ovs_key_tcp */ + OVS_KEY_ATTR_UDP, /* struct ovs_key_udp */ + OVS_KEY_ATTR_ICMP, /* struct ovs_key_icmp */ + OVS_KEY_ATTR_ICMPV6, /* struct ovs_key_icmpv6 */ + OVS_KEY_ATTR_ARP, /* struct ovs_key_arp */ + OVS_KEY_ATTR_ND, /* struct ovs_key_nd */ + OVS_KEY_ATTR_SKB_MARK, /* u32 skb mark */ + __OVS_KEY_ATTR_MAX +}; + +#define OVS_KEY_ATTR_MAX (__OVS_KEY_ATTR_MAX - 1) + +/** + * enum ovs_frag_type - IPv4 and IPv6 fragment type + * @OVS_FRAG_TYPE_NONE: Packet is not a fragment. + * @OVS_FRAG_TYPE_FIRST: Packet is a fragment with offset 0. + * @OVS_FRAG_TYPE_LATER: Packet is a fragment with nonzero offset. + * + * Used as the @ipv4_frag in &struct ovs_key_ipv4 and as @ipv6_frag &struct + * ovs_key_ipv6. + */ +enum ovs_frag_type { + OVS_FRAG_TYPE_NONE, + OVS_FRAG_TYPE_FIRST, + OVS_FRAG_TYPE_LATER, + __OVS_FRAG_TYPE_MAX +}; + +#define OVS_FRAG_TYPE_MAX (__OVS_FRAG_TYPE_MAX - 1) + +struct ovs_key_ethernet { + __u8 eth_src[ETH_ALEN]; + __u8 eth_dst[ETH_ALEN]; +}; + +struct ovs_key_ipv4 { + __be32 ipv4_src; + __be32 ipv4_dst; + __u8 ipv4_proto; + __u8 ipv4_tos; + __u8 ipv4_ttl; + __u8 ipv4_frag; /* One of OVS_FRAG_TYPE_*. */ +}; + +struct ovs_key_ipv6 { + __be32 ipv6_src[4]; + __be32 ipv6_dst[4]; + __be32 ipv6_label; /* 20-bits in least-significant bits. */ + __u8 ipv6_proto; + __u8 ipv6_tclass; + __u8 ipv6_hlimit; + __u8 ipv6_frag; /* One of OVS_FRAG_TYPE_*. */ +}; + +struct ovs_key_tcp { + __be16 tcp_src; + __be16 tcp_dst; +}; + +struct ovs_key_udp { + __be16 udp_src; + __be16 udp_dst; +}; + +struct ovs_key_icmp { + __u8 icmp_type; + __u8 icmp_code; +}; + +struct ovs_key_icmpv6 { + __u8 icmpv6_type; + __u8 icmpv6_code; +}; + +struct ovs_key_arp { + __be32 arp_sip; + __be32 arp_tip; + __be16 arp_op; + __u8 arp_sha[ETH_ALEN]; + __u8 arp_tha[ETH_ALEN]; +}; + +struct ovs_key_nd { + __u32 nd_target[4]; + __u8 nd_sll[ETH_ALEN]; + __u8 nd_tll[ETH_ALEN]; +}; + +/** + * enum ovs_flow_attr - attributes for %OVS_FLOW_* commands. + * @OVS_FLOW_ATTR_KEY: Nested %OVS_KEY_ATTR_* attributes specifying the flow + * key. Always present in notifications. Required for all requests (except + * dumps). + * @OVS_FLOW_ATTR_ACTIONS: Nested %OVS_ACTION_ATTR_* attributes specifying + * the actions to take for packets that match the key. Always present in + * notifications. Required for %OVS_FLOW_CMD_NEW requests, optional for + * %OVS_FLOW_CMD_SET requests. + * @OVS_FLOW_ATTR_STATS: &struct ovs_flow_stats giving statistics for this + * flow. Present in notifications if the stats would be nonzero. Ignored in + * requests. + * @OVS_FLOW_ATTR_TCP_FLAGS: An 8-bit value giving the OR'd value of all of the + * TCP flags seen on packets in this flow. Only present in notifications for + * TCP flows, and only if it would be nonzero. Ignored in requests. + * @OVS_FLOW_ATTR_USED: A 64-bit integer giving the time, in milliseconds on + * the system monotonic clock, at which a packet was last processed for this + * flow. Only present in notifications if a packet has been processed for this + * flow. Ignored in requests. 
+ * @OVS_FLOW_ATTR_CLEAR: If present in a %OVS_FLOW_CMD_SET request, clears the + * last-used time, accumulated TCP flags, and statistics for this flow. + * Otherwise ignored in requests. Never present in notifications. + * + * These attributes follow the &struct ovs_header within the Generic Netlink + * payload for %OVS_FLOW_* commands. + */ +enum ovs_flow_attr { + OVS_FLOW_ATTR_UNSPEC, + OVS_FLOW_ATTR_KEY, /* Sequence of OVS_KEY_ATTR_* attributes. */ + OVS_FLOW_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */ + OVS_FLOW_ATTR_STATS, /* struct ovs_flow_stats. */ + OVS_FLOW_ATTR_TCP_FLAGS, /* 8-bit OR'd TCP flags. */ + OVS_FLOW_ATTR_USED, /* u64 msecs last used in monotonic time. */ + OVS_FLOW_ATTR_CLEAR, /* Flag to clear stats, tcp_flags, used. */ + __OVS_FLOW_ATTR_MAX +}; + +#define OVS_FLOW_ATTR_MAX (__OVS_FLOW_ATTR_MAX - 1) + +/** + * enum ovs_sample_attr - Attributes for %OVS_ACTION_ATTR_SAMPLE action. + * @OVS_SAMPLE_ATTR_PROBABILITY: 32-bit fraction of packets to sample with + * @OVS_ACTION_ATTR_SAMPLE. A value of 0 samples no packets, a value of + * %UINT32_MAX samples all packets and intermediate values sample intermediate + * fractions of packets. + * @OVS_SAMPLE_ATTR_ACTIONS: Set of actions to execute in sampling event. + * Actions are passed as nested attributes. + * + * Executes the specified actions with the given probability on a per-packet + * basis. + */ +enum ovs_sample_attr { + OVS_SAMPLE_ATTR_UNSPEC, + OVS_SAMPLE_ATTR_PROBABILITY, /* u32 number */ + OVS_SAMPLE_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */ + __OVS_SAMPLE_ATTR_MAX, +}; + +#define OVS_SAMPLE_ATTR_MAX (__OVS_SAMPLE_ATTR_MAX - 1) + +/** + * enum ovs_userspace_attr - Attributes for %OVS_ACTION_ATTR_USERSPACE action. + * @OVS_USERSPACE_ATTR_PID: u32 Netlink PID to which the %OVS_PACKET_CMD_ACTION + * message should be sent. Required. + * @OVS_USERSPACE_ATTR_USERDATA: If present, its variable-length argument is + * copied to the %OVS_PACKET_CMD_ACTION message as %OVS_PACKET_ATTR_USERDATA. + */ +enum ovs_userspace_attr { + OVS_USERSPACE_ATTR_UNSPEC, + OVS_USERSPACE_ATTR_PID, /* u32 Netlink PID to receive upcalls. */ + OVS_USERSPACE_ATTR_USERDATA, /* Optional user-specified cookie. */ + __OVS_USERSPACE_ATTR_MAX +}; + +#define OVS_USERSPACE_ATTR_MAX (__OVS_USERSPACE_ATTR_MAX - 1) + +/** + * struct ovs_action_push_vlan - %OVS_ACTION_ATTR_PUSH_VLAN action argument. + * @vlan_tpid: Tag protocol identifier (TPID) to push. + * @vlan_tci: Tag control identifier (TCI) to push. The CFI bit must be set + * (but it will not be set in the 802.1Q header that is pushed). + * + * The @vlan_tpid value is typically %ETH_P_8021Q. The only acceptable TPID + * values are those that the kernel module also parses as 802.1Q headers, to + * prevent %OVS_ACTION_ATTR_PUSH_VLAN followed by %OVS_ACTION_ATTR_POP_VLAN + * from having surprising results. + */ +struct ovs_action_push_vlan { + __be16 vlan_tpid; /* 802.1Q TPID. */ + __be16 vlan_tci; /* 802.1Q TCI (VLAN ID and priority). */ +}; + +/** + * enum ovs_action_attr - Action types. + * + * @OVS_ACTION_ATTR_OUTPUT: Output packet to port. + * @OVS_ACTION_ATTR_USERSPACE: Send packet to userspace according to nested + * %OVS_USERSPACE_ATTR_* attributes. + * @OVS_ACTION_ATTR_SET: Replaces the contents of an existing header. The + * single nested %OVS_KEY_ATTR_* attribute specifies a header to modify and its + * value. + * @OVS_ACTION_ATTR_PUSH_VLAN: Push a new outermost 802.1Q header onto the + * packet. 
+ * @OVS_ACTION_ATTR_POP_VLAN: Pop the outermost 802.1Q header off the packet. + * @OVS_ACTION_ATTR_SAMPLE: Probabilitically executes actions, as specified in + * the nested %OVS_SAMPLE_ATTR_* attributes. + * + * Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all + * fields within a header are modifiable, e.g. the IPv4 protocol and fragment + * type may not be changed. + */ + +enum ovs_action_attr { + OVS_ACTION_ATTR_UNSPEC, + OVS_ACTION_ATTR_OUTPUT, /* u32 port number. */ + OVS_ACTION_ATTR_USERSPACE, /* Nested OVS_USERSPACE_ATTR_*. */ + OVS_ACTION_ATTR_SET, /* One nested OVS_KEY_ATTR_*. */ + OVS_ACTION_ATTR_PUSH_VLAN, /* struct ovs_action_push_vlan. */ + OVS_ACTION_ATTR_POP_VLAN, /* No argument. */ + OVS_ACTION_ATTR_SAMPLE, /* Nested OVS_SAMPLE_ATTR_*. */ + __OVS_ACTION_ATTR_MAX +}; + +#define OVS_ACTION_ATTR_MAX (__OVS_ACTION_ATTR_MAX - 1) + +#endif /* _LINUX_OPENVSWITCH_H */ -- cgit From a691ce7fe451363d2f1fa48d30c8f4b87c2475d4 Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Thu, 28 Mar 2013 15:24:53 +0000 Subject: include/linux: printk is needed in filter.h when CONFIG_BPF_JIT is defined for make V=1 EXTRA_CFLAGS=-W ARCH=arm allmodconfig printk is need when CONFIG_BPF_JIT is defined or it will report pr_err and print_hex_dump are implicit declaration Signed-off-by: Chen Gang Signed-off-by: David S. Miller --- include/linux/filter.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index d7d25083130b..d1248f401a56 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -48,6 +48,9 @@ extern int sk_chk_filter(struct sock_filter *filter, unsigned int flen); extern int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, unsigned len); #ifdef CONFIG_BPF_JIT +#include +#include + extern void bpf_jit_compile(struct sk_filter *fp); extern void bpf_jit_free(struct sk_filter *fp); -- cgit From 4c3d5e7b41dda1b1372bfc2545ef092a1bc5ad33 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 30 Mar 2013 06:31:03 +0000 Subject: net: reorder some fields of net_device As time passed, some fields were added in net_device, and not at sensible offsets. Lets reorder some fields to reduce number of cache lines in RX path. Fields not used in data path should be moved out of this critical cache line. In particular, move broadcast[] to the end of the rx section, as it is less used, and ethernet uses only the beginning of the 32bytes field. Before patch : offsetof(struct net_device,dev_addr)=0x258 offsetof(struct net_device,rx_handler)=0x2b8 offsetof(struct net_device,ingress_queue)=0x2c8 offsetof(struct net_device,broadcast)=0x278 After : offsetof(struct net_device,dev_addr)=0x280 offsetof(struct net_device,rx_handler)=0x298 offsetof(struct net_device,ingress_queue)=0x2a8 offsetof(struct net_device,broadcast)=0x2b0 Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1dbb02c98946..4491414a9218 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1071,6 +1071,8 @@ struct net_device { struct list_head dev_list; struct list_head napi_list; struct list_head unreg_list; + struct list_head upper_dev_list; /* List of upper devices */ + /* currently active device features */ netdev_features_t features; @@ -1143,6 +1145,13 @@ struct net_device { spinlock_t addr_list_lock; struct netdev_hw_addr_list uc; /* Unicast mac addresses */ struct netdev_hw_addr_list mc; /* Multicast mac addresses */ + struct netdev_hw_addr_list dev_addrs; /* list of device + * hw addresses + */ +#ifdef CONFIG_SYSFS + struct kset *queues_kset; +#endif + bool uc_promisc; unsigned int promiscuity; unsigned int allmulti; @@ -1175,21 +1184,11 @@ struct net_device { * avoid dirtying this cache line. */ - struct list_head upper_dev_list; /* List of upper devices */ - /* Interface address info used in eth_type_trans() */ unsigned char *dev_addr; /* hw address, (before bcast because most packets are unicast) */ - struct netdev_hw_addr_list dev_addrs; /* list of device - hw addresses */ - - unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */ - -#ifdef CONFIG_SYSFS - struct kset *queues_kset; -#endif #ifdef CONFIG_RPS struct netdev_rx_queue *_rx; @@ -1200,18 +1199,14 @@ struct net_device { /* Number of RX queues currently active in device */ unsigned int real_num_rx_queues; -#ifdef CONFIG_RFS_ACCEL - /* CPU reverse-mapping for RX completion interrupts, indexed - * by RX queue number. Assigned by driver. This must only be - * set if the ndo_rx_flow_steer operation is defined. */ - struct cpu_rmap *rx_cpu_rmap; -#endif #endif rx_handler_func_t __rcu *rx_handler; void __rcu *rx_handler_data; struct netdev_queue __rcu *ingress_queue; + unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */ + /* * Cache lines mostly used on transmit path @@ -1233,6 +1228,12 @@ struct net_device { #ifdef CONFIG_XPS struct xps_dev_maps __rcu *xps_maps; #endif +#ifdef CONFIG_RFS_ACCEL + /* CPU reverse-mapping for RX completion interrupts, indexed + * by RX queue number. Assigned by driver. This must only be + * set if the ndo_rx_flow_steer operation is defined. */ + struct cpu_rmap *rx_cpu_rmap; +#endif /* These may be needed for future network-power-down code. */ -- cgit From b60e6a0eb0273132cbb60a9806abf5f47a4aee1c Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 21 Mar 2013 12:21:31 +0000 Subject: cpuidle : handle clockevent notify from the cpuidle framework When a cpu enters a deep idle state, the local timers are stopped and the time framework falls back to the timer device used as a broadcast timer. The different cpuidle drivers are calling clockevents_notify ENTER/EXIT when the idle state stops the local timer. Add a new flag CPUIDLE_FLAG_TIMER_STOP which can be set by the cpuidle drivers. If the flag is set, the cpuidle core code takes care of the notification on behalf of the driver to avoid pointless code duplication. Signed-off-by: Daniel Lezcano Reviewed-by: Thomas Gleixner Acked-by: Santosh Shilimkar Signed-off-by: Rafael J. 
Wysocki --- drivers/cpuidle/cpuidle.c | 9 +++++++++ include/linux/cpuidle.h | 1 + 2 files changed, 10 insertions(+) (limited to 'include/linux') diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index eba69290e074..c50037029184 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -8,6 +8,7 @@ * This code is licenced under the GPL. */ +#include #include #include #include @@ -146,12 +147,20 @@ int cpuidle_idle_call(void) trace_cpu_idle_rcuidle(next_state, dev->cpu); + if (drv->states[next_state].flags & CPUIDLE_FLAG_TIMER_STOP) + clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, + &dev->cpu); + if (cpuidle_state_is_coupled(dev, drv, next_state)) entered_state = cpuidle_enter_state_coupled(dev, drv, next_state); else entered_state = cpuidle_enter_state(dev, drv, next_state); + if (drv->states[next_state].flags & CPUIDLE_FLAG_TIMER_STOP) + clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, + &dev->cpu); + trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu); /* give the governor an opportunity to reflect on the outcome */ diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 480c14dc1ddd..a837b332df65 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -57,6 +57,7 @@ struct cpuidle_state { /* Idle State Flags */ #define CPUIDLE_FLAG_TIME_VALID (0x01) /* is residency time measurable? */ #define CPUIDLE_FLAG_COUPLED (0x02) /* state applies to multiple cpus */ +#define CPUIDLE_FLAG_TIMER_STOP (0x04) /* timer is stopped on this state */ #define CPUIDLE_DRIVER_FLAGS_MASK (0xFFFF0000) -- cgit From 4dbad816febb6cb7340e36af4f5c0dc86e55a2ca Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Wed, 27 Mar 2013 10:22:09 +0000 Subject: timer: move enum definition out of ifdef section The next patch will setup automatically the broadcast timer for the different cpuidle driver when one idle state stops its timer. This will be part of the generic code. But some ARM boards, like s3c64xx, uses cpuidle but without the CONFIG_GENERIC_CLOCKEVENTS_BUILD set. Hence the cpuidle framework will be compiled with the code supposed to be generic, that is with clockevents_notify and the different enum. Also the function clockevents_notify is a noop macro, this is fine except the usual code is: int cpu = smp_processor_id(); clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ON, &cpu); and that raises a warning for the variable cpu which is not used. Move the clock_event_nofitiers enum definition out of the CONFIG_GENERIC_CLOCKEVENTS_BUILD section to prevent a compilation error when these are used in the code. Change the clockevents_notify macro to a static inline noop function to prevent a compilation warning. Signed-off-by: Daniel Lezcano Signed-off-by: Rafael J. 
Wysocki --- include/linux/clockchips.h | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 66346521cb65..f9fd93758333 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -8,6 +8,20 @@ #ifndef _LINUX_CLOCKCHIPS_H #define _LINUX_CLOCKCHIPS_H +/* Clock event notification values */ +enum clock_event_nofitiers { + CLOCK_EVT_NOTIFY_ADD, + CLOCK_EVT_NOTIFY_BROADCAST_ON, + CLOCK_EVT_NOTIFY_BROADCAST_OFF, + CLOCK_EVT_NOTIFY_BROADCAST_FORCE, + CLOCK_EVT_NOTIFY_BROADCAST_ENTER, + CLOCK_EVT_NOTIFY_BROADCAST_EXIT, + CLOCK_EVT_NOTIFY_SUSPEND, + CLOCK_EVT_NOTIFY_RESUME, + CLOCK_EVT_NOTIFY_CPU_DYING, + CLOCK_EVT_NOTIFY_CPU_DEAD, +}; + #ifdef CONFIG_GENERIC_CLOCKEVENTS_BUILD #include @@ -26,20 +40,6 @@ enum clock_event_mode { CLOCK_EVT_MODE_RESUME, }; -/* Clock event notification values */ -enum clock_event_nofitiers { - CLOCK_EVT_NOTIFY_ADD, - CLOCK_EVT_NOTIFY_BROADCAST_ON, - CLOCK_EVT_NOTIFY_BROADCAST_OFF, - CLOCK_EVT_NOTIFY_BROADCAST_FORCE, - CLOCK_EVT_NOTIFY_BROADCAST_ENTER, - CLOCK_EVT_NOTIFY_BROADCAST_EXIT, - CLOCK_EVT_NOTIFY_SUSPEND, - CLOCK_EVT_NOTIFY_RESUME, - CLOCK_EVT_NOTIFY_CPU_DYING, - CLOCK_EVT_NOTIFY_CPU_DEAD, -}; - /* * Clock event features */ @@ -173,7 +173,7 @@ extern int tick_receive_broadcast(void); #ifdef CONFIG_GENERIC_CLOCKEVENTS extern void clockevents_notify(unsigned long reason, void *arg); #else -# define clockevents_notify(reason, arg) do { } while (0) +static inline void clockevents_notify(unsigned long reason, void *arg) {} #endif #else /* CONFIG_GENERIC_CLOCKEVENTS_BUILD */ @@ -181,7 +181,7 @@ extern void clockevents_notify(unsigned long reason, void *arg); static inline void clockevents_suspend(void) {} static inline void clockevents_resume(void) {} -#define clockevents_notify(reason, arg) do { } while (0) +static inline void clockevents_notify(unsigned long reason, void *arg) {} #endif -- cgit From a06df062a189a8d5588babb8bf0bb78672497798 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Wed, 27 Mar 2013 10:22:10 +0000 Subject: cpuidle: initialize the broadcast timer framework The commit 89878baa73f0f1c679355006bd8632e5d78f96c2 introduced the CPUIDLE_FLAG_TIMER_STOP flag where we specify a specific idle state stops the local timer. Now use this flag to check at init time if one state will need the broadcast timer and, in this case, setup the broadcast timer framework. That prevents multiple code duplication in the drivers. Signed-off-by: Daniel Lezcano Signed-off-by: Rafael J. 
Wysocki --- drivers/cpuidle/driver.c | 31 +++++++++++++++++++++++++++++-- include/linux/cpuidle.h | 2 ++ 2 files changed, 31 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c index 422c7b69ba7c..8dfaaae94444 100644 --- a/drivers/cpuidle/driver.c +++ b/drivers/cpuidle/driver.c @@ -11,6 +11,8 @@ #include #include #include +#include +#include #include "cpuidle.h" @@ -19,9 +21,28 @@ DEFINE_SPINLOCK(cpuidle_driver_lock); static void __cpuidle_set_cpu_driver(struct cpuidle_driver *drv, int cpu); static struct cpuidle_driver * __cpuidle_get_cpu_driver(int cpu); -static void __cpuidle_driver_init(struct cpuidle_driver *drv) +static void cpuidle_setup_broadcast_timer(void *arg) { + int cpu = smp_processor_id(); + clockevents_notify((long)(arg), &cpu); +} + +static void __cpuidle_driver_init(struct cpuidle_driver *drv, int cpu) +{ + int i; + drv->refcnt = 0; + + for (i = drv->state_count - 1; i >= 0 ; i--) { + + if (!(drv->states[i].flags & CPUIDLE_FLAG_TIMER_STOP)) + continue; + + drv->bctimer = 1; + on_each_cpu_mask(get_cpu_mask(cpu), cpuidle_setup_broadcast_timer, + (void *)CLOCK_EVT_NOTIFY_BROADCAST_ON, 1); + break; + } } static int __cpuidle_register_driver(struct cpuidle_driver *drv, int cpu) @@ -35,7 +56,7 @@ static int __cpuidle_register_driver(struct cpuidle_driver *drv, int cpu) if (__cpuidle_get_cpu_driver(cpu)) return -EBUSY; - __cpuidle_driver_init(drv); + __cpuidle_driver_init(drv, cpu); __cpuidle_set_cpu_driver(drv, cpu); @@ -49,6 +70,12 @@ static void __cpuidle_unregister_driver(struct cpuidle_driver *drv, int cpu) if (!WARN_ON(drv->refcnt > 0)) __cpuidle_set_cpu_driver(NULL, cpu); + + if (drv->bctimer) { + drv->bctimer = 0; + on_each_cpu_mask(get_cpu_mask(cpu), cpuidle_setup_broadcast_timer, + (void *)CLOCK_EVT_NOTIFY_BROADCAST_OFF, 1); + } } #ifdef CONFIG_CPU_IDLE_MULTIPLE_DRIVERS diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index a837b332df65..fc3e5808b7ff 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -107,6 +107,8 @@ struct cpuidle_driver { /* set to 1 to use the core cpuidle time keeping (for all states). */ unsigned int en_core_tk_irqen:1; + /* used by the cpuidle framework to setup the broadcast timer */ + unsigned int bctimer:1; /* states array must be ordered in decreasing power consumption */ struct cpuidle_state states[CPUIDLE_STATE_MAX]; int state_count; -- cgit From 7bd353a995d9049262661d85811d6109140582a3 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 27 Mar 2013 15:58:57 +0000 Subject: cpufreq: Add per policy governor-init/exit infrastructure Currently, there can't be multiple instances of single governor_type. If we have a multi-package system, where we have multiple instances of struct policy (per package), we can't have multiple instances of same governor. i.e. We can't have multiple instances of ondemand governor for multiple packages. Governors directory in sysfs is created at /sys/devices/system/cpu/cpufreq/ governor-name/. Which again reflects that there can be only one instance of a governor_type in the system. This is a bottleneck for multicluster system, where we want different packages to use same governor type, but with different tunables. This patch is inclined towards providing this infrastructure. Because we are required to allocate governor's resources dynamically now, we must do it at policy creation and end. And so got CPUFREQ_GOV_POLICY_INIT/EXIT. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. 
--- drivers/cpufreq/cpufreq.c | 21 ++++++++++++++++++--- include/linux/cpufreq.h | 9 ++++++--- 2 files changed, 24 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index c5996fe7250e..08df7a196116 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1070,6 +1070,8 @@ static int __cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif /* If cpu is last user of policy, free policy */ if (cpus == 1) { + __cpufreq_governor(data, CPUFREQ_GOV_POLICY_EXIT); + lock_policy_rwsem_read(cpu); kobj = &data->kobj; cmp = &data->kobj_unregister; @@ -1651,7 +1653,7 @@ EXPORT_SYMBOL(cpufreq_get_policy); static int __cpufreq_set_policy(struct cpufreq_policy *data, struct cpufreq_policy *policy) { - int ret = 0; + int ret = 0, failed = 1; pr_debug("setting new policy for CPU %u: %u - %u kHz\n", policy->cpu, policy->min, policy->max); @@ -1705,17 +1707,30 @@ static int __cpufreq_set_policy(struct cpufreq_policy *data, pr_debug("governor switch\n"); /* end old governor */ - if (data->governor) + if (data->governor) { __cpufreq_governor(data, CPUFREQ_GOV_STOP); + __cpufreq_governor(data, + CPUFREQ_GOV_POLICY_EXIT); + } /* start new governor */ data->governor = policy->governor; - if (__cpufreq_governor(data, CPUFREQ_GOV_START)) { + if (!__cpufreq_governor(data, CPUFREQ_GOV_POLICY_INIT)) { + if (!__cpufreq_governor(data, CPUFREQ_GOV_START)) + failed = 0; + else + __cpufreq_governor(data, + CPUFREQ_GOV_POLICY_EXIT); + } + + if (failed) { /* new governor failed, so re-start old one */ pr_debug("starting governor %s failed\n", data->governor->name); if (old_gov) { data->governor = old_gov; + __cpufreq_governor(data, + CPUFREQ_GOV_POLICY_INIT); __cpufreq_governor(data, CPUFREQ_GOV_START); } diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index a22944ca0526..b7393b56f552 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -106,6 +106,7 @@ struct cpufreq_policy { * governors are used */ unsigned int policy; /* see above */ struct cpufreq_governor *governor; /* see below */ + void *governor_data; struct work_struct update; /* if update_policy() needs to be * called, but you're in IRQ context */ @@ -178,9 +179,11 @@ static inline unsigned long cpufreq_scale(unsigned long old, u_int div, u_int mu * CPUFREQ GOVERNORS * *********************************************************************/ -#define CPUFREQ_GOV_START 1 -#define CPUFREQ_GOV_STOP 2 -#define CPUFREQ_GOV_LIMITS 3 +#define CPUFREQ_GOV_START 1 +#define CPUFREQ_GOV_STOP 2 +#define CPUFREQ_GOV_LIMITS 3 +#define CPUFREQ_GOV_POLICY_INIT 4 +#define CPUFREQ_GOV_POLICY_EXIT 5 struct cpufreq_governor { char name[CPUFREQ_NAME_LEN]; -- cgit From 4d5dcc4211f9def4281eafb54b8ed483862e8135 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 27 Mar 2013 15:58:58 +0000 Subject: cpufreq: governor: Implement per policy instances of governors Currently, there can't be multiple instances of a single governor_type. If we have a multi-package system with multiple instances of struct policy (one per package), we can't have multiple instances of the same governor; i.e., we can't have multiple instances of the ondemand governor for multiple packages. The governor's sysfs directory is created at /sys/devices/system/cpu/cpufreq/governor-name/, which again reflects that there can be only one instance of a governor_type in the system. This is a bottleneck for multicluster systems, where we want different packages to use the same governor type but with different tunables. This patch uses the infrastructure provided by the earlier patch and implements init/exit routines for the ondemand and conservative governors. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki
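The dual sysfs layout implemented below boils down to one lookup rule, sketched here as a condensed paraphrase of the helpers this patch adds to cpufreq_governor.c (not an additional API): when the driver sets have_governor_per_policy, the tunables hang off policy->governor_data and the attribute group is created under the policy's kobject; otherwise a single shared dbs_data is reached through cdata->gdbs_data under the global cpufreq kobject.

/* Condensed illustration of the per-policy vs. per-system lookup. */
static struct dbs_data *foo_get_dbs_data(struct cpufreq_policy *policy,
					 struct common_dbs_data *cdata)
{
	if (have_governor_per_policy())
		return policy->governor_data;	/* one instance per policy */

	return cdata->gdbs_data;		/* one shared instance */
}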
--- drivers/cpufreq/cpufreq.c | 15 +- drivers/cpufreq/cpufreq_conservative.c | 193 ++++++++++++++---------- drivers/cpufreq/cpufreq_governor.c | 212 +++++++++++++++++--------- drivers/cpufreq/cpufreq_governor.h | 117 +++++++++++++-- drivers/cpufreq/cpufreq_ondemand.c | 263 ++++++++++++++++++++------------- include/linux/cpufreq.h | 8 + 6 files changed, 538 insertions(+), 270 deletions(-) (limited to 'include/linux') diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 08df7a196116..85963fc48a5f 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -128,6 +128,11 @@ void disable_cpufreq(void) static LIST_HEAD(cpufreq_governor_list); static DEFINE_MUTEX(cpufreq_governor_mutex); +bool have_governor_per_policy(void) +{ + return cpufreq_driver->have_governor_per_policy; +} + static struct cpufreq_policy *__cpufreq_cpu_get(unsigned int cpu, bool sysfs) { struct cpufreq_policy *data; @@ -1546,10 +1551,12 @@ static int __cpufreq_governor(struct cpufreq_policy *policy, policy->cpu, event); ret = policy->governor->governor(policy, event); - if (event == CPUFREQ_GOV_START) - policy->governor->initialized++; - else if (event == CPUFREQ_GOV_STOP) - policy->governor->initialized--; + if (!ret) { + if (event == CPUFREQ_GOV_POLICY_INIT) + policy->governor->initialized++; + else if (event == CPUFREQ_GOV_POLICY_EXIT) + policy->governor->initialized--; + } /* we keep one module reference alive for each CPU governed by this CPU */ diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 4fd0006b1291..98b49462f4e9 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -31,17 +32,8 @@ #define DEF_SAMPLING_DOWN_FACTOR (1) #define MAX_SAMPLING_DOWN_FACTOR (10) -static struct dbs_data cs_dbs_data; static DEFINE_PER_CPU(struct cs_cpu_dbs_info_s, cs_cpu_dbs_info); -static struct cs_dbs_tuners cs_tuners = { - .up_threshold = DEF_FREQUENCY_UP_THRESHOLD, - .down_threshold = DEF_FREQUENCY_DOWN_THRESHOLD, - .sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR, - .ignore_nice = 0, - .freq_step = 5, -}; - /* * Every sampling_rate, we check, if current idle time is less than 20% * (default), then we try to increase frequency Every sampling_rate * @@ -55,24 +47,26 @@ static void cs_check_cpu(int cpu, unsigned int load) { struct cs_cpu_dbs_info_s *dbs_info = &per_cpu(cs_cpu_dbs_info, cpu); struct cpufreq_policy *policy = dbs_info->cdbs.cur_policy; + struct dbs_data *dbs_data = policy->governor_data; + struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; unsigned int freq_target; /* * break out if we 'cannot' reduce the speed as the user might * want freq_step to be zero */ - if (cs_tuners.freq_step == 0) + if (cs_tuners->freq_step == 0) return; /* Check for frequency increase */ - if (load > cs_tuners.up_threshold) { + if (load > cs_tuners->up_threshold) { dbs_info->down_skip = 0; /* if we are already at full speed then break out early */ if (dbs_info->requested_freq == policy->max) return; - freq_target = (cs_tuners.freq_step * policy->max) / 100; + freq_target = (cs_tuners->freq_step * policy->max) / 100;
/* max freq cannot be less than 100. But who knows.... */ if (unlikely(freq_target == 0)) @@ -92,8 +86,8 @@ static void cs_check_cpu(int cpu, unsigned int load) * support the current CPU usage without triggering the up policy. To be * safe, we focus 10 points under the threshold. */ - if (load < (cs_tuners.down_threshold - 10)) { - freq_target = (cs_tuners.freq_step * policy->max) / 100; + if (load < (cs_tuners->down_threshold - 10)) { + freq_target = (cs_tuners->freq_step * policy->max) / 100; dbs_info->requested_freq -= freq_target; if (dbs_info->requested_freq < policy->min) @@ -119,11 +113,13 @@ static void cs_dbs_timer(struct work_struct *work) unsigned int cpu = dbs_info->cdbs.cur_policy->cpu; struct cs_cpu_dbs_info_s *core_dbs_info = &per_cpu(cs_cpu_dbs_info, cpu); - int delay = delay_for_sampling_rate(cs_tuners.sampling_rate); + struct dbs_data *dbs_data = dbs_info->cdbs.cur_policy->governor_data; + struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; + int delay = delay_for_sampling_rate(cs_tuners->sampling_rate); mutex_lock(&core_dbs_info->cdbs.timer_mutex); - if (need_load_eval(&core_dbs_info->cdbs, cs_tuners.sampling_rate)) - dbs_check_cpu(&cs_dbs_data, cpu); + if (need_load_eval(&core_dbs_info->cdbs, cs_tuners->sampling_rate)) + dbs_check_cpu(dbs_data, cpu); schedule_delayed_work_on(smp_processor_id(), dw, delay); mutex_unlock(&core_dbs_info->cdbs.timer_mutex); @@ -154,16 +150,12 @@ static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, } /************************** sysfs interface ************************/ -static ssize_t show_sampling_rate_min(struct kobject *kobj, - struct attribute *attr, char *buf) -{ - return sprintf(buf, "%u\n", cs_dbs_data.min_sampling_rate); -} +static struct common_dbs_data cs_dbs_cdata; -static ssize_t store_sampling_down_factor(struct kobject *a, - struct attribute *b, - const char *buf, size_t count) +static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data, + const char *buf, size_t count) { + struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; unsigned int input; int ret; ret = sscanf(buf, "%u", &input); @@ -171,13 +163,14 @@ static ssize_t store_sampling_down_factor(struct kobject *a, if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1) return -EINVAL; - cs_tuners.sampling_down_factor = input; + cs_tuners->sampling_down_factor = input; return count; } -static ssize_t store_sampling_rate(struct kobject *a, struct attribute *b, - const char *buf, size_t count) +static ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf, + size_t count) { + struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; unsigned int input; int ret; ret = sscanf(buf, "%u", &input); @@ -185,43 +178,46 @@ static ssize_t store_sampling_rate(struct kobject *a, struct attribute *b, if (ret != 1) return -EINVAL; - cs_tuners.sampling_rate = max(input, cs_dbs_data.min_sampling_rate); + cs_tuners->sampling_rate = max(input, dbs_data->min_sampling_rate); return count; } -static ssize_t store_up_threshold(struct kobject *a, struct attribute *b, - const char *buf, size_t count) +static ssize_t store_up_threshold(struct dbs_data *dbs_data, const char *buf, + size_t count) { + struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; unsigned int input; int ret; ret = sscanf(buf, "%u", &input); - if (ret != 1 || input > 100 || input <= cs_tuners.down_threshold) + if (ret != 1 || input > 100 || input <= cs_tuners->down_threshold) return -EINVAL; - cs_tuners.up_threshold = input; + cs_tuners->up_threshold = input; return count; } -static 
ssize_t store_down_threshold(struct kobject *a, struct attribute *b, - const char *buf, size_t count) +static ssize_t store_down_threshold(struct dbs_data *dbs_data, const char *buf, + size_t count) { + struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; unsigned int input; int ret; ret = sscanf(buf, "%u", &input); /* cannot be lower than 11 otherwise freq will not fall */ if (ret != 1 || input < 11 || input > 100 || - input >= cs_tuners.up_threshold) + input >= cs_tuners->up_threshold) return -EINVAL; - cs_tuners.down_threshold = input; + cs_tuners->down_threshold = input; return count; } -static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b, - const char *buf, size_t count) +static ssize_t store_ignore_nice(struct dbs_data *dbs_data, const char *buf, + size_t count) { + struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; unsigned int input, j; int ret; @@ -232,10 +228,10 @@ static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b, if (input > 1) input = 1; - if (input == cs_tuners.ignore_nice) /* nothing to do */ + if (input == cs_tuners->ignore_nice) /* nothing to do */ return count; - cs_tuners.ignore_nice = input; + cs_tuners->ignore_nice = input; /* we need to re-evaluate prev_cpu_idle */ for_each_online_cpu(j) { @@ -243,16 +239,17 @@ static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b, dbs_info = &per_cpu(cs_cpu_dbs_info, j); dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j, &dbs_info->cdbs.prev_cpu_wall); - if (cs_tuners.ignore_nice) + if (cs_tuners->ignore_nice) dbs_info->cdbs.prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; } return count; } -static ssize_t store_freq_step(struct kobject *a, struct attribute *b, - const char *buf, size_t count) +static ssize_t store_freq_step(struct dbs_data *dbs_data, const char *buf, + size_t count) { + struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; unsigned int input; int ret; ret = sscanf(buf, "%u", &input); @@ -267,43 +264,88 @@ static ssize_t store_freq_step(struct kobject *a, struct attribute *b, * no need to test here if freq_step is zero as the user might actually * want this, they would be crazy though :) */ - cs_tuners.freq_step = input; + cs_tuners->freq_step = input; return count; } -show_one(cs, sampling_rate, sampling_rate); -show_one(cs, sampling_down_factor, sampling_down_factor); -show_one(cs, up_threshold, up_threshold); -show_one(cs, down_threshold, down_threshold); -show_one(cs, ignore_nice_load, ignore_nice); -show_one(cs, freq_step, freq_step); - -define_one_global_rw(sampling_rate); -define_one_global_rw(sampling_down_factor); -define_one_global_rw(up_threshold); -define_one_global_rw(down_threshold); -define_one_global_rw(ignore_nice_load); -define_one_global_rw(freq_step); -define_one_global_ro(sampling_rate_min); - -static struct attribute *dbs_attributes[] = { - &sampling_rate_min.attr, - &sampling_rate.attr, - &sampling_down_factor.attr, - &up_threshold.attr, - &down_threshold.attr, - &ignore_nice_load.attr, - &freq_step.attr, +show_store_one(cs, sampling_rate); +show_store_one(cs, sampling_down_factor); +show_store_one(cs, up_threshold); +show_store_one(cs, down_threshold); +show_store_one(cs, ignore_nice); +show_store_one(cs, freq_step); +declare_show_sampling_rate_min(cs); + +gov_sys_pol_attr_rw(sampling_rate); +gov_sys_pol_attr_rw(sampling_down_factor); +gov_sys_pol_attr_rw(up_threshold); +gov_sys_pol_attr_rw(down_threshold); +gov_sys_pol_attr_rw(ignore_nice); +gov_sys_pol_attr_rw(freq_step); +gov_sys_pol_attr_ro(sampling_rate_min); 
+ +static struct attribute *dbs_attributes_gov_sys[] = { + &sampling_rate_min_gov_sys.attr, + &sampling_rate_gov_sys.attr, + &sampling_down_factor_gov_sys.attr, + &up_threshold_gov_sys.attr, + &down_threshold_gov_sys.attr, + &ignore_nice_gov_sys.attr, + &freq_step_gov_sys.attr, NULL }; -static struct attribute_group cs_attr_group = { - .attrs = dbs_attributes, +static struct attribute_group cs_attr_group_gov_sys = { + .attrs = dbs_attributes_gov_sys, + .name = "conservative", +}; + +static struct attribute *dbs_attributes_gov_pol[] = { + &sampling_rate_min_gov_pol.attr, + &sampling_rate_gov_pol.attr, + &sampling_down_factor_gov_pol.attr, + &up_threshold_gov_pol.attr, + &down_threshold_gov_pol.attr, + &ignore_nice_gov_pol.attr, + &freq_step_gov_pol.attr, + NULL +}; + +static struct attribute_group cs_attr_group_gov_pol = { + .attrs = dbs_attributes_gov_pol, .name = "conservative", }; /************************** sysfs end ************************/ +static int cs_init(struct dbs_data *dbs_data) +{ + struct cs_dbs_tuners *tuners; + + tuners = kzalloc(sizeof(struct cs_dbs_tuners), GFP_KERNEL); + if (!tuners) { + pr_err("%s: kzalloc failed\n", __func__); + return -ENOMEM; + } + + tuners->up_threshold = DEF_FREQUENCY_UP_THRESHOLD; + tuners->down_threshold = DEF_FREQUENCY_DOWN_THRESHOLD; + tuners->sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR; + tuners->ignore_nice = 0; + tuners->freq_step = 5; + + dbs_data->tuners = tuners; + dbs_data->min_sampling_rate = MIN_SAMPLING_RATE_RATIO * + jiffies_to_usecs(10); + mutex_init(&dbs_data->mutex); + return 0; +} + +static void cs_exit(struct dbs_data *dbs_data) +{ + kfree(dbs_data->tuners); +} + define_get_cpu_dbs_routines(cs_cpu_dbs_info); static struct notifier_block cs_cpufreq_notifier_block = { @@ -314,21 +356,23 @@ static struct cs_ops cs_ops = { .notifier_block = &cs_cpufreq_notifier_block, }; -static struct dbs_data cs_dbs_data = { +static struct common_dbs_data cs_dbs_cdata = { .governor = GOV_CONSERVATIVE, - .attr_group = &cs_attr_group, - .tuners = &cs_tuners, + .attr_group_gov_sys = &cs_attr_group_gov_sys, + .attr_group_gov_pol = &cs_attr_group_gov_pol, .get_cpu_cdbs = get_cpu_cdbs, .get_cpu_dbs_info_s = get_cpu_dbs_info_s, .gov_dbs_timer = cs_dbs_timer, .gov_check_cpu = cs_check_cpu, .gov_ops = &cs_ops, + .init = cs_init, + .exit = cs_exit, }; static int cs_cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event) { - return cpufreq_governor_dbs(&cs_dbs_data, policy, event); + return cpufreq_governor_dbs(policy, &cs_dbs_cdata, event); } #ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE @@ -343,7 +387,6 @@ struct cpufreq_governor cpufreq_gov_conservative = { static int __init cpufreq_gov_dbs_init(void) { - mutex_init(&cs_dbs_data.mutex); return cpufreq_register_governor(&cpufreq_gov_conservative); } diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 5a76086ff09b..26fbb729bc1c 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -22,12 +22,29 @@ #include #include #include +#include #include #include #include #include "cpufreq_governor.h" +static struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy) +{ + if (have_governor_per_policy()) + return &policy->kobj; + else + return cpufreq_global_kobject; +} + +static struct attribute_group *get_sysfs_attr(struct dbs_data *dbs_data) +{ + if (have_governor_per_policy()) + return dbs_data->cdata->attr_group_gov_pol; + else + return dbs_data->cdata->attr_group_gov_sys; +} + static inline u64 
get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall) { u64 idle_time; @@ -65,7 +82,7 @@ EXPORT_SYMBOL_GPL(get_cpu_idle_time); void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) { - struct cpu_dbs_common_info *cdbs = dbs_data->get_cpu_cdbs(cpu); + struct cpu_dbs_common_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu); struct od_dbs_tuners *od_tuners = dbs_data->tuners; struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; struct cpufreq_policy *policy; @@ -73,7 +90,7 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) unsigned int ignore_nice; unsigned int j; - if (dbs_data->governor == GOV_ONDEMAND) + if (dbs_data->cdata->governor == GOV_ONDEMAND) ignore_nice = od_tuners->ignore_nice; else ignore_nice = cs_tuners->ignore_nice; @@ -87,7 +104,7 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) unsigned int idle_time, wall_time, iowait_time; unsigned int load; - j_cdbs = dbs_data->get_cpu_cdbs(j); + j_cdbs = dbs_data->cdata->get_cpu_cdbs(j); cur_idle_time = get_cpu_idle_time(j, &cur_wall_time); @@ -117,9 +134,9 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) idle_time += jiffies_to_usecs(cur_nice_jiffies); } - if (dbs_data->governor == GOV_ONDEMAND) { + if (dbs_data->cdata->governor == GOV_ONDEMAND) { struct od_cpu_dbs_info_s *od_j_dbs_info = - dbs_data->get_cpu_dbs_info_s(cpu); + dbs_data->cdata->get_cpu_dbs_info_s(cpu); cur_iowait_time = get_cpu_iowait_time_us(j, &cur_wall_time); @@ -145,7 +162,7 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) load = 100 * (wall_time - idle_time) / wall_time; - if (dbs_data->governor == GOV_ONDEMAND) { + if (dbs_data->cdata->governor == GOV_ONDEMAND) { int freq_avg = __cpufreq_driver_getavg(policy, j); if (freq_avg <= 0) freq_avg = policy->cur; @@ -157,7 +174,7 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) max_load = load; } - dbs_data->gov_check_cpu(cpu, max_load); + dbs_data->cdata->gov_check_cpu(cpu, max_load); } EXPORT_SYMBOL_GPL(dbs_check_cpu); @@ -165,14 +182,14 @@ static inline void dbs_timer_init(struct dbs_data *dbs_data, int cpu, unsigned int sampling_rate) { int delay = delay_for_sampling_rate(sampling_rate); - struct cpu_dbs_common_info *cdbs = dbs_data->get_cpu_cdbs(cpu); + struct cpu_dbs_common_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu); schedule_delayed_work_on(cpu, &cdbs->work, delay); } static inline void dbs_timer_exit(struct dbs_data *dbs_data, int cpu) { - struct cpu_dbs_common_info *cdbs = dbs_data->get_cpu_cdbs(cpu); + struct cpu_dbs_common_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu); cancel_delayed_work_sync(&cdbs->work); } @@ -196,31 +213,128 @@ bool need_load_eval(struct cpu_dbs_common_info *cdbs, } EXPORT_SYMBOL_GPL(need_load_eval); -int cpufreq_governor_dbs(struct dbs_data *dbs_data, - struct cpufreq_policy *policy, unsigned int event) +static void set_sampling_rate(struct dbs_data *dbs_data, + unsigned int sampling_rate) +{ + if (dbs_data->cdata->governor == GOV_CONSERVATIVE) { + struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; + cs_tuners->sampling_rate = sampling_rate; + } else { + struct od_dbs_tuners *od_tuners = dbs_data->tuners; + od_tuners->sampling_rate = sampling_rate; + } +} + +int cpufreq_governor_dbs(struct cpufreq_policy *policy, + struct common_dbs_data *cdata, unsigned int event) { + struct dbs_data *dbs_data; struct od_cpu_dbs_info_s *od_dbs_info = NULL; struct cs_cpu_dbs_info_s *cs_dbs_info = NULL; - struct cs_ops *cs_ops = NULL; struct od_ops *od_ops = NULL; - struct od_dbs_tuners *od_tuners = dbs_data->tuners; - struct cs_dbs_tuners *cs_tuners = 
dbs_data->tuners; + struct od_dbs_tuners *od_tuners = NULL; + struct cs_dbs_tuners *cs_tuners = NULL; struct cpu_dbs_common_info *cpu_cdbs; - unsigned int *sampling_rate, latency, ignore_nice, j, cpu = policy->cpu; + unsigned int sampling_rate, latency, ignore_nice, j, cpu = policy->cpu; int rc; - cpu_cdbs = dbs_data->get_cpu_cdbs(cpu); + if (have_governor_per_policy()) + dbs_data = policy->governor_data; + else + dbs_data = cdata->gdbs_data; + + WARN_ON(!dbs_data && (event != CPUFREQ_GOV_POLICY_INIT)); + + switch (event) { + case CPUFREQ_GOV_POLICY_INIT: + if (have_governor_per_policy()) { + WARN_ON(dbs_data); + } else if (dbs_data) { + policy->governor_data = dbs_data; + return 0; + } + + dbs_data = kzalloc(sizeof(*dbs_data), GFP_KERNEL); + if (!dbs_data) { + pr_err("%s: POLICY_INIT: kzalloc failed\n", __func__); + return -ENOMEM; + } + + dbs_data->cdata = cdata; + rc = cdata->init(dbs_data); + if (rc) { + pr_err("%s: POLICY_INIT: init() failed\n", __func__); + kfree(dbs_data); + return rc; + } + + rc = sysfs_create_group(get_governor_parent_kobj(policy), + get_sysfs_attr(dbs_data)); + if (rc) { + cdata->exit(dbs_data); + kfree(dbs_data); + return rc; + } + + policy->governor_data = dbs_data; + + /* policy latency is in nS. Convert it to uS first */ + latency = policy->cpuinfo.transition_latency / 1000; + if (latency == 0) + latency = 1; + + /* Bring kernel and HW constraints together */ + dbs_data->min_sampling_rate = max(dbs_data->min_sampling_rate, + MIN_LATENCY_MULTIPLIER * latency); + set_sampling_rate(dbs_data, max(dbs_data->min_sampling_rate, + latency * LATENCY_MULTIPLIER)); + + if (dbs_data->cdata->governor == GOV_CONSERVATIVE) { + struct cs_ops *cs_ops = dbs_data->cdata->gov_ops; + + cpufreq_register_notifier(cs_ops->notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); + } + + if (!have_governor_per_policy()) + cdata->gdbs_data = dbs_data; + + return 0; + case CPUFREQ_GOV_POLICY_EXIT: + if ((policy->governor->initialized == 1) || + have_governor_per_policy()) { + sysfs_remove_group(get_governor_parent_kobj(policy), + get_sysfs_attr(dbs_data)); + + if (dbs_data->cdata->governor == GOV_CONSERVATIVE) { + struct cs_ops *cs_ops = dbs_data->cdata->gov_ops; + + cpufreq_unregister_notifier(cs_ops->notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); + } + + cdata->exit(dbs_data); + kfree(dbs_data); + cdata->gdbs_data = NULL; + } - if (dbs_data->governor == GOV_CONSERVATIVE) { - cs_dbs_info = dbs_data->get_cpu_dbs_info_s(cpu); - sampling_rate = &cs_tuners->sampling_rate; + policy->governor_data = NULL; + return 0; + } + + cpu_cdbs = dbs_data->cdata->get_cpu_cdbs(cpu); + + if (dbs_data->cdata->governor == GOV_CONSERVATIVE) { + cs_tuners = dbs_data->tuners; + cs_dbs_info = dbs_data->cdata->get_cpu_dbs_info_s(cpu); + sampling_rate = cs_tuners->sampling_rate; ignore_nice = cs_tuners->ignore_nice; - cs_ops = dbs_data->gov_ops; } else { - od_dbs_info = dbs_data->get_cpu_dbs_info_s(cpu); - sampling_rate = &od_tuners->sampling_rate; + od_tuners = dbs_data->tuners; + od_dbs_info = dbs_data->cdata->get_cpu_dbs_info_s(cpu); + sampling_rate = od_tuners->sampling_rate; ignore_nice = od_tuners->ignore_nice; - od_ops = dbs_data->gov_ops; + od_ops = dbs_data->cdata->gov_ops; } switch (event) { @@ -232,7 +346,7 @@ int cpufreq_governor_dbs(struct dbs_data *dbs_data, for_each_cpu(j, policy->cpus) { struct cpu_dbs_common_info *j_cdbs = - dbs_data->get_cpu_cdbs(j); + dbs_data->cdata->get_cpu_cdbs(j); j_cdbs->cpu = j; j_cdbs->cur_policy = policy; @@ -244,69 +358,34 @@ int cpufreq_governor_dbs(struct dbs_data 
*dbs_data, mutex_init(&j_cdbs->timer_mutex); INIT_DEFERRABLE_WORK(&j_cdbs->work, - dbs_data->gov_dbs_timer); - } - - if (!policy->governor->initialized) { - rc = sysfs_create_group(cpufreq_global_kobject, - dbs_data->attr_group); - if (rc) { - mutex_unlock(&dbs_data->mutex); - return rc; - } + dbs_data->cdata->gov_dbs_timer); } /* * conservative does not implement micro like ondemand * governor, thus we are bound to jiffes/HZ */ - if (dbs_data->governor == GOV_CONSERVATIVE) { + if (dbs_data->cdata->governor == GOV_CONSERVATIVE) { cs_dbs_info->down_skip = 0; cs_dbs_info->enable = 1; cs_dbs_info->requested_freq = policy->cur; - - if (!policy->governor->initialized) { - cpufreq_register_notifier(cs_ops->notifier_block, - CPUFREQ_TRANSITION_NOTIFIER); - - dbs_data->min_sampling_rate = - MIN_SAMPLING_RATE_RATIO * - jiffies_to_usecs(10); - } } else { od_dbs_info->rate_mult = 1; od_dbs_info->sample_type = OD_NORMAL_SAMPLE; od_ops->powersave_bias_init_cpu(cpu); - - if (!policy->governor->initialized) - od_tuners->io_is_busy = od_ops->io_busy(); } - if (policy->governor->initialized) - goto unlock; - - /* policy latency is in nS. Convert it to uS first */ - latency = policy->cpuinfo.transition_latency / 1000; - if (latency == 0) - latency = 1; - - /* Bring kernel and HW constraints together */ - dbs_data->min_sampling_rate = max(dbs_data->min_sampling_rate, - MIN_LATENCY_MULTIPLIER * latency); - *sampling_rate = max(dbs_data->min_sampling_rate, latency * - LATENCY_MULTIPLIER); -unlock: mutex_unlock(&dbs_data->mutex); /* Initiate timer time stamp */ cpu_cdbs->time_stamp = ktime_get(); for_each_cpu(j, policy->cpus) - dbs_timer_init(dbs_data, j, *sampling_rate); + dbs_timer_init(dbs_data, j, sampling_rate); break; case CPUFREQ_GOV_STOP: - if (dbs_data->governor == GOV_CONSERVATIVE) + if (dbs_data->cdata->governor == GOV_CONSERVATIVE) cs_dbs_info->enable = 0; for_each_cpu(j, policy->cpus) @@ -315,13 +394,6 @@ unlock: mutex_lock(&dbs_data->mutex); mutex_destroy(&cpu_cdbs->timer_mutex); - if (policy->governor->initialized == 1) { - sysfs_remove_group(cpufreq_global_kobject, - dbs_data->attr_group); - if (dbs_data->governor == GOV_CONSERVATIVE) - cpufreq_unregister_notifier(cs_ops->notifier_block, - CPUFREQ_TRANSITION_NOTIFIER); - } mutex_unlock(&dbs_data->mutex); break; diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 46bde01eee62..c83cabf14b2f 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -40,14 +40,75 @@ /* Ondemand Sampling types */ enum {OD_NORMAL_SAMPLE, OD_SUB_SAMPLE}; -/* Macro creating sysfs show routines */ -#define show_one(_gov, file_name, object) \ -static ssize_t show_##file_name \ +/* + * Macro for creating governors sysfs routines + * + * - gov_sys: One governor instance per whole system + * - gov_pol: One governor instance per policy + */ + +/* Create attributes */ +#define gov_sys_attr_ro(_name) \ +static struct global_attr _name##_gov_sys = \ +__ATTR(_name, 0444, show_##_name##_gov_sys, NULL) + +#define gov_sys_attr_rw(_name) \ +static struct global_attr _name##_gov_sys = \ +__ATTR(_name, 0644, show_##_name##_gov_sys, store_##_name##_gov_sys) + +#define gov_pol_attr_ro(_name) \ +static struct freq_attr _name##_gov_pol = \ +__ATTR(_name, 0444, show_##_name##_gov_pol, NULL) + +#define gov_pol_attr_rw(_name) \ +static struct freq_attr _name##_gov_pol = \ +__ATTR(_name, 0644, show_##_name##_gov_pol, store_##_name##_gov_pol) + +#define gov_sys_pol_attr_rw(_name) \ + gov_sys_attr_rw(_name); \ + 
gov_pol_attr_rw(_name) + +#define gov_sys_pol_attr_ro(_name) \ + gov_sys_attr_ro(_name); \ + gov_pol_attr_ro(_name) + +/* Create show/store routines */ +#define show_one(_gov, file_name) \ +static ssize_t show_##file_name##_gov_sys \ (struct kobject *kobj, struct attribute *attr, char *buf) \ { \ - return sprintf(buf, "%u\n", _gov##_tuners.object); \ + struct _gov##_dbs_tuners *tuners = _gov##_dbs_cdata.gdbs_data->tuners; \ + return sprintf(buf, "%u\n", tuners->file_name); \ +} \ + \ +static ssize_t show_##file_name##_gov_pol \ +(struct cpufreq_policy *policy, char *buf) \ +{ \ + struct dbs_data *dbs_data = policy->governor_data; \ + struct _gov##_dbs_tuners *tuners = dbs_data->tuners; \ + return sprintf(buf, "%u\n", tuners->file_name); \ +} + +#define store_one(_gov, file_name) \ +static ssize_t store_##file_name##_gov_sys \ +(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) \ +{ \ + struct dbs_data *dbs_data = _gov##_dbs_cdata.gdbs_data; \ + return store_##file_name(dbs_data, buf, count); \ +} \ + \ +static ssize_t store_##file_name##_gov_pol \ +(struct cpufreq_policy *policy, const char *buf, size_t count) \ +{ \ + struct dbs_data *dbs_data = policy->governor_data; \ + return store_##file_name(dbs_data, buf, count); \ } +#define show_store_one(_gov, file_name) \ +show_one(_gov, file_name); \ +store_one(_gov, file_name) + +/* create helper routines */ #define define_get_cpu_dbs_routines(_dbs_info) \ static struct cpu_dbs_common_info *get_cpu_cdbs(int cpu) \ { \ @@ -103,7 +164,7 @@ struct cs_cpu_dbs_info_s { unsigned int enable:1; }; -/* Governers sysfs tunables */ +/* Per policy Governers sysfs tunables */ struct od_dbs_tuners { unsigned int ignore_nice; unsigned int sampling_rate; @@ -123,31 +184,42 @@ struct cs_dbs_tuners { unsigned int freq_step; }; -/* Per Governer data */ -struct dbs_data { +/* Common Governer data across policies */ +struct dbs_data; +struct common_dbs_data { /* Common across governors */ #define GOV_ONDEMAND 0 #define GOV_CONSERVATIVE 1 int governor; - unsigned int min_sampling_rate; - struct attribute_group *attr_group; - void *tuners; + struct attribute_group *attr_group_gov_sys; /* one governor - system */ + struct attribute_group *attr_group_gov_pol; /* one governor - policy */ - /* dbs_mutex protects dbs_enable in governor start/stop */ - struct mutex mutex; + /* Common data for platforms that don't set have_governor_per_policy */ + struct dbs_data *gdbs_data; struct cpu_dbs_common_info *(*get_cpu_cdbs)(int cpu); void *(*get_cpu_dbs_info_s)(int cpu); void (*gov_dbs_timer)(struct work_struct *work); void (*gov_check_cpu)(int cpu, unsigned int load); + int (*init)(struct dbs_data *dbs_data); + void (*exit)(struct dbs_data *dbs_data); /* Governor specific ops, see below */ void *gov_ops; }; +/* Governer Per policy data */ +struct dbs_data { + struct common_dbs_data *cdata; + unsigned int min_sampling_rate; + void *tuners; + + /* dbs_mutex protects dbs_enable in governor start/stop */ + struct mutex mutex; +}; + /* Governor specific ops, will be passed to dbs_data->gov_ops */ struct od_ops { - int (*io_busy)(void); void (*powersave_bias_init_cpu)(int cpu); unsigned int (*powersave_bias_target)(struct cpufreq_policy *policy, unsigned int freq_next, unsigned int relation); @@ -169,10 +241,25 @@ static inline int delay_for_sampling_rate(unsigned int sampling_rate) return delay; } +#define declare_show_sampling_rate_min(_gov) \ +static ssize_t show_sampling_rate_min_gov_sys \ +(struct kobject *kobj, struct attribute *attr, char *buf) \ +{ 
\ + struct dbs_data *dbs_data = _gov##_dbs_cdata.gdbs_data; \ + return sprintf(buf, "%u\n", dbs_data->min_sampling_rate); \ +} \ + \ +static ssize_t show_sampling_rate_min_gov_pol \ +(struct cpufreq_policy *policy, char *buf) \ +{ \ + struct dbs_data *dbs_data = policy->governor_data; \ + return sprintf(buf, "%u\n", dbs_data->min_sampling_rate); \ +} + u64 get_cpu_idle_time(unsigned int cpu, u64 *wall); void dbs_check_cpu(struct dbs_data *dbs_data, int cpu); bool need_load_eval(struct cpu_dbs_common_info *cdbs, unsigned int sampling_rate); -int cpufreq_governor_dbs(struct dbs_data *dbs_data, - struct cpufreq_policy *policy, unsigned int event); +int cpufreq_governor_dbs(struct cpufreq_policy *policy, + struct common_dbs_data *cdata, unsigned int event); #endif /* _CPUFREQ_GOVERNER_H */ diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index f3eb26cd848f..15e80ee61352 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -37,22 +38,12 @@ #define MIN_FREQUENCY_UP_THRESHOLD (11) #define MAX_FREQUENCY_UP_THRESHOLD (100) -static struct dbs_data od_dbs_data; static DEFINE_PER_CPU(struct od_cpu_dbs_info_s, od_cpu_dbs_info); #ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND static struct cpufreq_governor cpufreq_gov_ondemand; #endif -static struct od_dbs_tuners od_tuners = { - .up_threshold = DEF_FREQUENCY_UP_THRESHOLD, - .sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR, - .adj_up_threshold = DEF_FREQUENCY_UP_THRESHOLD - - DEF_FREQUENCY_DOWN_DIFFERENTIAL, - .ignore_nice = 0, - .powersave_bias = 0, -}; - static void ondemand_powersave_bias_init_cpu(int cpu) { struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu); @@ -98,6 +89,8 @@ static unsigned int powersave_bias_target(struct cpufreq_policy *policy, unsigned int jiffies_total, jiffies_hi, jiffies_lo; struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, policy->cpu); + struct dbs_data *dbs_data = policy->governor_data; + struct od_dbs_tuners *od_tuners = dbs_data->tuners; if (!dbs_info->freq_table) { dbs_info->freq_lo = 0; @@ -108,7 +101,7 @@ static unsigned int powersave_bias_target(struct cpufreq_policy *policy, cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_next, relation, &index); freq_req = dbs_info->freq_table[index].frequency; - freq_reduc = freq_req * od_tuners.powersave_bias / 1000; + freq_reduc = freq_req * od_tuners->powersave_bias / 1000; freq_avg = freq_req - freq_reduc; /* Find freq bounds for freq_avg in freq_table */ @@ -127,7 +120,7 @@ static unsigned int powersave_bias_target(struct cpufreq_policy *policy, dbs_info->freq_lo_jiffies = 0; return freq_lo; } - jiffies_total = usecs_to_jiffies(od_tuners.sampling_rate); + jiffies_total = usecs_to_jiffies(od_tuners->sampling_rate); jiffies_hi = (freq_avg - freq_lo) * jiffies_total; jiffies_hi += ((freq_hi - freq_lo) / 2); jiffies_hi /= (freq_hi - freq_lo); @@ -148,12 +141,15 @@ static void ondemand_powersave_bias_init(void) static void dbs_freq_increase(struct cpufreq_policy *p, unsigned int freq) { - if (od_tuners.powersave_bias) + struct dbs_data *dbs_data = p->governor_data; + struct od_dbs_tuners *od_tuners = dbs_data->tuners; + + if (od_tuners->powersave_bias) freq = powersave_bias_target(p, freq, CPUFREQ_RELATION_H); else if (p->cur == p->max) return; - __cpufreq_driver_target(p, freq, od_tuners.powersave_bias ? + __cpufreq_driver_target(p, freq, od_tuners->powersave_bias ? 
CPUFREQ_RELATION_L : CPUFREQ_RELATION_H); } @@ -170,15 +166,17 @@ static void od_check_cpu(int cpu, unsigned int load_freq) { struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu); struct cpufreq_policy *policy = dbs_info->cdbs.cur_policy; + struct dbs_data *dbs_data = policy->governor_data; + struct od_dbs_tuners *od_tuners = dbs_data->tuners; dbs_info->freq_lo = 0; /* Check for frequency increase */ - if (load_freq > od_tuners.up_threshold * policy->cur) { + if (load_freq > od_tuners->up_threshold * policy->cur) { /* If switching to max speed, apply sampling_down_factor */ if (policy->cur < policy->max) dbs_info->rate_mult = - od_tuners.sampling_down_factor; + od_tuners->sampling_down_factor; dbs_freq_increase(policy, policy->max); return; } @@ -193,9 +191,10 @@ static void od_check_cpu(int cpu, unsigned int load_freq) * support the current CPU usage without triggering the up policy. To be * safe, we focus 10 points under the threshold. */ - if (load_freq < od_tuners.adj_up_threshold * policy->cur) { + if (load_freq < od_tuners->adj_up_threshold + * policy->cur) { unsigned int freq_next; - freq_next = load_freq / od_tuners.adj_up_threshold; + freq_next = load_freq / od_tuners->adj_up_threshold; /* No longer fully busy, reset rate_mult */ dbs_info->rate_mult = 1; @@ -203,7 +202,7 @@ static void od_check_cpu(int cpu, unsigned int load_freq) if (freq_next < policy->min) freq_next = policy->min; - if (!od_tuners.powersave_bias) { + if (!od_tuners->powersave_bias) { __cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_L); } else { @@ -223,12 +222,14 @@ static void od_dbs_timer(struct work_struct *work) unsigned int cpu = dbs_info->cdbs.cur_policy->cpu; struct od_cpu_dbs_info_s *core_dbs_info = &per_cpu(od_cpu_dbs_info, cpu); + struct dbs_data *dbs_data = dbs_info->cdbs.cur_policy->governor_data; + struct od_dbs_tuners *od_tuners = dbs_data->tuners; int delay, sample_type = core_dbs_info->sample_type; bool eval_load; mutex_lock(&core_dbs_info->cdbs.timer_mutex); eval_load = need_load_eval(&core_dbs_info->cdbs, - od_tuners.sampling_rate); + od_tuners->sampling_rate); /* Common NORMAL_SAMPLE setup */ core_dbs_info->sample_type = OD_NORMAL_SAMPLE; @@ -240,13 +241,13 @@ static void od_dbs_timer(struct work_struct *work) CPUFREQ_RELATION_H); } else { if (eval_load) - dbs_check_cpu(&od_dbs_data, cpu); + dbs_check_cpu(dbs_data, cpu); if (core_dbs_info->freq_lo) { /* Setup timer for SUB_SAMPLE */ core_dbs_info->sample_type = OD_SUB_SAMPLE; delay = core_dbs_info->freq_hi_jiffies; } else { - delay = delay_for_sampling_rate(od_tuners.sampling_rate + delay = delay_for_sampling_rate(od_tuners->sampling_rate * core_dbs_info->rate_mult); } } @@ -256,12 +257,7 @@ static void od_dbs_timer(struct work_struct *work) } /************************** sysfs interface ************************/ - -static ssize_t show_sampling_rate_min(struct kobject *kobj, - struct attribute *attr, char *buf) -{ - return sprintf(buf, "%u\n", od_dbs_data.min_sampling_rate); -} +static struct common_dbs_data od_dbs_cdata; /** * update_sampling_rate - update sampling rate effective immediately if needed. @@ -276,12 +272,14 @@ static ssize_t show_sampling_rate_min(struct kobject *kobj, * reducing the sampling rate, we need to make the new value effective * immediately. 
*/ -static void update_sampling_rate(unsigned int new_rate) +static void update_sampling_rate(struct dbs_data *dbs_data, + unsigned int new_rate) { + struct od_dbs_tuners *od_tuners = dbs_data->tuners; int cpu; - od_tuners.sampling_rate = new_rate = max(new_rate, - od_dbs_data.min_sampling_rate); + od_tuners->sampling_rate = new_rate = max(new_rate, + dbs_data->min_sampling_rate); for_each_online_cpu(cpu) { struct cpufreq_policy *policy; @@ -322,34 +320,37 @@ static void update_sampling_rate(unsigned int new_rate) } } -static ssize_t store_sampling_rate(struct kobject *a, struct attribute *b, - const char *buf, size_t count) +static ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf, + size_t count) { unsigned int input; int ret; ret = sscanf(buf, "%u", &input); if (ret != 1) return -EINVAL; - update_sampling_rate(input); + + update_sampling_rate(dbs_data, input); return count; } -static ssize_t store_io_is_busy(struct kobject *a, struct attribute *b, - const char *buf, size_t count) +static ssize_t store_io_is_busy(struct dbs_data *dbs_data, const char *buf, + size_t count) { + struct od_dbs_tuners *od_tuners = dbs_data->tuners; unsigned int input; int ret; ret = sscanf(buf, "%u", &input); if (ret != 1) return -EINVAL; - od_tuners.io_is_busy = !!input; + od_tuners->io_is_busy = !!input; return count; } -static ssize_t store_up_threshold(struct kobject *a, struct attribute *b, - const char *buf, size_t count) +static ssize_t store_up_threshold(struct dbs_data *dbs_data, const char *buf, + size_t count) { + struct od_dbs_tuners *od_tuners = dbs_data->tuners; unsigned int input; int ret; ret = sscanf(buf, "%u", &input); @@ -359,23 +360,24 @@ static ssize_t store_up_threshold(struct kobject *a, struct attribute *b, return -EINVAL; } /* Calculate the new adj_up_threshold */ - od_tuners.adj_up_threshold += input; - od_tuners.adj_up_threshold -= od_tuners.up_threshold; + od_tuners->adj_up_threshold += input; + od_tuners->adj_up_threshold -= od_tuners->up_threshold; - od_tuners.up_threshold = input; + od_tuners->up_threshold = input; return count; } -static ssize_t store_sampling_down_factor(struct kobject *a, - struct attribute *b, const char *buf, size_t count) +static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data, + const char *buf, size_t count) { + struct od_dbs_tuners *od_tuners = dbs_data->tuners; unsigned int input, j; int ret; ret = sscanf(buf, "%u", &input); if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1) return -EINVAL; - od_tuners.sampling_down_factor = input; + od_tuners->sampling_down_factor = input; /* Reset down sampling multiplier in case it was active */ for_each_online_cpu(j) { @@ -386,9 +388,10 @@ static ssize_t store_sampling_down_factor(struct kobject *a, return count; } -static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b, - const char *buf, size_t count) +static ssize_t store_ignore_nice(struct dbs_data *dbs_data, const char *buf, + size_t count) { + struct od_dbs_tuners *od_tuners = dbs_data->tuners; unsigned int input; int ret; @@ -401,10 +404,10 @@ static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b, if (input > 1) input = 1; - if (input == od_tuners.ignore_nice) { /* nothing to do */ + if (input == od_tuners->ignore_nice) { /* nothing to do */ return count; } - od_tuners.ignore_nice = input; + od_tuners->ignore_nice = input; /* we need to re-evaluate prev_cpu_idle */ for_each_online_cpu(j) { @@ -412,7 +415,7 @@ static ssize_t store_ignore_nice_load(struct kobject *a, 
struct attribute *b, dbs_info = &per_cpu(od_cpu_dbs_info, j); dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j, &dbs_info->cdbs.prev_cpu_wall); - if (od_tuners.ignore_nice) + if (od_tuners->ignore_nice) dbs_info->cdbs.prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; @@ -420,9 +423,10 @@ static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b, return count; } -static ssize_t store_powersave_bias(struct kobject *a, struct attribute *b, - const char *buf, size_t count) +static ssize_t store_powersave_bias(struct dbs_data *dbs_data, const char *buf, + size_t count) { + struct od_dbs_tuners *od_tuners = dbs_data->tuners; unsigned int input; int ret; ret = sscanf(buf, "%u", &input); @@ -433,68 +437,138 @@ static ssize_t store_powersave_bias(struct kobject *a, struct attribute *b, if (input > 1000) input = 1000; - od_tuners.powersave_bias = input; + od_tuners->powersave_bias = input; ondemand_powersave_bias_init(); return count; } -show_one(od, sampling_rate, sampling_rate); -show_one(od, io_is_busy, io_is_busy); -show_one(od, up_threshold, up_threshold); -show_one(od, sampling_down_factor, sampling_down_factor); -show_one(od, ignore_nice_load, ignore_nice); -show_one(od, powersave_bias, powersave_bias); - -define_one_global_rw(sampling_rate); -define_one_global_rw(io_is_busy); -define_one_global_rw(up_threshold); -define_one_global_rw(sampling_down_factor); -define_one_global_rw(ignore_nice_load); -define_one_global_rw(powersave_bias); -define_one_global_ro(sampling_rate_min); - -static struct attribute *dbs_attributes[] = { - &sampling_rate_min.attr, - &sampling_rate.attr, - &up_threshold.attr, - &sampling_down_factor.attr, - &ignore_nice_load.attr, - &powersave_bias.attr, - &io_is_busy.attr, +show_store_one(od, sampling_rate); +show_store_one(od, io_is_busy); +show_store_one(od, up_threshold); +show_store_one(od, sampling_down_factor); +show_store_one(od, ignore_nice); +show_store_one(od, powersave_bias); +declare_show_sampling_rate_min(od); + +gov_sys_pol_attr_rw(sampling_rate); +gov_sys_pol_attr_rw(io_is_busy); +gov_sys_pol_attr_rw(up_threshold); +gov_sys_pol_attr_rw(sampling_down_factor); +gov_sys_pol_attr_rw(ignore_nice); +gov_sys_pol_attr_rw(powersave_bias); +gov_sys_pol_attr_ro(sampling_rate_min); + +static struct attribute *dbs_attributes_gov_sys[] = { + &sampling_rate_min_gov_sys.attr, + &sampling_rate_gov_sys.attr, + &up_threshold_gov_sys.attr, + &sampling_down_factor_gov_sys.attr, + &ignore_nice_gov_sys.attr, + &powersave_bias_gov_sys.attr, + &io_is_busy_gov_sys.attr, NULL }; -static struct attribute_group od_attr_group = { - .attrs = dbs_attributes, +static struct attribute_group od_attr_group_gov_sys = { + .attrs = dbs_attributes_gov_sys, + .name = "ondemand", +}; + +static struct attribute *dbs_attributes_gov_pol[] = { + &sampling_rate_min_gov_pol.attr, + &sampling_rate_gov_pol.attr, + &up_threshold_gov_pol.attr, + &sampling_down_factor_gov_pol.attr, + &ignore_nice_gov_pol.attr, + &powersave_bias_gov_pol.attr, + &io_is_busy_gov_pol.attr, + NULL +}; + +static struct attribute_group od_attr_group_gov_pol = { + .attrs = dbs_attributes_gov_pol, .name = "ondemand", }; /************************** sysfs end ************************/ +static int od_init(struct dbs_data *dbs_data) +{ + struct od_dbs_tuners *tuners; + u64 idle_time; + int cpu; + + tuners = kzalloc(sizeof(struct od_dbs_tuners), GFP_KERNEL); + if (!tuners) { + pr_err("%s: kzalloc failed\n", __func__); + return -ENOMEM; + } + + cpu = get_cpu(); + idle_time = get_cpu_idle_time_us(cpu, NULL); + 
put_cpu(); + if (idle_time != -1ULL) { + /* Idle micro accounting is supported. Use finer thresholds */ + tuners->up_threshold = MICRO_FREQUENCY_UP_THRESHOLD; + tuners->adj_up_threshold = MICRO_FREQUENCY_UP_THRESHOLD - + MICRO_FREQUENCY_DOWN_DIFFERENTIAL; + /* + * In nohz/micro accounting case we set the minimum frequency + * not depending on HZ, but fixed (very low). The deferred + * timer might skip some samples if idle/sleeping as needed. + */ + dbs_data->min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE; + } else { + tuners->up_threshold = DEF_FREQUENCY_UP_THRESHOLD; + tuners->adj_up_threshold = DEF_FREQUENCY_UP_THRESHOLD - + DEF_FREQUENCY_DOWN_DIFFERENTIAL; + + /* For correct statistics, we need 10 ticks for each measure */ + dbs_data->min_sampling_rate = MIN_SAMPLING_RATE_RATIO * + jiffies_to_usecs(10); + } + + tuners->sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR; + tuners->ignore_nice = 0; + tuners->powersave_bias = 0; + tuners->io_is_busy = should_io_be_busy(); + + dbs_data->tuners = tuners; + pr_info("%s: tuners %p\n", __func__, tuners); + mutex_init(&dbs_data->mutex); + return 0; +} + +static void od_exit(struct dbs_data *dbs_data) +{ + kfree(dbs_data->tuners); +} + define_get_cpu_dbs_routines(od_cpu_dbs_info); static struct od_ops od_ops = { - .io_busy = should_io_be_busy, .powersave_bias_init_cpu = ondemand_powersave_bias_init_cpu, .powersave_bias_target = powersave_bias_target, .freq_increase = dbs_freq_increase, }; -static struct dbs_data od_dbs_data = { +static struct common_dbs_data od_dbs_cdata = { .governor = GOV_ONDEMAND, - .attr_group = &od_attr_group, - .tuners = &od_tuners, + .attr_group_gov_sys = &od_attr_group_gov_sys, + .attr_group_gov_pol = &od_attr_group_gov_pol, .get_cpu_cdbs = get_cpu_cdbs, .get_cpu_dbs_info_s = get_cpu_dbs_info_s, .gov_dbs_timer = od_dbs_timer, .gov_check_cpu = od_check_cpu, .gov_ops = &od_ops, + .init = od_init, + .exit = od_exit, }; static int od_cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event) { - return cpufreq_governor_dbs(&od_dbs_data, policy, event); + return cpufreq_governor_dbs(policy, &od_dbs_cdata, event); } #ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND @@ -509,29 +583,6 @@ struct cpufreq_governor cpufreq_gov_ondemand = { static int __init cpufreq_gov_dbs_init(void) { - u64 idle_time; - int cpu = get_cpu(); - - mutex_init(&od_dbs_data.mutex); - idle_time = get_cpu_idle_time_us(cpu, NULL); - put_cpu(); - if (idle_time != -1ULL) { - /* Idle micro accounting is supported. Use finer thresholds */ - od_tuners.up_threshold = MICRO_FREQUENCY_UP_THRESHOLD; - od_tuners.adj_up_threshold = MICRO_FREQUENCY_UP_THRESHOLD - - MICRO_FREQUENCY_DOWN_DIFFERENTIAL; - /* - * In nohz/micro accounting case we set the minimum frequency - * not depending on HZ, but fixed (very low). The deferred - * timer might skip some samples if idle/sleeping as needed. - */ - od_dbs_data.min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE; - } else { - /* For correct statistics, we need 10 ticks for each measure */ - od_dbs_data.min_sampling_rate = MIN_SAMPLING_RATE_RATIO * - jiffies_to_usecs(10); - } - return cpufreq_register_governor(&cpufreq_gov_ondemand); } diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index b7393b56f552..4bbc572dd521 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -232,6 +232,13 @@ struct cpufreq_driver { struct module *owner; char name[CPUFREQ_NAME_LEN]; u8 flags; + /* + * This should be set by platforms having multiple clock-domains, i.e. + * supporting multiple policies. 
With this sysfs directories of governor + * would be created in cpu/cpu/cpufreq/ directory and so they can + * use the same governor with different tunables for different clusters. + */ + bool have_governor_per_policy; /* needed by all drivers */ int (*init) (struct cpufreq_policy *policy); @@ -332,6 +339,7 @@ const char *cpufreq_get_current_driver(void); *********************************************************************/ int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu); int cpufreq_update_policy(unsigned int cpu); +bool have_governor_per_policy(void); #ifdef CONFIG_CPU_FREQ /* query the current CPU frequency (in kHz). If zero, cpufreq couldn't detect it */ -- cgit From 6a8e95b071ecf7357d294782b6ef4e707ce0fbbd Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Mon, 25 Mar 2013 21:34:51 +0800 Subject: ARM: mxs: move icoll driver into drivers/irqchip Move icoll.c into drivers/irqchip as irq-mxs.c, and along with the renaming, change the driver to use IRQCHIP_DECLARE. Signed-off-by: Shawn Guo --- arch/arm/mach-mxs/Makefile | 2 +- arch/arm/mach-mxs/icoll.c | 128 -------------------------------- arch/arm/mach-mxs/include/mach/common.h | 3 - arch/arm/mach-mxs/mach-mxs.c | 6 +- drivers/irqchip/Makefile | 1 + drivers/irqchip/irq-mxs.c | 121 ++++++++++++++++++++++++++++++ include/linux/irqchip/mxs.h | 14 ++++ 7 files changed, 141 insertions(+), 134 deletions(-) delete mode 100644 arch/arm/mach-mxs/icoll.c create mode 100644 drivers/irqchip/irq-mxs.c create mode 100644 include/linux/irqchip/mxs.h (limited to 'include/linux') diff --git a/arch/arm/mach-mxs/Makefile b/arch/arm/mach-mxs/Makefile index 76c336e6f5b5..b934603e2765 100644 --- a/arch/arm/mach-mxs/Makefile +++ b/arch/arm/mach-mxs/Makefile @@ -1,5 +1,5 @@ # Common support -obj-y := icoll.o ocotp.o system.o mm.o +obj-y := ocotp.o system.o mm.o obj-$(CONFIG_PM) += pm.o diff --git a/arch/arm/mach-mxs/icoll.c b/arch/arm/mach-mxs/icoll.c deleted file mode 100644 index b4d620765cf1..000000000000 --- a/arch/arm/mach-mxs/icoll.c +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright (C) 2009-2010 Freescale Semiconductor, Inc. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define HW_ICOLL_VECTOR 0x0000 -#define HW_ICOLL_LEVELACK 0x0010 -#define HW_ICOLL_CTRL 0x0020 -#define HW_ICOLL_STAT_OFFSET 0x0070 -#define HW_ICOLL_INTERRUPTn_SET(n) (0x0124 + (n) * 0x10) -#define HW_ICOLL_INTERRUPTn_CLR(n) (0x0128 + (n) * 0x10) -#define BM_ICOLL_INTERRUPTn_ENABLE 0x00000004 -#define BV_ICOLL_LEVELACK_IRQLEVELACK__LEVEL0 0x1 - -#define ICOLL_NUM_IRQS 128 - -static void __iomem *icoll_base; -static struct irq_domain *icoll_domain; - -static void icoll_ack_irq(struct irq_data *d) -{ - /* - * The Interrupt Collector is able to prioritize irqs. 
- * Currently only level 0 is used. So acking can use - * BV_ICOLL_LEVELACK_IRQLEVELACK__LEVEL0 unconditionally. - */ - __raw_writel(BV_ICOLL_LEVELACK_IRQLEVELACK__LEVEL0, - icoll_base + HW_ICOLL_LEVELACK); -} - -static void icoll_mask_irq(struct irq_data *d) -{ - __raw_writel(BM_ICOLL_INTERRUPTn_ENABLE, - icoll_base + HW_ICOLL_INTERRUPTn_CLR(d->hwirq)); -} - -static void icoll_unmask_irq(struct irq_data *d) -{ - __raw_writel(BM_ICOLL_INTERRUPTn_ENABLE, - icoll_base + HW_ICOLL_INTERRUPTn_SET(d->hwirq)); -} - -static struct irq_chip mxs_icoll_chip = { - .irq_ack = icoll_ack_irq, - .irq_mask = icoll_mask_irq, - .irq_unmask = icoll_unmask_irq, -}; - -asmlinkage void __exception_irq_entry icoll_handle_irq(struct pt_regs *regs) -{ - u32 irqnr; - - do { - irqnr = __raw_readl(icoll_base + HW_ICOLL_STAT_OFFSET); - if (irqnr != 0x7f) { - __raw_writel(irqnr, icoll_base + HW_ICOLL_VECTOR); - irqnr = irq_find_mapping(icoll_domain, irqnr); - handle_IRQ(irqnr, regs); - continue; - } - break; - } while (1); -} - -static int icoll_irq_domain_map(struct irq_domain *d, unsigned int virq, - irq_hw_number_t hw) -{ - irq_set_chip_and_handler(virq, &mxs_icoll_chip, handle_level_irq); - set_irq_flags(virq, IRQF_VALID); - - return 0; -} - -static struct irq_domain_ops icoll_irq_domain_ops = { - .map = icoll_irq_domain_map, - .xlate = irq_domain_xlate_onecell, -}; - -static void __init icoll_of_init(struct device_node *np, - struct device_node *interrupt_parent) -{ - icoll_base = of_iomap(np, 0); - WARN_ON(!icoll_base); - - /* - * Interrupt Collector reset, which initializes the priority - * for each irq to level 0. - */ - stmp_reset_block(icoll_base + HW_ICOLL_CTRL); - - icoll_domain = irq_domain_add_linear(np, ICOLL_NUM_IRQS, - &icoll_irq_domain_ops, NULL); - WARN_ON(!icoll_domain); -} - -static const struct of_device_id icoll_of_match[] __initconst = { - {.compatible = "fsl,icoll", .data = icoll_of_init}, - { /* sentinel */ } -}; - -void __init icoll_init_irq(void) -{ - of_irq_init(icoll_of_match); -} diff --git a/arch/arm/mach-mxs/include/mach/common.h b/arch/arm/mach-mxs/include/mach/common.h index e043c4735b5a..df2a4ef14dae 100644 --- a/arch/arm/mach-mxs/include/mach/common.h +++ b/arch/arm/mach-mxs/include/mach/common.h @@ -22,7 +22,4 @@ extern void mx23_map_io(void); extern int mx28_clocks_init(void); extern void mx28_map_io(void); -extern void icoll_init_irq(void); -extern void icoll_handle_irq(struct pt_regs *); - #endif /* __MACH_MXS_COMMON_H__ */ diff --git a/arch/arm/mach-mxs/mach-mxs.c b/arch/arm/mach-mxs/mach-mxs.c index c1c0fb414c28..10506381b446 100644 --- a/arch/arm/mach-mxs/mach-mxs.c +++ b/arch/arm/mach-mxs/mach-mxs.c @@ -18,6 +18,8 @@ #include #include #include +#include +#include #include #include #include @@ -469,7 +471,7 @@ static const char *imx28_dt_compat[] __initdata = { DT_MACHINE_START(IMX23, "Freescale i.MX23 (Device Tree)") .map_io = mx23_map_io, - .init_irq = icoll_init_irq, + .init_irq = irqchip_init, .handle_irq = icoll_handle_irq, .init_time = imx23_timer_init, .init_machine = mxs_machine_init, @@ -479,7 +481,7 @@ MACHINE_END DT_MACHINE_START(IMX28, "Freescale i.MX28 (Device Tree)") .map_io = mx28_map_io, - .init_irq = icoll_init_irq, + .init_irq = irqchip_init, .handle_irq = icoll_handle_irq, .init_time = imx28_timer_init, .init_machine = mxs_machine_init, diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile index 98e3b87bdf1b..9d8f4f1c6e39 100644 --- a/drivers/irqchip/Makefile +++ b/drivers/irqchip/Makefile @@ -2,6 +2,7 @@ obj-$(CONFIG_IRQCHIP) += irqchip.o 
obj-$(CONFIG_ARCH_BCM2835) += irq-bcm2835.o obj-$(CONFIG_ARCH_EXYNOS) += exynos-combiner.o +obj-$(CONFIG_ARCH_MXS) += irq-mxs.o obj-$(CONFIG_METAG) += irq-metag-ext.o obj-$(CONFIG_METAG_PERFCOUNTER_IRQS) += irq-metag.o obj-$(CONFIG_ARCH_SUNXI) += irq-sunxi.o diff --git a/drivers/irqchip/irq-mxs.c b/drivers/irqchip/irq-mxs.c new file mode 100644 index 000000000000..29889bbdcc6d --- /dev/null +++ b/drivers/irqchip/irq-mxs.c @@ -0,0 +1,121 @@ +/* + * Copyright (C) 2009-2010 Freescale Semiconductor, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "irqchip.h" + +#define HW_ICOLL_VECTOR 0x0000 +#define HW_ICOLL_LEVELACK 0x0010 +#define HW_ICOLL_CTRL 0x0020 +#define HW_ICOLL_STAT_OFFSET 0x0070 +#define HW_ICOLL_INTERRUPTn_SET(n) (0x0124 + (n) * 0x10) +#define HW_ICOLL_INTERRUPTn_CLR(n) (0x0128 + (n) * 0x10) +#define BM_ICOLL_INTERRUPTn_ENABLE 0x00000004 +#define BV_ICOLL_LEVELACK_IRQLEVELACK__LEVEL0 0x1 + +#define ICOLL_NUM_IRQS 128 + +static void __iomem *icoll_base; +static struct irq_domain *icoll_domain; + +static void icoll_ack_irq(struct irq_data *d) +{ + /* + * The Interrupt Collector is able to prioritize irqs. + * Currently only level 0 is used. So acking can use + * BV_ICOLL_LEVELACK_IRQLEVELACK__LEVEL0 unconditionally. 
+ */
+ __raw_writel(BV_ICOLL_LEVELACK_IRQLEVELACK__LEVEL0,
+ icoll_base + HW_ICOLL_LEVELACK);
+}
+
+static void icoll_mask_irq(struct irq_data *d)
+{
+ __raw_writel(BM_ICOLL_INTERRUPTn_ENABLE,
+ icoll_base + HW_ICOLL_INTERRUPTn_CLR(d->hwirq));
+}
+
+static void icoll_unmask_irq(struct irq_data *d)
+{
+ __raw_writel(BM_ICOLL_INTERRUPTn_ENABLE,
+ icoll_base + HW_ICOLL_INTERRUPTn_SET(d->hwirq));
+}
+
+static struct irq_chip mxs_icoll_chip = {
+ .irq_ack = icoll_ack_irq,
+ .irq_mask = icoll_mask_irq,
+ .irq_unmask = icoll_unmask_irq,
+};
+
+asmlinkage void __exception_irq_entry icoll_handle_irq(struct pt_regs *regs)
+{
+ u32 irqnr;
+
+ do {
+ irqnr = __raw_readl(icoll_base + HW_ICOLL_STAT_OFFSET);
+ if (irqnr != 0x7f) {
+ __raw_writel(irqnr, icoll_base + HW_ICOLL_VECTOR);
+ irqnr = irq_find_mapping(icoll_domain, irqnr);
+ handle_IRQ(irqnr, regs);
+ continue;
+ }
+ break;
+ } while (1);
+}
+
+static int icoll_irq_domain_map(struct irq_domain *d, unsigned int virq,
+ irq_hw_number_t hw)
+{
+ irq_set_chip_and_handler(virq, &mxs_icoll_chip, handle_level_irq);
+ set_irq_flags(virq, IRQF_VALID);
+
+ return 0;
+}
+
+static struct irq_domain_ops icoll_irq_domain_ops = {
+ .map = icoll_irq_domain_map,
+ .xlate = irq_domain_xlate_onecell,
+};
+
+static void __init icoll_of_init(struct device_node *np,
+ struct device_node *interrupt_parent)
+{
+ icoll_base = of_iomap(np, 0);
+ WARN_ON(!icoll_base);
+
+ /*
+ * Interrupt Collector reset, which initializes the priority
+ * for each irq to level 0.
+ */
+ stmp_reset_block(icoll_base + HW_ICOLL_CTRL);
+
+ icoll_domain = irq_domain_add_linear(np, ICOLL_NUM_IRQS,
+ &icoll_irq_domain_ops, NULL);
+ WARN_ON(!icoll_domain);
+}
+IRQCHIP_DECLARE(mxs, "fsl,icoll", icoll_of_init);
diff --git a/include/linux/irqchip/mxs.h b/include/linux/irqchip/mxs.h
new file mode 100644
index 000000000000..9039a538a919
--- /dev/null
+++ b/include/linux/irqchip/mxs.h
@@ -0,0 +1,14 @@
+/*
+ * Copyright (C) 2013 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __LINUX_IRQCHIP_MXS_H
+#define __LINUX_IRQCHIP_MXS_H
+
+extern void icoll_handle_irq(struct pt_regs *);
+
+#endif
-- cgit
From 547f384f33dbd6171607f925ab246e25e315961e Mon Sep 17 00:00:00 2001
From: Lee Jones
Date: Thu, 28 Mar 2013 16:11:14 +0000
Subject: regulator: ab8500: add support for ab8505

To obtain full AB8505 regulator support, the AB8500 regulator driver first needs to know the AB8505's register layout and the initialisation value for each register. That information is provided here via a couple of large data structures.
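As orientation for the tables that follow: each entry names a register, the bits a board may program, and the value for those bits. Below is a minimal stand-alone sketch of that masked update, assuming only the fields of struct ab8500_regulator_reg_init; the helper name is illustrative, not part of the driver.

#include <stdint.h>

/* One entry of a register-init table: 'mask' limits which bits of the
 * register may be programmed, 'value' supplies those bits. */
struct reg_init_entry {
	int id;		/* index into the chip's register enum */
	uint8_t mask;	/* bits a board is allowed to touch */
	uint8_t value;	/* value for the bits under 'mask' */
};

/* Merge an entry into a register's current contents: bits outside the
 * mask are preserved, bits inside are forced to 'value'.  This is the
 * same update ab8500_modify_reg_init() performs in the board file
 * below, where BUG_ON(mask & ~reg_init[i].mask) additionally keeps a
 * board tweak from reaching outside the declared writable bits. */
static uint8_t apply_reg_init(const struct reg_init_entry *e, uint8_t cur)
{
	return (cur & ~e->mask) | (e->value & e->mask);
}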
Signed-off-by: Lee Jones Signed-off-by: Mark Brown --- arch/arm/mach-ux500/board-mop500-regulators.c | 511 ++++++++++++++++++++- arch/arm/mach-ux500/board-mop500-regulators.h | 1 + drivers/regulator/ab8500.c | 617 ++++++++++++++++++++++++++ include/linux/regulator/ab8500.h | 75 +++- 4 files changed, 1195 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-ux500/board-mop500-regulators.c b/arch/arm/mach-ux500/board-mop500-regulators.c index c1173a161a04..816151903d46 100644 --- a/arch/arm/mach-ux500/board-mop500-regulators.c +++ b/arch/arm/mach-ux500/board-mop500-regulators.c @@ -5,6 +5,7 @@ * * Authors: Sundar Iyer * Bengt Jonsson + * Daniel Willerud * * MOP500 board specific initialization for regulators */ @@ -99,6 +100,27 @@ static struct regulator_consumer_supply ab8500_vaux3_consumers[] = { REGULATOR_SUPPLY("vmmc", "sdi0"), }; +static struct regulator_consumer_supply ab8505_vaux4_consumers[] = { +}; + +static struct regulator_consumer_supply ab8505_vaux5_consumers[] = { +}; + +static struct regulator_consumer_supply ab8505_vaux6_consumers[] = { +}; + +static struct regulator_consumer_supply ab8505_vaux8_consumers[] = { + /* AB8500 audio codec device */ + REGULATOR_SUPPLY("v-aux8", NULL), +}; + +static struct regulator_consumer_supply ab8505_vadc_consumers[] = { + /* Internal general-purpose ADC */ + REGULATOR_SUPPLY("vddadc", "ab8500-gpadc.0"), + /* ADC for charger */ + REGULATOR_SUPPLY("vddadc", "ab8500-charger.0"), +}; + static struct regulator_consumer_supply ab8500_vtvout_consumers[] = { /* TV-out DENC supply */ REGULATOR_SUPPLY("vtvout", "ab8500-denc.0"), @@ -133,6 +155,11 @@ static struct regulator_consumer_supply ab8500_vintcore_consumers[] = { REGULATOR_SUPPLY("vddulpivio18", "ab8500-usb.0"), }; +static struct regulator_consumer_supply ab8505_usb_consumers[] = { + /* HS USB OTG physical interface */ + REGULATOR_SUPPLY("v-ape", NULL), +}; + static struct regulator_consumer_supply ab8500_vana_consumers[] = { /* External displays, connector on board, 1v8 power supply */ REGULATOR_SUPPLY("vsmps2", "mcde.0"), @@ -469,6 +496,450 @@ static struct regulator_init_data ab8500_ext_regulators[] = { }, }; +/* ab8505 regulator register initialization */ +static struct ab8500_regulator_reg_init ab8505_reg_init[] = { + /* + * VarmRequestCtrl + * VsmpsCRequestCtrl + * VsmpsARequestCtrl + * VsmpsBRequestCtrl + */ + INIT_REGULATOR_REGISTER(AB8505_REGUREQUESTCTRL1, 0x00, 0x00), + /* + * VsafeRequestCtrl + * VpllRequestCtrl + * VanaRequestCtrl = HP/LP depending on VxRequest + */ + INIT_REGULATOR_REGISTER(AB8505_REGUREQUESTCTRL2, 0x30, 0x00), + /* + * Vaux1RequestCtrl = HP/LP depending on VxRequest + * Vaux2RequestCtrl = HP/LP depending on VxRequest + */ + INIT_REGULATOR_REGISTER(AB8505_REGUREQUESTCTRL3, 0xf0, 0x00), + /* + * Vaux3RequestCtrl = HP/LP depending on VxRequest + * SwHPReq = Control through SWValid disabled + */ + INIT_REGULATOR_REGISTER(AB8505_REGUREQUESTCTRL4, 0x07, 0x00), + /* + * VsmpsASysClkReq1HPValid + * VsmpsBSysClkReq1HPValid + * VsafeSysClkReq1HPValid + * VanaSysClkReq1HPValid = disabled + * VpllSysClkReq1HPValid + * Vaux1SysClkReq1HPValid = disabled + * Vaux2SysClkReq1HPValid = disabled + * Vaux3SysClkReq1HPValid = disabled + */ + INIT_REGULATOR_REGISTER(AB8505_REGUSYSCLKREQ1HPVALID1, 0xe8, 0x00), + /* + * VsmpsCSysClkReq1HPValid + * VarmSysClkReq1HPValid + * VbbSysClkReq1HPValid + * VsmpsMSysClkReq1HPValid + */ + INIT_REGULATOR_REGISTER(AB8505_REGUSYSCLKREQ1HPVALID2, 0x00, 0x00), + /* + * VsmpsAHwHPReq1Valid + * VsmpsBHwHPReq1Valid + * 
VsafeHwHPReq1Valid + * VanaHwHPReq1Valid = disabled + * VpllHwHPReq1Valid + * Vaux1HwHPreq1Valid = disabled + * Vaux2HwHPReq1Valid = disabled + * Vaux3HwHPReqValid = disabled + */ + INIT_REGULATOR_REGISTER(AB8505_REGUHWHPREQ1VALID1, 0xe8, 0x00), + /* + * VsmpsMHwHPReq1Valid + */ + INIT_REGULATOR_REGISTER(AB8505_REGUHWHPREQ1VALID2, 0x00, 0x00), + /* + * VsmpsAHwHPReq2Valid + * VsmpsBHwHPReq2Valid + * VsafeHwHPReq2Valid + * VanaHwHPReq2Valid = disabled + * VpllHwHPReq2Valid + * Vaux1HwHPReq2Valid = disabled + * Vaux2HwHPReq2Valid = disabled + * Vaux3HwHPReq2Valid = disabled + */ + INIT_REGULATOR_REGISTER(AB8505_REGUHWHPREQ2VALID1, 0xe8, 0x00), + /* + * VsmpsMHwHPReq2Valid + */ + INIT_REGULATOR_REGISTER(AB8505_REGUHWHPREQ2VALID2, 0x00, 0x00), + /** + * VsmpsCSwHPReqValid + * VarmSwHPReqValid + * VsmpsASwHPReqValid + * VsmpsBSwHPReqValid + * VsafeSwHPReqValid + * VanaSwHPReqValid + * VanaSwHPReqValid = disabled + * VpllSwHPReqValid + * Vaux1SwHPReqValid = disabled + */ + INIT_REGULATOR_REGISTER(AB8505_REGUSWHPREQVALID1, 0xa0, 0x00), + /* + * Vaux2SwHPReqValid = disabled + * Vaux3SwHPReqValid = disabled + * VsmpsMSwHPReqValid + */ + INIT_REGULATOR_REGISTER(AB8505_REGUSWHPREQVALID2, 0x03, 0x00), + /* + * SysClkReq2Valid1 = SysClkReq2 controlled + * SysClkReq3Valid1 = disabled + * SysClkReq4Valid1 = SysClkReq4 controlled + */ + INIT_REGULATOR_REGISTER(AB8505_REGUSYSCLKREQVALID1, 0x0e, 0x0a), + /* + * SysClkReq2Valid2 = disabled + * SysClkReq3Valid2 = disabled + * SysClkReq4Valid2 = disabled + */ + INIT_REGULATOR_REGISTER(AB8505_REGUSYSCLKREQVALID2, 0x0e, 0x00), + /* + * Vaux4SwHPReqValid + * Vaux4HwHPReq2Valid + * Vaux4HwHPReq1Valid + * Vaux4SysClkReq1HPValid + */ + INIT_REGULATOR_REGISTER(AB8505_REGUVAUX4REQVALID, 0x00, 0x00), + /* + * VadcEna = disabled + * VintCore12Ena = disabled + * VintCore12Sel = 1.25 V + * VintCore12LP = inactive (HP) + * VadcLP = inactive (HP) + */ + INIT_REGULATOR_REGISTER(AB8505_REGUMISC1, 0xfe, 0x10), + /* + * VaudioEna = disabled + * Vaux8Ena = disabled + * Vamic1Ena = disabled + * Vamic2Ena = disabled + */ + INIT_REGULATOR_REGISTER(AB8505_VAUDIOSUPPLY, 0x1e, 0x00), + /* + * Vamic1_dzout = high-Z when Vamic1 is disabled + * Vamic2_dzout = high-Z when Vamic2 is disabled + */ + INIT_REGULATOR_REGISTER(AB8505_REGUCTRL1VAMIC, 0x03, 0x00), + /* + * VsmpsARegu + * VsmpsASelCtrl + * VsmpsAAutoMode + * VsmpsAPWMMode + */ + INIT_REGULATOR_REGISTER(AB8505_VSMPSAREGU, 0x00, 0x00), + /* + * VsmpsBRegu + * VsmpsBSelCtrl + * VsmpsBAutoMode + * VsmpsBPWMMode + */ + INIT_REGULATOR_REGISTER(AB8505_VSMPSBREGU, 0x00, 0x00), + /* + * VsafeRegu + * VsafeSelCtrl + * VsafeAutoMode + * VsafePWMMode + */ + INIT_REGULATOR_REGISTER(AB8505_VSAFEREGU, 0x00, 0x00), + /* + * VPll = Hw controlled (NOTE! 
PRCMU bits) + * VanaRegu = force off + */ + INIT_REGULATOR_REGISTER(AB8505_VPLLVANAREGU, 0x0f, 0x02), + /* + * VextSupply1Regu = force OFF (OTP_ExtSupply12LPnPolarity 1) + * VextSupply2Regu = force OFF (OTP_ExtSupply12LPnPolarity 1) + * VextSupply3Regu = force OFF (OTP_ExtSupply3LPnPolarity 0) + * ExtSupply2Bypass = ExtSupply12LPn ball is 0 when Ena is 0 + * ExtSupply3Bypass = ExtSupply3LPn ball is 0 when Ena is 0 + */ + INIT_REGULATOR_REGISTER(AB8505_EXTSUPPLYREGU, 0xff, 0x30), + /* + * Vaux1Regu = force HP + * Vaux2Regu = force off + */ + INIT_REGULATOR_REGISTER(AB8505_VAUX12REGU, 0x0f, 0x01), + /* + * Vaux3Regu = force off + */ + INIT_REGULATOR_REGISTER(AB8505_VRF1VAUX3REGU, 0x03, 0x00), + /* + * VsmpsASel1 + */ + INIT_REGULATOR_REGISTER(AB8505_VSMPSASEL1, 0x00, 0x00), + /* + * VsmpsASel2 + */ + INIT_REGULATOR_REGISTER(AB8505_VSMPSASEL2, 0x00, 0x00), + /* + * VsmpsASel3 + */ + INIT_REGULATOR_REGISTER(AB8505_VSMPSASEL3, 0x00, 0x00), + /* + * VsmpsBSel1 + */ + INIT_REGULATOR_REGISTER(AB8505_VSMPSBSEL1, 0x00, 0x00), + /* + * VsmpsBSel2 + */ + INIT_REGULATOR_REGISTER(AB8505_VSMPSBSEL2, 0x00, 0x00), + /* + * VsmpsBSel3 + */ + INIT_REGULATOR_REGISTER(AB8505_VSMPSBSEL3, 0x00, 0x00), + /* + * VsafeSel1 + */ + INIT_REGULATOR_REGISTER(AB8505_VSAFESEL1, 0x00, 0x00), + /* + * VsafeSel2 + */ + INIT_REGULATOR_REGISTER(AB8505_VSAFESEL2, 0x00, 0x00), + /* + * VsafeSel3 + */ + INIT_REGULATOR_REGISTER(AB8505_VSAFESEL3, 0x00, 0x00), + /* + * Vaux1Sel = 2.8 V + */ + INIT_REGULATOR_REGISTER(AB8505_VAUX1SEL, 0x0f, 0x0C), + /* + * Vaux2Sel = 2.9 V + */ + INIT_REGULATOR_REGISTER(AB8505_VAUX2SEL, 0x0f, 0x0d), + /* + * Vaux3Sel = 2.91 V + */ + INIT_REGULATOR_REGISTER(AB8505_VRF1VAUX3SEL, 0x07, 0x07), + /* + * Vaux4RequestCtrl + */ + INIT_REGULATOR_REGISTER(AB8505_VAUX4REQCTRL, 0x00, 0x00), + /* + * Vaux4Regu + */ + INIT_REGULATOR_REGISTER(AB8505_VAUX4REGU, 0x00, 0x00), + /* + * Vaux4Sel + */ + INIT_REGULATOR_REGISTER(AB8505_VAUX4SEL, 0x00, 0x00), + /* + * Vaux1Disch = short discharge time + * Vaux2Disch = short discharge time + * Vaux3Disch = short discharge time + * Vintcore12Disch = short discharge time + * VTVoutDisch = short discharge time + * VaudioDisch = short discharge time + */ + INIT_REGULATOR_REGISTER(AB8505_REGUCTRLDISCH, 0xfc, 0x00), + /* + * VanaDisch = short discharge time + * Vaux8PullDownEna = pulldown disabled when Vaux8 is disabled + * Vaux8Disch = short discharge time + */ + INIT_REGULATOR_REGISTER(AB8505_REGUCTRLDISCH2, 0x16, 0x00), + /* + * Vaux4Disch = short discharge time + */ + INIT_REGULATOR_REGISTER(AB8505_REGUCTRLDISCH3, 0x01, 0x00), + /* + * Vaux5Sel + * Vaux5LP + * Vaux5Ena + * Vaux5Disch + * Vaux5DisSfst + * Vaux5DisPulld + */ + INIT_REGULATOR_REGISTER(AB8505_CTRLVAUX5, 0x00, 0x00), + /* + * Vaux6Sel + * Vaux6LP + * Vaux6Ena + * Vaux6DisPulld + */ + INIT_REGULATOR_REGISTER(AB8505_CTRLVAUX6, 0x00, 0x00), +}; + +struct regulator_init_data ab8505_regulators[AB8505_NUM_REGULATORS] = { + /* supplies to the display/camera */ + [AB8505_LDO_AUX1] = { + .constraints = { + .name = "V-DISPLAY", + .min_uV = 2800000, + .max_uV = 3300000, + .valid_ops_mask = REGULATOR_CHANGE_VOLTAGE | + REGULATOR_CHANGE_STATUS, + .boot_on = 1, /* display is on at boot */ + }, + .num_consumer_supplies = ARRAY_SIZE(ab8500_vaux1_consumers), + .consumer_supplies = ab8500_vaux1_consumers, + }, + /* supplies to the on-board eMMC */ + [AB8505_LDO_AUX2] = { + .constraints = { + .name = "V-eMMC1", + .min_uV = 1100000, + .max_uV = 3300000, + .valid_ops_mask = REGULATOR_CHANGE_VOLTAGE | + REGULATOR_CHANGE_STATUS | + 
REGULATOR_CHANGE_MODE, + .valid_modes_mask = REGULATOR_MODE_NORMAL | + REGULATOR_MODE_IDLE, + }, + .num_consumer_supplies = ARRAY_SIZE(ab8500_vaux2_consumers), + .consumer_supplies = ab8500_vaux2_consumers, + }, + /* supply for VAUX3, supplies to SDcard slots */ + [AB8505_LDO_AUX3] = { + .constraints = { + .name = "V-MMC-SD", + .min_uV = 1100000, + .max_uV = 3300000, + .valid_ops_mask = REGULATOR_CHANGE_VOLTAGE | + REGULATOR_CHANGE_STATUS | + REGULATOR_CHANGE_MODE, + .valid_modes_mask = REGULATOR_MODE_NORMAL | + REGULATOR_MODE_IDLE, + }, + .num_consumer_supplies = ARRAY_SIZE(ab8500_vaux3_consumers), + .consumer_supplies = ab8500_vaux3_consumers, + }, + /* supply for VAUX4, supplies to NFC and standalone secure element */ + [AB8505_LDO_AUX4] = { + .constraints = { + .name = "V-NFC-SE", + .min_uV = 1100000, + .max_uV = 3300000, + .valid_ops_mask = REGULATOR_CHANGE_VOLTAGE | + REGULATOR_CHANGE_STATUS | + REGULATOR_CHANGE_MODE, + .valid_modes_mask = REGULATOR_MODE_NORMAL | + REGULATOR_MODE_IDLE, + }, + .num_consumer_supplies = ARRAY_SIZE(ab8505_vaux4_consumers), + .consumer_supplies = ab8505_vaux4_consumers, + }, + /* supply for VAUX5, supplies to TBD */ + [AB8505_LDO_AUX5] = { + .constraints = { + .name = "V-AUX5", + .min_uV = 1050000, + .max_uV = 2790000, + .valid_ops_mask = REGULATOR_CHANGE_VOLTAGE | + REGULATOR_CHANGE_STATUS | + REGULATOR_CHANGE_MODE, + .valid_modes_mask = REGULATOR_MODE_NORMAL | + REGULATOR_MODE_IDLE, + }, + .num_consumer_supplies = ARRAY_SIZE(ab8505_vaux5_consumers), + .consumer_supplies = ab8505_vaux5_consumers, + }, + /* supply for VAUX6, supplies to TBD */ + [AB8505_LDO_AUX6] = { + .constraints = { + .name = "V-AUX6", + .min_uV = 1050000, + .max_uV = 2790000, + .valid_ops_mask = REGULATOR_CHANGE_VOLTAGE | + REGULATOR_CHANGE_STATUS | + REGULATOR_CHANGE_MODE, + .valid_modes_mask = REGULATOR_MODE_NORMAL | + REGULATOR_MODE_IDLE, + }, + .num_consumer_supplies = ARRAY_SIZE(ab8505_vaux6_consumers), + .consumer_supplies = ab8505_vaux6_consumers, + }, + /* supply for gpadc, ADC LDO */ + [AB8505_LDO_ADC] = { + .constraints = { + .name = "V-ADC", + .valid_ops_mask = REGULATOR_CHANGE_STATUS, + }, + .num_consumer_supplies = ARRAY_SIZE(ab8505_vadc_consumers), + .consumer_supplies = ab8505_vadc_consumers, + }, + /* supply for ab8500-vaudio, VAUDIO LDO */ + [AB8505_LDO_AUDIO] = { + .constraints = { + .name = "V-AUD", + .valid_ops_mask = REGULATOR_CHANGE_STATUS, + }, + .num_consumer_supplies = ARRAY_SIZE(ab8500_vaud_consumers), + .consumer_supplies = ab8500_vaud_consumers, + }, + /* supply for v-anamic1 VAMic1-LDO */ + [AB8505_LDO_ANAMIC1] = { + .constraints = { + .name = "V-AMIC1", + .valid_ops_mask = REGULATOR_CHANGE_STATUS, + }, + .num_consumer_supplies = ARRAY_SIZE(ab8500_vamic1_consumers), + .consumer_supplies = ab8500_vamic1_consumers, + }, + /* supply for v-amic2, VAMIC2 LDO, reuse constants for AMIC1 */ + [AB8505_LDO_ANAMIC2] = { + .constraints = { + .name = "V-AMIC2", + .valid_ops_mask = REGULATOR_CHANGE_STATUS, + }, + .num_consumer_supplies = ARRAY_SIZE(ab8500_vamic2_consumers), + .consumer_supplies = ab8500_vamic2_consumers, + }, + /* supply for v-aux8, VAUX8 LDO */ + [AB8505_LDO_AUX8] = { + .constraints = { + .name = "V-AUX8", + .valid_ops_mask = REGULATOR_CHANGE_STATUS, + }, + .num_consumer_supplies = ARRAY_SIZE(ab8505_vaux8_consumers), + .consumer_supplies = ab8505_vaux8_consumers, + }, + /* supply for v-intcore12, VINTCORE12 LDO */ + [AB8505_LDO_INTCORE] = { + .constraints = { + .name = "V-INTCORE", + .min_uV = 1250000, + .max_uV = 1350000, + .input_uV = 1800000, + 
.valid_ops_mask = REGULATOR_CHANGE_VOLTAGE | + REGULATOR_CHANGE_STATUS | + REGULATOR_CHANGE_MODE | + REGULATOR_CHANGE_DRMS, + .valid_modes_mask = REGULATOR_MODE_NORMAL | + REGULATOR_MODE_IDLE, + }, + .num_consumer_supplies = ARRAY_SIZE(ab8500_vintcore_consumers), + .consumer_supplies = ab8500_vintcore_consumers, + }, + /* supply for LDO USB */ + [AB8505_LDO_USB] = { + .constraints = { + .name = "V-USB", + .valid_ops_mask = REGULATOR_CHANGE_STATUS | + REGULATOR_CHANGE_MODE, + .valid_modes_mask = REGULATOR_MODE_NORMAL | + REGULATOR_MODE_IDLE, + }, + .num_consumer_supplies = ARRAY_SIZE(ab8505_usb_consumers), + .consumer_supplies = ab8505_usb_consumers, + }, + /* supply for U8500 CSI-DSI, VANA LDO */ + [AB8505_LDO_ANA] = { + .constraints = { + .name = "V-CSI-DSI", + .valid_ops_mask = REGULATOR_CHANGE_STATUS, + }, + .num_consumer_supplies = ARRAY_SIZE(ab8500_vana_consumers), + .consumer_supplies = ab8500_vana_consumers, + }, +}; + struct ab8500_regulator_platform_data ab8500_regulator_plat_data = { .reg_init = ab8500_reg_init, .num_reg_init = ARRAY_SIZE(ab8500_reg_init), @@ -478,18 +949,39 @@ struct ab8500_regulator_platform_data ab8500_regulator_plat_data = { .num_ext_regulator = ARRAY_SIZE(ab8500_ext_regulators), }; +/* Use the AB8500 init settings for AB8505 as they are the same right now */ +struct ab8500_regulator_platform_data ab8505_regulator_plat_data = { + .reg_init = ab8505_reg_init, + .num_reg_init = ARRAY_SIZE(ab8505_reg_init), + .regulator = ab8505_regulators, + .num_regulator = ARRAY_SIZE(ab8505_regulators), +}; + static void ab8500_modify_reg_init(int id, u8 mask, u8 value) { int i; - for (i = ARRAY_SIZE(ab8500_reg_init) - 1; i >= 0; i--) { - if (ab8500_reg_init[i].id == id) { - u8 initval = ab8500_reg_init[i].value; - initval = (initval & ~mask) | (value & mask); - ab8500_reg_init[i].value = initval; + if (cpu_is_u8520()) { + for (i = ARRAY_SIZE(ab8505_reg_init) - 1; i >= 0; i--) { + if (ab8505_reg_init[i].id == id) { + u8 initval = ab8505_reg_init[i].value; + initval = (initval & ~mask) | (value & mask); + ab8505_reg_init[i].value = initval; - BUG_ON(mask & ~ab8500_reg_init[i].mask); - return; + BUG_ON(mask & ~ab8505_reg_init[i].mask); + return; + } + } + } else { + for (i = ARRAY_SIZE(ab8500_reg_init) - 1; i >= 0; i--) { + if (ab8500_reg_init[i].id == id) { + u8 initval = ab8500_reg_init[i].value; + initval = (initval & ~mask) | (value & mask); + ab8500_reg_init[i].value = initval; + + BUG_ON(mask & ~ab8500_reg_init[i].mask); + return; + } } } @@ -511,6 +1003,11 @@ void mop500_regulator_init(void) regulator->constraints.state_standby.disabled = 1; } + if (cpu_is_u8520()) { + /* Vaux2 initialized to be on */ + ab8500_modify_reg_init(AB8505_VAUX12REGU, 0x0f, 0x05); + } + /* * Handle AB8500_EXT_SUPPLY2 on HREFP_V20_V50 boards (do it for * all HREFP_V20 boards) diff --git a/arch/arm/mach-ux500/board-mop500-regulators.h b/arch/arm/mach-ux500/board-mop500-regulators.h index 3d4c412d0b7a..9bece38fe933 100644 --- a/arch/arm/mach-ux500/board-mop500-regulators.h +++ b/arch/arm/mach-ux500/board-mop500-regulators.h @@ -15,6 +15,7 @@ #include extern struct ab8500_regulator_platform_data ab8500_regulator_plat_data; +extern struct ab8500_regulator_platform_data ab8505_regulator_plat_data; extern struct regulator_init_data tps61052_regulator; extern struct regulator_init_data gpio_en_3v3_regulator; diff --git a/drivers/regulator/ab8500.c b/drivers/regulator/ab8500.c index 9de3a211b0b4..1ab0f8a7c862 100644 --- a/drivers/regulator/ab8500.c +++ b/drivers/regulator/ab8500.c @@ -5,11 +5,15 @@ * * 
Authors: Sundar Iyer for ST-Ericsson * Bengt Jonsson for ST-Ericsson + * Daniel Willerud for ST-Ericsson * * AB8500 peripheral regulators * * AB8500 supports the following regulators: * VAUX1/2/3, VINTCORE, VTVOUT, VUSB, VAUDIO, VAMIC1/2, VDMIC, VANA + * + * AB8505 supports the following regulators: + * VAUX1/2/3/4/5/6, VINTCORE, VADC, VUSB, VAUDIO, VAMIC1/2, VDMIC, VANA */ #include #include @@ -92,6 +96,17 @@ static const unsigned int ldo_vaux3_voltages[] = { 2910000, }; +static const int ldo_vaux56_voltages[] = { + 1800000, + 1050000, + 1100000, + 1200000, + 1500000, + 2200000, + 2500000, + 2790000, +}; + static const unsigned int ldo_vintcore_voltages[] = { 1200000, 1225000, @@ -589,6 +604,313 @@ static struct ab8500_regulator_info }, }; +/* AB8505 regulator information */ +static struct ab8500_regulator_info + ab8505_regulator_info[AB8505_NUM_REGULATORS] = { + /* + * Variable Voltage Regulators + * name, min mV, max mV, + * update bank, reg, mask, enable val + * volt bank, reg, mask, table, table length + */ + [AB8505_LDO_AUX1] = { + .desc = { + .name = "LDO-AUX1", + .ops = &ab8500_regulator_volt_mode_ops, + .type = REGULATOR_VOLTAGE, + .id = AB8500_LDO_AUX1, + .owner = THIS_MODULE, + .n_voltages = ARRAY_SIZE(ldo_vauxn_voltages), + }, + .min_uV = 1100000, + .max_uV = 3300000, + .load_lp_uA = 5000, + .update_bank = 0x04, + .update_reg = 0x09, + .update_mask = 0x03, + .update_val = 0x01, + .update_val_idle = 0x03, + .update_val_normal = 0x01, + .voltage_bank = 0x04, + .voltage_reg = 0x1f, + .voltage_mask = 0x0f, + .voltages = ldo_vauxn_voltages, + .voltages_len = ARRAY_SIZE(ldo_vauxn_voltages), + }, + [AB8505_LDO_AUX2] = { + .desc = { + .name = "LDO-AUX2", + .ops = &ab8500_regulator_volt_mode_ops, + .type = REGULATOR_VOLTAGE, + .id = AB8500_LDO_AUX2, + .owner = THIS_MODULE, + .n_voltages = ARRAY_SIZE(ldo_vauxn_voltages), + }, + .min_uV = 1100000, + .max_uV = 3300000, + .load_lp_uA = 5000, + .update_bank = 0x04, + .update_reg = 0x09, + .update_mask = 0x0c, + .update_val = 0x04, + .update_val_idle = 0x0c, + .update_val_normal = 0x04, + .voltage_bank = 0x04, + .voltage_reg = 0x20, + .voltage_mask = 0x0f, + .voltages = ldo_vauxn_voltages, + .voltages_len = ARRAY_SIZE(ldo_vauxn_voltages), + }, + [AB8505_LDO_AUX3] = { + .desc = { + .name = "LDO-AUX3", + .ops = &ab8500_regulator_volt_mode_ops, + .type = REGULATOR_VOLTAGE, + .id = AB8500_LDO_AUX3, + .owner = THIS_MODULE, + .n_voltages = ARRAY_SIZE(ldo_vaux3_voltages), + }, + .min_uV = 1100000, + .max_uV = 3300000, + .load_lp_uA = 5000, + .update_bank = 0x04, + .update_reg = 0x0a, + .update_mask = 0x03, + .update_val = 0x01, + .update_val_idle = 0x03, + .update_val_normal = 0x01, + .voltage_bank = 0x04, + .voltage_reg = 0x21, + .voltage_mask = 0x07, + .voltages = ldo_vaux3_voltages, + .voltages_len = ARRAY_SIZE(ldo_vaux3_voltages), + }, + [AB8505_LDO_AUX4] = { + .desc = { + .name = "LDO-AUX4", + .ops = &ab8500_regulator_volt_mode_ops, + .type = REGULATOR_VOLTAGE, + .id = AB9540_LDO_AUX4, + .owner = THIS_MODULE, + .n_voltages = ARRAY_SIZE(ldo_vauxn_voltages), + }, + .min_uV = 1100000, + .max_uV = 3300000, + .load_lp_uA = 5000, + /* values for Vaux4Regu register */ + .update_bank = 0x04, + .update_reg = 0x2e, + .update_mask = 0x03, + .update_val = 0x01, + .update_val_idle = 0x03, + .update_val_normal = 0x01, + /* values for Vaux4SEL register */ + .voltage_bank = 0x04, + .voltage_reg = 0x2f, + .voltage_mask = 0x0f, + .voltages = ldo_vauxn_voltages, + .voltages_len = ARRAY_SIZE(ldo_vauxn_voltages), + }, + [AB8505_LDO_AUX5] = { + .desc = { + .name = 
"LDO-AUX5", + .ops = &ab8500_regulator_volt_mode_ops, + .type = REGULATOR_VOLTAGE, + .id = AB8505_LDO_AUX5, + .owner = THIS_MODULE, + .n_voltages = ARRAY_SIZE(ldo_vaux56_voltages), + }, + .min_uV = 1050000, + .max_uV = 2790000, + .load_lp_uA = 2000, + /* values for CtrlVaux5 register */ + .update_bank = 0x01, + .update_reg = 0x55, + .update_mask = 0x08, + .update_val = 0x00, + .update_val_idle = 0x01, + .update_val_normal = 0x00, + .voltage_bank = 0x01, + .voltage_reg = 0x55, + .voltage_mask = 0x07, + .voltages = ldo_vaux56_voltages, + .voltages_len = ARRAY_SIZE(ldo_vaux56_voltages), + }, + [AB8505_LDO_AUX6] = { + .desc = { + .name = "LDO-AUX6", + .ops = &ab8500_regulator_volt_mode_ops, + .type = REGULATOR_VOLTAGE, + .id = AB8505_LDO_AUX6, + .owner = THIS_MODULE, + .n_voltages = ARRAY_SIZE(ldo_vaux56_voltages), + }, + .min_uV = 1050000, + .max_uV = 2790000, + .load_lp_uA = 2000, + /* values for CtrlVaux6 register */ + .update_bank = 0x01, + .update_reg = 0x56, + .update_mask = 0x08, + .update_val = 0x00, + .update_val_idle = 0x01, + .update_val_normal = 0x00, + .voltage_bank = 0x01, + .voltage_reg = 0x56, + .voltage_mask = 0x07, + .voltages = ldo_vaux56_voltages, + .voltages_len = ARRAY_SIZE(ldo_vaux56_voltages), + }, + [AB8505_LDO_INTCORE] = { + .desc = { + .name = "LDO-INTCORE", + .ops = &ab8500_regulator_volt_mode_ops, + .type = REGULATOR_VOLTAGE, + .id = AB8500_LDO_INTCORE, + .owner = THIS_MODULE, + .n_voltages = ARRAY_SIZE(ldo_vintcore_voltages), + }, + .min_uV = 1100000, + .max_uV = 3300000, + .load_lp_uA = 5000, + .update_bank = 0x03, + .update_reg = 0x80, + .update_mask = 0x44, + .update_val = 0x04, + .update_val_idle = 0x44, + .update_val_normal = 0x04, + .voltage_bank = 0x03, + .voltage_reg = 0x80, + .voltage_mask = 0x38, + .voltages = ldo_vintcore_voltages, + .voltages_len = ARRAY_SIZE(ldo_vintcore_voltages), + .voltage_shift = 3, + }, + + /* + * Fixed Voltage Regulators + * name, fixed mV, + * update bank, reg, mask, enable val + */ + [AB8505_LDO_ADC] = { + .desc = { + .name = "LDO-ADC", + .ops = &ab8500_regulator_mode_ops, + .type = REGULATOR_VOLTAGE, + .id = AB8505_LDO_ADC, + .owner = THIS_MODULE, + .n_voltages = 1, + }, + .delay = 10000, + .fixed_uV = 2000000, + .load_lp_uA = 1000, + .update_bank = 0x03, + .update_reg = 0x80, + .update_mask = 0x82, + .update_val = 0x02, + .update_val_idle = 0x82, + .update_val_normal = 0x02, + }, + [AB8505_LDO_USB] = { + .desc = { + .name = "LDO-USB", + .ops = &ab8500_regulator_mode_ops, + .type = REGULATOR_VOLTAGE, + .id = AB9540_LDO_USB, + .owner = THIS_MODULE, + .n_voltages = 1, + }, + .fixed_uV = 3300000, + .update_bank = 0x03, + .update_reg = 0x82, + .update_mask = 0x03, + .update_val = 0x01, + .update_val_idle = 0x03, + .update_val_normal = 0x01, + }, + [AB8505_LDO_AUDIO] = { + .desc = { + .name = "LDO-AUDIO", + .ops = &ab8500_regulator_ops, + .type = REGULATOR_VOLTAGE, + .id = AB8500_LDO_AUDIO, + .owner = THIS_MODULE, + .n_voltages = 1, + }, + .fixed_uV = 2000000, + .update_bank = 0x03, + .update_reg = 0x83, + .update_mask = 0x02, + .update_val = 0x02, + }, + [AB8505_LDO_ANAMIC1] = { + .desc = { + .name = "LDO-ANAMIC1", + .ops = &ab8500_regulator_ops, + .type = REGULATOR_VOLTAGE, + .id = AB8500_LDO_ANAMIC1, + .owner = THIS_MODULE, + .n_voltages = 1, + }, + .fixed_uV = 2050000, + .update_bank = 0x03, + .update_reg = 0x83, + .update_mask = 0x08, + .update_val = 0x08, + }, + [AB8505_LDO_ANAMIC2] = { + .desc = { + .name = "LDO-ANAMIC2", + .ops = &ab8500_regulator_ops, + .type = REGULATOR_VOLTAGE, + .id = AB8500_LDO_ANAMIC2, + .owner = 
THIS_MODULE, + .n_voltages = 1, + }, + .fixed_uV = 2050000, + .update_bank = 0x03, + .update_reg = 0x83, + .update_mask = 0x10, + .update_val = 0x10, + }, + [AB8505_LDO_AUX8] = { + .desc = { + .name = "LDO-AUX8", + .ops = &ab8500_regulator_ops, + .type = REGULATOR_VOLTAGE, + .id = AB8505_LDO_AUX8, + .owner = THIS_MODULE, + .n_voltages = 1, + }, + .fixed_uV = 1800000, + .update_bank = 0x03, + .update_reg = 0x83, + .update_mask = 0x04, + .update_val = 0x04, + }, + /* + * Regulators with fixed voltage and normal/idle modes + */ + [AB8505_LDO_ANA] = { + .desc = { + .name = "LDO-ANA", + .ops = &ab8500_regulator_mode_ops, + .type = REGULATOR_VOLTAGE, + .id = AB8500_LDO_ANA, + .owner = THIS_MODULE, + .n_voltages = 1, + }, + .fixed_uV = 1200000, + .load_lp_uA = 1000, + .update_bank = 0x04, + .update_reg = 0x06, + .update_mask = 0x0c, + .update_val = 0x04, + .update_val_idle = 0x0c, + .update_val_normal = 0x04, + }, +}; + /* AB9540 regulator information */ static struct ab8500_regulator_info ab9540_regulator_info[AB9540_NUM_REGULATORS] = { @@ -1031,6 +1353,276 @@ static struct ab8500_reg_init ab8500_reg_init[] = { REG_INIT(AB8500_REGUCTRLDISCH2, 0x04, 0x44, 0x16), }; +/* AB8505 register init */ +static struct ab8500_reg_init ab8505_reg_init[] = { + /* + * 0x03, VarmRequestCtrl + * 0x0c, VsmpsCRequestCtrl + * 0x30, VsmpsARequestCtrl + * 0xc0, VsmpsBRequestCtrl + */ + REG_INIT(AB8505_REGUREQUESTCTRL1, 0x03, 0x03, 0xff), + /* + * 0x03, VsafeRequestCtrl + * 0x0c, VpllRequestCtrl + * 0x30, VanaRequestCtrl + */ + REG_INIT(AB8505_REGUREQUESTCTRL2, 0x03, 0x04, 0x3f), + /* + * 0x30, Vaux1RequestCtrl + * 0xc0, Vaux2RequestCtrl + */ + REG_INIT(AB8505_REGUREQUESTCTRL3, 0x03, 0x05, 0xf0), + /* + * 0x03, Vaux3RequestCtrl + * 0x04, SwHPReq + */ + REG_INIT(AB8505_REGUREQUESTCTRL4, 0x03, 0x06, 0x07), + /* + * 0x01, VsmpsASysClkReq1HPValid + * 0x02, VsmpsBSysClkReq1HPValid + * 0x04, VsafeSysClkReq1HPValid + * 0x08, VanaSysClkReq1HPValid + * 0x10, VpllSysClkReq1HPValid + * 0x20, Vaux1SysClkReq1HPValid + * 0x40, Vaux2SysClkReq1HPValid + * 0x80, Vaux3SysClkReq1HPValid + */ + REG_INIT(AB8505_REGUSYSCLKREQ1HPVALID1, 0x03, 0x07, 0xff), + /* + * 0x01, VsmpsCSysClkReq1HPValid + * 0x02, VarmSysClkReq1HPValid + * 0x04, VbbSysClkReq1HPValid + * 0x08, VsmpsMSysClkReq1HPValid + */ + REG_INIT(AB8505_REGUSYSCLKREQ1HPVALID2, 0x03, 0x08, 0x0f), + /* + * 0x01, VsmpsAHwHPReq1Valid + * 0x02, VsmpsBHwHPReq1Valid + * 0x04, VsafeHwHPReq1Valid + * 0x08, VanaHwHPReq1Valid + * 0x10, VpllHwHPReq1Valid + * 0x20, Vaux1HwHPReq1Valid + * 0x40, Vaux2HwHPReq1Valid + * 0x80, Vaux3HwHPReq1Valid + */ + REG_INIT(AB8505_REGUHWHPREQ1VALID1, 0x03, 0x09, 0xff), + /* + * 0x08, VsmpsMHwHPReq1Valid + */ + REG_INIT(AB8505_REGUHWHPREQ1VALID2, 0x03, 0x0a, 0x08), + /* + * 0x01, VsmpsAHwHPReq2Valid + * 0x02, VsmpsBHwHPReq2Valid + * 0x04, VsafeHwHPReq2Valid + * 0x08, VanaHwHPReq2Valid + * 0x10, VpllHwHPReq2Valid + * 0x20, Vaux1HwHPReq2Valid + * 0x40, Vaux2HwHPReq2Valid + * 0x80, Vaux3HwHPReq2Valid + */ + REG_INIT(AB8505_REGUHWHPREQ2VALID1, 0x03, 0x0b, 0xff), + /* + * 0x08, VsmpsMHwHPReq2Valid + */ + REG_INIT(AB8505_REGUHWHPREQ2VALID2, 0x03, 0x0c, 0x08), + /* + * 0x01, VsmpsCSwHPReqValid + * 0x02, VarmSwHPReqValid + * 0x04, VsmpsASwHPReqValid + * 0x08, VsmpsBSwHPReqValid + * 0x10, VsafeSwHPReqValid + * 0x20, VanaSwHPReqValid + * 0x40, VpllSwHPReqValid + * 0x80, Vaux1SwHPReqValid + */ + REG_INIT(AB8505_REGUSWHPREQVALID1, 0x03, 0x0d, 0xff), + /* + * 0x01, Vaux2SwHPReqValid + * 0x02, Vaux3SwHPReqValid + * 0x20, VsmpsMSwHPReqValid + */ + 
REG_INIT(AB8505_REGUSWHPREQVALID2, 0x03, 0x0e, 0x23), + /* + * 0x02, SysClkReq2Valid1 + * 0x04, SysClkReq3Valid1 + * 0x08, SysClkReq4Valid1 + */ + REG_INIT(AB8505_REGUSYSCLKREQVALID1, 0x03, 0x0f, 0x0e), + /* + * 0x02, SysClkReq2Valid2 + * 0x04, SysClkReq3Valid2 + * 0x08, SysClkReq4Valid2 + */ + REG_INIT(AB8505_REGUSYSCLKREQVALID2, 0x03, 0x10, 0x0e), + /* + * 0x01, Vaux4SwHPReqValid + * 0x02, Vaux4HwHPReq2Valid + * 0x04, Vaux4HwHPReq1Valid + * 0x08, Vaux4SysClkReq1HPValid + */ + REG_INIT(AB8505_REGUVAUX4REQVALID, 0x03, 0x11, 0x0f), + /* + * 0x02, VadcEna + * 0x04, VintCore12Ena + * 0x38, VintCore12Sel + * 0x40, VintCore12LP + * 0x80, VadcLP + */ + REG_INIT(AB8505_REGUMISC1, 0x03, 0x80, 0xfe), + /* + * 0x02, VaudioEna + * 0x04, VdmicEna + * 0x08, Vamic1Ena + * 0x10, Vamic2Ena + */ + REG_INIT(AB8505_VAUDIOSUPPLY, 0x03, 0x83, 0x1e), + /* + * 0x01, Vamic1_dzout + * 0x02, Vamic2_dzout + */ + REG_INIT(AB8505_REGUCTRL1VAMIC, 0x03, 0x84, 0x03), + /* + * 0x03, VsmpsARegu + * 0x0c, VsmpsASelCtrl + * 0x10, VsmpsAAutoMode + * 0x20, VsmpsAPWMMode + */ + REG_INIT(AB8505_VSMPSAREGU, 0x04, 0x03, 0x3f), + /* + * 0x03, VsmpsBRegu + * 0x0c, VsmpsBSelCtrl + * 0x10, VsmpsBAutoMode + * 0x20, VsmpsBPWMMode + */ + REG_INIT(AB8505_VSMPSBREGU, 0x04, 0x04, 0x3f), + /* + * 0x03, VsafeRegu + * 0x0c, VsafeSelCtrl + * 0x10, VsafeAutoMode + * 0x20, VsafePWMMode + */ + REG_INIT(AB8505_VSAFEREGU, 0x04, 0x05, 0x3f), + /* + * 0x03, VpllRegu (NOTE! PRCMU register bits) + * 0x0c, VanaRegu + */ + REG_INIT(AB8505_VPLLVANAREGU, 0x04, 0x06, 0x0f), + /* + * 0x03, VextSupply1Regu + * 0x0c, VextSupply2Regu + * 0x30, VextSupply3Regu + * 0x40, ExtSupply2Bypass + * 0x80, ExtSupply3Bypass + */ + REG_INIT(AB8505_EXTSUPPLYREGU, 0x04, 0x08, 0xff), + /* + * 0x03, Vaux1Regu + * 0x0c, Vaux2Regu + */ + REG_INIT(AB8505_VAUX12REGU, 0x04, 0x09, 0x0f), + /* + * 0x0f, Vaux3Regu + */ + REG_INIT(AB8505_VRF1VAUX3REGU, 0x04, 0x0a, 0x0f), + /* + * 0x3f, VsmpsASel1 + */ + REG_INIT(AB8505_VSMPSASEL1, 0x04, 0x13, 0x3f), + /* + * 0x3f, VsmpsASel2 + */ + REG_INIT(AB8505_VSMPSASEL2, 0x04, 0x14, 0x3f), + /* + * 0x3f, VsmpsASel3 + */ + REG_INIT(AB8505_VSMPSASEL3, 0x04, 0x15, 0x3f), + /* + * 0x3f, VsmpsBSel1 + */ + REG_INIT(AB8505_VSMPSBSEL1, 0x04, 0x17, 0x3f), + /* + * 0x3f, VsmpsBSel2 + */ + REG_INIT(AB8505_VSMPSBSEL2, 0x04, 0x18, 0x3f), + /* + * 0x3f, VsmpsBSel3 + */ + REG_INIT(AB8505_VSMPSBSEL3, 0x04, 0x19, 0x3f), + /* + * 0x7f, VsafeSel1 + */ + REG_INIT(AB8505_VSAFESEL1, 0x04, 0x1b, 0x7f), + /* + * 0x3f, VsafeSel2 + */ + REG_INIT(AB8505_VSAFESEL2, 0x04, 0x1c, 0x7f), + /* + * 0x3f, VsafeSel3 + */ + REG_INIT(AB8505_VSAFESEL3, 0x04, 0x1d, 0x7f), + /* + * 0x0f, Vaux1Sel + */ + REG_INIT(AB8505_VAUX1SEL, 0x04, 0x1f, 0x0f), + /* + * 0x0f, Vaux2Sel + */ + REG_INIT(AB8505_VAUX2SEL, 0x04, 0x20, 0x0f), + /* + * 0x07, Vaux3Sel + * 0x30, VRF1Sel + */ + REG_INIT(AB8505_VRF1VAUX3SEL, 0x04, 0x21, 0x37), + /* + * 0x03, Vaux4RequestCtrl + */ + REG_INIT(AB8505_VAUX4REQCTRL, 0x04, 0x2d, 0x03), + /* + * 0x03, Vaux4Regu + */ + REG_INIT(AB8505_VAUX4REGU, 0x04, 0x2e, 0x03), + /* + * 0x0f, Vaux4Sel + */ + REG_INIT(AB8505_VAUX4SEL, 0x04, 0x2f, 0x0f), + /* + * 0x04, Vaux1Disch + * 0x08, Vaux2Disch + * 0x10, Vaux3Disch + * 0x20, Vintcore12Disch + * 0x40, VTVoutDisch + * 0x80, VaudioDisch + */ + REG_INIT(AB8505_REGUCTRLDISCH, 0x04, 0x43, 0xfc), + /* + * 0x02, VanaDisch + * 0x04, VdmicPullDownEna + * 0x10, VdmicDisch + */ + REG_INIT(AB8505_REGUCTRLDISCH2, 0x04, 0x44, 0x16), + /* + * 0x01, Vaux4Disch + */ + REG_INIT(AB8505_REGUCTRLDISCH3, 0x04, 0x48, 0x01), + /* + * 0x07, Vaux5Sel + * 
0x08, Vaux5LP
+ * 0x10, Vaux5Ena
+ * 0x20, Vaux5Disch
+ * 0x40, Vaux5DisSfst
+ * 0x80, Vaux5DisPulld
+ */
+ REG_INIT(AB8505_CTRLVAUX5, 0x01, 0x55, 0xff),
+ /*
+ * 0x07, Vaux6Sel
+ * 0x08, Vaux6LP
+ * 0x10, Vaux6Ena
+ * 0x80, Vaux6DisPulld
+ */
+ REG_INIT(AB8505_CTRLVAUX6, 0x01, 0x56, 0x9f),
+};
+
/* AB9540 register init */
static struct ab8500_reg_init ab9540_reg_init[] = {
/*
@@ -1396,6 +1988,22 @@ static struct of_regulator_match ab8500_regulator_match[] = {
{ .name = "ab8500_ldo_ana", .driver_data = (void *) AB8500_LDO_ANA, },
};

+static struct of_regulator_match ab8505_regulator_match[] = {
+ { .name = "ab8500_ldo_aux1", .driver_data = (void *) AB8505_LDO_AUX1, },
+ { .name = "ab8500_ldo_aux2", .driver_data = (void *) AB8505_LDO_AUX2, },
+ { .name = "ab8500_ldo_aux3", .driver_data = (void *) AB8505_LDO_AUX3, },
+ { .name = "ab8500_ldo_aux4", .driver_data = (void *) AB8505_LDO_AUX4, },
+ { .name = "ab8500_ldo_aux5", .driver_data = (void *) AB8505_LDO_AUX5, },
+ { .name = "ab8500_ldo_aux6", .driver_data = (void *) AB8505_LDO_AUX6, },
+ { .name = "ab8500_ldo_intcore", .driver_data = (void *) AB8505_LDO_INTCORE, },
+ { .name = "ab8500_ldo_adc", .driver_data = (void *) AB8505_LDO_ADC, },
+ { .name = "ab8500_ldo_audio", .driver_data = (void *) AB8505_LDO_AUDIO, },
+ { .name = "ab8500_ldo_anamic1", .driver_data = (void *) AB8505_LDO_ANAMIC1, },
+ { .name = "ab8500_ldo_anamic2", .driver_data = (void *) AB8505_LDO_ANAMIC2, },
+ { .name = "ab8500_ldo_aux8", .driver_data = (void *) AB8505_LDO_AUX8, },
+ { .name = "ab8500_ldo_ana", .driver_data = (void *) AB8505_LDO_ANA, },
+};
+
static struct of_regulator_match ab9540_regulator_match[] = {
{ .name = "ab8500_ldo_aux1", .driver_data = (void *) AB9540_LDO_AUX1, },
{ .name = "ab8500_ldo_aux2", .driver_data = (void *) AB9540_LDO_AUX2, },
@@ -1450,6 +2058,11 @@ static int ab8500_regulator_probe(struct platform_device *pdev)
reg_init_size = AB9540_NUM_REGULATOR_REGISTERS;
match = ab9540_regulator_match;
match_size = ARRAY_SIZE(ab9540_regulator_match);
+ } else if (is_ab8505(ab8500)) {
+ regulator_info = ab8505_regulator_info;
+ regulator_info_size = ARRAY_SIZE(ab8505_regulator_info);
+ reg_init = ab8505_reg_init;
+ reg_init_size = AB8505_NUM_REGULATOR_REGISTERS;
} else {
regulator_info = ab8500_regulator_info;
regulator_info_size = ARRAY_SIZE(ab8500_regulator_info);
@@ -1543,6 +2156,9 @@ static int ab8500_regulator_remove(struct platform_device *pdev)
if (is_ab9540(ab8500)) {
regulator_info = ab9540_regulator_info;
regulator_info_size = ARRAY_SIZE(ab9540_regulator_info);
+ } else if (is_ab8505(ab8500)) {
+ regulator_info = ab8505_regulator_info;
+ regulator_info_size = ARRAY_SIZE(ab8505_regulator_info);
} else {
regulator_info = ab8500_regulator_info;
regulator_info_size = ARRAY_SIZE(ab8500_regulator_info);
@@ -1601,5 +2217,6 @@ module_exit(ab8500_regulator_exit);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Sundar Iyer ");
MODULE_AUTHOR("Bengt Jonsson ");
+MODULE_AUTHOR("Daniel Willerud ");
MODULE_DESCRIPTION("Regulator Driver for ST-Ericsson AB8500 Mixed-Sig PMIC");
MODULE_ALIAS("platform:ab8500-regulator");
diff --git a/include/linux/regulator/ab8500.h b/include/linux/regulator/ab8500.h
index 592a3f3994c0..9a7cf97e5040 100644
--- a/include/linux/regulator/ab8500.h
+++ b/include/linux/regulator/ab8500.h
@@ -5,6 +5,7 @@
*
* Authors: Sundar Iyer for ST-Ericsson
* Bengt Jonsson for ST-Ericsson
+ * Daniel Willerud for ST-Ericsson
*/
#ifndef __LINUX_MFD_AB8500_REGULATOR_H
@@ -27,7 +28,28 @@ enum ab8500_regulator_id {
AB8500_NUM_REGULATORS,
};
-/* AB9450 regulators */
+/* AB8505 regulators */
+enum ab8505_regulator_id {
+ AB8505_LDO_AUX1,
+ AB8505_LDO_AUX2,
+ AB8505_LDO_AUX3,
+ AB8505_LDO_AUX4,
+ AB8505_LDO_AUX5,
+ AB8505_LDO_AUX6,
+ AB8505_LDO_INTCORE,
+ AB8505_LDO_ADC,
+ AB8505_LDO_USB,
+ AB8505_LDO_AUDIO,
+ AB8505_LDO_ANAMIC1,
+ AB8505_LDO_ANAMIC2,
+ AB8505_LDO_AUX8,
+ AB8505_LDO_ANA,
+ AB8505_SYSCLKREQ_2,
+ AB8505_SYSCLKREQ_4,
+ AB8505_NUM_REGULATORS,
+};
+
+/* AB9540 regulators */
enum ab9540_regulator_id {
AB9540_LDO_AUX1,
AB9540_LDO_AUX2,
@@ -46,7 +68,7 @@ enum ab9540_regulator_id {
AB9540_NUM_REGULATORS,
};
-/* AB8500 and AB9540 register initialization */
+/* AB8500, AB8505, and AB9540 register initialization */
struct ab8500_regulator_reg_init {
int id;
u8 mask;
@@ -92,6 +114,55 @@ enum ab8500_regulator_reg {
AB8500_NUM_REGULATOR_REGISTERS,
};
+/* AB8505 registers */
+enum ab8505_regulator_reg {
+ AB8505_REGUREQUESTCTRL1,
+ AB8505_REGUREQUESTCTRL2,
+ AB8505_REGUREQUESTCTRL3,
+ AB8505_REGUREQUESTCTRL4,
+ AB8505_REGUSYSCLKREQ1HPVALID1,
+ AB8505_REGUSYSCLKREQ1HPVALID2,
+ AB8505_REGUHWHPREQ1VALID1,
+ AB8505_REGUHWHPREQ1VALID2,
+ AB8505_REGUHWHPREQ2VALID1,
+ AB8505_REGUHWHPREQ2VALID2,
+ AB8505_REGUSWHPREQVALID1,
+ AB8505_REGUSWHPREQVALID2,
+ AB8505_REGUSYSCLKREQVALID1,
+ AB8505_REGUSYSCLKREQVALID2,
+ AB8505_REGUVAUX4REQVALID,
+ AB8505_REGUMISC1,
+ AB8505_VAUDIOSUPPLY,
+ AB8505_REGUCTRL1VAMIC,
+ AB8505_VSMPSAREGU,
+ AB8505_VSMPSBREGU,
+ AB8505_VSAFEREGU, /* NOTE! PRCMU register */
+ AB8505_VPLLVANAREGU,
+ AB8505_EXTSUPPLYREGU,
+ AB8505_VAUX12REGU,
+ AB8505_VRF1VAUX3REGU,
+ AB8505_VSMPSASEL1,
+ AB8505_VSMPSASEL2,
+ AB8505_VSMPSASEL3,
+ AB8505_VSMPSBSEL1,
+ AB8505_VSMPSBSEL2,
+ AB8505_VSMPSBSEL3,
+ AB8505_VSAFESEL1, /* NOTE! PRCMU register */
+ AB8505_VSAFESEL2, /* NOTE! PRCMU register */
+ AB8505_VSAFESEL3, /* NOTE! PRCMU register */
+ AB8505_VAUX1SEL,
+ AB8505_VAUX2SEL,
+ AB8505_VRF1VAUX3SEL,
+ AB8505_VAUX4REQCTRL,
+ AB8505_VAUX4REGU,
+ AB8505_VAUX4SEL,
+ AB8505_REGUCTRLDISCH,
+ AB8505_REGUCTRLDISCH2,
+ AB8505_REGUCTRLDISCH3,
+ AB8505_CTRLVAUX5,
+ AB8505_CTRLVAUX6,
+ AB8505_NUM_REGULATOR_REGISTERS,
+};
/* AB9540 registers */
enum ab9540_regulator_reg {
-- cgit
From ae0a9a3efce22e77b5f0f2b266646431f492f7ed Mon Sep 17 00:00:00 2001
From: Lee Jones
Date: Thu, 28 Mar 2013 16:11:16 +0000
Subject: regulator: ab8500: Add support for the ab8540

To obtain full AB8540 regulator support, the AB8500 regulator driver first needs to know the AB8540's register layout and the initialisation value for each register. That information is provided here via a couple of large data structures.
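A note on how the variable-voltage entries below are consumed: the .voltage_bank/.voltage_reg/.voltage_mask/.voltage_shift fields locate a selector that indexes a microvolt table such as ldo_sdio_voltages. What follows is a stand-alone sketch of that decoding under those assumptions; the helper name is illustrative, and the real driver does this through its regulator ops.

#include <stdint.h>

/* Mask out the selector bits, shift them down, and index the chip's
 * microvolt table; -1 flags a selector beyond the table, whose length
 * the driver carries in .voltages_len. */
static int decode_voltage_sel(uint8_t reg_val, uint8_t mask,
			      unsigned int shift, const int *uv_table,
			      unsigned int table_len)
{
	unsigned int sel = (reg_val & mask) >> shift;

	if (sel >= table_len)
		return -1;
	return uv_table[sel];
}

For example, the LDO-INTCORE entry (.voltage_mask = 0x38, .voltage_shift = 3) maps a raw register value of 0x10 to selector 2, i.e. the third entry of ldo_vintcore_voltages (1250000 uV, given that table's 25 mV steps).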
Signed-off-by: Lee Jones Signed-off-by: Mark Brown --- drivers/regulator/ab8500.c | 686 ++++++++++++++++++++++++++++++++++++++- include/linux/regulator/ab8500.h | 86 +++++ 2 files changed, 764 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/regulator/ab8500.c b/drivers/regulator/ab8500.c index 1ab0f8a7c862..ec609ab747ae 100644 --- a/drivers/regulator/ab8500.c +++ b/drivers/regulator/ab8500.c @@ -107,6 +107,18 @@ static const int ldo_vaux56_voltages[] = { 2790000, }; +static const int ldo_vaux3_ab8540_voltages[] = { + 1200000, + 1500000, + 1800000, + 2100000, + 2500000, + 2750000, + 2790000, + 2910000, + 3050000, +}; + static const unsigned int ldo_vintcore_voltages[] = { 1200000, 1225000, @@ -117,6 +129,17 @@ static const unsigned int ldo_vintcore_voltages[] = { 1350000, }; +static const int ldo_sdio_voltages[] = { + 1160000, + 1050000, + 1100000, + 1500000, + 1800000, + 2200000, + 2910000, + 3050000, +}; + static int ab8500_regulator_enable(struct regulator_dev *rdev) { int ret; @@ -726,10 +749,10 @@ static struct ab8500_regulator_info /* values for CtrlVaux5 register */ .update_bank = 0x01, .update_reg = 0x55, - .update_mask = 0x08, - .update_val = 0x00, - .update_val_idle = 0x01, - .update_val_normal = 0x00, + .update_mask = 0x18, + .update_val = 0x10, + .update_val_idle = 0x18, + .update_val_normal = 0x10, .voltage_bank = 0x01, .voltage_reg = 0x55, .voltage_mask = 0x07, @@ -751,10 +774,10 @@ static struct ab8500_regulator_info /* values for CtrlVaux6 register */ .update_bank = 0x01, .update_reg = 0x56, - .update_mask = 0x08, - .update_val = 0x00, - .update_val_idle = 0x01, - .update_val_normal = 0x00, + .update_mask = 0x18, + .update_val = 0x10, + .update_val_idle = 0x18, + .update_val_normal = 0x10, .voltage_bank = 0x01, .voltage_reg = 0x56, .voltage_mask = 0x07, @@ -1169,6 +1192,255 @@ static struct ab8500_regulator_info }, }; +/* AB8540 regulator information */ +static struct ab8500_regulator_info + ab8540_regulator_info[AB8540_NUM_REGULATORS] = { + /* + * Variable Voltage Regulators + * name, min mV, max mV, + * update bank, reg, mask, enable val + * volt bank, reg, mask, table, table length + */ + [AB8540_LDO_AUX1] = { + .desc = { + .name = "LDO-AUX1", + .ops = &ab8500_regulator_volt_mode_ops, + .type = REGULATOR_VOLTAGE, + .id = AB8500_LDO_AUX1, + .owner = THIS_MODULE, + .n_voltages = ARRAY_SIZE(ldo_vauxn_voltages), + }, + .load_lp_uA = 5000, + .update_bank = 0x04, + .update_reg = 0x09, + .update_mask = 0x03, + .update_val = 0x01, + .update_val_idle = 0x03, + .update_val_normal = 0x01, + .voltage_bank = 0x04, + .voltage_reg = 0x1f, + .voltage_mask = 0x0f, + .voltages = ldo_vauxn_voltages, + .voltages_len = ARRAY_SIZE(ldo_vauxn_voltages), + }, + [AB8540_LDO_AUX2] = { + .desc = { + .name = "LDO-AUX2", + .ops = &ab8500_regulator_volt_mode_ops, + .type = REGULATOR_VOLTAGE, + .id = AB8500_LDO_AUX2, + .owner = THIS_MODULE, + .n_voltages = ARRAY_SIZE(ldo_vauxn_voltages), + }, + .load_lp_uA = 5000, + .update_bank = 0x04, + .update_reg = 0x09, + .update_mask = 0x0c, + .update_val = 0x04, + .update_val_idle = 0x0c, + .update_val_normal = 0x04, + .voltage_bank = 0x04, + .voltage_reg = 0x20, + .voltage_mask = 0x0f, + .voltages = ldo_vauxn_voltages, + .voltages_len = ARRAY_SIZE(ldo_vauxn_voltages), + }, + [AB8540_LDO_AUX3] = { + .desc = { + .name = "LDO-AUX3", + .ops = &ab8500_regulator_volt_mode_ops, + .type = REGULATOR_VOLTAGE, + .id = AB8500_LDO_AUX3, + .owner = THIS_MODULE, + .n_voltages = ARRAY_SIZE(ldo_vaux3_ab8540_voltages), + }, + .load_lp_uA = 5000, + 
.update_bank = 0x04, + .update_reg = 0x0a, + .update_mask = 0x03, + .update_val = 0x01, + .update_val_idle = 0x03, + .update_val_normal = 0x01, + .voltage_bank = 0x04, + .voltage_reg = 0x21, + .voltage_mask = 0x07, + .voltages = ldo_vaux3_ab8540_voltages, + .voltages_len = ARRAY_SIZE(ldo_vaux3_ab8540_voltages), + }, + [AB8540_LDO_AUX4] = { + .desc = { + .name = "LDO-AUX4", + .ops = &ab8500_regulator_volt_mode_ops, + .type = REGULATOR_VOLTAGE, + .id = AB9540_LDO_AUX4, + .owner = THIS_MODULE, + .n_voltages = ARRAY_SIZE(ldo_vauxn_voltages), + }, + .load_lp_uA = 5000, + /* values for Vaux4Regu register */ + .update_bank = 0x04, + .update_reg = 0x2e, + .update_mask = 0x03, + .update_val = 0x01, + .update_val_idle = 0x03, + .update_val_normal = 0x01, + /* values for Vaux4SEL register */ + .voltage_bank = 0x04, + .voltage_reg = 0x2f, + .voltage_mask = 0x0f, + .voltages = ldo_vauxn_voltages, + .voltages_len = ARRAY_SIZE(ldo_vauxn_voltages), + }, + [AB8540_LDO_INTCORE] = { + .desc = { + .name = "LDO-INTCORE", + .ops = &ab8500_regulator_volt_mode_ops, + .type = REGULATOR_VOLTAGE, + .id = AB8500_LDO_INTCORE, + .owner = THIS_MODULE, + .n_voltages = ARRAY_SIZE(ldo_vintcore_voltages), + }, + .load_lp_uA = 5000, + .update_bank = 0x03, + .update_reg = 0x80, + .update_mask = 0x44, + .update_val = 0x44, + .update_val_idle = 0x44, + .update_val_normal = 0x04, + .voltage_bank = 0x03, + .voltage_reg = 0x80, + .voltage_mask = 0x38, + .voltages = ldo_vintcore_voltages, + .voltages_len = ARRAY_SIZE(ldo_vintcore_voltages), + .voltage_shift = 3, + }, + + /* + * Fixed Voltage Regulators + * name, fixed mV, + * update bank, reg, mask, enable val + */ + [AB8540_LDO_TVOUT] = { + .desc = { + .name = "LDO-TVOUT", + .ops = &ab8500_regulator_mode_ops, + .type = REGULATOR_VOLTAGE, + .id = AB8500_LDO_TVOUT, + .owner = THIS_MODULE, + .n_voltages = 1, + }, + .delay = 10000, + .load_lp_uA = 1000, + .update_bank = 0x03, + .update_reg = 0x80, + .update_mask = 0x82, + .update_val = 0x02, + .update_val_idle = 0x82, + .update_val_normal = 0x02, + }, + [AB8540_LDO_AUDIO] = { + .desc = { + .name = "LDO-AUDIO", + .ops = &ab8500_regulator_ops, + .type = REGULATOR_VOLTAGE, + .id = AB8500_LDO_AUDIO, + .owner = THIS_MODULE, + .n_voltages = 1, + }, + .update_bank = 0x03, + .update_reg = 0x83, + .update_mask = 0x02, + .update_val = 0x02, + }, + [AB8540_LDO_ANAMIC1] = { + .desc = { + .name = "LDO-ANAMIC1", + .ops = &ab8500_regulator_ops, + .type = REGULATOR_VOLTAGE, + .id = AB8500_LDO_ANAMIC1, + .owner = THIS_MODULE, + .n_voltages = 1, + }, + .update_bank = 0x03, + .update_reg = 0x83, + .update_mask = 0x08, + .update_val = 0x08, + }, + [AB8540_LDO_ANAMIC2] = { + .desc = { + .name = "LDO-ANAMIC2", + .ops = &ab8500_regulator_ops, + .type = REGULATOR_VOLTAGE, + .id = AB8500_LDO_ANAMIC2, + .owner = THIS_MODULE, + .n_voltages = 1, + }, + .update_bank = 0x03, + .update_reg = 0x83, + .update_mask = 0x10, + .update_val = 0x10, + }, + [AB8540_LDO_DMIC] = { + .desc = { + .name = "LDO-DMIC", + .ops = &ab8500_regulator_ops, + .type = REGULATOR_VOLTAGE, + .id = AB8500_LDO_DMIC, + .owner = THIS_MODULE, + .n_voltages = 1, + }, + .update_bank = 0x03, + .update_reg = 0x83, + .update_mask = 0x04, + .update_val = 0x04, + }, + + /* + * Regulators with fixed voltage and normal/idle modes + */ + [AB8540_LDO_ANA] = { + .desc = { + .name = "LDO-ANA", + .ops = &ab8500_regulator_mode_ops, + .type = REGULATOR_VOLTAGE, + .id = AB8500_LDO_ANA, + .owner = THIS_MODULE, + .n_voltages = 1, + }, + .load_lp_uA = 1000, + .update_bank = 0x04, + .update_reg = 0x06, + .update_mask 
= 0x0c, + .update_val = 0x04, + .update_val_idle = 0x0c, + .update_val_normal = 0x04, + }, + [AB8540_LDO_SDIO] = { + .desc = { + .name = "LDO-SDIO", + .ops = &ab8500_regulator_volt_mode_ops, + .type = REGULATOR_VOLTAGE, + .id = AB8540_LDO_SDIO, + .owner = THIS_MODULE, + .n_voltages = ARRAY_SIZE(ldo_sdio_voltages), + }, + .min_uV = 1050000, + .max_uV = 3050000, + .load_lp_uA = 5000, + .update_bank = 0x03, + .update_reg = 0x88, + .update_mask = 0x30, + .update_val = 0x10, + .update_val_idle = 0x30, + .update_val_normal = 0x10, + .voltage_bank = 0x03, + .voltage_reg = 0x88, + .voltage_mask = 0x07, + .voltages = ldo_sdio_voltages, + .voltages_len = ARRAY_SIZE(ldo_sdio_voltages), + }, +}; + struct ab8500_reg_init { u8 bank; u8 addr; @@ -1898,6 +2170,384 @@ static struct ab8500_reg_init ab9540_reg_init[] = { REG_INIT(AB9540_REGUCTRLDISCH3, 0x04, 0x48, 0x01), }; +/* AB8540 register init */ +static struct ab8500_reg_init ab8540_reg_init[] = { + /* + * 0x01, VSimSycClkReq1Valid + * 0x02, VSimSycClkReq2Valid + * 0x04, VSimSycClkReq3Valid + * 0x08, VSimSycClkReq4Valid + * 0x10, VSimSycClkReq5Valid + * 0x20, VSimSycClkReq6Valid + * 0x40, VSimSycClkReq7Valid + * 0x80, VSimSycClkReq8Valid + */ + REG_INIT(AB8540_VSIMSYSCLKCTRL, 0x02, 0x33, 0xff), + /* + * 0x03, VarmRequestCtrl + * 0x0c, VapeRequestCtrl + * 0x30, Vsmps1RequestCtrl + * 0xc0, Vsmps2RequestCtrl + */ + REG_INIT(AB8540_REGUREQUESTCTRL1, 0x03, 0x03, 0xff), + /* + * 0x03, Vsmps3RequestCtrl + * 0x0c, VpllRequestCtrl + * 0x30, VanaRequestCtrl + * 0xc0, VextSupply1RequestCtrl + */ + REG_INIT(AB8540_REGUREQUESTCTRL2, 0x03, 0x04, 0xff), + /* + * 0x03, VextSupply2RequestCtrl + * 0x0c, VextSupply3RequestCtrl + * 0x30, Vaux1RequestCtrl + * 0xc0, Vaux2RequestCtrl + */ + REG_INIT(AB8540_REGUREQUESTCTRL3, 0x03, 0x05, 0xff), + /* + * 0x03, Vaux3RequestCtrl + * 0x04, SwHPReq + */ + REG_INIT(AB8540_REGUREQUESTCTRL4, 0x03, 0x06, 0x07), + /* + * 0x01, Vsmps1SysClkReq1HPValid + * 0x02, Vsmps2SysClkReq1HPValid + * 0x04, Vsmps3SysClkReq1HPValid + * 0x08, VanaSysClkReq1HPValid + * 0x10, VpllSysClkReq1HPValid + * 0x20, Vaux1SysClkReq1HPValid + * 0x40, Vaux2SysClkReq1HPValid + * 0x80, Vaux3SysClkReq1HPValid + */ + REG_INIT(AB8540_REGUSYSCLKREQ1HPVALID1, 0x03, 0x07, 0xff), + /* + * 0x01, VapeSysClkReq1HPValid + * 0x02, VarmSysClkReq1HPValid + * 0x04, VbbSysClkReq1HPValid + * 0x10, VextSupply1SysClkReq1HPValid + * 0x20, VextSupply2SysClkReq1HPValid + * 0x40, VextSupply3SysClkReq1HPValid + */ + REG_INIT(AB8540_REGUSYSCLKREQ1HPVALID2, 0x03, 0x08, 0x77), + /* + * 0x01, Vsmps1HwHPReq1Valid + * 0x02, Vsmps2HwHPReq1Valid + * 0x04, Vsmps3HwHPReq1Valid + * 0x08, VanaHwHPReq1Valid + * 0x10, VpllHwHPReq1Valid + * 0x20, Vaux1HwHPReq1Valid + * 0x40, Vaux2HwHPReq1Valid + * 0x80, Vaux3HwHPReq1Valid + */ + REG_INIT(AB8540_REGUHWHPREQ1VALID1, 0x03, 0x09, 0xff), + /* + * 0x01, VextSupply1HwHPReq1Valid + * 0x02, VextSupply2HwHPReq1Valid + * 0x04, VextSupply3HwHPReq1Valid + */ + REG_INIT(AB8540_REGUHWHPREQ1VALID2, 0x03, 0x0a, 0x07), + /* + * 0x01, Vsmps1HwHPReq2Valid + * 0x02, Vsmps2HwHPReq2Valid + * 0x03, Vsmps3HwHPReq2Valid + * 0x08, VanaHwHPReq2Valid + * 0x10, VpllHwHPReq2Valid + * 0x20, Vaux1HwHPReq2Valid + * 0x40, Vaux2HwHPReq2Valid + * 0x80, Vaux3HwHPReq2Valid + */ + REG_INIT(AB8540_REGUHWHPREQ2VALID1, 0x03, 0x0b, 0xff), + /* + * 0x01, VextSupply1HwHPReq2Valid + * 0x02, VextSupply2HwHPReq2Valid + * 0x04, VextSupply3HwHPReq2Valid + */ + REG_INIT(AB8540_REGUHWHPREQ2VALID2, 0x03, 0x0c, 0x07), + /* + * 0x01, VapeSwHPReqValid + * 0x02, VarmSwHPReqValid + * 0x04, Vsmps1SwHPReqValid + * 
0x08, Vsmps2SwHPReqValid + * 0x10, Vsmps3SwHPReqValid + * 0x20, VanaSwHPReqValid + * 0x40, VpllSwHPReqValid + * 0x80, Vaux1SwHPReqValid + */ + REG_INIT(AB8540_REGUSWHPREQVALID1, 0x03, 0x0d, 0xff), + /* + * 0x01, Vaux2SwHPReqValid + * 0x02, Vaux3SwHPReqValid + * 0x04, VextSupply1SwHPReqValid + * 0x08, VextSupply2SwHPReqValid + * 0x10, VextSupply3SwHPReqValid + */ + REG_INIT(AB8540_REGUSWHPREQVALID2, 0x03, 0x0e, 0x1f), + /* + * 0x02, SysClkReq2Valid1 + * ... + * 0x80, SysClkReq8Valid1 + */ + REG_INIT(AB8540_REGUSYSCLKREQVALID1, 0x03, 0x0f, 0xff), + /* + * 0x02, SysClkReq2Valid2 + * ... + * 0x80, SysClkReq8Valid2 + */ + REG_INIT(AB8540_REGUSYSCLKREQVALID2, 0x03, 0x10, 0xff), + /* + * 0x01, Vaux4SwHPReqValid + * 0x02, Vaux4HwHPReq2Valid + * 0x04, Vaux4HwHPReq1Valid + * 0x08, Vaux4SysClkReq1HPValid + */ + REG_INIT(AB8540_REGUVAUX4REQVALID, 0x03, 0x11, 0x0f), + /* + * 0x01, Vaux5SwHPReqValid + * 0x02, Vaux5HwHPReq2Valid + * 0x04, Vaux5HwHPReq1Valid + * 0x08, Vaux5SysClkReq1HPValid + */ + REG_INIT(AB8540_REGUVAUX5REQVALID, 0x03, 0x12, 0x0f), + /* + * 0x01, Vaux6SwHPReqValid + * 0x02, Vaux6HwHPReq2Valid + * 0x04, Vaux6HwHPReq1Valid + * 0x08, Vaux6SysClkReq1HPValid + */ + REG_INIT(AB8540_REGUVAUX6REQVALID, 0x03, 0x13, 0x0f), + /* + * 0x01, VclkbSwHPReqValid + * 0x02, VclkbHwHPReq2Valid + * 0x04, VclkbHwHPReq1Valid + * 0x08, VclkbSysClkReq1HPValid + */ + REG_INIT(AB8540_REGUVCLKBREQVALID, 0x03, 0x14, 0x0f), + /* + * 0x01, Vrf1SwHPReqValid + * 0x02, Vrf1HwHPReq2Valid + * 0x04, Vrf1HwHPReq1Valid + * 0x08, Vrf1SysClkReq1HPValid + */ + REG_INIT(AB8540_REGUVRF1REQVALID, 0x03, 0x15, 0x0f), + /* + * 0x02, VTVoutEna + * 0x04, Vintcore12Ena + * 0x38, Vintcore12Sel + * 0x40, Vintcore12LP + * 0x80, VTVoutLP + */ + REG_INIT(AB8540_REGUMISC1, 0x03, 0x80, 0xfe), + /* + * 0x02, VaudioEna + * 0x04, VdmicEna + * 0x08, Vamic1Ena + * 0x10, Vamic2Ena + * 0x20, Vamic12LP + * 0xC0, VdmicSel + */ + REG_INIT(AB8540_VAUDIOSUPPLY, 0x03, 0x83, 0xfe), + /* + * 0x01, Vamic1_dzout + * 0x02, Vamic2_dzout + */ + REG_INIT(AB8540_REGUCTRL1VAMIC, 0x03, 0x84, 0x03), + /* + * 0x07, VHSICSel + * 0x08, VHSICOffState + * 0x10, VHSIEna + * 0x20, VHSICLP + */ + REG_INIT(AB8540_VHSIC, 0x03, 0x87, 0x3f), + /* + * 0x07, VSDIOSel + * 0x08, VSDIOOffState + * 0x10, VSDIOEna + * 0x20, VSDIOLP + */ + REG_INIT(AB8540_VSDIO, 0x03, 0x88, 0x3f), + /* + * 0x03, Vsmps1Regu + * 0x0c, Vsmps1SelCtrl + * 0x10, Vsmps1AutoMode + * 0x20, Vsmps1PWMMode + */ + REG_INIT(AB8540_VSMPS1REGU, 0x04, 0x03, 0x3f), + /* + * 0x03, Vsmps2Regu + * 0x0c, Vsmps2SelCtrl + * 0x10, Vsmps2AutoMode + * 0x20, Vsmps2PWMMode + */ + REG_INIT(AB8540_VSMPS2REGU, 0x04, 0x04, 0x3f), + /* + * 0x03, Vsmps3Regu + * 0x0c, Vsmps3SelCtrl + * 0x10, Vsmps3AutoMode + * 0x20, Vsmps3PWMMode + * NOTE! 
PRCMU register + */ + REG_INIT(AB8540_VSMPS3REGU, 0x04, 0x05, 0x0f), + /* + * 0x03, VpllRegu + * 0x0c, VanaRegu + */ + REG_INIT(AB8540_VPLLVANAREGU, 0x04, 0x06, 0x0f), + /* + * 0x03, VextSupply1Regu + * 0x0c, VextSupply2Regu + * 0x30, VextSupply3Regu + * 0x40, ExtSupply2Bypass + * 0x80, ExtSupply3Bypass + */ + REG_INIT(AB8540_EXTSUPPLYREGU, 0x04, 0x08, 0xff), + /* + * 0x03, Vaux1Regu + * 0x0c, Vaux2Regu + */ + REG_INIT(AB8540_VAUX12REGU, 0x04, 0x09, 0x0f), + /* + * 0x0c, VRF1Regu + * 0x03, Vaux3Regu + */ + REG_INIT(AB8540_VRF1VAUX3REGU, 0x04, 0x0a, 0x0f), + /* + * 0x3f, Vsmps1Sel1 + */ + REG_INIT(AB8540_VSMPS1SEL1, 0x04, 0x13, 0x3f), + /* + * 0x3f, Vsmps1Sel2 + */ + REG_INIT(AB8540_VSMPS1SEL2, 0x04, 0x14, 0x3f), + /* + * 0x3f, Vsmps1Sel3 + */ + REG_INIT(AB8540_VSMPS1SEL3, 0x04, 0x15, 0x3f), + /* + * 0x3f, Vsmps2Sel1 + */ + REG_INIT(AB8540_VSMPS2SEL1, 0x04, 0x17, 0x3f), + /* + * 0x3f, Vsmps2Sel2 + */ + REG_INIT(AB8540_VSMPS2SEL2, 0x04, 0x18, 0x3f), + /* + * 0x3f, Vsmps2Sel3 + */ + REG_INIT(AB8540_VSMPS2SEL3, 0x04, 0x19, 0x3f), + /* + * 0x7f, Vsmps3Sel1 + * NOTE! PRCMU register + */ + REG_INIT(AB8540_VSMPS3SEL1, 0x04, 0x1b, 0x7f), + /* + * 0x7f, Vsmps3Sel2 + * NOTE! PRCMU register + */ + REG_INIT(AB8540_VSMPS3SEL2, 0x04, 0x1c, 0x7f), + /* + * 0x0f, Vaux1Sel + */ + REG_INIT(AB8540_VAUX1SEL, 0x04, 0x1f, 0x0f), + /* + * 0x0f, Vaux2Sel + */ + REG_INIT(AB8540_VAUX2SEL, 0x04, 0x20, 0x0f), + /* + * 0x07, Vaux3Sel + * 0x70, Vrf1Sel + */ + REG_INIT(AB8540_VRF1VAUX3SEL, 0x04, 0x21, 0x77), + /* + * 0x01, VextSupply12LP + */ + REG_INIT(AB8540_REGUCTRL2SPARE, 0x04, 0x22, 0x01), + /* + * 0x07, Vanasel + * 0x30, Vpllsel + */ + REG_INIT(AB8540_VANAVPLLSEL, 0x04, 0x29, 0x37), + /* + * 0x03, Vaux4RequestCtrl + */ + REG_INIT(AB8540_VAUX4REQCTRL, 0x04, 0x2d, 0x03), + /* + * 0x03, Vaux4Regu + */ + REG_INIT(AB8540_VAUX4REGU, 0x04, 0x2e, 0x03), + /* + * 0x0f, Vaux4Sel + */ + REG_INIT(AB8540_VAUX4SEL, 0x04, 0x2f, 0x0f), + /* + * 0x03, Vaux5RequestCtrl + */ + REG_INIT(AB8540_VAUX5REQCTRL, 0x04, 0x31, 0x03), + /* + * 0x03, Vaux5Regu + */ + REG_INIT(AB8540_VAUX5REGU, 0x04, 0x32, 0x03), + /* + * 0x3f, Vaux5Sel + */ + REG_INIT(AB8540_VAUX5SEL, 0x04, 0x33, 0x3f), + /* + * 0x03, Vaux6RequestCtrl + */ + REG_INIT(AB8540_VAUX6REQCTRL, 0x04, 0x34, 0x03), + /* + * 0x03, Vaux6Regu + */ + REG_INIT(AB8540_VAUX6REGU, 0x04, 0x35, 0x03), + /* + * 0x3f, Vaux6Sel + */ + REG_INIT(AB8540_VAUX6SEL, 0x04, 0x36, 0x3f), + /* + * 0x03, VCLKBRequestCtrl + */ + REG_INIT(AB8540_VCLKBREQCTRL, 0x04, 0x37, 0x03), + /* + * 0x03, VCLKBRegu + */ + REG_INIT(AB8540_VCLKBREGU, 0x04, 0x38, 0x03), + /* + * 0x07, VCLKBSel + */ + REG_INIT(AB8540_VCLKBSEL, 0x04, 0x39, 0x07), + /* + * 0x03, Vrf1RequestCtrl + */ + REG_INIT(AB8540_VRF1REQCTRL, 0x04, 0x3a, 0x03), + /* + * 0x01, VpllDisch + * 0x02, Vrf1Disch + * 0x04, Vaux1Disch + * 0x08, Vaux2Disch + * 0x10, Vaux3Disch + * 0x20, Vintcore12Disch + * 0x40, VTVoutDisch + * 0x80, VaudioDisch + */ + REG_INIT(AB8540_REGUCTRLDISCH, 0x04, 0x43, 0xff), + /* + * 0x02, VanaDisch + * 0x04, VdmicPullDownEna + * 0x08, VpllPullDownEna + * 0x10, VdmicDisch + */ + REG_INIT(AB8540_REGUCTRLDISCH2, 0x04, 0x44, 0x1e), + /* + * 0x01, Vaux4Disch + */ + REG_INIT(AB8540_REGUCTRLDISCH3, 0x04, 0x48, 0x01), + /* + * 0x01, Vaux5Disch + * 0x02, Vaux6Disch + * 0x04, VCLKBDisch + */ + REG_INIT(AB8540_REGUCTRLDISCH4, 0x04, 0x49, 0x07), +}; + static int ab8500_regulator_init_registers(struct platform_device *pdev, struct ab8500_reg_init *reg_init, int id, int mask, int value) @@ -2004,6 +2654,21 @@ static struct of_regulator_match 
ab8505_regulator_match[] = { { .name = "ab8500_ldo_ana", .driver_data = (void *) AB8505_LDO_ANA, }, }; +static struct of_regulator_match ab8540_regulator_match[] = { + { .name = "ab8500_ldo_aux1", .driver_data = (void *) AB8540_LDO_AUX1, }, + { .name = "ab8500_ldo_aux2", .driver_data = (void *) AB8540_LDO_AUX2, }, + { .name = "ab8500_ldo_aux3", .driver_data = (void *) AB8540_LDO_AUX3, }, + { .name = "ab8500_ldo_aux4", .driver_data = (void *) AB8540_LDO_AUX4, }, + { .name = "ab8500_ldo_intcore", .driver_data = (void *) AB8540_LDO_INTCORE, }, + { .name = "ab8500_ldo_tvout", .driver_data = (void *) AB8540_LDO_TVOUT, }, + { .name = "ab8500_ldo_audio", .driver_data = (void *) AB8540_LDO_AUDIO, }, + { .name = "ab8500_ldo_anamic1", .driver_data = (void *) AB8540_LDO_ANAMIC1, }, + { .name = "ab8500_ldo_amamic2", .driver_data = (void *) AB8540_LDO_ANAMIC2, }, + { .name = "ab8500_ldo_dmic", .driver_data = (void *) AB8540_LDO_DMIC, }, + { .name = "ab8500_ldo_ana", .driver_data = (void *) AB8540_LDO_ANA, }, + { .name = "ab8500_ldo_sdio", .driver_data = (void *) AB8540_LDO_SDIO, }, +}; + static struct of_regulator_match ab9540_regulator_match[] = { { .name = "ab8500_ldo_aux1", .driver_data = (void *) AB9540_LDO_AUX1, }, { .name = "ab8500_ldo_aux2", .driver_data = (void *) AB9540_LDO_AUX2, }, @@ -2063,6 +2728,11 @@ static int ab8500_regulator_probe(struct platform_device *pdev) regulator_info_size = ARRAY_SIZE(ab8505_regulator_info); reg_init = ab8505_reg_init; reg_init_size = AB8505_NUM_REGULATOR_REGISTERS; + } else if (is_ab8540(ab8500)) { + regulator_info = ab8540_regulator_info; + regulator_info_size = ARRAY_SIZE(ab8540_regulator_info); + reg_init = ab8540_reg_init; + reg_init_size = AB8540_NUM_REGULATOR_REGISTERS; } else { regulator_info = ab8500_regulator_info; regulator_info_size = ARRAY_SIZE(ab8500_regulator_info); diff --git a/include/linux/regulator/ab8500.h b/include/linux/regulator/ab8500.h index 9a7cf97e5040..bb0140c9d4f4 100644 --- a/include/linux/regulator/ab8500.h +++ b/include/linux/regulator/ab8500.h @@ -68,6 +68,25 @@ enum ab9540_regulator_id { AB9540_NUM_REGULATORS, }; +/* AB8540 regulators */ +enum ab8540_regulator_id { + AB8540_LDO_AUX1, + AB8540_LDO_AUX2, + AB8540_LDO_AUX3, + AB8540_LDO_AUX4, + AB8540_LDO_INTCORE, + AB8540_LDO_TVOUT, + AB8540_LDO_AUDIO, + AB8540_LDO_ANAMIC1, + AB8540_LDO_ANAMIC2, + AB8540_LDO_DMIC, + AB8540_LDO_ANA, + AB8540_LDO_SDIO, + AB8540_SYSCLKREQ_2, + AB8540_SYSCLKREQ_4, + AB8540_NUM_REGULATORS, +}; + /* AB8500, AB8505, and AB9540 register initialization */ struct ab8500_regulator_reg_init { int id; @@ -212,6 +231,73 @@ enum ab9540_regulator_reg { AB9540_NUM_REGULATOR_REGISTERS, }; +/* AB8540 registers */ +enum ab8540_regulator_reg { + AB8540_REGUREQUESTCTRL1, + AB8540_REGUREQUESTCTRL2, + AB8540_REGUREQUESTCTRL3, + AB8540_REGUREQUESTCTRL4, + AB8540_REGUSYSCLKREQ1HPVALID1, + AB8540_REGUSYSCLKREQ1HPVALID2, + AB8540_REGUHWHPREQ1VALID1, + AB8540_REGUHWHPREQ1VALID2, + AB8540_REGUHWHPREQ2VALID1, + AB8540_REGUHWHPREQ2VALID2, + AB8540_REGUSWHPREQVALID1, + AB8540_REGUSWHPREQVALID2, + AB8540_REGUSYSCLKREQVALID1, + AB8540_REGUSYSCLKREQVALID2, + AB8540_REGUVAUX4REQVALID, + AB8540_REGUVAUX5REQVALID, + AB8540_REGUVAUX6REQVALID, + AB8540_REGUVCLKBREQVALID, + AB8540_REGUVRF1REQVALID, + AB8540_REGUMISC1, + AB8540_VAUDIOSUPPLY, + AB8540_REGUCTRL1VAMIC, + AB8540_VHSIC, + AB8540_VSDIO, + AB8540_VSMPS1REGU, + AB8540_VSMPS2REGU, + AB8540_VSMPS3REGU, + AB8540_VPLLVANAREGU, + AB8540_EXTSUPPLYREGU, + AB8540_VAUX12REGU, + AB8540_VRF1VAUX3REGU, + AB8540_VSMPS1SEL1, + 
AB8540_VSMPS1SEL2, + AB8540_VSMPS1SEL3, + AB8540_VSMPS2SEL1, + AB8540_VSMPS2SEL2, + AB8540_VSMPS2SEL3, + AB8540_VSMPS3SEL1, + AB8540_VSMPS3SEL2, + AB8540_VAUX1SEL, + AB8540_VAUX2SEL, + AB8540_VRF1VAUX3SEL, + AB8540_REGUCTRL2SPARE, + AB8540_VAUX4REQCTRL, + AB8540_VAUX4REGU, + AB8540_VAUX4SEL, + AB8540_VAUX5REQCTRL, + AB8540_VAUX5REGU, + AB8540_VAUX5SEL, + AB8540_VAUX6REQCTRL, + AB8540_VAUX6REGU, + AB8540_VAUX6SEL, + AB8540_VCLKBREQCTRL, + AB8540_VCLKBREGU, + AB8540_VCLKBSEL, + AB8540_VRF1REQCTRL, + AB8540_REGUCTRLDISCH, + AB8540_REGUCTRLDISCH2, + AB8540_REGUCTRLDISCH3, + AB8540_REGUCTRLDISCH4, + AB8540_VSIMSYSCLKCTRL, + AB8540_VANAVPLLSEL, + AB8540_NUM_REGULATOR_REGISTERS, +}; + /* AB8500 external regulators */ struct ab8500_ext_regulator_cfg { bool hwreq; /* requires hw mode or high power mode */ -- cgit From 3cb7825bdc84d1d6c81ac9a2be201fe5bea5de05 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Fri, 29 Mar 2013 13:36:05 +0800 Subject: ARM: mxs: remove common.h All three remaining functions declared in common.h are implemented by clock driver. Create header include/linux/clk/mxs.h to contain them and remove common.h. Signed-off-by: Shawn Guo --- arch/arm/mach-mxs/include/mach/common.h | 20 -------------------- arch/arm/mach-mxs/mach-mxs.c | 2 +- include/linux/clk/mxs.h | 16 ++++++++++++++++ 3 files changed, 17 insertions(+), 21 deletions(-) delete mode 100644 arch/arm/mach-mxs/include/mach/common.h create mode 100644 include/linux/clk/mxs.h (limited to 'include/linux') diff --git a/arch/arm/mach-mxs/include/mach/common.h b/arch/arm/mach-mxs/include/mach/common.h deleted file mode 100644 index aca982c6d43f..000000000000 --- a/arch/arm/mach-mxs/include/mach/common.h +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Copyright 2004-2007 Freescale Semiconductor, Inc. All Rights Reserved. - */ - -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef __MACH_MXS_COMMON_H__ -#define __MACH_MXS_COMMON_H__ - -extern int mxs_saif_clkmux_select(unsigned int clkmux); - -extern int mx23_clocks_init(void); - -extern int mx28_clocks_init(void); - -#endif /* __MACH_MXS_COMMON_H__ */ diff --git a/arch/arm/mach-mxs/mach-mxs.c b/arch/arm/mach-mxs/mach-mxs.c index f346c432cda2..fc762579d30f 100644 --- a/arch/arm/mach-mxs/mach-mxs.c +++ b/arch/arm/mach-mxs/mach-mxs.c @@ -11,6 +11,7 @@ */ #include +#include #include #include #include @@ -30,7 +31,6 @@ #include #include #include -#include #include #include diff --git a/include/linux/clk/mxs.h b/include/linux/clk/mxs.h new file mode 100644 index 000000000000..90c30dc3efc7 --- /dev/null +++ b/include/linux/clk/mxs.h @@ -0,0 +1,16 @@ +/* + * Copyright (C) 2013 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __LINUX_CLK_MXS_H +#define __LINUX_CLK_MXS_H + +int mx23_clocks_init(void); +int mx28_clocks_init(void); +int mxs_saif_clkmux_select(unsigned int clkmux); + +#endif -- cgit From 543bb255a1987836e64f5b7a63664ead8b32b042 Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Tue, 26 Mar 2013 20:37:57 -0600 Subject: spi: add ability to validate xfer->bits_per_word in SPI core Allow SPI masters to define the set of bits_per_word values they support. 
If they do this, then the SPI core will reject transfers that attempt to use an unsupported bits_per_word value. This eliminates the need for each SPI driver to implement this checking in most cases. Signed-off-by: Stephen Warren Signed-off-by: Mark Brown --- drivers/spi/spi.c | 8 ++++++++ include/linux/spi/spi.h | 8 ++++++++ 2 files changed, 16 insertions(+) (limited to 'include/linux') diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index f996c600eb8c..0cabf1560550 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -1377,6 +1377,14 @@ static int __spi_async(struct spi_device *spi, struct spi_message *message) xfer->bits_per_word = spi->bits_per_word; if (!xfer->speed_hz) xfer->speed_hz = spi->max_speed_hz; + if (master->bits_per_word_mask) { + /* Only 32 bits fit in the mask */ + if (xfer->bits_per_word > 32) + return -EINVAL; + if (!(master->bits_per_word_mask & + BIT(xfer->bits_per_word - 1))) + return -EINVAL; + } } message->spi = spi; diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 38c2b925923d..733eb5ee31c5 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -228,6 +228,11 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) * every chipselect is connected to a slave. * @dma_alignment: SPI controller constraint on DMA buffers alignment. * @mode_bits: flags understood by this controller driver + * @bits_per_word_mask: A mask indicating which values of bits_per_word are + * supported by the driver. Bit n indicates that a bits_per_word n+1 is + * suported. If set, the SPI core will reject any transfer with an + * unsupported bits_per_word. If not set, this value is simply ignored, + * and it's up to the individual driver to perform any validation. * @flags: other constraints relevant to this driver * @bus_lock_spinlock: spinlock for SPI bus locking * @bus_lock_mutex: mutex for SPI bus locking @@ -301,6 +306,9 @@ struct spi_master { /* spi_device.mode flags understood by this controller driver */ u16 mode_bits; + /* bitmask of supported bits_per_word for transfers */ + u32 bits_per_word_mask; + /* other constraints relevant to this driver */ u16 flags; #define SPI_MASTER_HALF_DUPLEX BIT(0) /* can't do full duplex */ -- cgit From 9fac2cf316b070ae43d2ae2525e381ff2d1d68aa Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 24 Jan 2013 16:10:27 +0100 Subject: perf/x86: Add flags to event constraints This patch adds a flags field to each event constraint. It can be used to store event specific features which can then later be used by scheduling code or low-level x86 code. The flags are propagated into event->hw.flags during the get_event_constraint() call. They are cleared during the put_event_constraint() call. This mechanism is going to be used by the PEBS-LL patches. It avoids defining yet another table to hold event specific information. 
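As a sketch only (the event code, the counter bitmask and the PERF_X86_EVENT_DEMO flag below are made-up illustrations, not names defined by this patch), a constraint that carries a flag is declared through the extended macro:

static struct event_constraint demo_constraint =
        __EVENT_CONSTRAINT(0x01cd,                /* event code */
                           0xf,                   /* counter bitmask */
                           INTEL_ARCH_EVENT_MASK, /* cmask */
                           HWEIGHT(0xf),          /* weight */
                           0,                     /* overlap */
                           PERF_X86_EVENT_DEMO);  /* assumed flag */

When such a constraint is selected, x86_get_event_constraints() ORs c->flags into event->hw.flags; intel_put_event_constraints() clears the flags again when the event releases its constraint.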
Signed-off-by: Stephane Eranian Cc: peterz@infradead.org Cc: ak@linux.intel.com Cc: jolsa@redhat.com Cc: namhyung.kim@lge.com Link: http://lkml.kernel.org/r/1359040242-8269-4-git-send-email-eranian@google.com Signed-off-by: Ingo Molnar Signed-off-by: Arnaldo Carvalho de Melo --- arch/x86/kernel/cpu/perf_event.c | 2 +- arch/x86/kernel/cpu/perf_event.h | 8 +++++--- arch/x86/kernel/cpu/perf_event_intel.c | 6 +++++- arch/x86/kernel/cpu/perf_event_intel_ds.c | 4 +++- arch/x86/kernel/cpu/perf_event_intel_uncore.c | 2 +- include/linux/perf_event.h | 1 + 6 files changed, 16 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 6e8ab0427041..8ba51518f689 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1489,7 +1489,7 @@ static int __init init_hw_perf_events(void) unconstrained = (struct event_constraint) __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, - 0, x86_pmu.num_counters, 0); + 0, x86_pmu.num_counters, 0, 0); x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */ x86_pmu_format_group.attrs = x86_pmu.format_attrs; diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index b1518eed5f99..9686d38eb458 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -59,6 +59,7 @@ struct event_constraint { u64 cmask; int weight; int overlap; + int flags; }; struct amd_nb { @@ -170,16 +171,17 @@ struct cpu_hw_events { void *kfree_on_online; }; -#define __EVENT_CONSTRAINT(c, n, m, w, o) {\ +#define __EVENT_CONSTRAINT(c, n, m, w, o, f) {\ { .idxmsk64 = (n) }, \ .code = (c), \ .cmask = (m), \ .weight = (w), \ .overlap = (o), \ + .flags = f, \ } #define EVENT_CONSTRAINT(c, n, m) \ - __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0) + __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0, 0) /* * The overlap flag marks event constraints with overlapping counter @@ -203,7 +205,7 @@ struct cpu_hw_events { * and its counter masks must be kept at a minimum. */ #define EVENT_CONSTRAINT_OVERLAP(c, n, m) \ - __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1) + __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1, 0) /* * Constraint on the Event code. 
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index dab7580c47ae..df3beaac3397 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1392,8 +1392,11 @@ x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) if (x86_pmu.event_constraints) { for_each_event_constraint(c, x86_pmu.event_constraints) { - if ((event->hw.config & c->cmask) == c->code) + if ((event->hw.config & c->cmask) == c->code) { + /* hw.flags zeroed at initialization */ + event->hw.flags |= c->flags; return c; + } } } @@ -1438,6 +1441,7 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc, static void intel_put_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) { + event->hw.flags = 0; intel_put_shared_regs_event_constraints(cpuc, event); } diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 826054a4f2ee..f30d85bcbda9 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -430,8 +430,10 @@ struct event_constraint *intel_pebs_constraints(struct perf_event *event) if (x86_pmu.pebs_constraints) { for_each_event_constraint(c, x86_pmu.pebs_constraints) { - if ((event->hw.config & c->cmask) == c->code) + if ((event->hw.config & c->cmask) == c->code) { + event->hw.flags |= c->flags; return c; + } } } diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index b43200dbfe7e..75da9e18b128 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -2438,7 +2438,7 @@ static int __init uncore_type_init(struct intel_uncore_type *type) type->unconstrainted = (struct event_constraint) __EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1, - 0, type->num_counters, 0); + 0, type->num_counters, 0, 0); for (i = 0; i < type->num_boxes; i++) { pmus[i].func_id = -1; diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 1c592114c437..cd3bb2cd9494 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -127,6 +127,7 @@ struct hw_perf_event { int event_base_rdpmc; int idx; int last_cpu; + int flags; struct hw_perf_event_extra extra_reg; struct hw_perf_event_extra branch_reg; -- cgit From c3feedf2aaf9ac8bad6f19f5d21e4ee0b4b87e9c Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 24 Jan 2013 16:10:28 +0100 Subject: perf/core: Add weighted samples For some events it's useful to weight sample with a hardware provided number. This expresses how expensive the action the sample represent was. This allows the profiler to scale the samples to be more informative to the programmer. There is already the period which is used similarly, but it means something different, so I chose to not overload it. Instead a new sample type for WEIGHT is added. Can be used for multiple things. Initially it is used for TSX abort costs and profiling by memory latencies (so to make expensive load appear higher up in the histograms). The concept is quite generic and can be extended to many other kinds of events or architectures, as long as the hardware provides suitable auxillary values. In principle it could be also used for software tracepoints. This adds the generic glue. A new optional sample format for a 64-bit weight value. 
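As an illustrative user space sketch (the raw event code 0x01cd is an assumed placeholder for a precise memory event, not something this patch defines), requesting weighted samples only needs the new bit in sample_type:

        struct perf_event_attr attr = {
                .size          = sizeof(attr),
                .type          = PERF_TYPE_RAW,
                .config        = 0x01cd,        /* assumed precise event */
                .sample_period = 10000,
                .sample_type   = PERF_SAMPLE_IP | PERF_SAMPLE_WEIGHT,
                .precise_ip    = 2,
        };

Each PERF_RECORD_SAMPLE then carries the trailing u64 weight field documented in the header update below.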
Signed-off-by: Andi Kleen Signed-off-by: Stephane Eranian Cc: peterz@infradead.org Cc: acme@redhat.com Cc: jolsa@redhat.com Cc: namhyung.kim@lge.com Link: http://lkml.kernel.org/r/1359040242-8269-5-git-send-email-eranian@google.com Signed-off-by: Ingo Molnar Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/perf_event.h | 2 ++ include/uapi/linux/perf_event.h | 6 +++++- kernel/events/core.c | 6 ++++++ 3 files changed, 13 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index cd3bb2cd9494..7ce0b37b155b 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -573,6 +573,7 @@ struct perf_sample_data { struct perf_branch_stack *br_stack; struct perf_regs_user regs_user; u64 stack_user_size; + u64 weight; }; static inline void perf_sample_data_init(struct perf_sample_data *data, @@ -586,6 +587,7 @@ static inline void perf_sample_data_init(struct perf_sample_data *data, data->regs_user.abi = PERF_SAMPLE_REGS_ABI_NONE; data->regs_user.regs = NULL; data->stack_user_size = 0; + data->weight = 0; } extern void perf_output_sample(struct perf_output_handle *handle, diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 9fa9c622a7f4..cdc255da02e2 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -132,8 +132,10 @@ enum perf_event_sample_format { PERF_SAMPLE_BRANCH_STACK = 1U << 11, PERF_SAMPLE_REGS_USER = 1U << 12, PERF_SAMPLE_STACK_USER = 1U << 13, + PERF_SAMPLE_WEIGHT = 1U << 14, + + PERF_SAMPLE_MAX = 1U << 15, /* non-ABI */ - PERF_SAMPLE_MAX = 1U << 14, /* non-ABI */ }; /* @@ -588,6 +590,8 @@ enum perf_event_type { * { u64 size; * char data[size]; * u64 dyn_size; } && PERF_SAMPLE_STACK_USER + * + * { u64 weight; } && PERF_SAMPLE_WEIGHT * }; */ PERF_RECORD_SAMPLE = 9, diff --git a/kernel/events/core.c b/kernel/events/core.c index 7b4a55d41efc..9e3edb272b3e 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -976,6 +976,9 @@ static void perf_event__header_size(struct perf_event *event) if (sample_type & PERF_SAMPLE_PERIOD) size += sizeof(data->period); + if (sample_type & PERF_SAMPLE_WEIGHT) + size += sizeof(data->weight); + if (sample_type & PERF_SAMPLE_READ) size += event->read_size; @@ -4193,6 +4196,9 @@ void perf_output_sample(struct perf_output_handle *handle, perf_output_sample_ustack(handle, data->stack_user_size, data->regs_user.regs); + + if (sample_type & PERF_SAMPLE_WEIGHT) + perf_output_put(handle, data->weight); } void perf_prepare_sample(struct perf_event_header *header, -- cgit From d6be9ad6c960f43800a6f118932bc8a5a4eadcd1 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 24 Jan 2013 16:10:31 +0100 Subject: perf: Add generic memory sampling interface This patch adds PERF_SAMPLE_DATA_SRC. PERF_SAMPLE_DATA_SRC collects the data source, i.e., where did the data associated with the sampled instruction come from. Information is stored in a perf_mem_data_src structure. It contains opcode, mem level, tlb, snoop, lock information, subject to availability in hardware. 
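As a sketch (not part of this patch), a PMU driver filling in a sample could encode "load that hit modified data in a remote cache, one hop away" with the PERF_MEM_S() helper introduced below:

        union perf_mem_data_src dsrc;

        dsrc.val = PERF_MEM_S(OP, LOAD) |
                   PERF_MEM_S(LVL, REM_CCE1) | PERF_MEM_S(LVL, HIT) |
                   PERF_MEM_S(SNOOP, HITM) |
                   PERF_MEM_S(LOCK, NA) |
                   PERF_MEM_S(TLB, L1) | PERF_MEM_S(TLB, HIT);
        data->data_src = dsrc;  /* emitted when PERF_SAMPLE_DATA_SRC is set */

Consumers decode the same value through the bit fields of union perf_mem_data_src.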
Signed-off-by: Stephane Eranian Cc: peterz@infradead.org Cc: ak@linux.intel.com Cc: acme@redhat.com Cc: jolsa@redhat.com Cc: namhyung.kim@lge.com Link: http://lkml.kernel.org/r/1359040242-8269-8-git-send-email-eranian@google.com Signed-off-by: Ingo Molnar Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/perf_event.h | 2 ++ include/uapi/linux/perf_event.h | 68 +++++++++++++++++++++++++++++++++++++++-- kernel/events/core.c | 6 ++++ 3 files changed, 74 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 7ce0b37b155b..42a6daaf4e0a 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -568,6 +568,7 @@ struct perf_sample_data { u32 reserved; } cpu_entry; u64 period; + union perf_mem_data_src data_src; struct perf_callchain_entry *callchain; struct perf_raw_record *raw; struct perf_branch_stack *br_stack; @@ -588,6 +589,7 @@ static inline void perf_sample_data_init(struct perf_sample_data *data, data->regs_user.regs = NULL; data->stack_user_size = 0; data->weight = 0; + data->data_src.val = 0; } extern void perf_output_sample(struct perf_output_handle *handle, diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index cdc255da02e2..5b5762006855 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -133,9 +133,9 @@ enum perf_event_sample_format { PERF_SAMPLE_REGS_USER = 1U << 12, PERF_SAMPLE_STACK_USER = 1U << 13, PERF_SAMPLE_WEIGHT = 1U << 14, + PERF_SAMPLE_DATA_SRC = 1U << 15, - PERF_SAMPLE_MAX = 1U << 15, /* non-ABI */ - + PERF_SAMPLE_MAX = 1U << 16, /* non-ABI */ }; /* @@ -592,6 +592,7 @@ enum perf_event_type { * u64 dyn_size; } && PERF_SAMPLE_STACK_USER * * { u64 weight; } && PERF_SAMPLE_WEIGHT + * { u64 data_src; } && PERF_SAMPLE_DATA_SRC * }; */ PERF_RECORD_SAMPLE = 9, @@ -617,4 +618,67 @@ enum perf_callchain_context { #define PERF_FLAG_FD_OUTPUT (1U << 1) #define PERF_FLAG_PID_CGROUP (1U << 2) /* pid=cgroup id, per-cpu mode only */ +union perf_mem_data_src { + __u64 val; + struct { + __u64 mem_op:5, /* type of opcode */ + mem_lvl:14, /* memory hierarchy level */ + mem_snoop:5, /* snoop mode */ + mem_lock:2, /* lock instr */ + mem_dtlb:7, /* tlb access */ + mem_rsvd:31; + }; +}; + +/* type of opcode (load/store/prefetch,code) */ +#define PERF_MEM_OP_NA 0x01 /* not available */ +#define PERF_MEM_OP_LOAD 0x02 /* load instruction */ +#define PERF_MEM_OP_STORE 0x04 /* store instruction */ +#define PERF_MEM_OP_PFETCH 0x08 /* prefetch */ +#define PERF_MEM_OP_EXEC 0x10 /* code (execution) */ +#define PERF_MEM_OP_SHIFT 0 + +/* memory hierarchy (memory level, hit or miss) */ +#define PERF_MEM_LVL_NA 0x01 /* not available */ +#define PERF_MEM_LVL_HIT 0x02 /* hit level */ +#define PERF_MEM_LVL_MISS 0x04 /* miss level */ +#define PERF_MEM_LVL_L1 0x08 /* L1 */ +#define PERF_MEM_LVL_LFB 0x10 /* Line Fill Buffer */ +#define PERF_MEM_LVL_L2 0x20 /* L2 hit */ +#define PERF_MEM_LVL_L3 0x40 /* L3 hit */ +#define PERF_MEM_LVL_LOC_RAM 0x80 /* Local DRAM */ +#define PERF_MEM_LVL_REM_RAM1 0x100 /* Remote DRAM (1 hop) */ +#define PERF_MEM_LVL_REM_RAM2 0x200 /* Remote DRAM (2 hops) */ +#define PERF_MEM_LVL_REM_CCE1 0x400 /* Remote Cache (1 hop) */ +#define PERF_MEM_LVL_REM_CCE2 0x800 /* Remote Cache (2 hops) */ +#define PERF_MEM_LVL_IO 0x1000 /* I/O memory */ +#define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */ +#define PERF_MEM_LVL_SHIFT 5 + +/* snoop mode */ +#define PERF_MEM_SNOOP_NA 0x01 /* not available */ +#define PERF_MEM_SNOOP_NONE 0x02 
/* no snoop */ +#define PERF_MEM_SNOOP_HIT 0x04 /* snoop hit */ +#define PERF_MEM_SNOOP_MISS 0x08 /* snoop miss */ +#define PERF_MEM_SNOOP_HITM 0x10 /* snoop hit modified */ +#define PERF_MEM_SNOOP_SHIFT 19 + +/* locked instruction */ +#define PERF_MEM_LOCK_NA 0x01 /* not available */ +#define PERF_MEM_LOCK_LOCKED 0x02 /* locked transaction */ +#define PERF_MEM_LOCK_SHIFT 24 + +/* TLB access */ +#define PERF_MEM_TLB_NA 0x01 /* not available */ +#define PERF_MEM_TLB_HIT 0x02 /* hit level */ +#define PERF_MEM_TLB_MISS 0x04 /* miss level */ +#define PERF_MEM_TLB_L1 0x08 /* L1 */ +#define PERF_MEM_TLB_L2 0x10 /* L2 */ +#define PERF_MEM_TLB_WK 0x20 /* Hardware Walker*/ +#define PERF_MEM_TLB_OS 0x40 /* OS fault handler */ +#define PERF_MEM_TLB_SHIFT 26 + +#define PERF_MEM_S(a, s) \ + (((u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT) + #endif /* _UAPI_LINUX_PERF_EVENT_H */ diff --git a/kernel/events/core.c b/kernel/events/core.c index 9e3edb272b3e..77c96d18c23a 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -982,6 +982,9 @@ static void perf_event__header_size(struct perf_event *event) if (sample_type & PERF_SAMPLE_READ) size += event->read_size; + if (sample_type & PERF_SAMPLE_DATA_SRC) + size += sizeof(data->data_src.val); + event->header_size = size; } @@ -4199,6 +4202,9 @@ void perf_output_sample(struct perf_output_handle *handle, if (sample_type & PERF_SAMPLE_WEIGHT) perf_output_put(handle, data->weight); + + if (sample_type & PERF_SAMPLE_DATA_SRC) + perf_output_put(handle, data->data_src.val); } void perf_prepare_sample(struct perf_event_header *header, -- cgit From ff45262a85dbf1bc74463c5dcea1d71a406d4d8e Mon Sep 17 00:00:00 2001 From: "Kim, Milo" Date: Mon, 18 Feb 2013 21:10:14 -0800 Subject: leds: add new LP5562 LED driver LP5562 can drive up to 4 channels, RGB and White. LEDs can be controlled directly via the led class control interface. LP55xx common driver LP5562 is one of LP55xx family device, so LP55xx common code are used. On the other hand, chip specific configuration is defined in the structure 'lp55xx_device_config' LED pattern data LP5562 has also internal program memory which is used for running various LED patterns. LP5562 driver supports the firmware interface and the predefined pattern data as well. LP5562 device attributes: 'led_pattern' and 'engine_mux' A 'led_pattern' is an index code which runs the predefined pattern data. And 'engine_mux' is updated with the firmware interface is activated. Detailed description has been updated in the documentation files, 'leds-lp55xx.txt' and 'leds-lp5562.txt'. Changes on the header file LP5562 configurable definitions are added. Pattern RGB data is fixed as constant value. (No side effect on other devices, LP5521 or LP5523.) (cooloney@gmail.com: remove redundant mutex_unlock(). 
Reported by Dan Carpenter ) Signed-off-by: Milo(Woogyom) Kim Signed-off-by: Bryan Wu --- Documentation/leds/00-INDEX | 2 + Documentation/leds/leds-lp5562.txt | 135 +++++++ Documentation/leds/leds-lp55xx.txt | 46 ++- drivers/leds/Kconfig | 14 +- drivers/leds/Makefile | 1 + drivers/leds/leds-lp5562.c | 593 ++++++++++++++++++++++++++++++ drivers/leds/leds-lp55xx-common.c | 2 +- include/linux/platform_data/leds-lp55xx.h | 13 +- 8 files changed, 799 insertions(+), 7 deletions(-) create mode 100644 Documentation/leds/leds-lp5562.txt create mode 100644 drivers/leds/leds-lp5562.c (limited to 'include/linux') diff --git a/Documentation/leds/00-INDEX b/Documentation/leds/00-INDEX index 5246090ef15c..1ecd1596633e 100644 --- a/Documentation/leds/00-INDEX +++ b/Documentation/leds/00-INDEX @@ -6,6 +6,8 @@ leds-lp5521.txt - notes on how to use the leds-lp5521 driver. leds-lp5523.txt - notes on how to use the leds-lp5523 driver. +leds-lp5562.txt + - notes on how to use the leds-lp5562 driver. leds-lp55xx.txt - description about lp55xx common driver. leds-lm3556.txt diff --git a/Documentation/leds/leds-lp5562.txt b/Documentation/leds/leds-lp5562.txt new file mode 100644 index 000000000000..96061000dd93 --- /dev/null +++ b/Documentation/leds/leds-lp5562.txt @@ -0,0 +1,135 @@ +Kernel driver for LP5562 +======================== + +* TI LP5562 LED Driver + +Author: Milo(Woogyom) Kim + +Description + + LP5562 can drive up to 4 channels. R/G/B and White. + LEDs can be controlled directly via the led class control interface. + + All four channels can be also controlled using the engine micro programs. + LP5562 has the internal program memory for running various LED patterns. + For the details, please refer to 'firmware' section in leds-lp55xx.txt + +Device attribute: engine_mux + + 3 Engines are allocated in LP5562, but the number of channel is 4. + Therefore each channel should be mapped to the engine number. + Value : RGB or W + + This attribute is used for programming LED data with the firmware interface. + Unlike the LP5521/LP5523/55231, LP5562 has unique feature for the engine mux, + so additional sysfs is required. + + LED Map + Red ... Engine 1 (fixed) + Green ... Engine 2 (fixed) + Blue ... Engine 3 (fixed) + White ... Engine 1 or 2 or 3 (selective) + +How to load the program data using engine_mux + + Before loading the LP5562 program data, engine_mux should be written between + the engine selection and loading the firmware. + Engine mux has two different mode, RGB and W. + RGB is used for loading RGB program data, W is used for W program data. + + For example, run blinking green channel pattern, + echo 2 > /sys/bus/i2c/devices/xxxx/select_engine # 2 is for green channel + echo "RGB" > /sys/bus/i2c/devices/xxxx/engine_mux # engine mux for RGB + echo 1 > /sys/class/firmware/lp5562/loading + echo "4000600040FF6000" > /sys/class/firmware/lp5562/data + echo 0 > /sys/class/firmware/lp5562/loading + echo 1 > /sys/bus/i2c/devices/xxxx/run_engine + + To run a blinking white pattern, + echo 1 or 2 or 3 > /sys/bus/i2c/devices/xxxx/select_engine + echo "W" > /sys/bus/i2c/devices/xxxx/engine_mux + echo 1 > /sys/class/firmware/lp5562/loading + echo "4000600040FF6000" > /sys/class/firmware/lp5562/data + echo 0 > /sys/class/firmware/lp5562/loading + echo 1 > /sys/bus/i2c/devices/xxxx/run_engine + +How to load the predefined patterns + + Please refer to 'leds-lp55xx.txt" + +Setting Current of Each Channel + + Like LP5521 and LP5523/55231, LP5562 provides LED current settings. 
+ The 'led_current' and 'max_current' are used. + +(Example of Platform data) + +To configure the platform specific data, lp55xx_platform_data structure is used. + +static struct lp55xx_led_config lp5562_led_config[] = { + { + .name = "R", + .chan_nr = 0, + .led_current = 20, + .max_current = 40, + }, + { + .name = "G", + .chan_nr = 1, + .led_current = 20, + .max_current = 40, + }, + { + .name = "B", + .chan_nr = 2, + .led_current = 20, + .max_current = 40, + }, + { + .name = "W", + .chan_nr = 3, + .led_current = 20, + .max_current = 40, + }, +}; + +static int lp5562_setup(void) +{ + /* setup HW resources */ +} + +static void lp5562_release(void) +{ + /* Release HW resources */ +} + +static void lp5562_enable(bool state) +{ + /* Control of chip enable signal */ +} + +static struct lp55xx_platform_data lp5562_platform_data = { + .led_config = lp5562_led_config, + .num_channels = ARRAY_SIZE(lp5562_led_config), + .setup_resources = lp5562_setup, + .release_resources = lp5562_release, + .enable = lp5562_enable, +}; + +If the current is set to 0 in the platform data, that channel is +disabled and it is not visible in the sysfs. + +The 'update_config' : CONFIG register (ADDR 08h) +This value is platform-specific data. +If update_config is not defined, the CONFIG register is set with +'LP5562_PWRSAVE_EN | LP5562_CLK_AUTO'. +(Enable auto-powersave, set automatic clock source selection) + +#define LP5562_CONFIGS (LP5562_PWM_HF | LP5562_PWRSAVE_EN | \ + LP5562_CLK_SRC_EXT) + +static struct lp55xx_platform_data lp5562_pdata = { + .led_config = lp5562_led_config, + .num_channels = ARRAY_SIZE(lp5562_led_config), + .update_config = LP5562_CONFIGS, +}; diff --git a/Documentation/leds/leds-lp55xx.txt b/Documentation/leds/leds-lp55xx.txt index ced41868d2d1..eec8fa2ffe4e 100644 --- a/Documentation/leds/leds-lp55xx.txt +++ b/Documentation/leds/leds-lp55xx.txt @@ -5,7 +5,7 @@ Authors: Milo(Woogyom) Kim Description ----------- -LP5521, LP5523/55231 have common features as below. +LP5521, LP5523/55231 and LP5562 have common features as below. Register access via the I2C Device initialization/deinitialization @@ -116,3 +116,47 @@ To support this, 'run_engine' and 'firmware_cb' are configurable in each driver. run_engine : Control the selected engine firmware_cb : The callback function after loading the firmware is done. Chip specific commands for loading and updating program memory. + +( Predefined pattern data ) + +Without the firmware interface, LP55xx driver provides another method for +loading a LED pattern. That is 'predefined' pattern. +A predefined pattern is defined in the platform data and load it(or them) +via the sysfs if needed. +To use the predefined pattern concept, 'patterns' and 'num_patterns' should be +configured. + + Example of predefined pattern data: + + /* mode_1: blinking data */ + static const u8 mode_1[] = { + 0x40, 0x00, 0x60, 0x00, 0x40, 0xFF, 0x60, 0x00, + }; + + /* mode_2: always on */ + static const u8 mode_2[] = { 0x40, 0xFF, }; + + struct lp55xx_predef_pattern board_led_patterns[] = { + { + .r = mode_1, + .size_r = ARRAY_SIZE(mode_1), + }, + { + .b = mode_2, + .size_b = ARRAY_SIZE(mode_2), + }, + } + + struct lp55xx_platform_data lp5562_pdata = { + ... + .patterns = board_led_patterns, + .num_patterns = ARRAY_SIZE(board_led_patterns), + }; + +Then, mode_1 and mode_2 can be run via through the sysfs. 
+ + echo 1 > /sys/bus/i2c/devices/xxxx/led_pattern # red blinking LED pattern + echo 2 > /sys/bus/i2c/devices/xxxx/led_pattern # blue LED always on + +To stop running pattern, + echo 0 > /sys/bus/i2c/devices/xxxx/led_pattern diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig index ec50824c02ec..c7f755034375 100644 --- a/drivers/leds/Kconfig +++ b/drivers/leds/Kconfig @@ -194,8 +194,8 @@ config LEDS_LP3944 module will be called leds-lp3944. config LEDS_LP55XX_COMMON - tristate "Common Driver for TI/National LP5521 and LP5523/55231" - depends on LEDS_LP5521 || LEDS_LP5523 + tristate "Common Driver for TI/National LP5521, LP5523/55231 and LP5562" + depends on LEDS_LP5521 || LEDS_LP5523 || LEDS_LP5562 select FW_LOADER help This option supports common operations for LP5521 and LP5523/55231 @@ -222,6 +222,16 @@ config LEDS_LP5523 Driver provides direct control via LED class and interface for programming the engines. +config LEDS_LP5562 + tristate "LED Support for TI LP5562 LED driver chip" + depends on LEDS_CLASS && I2C + select LEDS_LP55XX_COMMON + help + If you say yes here you get support for TI LP5562 LED driver. + It is 4 channels chip with programmable engines. + Driver provides direct control via LED class and interface for + programming the engines. + config LEDS_LP8788 tristate "LED support for the TI LP8788 PMIC" depends on LEDS_CLASS diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile index 215e7e3b6173..ab8f5c549ad3 100644 --- a/drivers/leds/Makefile +++ b/drivers/leds/Makefile @@ -26,6 +26,7 @@ obj-$(CONFIG_LEDS_LP3944) += leds-lp3944.o obj-$(CONFIG_LEDS_LP55XX_COMMON) += leds-lp55xx-common.o obj-$(CONFIG_LEDS_LP5521) += leds-lp5521.o obj-$(CONFIG_LEDS_LP5523) += leds-lp5523.o +obj-$(CONFIG_LEDS_LP5562) += leds-lp5562.o obj-$(CONFIG_LEDS_LP8788) += leds-lp8788.o obj-$(CONFIG_LEDS_TCA6507) += leds-tca6507.o obj-$(CONFIG_LEDS_CLEVO_MAIL) += leds-clevo-mail.o diff --git a/drivers/leds/leds-lp5562.c b/drivers/leds/leds-lp5562.c new file mode 100644 index 000000000000..f8b927788c3a --- /dev/null +++ b/drivers/leds/leds-lp5562.c @@ -0,0 +1,593 @@ +/* + * LP5562 LED driver + * + * Copyright (C) 2013 Texas Instruments + * + * Author: Milo(Woogyom) Kim + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "leds-lp55xx-common.h" + +#define LP5562_PROGRAM_LENGTH 32 +#define LP5562_MAX_LEDS 4 + +/* ENABLE Register 00h */ +#define LP5562_REG_ENABLE 0x00 +#define LP5562_EXEC_ENG1_M 0x30 +#define LP5562_EXEC_ENG2_M 0x0C +#define LP5562_EXEC_ENG3_M 0x03 +#define LP5562_EXEC_M 0x3F +#define LP5562_MASTER_ENABLE 0x40 /* Chip master enable */ +#define LP5562_LOGARITHMIC_PWM 0x80 /* Logarithmic PWM adjustment */ +#define LP5562_EXEC_RUN 0x2A +#define LP5562_ENABLE_DEFAULT \ + (LP5562_MASTER_ENABLE | LP5562_LOGARITHMIC_PWM) +#define LP5562_ENABLE_RUN_PROGRAM \ + (LP5562_ENABLE_DEFAULT | LP5562_EXEC_RUN) + +/* OPMODE Register 01h */ +#define LP5562_REG_OP_MODE 0x01 +#define LP5562_MODE_ENG1_M 0x30 +#define LP5562_MODE_ENG2_M 0x0C +#define LP5562_MODE_ENG3_M 0x03 +#define LP5562_LOAD_ENG1 0x10 +#define LP5562_LOAD_ENG2 0x04 +#define LP5562_LOAD_ENG3 0x01 +#define LP5562_RUN_ENG1 0x20 +#define LP5562_RUN_ENG2 0x08 +#define LP5562_RUN_ENG3 0x02 +#define LP5562_ENG1_IS_LOADING(mode) \ + ((mode & LP5562_MODE_ENG1_M) == LP5562_LOAD_ENG1) +#define LP5562_ENG2_IS_LOADING(mode) \ + ((mode & LP5562_MODE_ENG2_M) == LP5562_LOAD_ENG2) +#define LP5562_ENG3_IS_LOADING(mode) \ + ((mode & LP5562_MODE_ENG3_M) == LP5562_LOAD_ENG3) + +/* BRIGHTNESS Registers */ +#define LP5562_REG_R_PWM 0x04 +#define LP5562_REG_G_PWM 0x03 +#define LP5562_REG_B_PWM 0x02 +#define LP5562_REG_W_PWM 0x0E + +/* CURRENT Registers */ +#define LP5562_REG_R_CURRENT 0x07 +#define LP5562_REG_G_CURRENT 0x06 +#define LP5562_REG_B_CURRENT 0x05 +#define LP5562_REG_W_CURRENT 0x0F + +/* CONFIG Register 08h */ +#define LP5562_REG_CONFIG 0x08 +#define LP5562_DEFAULT_CFG \ + (LP5562_PWM_HF | LP5562_PWRSAVE_EN | LP5562_CLK_INT) + +/* RESET Register 0Dh */ +#define LP5562_REG_RESET 0x0D +#define LP5562_RESET 0xFF + +/* PROGRAM ENGINE Registers */ +#define LP5562_REG_PROG_MEM_ENG1 0x10 +#define LP5562_REG_PROG_MEM_ENG2 0x30 +#define LP5562_REG_PROG_MEM_ENG3 0x50 + +/* LEDMAP Register 70h */ +#define LP5562_REG_ENG_SEL 0x70 +#define LP5562_ENG_SEL_PWM 0 +#define LP5562_ENG_FOR_RGB_M 0x3F +#define LP5562_ENG_SEL_RGB 0x1B /* R:ENG1, G:ENG2, B:ENG3 */ +#define LP5562_ENG_FOR_W_M 0xC0 +#define LP5562_ENG1_FOR_W 0x40 /* W:ENG1 */ +#define LP5562_ENG2_FOR_W 0x80 /* W:ENG2 */ +#define LP5562_ENG3_FOR_W 0xC0 /* W:ENG3 */ + +/* Program Commands */ +#define LP5562_CMD_DISABLE 0x00 +#define LP5562_CMD_LOAD 0x15 +#define LP5562_CMD_RUN 0x2A +#define LP5562_CMD_DIRECT 0x3F +#define LP5562_PATTERN_OFF 0 + +static inline void lp5562_wait_opmode_done(void) +{ + /* operation mode change needs to be longer than 153 us */ + usleep_range(200, 300); +} + +static inline void lp5562_wait_enable_done(void) +{ + /* it takes more 488 us to update ENABLE register */ + usleep_range(500, 600); +} + +static void lp5562_set_led_current(struct lp55xx_led *led, u8 led_current) +{ + u8 addr[] = { + LP5562_REG_R_CURRENT, + LP5562_REG_G_CURRENT, + LP5562_REG_B_CURRENT, + LP5562_REG_W_CURRENT, + }; + + led->led_current = led_current; + lp55xx_write(led->chip, addr[led->chan_nr], led_current); +} + +static void lp5562_load_engine(struct lp55xx_chip *chip) +{ + enum lp55xx_engine_index idx = chip->engine_idx; + u8 mask[] = { + [LP55XX_ENGINE_1] = LP5562_MODE_ENG1_M, + [LP55XX_ENGINE_2] = LP5562_MODE_ENG2_M, + [LP55XX_ENGINE_3] = LP5562_MODE_ENG3_M, + }; + + u8 val[] = { + [LP55XX_ENGINE_1] = LP5562_LOAD_ENG1, + [LP55XX_ENGINE_2] = LP5562_LOAD_ENG2, + [LP55XX_ENGINE_3] = LP5562_LOAD_ENG3, + }; + 
+ lp55xx_update_bits(chip, LP5562_REG_OP_MODE, mask[idx], val[idx]); + + lp5562_wait_opmode_done(); +} + +static void lp5562_stop_engine(struct lp55xx_chip *chip) +{ + lp55xx_write(chip, LP5562_REG_OP_MODE, LP5562_CMD_DISABLE); + lp5562_wait_opmode_done(); +} + +static void lp5562_run_engine(struct lp55xx_chip *chip, bool start) +{ + int ret; + u8 mode; + u8 exec; + + /* stop engine */ + if (!start) { + lp55xx_write(chip, LP5562_REG_ENABLE, LP5562_ENABLE_DEFAULT); + lp5562_wait_enable_done(); + lp5562_stop_engine(chip); + lp55xx_write(chip, LP5562_REG_ENG_SEL, LP5562_ENG_SEL_PWM); + lp55xx_write(chip, LP5562_REG_OP_MODE, LP5562_CMD_DIRECT); + lp5562_wait_opmode_done(); + return; + } + + /* + * To run the engine, + * operation mode and enable register should updated at the same time + */ + + ret = lp55xx_read(chip, LP5562_REG_OP_MODE, &mode); + if (ret) + return; + + ret = lp55xx_read(chip, LP5562_REG_ENABLE, &exec); + if (ret) + return; + + /* change operation mode to RUN only when each engine is loading */ + if (LP5562_ENG1_IS_LOADING(mode)) { + mode = (mode & ~LP5562_MODE_ENG1_M) | LP5562_RUN_ENG1; + exec = (exec & ~LP5562_EXEC_ENG1_M) | LP5562_RUN_ENG1; + } + + if (LP5562_ENG2_IS_LOADING(mode)) { + mode = (mode & ~LP5562_MODE_ENG2_M) | LP5562_RUN_ENG2; + exec = (exec & ~LP5562_EXEC_ENG2_M) | LP5562_RUN_ENG2; + } + + if (LP5562_ENG3_IS_LOADING(mode)) { + mode = (mode & ~LP5562_MODE_ENG3_M) | LP5562_RUN_ENG3; + exec = (exec & ~LP5562_EXEC_ENG3_M) | LP5562_RUN_ENG3; + } + + lp55xx_write(chip, LP5562_REG_OP_MODE, mode); + lp5562_wait_opmode_done(); + + lp55xx_update_bits(chip, LP5562_REG_ENABLE, LP5562_EXEC_M, exec); + lp5562_wait_enable_done(); +} + +static int lp5562_update_firmware(struct lp55xx_chip *chip, + const u8 *data, size_t size) +{ + enum lp55xx_engine_index idx = chip->engine_idx; + u8 pattern[LP5562_PROGRAM_LENGTH] = {0}; + u8 addr[] = { + [LP55XX_ENGINE_1] = LP5562_REG_PROG_MEM_ENG1, + [LP55XX_ENGINE_2] = LP5562_REG_PROG_MEM_ENG2, + [LP55XX_ENGINE_3] = LP5562_REG_PROG_MEM_ENG3, + }; + unsigned cmd; + char c[3]; + int program_size; + int nrchars; + int offset = 0; + int ret; + int i; + + /* clear program memory before updating */ + for (i = 0; i < LP5562_PROGRAM_LENGTH; i++) + lp55xx_write(chip, addr[idx] + i, 0); + + i = 0; + while ((offset < size - 1) && (i < LP5562_PROGRAM_LENGTH)) { + /* separate sscanfs because length is working only for %s */ + ret = sscanf(data + offset, "%2s%n ", c, &nrchars); + if (ret != 1) + goto err; + + ret = sscanf(c, "%2x", &cmd); + if (ret != 1) + goto err; + + pattern[i] = (u8)cmd; + offset += nrchars; + i++; + } + + /* Each instruction is 16bit long. Check that length is even */ + if (i % 2) + goto err; + + program_size = i; + for (i = 0; i < program_size; i++) + lp55xx_write(chip, addr[idx] + i, pattern[i]); + + return 0; + +err: + dev_err(&chip->cl->dev, "wrong pattern format\n"); + return -EINVAL; +} + +static void lp5562_firmware_loaded(struct lp55xx_chip *chip) +{ + const struct firmware *fw = chip->fw; + + if (fw->size > LP5562_PROGRAM_LENGTH) { + dev_err(&chip->cl->dev, "firmware data size overflow: %zu\n", + fw->size); + return; + } + + /* + * Program momery sequence + * 1) set engine mode to "LOAD" + * 2) write firmware data into program memory + */ + + lp5562_load_engine(chip); + lp5562_update_firmware(chip, fw->data, fw->size); +} + +static int lp5562_post_init_device(struct lp55xx_chip *chip) +{ + int ret; + u8 update_cfg = chip->pdata->update_config ? 
: LP5562_DEFAULT_CFG; + + /* Set all PWMs to direct control mode */ + ret = lp55xx_write(chip, LP5562_REG_OP_MODE, LP5562_CMD_DIRECT); + if (ret) + return ret; + + lp5562_wait_opmode_done(); + + ret = lp55xx_write(chip, LP5562_REG_CONFIG, update_cfg); + if (ret) + return ret; + + /* Initialize all channels PWM to zero -> leds off */ + lp55xx_write(chip, LP5562_REG_R_PWM, 0); + lp55xx_write(chip, LP5562_REG_G_PWM, 0); + lp55xx_write(chip, LP5562_REG_B_PWM, 0); + lp55xx_write(chip, LP5562_REG_W_PWM, 0); + + /* Set LED map as register PWM by default */ + lp55xx_write(chip, LP5562_REG_ENG_SEL, LP5562_ENG_SEL_PWM); + + return 0; +} + +static void lp5562_led_brightness_work(struct work_struct *work) +{ + struct lp55xx_led *led = container_of(work, struct lp55xx_led, + brightness_work); + struct lp55xx_chip *chip = led->chip; + u8 addr[] = { + LP5562_REG_R_PWM, + LP5562_REG_G_PWM, + LP5562_REG_B_PWM, + LP5562_REG_W_PWM, + }; + + mutex_lock(&chip->lock); + lp55xx_write(chip, addr[led->chan_nr], led->brightness); + mutex_unlock(&chip->lock); +} + +static void lp5562_write_program_memory(struct lp55xx_chip *chip, + u8 base, const u8 *rgb, int size) +{ + int i; + + if (!rgb || size <= 0) + return; + + for (i = 0; i < size; i++) + lp55xx_write(chip, base + i, *(rgb + i)); + + lp55xx_write(chip, base + i, 0); + lp55xx_write(chip, base + i + 1, 0); +} + +/* check the size of program count */ +static inline bool _is_pc_overflow(struct lp55xx_predef_pattern *ptn) +{ + return (ptn->size_r >= LP5562_PROGRAM_LENGTH || + ptn->size_g >= LP5562_PROGRAM_LENGTH || + ptn->size_b >= LP5562_PROGRAM_LENGTH); +} + +static int lp5562_run_predef_led_pattern(struct lp55xx_chip *chip, int mode) +{ + struct lp55xx_predef_pattern *ptn; + int i; + + if (mode == LP5562_PATTERN_OFF) { + lp5562_run_engine(chip, false); + return 0; + } + + ptn = chip->pdata->patterns + (mode - 1); + if (!ptn || _is_pc_overflow(ptn)) { + dev_err(&chip->cl->dev, "invalid pattern data\n"); + return -EINVAL; + } + + lp5562_stop_engine(chip); + + /* Set LED map as RGB */ + lp55xx_write(chip, LP5562_REG_ENG_SEL, LP5562_ENG_SEL_RGB); + + /* Load engines */ + for (i = LP55XX_ENGINE_1; i <= LP55XX_ENGINE_3; i++) { + chip->engine_idx = i; + lp5562_load_engine(chip); + } + + /* Clear program registers */ + lp55xx_write(chip, LP5562_REG_PROG_MEM_ENG1, 0); + lp55xx_write(chip, LP5562_REG_PROG_MEM_ENG1 + 1, 0); + lp55xx_write(chip, LP5562_REG_PROG_MEM_ENG2, 0); + lp55xx_write(chip, LP5562_REG_PROG_MEM_ENG2 + 1, 0); + lp55xx_write(chip, LP5562_REG_PROG_MEM_ENG3, 0); + lp55xx_write(chip, LP5562_REG_PROG_MEM_ENG3 + 1, 0); + + /* Program engines */ + lp5562_write_program_memory(chip, LP5562_REG_PROG_MEM_ENG1, + ptn->r, ptn->size_r); + lp5562_write_program_memory(chip, LP5562_REG_PROG_MEM_ENG2, + ptn->g, ptn->size_g); + lp5562_write_program_memory(chip, LP5562_REG_PROG_MEM_ENG3, + ptn->b, ptn->size_b); + + /* Run engines */ + lp5562_run_engine(chip, true); + + return 0; +} + +static ssize_t lp5562_store_pattern(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct lp55xx_led *led = i2c_get_clientdata(to_i2c_client(dev)); + struct lp55xx_chip *chip = led->chip; + struct lp55xx_predef_pattern *ptn = chip->pdata->patterns; + int num_patterns = chip->pdata->num_patterns; + unsigned long mode; + int ret; + + ret = kstrtoul(buf, 0, &mode); + if (ret) + return ret; + + if (mode > num_patterns || !ptn) + return -EINVAL; + + mutex_lock(&chip->lock); + ret = lp5562_run_predef_led_pattern(chip, mode); + mutex_unlock(&chip->lock); + + 
if (ret) + return ret; + + return len; +} + +static ssize_t lp5562_store_engine_mux(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct lp55xx_led *led = i2c_get_clientdata(to_i2c_client(dev)); + struct lp55xx_chip *chip = led->chip; + u8 mask; + u8 val; + + /* LED map + * R ... Engine 1 (fixed) + * G ... Engine 2 (fixed) + * B ... Engine 3 (fixed) + * W ... Engine 1 or 2 or 3 + */ + + if (sysfs_streq(buf, "RGB")) { + mask = LP5562_ENG_FOR_RGB_M; + val = LP5562_ENG_SEL_RGB; + } else if (sysfs_streq(buf, "W")) { + enum lp55xx_engine_index idx = chip->engine_idx; + + mask = LP5562_ENG_FOR_W_M; + switch (idx) { + case LP55XX_ENGINE_1: + val = LP5562_ENG1_FOR_W; + break; + case LP55XX_ENGINE_2: + val = LP5562_ENG2_FOR_W; + break; + case LP55XX_ENGINE_3: + val = LP5562_ENG3_FOR_W; + break; + default: + return -EINVAL; + } + + } else { + dev_err(dev, "choose RGB or W\n"); + return -EINVAL; + } + + mutex_lock(&chip->lock); + lp55xx_update_bits(chip, LP5562_REG_ENG_SEL, mask, val); + mutex_unlock(&chip->lock); + + return len; +} + +static DEVICE_ATTR(led_pattern, S_IWUSR, NULL, lp5562_store_pattern); +static DEVICE_ATTR(engine_mux, S_IWUSR, NULL, lp5562_store_engine_mux); + +static struct attribute *lp5562_attributes[] = { + &dev_attr_led_pattern.attr, + &dev_attr_engine_mux.attr, + NULL, +}; + +static const struct attribute_group lp5562_group = { + .attrs = lp5562_attributes, +}; + +/* Chip specific configurations */ +static struct lp55xx_device_config lp5562_cfg = { + .max_channel = LP5562_MAX_LEDS, + .reset = { + .addr = LP5562_REG_RESET, + .val = LP5562_RESET, + }, + .enable = { + .addr = LP5562_REG_ENABLE, + .val = LP5562_ENABLE_DEFAULT, + }, + .post_init_device = lp5562_post_init_device, + .set_led_current = lp5562_set_led_current, + .brightness_work_fn = lp5562_led_brightness_work, + .run_engine = lp5562_run_engine, + .firmware_cb = lp5562_firmware_loaded, + .dev_attr_group = &lp5562_group, +}; + +static int lp5562_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + int ret; + struct lp55xx_chip *chip; + struct lp55xx_led *led; + struct lp55xx_platform_data *pdata = client->dev.platform_data; + + if (!pdata) { + dev_err(&client->dev, "no platform data\n"); + return -EINVAL; + } + + chip = devm_kzalloc(&client->dev, sizeof(*chip), GFP_KERNEL); + if (!chip) + return -ENOMEM; + + led = devm_kzalloc(&client->dev, + sizeof(*led) * pdata->num_channels, GFP_KERNEL); + if (!led) + return -ENOMEM; + + chip->cl = client; + chip->pdata = pdata; + chip->cfg = &lp5562_cfg; + + mutex_init(&chip->lock); + + i2c_set_clientdata(client, led); + + ret = lp55xx_init_device(chip); + if (ret) + goto err_init; + + ret = lp55xx_register_leds(led, chip); + if (ret) + goto err_register_leds; + + ret = lp55xx_register_sysfs(chip); + if (ret) { + dev_err(&client->dev, "registering sysfs failed\n"); + goto err_register_sysfs; + } + + return 0; + +err_register_sysfs: + lp55xx_unregister_leds(led, chip); +err_register_leds: + lp55xx_deinit_device(chip); +err_init: + return ret; +} + +static int lp5562_remove(struct i2c_client *client) +{ + struct lp55xx_led *led = i2c_get_clientdata(client); + struct lp55xx_chip *chip = led->chip; + + lp5562_stop_engine(chip); + + lp55xx_unregister_sysfs(chip); + lp55xx_unregister_leds(led, chip); + lp55xx_deinit_device(chip); + + return 0; +} + +static const struct i2c_device_id lp5562_id[] = { + { "lp5562", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, lp5562_id); + +static struct i2c_driver lp5562_driver = { + .driver = { + 
.name = "lp5562", + }, + .probe = lp5562_probe, + .remove = lp5562_remove, + .id_table = lp5562_id, +}; + +module_i2c_driver(lp5562_driver); + +MODULE_DESCRIPTION("Texas Instruments LP5562 LED Driver"); +MODULE_AUTHOR("Milo Kim"); +MODULE_LICENSE("GPL"); diff --git a/drivers/leds/leds-lp55xx-common.c b/drivers/leds/leds-lp55xx-common.c index d9eb84157423..8a388a4afed7 100644 --- a/drivers/leds/leds-lp55xx-common.c +++ b/drivers/leds/leds-lp55xx-common.c @@ -1,5 +1,5 @@ /* - * LP5521/LP5523/LP55231 Common Driver + * LP5521/LP5523/LP55231/LP5562 Common Driver * * Copyright 2012 Texas Instruments * diff --git a/include/linux/platform_data/leds-lp55xx.h b/include/linux/platform_data/leds-lp55xx.h index 1509570d5a3f..1f1041e8b4fc 100644 --- a/include/linux/platform_data/leds-lp55xx.h +++ b/include/linux/platform_data/leds-lp55xx.h @@ -32,6 +32,13 @@ #define LP5521_CLK_INT 1 /* Internal clock */ #define LP5521_CLK_AUTO 2 /* Automatic clock selection */ +/* Bits in LP5562 CONFIG register */ +#define LP5562_PWM_HF LP5521_PWM_HF +#define LP5562_PWRSAVE_EN LP5521_PWRSAVE_EN +#define LP5562_CLK_SRC_EXT LP5521_CLK_SRC_EXT +#define LP5562_CLK_INT LP5521_CLK_INT +#define LP5562_CLK_AUTO LP5521_CLK_AUTO + struct lp55xx_led_config { const char *name; u8 chan_nr; @@ -40,9 +47,9 @@ struct lp55xx_led_config { }; struct lp55xx_predef_pattern { - u8 *r; - u8 *g; - u8 *b; + const u8 *r; + const u8 *g; + const u8 *b; u8 size_r; u8 size_g; u8 size_b; -- cgit From 39f7e08af3fd9ca1cb94a8270354afb2ea5cfcd3 Mon Sep 17 00:00:00 2001 From: "Kim, Milo" Date: Thu, 14 Mar 2013 04:29:19 -0700 Subject: leds: trigger: use inline functions instead of macros Macros are used in case that an inline function doesn't work. Otherwise, use an empty inline function. (a) Case of !CONFIG_LEDS_TRIGGERS Following macros are replaced with inline functions. led_trigger_register_simple() led_trigger_unregister_simple() led_trigger_event() To make inline types, the structure, 'led_trigger' should be defined. This structure has no member at all. (b) Case of !CONFIG_LEDS_TRIGGER_IDE_DISK ledtrig_ide_activity() macro is replaced with an inline function as well. (c) DEFINE_LED_TRIGGER() and DEFINE_LED_TRIGGER_GLOBAL() Struct 'led_trigger' is defined both cases, with CONFIG_LEDS_TRIGGERS and without CONFIG_LEDS_TRIGGERS. Those macros are moved out of CONFIG_LED_TRIGGERS because of no-dependency on CONFIG_LEDS_TRIGGERS. (d) Fix build errors in mmc-core driver After replacing macros with inline functions, following build errors occur. (condition: CONFIG_LEDS_TRIGGERS is not set) drivers/mmc/core/core.c: In function 'mmc_request_done': drivers/mmc/core/core.c:164:25: error: 'struct mmc_host' has no member named 'led' drivers/mmc/core/core.c: In function 'mmc_start_request': drivers/mmc/core/core.c:254:24: error: 'struct mmc_host' has no member named 'led' make[3]: *** [drivers/mmc/core/core.o] Error 1 The reason of these errors is non-existent member variable, 'led'. It is only valid when CONFIG_LEDS_TRIGGERS is set. But now, it can be used without this dependency. To fix build errors, member 'led' is always used without its config option in 'include/linux/mmc/host.h'. 
Signed-off-by: Milo(Woogyom) Kim Signed-off-by: Bryan Wu --- include/linux/leds.h | 25 ++++++++++++++----------- include/linux/mmc/host.h | 2 -- 2 files changed, 14 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/leds.h b/include/linux/leds.h index 0d9b5eed714e..2d8c0b4f2f76 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -142,6 +142,10 @@ extern void led_set_brightness(struct led_classdev *led_cdev, /* * LED Triggers */ +/* Registration functions for simple triggers */ +#define DEFINE_LED_TRIGGER(x) static struct led_trigger *x; +#define DEFINE_LED_TRIGGER_GLOBAL(x) struct led_trigger *x; + #ifdef CONFIG_LEDS_TRIGGERS #define TRIG_NAME_MAX 50 @@ -164,9 +168,6 @@ struct led_trigger { extern int led_trigger_register(struct led_trigger *trigger); extern void led_trigger_unregister(struct led_trigger *trigger); -/* Registration functions for simple triggers */ -#define DEFINE_LED_TRIGGER(x) static struct led_trigger *x; -#define DEFINE_LED_TRIGGER_GLOBAL(x) struct led_trigger *x; extern void led_trigger_register_simple(const char *name, struct led_trigger **trigger); extern void led_trigger_unregister_simple(struct led_trigger *trigger); @@ -199,20 +200,22 @@ extern void led_trigger_rename_static(const char *name, #else -/* Triggers aren't active - null macros */ -#define DEFINE_LED_TRIGGER(x) -#define DEFINE_LED_TRIGGER_GLOBAL(x) -#define led_trigger_register_simple(x, y) do {} while(0) -#define led_trigger_unregister_simple(x) do {} while(0) -#define led_trigger_event(x, y) do {} while(0) +/* Trigger has no members */ +struct led_trigger {}; -#endif +/* Trigger inline empty functions */ +static inline void led_trigger_register_simple(const char *name, + struct led_trigger **trigger) {} +static inline void led_trigger_unregister_simple(struct led_trigger *trigger) {} +static inline void led_trigger_event(struct led_trigger *trigger, + enum led_brightness event) {} +#endif /* CONFIG_LEDS_TRIGGERS */ /* Trigger specific functions */ #ifdef CONFIG_LEDS_TRIGGER_IDE_DISK extern void ledtrig_ide_activity(void); #else -#define ledtrig_ide_activity() do {} while(0) +static inline void ledtrig_ide_activity(void) {} #endif /* diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index d6f20cc6415e..357e80efcde0 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -341,9 +341,7 @@ struct mmc_host { mmc_pm_flag_t pm_flags; /* requested pm features */ -#ifdef CONFIG_LEDS_TRIGGERS struct led_trigger *led; /* activity led */ -#endif #ifdef CONFIG_REGULATOR bool regulator_enabled; /* regulator state */ -- cgit From 48a1d032c954b9b06c3adbf35ef4735dd70ab757 Mon Sep 17 00:00:00 2001 From: "Kim, Milo" Date: Thu, 14 Mar 2013 04:29:24 -0700 Subject: leds: add camera LED triggers Some LED devices support flash/torch functionality through the LED subsystem. This patch enables direct LED trigger controls by the driver. Flash on/off and torch on/off can be done simply by other driver space. Two trigger APIs are added, ledtrig_flash_ctrl() and ledtrig_torch_ctrl(). 
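A usage sketch from a hypothetical camera driver (strobe_us is an assumed local variable, not part of this patch):

        /* strobe the flash LED around the exposure */
        ledtrig_flash_ctrl(true);       /* "flash" trigger -> LED_FULL */
        usleep_range(strobe_us, strobe_us + 100);
        ledtrig_flash_ctrl(false);      /* -> LED_OFF */

        /* or keep the torch LED lit during preview */
        ledtrig_torch_ctrl(true);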
Signed-off-by: Milo(Woogyom) Kim Signed-off-by: Bryan Wu --- drivers/leds/trigger/Kconfig | 8 +++++ drivers/leds/trigger/Makefile | 1 + drivers/leds/trigger/ledtrig-camera.c | 57 +++++++++++++++++++++++++++++++++++ include/linux/leds.h | 8 +++++ 4 files changed, 74 insertions(+) create mode 100644 drivers/leds/trigger/ledtrig-camera.c (limited to 'include/linux') diff --git a/drivers/leds/trigger/Kconfig b/drivers/leds/trigger/Kconfig index eaa286dc494e..49794b47b51c 100644 --- a/drivers/leds/trigger/Kconfig +++ b/drivers/leds/trigger/Kconfig @@ -100,4 +100,12 @@ config LEDS_TRIGGER_TRANSIENT GPIO/PWM based hardware. If unsure, say Y. +config LEDS_TRIGGER_CAMERA + tristate "LED Camera Flash/Torch Trigger" + depends on LEDS_TRIGGERS + help + This allows LEDs to be controlled as a camera flash/torch device. + This enables direct flash/torch on/off by the driver, kernel space. + If unsure, say Y. + endif # LEDS_TRIGGERS diff --git a/drivers/leds/trigger/Makefile b/drivers/leds/trigger/Makefile index 554e46ee4c24..1abf48dacf7e 100644 --- a/drivers/leds/trigger/Makefile +++ b/drivers/leds/trigger/Makefile @@ -7,3 +7,4 @@ obj-$(CONFIG_LEDS_TRIGGER_GPIO) += ledtrig-gpio.o obj-$(CONFIG_LEDS_TRIGGER_CPU) += ledtrig-cpu.o obj-$(CONFIG_LEDS_TRIGGER_DEFAULT_ON) += ledtrig-default-on.o obj-$(CONFIG_LEDS_TRIGGER_TRANSIENT) += ledtrig-transient.o +obj-$(CONFIG_LEDS_TRIGGER_CAMERA) += ledtrig-camera.o diff --git a/drivers/leds/trigger/ledtrig-camera.c b/drivers/leds/trigger/ledtrig-camera.c new file mode 100644 index 000000000000..9bd73a8bad5c --- /dev/null +++ b/drivers/leds/trigger/ledtrig-camera.c @@ -0,0 +1,57 @@ +/* + * Camera Flash and Torch On/Off Trigger + * + * based on ledtrig-ide-disk.c + * + * Copyright 2013 Texas Instruments + * + * Author: Milo(Woogyom) Kim + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include +#include +#include +#include + +DEFINE_LED_TRIGGER(ledtrig_flash); +DEFINE_LED_TRIGGER(ledtrig_torch); + +void ledtrig_flash_ctrl(bool on) +{ + enum led_brightness brt = on ? LED_FULL : LED_OFF; + + led_trigger_event(ledtrig_flash, brt); +} +EXPORT_SYMBOL_GPL(ledtrig_flash_ctrl); + +void ledtrig_torch_ctrl(bool on) +{ + enum led_brightness brt = on ? 
LED_FULL : LED_OFF; + + led_trigger_event(ledtrig_torch, brt); +} +EXPORT_SYMBOL_GPL(ledtrig_torch_ctrl); + +static int __init ledtrig_camera_init(void) +{ + led_trigger_register_simple("flash", &ledtrig_flash); + led_trigger_register_simple("torch", &ledtrig_torch); + return 0; +} +module_init(ledtrig_camera_init); + +static void __exit ledtrig_camera_exit(void) +{ + led_trigger_unregister_simple(ledtrig_torch); + led_trigger_unregister_simple(ledtrig_flash); +} +module_exit(ledtrig_camera_exit); + +MODULE_DESCRIPTION("LED Trigger for Camera Flash/Torch Control"); +MODULE_AUTHOR("Milo Kim"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/leds.h b/include/linux/leds.h index 2d8c0b4f2f76..0287ab296689 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -218,6 +218,14 @@ extern void ledtrig_ide_activity(void); static inline void ledtrig_ide_activity(void) {} #endif +#if defined(CONFIG_LEDS_TRIGGER_CAMERA) || defined(CONFIG_LEDS_TRIGGER_CAMERA_MODULE) +extern void ledtrig_flash_ctrl(bool on); +extern void ledtrig_torch_ctrl(bool on); +#else +static inline void ledtrig_flash_ctrl(bool on) {} +static inline void ledtrig_torch_ctrl(bool on) {} +#endif + /* * Generic LED platform data for describing LED names and default triggers. */ -- cgit From 81f2a5b4a0570a662efd629c176fc1d67e56f7e3 Mon Sep 17 00:00:00 2001 From: "Kim, Milo" Date: Wed, 20 Mar 2013 17:37:04 -0700 Subject: leds: lp55xx: configure the clock detection Now LP55xx provides an automatic clock detection API, lp55xx_is_extclk_used(), so the clock configuration can be done by the driver itself. (a) Concept The default value is set by each driver along with the clock selection. The internal clock selection bit is set when the external clock is not detected or its rate is not 32kHz. (b) Change on LP55xx platform data The clock configuration is done automatically, so there is no need to define 'update_config' on the platform side. The related information is removed from the documentation and the header. (c) Definitions moved from header to driver files The CONFIG register values are moved into each driver, LP5521 and LP5562, and unnecessary definitions are removed as well. Signed-off-by: Milo(Woogyom) Kim Signed-off-by: Bryan Wu --- Documentation/leds/leds-lp5521.txt | 19 ------------------- Documentation/leds/leds-lp5562.txt | 15 --------------- drivers/leds/leds-lp5521.c | 19 +++++++++++++++++-- drivers/leds/leds-lp5562.c | 14 ++++++++++---- include/linux/platform_data/leds-lp55xx.h | 22 ---------------------- 5 files changed, 27 insertions(+), 62 deletions(-) (limited to 'include/linux') diff --git a/Documentation/leds/leds-lp5521.txt b/Documentation/leds/leds-lp5521.txt index 270f57196339..79e4c2e6e5e8 100644 --- a/Documentation/leds/leds-lp5521.txt +++ b/Documentation/leds/leds-lp5521.txt @@ -81,22 +81,3 @@ static struct lp55xx_platform_data lp5521_platform_data = { If the current is set to 0 in the platform data, that channel is disabled and it is not visible in the sysfs. - -The 'update_config' : CONFIG register (ADDR 08h) -This value is platform-specific data. -If update_config is not defined, the CONFIG register is set with -'LP5521_PWRSAVE_EN | LP5521_CP_MODE_AUTO | LP5521_R_TO_BATT'.
-(Enable auto-powersave, set charge pump to auto, red to battery) - -example of update_config : - -#define LP5521_CONFIGS (LP5521_PWM_HF | LP5521_PWRSAVE_EN | \ - LP5521_CP_MODE_AUTO | LP5521_R_TO_BATT | \ - LP5521_CLK_INT) - -static struct lp55xx_platform_data lp5521_pdata = { - .led_config = lp5521_led_config, - .num_channels = ARRAY_SIZE(lp5521_led_config), - .clock_mode = LP55XX_CLOCK_INT, - .update_config = LP5521_CONFIGS, -}; diff --git a/Documentation/leds/leds-lp5562.txt b/Documentation/leds/leds-lp5562.txt index 96061000dd93..5a823ff6b393 100644 --- a/Documentation/leds/leds-lp5562.txt +++ b/Documentation/leds/leds-lp5562.txt @@ -118,18 +118,3 @@ static struct lp55xx_platform_data lp5562_platform_data = { If the current is set to 0 in the platform data, that channel is disabled and it is not visible in the sysfs. - -The 'update_config' : CONFIG register (ADDR 08h) -This value is platform-specific data. -If update_config is not defined, the CONFIG register is set with -'LP5562_PWRSAVE_EN | LP5562_CLK_AUTO'. -(Enable auto-powersave, set automatic clock source selection) - -#define LP5562_CONFIGS (LP5562_PWM_HF | LP5562_PWRSAVE_EN | \ - LP5562_CLK_SRC_EXT) - -static struct lp55xx_platform_data lp5562_pdata = { - .led_config = lp5562_led_config, - .num_channels = ARRAY_SIZE(lp5562_led_config), - .update_config = LP5562_CONFIGS, -}; diff --git a/drivers/leds/leds-lp5521.c b/drivers/leds/leds-lp5521.c index 7f10304219ea..19752c928aa2 100644 --- a/drivers/leds/leds-lp5521.c +++ b/drivers/leds/leds-lp5521.c @@ -68,6 +68,18 @@ #define LP5521_ENABLE_RUN_PROGRAM \ (LP5521_ENABLE_DEFAULT | LP5521_EXEC_RUN) +/* CONFIG register */ +#define LP5521_PWM_HF 0x40 /* PWM: 0 = 256Hz, 1 = 558Hz */ +#define LP5521_PWRSAVE_EN 0x20 /* 1 = Power save mode */ +#define LP5521_CP_MODE_OFF 0 /* Charge pump (CP) off */ +#define LP5521_CP_MODE_BYPASS 8 /* CP forced to bypass mode */ +#define LP5521_CP_MODE_1X5 0x10 /* CP forced to 1.5x mode */ +#define LP5521_CP_MODE_AUTO 0x18 /* Automatic mode selection */ +#define LP5521_R_TO_BATT 0x04 /* R out: 0 = CP, 1 = Vbat */ +#define LP5521_CLK_INT 0x01 /* Internal clock */ +#define LP5521_DEFAULT_CFG \ + (LP5521_PWM_HF | LP5521_PWRSAVE_EN | LP5521_CP_MODE_AUTO) + /* Status */ #define LP5521_EXT_CLK_USED 0x08 @@ -296,8 +308,11 @@ static int lp5521_post_init_device(struct lp55xx_chip *chip) /* Set all PWMs to direct control mode */ ret = lp55xx_write(chip, LP5521_REG_OP_MODE, LP5521_CMD_DIRECT); - val = chip->pdata->update_config ? - : (LP5521_PWRSAVE_EN | LP5521_CP_MODE_AUTO | LP5521_R_TO_BATT); + /* Update configuration for the clock setting */ + val = LP5521_DEFAULT_CFG; + if (!lp55xx_is_extclk_used(chip)) + val |= LP5521_CLK_INT; + ret = lp55xx_write(chip, LP5521_REG_CONFIG, val); if (ret) return ret; diff --git a/drivers/leds/leds-lp5562.c b/drivers/leds/leds-lp5562.c index f8b927788c3a..513f2390ca2d 100644 --- a/drivers/leds/leds-lp5562.c +++ b/drivers/leds/leds-lp5562.c @@ -71,8 +71,10 @@ /* CONFIG Register 08h */ #define LP5562_REG_CONFIG 0x08 -#define LP5562_DEFAULT_CFG \ - (LP5562_PWM_HF | LP5562_PWRSAVE_EN | LP5562_CLK_INT) +#define LP5562_PWM_HF 0x40 +#define LP5562_PWRSAVE_EN 0x20 +#define LP5562_CLK_INT 0x01 /* Internal clock */ +#define LP5562_DEFAULT_CFG (LP5562_PWM_HF | LP5562_PWRSAVE_EN) /* RESET Register 0Dh */ #define LP5562_REG_RESET 0x0D @@ -280,7 +282,7 @@ static void lp5562_firmware_loaded(struct lp55xx_chip *chip) static int lp5562_post_init_device(struct lp55xx_chip *chip) { int ret; - u8 update_cfg = chip->pdata->update_config ? 
: LP5562_DEFAULT_CFG; + u8 cfg = LP5562_DEFAULT_CFG; /* Set all PWMs to direct control mode */ ret = lp55xx_write(chip, LP5562_REG_OP_MODE, LP5562_CMD_DIRECT); @@ -289,7 +291,11 @@ static int lp5562_post_init_device(struct lp55xx_chip *chip) lp5562_wait_opmode_done(); - ret = lp55xx_write(chip, LP5562_REG_CONFIG, update_cfg); + /* Update configuration for the clock setting */ + if (!lp55xx_is_extclk_used(chip)) + cfg |= LP5562_CLK_INT; + + ret = lp55xx_write(chip, LP5562_REG_CONFIG, cfg); if (ret) return ret; diff --git a/include/linux/platform_data/leds-lp55xx.h b/include/linux/platform_data/leds-lp55xx.h index 1f1041e8b4fc..202e290faea8 100644 --- a/include/linux/platform_data/leds-lp55xx.h +++ b/include/linux/platform_data/leds-lp55xx.h @@ -20,25 +20,6 @@ #define LP55XX_CLOCK_INT 1 #define LP55XX_CLOCK_EXT 2 -/* Bits in LP5521 CONFIG register. 'update_config' in lp55xx_platform_data */ -#define LP5521_PWM_HF 0x40 /* PWM: 0 = 256Hz, 1 = 558Hz */ -#define LP5521_PWRSAVE_EN 0x20 /* 1 = Power save mode */ -#define LP5521_CP_MODE_OFF 0 /* Charge pump (CP) off */ -#define LP5521_CP_MODE_BYPASS 8 /* CP forced to bypass mode */ -#define LP5521_CP_MODE_1X5 0x10 /* CP forced to 1.5x mode */ -#define LP5521_CP_MODE_AUTO 0x18 /* Automatic mode selection */ -#define LP5521_R_TO_BATT 4 /* R out: 0 = CP, 1 = Vbat */ -#define LP5521_CLK_SRC_EXT 0 /* Ext-clk source (CLK_32K) */ -#define LP5521_CLK_INT 1 /* Internal clock */ -#define LP5521_CLK_AUTO 2 /* Automatic clock selection */ - -/* Bits in LP5562 CONFIG register */ -#define LP5562_PWM_HF LP5521_PWM_HF -#define LP5562_PWRSAVE_EN LP5521_PWRSAVE_EN -#define LP5562_CLK_SRC_EXT LP5521_CLK_SRC_EXT -#define LP5562_CLK_INT LP5521_CLK_INT -#define LP5562_CLK_AUTO LP5521_CLK_AUTO - struct lp55xx_led_config { const char *name; u8 chan_nr; @@ -86,9 +67,6 @@ struct lp55xx_platform_data { /* Predefined pattern data */ struct lp55xx_predef_pattern *patterns; unsigned int num_patterns; - - /* _CONFIG register */ - u8 update_config; }; #endif /* _LEDS_LP55XX_H */ -- cgit From d55262c4d164759a8debe772da6c9b16059dec47 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 1 Apr 2013 11:23:38 -0700 Subject: workqueue: update sysfs interface to reflect NUMA awareness and a kernel param to disable NUMA affinity Unbound workqueues are now NUMA aware. Let's add some control knobs and update the sysfs interface accordingly. * Add kernel param workqueue.disable_numa which disables NUMA affinity globally. * Replace sysfs file "pool_id" with "pool_ids" which contains node:pool_id pairs. This change is userland-visible but "pool_id" hasn't seen a release yet, so this is okay. * Add a new sysfs file "numa" which can toggle NUMA affinity on individual workqueues. This is implemented as attrs->no_numa which is special in that it isn't part of a pool's attributes. It only affects how apply_workqueue_attrs() picks which pools to use. After the "pool_ids" change, first_pwq() doesn't have any user left. Removed. Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- Documentation/kernel-parameters.txt | 9 ++++ include/linux/workqueue.h | 5 +++ kernel/workqueue.c | 82 ++++++++++++++++++++++++++----------- 3 files changed, 73 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 4609e81dbc37..c75ea0b8ec59 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -3222,6 +3222,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
or other driver-specific files in the Documentation/watchdog/ directory. + workqueue.disable_numa + By default, all work items queued to unbound + workqueues are affine to the NUMA nodes they're + issued on, which results in better behavior in + general. If NUMA affinity needs to be disabled for + whatever reason, this option can be used. Note + that this also can be controlled per-workqueue for + workqueues visible under /sys/bus/workqueue/. + x2apic_phys [X86-64,APIC] Use x2apic physical mode instead of default x2apic cluster mode on platforms supporting x2apic. diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 835d12b76960..717975639378 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -119,10 +119,15 @@ struct delayed_work { /* * A struct for workqueue attributes. This can be used to change * attributes of an unbound workqueue. + * + * Unlike other fields, ->no_numa isn't a property of a worker_pool. It + * only modifies how apply_workqueue_attrs() select pools and thus doesn't + * participate in pool hash calculations or equality comparisons. */ struct workqueue_attrs { int nice; /* nice level */ cpumask_var_t cpumask; /* allowed CPUs */ + bool no_numa; /* disable NUMA affinity */ }; static inline struct delayed_work *to_delayed_work(struct work_struct *work) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 57cd77de4a4f..729ac6a44860 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -268,6 +268,9 @@ static int wq_numa_tbl_len; /* highest possible NUMA node id + 1 */ static cpumask_var_t *wq_numa_possible_cpumask; /* possible CPUs of each node */ +static bool wq_disable_numa; +module_param_named(disable_numa, wq_disable_numa, bool, 0444); + static bool wq_numa_enabled; /* unbound NUMA affinity enabled */ /* buf for wq_update_unbound_numa_attrs(), protected by CPU hotplug exclusion */ @@ -516,21 +519,6 @@ static int worker_pool_assign_id(struct worker_pool *pool) return ret; } -/** - * first_pwq - return the first pool_workqueue of the specified workqueue - * @wq: the target workqueue - * - * This must be called either with wq->mutex held or sched RCU read locked. - * If the pwq needs to be used beyond the locking in effect, the caller is - * responsible for guaranteeing that the pwq stays online. 
- */ -static struct pool_workqueue *first_pwq(struct workqueue_struct *wq) -{ - assert_rcu_or_wq_mutex(wq); - return list_first_or_null_rcu(&wq->pwqs, struct pool_workqueue, - pwqs_node); -} - /** * unbound_pwq_by_node - return the unbound pool_workqueue for the given node * @wq: the target workqueue @@ -3114,16 +3102,21 @@ static struct device_attribute wq_sysfs_attrs[] = { __ATTR_NULL, }; -static ssize_t wq_pool_id_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t wq_pool_ids_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct workqueue_struct *wq = dev_to_wq(dev); - struct worker_pool *pool; - int written; + const char *delim = ""; + int node, written = 0; rcu_read_lock_sched(); - pool = first_pwq(wq)->pool; - written = scnprintf(buf, PAGE_SIZE, "%d\n", pool->id); + for_each_node(node) { + written += scnprintf(buf + written, PAGE_SIZE - written, + "%s%d:%d", delim, node, + unbound_pwq_by_node(wq, node)->pool->id); + delim = " "; + } + written += scnprintf(buf + written, PAGE_SIZE - written, "\n"); rcu_read_unlock_sched(); return written; @@ -3212,10 +3205,46 @@ static ssize_t wq_cpumask_store(struct device *dev, return ret ?: count; } +static ssize_t wq_numa_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct workqueue_struct *wq = dev_to_wq(dev); + int written; + + mutex_lock(&wq->mutex); + written = scnprintf(buf, PAGE_SIZE, "%d\n", + !wq->unbound_attrs->no_numa); + mutex_unlock(&wq->mutex); + + return written; +} + +static ssize_t wq_numa_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct workqueue_struct *wq = dev_to_wq(dev); + struct workqueue_attrs *attrs; + int v, ret; + + attrs = wq_sysfs_prep_attrs(wq); + if (!attrs) + return -ENOMEM; + + ret = -EINVAL; + if (sscanf(buf, "%d", &v) == 1) { + attrs->no_numa = !v; + ret = apply_workqueue_attrs(wq, attrs); + } + + free_workqueue_attrs(attrs); + return ret ?: count; +} + static struct device_attribute wq_sysfs_unbound_attrs[] = { - __ATTR(pool_id, 0444, wq_pool_id_show, NULL), + __ATTR(pool_ids, 0444, wq_pool_ids_show, NULL), __ATTR(nice, 0644, wq_nice_show, wq_nice_store), __ATTR(cpumask, 0644, wq_cpumask_show, wq_cpumask_store), + __ATTR(numa, 0644, wq_numa_show, wq_numa_store), __ATTR_NULL, }; @@ -3750,7 +3779,7 @@ static void free_unbound_pwq(struct pool_workqueue *pwq) static bool wq_calc_node_cpumask(const struct workqueue_attrs *attrs, int node, int cpu_going_down, cpumask_t *cpumask) { - if (!wq_numa_enabled) + if (!wq_numa_enabled || attrs->no_numa) goto use_dfl; /* does @node have any online CPUs @attrs wants? 
*/ @@ -3951,6 +3980,8 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu, cpumask = target_attrs->cpumask; mutex_lock(&wq->mutex); + if (wq->unbound_attrs->no_numa) + goto out_unlock; copy_workqueue_attrs(target_attrs, wq->unbound_attrs); pwq = unbound_pwq_by_node(wq, node); @@ -4763,6 +4794,11 @@ static void __init wq_numa_init(void) if (num_possible_nodes() <= 1) return; + if (wq_disable_numa) { + pr_info("workqueue: NUMA affinity support disabled\n"); + return; + } + wq_update_unbound_numa_attrs_buf = alloc_workqueue_attrs(GFP_KERNEL); BUG_ON(!wq_update_unbound_numa_attrs_buf); -- cgit From 253b5374f08f3908cc380c5665470a5b7609be1c Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 11 Dec 2012 13:14:09 +0900 Subject: mfd: wm5102: Add registers for microphone detection level configuration Signed-off-by: Mark Brown --- drivers/mfd/wm5102-tables.c | 8 ++++++++ include/linux/mfd/arizona/registers.h | 4 ++++ 2 files changed, 12 insertions(+) (limited to 'include/linux') diff --git a/drivers/mfd/wm5102-tables.c b/drivers/mfd/wm5102-tables.c index a433f580aa4c..ca2aed6bc830 100644 --- a/drivers/mfd/wm5102-tables.c +++ b/drivers/mfd/wm5102-tables.c @@ -331,6 +331,10 @@ static const struct reg_default wm5102_reg_default[] = { { 0x000002A3, 0x1102 }, /* R675 - Mic Detect 1 */ { 0x000002A4, 0x009F }, /* R676 - Mic Detect 2 */ { 0x000002A5, 0x0000 }, /* R677 - Mic Detect 3 */ + { 0x000002A6, 0x3737 }, /* R678 - Mic Detect Level 1 */ + { 0x000002A7, 0x372C }, /* R679 - Mic Detect Level 2 */ + { 0x000002A8, 0x1422 }, /* R680 - Mic Detect Level 3 */ + { 0x000002A9, 0x030A }, /* R681 - Mic Detect Level 4 */ { 0x000002C3, 0x0000 }, /* R707 - Mic noise mix control 1 */ { 0x000002CB, 0x0000 }, /* R715 - Isolation control */ { 0x000002D3, 0x0000 }, /* R723 - Jack detect analogue */ @@ -1090,6 +1094,10 @@ static bool wm5102_readable_register(struct device *dev, unsigned int reg) case ARIZONA_MIC_DETECT_1: case ARIZONA_MIC_DETECT_2: case ARIZONA_MIC_DETECT_3: + case ARIZONA_MIC_DETECT_LEVEL_1: + case ARIZONA_MIC_DETECT_LEVEL_2: + case ARIZONA_MIC_DETECT_LEVEL_3: + case ARIZONA_MIC_DETECT_LEVEL_4: case ARIZONA_MIC_NOISE_MIX_CONTROL_1: case ARIZONA_ISOLATION_CONTROL: case ARIZONA_JACK_DETECT_ANALOGUE: diff --git a/include/linux/mfd/arizona/registers.h b/include/linux/mfd/arizona/registers.h index 340355136069..f43aa7c8d040 100644 --- a/include/linux/mfd/arizona/registers.h +++ b/include/linux/mfd/arizona/registers.h @@ -124,6 +124,10 @@ #define ARIZONA_MIC_DETECT_1 0x2A3 #define ARIZONA_MIC_DETECT_2 0x2A4 #define ARIZONA_MIC_DETECT_3 0x2A5 +#define ARIZONA_MIC_DETECT_LEVEL_1 0x2A6 +#define ARIZONA_MIC_DETECT_LEVEL_2 0x2A7 +#define ARIZONA_MIC_DETECT_LEVEL_3 0x2A8 +#define ARIZONA_MIC_DETECT_LEVEL_4 0x2A9 #define ARIZONA_MIC_NOISE_MIX_CONTROL_1 0x2C3 #define ARIZONA_ISOLATION_CONTROL 0x2CB #define ARIZONA_JACK_DETECT_ANALOGUE 0x2D3 -- cgit From 932bc4d7a53ba418de67fdab533248df5b36c752 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Thu, 21 Mar 2013 11:57:58 +0200 Subject: net: add skb_dst_set_noref_force Rename skb_dst_set_noref to __skb_dst_set_noref and add a force flag, as suggested by David Miller. The new wrapper skb_dst_set_noref_force will force dst entries that are not cached to be attached as skb dst without taking a reference, as long as the provided dst is reclaimed after an RCU grace period. Signed-off-by: Julian Anastasov Signed-off-by: Hans Schillstrom Acked-by: David S.
Miller Signed-off-by: Simon Horman --- include/linux/skbuff.h | 35 ++++++++++++++++++++++++++++++++++- net/core/dst.c | 9 +++++---- 2 files changed, 39 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 878e0ee81068..364e2440a7ee 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -575,7 +575,40 @@ static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst) skb->_skb_refdst = (unsigned long)dst; } -extern void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst); +extern void __skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst, + bool force); + +/** + * skb_dst_set_noref - sets skb dst, hopefully, without taking reference + * @skb: buffer + * @dst: dst entry + * + * Sets skb dst, assuming a reference was not taken on dst. + * If dst entry is cached, we do not take reference and dst_release + * will be avoided by refdst_drop. If dst entry is not cached, we take + * reference, so that last dst_release can destroy the dst immediately. + */ +static inline void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst) +{ + __skb_dst_set_noref(skb, dst, false); +} + +/** + * skb_dst_set_noref_force - sets skb dst, without taking reference + * @skb: buffer + * @dst: dst entry + * + * Sets skb dst, assuming a reference was not taken on dst. + * No reference is taken and no dst_release will be called. While for + * cached dsts deferred reclaim is a basic feature, for entries that are + * not cached it is caller's job to guarantee that last dst_release for + * provided dst happens when nobody uses it, eg. after a RCU grace period. + */ +static inline void skb_dst_set_noref_force(struct sk_buff *skb, + struct dst_entry *dst) +{ + __skb_dst_set_noref(skb, dst, true); +} /** * skb_dst_is_noref - Test if skb dst isn't refcounted diff --git a/net/core/dst.c b/net/core/dst.c index 35fd12f1a69c..df9cc810ec8e 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -320,27 +320,28 @@ void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old) EXPORT_SYMBOL(__dst_destroy_metrics_generic); /** - * skb_dst_set_noref - sets skb dst, without a reference + * __skb_dst_set_noref - sets skb dst, without a reference * @skb: buffer * @dst: dst entry + * @force: if force is set, use noref version even for DST_NOCACHE entries * * Sets skb dst, assuming a reference was not taken on dst * skb_dst_drop() should not dst_release() this dst */ -void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst) +void __skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst, bool force) { WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); /* If dst not in cache, we must take a reference, because * dst_release() will destroy dst as soon as its refcount becomes zero */ - if (unlikely(dst->flags & DST_NOCACHE)) { + if (unlikely((dst->flags & DST_NOCACHE) && !force)) { dst_hold(dst); skb_dst_set(skb, dst); } else { skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF; } } -EXPORT_SYMBOL(skb_dst_set_noref); +EXPORT_SYMBOL(__skb_dst_set_noref); /* Dirty hack. We did it in 2.2 (in __dst_free), * we have _very_ good reasons not to repeat -- cgit From 094f7b69ea738d7d619cba449d2af97159949459 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 1 Apr 2013 08:14:24 -0400 Subject: selinux: make security_sb_clone_mnt_opts return an error on context mismatch I had the following problem reported a while back. 
If you mount the same filesystem twice using NFSv4 with different contexts, then the second context= option is ignored. For instance: # mount server:/export /mnt/test1 # mount server:/export /mnt/test2 -o context=system_u:object_r:tmp_t:s0 # ls -dZ /mnt/test1 drwxrwxrwt. root root system_u:object_r:nfs_t:s0 /mnt/test1 # ls -dZ /mnt/test2 drwxrwxrwt. root root system_u:object_r:nfs_t:s0 /mnt/test2 When we call into SELinux to set the context of a "cloned" superblock, it will currently just bail out when it notices that we're reusing an existing superblock. Since the existing superblock is already set up and presumably in use, we can't go overwriting its context with the one from the "original" sb. Because of this, the second context= option cannot take effect. This patch fixes this by turning security_sb_clone_mnt_opts into an int return operation. When it finds that the "new" superblock that it has been handed is already set up, it checks to see whether the contexts on the old superblock match it. If they do, then it will just return success; otherwise it'll return -EBUSY and emit a printk to tell the admin why the second mount failed. Note that this patch may cause casualties. The NFSv4 code relies on being able to walk down to an export from the pseudoroot. If you mount filesystems that are nested within one another with different contexts, then this patch will make those mounts fail in new and "exciting" ways. For instance, suppose that /export is a separate filesystem on the server: # mount server:/ /mnt/test1 # mount salusa:/export /mnt/test2 -o context=system_u:object_r:tmp_t:s0 mount.nfs: an incorrect mount option was specified ...with the printk in the ring buffer. Because we *might* eventually walk down to /mnt/test1/export, the mount is denied due to this patch. The second mount needs the pseudoroot superblock, but that's already present with the wrong context. OTOH, if we mount these in the reverse order, then both mounts work, because the pseudoroot superblock created when mounting /export is discarded once that mount is done. However, if we then try to walk into that directory, the automount fails for similar reasons: # cd /mnt/test1/scratch/ -bash: cd: /mnt/test1/scratch: Device or resource busy The story I've gotten from the SELinux folks that I've talked to is that this is desirable behavior. In SELinux-land, mounting the same data under different contexts is wrong -- there can be only one.
Cc: Steve Dickson Cc: Stephen Smalley Signed-off-by: Jeff Layton Acked-by: Eric Paris Signed-off-by: James Morris --- fs/nfs/super.c | 3 +-- include/linux/security.h | 10 ++++++---- security/capability.c | 3 ++- security/security.c | 4 ++-- security/selinux/hooks.c | 39 +++++++++++++++++++++++++++++++++++---- 5 files changed, 46 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 95cdcb208dfb..6b4bf7622280 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2380,10 +2380,9 @@ int nfs_clone_sb_security(struct super_block *s, struct dentry *mntroot, struct nfs_mount_info *mount_info) { /* clone any lsm security options from the parent to the new sb */ - security_sb_clone_mnt_opts(mount_info->cloned->sb, s); if (mntroot->d_inode->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops) return -ESTALE; - return 0; + return security_sb_clone_mnt_opts(mount_info->cloned->sb, s); } EXPORT_SYMBOL_GPL(nfs_clone_sb_security); diff --git a/include/linux/security.h b/include/linux/security.h index eee7478cda70..4c7058dc5514 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1436,7 +1436,7 @@ struct security_operations { struct path *new_path); int (*sb_set_mnt_opts) (struct super_block *sb, struct security_mnt_opts *opts); - void (*sb_clone_mnt_opts) (const struct super_block *oldsb, + int (*sb_clone_mnt_opts) (const struct super_block *oldsb, struct super_block *newsb); int (*sb_parse_opts_str) (char *options, struct security_mnt_opts *opts); @@ -1721,7 +1721,7 @@ int security_sb_mount(const char *dev_name, struct path *path, int security_sb_umount(struct vfsmount *mnt, int flags); int security_sb_pivotroot(struct path *old_path, struct path *new_path); int security_sb_set_mnt_opts(struct super_block *sb, struct security_mnt_opts *opts); -void security_sb_clone_mnt_opts(const struct super_block *oldsb, +int security_sb_clone_mnt_opts(const struct super_block *oldsb, struct super_block *newsb); int security_sb_parse_opts_str(char *options, struct security_mnt_opts *opts); @@ -2011,9 +2011,11 @@ static inline int security_sb_set_mnt_opts(struct super_block *sb, return 0; } -static inline void security_sb_clone_mnt_opts(const struct super_block *oldsb, +static inline int security_sb_clone_mnt_opts(const struct super_block *oldsb, struct super_block *newsb) -{ } +{ + return 0; +} static inline int security_sb_parse_opts_str(char *options, struct security_mnt_opts *opts) { diff --git a/security/capability.c b/security/capability.c index 579775088967..a6290b625be9 100644 --- a/security/capability.c +++ b/security/capability.c @@ -98,9 +98,10 @@ static int cap_sb_set_mnt_opts(struct super_block *sb, return 0; } -static void cap_sb_clone_mnt_opts(const struct super_block *oldsb, +static int cap_sb_clone_mnt_opts(const struct super_block *oldsb, struct super_block *newsb) { + return 0; } static int cap_sb_parse_opts_str(char *options, struct security_mnt_opts *opts) diff --git a/security/security.c b/security/security.c index 7b88c6aeaed4..108281d2307a 100644 --- a/security/security.c +++ b/security/security.c @@ -299,10 +299,10 @@ int security_sb_set_mnt_opts(struct super_block *sb, } EXPORT_SYMBOL(security_sb_set_mnt_opts); -void security_sb_clone_mnt_opts(const struct super_block *oldsb, +int security_sb_clone_mnt_opts(const struct super_block *oldsb, struct super_block *newsb) { - security_ops->sb_clone_mnt_opts(oldsb, newsb); + return security_ops->sb_clone_mnt_opts(oldsb, newsb); } EXPORT_SYMBOL(security_sb_clone_mnt_opts); 
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 2fa28c88900c..3c02be3f6732 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -750,7 +750,37 @@ out_double_mount: goto out; } -static void selinux_sb_clone_mnt_opts(const struct super_block *oldsb, +static int selinux_cmp_sb_context(const struct super_block *oldsb, + const struct super_block *newsb) +{ + struct superblock_security_struct *old = oldsb->s_security; + struct superblock_security_struct *new = newsb->s_security; + char oldflags = old->flags & SE_MNTMASK; + char newflags = new->flags & SE_MNTMASK; + + if (oldflags != newflags) + goto mismatch; + if ((oldflags & FSCONTEXT_MNT) && old->sid != new->sid) + goto mismatch; + if ((oldflags & CONTEXT_MNT) && old->mntpoint_sid != new->mntpoint_sid) + goto mismatch; + if ((oldflags & DEFCONTEXT_MNT) && old->def_sid != new->def_sid) + goto mismatch; + if (oldflags & ROOTCONTEXT_MNT) { + struct inode_security_struct *oldroot = oldsb->s_root->d_inode->i_security; + struct inode_security_struct *newroot = newsb->s_root->d_inode->i_security; + if (oldroot->sid != newroot->sid) + goto mismatch; + } + return 0; +mismatch: + printk(KERN_WARNING "SELinux: mount invalid. Same superblock, " + "different security settings for (dev %s, " + "type %s)\n", newsb->s_id, newsb->s_type->name); + return -EBUSY; +} + +static int selinux_sb_clone_mnt_opts(const struct super_block *oldsb, struct super_block *newsb) { const struct superblock_security_struct *oldsbsec = oldsb->s_security; @@ -765,14 +795,14 @@ static void selinux_sb_clone_mnt_opts(const struct super_block *oldsb, * mount options. thus we can safely deal with this superblock later */ if (!ss_initialized) - return; + return 0; /* how can we clone if the old one wasn't set up?? */ BUG_ON(!(oldsbsec->flags & SE_SBINITIALIZED)); - /* if fs is reusing a sb, just let its options stand... */ + /* if fs is reusing a sb, make sure that the contexts match */ if (newsbsec->flags & SE_SBINITIALIZED) - return; + return selinux_cmp_sb_context(oldsb, newsb); mutex_lock(&newsbsec->lock); @@ -805,6 +835,7 @@ static void selinux_sb_clone_mnt_opts(const struct super_block *oldsb, sb_finish_set_opts(newsb); mutex_unlock(&newsbsec->lock); + return 0; } static int selinux_parse_opts_str(char *options, -- cgit From 181387da2d64c3129e5b5186c4dd388bc5041d53 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 1 Apr 2013 19:08:06 -0700 Subject: writeback: remove unused bdi_pending_list There's no user left. Remove it. Signed-off-by: Tejun Heo Reviewed-by: Jan Kara Cc: Jens Axboe Cc: Fengguang Wu --- include/linux/backing-dev.h | 1 - mm/backing-dev.c | 4 +--- 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 350459910fe1..a5ef27f5411a 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -130,7 +130,6 @@ void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2); extern spinlock_t bdi_lock; extern struct list_head bdi_list; -extern struct list_head bdi_pending_list; static inline int wb_has_dirty_io(struct bdi_writeback *wb) { diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 41733c5dc820..657569b3fcf6 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -31,13 +31,11 @@ EXPORT_SYMBOL_GPL(noop_backing_dev_info); static struct class *bdi_class; /* - * bdi_lock protects updates to bdi_list and bdi_pending_list, as well as - * reader side protection for bdi_pending_list. 
bdi_list has RCU reader side + * bdi_lock protects updates to bdi_list. bdi_list has RCU reader side * locking. */ DEFINE_SPINLOCK(bdi_lock); LIST_HEAD(bdi_list); -LIST_HEAD(bdi_pending_list); void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2) { -- cgit From 839a8e8660b6777e7fe4e80af1a048aebe2b5977 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 1 Apr 2013 19:08:06 -0700 Subject: writeback: replace custom worker pool implementation with unbound workqueue Writeback implements its own worker pool - each bdi can be associated with a worker thread which is created and destroyed dynamically. The worker thread for the default bdi is always present and serves as the "forker" thread which forks off worker threads for other bdis. there's no reason for writeback to implement its own worker pool when using unbound workqueue instead is much simpler and more efficient. This patch replaces custom worker pool implementation in writeback with an unbound workqueue. The conversion isn't too complicated but the followings are worth mentioning. * bdi_writeback->last_active, task and wakeup_timer are removed. delayed_work ->dwork is added instead. Explicit timer handling is no longer necessary. Everything works by either queueing / modding / flushing / canceling the delayed_work item. * bdi_writeback_thread() becomes bdi_writeback_workfn() which runs off bdi_writeback->dwork. On each execution, it processes bdi->work_list and reschedules itself if there are more things to do. The function also handles low-mem condition, which used to be handled by the forker thread. If the function is running off a rescuer thread, it only writes out limited number of pages so that the rescuer can serve other bdis too. This preserves the flusher creation failure behavior of the forker thread. * INIT_LIST_HEAD(&bdi->bdi_list) is used to tell bdi_writeback_workfn() about on-going bdi unregistration so that it always drains work_list even if it's running off the rescuer. Note that the original code was broken in this regard. Under memory pressure, a bdi could finish unregistration with non-empty work_list. * The default bdi is no longer special. It now is treated the same as any other bdi and bdi_cap_flush_forker() is removed. * BDI_pending is no longer used. Removed. * Some tracepoints become non-applicable. The following TPs are removed - writeback_nothread, writeback_wake_thread, writeback_wake_forker_thread, writeback_thread_start, writeback_thread_stop. Everything, including devices coming and going away and rescuer operation under simulated memory pressure, seems to work fine in my test setup. Signed-off-by: Tejun Heo Reviewed-by: Jan Kara Cc: Jens Axboe Cc: Fengguang Wu Cc: Jeff Moyer --- fs/fs-writeback.c | 102 +++++----------- include/linux/backing-dev.h | 15 +-- include/trace/events/writeback.h | 5 - mm/backing-dev.c | 255 +++++---------------------------------- 4 files changed, 65 insertions(+), 312 deletions(-) (limited to 'include/linux') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 21f46fb3a101..8067d3719e94 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include @@ -88,20 +87,6 @@ static inline struct inode *wb_inode(struct list_head *head) #define CREATE_TRACE_POINTS #include -/* Wakeup flusher thread or forker thread to fork it. Requires bdi->wb_lock. 
*/ -static void bdi_wakeup_flusher(struct backing_dev_info *bdi) -{ - if (bdi->wb.task) { - wake_up_process(bdi->wb.task); - } else { - /* - * The bdi thread isn't there, wake up the forker thread which - * will create and run it. - */ - wake_up_process(default_backing_dev_info.wb.task); - } -} - static void bdi_queue_work(struct backing_dev_info *bdi, struct wb_writeback_work *work) { @@ -109,10 +94,9 @@ static void bdi_queue_work(struct backing_dev_info *bdi, spin_lock_bh(&bdi->wb_lock); list_add_tail(&work->list, &bdi->work_list); - if (!bdi->wb.task) - trace_writeback_nothread(bdi, work); - bdi_wakeup_flusher(bdi); spin_unlock_bh(&bdi->wb_lock); + + mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0); } static void @@ -127,10 +111,8 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages, */ work = kzalloc(sizeof(*work), GFP_ATOMIC); if (!work) { - if (bdi->wb.task) { - trace_writeback_nowork(bdi); - wake_up_process(bdi->wb.task); - } + trace_writeback_nowork(bdi); + mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0); return; } @@ -177,9 +159,7 @@ void bdi_start_background_writeback(struct backing_dev_info *bdi) * writeback as soon as there is no other work to do. */ trace_writeback_wake_background(bdi); - spin_lock_bh(&bdi->wb_lock); - bdi_wakeup_flusher(bdi); - spin_unlock_bh(&bdi->wb_lock); + mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0); } /* @@ -1020,66 +1000,48 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) /* * Handle writeback of dirty data for the device backed by this bdi. Also - * wakes up periodically and does kupdated style flushing. + * reschedules periodically and does kupdated style flushing. */ -int bdi_writeback_thread(void *data) +void bdi_writeback_workfn(struct work_struct *work) { - struct bdi_writeback *wb = data; + struct bdi_writeback *wb = container_of(to_delayed_work(work), + struct bdi_writeback, dwork); struct backing_dev_info *bdi = wb->bdi; long pages_written; current->flags |= PF_SWAPWRITE; - set_freezable(); - wb->last_active = jiffies; - - /* - * Our parent may run at a different priority, just set us to normal - */ - set_user_nice(current, 0); - - trace_writeback_thread_start(bdi); - while (!kthread_freezable_should_stop(NULL)) { + if (likely(!current_is_workqueue_rescuer() || + list_empty(&bdi->bdi_list))) { /* - * Remove own delayed wake-up timer, since we are already awake - * and we'll take care of the periodic write-back. + * The normal path. Keep writing back @bdi until its + * work_list is empty. Note that this path is also taken + * if @bdi is shutting down even when we're running off the + * rescuer as work_list needs to be drained. */ - del_timer(&wb->wakeup_timer); - - pages_written = wb_do_writeback(wb, 0); - + do { + pages_written = wb_do_writeback(wb, 0); + trace_writeback_pages_written(pages_written); + } while (!list_empty(&bdi->work_list)); + } else { + /* + * bdi_wq can't get enough workers and we're running off + * the emergency worker. Don't hog it. Hopefully, 1024 is + * enough for efficient IO. 
+ */ + pages_written = writeback_inodes_wb(&bdi->wb, 1024, + WB_REASON_FORKER_THREAD); trace_writeback_pages_written(pages_written); - - if (pages_written) - wb->last_active = jiffies; - - set_current_state(TASK_INTERRUPTIBLE); - if (!list_empty(&bdi->work_list) || kthread_should_stop()) { - __set_current_state(TASK_RUNNING); - continue; - } - - if (wb_has_dirty_io(wb) && dirty_writeback_interval) - schedule_timeout(msecs_to_jiffies(dirty_writeback_interval * 10)); - else { - /* - * We have nothing to do, so can go sleep without any - * timeout and save power. When a work is queued or - * something is made dirty - we will be woken up. - */ - schedule(); - } } - /* Flush any work that raced with us exiting */ - if (!list_empty(&bdi->work_list)) - wb_do_writeback(wb, 1); + if (!list_empty(&bdi->work_list) || + (wb_has_dirty_io(wb) && dirty_writeback_interval)) + queue_delayed_work(bdi_wq, &wb->dwork, + msecs_to_jiffies(dirty_writeback_interval * 10)); - trace_writeback_thread_stop(bdi); - return 0; + current->flags &= ~PF_SWAPWRITE; } - /* * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back * the whole world. diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index a5ef27f5411a..c3881553f7d1 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -18,6 +18,7 @@ #include #include #include +#include struct page; struct device; @@ -27,7 +28,6 @@ struct dentry; * Bits in backing_dev_info.state */ enum bdi_state { - BDI_pending, /* On its way to being activated */ BDI_wb_alloc, /* Default embedded wb allocated */ BDI_async_congested, /* The async (write) queue is getting full */ BDI_sync_congested, /* The sync queue is getting full */ @@ -53,10 +53,8 @@ struct bdi_writeback { unsigned int nr; unsigned long last_old_flush; /* last old data flush */ - unsigned long last_active; /* last time bdi thread was active */ - struct task_struct *task; /* writeback thread */ - struct timer_list wakeup_timer; /* used for delayed bdi thread wakeup */ + struct delayed_work dwork; /* work item used for writeback */ struct list_head b_dirty; /* dirty inodes */ struct list_head b_io; /* parked for writeback */ struct list_head b_more_io; /* parked for more writeback */ @@ -123,7 +121,7 @@ int bdi_setup_and_register(struct backing_dev_info *, char *, unsigned int); void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages, enum wb_reason reason); void bdi_start_background_writeback(struct backing_dev_info *bdi); -int bdi_writeback_thread(void *data); +void bdi_writeback_workfn(struct work_struct *work); int bdi_has_dirty_io(struct backing_dev_info *bdi); void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi); void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2); @@ -131,6 +129,8 @@ void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2); extern spinlock_t bdi_lock; extern struct list_head bdi_list; +extern struct workqueue_struct *bdi_wq; + static inline int wb_has_dirty_io(struct bdi_writeback *wb) { return !list_empty(&wb->b_dirty) || @@ -335,11 +335,6 @@ static inline bool bdi_cap_swap_backed(struct backing_dev_info *bdi) return bdi->capabilities & BDI_CAP_SWAP_BACKED; } -static inline bool bdi_cap_flush_forker(struct backing_dev_info *bdi) -{ - return bdi == &default_backing_dev_info; -} - static inline bool mapping_cap_writeback_dirty(struct address_space *mapping) { return bdi_cap_writeback_dirty(mapping->backing_dev_info); diff --git a/include/trace/events/writeback.h 
b/include/trace/events/writeback.h index 6a16fd2e70ed..464ea82e10db 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -183,7 +183,6 @@ DECLARE_EVENT_CLASS(writeback_work_class, DEFINE_EVENT(writeback_work_class, name, \ TP_PROTO(struct backing_dev_info *bdi, struct wb_writeback_work *work), \ TP_ARGS(bdi, work)) -DEFINE_WRITEBACK_WORK_EVENT(writeback_nothread); DEFINE_WRITEBACK_WORK_EVENT(writeback_queue); DEFINE_WRITEBACK_WORK_EVENT(writeback_exec); DEFINE_WRITEBACK_WORK_EVENT(writeback_start); @@ -222,12 +221,8 @@ DEFINE_EVENT(writeback_class, name, \ DEFINE_WRITEBACK_EVENT(writeback_nowork); DEFINE_WRITEBACK_EVENT(writeback_wake_background); -DEFINE_WRITEBACK_EVENT(writeback_wake_thread); -DEFINE_WRITEBACK_EVENT(writeback_wake_forker_thread); DEFINE_WRITEBACK_EVENT(writeback_bdi_register); DEFINE_WRITEBACK_EVENT(writeback_bdi_unregister); -DEFINE_WRITEBACK_EVENT(writeback_thread_start); -DEFINE_WRITEBACK_EVENT(writeback_thread_stop); DECLARE_EVENT_CLASS(wbc_class, TP_PROTO(struct writeback_control *wbc, struct backing_dev_info *bdi), diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 657569b3fcf6..2857d4f6bca4 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -37,6 +37,9 @@ static struct class *bdi_class; DEFINE_SPINLOCK(bdi_lock); LIST_HEAD(bdi_list); +/* bdi_wq serves all asynchronous writeback tasks */ +struct workqueue_struct *bdi_wq; + void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2) { if (wb1 < wb2) { @@ -255,6 +258,11 @@ static int __init default_bdi_init(void) { int err; + bdi_wq = alloc_workqueue("writeback", WQ_MEM_RECLAIM | WQ_FREEZABLE | + WQ_UNBOUND, 0); + if (!bdi_wq) + return -ENOMEM; + err = bdi_init(&default_backing_dev_info); if (!err) bdi_register(&default_backing_dev_info, NULL, "default"); @@ -269,26 +277,6 @@ int bdi_has_dirty_io(struct backing_dev_info *bdi) return wb_has_dirty_io(&bdi->wb); } -static void wakeup_timer_fn(unsigned long data) -{ - struct backing_dev_info *bdi = (struct backing_dev_info *)data; - - spin_lock_bh(&bdi->wb_lock); - if (bdi->wb.task) { - trace_writeback_wake_thread(bdi); - wake_up_process(bdi->wb.task); - } else if (bdi->dev) { - /* - * When bdi tasks are inactive for long time, they are killed. - * In this case we have to wake-up the forker thread which - * should create and run the bdi thread. - */ - trace_writeback_wake_forker_thread(bdi); - wake_up_process(default_backing_dev_info.wb.task); - } - spin_unlock_bh(&bdi->wb_lock); -} - /* * This function is used when the first inode for this bdi is marked dirty. It * wakes-up the corresponding bdi thread which should then take care of the @@ -305,176 +293,7 @@ void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi) unsigned long timeout; timeout = msecs_to_jiffies(dirty_writeback_interval * 10); - mod_timer(&bdi->wb.wakeup_timer, jiffies + timeout); -} - -/* - * Calculate the longest interval (jiffies) bdi threads are allowed to be - * inactive. 
- */ -static unsigned long bdi_longest_inactive(void) -{ - unsigned long interval; - - interval = msecs_to_jiffies(dirty_writeback_interval * 10); - return max(5UL * 60 * HZ, interval); -} - -/* - * Clear pending bit and wakeup anybody waiting for flusher thread creation or - * shutdown - */ -static void bdi_clear_pending(struct backing_dev_info *bdi) -{ - clear_bit(BDI_pending, &bdi->state); - smp_mb__after_clear_bit(); - wake_up_bit(&bdi->state, BDI_pending); -} - -static int bdi_forker_thread(void *ptr) -{ - struct bdi_writeback *me = ptr; - - current->flags |= PF_SWAPWRITE; - set_freezable(); - - /* - * Our parent may run at a different priority, just set us to normal - */ - set_user_nice(current, 0); - - for (;;) { - struct task_struct *task = NULL; - struct backing_dev_info *bdi; - enum { - NO_ACTION, /* Nothing to do */ - FORK_THREAD, /* Fork bdi thread */ - KILL_THREAD, /* Kill inactive bdi thread */ - } action = NO_ACTION; - - /* - * Temporary measure, we want to make sure we don't see - * dirty data on the default backing_dev_info - */ - if (wb_has_dirty_io(me) || !list_empty(&me->bdi->work_list)) { - del_timer(&me->wakeup_timer); - wb_do_writeback(me, 0); - } - - spin_lock_bh(&bdi_lock); - /* - * In the following loop we are going to check whether we have - * some work to do without any synchronization with tasks - * waking us up to do work for them. Set the task state here - * so that we don't miss wakeups after verifying conditions. - */ - set_current_state(TASK_INTERRUPTIBLE); - - list_for_each_entry(bdi, &bdi_list, bdi_list) { - bool have_dirty_io; - - if (!bdi_cap_writeback_dirty(bdi) || - bdi_cap_flush_forker(bdi)) - continue; - - WARN(!test_bit(BDI_registered, &bdi->state), - "bdi %p/%s is not registered!\n", bdi, bdi->name); - - have_dirty_io = !list_empty(&bdi->work_list) || - wb_has_dirty_io(&bdi->wb); - - /* - * If the bdi has work to do, but the thread does not - * exist - create it. - */ - if (!bdi->wb.task && have_dirty_io) { - /* - * Set the pending bit - if someone will try to - * unregister this bdi - it'll wait on this bit. - */ - set_bit(BDI_pending, &bdi->state); - action = FORK_THREAD; - break; - } - - spin_lock(&bdi->wb_lock); - - /* - * If there is no work to do and the bdi thread was - * inactive long enough - kill it. The wb_lock is taken - * to make sure no-one adds more work to this bdi and - * wakes the bdi thread up. - */ - if (bdi->wb.task && !have_dirty_io && - time_after(jiffies, bdi->wb.last_active + - bdi_longest_inactive())) { - task = bdi->wb.task; - bdi->wb.task = NULL; - spin_unlock(&bdi->wb_lock); - set_bit(BDI_pending, &bdi->state); - action = KILL_THREAD; - break; - } - spin_unlock(&bdi->wb_lock); - } - spin_unlock_bh(&bdi_lock); - - /* Keep working if default bdi still has things to do */ - if (!list_empty(&me->bdi->work_list)) - __set_current_state(TASK_RUNNING); - - switch (action) { - case FORK_THREAD: - __set_current_state(TASK_RUNNING); - task = kthread_create(bdi_writeback_thread, &bdi->wb, - "flush-%s", dev_name(bdi->dev)); - if (IS_ERR(task)) { - /* - * If thread creation fails, force writeout of - * the bdi from the thread. Hopefully 1024 is - * large enough for efficient IO. - */ - writeback_inodes_wb(&bdi->wb, 1024, - WB_REASON_FORKER_THREAD); - } else { - /* - * The spinlock makes sure we do not lose - * wake-ups when racing with 'bdi_queue_work()'. - * And as soon as the bdi thread is visible, we - * can start it. 
- */ - spin_lock_bh(&bdi->wb_lock); - bdi->wb.task = task; - spin_unlock_bh(&bdi->wb_lock); - wake_up_process(task); - } - bdi_clear_pending(bdi); - break; - - case KILL_THREAD: - __set_current_state(TASK_RUNNING); - kthread_stop(task); - bdi_clear_pending(bdi); - break; - - case NO_ACTION: - if (!wb_has_dirty_io(me) || !dirty_writeback_interval) - /* - * There are no dirty data. The only thing we - * should now care about is checking for - * inactive bdi threads and killing them. Thus, - * let's sleep for longer time, save energy and - * be friendly for battery-driven devices. - */ - schedule_timeout(bdi_longest_inactive()); - else - schedule_timeout(msecs_to_jiffies(dirty_writeback_interval * 10)); - try_to_freeze(); - break; - } - } - - return 0; + mod_delayed_work(bdi_wq, &bdi->wb.dwork, timeout); } /* @@ -487,6 +306,9 @@ static void bdi_remove_from_list(struct backing_dev_info *bdi) spin_unlock_bh(&bdi_lock); synchronize_rcu_expedited(); + + /* bdi_list is now unused, clear it to mark @bdi dying */ + INIT_LIST_HEAD(&bdi->bdi_list); } int bdi_register(struct backing_dev_info *bdi, struct device *parent, @@ -506,20 +328,6 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent, bdi->dev = dev; - /* - * Just start the forker thread for our default backing_dev_info, - * and add other bdi's to the list. They will get a thread created - * on-demand when they need it. - */ - if (bdi_cap_flush_forker(bdi)) { - struct bdi_writeback *wb = &bdi->wb; - - wb->task = kthread_run(bdi_forker_thread, wb, "bdi-%s", - dev_name(dev)); - if (IS_ERR(wb->task)) - return PTR_ERR(wb->task); - } - bdi_debug_register(bdi, dev_name(dev)); set_bit(BDI_registered, &bdi->state); @@ -543,8 +351,6 @@ EXPORT_SYMBOL(bdi_register_dev); */ static void bdi_wb_shutdown(struct backing_dev_info *bdi) { - struct task_struct *task; - if (!bdi_cap_writeback_dirty(bdi)) return; @@ -554,22 +360,20 @@ static void bdi_wb_shutdown(struct backing_dev_info *bdi) bdi_remove_from_list(bdi); /* - * If setup is pending, wait for that to complete first + * Drain work list and shutdown the delayed_work. At this point, + * @bdi->bdi_list is empty telling bdi_Writeback_workfn() that @bdi + * is dying and its work_list needs to be drained no matter what. */ - wait_on_bit(&bdi->state, BDI_pending, bdi_sched_wait, - TASK_UNINTERRUPTIBLE); + mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0); + flush_delayed_work(&bdi->wb.dwork); + WARN_ON(!list_empty(&bdi->work_list)); /* - * Finally, kill the kernel thread. We don't need to be RCU - * safe anymore, since the bdi is gone from visibility. + * This shouldn't be necessary unless @bdi for some reason has + * unflushed dirty IO after work_list is drained. Do it anyway + * just in case. 
*/ - spin_lock_bh(&bdi->wb_lock); - task = bdi->wb.task; - bdi->wb.task = NULL; - spin_unlock_bh(&bdi->wb_lock); - - if (task) - kthread_stop(task); + cancel_delayed_work_sync(&bdi->wb.dwork); } /* @@ -595,10 +399,8 @@ void bdi_unregister(struct backing_dev_info *bdi) bdi_set_min_ratio(bdi, 0); trace_writeback_bdi_unregister(bdi); bdi_prune_sb(bdi); - del_timer_sync(&bdi->wb.wakeup_timer); - if (!bdi_cap_flush_forker(bdi)) - bdi_wb_shutdown(bdi); + bdi_wb_shutdown(bdi); bdi_debug_unregister(bdi); spin_lock_bh(&bdi->wb_lock); @@ -620,7 +422,7 @@ static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi) INIT_LIST_HEAD(&wb->b_io); INIT_LIST_HEAD(&wb->b_more_io); spin_lock_init(&wb->list_lock); - setup_timer(&wb->wakeup_timer, wakeup_timer_fn, (unsigned long)bdi); + INIT_DELAYED_WORK(&wb->dwork, bdi_writeback_workfn); } /* @@ -693,12 +495,11 @@ void bdi_destroy(struct backing_dev_info *bdi) bdi_unregister(bdi); /* - * If bdi_unregister() had already been called earlier, the - * wakeup_timer could still be armed because bdi_prune_sb() - * can race with the bdi_wakeup_thread_delayed() calls from - * __mark_inode_dirty(). + * If bdi_unregister() had already been called earlier, the dwork + * could still be pending because bdi_prune_sb() can race with the + * bdi_wakeup_thread_delayed() calls from __mark_inode_dirty(). */ - del_timer_sync(&bdi->wb.wakeup_timer); + cancel_delayed_work_sync(&bdi->wb.dwork); for (i = 0; i < NR_BDI_STAT_ITEMS; i++) percpu_counter_destroy(&bdi->bdi_stat[i]); -- cgit From 19baba4cb6843bbe3dfde87e1e913f6a9cd27da9 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Sat, 9 Mar 2013 08:16:44 +0000 Subject: i2c: Remove detach_adapter The detach_adapter callback has been deprecated for quite some time and has no user left. Keeping it alive blocks other cleanups, so remove it. 
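For context, here is a minimal sketch of the binding pattern that makes adapter callbacks unnecessary (hypothetical names; assumes the standard device model binding for the i2c API of this era): the core calls remove() when a client or its adapter goes away, so a client driver never needs detach_adapter.

#include <linux/i2c.h>
#include <linux/module.h>

/* Hypothetical client driver: the driver model invokes remove() on
 * unbinding (including adapter removal), so no adapter callbacks. */
static int example_probe(struct i2c_client *client,
			 const struct i2c_device_id *id)
{
	return 0;
}

static int example_remove(struct i2c_client *client)
{
	return 0;
}

static const struct i2c_device_id example_id[] = {
	{ "example", 0 },
	{ }
};
MODULE_DEVICE_TABLE(i2c, example_id);

static struct i2c_driver example_driver = {
	.driver = {
		.name = "example",
	},
	.probe = example_probe,
	.remove = example_remove,
	.id_table = example_id,
};
module_i2c_driver(example_driver);
MODULE_LICENSE("GPL");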
Signed-off-by: Lars-Peter Clausen Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-core.c | 33 ++++++++++----------------------- include/linux/i2c.h | 7 ++----- 2 files changed, 12 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index 0d873ba2e82e..f7cd05b4f327 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -49,7 +49,7 @@ /* core_lock protects i2c_adapter_idr, and guarantees that device detection, deletion of detected devices, and attach_adapter - and detach_adapter calls are serialized */ + calls are serialized */ static DEFINE_MUTEX(core_lock); static DEFINE_IDR(i2c_adapter_idr); @@ -1172,11 +1172,10 @@ int i2c_add_numbered_adapter(struct i2c_adapter *adap) } EXPORT_SYMBOL_GPL(i2c_add_numbered_adapter); -static int i2c_do_del_adapter(struct i2c_driver *driver, +static void i2c_do_del_adapter(struct i2c_driver *driver, struct i2c_adapter *adapter) { struct i2c_client *client, *_n; - int res; /* Remove the devices we created ourselves as the result of hardware * probing (using a driver's detect method) */ @@ -1188,16 +1187,6 @@ static int i2c_do_del_adapter(struct i2c_driver *driver, i2c_unregister_device(client); } } - - if (!driver->detach_adapter) - return 0; - dev_warn(&adapter->dev, "%s: detach_adapter method is deprecated\n", - driver->driver.name); - res = driver->detach_adapter(adapter); - if (res) - dev_err(&adapter->dev, "detach_adapter failed (%d) " - "for driver [%s]\n", res, driver->driver.name); - return res; } static int __unregister_client(struct device *dev, void *dummy) @@ -1218,7 +1207,8 @@ static int __unregister_dummy(struct device *dev, void *dummy) static int __process_removed_adapter(struct device_driver *d, void *data) { - return i2c_do_del_adapter(to_i2c_driver(d), data); + i2c_do_del_adapter(to_i2c_driver(d), data); + return 0; } /** @@ -1231,7 +1221,6 @@ static int __process_removed_adapter(struct device_driver *d, void *data) */ int i2c_del_adapter(struct i2c_adapter *adap) { - int res = 0; struct i2c_adapter *found; struct i2c_client *client, *next; @@ -1247,11 +1236,9 @@ int i2c_del_adapter(struct i2c_adapter *adap) /* Tell drivers about this removal */ mutex_lock(&core_lock); - res = bus_for_each_drv(&i2c_bus_type, NULL, adap, + bus_for_each_drv(&i2c_bus_type, NULL, adap, __process_removed_adapter); mutex_unlock(&core_lock); - if (res) - return res; /* Remove devices instantiated from sysfs */ mutex_lock_nested(&adap->userspace_clients_lock, @@ -1270,8 +1257,8 @@ int i2c_del_adapter(struct i2c_adapter *adap) * we can't remove the dummy devices during the first pass: they * could have been instantiated by real devices wishing to clean * them up properly, so we give them a chance to do that first. 
*/ - res = device_for_each_child(&adap->dev, NULL, __unregister_client); - res = device_for_each_child(&adap->dev, NULL, __unregister_dummy); + device_for_each_child(&adap->dev, NULL, __unregister_client); + device_for_each_child(&adap->dev, NULL, __unregister_dummy); #ifdef CONFIG_I2C_COMPAT class_compat_remove_link(i2c_adapter_compat_class, &adap->dev, @@ -1367,9 +1354,9 @@ EXPORT_SYMBOL(i2c_register_driver); static int __process_removed_driver(struct device *dev, void *data) { - if (dev->type != &i2c_adapter_type) - return 0; - return i2c_do_del_adapter(data, to_i2c_adapter(dev)); + if (dev->type == &i2c_adapter_type) + i2c_do_del_adapter(data, to_i2c_adapter(dev)); + return 0; } /** diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 2eca3860b77f..f2bcd46ce194 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -125,7 +125,6 @@ extern s32 i2c_smbus_write_i2c_block_data(const struct i2c_client *client, * struct i2c_driver - represent an I2C device driver * @class: What kind of i2c device we instantiate (for detect) * @attach_adapter: Callback for bus addition (deprecated) - * @detach_adapter: Callback for bus removal (deprecated) * @probe: Callback for device binding * @remove: Callback for device unbinding * @shutdown: Callback for device shutdown @@ -162,12 +161,10 @@ extern s32 i2c_smbus_write_i2c_block_data(const struct i2c_client *client, struct i2c_driver { unsigned int class; - /* Notifies the driver that a new bus has appeared or is about to be - * removed. You should avoid using this, it will be removed in a - * near future. + /* Notifies the driver that a new bus has appeared. You should avoid + * using this, it will be removed in a near future. */ int (*attach_adapter)(struct i2c_adapter *) __deprecated; - int (*detach_adapter)(struct i2c_adapter *) __deprecated; /* Standard driver model interfaces */ int (*probe)(struct i2c_client *, const struct i2c_device_id *); -- cgit From 71546300c8684eb69286604c79624582c16f2f5b Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Sat, 9 Mar 2013 08:16:47 +0000 Subject: i2c: Make return type of i2c_del_adapter() void i2c_del_adapter() is usually called from a driver's remove callback. The Linux device driver model does not allow the remove callback to fail; all resources allocated in the probe callback need to be freed, and all resources which have been provided to the rest of the kernel (for example an I2C adapter) need to be revoked. So any function revoking such resources isn't allowed to fail either. i2c_del_adapter() adheres to this requirement and will never fail. But i2c_del_adapter()'s return type is int, which may cause driver authors to think that it can fail. This led to code constructs like: ret = i2c_del_adapter(...); BUG_ON(ret); Since i2c_del_adapter() always returns 0, the BUG_ON is never hit and essentially becomes dead code, which means it can be removed. Making the return type of i2c_del_adapter() void makes it explicit that the function will never fail and should prevent constructs like the above from re-appearing in the kernel code. All callers of i2c_del_adapter() have already been updated in a previous patch to ignore the return value, so the conversion of the return type from int to void can be done without causing any build failures.
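The resulting caller pattern, as a sketch (hypothetical bus driver names; assumes the adapter was registered in probe()):

#include <linux/i2c.h>
#include <linux/platform_device.h>

struct example_i2c_dev {
	struct i2c_adapter adapter;
};

static int example_i2c_remove(struct platform_device *pdev)
{
	struct example_i2c_dev *dev = platform_get_drvdata(pdev);

	/* i2c_del_adapter() cannot fail: nothing to check, no BUG_ON() */
	i2c_del_adapter(&dev->adapter);
	return 0;
}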
Signed-off-by: Lars-Peter Clausen Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-core.c | 6 ++---- include/linux/i2c.h | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index e4fe4940fd82..e9ac0d84a01e 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -1219,7 +1219,7 @@ static int __process_removed_adapter(struct device_driver *d, void *data) * This unregisters an I2C adapter which was previously registered * by @i2c_add_adapter or @i2c_add_numbered_adapter. */ -int i2c_del_adapter(struct i2c_adapter *adap) +void i2c_del_adapter(struct i2c_adapter *adap) { struct i2c_adapter *found; struct i2c_client *client, *next; @@ -1231,7 +1231,7 @@ int i2c_del_adapter(struct i2c_adapter *adap) if (found != adap) { pr_debug("i2c-core: attempting to delete unregistered " "adapter [%s]\n", adap->name); - return 0; + return; } /* Tell drivers about this removal */ @@ -1283,8 +1283,6 @@ int i2c_del_adapter(struct i2c_adapter *adap) /* Clear the device structure in case this adapter is ever going to be added again */ memset(&adap->dev, 0, sizeof(adap->dev)); - - return 0; } EXPORT_SYMBOL(i2c_del_adapter); diff --git a/include/linux/i2c.h b/include/linux/i2c.h index f2bcd46ce194..e988fa935b3c 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -488,7 +488,7 @@ void i2c_unlock_adapter(struct i2c_adapter *); */ #if defined(CONFIG_I2C) || defined(CONFIG_I2C_MODULE) extern int i2c_add_adapter(struct i2c_adapter *); -extern int i2c_del_adapter(struct i2c_adapter *); +extern void i2c_del_adapter(struct i2c_adapter *); extern int i2c_add_numbered_adapter(struct i2c_adapter *); extern int i2c_register_driver(struct module *, struct i2c_driver *); -- cgit From 51d95709dddf7fdf6769a547de37a9c98edf8df9 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Sat, 9 Mar 2013 08:16:49 +0000 Subject: i2c: Make the return type of i2c_del_mux_adapter() void i2c_del_mux_adapter() always returns 0, and none of its current users checks the return value anyway. It is also an essential requirement of the Linux device driver model that functions which may be called from a device's remove callback to free resources provided by the device are not allowed to fail. This is the case for i2c_del_mux_adapter(), so make its return type void to make it explicit that it won't fail.
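For the mux case the same reasoning applies on the teardown path; a minimal sketch with a hypothetical "bar" mux driver (names and channel count are illustrative):

#include <linux/i2c.h>
#include <linux/i2c-mux.h>

struct bar_mux {
	struct i2c_adapter *chan[4];
	int num_chan;
};

static int bar_remove(struct i2c_client *client)
{
	struct bar_mux *mux = i2c_get_clientdata(client);
	int i;

	/*
	 * Each child adapter handed to the rest of the kernel is simply
	 * revoked; with a void return there is nothing left to check.
	 */
	for (i = 0; i < mux->num_chan; i++)
		i2c_del_mux_adapter(mux->chan[i]);

	return 0;
}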
Signed-off-by: Lars-Peter Clausen Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-mux.c | 4 +--- include/linux/i2c-mux.h | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/i2c/i2c-mux.c b/drivers/i2c/i2c-mux.c index 361b78d76759..7409ebb33c47 100644 --- a/drivers/i2c/i2c-mux.c +++ b/drivers/i2c/i2c-mux.c @@ -191,14 +191,12 @@ struct i2c_adapter *i2c_add_mux_adapter(struct i2c_adapter *parent, } EXPORT_SYMBOL_GPL(i2c_add_mux_adapter); -int i2c_del_mux_adapter(struct i2c_adapter *adap) +void i2c_del_mux_adapter(struct i2c_adapter *adap) { struct i2c_mux_priv *priv = adap->algo_data; i2c_del_adapter(adap); kfree(priv); - - return 0; } EXPORT_SYMBOL_GPL(i2c_del_mux_adapter); diff --git a/include/linux/i2c-mux.h b/include/linux/i2c-mux.h index 40cb05a97b46..b5f9a007a3ab 100644 --- a/include/linux/i2c-mux.h +++ b/include/linux/i2c-mux.h @@ -42,7 +42,7 @@ struct i2c_adapter *i2c_add_mux_adapter(struct i2c_adapter *parent, int (*deselect) (struct i2c_adapter *, void *mux_dev, u32 chan_id)); -int i2c_del_mux_adapter(struct i2c_adapter *adap); +void i2c_del_mux_adapter(struct i2c_adapter *adap); #endif /* __KERNEL__ */ -- cgit From ef096542642874de10909f02686447a96a66ad14 Mon Sep 17 00:00:00 2001 From: Chao Xie Date: Mon, 25 Mar 2013 03:06:57 -0400 Subject: usb: mv_usb: remove clock name from pdata Using pdata to pass the clock name is not correct. Get the clock directly in the USB drivers instead. Signed-off-by: Chao Xie Signed-off-by: Felipe Balbi --- include/linux/platform_data/mv_usb.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/mv_usb.h b/include/linux/platform_data/mv_usb.h index 944b01dd103e..98b7925f1a2d 100644 --- a/include/linux/platform_data/mv_usb.h +++ b/include/linux/platform_data/mv_usb.h @@ -34,8 +34,6 @@ struct mv_usb_addon_irq { }; struct mv_usb_platform_data { - unsigned int clknum; - char **clkname; struct mv_usb_addon_irq *id; /* Only valid for OTG. ID pin change*/ struct mv_usb_addon_irq *vbus; /* valid for OTG/UDC. VBUS change*/ -- cgit From 225da3e3cb1f0db9e4cb7fa2a7dc3a360d1cf788 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Sun, 31 Mar 2013 18:34:43 -0700 Subject: usb: renesas_usbhs: fixup sparse errors for common.c This patch fixes up the sparse errors below: CHECK ${RENESAS_USB}/common.c ${RENESAS_USB}/common.c:313:17: error: incompatible types in conditional expression (different base types) ${RENESAS_USB}/common.c:322:17: error: incompatible types in conditional expression (different base types) ${RENESAS_USB}/common.c:384:17: error: incompatible types in conditional expression (different base types) ${RENESAS_USB}/common.c:524:9: error: incompatible types in conditional expression (different base types) ${RENESAS_USB}/common.c:545:9: error: incompatible types in conditional expression (different base types) ${RENESAS_USB}/common.c:574:9: error: incompatible types in conditional expression (different base types) ${RENESAS_USB}/common.c:606:9: error: incompatible types in conditional expression (different base types) ${RENESAS_USB}/mod_gadget.c:233:28: warning: symbol 'req_clear_feature' was not declared. Should it be static? ${RENESAS_USB}/mod_gadget.c:274:28: warning: symbol 'req_set_feature' was not declared. Should it be static? ${RENESAS_USB}/mod_gadget.c:375:28: warning: symbol 'req_get_status' was not declared. Should it be static?
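The conditional-expression errors stem from dispatch code that feeds a callback's return value into a ?: chain with integer arms; a stripped-down illustration of that error class, assuming a usbhs-style dispatch macro rather than copying the driver's actual one:

#include <linux/errno.h>
#include <linux/platform_device.h>

struct demo_pfunc {
	/* if this returned void, the ?: chain below would mix base types */
	int (*hardware_exit)(struct platform_device *pdev);
};

/*
 * The callback's return value becomes one arm of a conditional
 * expression whose other arms are ints; sparse reports "incompatible
 * types in conditional expression" unless every callback returns int.
 */
#define demo_call(pfunc, func, pdev)				\
	(!(pfunc) ? -ENODEV :					\
	 !(pfunc)->func ? 0 : (pfunc)->func(pdev))

static int demo_exit(struct demo_pfunc *pfunc, struct platform_device *pdev)
{
	return demo_call(pfunc, hardware_exit, pdev);
}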
[ balbi@ti.com : added three sparse fixes to mod_gadget.c ] Signed-off-by: Kuninori Morimoto Signed-off-by: Felipe Balbi --- arch/arm/mach-shmobile/board-armadillo800eva.c | 8 ++++++-- arch/arm/mach-shmobile/board-kzm9g.c | 8 ++++++-- arch/arm/mach-shmobile/board-mackerel.c | 12 +++++++++--- arch/sh/boards/mach-ecovec24/setup.c | 4 +++- drivers/usb/renesas_usbhs/mod_gadget.c | 6 +++--- include/linux/usb/renesas_usbhs.h | 6 +++--- 6 files changed, 30 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-shmobile/board-armadillo800eva.c b/arch/arm/mach-shmobile/board-armadillo800eva.c index f2ec0777cfbe..ff8b7ba9b93c 100644 --- a/arch/arm/mach-shmobile/board-armadillo800eva.c +++ b/arch/arm/mach-shmobile/board-armadillo800eva.c @@ -169,7 +169,7 @@ static int usbhsf_get_id(struct platform_device *pdev) return USBHS_GADGET; } -static void usbhsf_power_ctrl(struct platform_device *pdev, +static int usbhsf_power_ctrl(struct platform_device *pdev, void __iomem *base, int enable) { struct usbhsf_private *priv = usbhsf_get_priv(pdev); @@ -223,6 +223,8 @@ static void usbhsf_power_ctrl(struct platform_device *pdev, clk_disable(priv->pci); /* usb work around */ clk_disable(priv->usb24); /* usb work around */ } + + return 0; } static int usbhsf_get_vbus(struct platform_device *pdev) @@ -239,7 +241,7 @@ static irqreturn_t usbhsf_interrupt(int irq, void *data) return IRQ_HANDLED; } -static void usbhsf_hardware_exit(struct platform_device *pdev) +static int usbhsf_hardware_exit(struct platform_device *pdev) { struct usbhsf_private *priv = usbhsf_get_priv(pdev); @@ -264,6 +266,8 @@ static void usbhsf_hardware_exit(struct platform_device *pdev) priv->usbh_base = NULL; free_irq(IRQ7, pdev); + + return 0; } static int usbhsf_hardware_init(struct platform_device *pdev) diff --git a/arch/arm/mach-shmobile/board-kzm9g.c b/arch/arm/mach-shmobile/board-kzm9g.c index 7f3a6b7e7b7c..a385f570bbfc 100644 --- a/arch/arm/mach-shmobile/board-kzm9g.c +++ b/arch/arm/mach-shmobile/board-kzm9g.c @@ -155,12 +155,14 @@ static int usbhs_get_vbus(struct platform_device *pdev) return !((1 << 7) & __raw_readw(priv->cr2)); } -static void usbhs_phy_reset(struct platform_device *pdev) +static int usbhs_phy_reset(struct platform_device *pdev) { struct usbhs_private *priv = usbhs_get_priv(pdev); /* init phy */ __raw_writew(0x8a0a, priv->cr2); + + return 0; } static int usbhs_get_id(struct platform_device *pdev) @@ -202,7 +204,7 @@ static int usbhs_hardware_init(struct platform_device *pdev) return 0; } -static void usbhs_hardware_exit(struct platform_device *pdev) +static int usbhs_hardware_exit(struct platform_device *pdev) { struct usbhs_private *priv = usbhs_get_priv(pdev); @@ -210,6 +212,8 @@ static void usbhs_hardware_exit(struct platform_device *pdev) __raw_writew(USB_PHY_MODE | USB_PHY_INT_CLR, priv->phy); free_irq(IRQ15, pdev); + + return 0; } static u32 usbhs_pipe_cfg[] = { diff --git a/arch/arm/mach-shmobile/board-mackerel.c b/arch/arm/mach-shmobile/board-mackerel.c index db968a585ff0..979237c18dad 100644 --- a/arch/arm/mach-shmobile/board-mackerel.c +++ b/arch/arm/mach-shmobile/board-mackerel.c @@ -596,12 +596,14 @@ static int usbhs_get_vbus(struct platform_device *pdev) return usbhs_is_connected(usbhs_get_priv(pdev)); } -static void usbhs_phy_reset(struct platform_device *pdev) +static int usbhs_phy_reset(struct platform_device *pdev) { struct usbhs_private *priv = usbhs_get_priv(pdev); /* init phy */ __raw_writew(0x8a0a, priv->usbcrcaddr); + + return 0; } static int usbhs0_get_id(struct 
platform_device *pdev) @@ -628,11 +630,13 @@ static int usbhs0_hardware_init(struct platform_device *pdev) return 0; } -static void usbhs0_hardware_exit(struct platform_device *pdev) +static int usbhs0_hardware_exit(struct platform_device *pdev) { struct usbhs_private *priv = usbhs_get_priv(pdev); cancel_delayed_work_sync(&priv->work); + + return 0; } static struct usbhs_private usbhs0_private = { @@ -735,7 +739,7 @@ static int usbhs1_hardware_init(struct platform_device *pdev) return 0; } -static void usbhs1_hardware_exit(struct platform_device *pdev) +static int usbhs1_hardware_exit(struct platform_device *pdev) { struct usbhs_private *priv = usbhs_get_priv(pdev); @@ -743,6 +747,8 @@ static void usbhs1_hardware_exit(struct platform_device *pdev) __raw_writew(USB_PHY_MODE | USB_PHY_INT_CLR, priv->usbphyaddr); free_irq(IRQ8, pdev); + + return 0; } static int usbhs1_get_id(struct platform_device *pdev) diff --git a/arch/sh/boards/mach-ecovec24/setup.c b/arch/sh/boards/mach-ecovec24/setup.c index aaff7671101b..764530c85aa9 100644 --- a/arch/sh/boards/mach-ecovec24/setup.c +++ b/arch/sh/boards/mach-ecovec24/setup.c @@ -254,11 +254,13 @@ static int usbhs_get_id(struct platform_device *pdev) return gpio_get_value(GPIO_PTB3); } -static void usbhs_phy_reset(struct platform_device *pdev) +static int usbhs_phy_reset(struct platform_device *pdev) { /* enable vbus if HOST */ if (!gpio_get_value(GPIO_PTB3)) gpio_set_value(GPIO_PTB5, 1); + + return 0; } static struct renesas_usbhs_platform_info usbhs_info = { diff --git a/drivers/usb/renesas_usbhs/mod_gadget.c b/drivers/usb/renesas_usbhs/mod_gadget.c index c2781bc9dabe..ed4949faa70d 100644 --- a/drivers/usb/renesas_usbhs/mod_gadget.c +++ b/drivers/usb/renesas_usbhs/mod_gadget.c @@ -230,7 +230,7 @@ static int usbhsg_recip_handler_std_clear_endpoint(struct usbhs_priv *priv, return 0; } -struct usbhsg_recip_handle req_clear_feature = { +static struct usbhsg_recip_handle req_clear_feature = { .name = "clear feature", .device = usbhsg_recip_handler_std_control_done, .interface = usbhsg_recip_handler_std_control_done, @@ -271,7 +271,7 @@ static int usbhsg_recip_handler_std_set_endpoint(struct usbhs_priv *priv, return 0; } -struct usbhsg_recip_handle req_set_feature = { +static struct usbhsg_recip_handle req_set_feature = { .name = "set feature", .device = usbhsg_recip_handler_std_set_device, .interface = usbhsg_recip_handler_std_control_done, @@ -372,7 +372,7 @@ static int usbhsg_recip_handler_std_get_endpoint(struct usbhs_priv *priv, return 0; } -struct usbhsg_recip_handle req_get_status = { +static struct usbhsg_recip_handle req_get_status = { .name = "get status", .device = usbhsg_recip_handler_std_get_device, .interface = usbhsg_recip_handler_std_get_interface, diff --git a/include/linux/usb/renesas_usbhs.h b/include/linux/usb/renesas_usbhs.h index c5d36c65c33b..e452ba6ec6bd 100644 --- a/include/linux/usb/renesas_usbhs.h +++ b/include/linux/usb/renesas_usbhs.h @@ -62,14 +62,14 @@ struct renesas_usbhs_platform_callback { * Hardware exit function for platform. 
* it is called when driver was removed */ - void (*hardware_exit)(struct platform_device *pdev); + int (*hardware_exit)(struct platform_device *pdev); /* * option: * * for board specific clock control */ - void (*power_ctrl)(struct platform_device *pdev, + int (*power_ctrl)(struct platform_device *pdev, void __iomem *base, int enable); /* @@ -77,7 +77,7 @@ struct renesas_usbhs_platform_callback { * * Phy reset for platform */ - void (*phy_reset)(struct platform_device *pdev); + int (*phy_reset)(struct platform_device *pdev); /* * get USB ID function -- cgit From 6fed4d869a11fdbb4c6a5e444dfb2c22f92c3e46 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 1 Apr 2013 22:03:06 +0100 Subject: extcon: arizona: Allow configuration of button detection The Arizona button detection circuit is configurable, allowing the system integrator to program a range of thresholds for the buttons supported on the accessory but currently the driver uses the default button ranges and does not provide any flexibility in how this is exposed to the application layer. Provide platform data allowing the user to control this and to map the buttons to keys in the input subsystem. Signed-off-by: Mark Brown --- drivers/extcon/extcon-arizona.c | 164 +++++++++++++++++++++++++++++--------- include/linux/mfd/arizona/pdata.h | 9 +++ 2 files changed, 134 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/drivers/extcon/extcon-arizona.c b/drivers/extcon/extcon-arizona.c index 4bb0e9ae405d..e2339629126a 100644 --- a/drivers/extcon/extcon-arizona.c +++ b/drivers/extcon/extcon-arizona.c @@ -33,7 +33,7 @@ #include #include -#define ARIZONA_NUM_BUTTONS 6 +#define ARIZONA_MAX_MICD_RANGE 8 #define ARIZONA_ACCDET_MODE_MIC 0 #define ARIZONA_ACCDET_MODE_HPL 1 @@ -50,6 +50,9 @@ struct arizona_extcon_info { const struct arizona_micd_config *micd_modes; int micd_num_modes; + const struct arizona_micd_range *micd_ranges; + int num_micd_ranges; + bool micd_reva; bool micd_clamp; @@ -71,20 +74,25 @@ struct arizona_extcon_info { }; static const struct arizona_micd_config micd_default_modes[] = { - { 0, 2 << ARIZONA_MICD_BIAS_SRC_SHIFT, 1 }, { ARIZONA_ACCDET_SRC, 1 << ARIZONA_MICD_BIAS_SRC_SHIFT, 0 }, + { 0, 2 << ARIZONA_MICD_BIAS_SRC_SHIFT, 1 }, }; -static struct { - u16 status; - int report; -} arizona_lvl_to_key[ARIZONA_NUM_BUTTONS] = { - { 0x1, BTN_0 }, - { 0x2, BTN_1 }, - { 0x4, BTN_2 }, - { 0x8, BTN_3 }, - { 0x10, BTN_4 }, - { 0x20, BTN_5 }, +static const struct arizona_micd_range micd_default_ranges[] = { + { .max = 11, .key = BTN_0 }, + { .max = 28, .key = BTN_1 }, + { .max = 54, .key = BTN_2 }, + { .max = 100, .key = BTN_3 }, + { .max = 186, .key = BTN_4 }, + { .max = 430, .key = BTN_5 }, +}; + +static const int arizona_micd_levels[] = { + 3, 6, 8, 11, 13, 16, 18, 21, 23, 26, 28, 31, 34, 36, 39, 41, 44, 46, + 49, 52, 54, 57, 60, 62, 65, 67, 70, 73, 75, 78, 81, 83, 89, 94, 100, + 105, 111, 116, 122, 127, 139, 150, 161, 173, 186, 196, 209, 220, 245, + 270, 295, 321, 348, 375, 402, 430, 489, 550, 614, 681, 752, 903, 1071, + 1257, }; #define ARIZONA_CABLE_MECHANICAL 0 @@ -153,7 +161,7 @@ static void arizona_extcon_set_mode(struct arizona_extcon_info *info, int mode) { struct arizona *arizona = info->arizona; - mode %= info->num_micd_modes; + mode %= info->micd_num_modes; if (arizona->pdata.micd_pol_gpio > 0) gpio_set_value_cansleep(arizona->pdata.micd_pol_gpio, @@ -728,7 +736,7 @@ static irqreturn_t arizona_micdet(int irq, void *data) struct arizona_extcon_info *info = data; struct arizona *arizona = info->arizona; 
unsigned int val, lvl; - int ret, i; + int ret, i, key; mutex_lock(&info->lock); @@ -815,12 +823,13 @@ static irqreturn_t arizona_micdet(int irq, void *data) lvl = val & ARIZONA_MICD_LVL_MASK; lvl >>= ARIZONA_MICD_LVL_SHIFT; - for (i = 0; i < ARIZONA_NUM_BUTTONS; i++) - if (lvl & arizona_lvl_to_key[i].status) - input_report_key(info->input, - arizona_lvl_to_key[i].report, - 1); - input_sync(info->input); + WARN_ON(!lvl); + WARN_ON(ffs(lvl) - 1 >= info->num_micd_ranges); + if (lvl && ffs(lvl) - 1 < info->num_micd_ranges) { + key = info->micd_ranges[ffs(lvl) - 1].key; + input_report_key(info->input, key, 1); + input_sync(info->input); + } } else if (info->detecting) { dev_dbg(arizona->dev, "Headphone detected\n"); @@ -834,9 +843,9 @@ static irqreturn_t arizona_micdet(int irq, void *data) } } else { dev_dbg(arizona->dev, "Mic button released\n"); - for (i = 0; i < ARIZONA_NUM_BUTTONS; i++) + for (i = 0; i < info->num_micd_ranges; i++) input_report_key(info->input, - arizona_lvl_to_key[i].report, 0); + info->micd_ranges[i].key, 0); input_sync(info->input); arizona_extcon_pulse_micbias(info); } @@ -923,9 +932,9 @@ static irqreturn_t arizona_jackdet(int irq, void *data) info->mic = false; info->hpdet_done = false; - for (i = 0; i < ARIZONA_NUM_BUTTONS; i++) + for (i = 0; i < info->num_micd_ranges; i++) input_report_key(info->input, - arizona_lvl_to_key[i].report, 0); + info->micd_ranges[i].key, 0); input_sync(info->input); ret = extcon_update_state(&info->edev, 0xffffffff, 0); @@ -954,13 +963,33 @@ static irqreturn_t arizona_jackdet(int irq, void *data) return IRQ_HANDLED; } +/* Map a level onto a slot in the register bank */ +static void arizona_micd_set_level(struct arizona *arizona, int index, + unsigned int level) +{ + int reg; + unsigned int mask; + + reg = ARIZONA_MIC_DETECT_LEVEL_4 - (index / 2); + + if (!(index % 2)) { + mask = 0x3f00; + level <<= 8; + } else { + mask = 0x3f; + } + + /* Program the level itself */ + regmap_update_bits(arizona->regmap, reg, mask, level); +} + static int arizona_extcon_probe(struct platform_device *pdev) { struct arizona *arizona = dev_get_drvdata(pdev->dev.parent); struct arizona_pdata *pdata; struct arizona_extcon_info *info; int jack_irq_fall, jack_irq_rise; - int ret, mode, i; + int ret, mode, i, j; if (!arizona->dapm || !arizona->dapm->card) return -EPROBE_DEFER; @@ -1013,6 +1042,17 @@ static int arizona_extcon_probe(struct platform_device *pdev) goto err; } + info->input = devm_input_allocate_device(&pdev->dev); + if (!info->input) { + dev_err(arizona->dev, "Can't allocate input dev\n"); + ret = -ENOMEM; + goto err_register; + } + + info->input->name = "Headset"; + info->input->phys = "arizona/extcon"; + info->input->dev.parent = &pdev->dev; + if (pdata->num_micd_configs) { info->micd_modes = pdata->micd_configs; info->micd_num_modes = pdata->num_micd_configs; @@ -1068,6 +1108,66 @@ static int arizona_extcon_probe(struct platform_device *pdev) arizona->pdata.micd_dbtime << ARIZONA_MICD_DBTIME_SHIFT); + BUILD_BUG_ON(ARRAY_SIZE(arizona_micd_levels) != 0x40); + + if (arizona->pdata.num_micd_ranges) { + info->micd_ranges = pdata->micd_ranges; + info->num_micd_ranges = pdata->num_micd_ranges; + } else { + info->micd_ranges = micd_default_ranges; + info->num_micd_ranges = ARRAY_SIZE(micd_default_ranges); + } + + if (arizona->pdata.num_micd_ranges > ARIZONA_MAX_MICD_RANGE) { + dev_err(arizona->dev, "Too many MICD ranges: %d\n", + arizona->pdata.num_micd_ranges); + } + + if (info->num_micd_ranges > 1) { + for (i = 1; i < info->num_micd_ranges; i++) { + if 
(info->micd_ranges[i - 1].max > + info->micd_ranges[i].max) { + dev_err(arizona->dev, + "MICD ranges must be sorted\n"); + ret = -EINVAL; + goto err_input; + } + } + } + + /* Disable all buttons by default */ + regmap_update_bits(arizona->regmap, ARIZONA_MIC_DETECT_2, + ARIZONA_MICD_LVL_SEL_MASK, 0x81); + + /* Set up all the buttons the user specified */ + for (i = 0; i < info->num_micd_ranges; i++) { + for (j = 0; j < ARRAY_SIZE(arizona_micd_levels); j++) + if (arizona_micd_levels[j] >= info->micd_ranges[i].max) + break; + + if (j == ARRAY_SIZE(arizona_micd_levels)) { + dev_err(arizona->dev, "Unsupported MICD level %d\n", + info->micd_ranges[i].max); + ret = -EINVAL; + goto err_input; + } + + dev_dbg(arizona->dev, "%d ohms for MICD threshold %d\n", + arizona_micd_levels[j], i); + + arizona_micd_set_level(arizona, i, j); + input_set_capability(info->input, EV_KEY, + info->micd_ranges[i].key); + + /* Enable reporting of that range */ + regmap_update_bits(arizona->regmap, ARIZONA_MIC_DETECT_2, + 1 << i, 1 << i); + } + + /* Set all the remaining keys to a maximum */ + for (; i < ARIZONA_MAX_MICD_RANGE; i++) + arizona_micd_set_level(arizona, i, 0x3f); + /* * If we have a clamp use it, activating in conjunction with * GPIO5 if that is connected for jack detect operation. @@ -1095,20 +1195,6 @@ static int arizona_extcon_probe(struct platform_device *pdev) arizona_extcon_set_mode(info, 0); - info->input = devm_input_allocate_device(&pdev->dev); - if (!info->input) { - dev_err(arizona->dev, "Can't allocate input dev\n"); - ret = -ENOMEM; - goto err_register; - } - - for (i = 0; i < ARIZONA_NUM_BUTTONS; i++) - input_set_capability(info->input, EV_KEY, - arizona_lvl_to_key[i].report); - info->input->name = "Headset"; - info->input->phys = "arizona/extcon"; - info->input->dev.parent = &pdev->dev; - pm_runtime_enable(&pdev->dev); pm_runtime_idle(&pdev->dev); pm_runtime_get_sync(&pdev->dev); diff --git a/include/linux/mfd/arizona/pdata.h b/include/linux/mfd/arizona/pdata.h index 455c51d22d6b..eb11a8ac6db2 100644 --- a/include/linux/mfd/arizona/pdata.h +++ b/include/linux/mfd/arizona/pdata.h @@ -86,6 +86,11 @@ struct arizona_micd_config { bool gpio; }; +struct arizona_micd_range { + int max; /** Ohms */ + int key; /** Key to report to input layer */ +}; + struct arizona_pdata { int reset; /** GPIO controlling /RESET, if any */ int ldoena; /** GPIO controlling LODENA, if any */ @@ -138,6 +143,10 @@ struct arizona_pdata { /** Force MICBIAS on for mic detect */ bool micd_force_micbias; + /** Mic detect level parameters */ + const struct arizona_micd_range *micd_ranges; + int num_micd_ranges; + /** Headset polarity configurations */ struct arizona_micd_config *micd_configs; int num_micd_configs; -- cgit From e56a0a572be150c79cdbf62ff98f4a63419e1c0b Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 1 Apr 2013 19:03:52 +0100 Subject: extcon: arizona: Allow pull to be disabled on GPIO5 when used for jackdet In some designs an external pull won't be needed.
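A hypothetical machine-file fragment shows how the new fields fit together, combining the micd_ranges added above with the jd_gpio5_nopull flag this patch introduces (all threshold and key values are examples only):

#include <linux/input.h>
#include <linux/kernel.h>
#include <linux/mfd/arizona/pdata.h>

/* Thresholds must be sorted and should sit on supported MICD levels. */
static const struct arizona_micd_range board_micd_ranges[] = {
	{ .max = 11,  .key = KEY_MEDIA },
	{ .max = 186, .key = KEY_VOLUMEUP },
	{ .max = 430, .key = KEY_VOLUMEDOWN },
};

static struct arizona_pdata board_arizona_pdata = {
	.jd_gpio5 = true,
	/* the board provides an external pull, so skip the internal one */
	.jd_gpio5_nopull = true,
	.micd_ranges = board_micd_ranges,
	.num_micd_ranges = ARRAY_SIZE(board_micd_ranges),
};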
Signed-off-by: Mark Brown --- drivers/extcon/extcon-arizona.c | 9 +++++++-- include/linux/mfd/arizona/pdata.h | 3 +++ 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/extcon/extcon-arizona.c b/drivers/extcon/extcon-arizona.c index 95748d3cbc4e..132bc99fdc06 100644 --- a/drivers/extcon/extcon-arizona.c +++ b/drivers/extcon/extcon-arizona.c @@ -986,6 +986,7 @@ static int arizona_extcon_probe(struct platform_device *pdev) struct arizona *arizona = dev_get_drvdata(pdev->dev.parent); struct arizona_pdata *pdata; struct arizona_extcon_info *info; + unsigned int val; int jack_irq_fall, jack_irq_rise; int ret, mode, i, j; @@ -1172,9 +1173,13 @@ static int arizona_extcon_probe(struct platform_device *pdev) */ if (info->micd_clamp) { if (arizona->pdata.jd_gpio5) { - /* Put the GPIO into input mode */ + /* Put the GPIO into input mode with optional pull */ + val = 0xc101; + if (arizona->pdata.jd_gpio5_nopull) + val &= ~ARIZONA_GPN_PU; + regmap_write(arizona->regmap, ARIZONA_GPIO5_CTRL, - 0xc101); + val); regmap_update_bits(arizona->regmap, ARIZONA_MICD_CLAMP_CONTROL, diff --git a/include/linux/mfd/arizona/pdata.h b/include/linux/mfd/arizona/pdata.h index eb11a8ac6db2..008b8c40549f 100644 --- a/include/linux/mfd/arizona/pdata.h +++ b/include/linux/mfd/arizona/pdata.h @@ -122,6 +122,9 @@ struct arizona_pdata { /** GPIO5 is used for jack detection */ bool jd_gpio5; + /** Internal pull on GPIO5 is disabled when used for jack detection */ + bool jd_gpio5_nopull; + /** Use the headphone detect circuit to identify the accessory */ bool hpdet_acc_id; -- cgit From 9c2ba270eaa227c999af451e1c2c9bf0d24aa8e5 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 25 Feb 2013 23:42:31 +0000 Subject: extcon: arizona: Simplify HPDET based identification Rather than measuring both HP channels we can simply directly measure the microphone impedance and then rely on MICDET for final confirmation of the presence of a suitable microphone. This improves the overall performance of the identification process. Signed-off-by: Mark Brown --- drivers/extcon/extcon-arizona.c | 46 +++++++++++++++++++++------------------ include/linux/mfd/arizona/pdata.h | 3 +++ 2 files changed, 28 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/extcon/extcon-arizona.c b/drivers/extcon/extcon-arizona.c index 7c4ce812d735..a83ca27aa99f 100644 --- a/drivers/extcon/extcon-arizona.c +++ b/drivers/extcon/extcon-arizona.c @@ -459,7 +459,8 @@ static int arizona_hpdet_read(struct arizona_extcon_info *info) return val; } -static int arizona_hpdet_do_id(struct arizona_extcon_info *info, int *reading) +static int arizona_hpdet_do_id(struct arizona_extcon_info *info, int *reading, + bool *mic) { struct arizona *arizona = info->arizona; int id_gpio = arizona->pdata.hpdet_id_gpio; @@ -470,11 +471,9 @@ static int arizona_hpdet_do_id(struct arizona_extcon_info *info, int *reading) */ if (arizona->pdata.hpdet_acc_id) { info->hpdet_res[info->num_hpdet_res++] = *reading; - info->hpdet_res[info->num_hpdet_res++] = *reading; /* Only check the mic directly if we didn't already ID it */ - if (id_gpio && info->num_hpdet_res == 2 && - !((info->hpdet_res[0] > info->hpdet_res[1] * 2))) { + if (id_gpio && info->num_hpdet_res == 1) { dev_dbg(arizona->dev, "Measuring mic\n"); regmap_update_bits(arizona->regmap, @@ -493,10 +492,8 @@ static int arizona_hpdet_do_id(struct arizona_extcon_info *info, int *reading) } /* OK, got both. Now, compare... 
*/ - dev_dbg(arizona->dev, "HPDET measured %d %d %d\n", - info->hpdet_res[0], info->hpdet_res[1], - info->hpdet_res[2]); - + dev_dbg(arizona->dev, "HPDET measured %d %d\n", + info->hpdet_res[0], info->hpdet_res[1]); /* Take the headphone impedance for the main report */ *reading = info->hpdet_res[0]; @@ -512,13 +509,11 @@ static int arizona_hpdet_do_id(struct arizona_extcon_info *info, int *reading) } /* - * Either the two grounds measure differently or we - * measure the mic as high impedance. + * If we measure the mic as high impedance */ - if ((info->hpdet_res[0] > info->hpdet_res[1] * 2) || - (id_gpio && info->hpdet_res[2] > 1257)) { + if (!id_gpio || info->hpdet_res[1] > 50) { dev_dbg(arizona->dev, "Detected mic\n"); - info->mic = true; + *mic = true; info->detecting = true; } else { dev_dbg(arizona->dev, "Detected headphone\n"); @@ -541,6 +536,7 @@ static irqreturn_t arizona_hpdet_irq(int irq, void *data) int id_gpio = arizona->pdata.hpdet_id_gpio; int report = ARIZONA_CABLE_HEADPHONE; int ret, reading; + bool mic = false; mutex_lock(&info->lock); @@ -576,7 +572,7 @@ ARIZONA_HP_IMPEDANCE_RANGE_MASK | ARIZONA_HP_POLL, 0); - ret = arizona_hpdet_do_id(info, &reading); + ret = arizona_hpdet_do_id(info, &reading, &mic); if (ret == -EAGAIN) { goto out; } else if (ret < 0) { @@ -606,7 +602,7 @@ done: ARIZONA_ACCDET_MODE_MASK, ARIZONA_ACCDET_MODE_MIC); /* If we have a mic then reenable MICDET */ - if (info->mic) + if (mic || info->mic) arizona_start_mic(info); if (info->hpdet_active) { @@ -681,6 +677,8 @@ err: static void arizona_start_hpdet_acc_id(struct arizona_extcon_info *info) { struct arizona *arizona = info->arizona; + int hp_reading = 32; + bool mic; int ret; dev_dbg(arizona->dev, "Starting identification via HPDET\n"); @@ -702,12 +700,18 @@ static void arizona_start_hpdet_acc_id(struct arizona_extcon_info *info) goto err; } - ret = regmap_update_bits(arizona->regmap, ARIZONA_HEADPHONE_DETECT_1, - ARIZONA_HP_POLL, ARIZONA_HP_POLL); - if (ret != 0) { - dev_err(arizona->dev, "Can't start HPDETL measurement: %d\n", - ret); - goto err; + if (arizona->pdata.hpdet_acc_id_line) { + ret = regmap_update_bits(arizona->regmap, + ARIZONA_HEADPHONE_DETECT_1, + ARIZONA_HP_POLL, ARIZONA_HP_POLL); + if (ret != 0) { + dev_err(arizona->dev, + "Can't start HPDETL measurement: %d\n", + ret); + goto err; + } + } else { + arizona_hpdet_do_id(info, &hp_reading, &mic); } return; diff --git a/include/linux/mfd/arizona/pdata.h b/include/linux/mfd/arizona/pdata.h index 008b8c40549f..45c84777c624 100644 --- a/include/linux/mfd/arizona/pdata.h +++ b/include/linux/mfd/arizona/pdata.h @@ -128,6 +128,9 @@ struct arizona_pdata { /** Use the headphone detect circuit to identify the accessory */ bool hpdet_acc_id; + /** Check for line output with HPDET method */ + bool hpdet_acc_id_line; + /** GPIO used for mic isolation with HPDET */ int hpdet_id_gpio; -- cgit From cd59e79656f4e7137909166248a935d422b1245a Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 1 Apr 2013 19:21:48 +0100 Subject: extcon: arizona: Allow additional debounce during microphone detection Help mitigate mechanical bounce during the initial detection by allowing an additional debounce to be configured on top of the debounce the hardware itself applies during the initial phase of the microphone detection operation.
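The mechanism is the usual deferred-work debounce: the threaded IRQ handler reschedules a delayed work item rather than acting at once. A generic sketch of that pattern, with illustrative "demo" names rather than the driver's own:

#include <linux/interrupt.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/workqueue.h>

struct demo_detect {
	struct delayed_work detect_work;
	int debounce_ms;		/* 0 = handle immediately */
};

static void demo_detect_work(struct work_struct *work)
{
	struct demo_detect *det = container_of(work, struct demo_detect,
					       detect_work.work);

	/* the signal has settled; read the hardware and report the state */
	pr_debug("debounced detect, %d ms after the IRQ\n", det->debounce_ms);
}

/* runs as a threaded IRQ handler, so the _sync cancel may sleep here */
static irqreturn_t demo_irq(int irq, void *data)
{
	struct demo_detect *det = data;

	cancel_delayed_work_sync(&det->detect_work);
	if (det->debounce_ms)
		schedule_delayed_work(&det->detect_work,
				      msecs_to_jiffies(det->debounce_ms));
	else
		demo_detect_work(&det->detect_work.work);

	return IRQ_HANDLED;
}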
Signed-off-by: Mark Brown --- drivers/extcon/extcon-arizona.c | 35 ++++++++++++++++++++++++++++++----- include/linux/mfd/arizona/pdata.h | 3 +++ 2 files changed, 33 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/extcon/extcon-arizona.c b/drivers/extcon/extcon-arizona.c index 26f9a1ae15c4..c7f8eb4299d2 100644 --- a/drivers/extcon/extcon-arizona.c +++ b/drivers/extcon/extcon-arizona.c @@ -64,6 +64,7 @@ struct arizona_extcon_info { bool micd_clamp; struct delayed_work hpdet_work; + struct delayed_work micd_detect_work; struct delayed_work micd_timeout_work; bool hpdet_active; @@ -750,9 +751,11 @@ static void arizona_micd_timeout_work(struct work_struct *work) mutex_unlock(&info->lock); } -static irqreturn_t arizona_micdet(int irq, void *data) +static void arizona_micd_detect(struct work_struct *work) { - struct arizona_extcon_info *info = data; + struct arizona_extcon_info *info = container_of(work, + struct arizona_extcon_info, + micd_detect_work.work); struct arizona *arizona = info->arizona; unsigned int val = 0, lvl; int ret, i, key; @@ -766,7 +769,7 @@ static irqreturn_t arizona_micdet(int irq, void *data) if (ret != 0) { dev_err(arizona->dev, "Failed to read MICDET: %d\n", ret); mutex_unlock(&info->lock); - return IRQ_NONE; + return; } dev_dbg(arizona->dev, "MICDET: %x\n", val); @@ -774,14 +777,14 @@ static irqreturn_t arizona_micdet(int irq, void *data) if (!(val & ARIZONA_MICD_VALID)) { dev_warn(arizona->dev, "Microphone detection state invalid\n"); mutex_unlock(&info->lock); - return IRQ_NONE; + return; } } if (i == 10 && !(val & 0x7fc)) { dev_err(arizona->dev, "Failed to get valid MICDET value\n"); mutex_unlock(&info->lock); - return IRQ_NONE; + return; } /* Due to jack detect this should never happen */ @@ -890,6 +893,27 @@ handled: pm_runtime_mark_last_busy(info->dev); mutex_unlock(&info->lock); +} + +static irqreturn_t arizona_micdet(int irq, void *data) +{ + struct arizona_extcon_info *info = data; + struct arizona *arizona = info->arizona; + int debounce = arizona->pdata.micd_detect_debounce; + + cancel_delayed_work_sync(&info->micd_detect_work); + cancel_delayed_work_sync(&info->micd_timeout_work); + + mutex_lock(&info->lock); + if (!info->detecting) + debounce = 0; + mutex_unlock(&info->lock); + + if (debounce) + schedule_delayed_work(&info->micd_detect_work, + msecs_to_jiffies(debounce)); + else + arizona_micd_detect(&info->micd_detect_work.work); return IRQ_HANDLED; } @@ -1072,6 +1096,7 @@ static int arizona_extcon_probe(struct platform_device *pdev) info->dev = &pdev->dev; info->last_jackdet = ~(ARIZONA_MICD_CLAMP_STS | ARIZONA_JD1_STS); INIT_DELAYED_WORK(&info->hpdet_work, arizona_hpdet_work); + INIT_DELAYED_WORK(&info->micd_detect_work, arizona_micd_detect); INIT_DELAYED_WORK(&info->micd_timeout_work, arizona_micd_timeout_work); platform_set_drvdata(pdev, info); diff --git a/include/linux/mfd/arizona/pdata.h b/include/linux/mfd/arizona/pdata.h index 45c84777c624..3ef300baa2e6 100644 --- a/include/linux/mfd/arizona/pdata.h +++ b/include/linux/mfd/arizona/pdata.h @@ -134,6 +134,9 @@ struct arizona_pdata { /** GPIO used for mic isolation with HPDET */ int hpdet_id_gpio; + /** Extra debounce timeout used during initial mic detection (ms) */ + int micd_detect_debounce; + /** GPIO for mic detection polarity */ int micd_pol_gpio; -- cgit From 7abd4e2a8f1c3e534da44c35e2d3d6353573e51f Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 1 Apr 2013 19:25:55 +0100 Subject: extcon: arizona: Make mic detection timeout configurable Signed-off-by: Mark 
Brown --- drivers/extcon/extcon-arizona.c | 13 ++++++++++--- include/linux/mfd/arizona/pdata.h | 3 +++ 2 files changed, 13 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/extcon/extcon-arizona.c b/drivers/extcon/extcon-arizona.c index c7f8eb4299d2..7a1b4a7791ba 100644 --- a/drivers/extcon/extcon-arizona.c +++ b/drivers/extcon/extcon-arizona.c @@ -42,7 +42,7 @@ #define ARIZONA_HPDET_MAX 10000 #define HPDET_DEBOUNCE 500 -#define MICD_TIMEOUT 2000 +#define DEFAULT_MICD_TIMEOUT 2000 struct arizona_extcon_info { struct device *dev; @@ -60,6 +60,8 @@ const struct arizona_micd_range *micd_ranges; int num_micd_ranges; + int micd_timeout; + bool micd_reva; bool micd_clamp; @@ -889,7 +891,7 @@ static void arizona_micd_detect(struct work_struct *work) handled: if (info->detecting) schedule_delayed_work(&info->micd_timeout_work, - msecs_to_jiffies(MICD_TIMEOUT)); + msecs_to_jiffies(info->micd_timeout)); pm_runtime_mark_last_busy(info->dev); mutex_unlock(&info->lock); @@ -970,7 +972,7 @@ static irqreturn_t arizona_jackdet(int irq, void *data) if (cancelled_mic) schedule_delayed_work(&info->micd_timeout_work, - msecs_to_jiffies(MICD_TIMEOUT)); + msecs_to_jiffies(info->micd_timeout)); goto out; } @@ -1027,6 +1029,11 @@ static irqreturn_t arizona_jackdet(int irq, void *data) ARIZONA_MICD_CLAMP_DB | ARIZONA_JD1_DB); } + if (arizona->pdata.micd_timeout) + info->micd_timeout = arizona->pdata.micd_timeout; + else + info->micd_timeout = DEFAULT_MICD_TIMEOUT; + /* Clear trig_sts to make sure DCVDD is not forced up */ regmap_write(arizona->regmap, ARIZONA_AOD_WKUP_AND_TRIG, ARIZONA_MICD_CLAMP_FALL_TRIG_STS | diff --git a/include/linux/mfd/arizona/pdata.h b/include/linux/mfd/arizona/pdata.h index 3ef300baa2e6..a0f940987a3e 100644 --- a/include/linux/mfd/arizona/pdata.h +++ b/include/linux/mfd/arizona/pdata.h @@ -149,6 +149,9 @@ struct arizona_pdata { /** Mic detect debounce level */ int micd_dbtime; + /** Mic detect timeout (ms) */ + int micd_timeout; + /** Force MICBIAS on for mic detect */ bool micd_force_micbias; -- cgit From b43a7ffbf33be7e4d3b10b7714ee663ea2c52fe2 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Sun, 24 Mar 2013 11:56:43 +0530 Subject: cpufreq: Notify all policy->cpus in cpufreq_notify_transition() policy->cpus contains all online cpus that share a single clock line, and their frequencies are always updated together. Many SMP systems' cpufreq drivers take care of this individually, but the best place for this code is in the cpufreq core. This patch modifies cpufreq_notify_transition() to notify the frequency change for all cpus in policy->cpus, and hence updates all users of this API. Signed-off-by: Viresh Kumar Acked-by: Stephen Warren Tested-by: Stephen Warren Signed-off-by: Rafael J.
Wysocki --- arch/arm/mach-davinci/cpufreq.c | 5 +- arch/arm/mach-imx/cpufreq.c | 5 +- arch/arm/mach-integrator/cpu.c | 6 +-- arch/arm/mach-pxa/cpufreq-pxa2xx.c | 5 +- arch/arm/mach-pxa/cpufreq-pxa3xx.c | 5 +- arch/arm/mach-s3c24xx/cpufreq.c | 8 +-- arch/arm/mach-sa1100/cpu-sa1100.c | 5 +- arch/arm/mach-sa1100/cpu-sa1110.c | 5 +- arch/arm/mach-tegra/cpu-tegra.c | 15 +++--- arch/avr32/mach-at32ap/cpufreq.c | 5 +- arch/blackfin/mach-common/cpufreq.c | 79 ++++++++++++---------------- arch/cris/arch-v32/mach-a3/cpufreq.c | 20 +++---- arch/cris/arch-v32/mach-fs/cpufreq.c | 17 +++--- arch/ia64/kernel/cpufreq/acpi-cpufreq.c | 22 ++++---- arch/mips/kernel/cpufreq/loongson2_cpufreq.c | 5 +- arch/powerpc/platforms/cell/cbe_cpufreq.c | 5 +- arch/powerpc/platforms/pasemi/cpufreq.c | 5 +- arch/powerpc/platforms/powermac/cpufreq_32.c | 14 ++--- arch/powerpc/platforms/powermac/cpufreq_64.c | 5 +- arch/sh/kernel/cpufreq.c | 5 +- arch/sparc/kernel/us2e_cpufreq.c | 13 ++--- arch/sparc/kernel/us3_cpufreq.c | 13 ++--- arch/unicore32/kernel/cpu-ucv2.c | 5 +- drivers/cpufreq/acpi-cpufreq.c | 11 +--- drivers/cpufreq/cpufreq-cpu0.c | 12 ++--- drivers/cpufreq/cpufreq-nforce2.c | 5 +- drivers/cpufreq/cpufreq.c | 45 +++++++++------- drivers/cpufreq/dbx500-cpufreq.c | 6 +-- drivers/cpufreq/e_powersaver.c | 11 ++-- drivers/cpufreq/elanfreq.c | 10 ++-- drivers/cpufreq/exynos-cpufreq.c | 7 +-- drivers/cpufreq/gx-suspmod.c | 11 ++-- drivers/cpufreq/imx6q-cpufreq.c | 12 ++--- drivers/cpufreq/kirkwood-cpufreq.c | 10 ++-- drivers/cpufreq/longhaul.c | 18 ++++--- drivers/cpufreq/maple-cpufreq.c | 5 +- drivers/cpufreq/omap-cpufreq.c | 11 +--- drivers/cpufreq/p4-clockmod.c | 10 +--- drivers/cpufreq/pcc-cpufreq.c | 5 +- drivers/cpufreq/powernow-k6.c | 12 ++--- drivers/cpufreq/powernow-k7.c | 10 ++-- drivers/cpufreq/powernow-k8.c | 16 +++--- drivers/cpufreq/s3c2416-cpufreq.c | 5 +- drivers/cpufreq/s3c64xx-cpufreq.c | 7 ++- drivers/cpufreq/s5pv210-cpufreq.c | 5 +- drivers/cpufreq/sc520_freq.c | 10 ++-- drivers/cpufreq/spear-cpufreq.c | 7 +-- drivers/cpufreq/speedstep-centrino.c | 24 ++------- drivers/cpufreq/speedstep-ich.c | 12 +---- drivers/cpufreq/speedstep-smi.c | 5 +- include/linux/cpufreq.h | 4 +- 51 files changed, 238 insertions(+), 340 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-davinci/cpufreq.c b/arch/arm/mach-davinci/cpufreq.c index 4729eaab0f40..55eb8703043d 100644 --- a/arch/arm/mach-davinci/cpufreq.c +++ b/arch/arm/mach-davinci/cpufreq.c @@ -90,7 +90,6 @@ static int davinci_target(struct cpufreq_policy *policy, freqs.old = davinci_getspeed(0); freqs.new = clk_round_rate(armclk, target_freq * 1000) / 1000; - freqs.cpu = 0; if (freqs.old == freqs.new) return ret; @@ -102,7 +101,7 @@ static int davinci_target(struct cpufreq_policy *policy, if (ret) return -EINVAL; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); /* if moving to higher frequency, up the voltage beforehand */ if (pdata->set_voltage && freqs.new > freqs.old) { @@ -126,7 +125,7 @@ static int davinci_target(struct cpufreq_policy *policy, pdata->set_voltage(idx); out: - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); return ret; } diff --git a/arch/arm/mach-imx/cpufreq.c b/arch/arm/mach-imx/cpufreq.c index d8c75c3c925d..cfce5e3f67f5 100644 --- a/arch/arm/mach-imx/cpufreq.c +++ b/arch/arm/mach-imx/cpufreq.c @@ -87,13 +87,12 @@ static int mxc_set_target(struct cpufreq_policy *policy, freqs.old = 
clk_get_rate(cpu_clk) / 1000; freqs.new = freq_Hz / 1000; - freqs.cpu = 0; freqs.flags = 0; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); ret = set_cpu_freq(freq_Hz); - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); return ret; } diff --git a/arch/arm/mach-integrator/cpu.c b/arch/arm/mach-integrator/cpu.c index 590c192cdf4d..df863c30771c 100644 --- a/arch/arm/mach-integrator/cpu.c +++ b/arch/arm/mach-integrator/cpu.c @@ -123,14 +123,12 @@ static int integrator_set_target(struct cpufreq_policy *policy, vco = icst_hz_to_vco(&cclk_params, target_freq * 1000); freqs.new = icst_hz(&cclk_params, vco) / 1000; - freqs.cpu = policy->cpu; - if (freqs.old == freqs.new) { set_cpus_allowed(current, cpus_allowed); return 0; } - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); cm_osc = __raw_readl(CM_OSC); @@ -151,7 +149,7 @@ static int integrator_set_target(struct cpufreq_policy *policy, */ set_cpus_allowed(current, cpus_allowed); - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); return 0; } diff --git a/arch/arm/mach-pxa/cpufreq-pxa2xx.c b/arch/arm/mach-pxa/cpufreq-pxa2xx.c index 6a7aeab42f6c..f1ca4daa1ad6 100644 --- a/arch/arm/mach-pxa/cpufreq-pxa2xx.c +++ b/arch/arm/mach-pxa/cpufreq-pxa2xx.c @@ -311,7 +311,6 @@ static int pxa_set_target(struct cpufreq_policy *policy, new_freq_mem = pxa_freq_settings[idx].membus; freqs.old = policy->cur; freqs.new = new_freq_cpu; - freqs.cpu = policy->cpu; if (freq_debug) pr_debug("Changing CPU frequency to %d Mhz, (SDRAM %d Mhz)\n", @@ -327,7 +326,7 @@ static int pxa_set_target(struct cpufreq_policy *policy, * you should add a notify client with any platform specific * Vcc changing capability */ - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); /* Calculate the next MDREFR. If we're slowing down the SDRAM clock * we need to preset the smaller DRI before the change. 
If we're @@ -382,7 +381,7 @@ static int pxa_set_target(struct cpufreq_policy *policy, * you should add a notify client with any platform specific * SDRAM refresh timer adjustments */ - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); /* * Even if voltage setting fails, we don't report it, as the frequency diff --git a/arch/arm/mach-pxa/cpufreq-pxa3xx.c b/arch/arm/mach-pxa/cpufreq-pxa3xx.c index b85b4ab7aac6..8c45b2b926a7 100644 --- a/arch/arm/mach-pxa/cpufreq-pxa3xx.c +++ b/arch/arm/mach-pxa/cpufreq-pxa3xx.c @@ -184,7 +184,6 @@ static int pxa3xx_cpufreq_set(struct cpufreq_policy *policy, freqs.old = policy->cur; freqs.new = next->cpufreq_mhz * 1000; - freqs.cpu = policy->cpu; pr_debug("CPU frequency from %d MHz to %d MHz%s\n", freqs.old / 1000, freqs.new / 1000, @@ -193,14 +192,14 @@ static int pxa3xx_cpufreq_set(struct cpufreq_policy *policy, if (freqs.old == target_freq) return 0; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); local_irq_save(flags); __update_core_freq(next); __update_bus_freq(next); local_irq_restore(flags); - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); return 0; } diff --git a/arch/arm/mach-s3c24xx/cpufreq.c b/arch/arm/mach-s3c24xx/cpufreq.c index 5f181e733eee..3c0e78ede0da 100644 --- a/arch/arm/mach-s3c24xx/cpufreq.c +++ b/arch/arm/mach-s3c24xx/cpufreq.c @@ -204,7 +204,6 @@ static int s3c_cpufreq_settarget(struct cpufreq_policy *policy, freqs.old = cpu_cur.freq; freqs.new = cpu_new.freq; - freqs.freqs.cpu = 0; freqs.freqs.old = cpu_cur.freq.armclk / 1000; freqs.freqs.new = cpu_new.freq.armclk / 1000; @@ -218,9 +217,7 @@ static int s3c_cpufreq_settarget(struct cpufreq_policy *policy, s3c_cpufreq_updateclk(clk_pclk, cpu_new.freq.pclk); /* start the frequency change */ - - if (policy) - cpufreq_notify_transition(&freqs.freqs, CPUFREQ_PRECHANGE); + cpufreq_notify_transition(policy, &freqs.freqs, CPUFREQ_PRECHANGE); /* If hclk is staying the same, then we do not need to * re-write the IO or the refresh timings whilst we are changing @@ -264,8 +261,7 @@ static int s3c_cpufreq_settarget(struct cpufreq_policy *policy, local_irq_restore(flags); /* notify everyone we've done this */ - if (policy) - cpufreq_notify_transition(&freqs.freqs, CPUFREQ_POSTCHANGE); + cpufreq_notify_transition(policy, &freqs.freqs, CPUFREQ_POSTCHANGE); s3c_freq_dbg("%s: finished\n", __func__); return 0; diff --git a/arch/arm/mach-sa1100/cpu-sa1100.c b/arch/arm/mach-sa1100/cpu-sa1100.c index e8f4d1e19233..32687617c7a5 100644 --- a/arch/arm/mach-sa1100/cpu-sa1100.c +++ b/arch/arm/mach-sa1100/cpu-sa1100.c @@ -201,9 +201,8 @@ static int sa1100_target(struct cpufreq_policy *policy, freqs.old = cur; freqs.new = sa11x0_ppcr_to_freq(new_ppcr); - freqs.cpu = 0; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); if (freqs.new > cur) sa1100_update_dram_timings(cur, freqs.new); @@ -213,7 +212,7 @@ static int sa1100_target(struct cpufreq_policy *policy, if (freqs.new < cur) sa1100_update_dram_timings(cur, freqs.new); - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); return 0; } diff --git a/arch/arm/mach-sa1100/cpu-sa1110.c b/arch/arm/mach-sa1100/cpu-sa1110.c index 48c45b0c92bb..38a77330dc16 100644 --- a/arch/arm/mach-sa1100/cpu-sa1110.c +++ 
b/arch/arm/mach-sa1100/cpu-sa1110.c @@ -258,7 +258,6 @@ static int sa1110_target(struct cpufreq_policy *policy, freqs.old = sa11x0_getspeed(0); freqs.new = sa11x0_ppcr_to_freq(ppcr); - freqs.cpu = 0; sdram_calculate_timing(&sd, freqs.new, sdram); @@ -279,7 +278,7 @@ static int sa1110_target(struct cpufreq_policy *policy, sd.mdcas[2] = 0xaaaaaaaa; #endif - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); /* * The clock could be going away for some time. Set the SDRAMs @@ -327,7 +326,7 @@ static int sa1110_target(struct cpufreq_policy *policy, */ sdram_update_refresh(freqs.new, sdram); - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); return 0; } diff --git a/arch/arm/mach-tegra/cpu-tegra.c b/arch/arm/mach-tegra/cpu-tegra.c index e3d6e15ff188..11ca730970f8 100644 --- a/arch/arm/mach-tegra/cpu-tegra.c +++ b/arch/arm/mach-tegra/cpu-tegra.c @@ -106,7 +106,8 @@ out: return ret; } -static int tegra_update_cpu_speed(unsigned long rate) +static int tegra_update_cpu_speed(struct cpufreq_policy *policy, + unsigned long rate) { int ret = 0; struct cpufreq_freqs freqs; @@ -128,8 +129,7 @@ static int tegra_update_cpu_speed(unsigned long rate) else clk_set_rate(emc_clk, 100000000); /* emc 50Mhz */ - for_each_online_cpu(freqs.cpu) - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); #ifdef CONFIG_CPU_FREQ_DEBUG printk(KERN_DEBUG "cpufreq-tegra: transition: %u --> %u\n", @@ -143,8 +143,7 @@ static int tegra_update_cpu_speed(unsigned long rate) return ret; } - for_each_online_cpu(freqs.cpu) - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); return 0; } @@ -181,7 +180,7 @@ static int tegra_target(struct cpufreq_policy *policy, target_cpu_speed[policy->cpu] = freq; - ret = tegra_update_cpu_speed(tegra_cpu_highest_speed()); + ret = tegra_update_cpu_speed(policy, tegra_cpu_highest_speed()); out: mutex_unlock(&tegra_cpu_lock); @@ -193,10 +192,12 @@ static int tegra_pm_notify(struct notifier_block *nb, unsigned long event, { mutex_lock(&tegra_cpu_lock); if (event == PM_SUSPEND_PREPARE) { + struct cpufreq_policy *policy = cpufreq_cpu_get(0); is_suspended = true; pr_info("Tegra cpufreq suspend: setting frequency to %d kHz\n", freq_table[0].frequency); - tegra_update_cpu_speed(freq_table[0].frequency); + tegra_update_cpu_speed(policy, freq_table[0].frequency); + cpufreq_cpu_put(policy); } else if (event == PM_POST_SUSPEND) { is_suspended = false; } diff --git a/arch/avr32/mach-at32ap/cpufreq.c b/arch/avr32/mach-at32ap/cpufreq.c index 18b765629a0c..654488723cb5 100644 --- a/arch/avr32/mach-at32ap/cpufreq.c +++ b/arch/avr32/mach-at32ap/cpufreq.c @@ -61,7 +61,6 @@ static int at32_set_target(struct cpufreq_policy *policy, freqs.old = at32_get_speed(0); freqs.new = (freq + 500) / 1000; - freqs.cpu = 0; freqs.flags = 0; if (!ref_freq) { @@ -69,7 +68,7 @@ static int at32_set_target(struct cpufreq_policy *policy, loops_per_jiffy_ref = boot_cpu_data.loops_per_jiffy; } - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); if (freqs.old < freqs.new) boot_cpu_data.loops_per_jiffy = cpufreq_scale( loops_per_jiffy_ref, ref_freq, freqs.new); @@ -77,7 +76,7 @@ static int at32_set_target(struct cpufreq_policy *policy, if (freqs.new < freqs.old) boot_cpu_data.loops_per_jiffy = cpufreq_scale( 
loops_per_jiffy_ref, ref_freq, freqs.new); - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); pr_debug("cpufreq: set frequency %lu Hz\n", freq); diff --git a/arch/blackfin/mach-common/cpufreq.c b/arch/blackfin/mach-common/cpufreq.c index d88bd31319e6..995511e80bef 100644 --- a/arch/blackfin/mach-common/cpufreq.c +++ b/arch/blackfin/mach-common/cpufreq.c @@ -127,13 +127,13 @@ unsigned long cpu_set_cclk(int cpu, unsigned long new) } #endif -static int bfin_target(struct cpufreq_policy *poli, +static int bfin_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) { #ifndef CONFIG_BF60x unsigned int plldiv; #endif - unsigned int index, cpu; + unsigned int index; unsigned long cclk_hz; struct cpufreq_freqs freqs; static unsigned long lpj_ref; @@ -144,59 +144,48 @@ static int bfin_target(struct cpufreq_policy *poli, cycles_t cycles; #endif - for_each_online_cpu(cpu) { - struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); + if (cpufreq_frequency_table_target(policy, bfin_freq_table, target_freq, + relation, &index)) + return -EINVAL; - if (!policy) - continue; + cclk_hz = bfin_freq_table[index].frequency; - if (cpufreq_frequency_table_target(policy, bfin_freq_table, - target_freq, relation, &index)) - return -EINVAL; + freqs.old = bfin_getfreq_khz(0); + freqs.new = cclk_hz; - cclk_hz = bfin_freq_table[index].frequency; + pr_debug("cpufreq: changing cclk to %lu; target = %u, oldfreq = %u\n", + cclk_hz, target_freq, freqs.old); - freqs.old = bfin_getfreq_khz(0); - freqs.new = cclk_hz; - freqs.cpu = cpu; - - pr_debug("cpufreq: changing cclk to %lu; target = %u, oldfreq = %u\n", - cclk_hz, target_freq, freqs.old); - - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - if (cpu == CPUFREQ_CPU) { + cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); #ifndef CONFIG_BF60x - plldiv = (bfin_read_PLL_DIV() & SSEL) | - dpm_state_table[index].csel; - bfin_write_PLL_DIV(plldiv); + plldiv = (bfin_read_PLL_DIV() & SSEL) | dpm_state_table[index].csel; + bfin_write_PLL_DIV(plldiv); #else - ret = cpu_set_cclk(cpu, freqs.new * 1000); - if (ret != 0) { - WARN_ONCE(ret, "cpufreq set freq failed %d\n", ret); - break; - } + ret = cpu_set_cclk(policy->cpu, freqs.new * 1000); + if (ret != 0) { + WARN_ONCE(ret, "cpufreq set freq failed %d\n", ret); + return ret; + } #endif - on_each_cpu(bfin_adjust_core_timer, &index, 1); + on_each_cpu(bfin_adjust_core_timer, &index, 1); #if defined(CONFIG_CYCLES_CLOCKSOURCE) - cycles = get_cycles(); - SSYNC(); - cycles += 10; /* ~10 cycles we lose after get_cycles() */ - __bfin_cycles_off += - (cycles << __bfin_cycles_mod) - (cycles << index); - __bfin_cycles_mod = index; + cycles = get_cycles(); + SSYNC(); + cycles += 10; /* ~10 cycles we lose after get_cycles() */ + __bfin_cycles_off += (cycles << __bfin_cycles_mod) - (cycles << index); + __bfin_cycles_mod = index; #endif - if (!lpj_ref_freq) { - lpj_ref = loops_per_jiffy; - lpj_ref_freq = freqs.old; - } - if (freqs.new != freqs.old) { - loops_per_jiffy = cpufreq_scale(lpj_ref, - lpj_ref_freq, freqs.new); - } - } - /* TODO: just test case for cycles clock source, remove later */ - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + if (!lpj_ref_freq) { + lpj_ref = loops_per_jiffy; + lpj_ref_freq = freqs.old; } + if (freqs.new != freqs.old) { + loops_per_jiffy = cpufreq_scale(lpj_ref, + lpj_ref_freq, freqs.new); + } + + /* TODO: just test case for cycles clock source, remove later */ + 
cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); pr_debug("cpufreq: done\n"); return ret; diff --git a/arch/cris/arch-v32/mach-a3/cpufreq.c b/arch/cris/arch-v32/mach-a3/cpufreq.c index ee391ecb5bc9..ee142c490575 100644 --- a/arch/cris/arch-v32/mach-a3/cpufreq.c +++ b/arch/cris/arch-v32/mach-a3/cpufreq.c @@ -27,23 +27,17 @@ static unsigned int cris_freq_get_cpu_frequency(unsigned int cpu) return clk_ctrl.pll ? 200000 : 6000; } -static void cris_freq_set_cpu_state(unsigned int state) +static void cris_freq_set_cpu_state(struct cpufreq_policy *policy, + unsigned int state) { - int i = 0; struct cpufreq_freqs freqs; reg_clkgen_rw_clk_ctrl clk_ctrl; clk_ctrl = REG_RD(clkgen, regi_clkgen, rw_clk_ctrl); -#ifdef CONFIG_SMP - for_each_present_cpu(i) -#endif - { - freqs.old = cris_freq_get_cpu_frequency(i); - freqs.new = cris_freq_table[state].frequency; - freqs.cpu = i; - } + freqs.old = cris_freq_get_cpu_frequency(policy->cpu); + freqs.new = cris_freq_table[state].frequency; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); local_irq_disable(); @@ -57,7 +51,7 @@ static void cris_freq_set_cpu_state(unsigned int state) local_irq_enable(); - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); }; static int cris_freq_verify(struct cpufreq_policy *policy) @@ -75,7 +69,7 @@ static int cris_freq_target(struct cpufreq_policy *policy, target_freq, relation, &newstate)) return -EINVAL; - cris_freq_set_cpu_state(newstate); + cris_freq_set_cpu_state(policy, newstate); return 0; } diff --git a/arch/cris/arch-v32/mach-fs/cpufreq.c b/arch/cris/arch-v32/mach-fs/cpufreq.c index d92cf70d1cbe..12952235d5db 100644 --- a/arch/cris/arch-v32/mach-fs/cpufreq.c +++ b/arch/cris/arch-v32/mach-fs/cpufreq.c @@ -27,20 +27,17 @@ static unsigned int cris_freq_get_cpu_frequency(unsigned int cpu) return clk_ctrl.pll ? 
200000 : 6000; } -static void cris_freq_set_cpu_state(unsigned int state) +static void cris_freq_set_cpu_state(struct cpufreq_policy *policy, + unsigned int state) { - int i; struct cpufreq_freqs freqs; reg_config_rw_clk_ctrl clk_ctrl; clk_ctrl = REG_RD(config, regi_config, rw_clk_ctrl); - for_each_possible_cpu(i) { - freqs.old = cris_freq_get_cpu_frequency(i); - freqs.new = cris_freq_table[state].frequency; - freqs.cpu = i; - } + freqs.old = cris_freq_get_cpu_frequency(policy->cpu); + freqs.new = cris_freq_table[state].frequency; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); local_irq_disable(); @@ -54,7 +51,7 @@ static void cris_freq_set_cpu_state(unsigned int state) local_irq_enable(); - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); }; static int cris_freq_verify(struct cpufreq_policy *policy) @@ -71,7 +68,7 @@ static int cris_freq_target(struct cpufreq_policy *policy, (policy, cris_freq_table, target_freq, relation, &newstate)) return -EINVAL; - cris_freq_set_cpu_state(newstate); + cris_freq_set_cpu_state(policy, newstate); return 0; } diff --git a/arch/ia64/kernel/cpufreq/acpi-cpufreq.c b/arch/ia64/kernel/cpufreq/acpi-cpufreq.c index f09b174244d5..4700fef8d1fa 100644 --- a/arch/ia64/kernel/cpufreq/acpi-cpufreq.c +++ b/arch/ia64/kernel/cpufreq/acpi-cpufreq.c @@ -137,7 +137,7 @@ migrate_end: static int processor_set_freq ( struct cpufreq_acpi_io *data, - unsigned int cpu, + struct cpufreq_policy *policy, int state) { int ret = 0; @@ -149,8 +149,8 @@ processor_set_freq ( pr_debug("processor_set_freq\n"); saved_mask = current->cpus_allowed; - set_cpus_allowed_ptr(current, cpumask_of(cpu)); - if (smp_processor_id() != cpu) { + set_cpus_allowed_ptr(current, cpumask_of(policy->cpu)); + if (smp_processor_id() != policy->cpu) { retval = -EAGAIN; goto migrate_end; } @@ -170,12 +170,11 @@ processor_set_freq ( data->acpi_data.state, state); /* cpufreq frequency struct */ - cpufreq_freqs.cpu = cpu; cpufreq_freqs.old = data->freq_table[data->acpi_data.state].frequency; cpufreq_freqs.new = data->freq_table[state].frequency; /* notify cpufreq */ - cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_PRECHANGE); + cpufreq_notify_transition(policy, &cpufreq_freqs, CPUFREQ_PRECHANGE); /* * First we write the target state's 'control' value to the @@ -189,17 +188,20 @@ processor_set_freq ( ret = processor_set_pstate(value); if (ret) { unsigned int tmp = cpufreq_freqs.new; - cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_POSTCHANGE); + cpufreq_notify_transition(policy, &cpufreq_freqs, + CPUFREQ_POSTCHANGE); cpufreq_freqs.new = cpufreq_freqs.old; cpufreq_freqs.old = tmp; - cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_PRECHANGE); - cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_POSTCHANGE); + cpufreq_notify_transition(policy, &cpufreq_freqs, + CPUFREQ_PRECHANGE); + cpufreq_notify_transition(policy, &cpufreq_freqs, + CPUFREQ_POSTCHANGE); printk(KERN_WARNING "Transition failed with error %d\n", ret); retval = -ENODEV; goto migrate_end; } - cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_POSTCHANGE); + cpufreq_notify_transition(policy, &cpufreq_freqs, CPUFREQ_POSTCHANGE); data->acpi_data.state = state; @@ -240,7 +242,7 @@ acpi_cpufreq_target ( if (result) return (result); - result = processor_set_freq(data, policy->cpu, next_state); + result = processor_set_freq(data, policy, next_state); return (result); } diff --git 
a/arch/mips/kernel/cpufreq/loongson2_cpufreq.c b/arch/mips/kernel/cpufreq/loongson2_cpufreq.c index 3237c5235f9c..bafda7063f03 100644 --- a/arch/mips/kernel/cpufreq/loongson2_cpufreq.c +++ b/arch/mips/kernel/cpufreq/loongson2_cpufreq.c @@ -80,7 +80,6 @@ static int loongson2_cpufreq_target(struct cpufreq_policy *policy, pr_debug("cpufreq: requested frequency %u Hz\n", target_freq * 1000); - freqs.cpu = cpu; freqs.old = loongson2_cpufreq_get(cpu); freqs.new = freq; freqs.flags = 0; @@ -89,7 +88,7 @@ static int loongson2_cpufreq_target(struct cpufreq_policy *policy, return 0; /* notifiers */ - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); set_cpus_allowed_ptr(current, &cpus_allowed); @@ -97,7 +96,7 @@ static int loongson2_cpufreq_target(struct cpufreq_policy *policy, clk_set_rate(cpuclk, freq); /* notifiers */ - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); pr_debug("cpufreq: set frequency %u kHz\n", freq); diff --git a/arch/powerpc/platforms/cell/cbe_cpufreq.c b/arch/powerpc/platforms/cell/cbe_cpufreq.c index d4c39e32f147..718c6a33023d 100644 --- a/arch/powerpc/platforms/cell/cbe_cpufreq.c +++ b/arch/powerpc/platforms/cell/cbe_cpufreq.c @@ -156,10 +156,9 @@ static int cbe_cpufreq_target(struct cpufreq_policy *policy, freqs.old = policy->cur; freqs.new = cbe_freqs[cbe_pmode_new].frequency; - freqs.cpu = policy->cpu; mutex_lock(&cbe_switch_mutex); - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); pr_debug("setting frequency for cpu %d to %d kHz, " \ "1/%d of max frequency\n", @@ -169,7 +168,7 @@ static int cbe_cpufreq_target(struct cpufreq_policy *policy, rc = set_pmode(policy->cpu, cbe_pmode_new); - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); mutex_unlock(&cbe_switch_mutex); return rc; diff --git a/arch/powerpc/platforms/pasemi/cpufreq.c b/arch/powerpc/platforms/pasemi/cpufreq.c index 890f30e70f98..be1e7958909e 100644 --- a/arch/powerpc/platforms/pasemi/cpufreq.c +++ b/arch/powerpc/platforms/pasemi/cpufreq.c @@ -273,10 +273,9 @@ static int pas_cpufreq_target(struct cpufreq_policy *policy, freqs.old = policy->cur; freqs.new = pas_freqs[pas_astate_new].frequency; - freqs.cpu = policy->cpu; mutex_lock(&pas_switch_mutex); - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); pr_debug("setting frequency for cpu %d to %d kHz, 1/%d of max frequency\n", policy->cpu, @@ -288,7 +287,7 @@ static int pas_cpufreq_target(struct cpufreq_policy *policy, for_each_online_cpu(i) set_astate(i, pas_astate_new); - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); mutex_unlock(&pas_switch_mutex); ppc_proc_freq = freqs.new * 1000ul; diff --git a/arch/powerpc/platforms/powermac/cpufreq_32.c b/arch/powerpc/platforms/powermac/cpufreq_32.c index 311b804353b1..3104fad82480 100644 --- a/arch/powerpc/platforms/powermac/cpufreq_32.c +++ b/arch/powerpc/platforms/powermac/cpufreq_32.c @@ -335,7 +335,8 @@ static int pmu_set_cpu_speed(int low_speed) return 0; } -static int do_set_cpu_speed(int speed_mode, int notify) +static int do_set_cpu_speed(struct cpufreq_policy *policy, int speed_mode, + int notify) { struct cpufreq_freqs freqs; unsigned long l3cr; @@ -343,13 +344,12 @@ static int do_set_cpu_speed(int 
speed_mode, int notify) freqs.old = cur_freq; freqs.new = (speed_mode == CPUFREQ_HIGH) ? hi_freq : low_freq; - freqs.cpu = smp_processor_id(); if (freqs.old == freqs.new) return 0; if (notify) - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); if (speed_mode == CPUFREQ_LOW && cpu_has_feature(CPU_FTR_L3CR)) { l3cr = _get_L3CR(); @@ -366,7 +366,7 @@ static int do_set_cpu_speed(int speed_mode, int notify) _set_L3CR(prev_l3cr); } if (notify) - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); cur_freq = (speed_mode == CPUFREQ_HIGH) ? hi_freq : low_freq; return 0; @@ -393,7 +393,7 @@ static int pmac_cpufreq_target( struct cpufreq_policy *policy, target_freq, relation, &newstate)) return -EINVAL; - rc = do_set_cpu_speed(newstate, 1); + rc = do_set_cpu_speed(policy, newstate, 1); ppc_proc_freq = cur_freq * 1000ul; return rc; @@ -442,7 +442,7 @@ static int pmac_cpufreq_suspend(struct cpufreq_policy *policy) no_schedule = 1; sleep_freq = cur_freq; if (cur_freq == low_freq && !is_pmu_based) - do_set_cpu_speed(CPUFREQ_HIGH, 0); + do_set_cpu_speed(policy, CPUFREQ_HIGH, 0); return 0; } @@ -458,7 +458,7 @@ static int pmac_cpufreq_resume(struct cpufreq_policy *policy) * is that we force a switch to whatever it was, which is * probably high speed due to our suspend() routine */ - do_set_cpu_speed(sleep_freq == low_freq ? + do_set_cpu_speed(policy, sleep_freq == low_freq ? CPUFREQ_LOW : CPUFREQ_HIGH, 0); ppc_proc_freq = cur_freq * 1000ul; diff --git a/arch/powerpc/platforms/powermac/cpufreq_64.c b/arch/powerpc/platforms/powermac/cpufreq_64.c index 9650c6029c82..7ba423431cfe 100644 --- a/arch/powerpc/platforms/powermac/cpufreq_64.c +++ b/arch/powerpc/platforms/powermac/cpufreq_64.c @@ -339,11 +339,10 @@ static int g5_cpufreq_target(struct cpufreq_policy *policy, freqs.old = g5_cpu_freqs[g5_pmode_cur].frequency; freqs.new = g5_cpu_freqs[newstate].frequency; - freqs.cpu = 0; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); rc = g5_switch_freq(newstate); - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); mutex_unlock(&g5_switch_mutex); diff --git a/arch/sh/kernel/cpufreq.c b/arch/sh/kernel/cpufreq.c index e68b45b6f3f9..2c7bd94f95ee 100644 --- a/arch/sh/kernel/cpufreq.c +++ b/arch/sh/kernel/cpufreq.c @@ -69,15 +69,14 @@ static int sh_cpufreq_target(struct cpufreq_policy *policy, dev_dbg(dev, "requested frequency %u Hz\n", target_freq * 1000); - freqs.cpu = cpu; freqs.old = sh_cpufreq_get(cpu); freqs.new = (freq + 500) / 1000; freqs.flags = 0; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); set_cpus_allowed_ptr(current, &cpus_allowed); clk_set_rate(cpuclk, freq); - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); dev_dbg(dev, "set frequency %lu Hz\n", freq); diff --git a/arch/sparc/kernel/us2e_cpufreq.c b/arch/sparc/kernel/us2e_cpufreq.c index 489fc15f3194..abe963d7b87c 100644 --- a/arch/sparc/kernel/us2e_cpufreq.c +++ b/arch/sparc/kernel/us2e_cpufreq.c @@ -248,8 +248,10 @@ static unsigned int us2e_freq_get(unsigned int cpu) return clock_tick / estar_to_divisor(estar); } -static void us2e_set_cpu_divider_index(unsigned int cpu, unsigned int index) +static void us2e_set_cpu_divider_index(struct 
cpufreq_policy *policy, + unsigned int index) { + unsigned int cpu = policy->cpu; unsigned long new_bits, new_freq; unsigned long clock_tick, divisor, old_divisor, estar; cpumask_t cpus_allowed; @@ -272,14 +274,13 @@ static void us2e_set_cpu_divider_index(unsigned int cpu, unsigned int index) freqs.old = clock_tick / old_divisor; freqs.new = new_freq; - freqs.cpu = cpu; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); if (old_divisor != divisor) us2e_transition(estar, new_bits, clock_tick * 1000, old_divisor, divisor); - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); set_cpus_allowed_ptr(current, &cpus_allowed); } @@ -295,7 +296,7 @@ static int us2e_freq_target(struct cpufreq_policy *policy, target_freq, relation, &new_index)) return -EINVAL; - us2e_set_cpu_divider_index(policy->cpu, new_index); + us2e_set_cpu_divider_index(policy, new_index); return 0; } @@ -335,7 +336,7 @@ static int __init us2e_freq_cpu_init(struct cpufreq_policy *policy) static int us2e_freq_cpu_exit(struct cpufreq_policy *policy) { if (cpufreq_us2e_driver) - us2e_set_cpu_divider_index(policy->cpu, 0); + us2e_set_cpu_divider_index(policy, 0); return 0; } diff --git a/arch/sparc/kernel/us3_cpufreq.c b/arch/sparc/kernel/us3_cpufreq.c index eb1624b931d9..7ceb9c8458f0 100644 --- a/arch/sparc/kernel/us3_cpufreq.c +++ b/arch/sparc/kernel/us3_cpufreq.c @@ -96,8 +96,10 @@ static unsigned int us3_freq_get(unsigned int cpu) return ret; } -static void us3_set_cpu_divider_index(unsigned int cpu, unsigned int index) +static void us3_set_cpu_divider_index(struct cpufreq_policy *policy, + unsigned int index) { + unsigned int cpu = policy->cpu; unsigned long new_bits, new_freq, reg; cpumask_t cpus_allowed; struct cpufreq_freqs freqs; @@ -131,14 +133,13 @@ static void us3_set_cpu_divider_index(unsigned int cpu, unsigned int index) freqs.old = get_current_freq(cpu, reg); freqs.new = new_freq; - freqs.cpu = cpu; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); reg &= ~SAFARI_CFG_DIV_MASK; reg |= new_bits; write_safari_cfg(reg); - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); set_cpus_allowed_ptr(current, &cpus_allowed); } @@ -156,7 +157,7 @@ static int us3_freq_target(struct cpufreq_policy *policy, &new_index)) return -EINVAL; - us3_set_cpu_divider_index(policy->cpu, new_index); + us3_set_cpu_divider_index(policy, new_index); return 0; } @@ -192,7 +193,7 @@ static int __init us3_freq_cpu_init(struct cpufreq_policy *policy) static int us3_freq_cpu_exit(struct cpufreq_policy *policy) { if (cpufreq_us3_driver) - us3_set_cpu_divider_index(policy->cpu, 0); + us3_set_cpu_divider_index(policy, 0); return 0; } diff --git a/arch/unicore32/kernel/cpu-ucv2.c b/arch/unicore32/kernel/cpu-ucv2.c index 4a99f62584c7..ba5a71ce2d71 100644 --- a/arch/unicore32/kernel/cpu-ucv2.c +++ b/arch/unicore32/kernel/cpu-ucv2.c @@ -52,15 +52,14 @@ static int ucv2_target(struct cpufreq_policy *policy, struct cpufreq_freqs freqs; struct clk *mclk = clk_get(NULL, "MAIN_CLK"); - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); if (!clk_set_rate(mclk, target_freq * 1000)) { freqs.old = cur; freqs.new = target_freq; - freqs.cpu = 0; } - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + 
cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); return 0; } diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 57a8774f0b4e..11b8b4b54ceb 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -423,7 +423,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, struct drv_cmd cmd; unsigned int next_state = 0; /* Index into freq_table */ unsigned int next_perf_state = 0; /* Index into perf table */ - unsigned int i; int result = 0; pr_debug("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu); @@ -486,10 +485,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, freqs.old = perf->states[perf->state].core_frequency * 1000; freqs.new = data->freq_table[next_state].frequency; - for_each_cpu(i, policy->cpus) { - freqs.cpu = i; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - } + cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); drv_write(&cmd); @@ -502,10 +498,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, } } - for_each_cpu(i, policy->cpus) { - freqs.cpu = i; - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - } + cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); perf->state = next_perf_state; out: diff --git a/drivers/cpufreq/cpufreq-cpu0.c b/drivers/cpufreq/cpufreq-cpu0.c index a7e51bd20502..65618536abfa 100644 --- a/drivers/cpufreq/cpufreq-cpu0.c +++ b/drivers/cpufreq/cpufreq-cpu0.c @@ -46,7 +46,7 @@ static int cpu0_set_target(struct cpufreq_policy *policy, struct opp *opp; unsigned long volt = 0, volt_old = 0, tol = 0; long freq_Hz; - unsigned int index, cpu; + unsigned int index; int ret; ret = cpufreq_frequency_table_target(policy, freq_table, target_freq, @@ -66,10 +66,7 @@ static int cpu0_set_target(struct cpufreq_policy *policy, if (freqs.old == freqs.new) return 0; - for_each_online_cpu(cpu) { - freqs.cpu = cpu; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - } + cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); if (cpu_reg) { rcu_read_lock(); @@ -121,10 +118,7 @@ static int cpu0_set_target(struct cpufreq_policy *policy, } post_notify: - for_each_online_cpu(cpu) { - freqs.cpu = cpu; - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - } + cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); return ret; } diff --git a/drivers/cpufreq/cpufreq-nforce2.c b/drivers/cpufreq/cpufreq-nforce2.c index 13d311ee08b3..224a4787ecc2 100644 --- a/drivers/cpufreq/cpufreq-nforce2.c +++ b/drivers/cpufreq/cpufreq-nforce2.c @@ -263,7 +263,6 @@ static int nforce2_target(struct cpufreq_policy *policy, freqs.old = nforce2_get(policy->cpu); freqs.new = target_fsb * fid * 100; - freqs.cpu = 0; /* Only one CPU on nForce2 platforms */ if (freqs.old == freqs.new) return 0; @@ -271,7 +270,7 @@ static int nforce2_target(struct cpufreq_policy *policy, pr_debug("Old CPU frequency %d kHz, new %d kHz\n", freqs.old, freqs.new); - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); /* Disable IRQs */ /* local_irq_save(flags); */ @@ -286,7 +285,7 @@ static int nforce2_target(struct cpufreq_policy *policy, /* Enable IRQs */ /* local_irq_restore(flags); */ - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); return 0; } diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 85963fc48a5f..0198cd0a60ce 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ 
-249,19 +249,9 @@ static inline void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci) #endif -/** - * cpufreq_notify_transition - call notifier chain and adjust_jiffies - * on frequency transition. - * - * This function calls the transition notifiers and the "adjust_jiffies" - * function. It is called twice on all CPU frequency changes that have - * external effects. - */ -void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state) +void __cpufreq_notify_transition(struct cpufreq_policy *policy, + struct cpufreq_freqs *freqs, unsigned int state) { - struct cpufreq_policy *policy; - unsigned long flags; - BUG_ON(irqs_disabled()); if (cpufreq_disabled()) @@ -271,10 +261,6 @@ void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state) pr_debug("notification %u of frequency transition to %u kHz\n", state, freqs->new); - read_lock_irqsave(&cpufreq_driver_lock, flags); - policy = per_cpu(cpufreq_cpu_data, freqs->cpu); - read_unlock_irqrestore(&cpufreq_driver_lock, flags); - switch (state) { case CPUFREQ_PRECHANGE: @@ -308,6 +294,20 @@ void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state) break; } } +/** + * cpufreq_notify_transition - call notifier chain and adjust_jiffies + * on frequency transition. + * + * This function calls the transition notifiers and the "adjust_jiffies" + * function. It is called twice on all CPU frequency changes that have + * external effects. + */ +void cpufreq_notify_transition(struct cpufreq_policy *policy, + struct cpufreq_freqs *freqs, unsigned int state) +{ + for_each_cpu(freqs->cpu, policy->cpus) + __cpufreq_notify_transition(policy, freqs, state); +} EXPORT_SYMBOL_GPL(cpufreq_notify_transition); @@ -1141,16 +1141,23 @@ static void handle_update(struct work_struct *work) static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq, unsigned int new_freq) { + struct cpufreq_policy *policy; struct cpufreq_freqs freqs; + unsigned long flags; + pr_debug("Warning: CPU frequency out of sync: cpufreq and timing " "core thinks of %u, is %u kHz.\n", old_freq, new_freq); - freqs.cpu = cpu; freqs.old = old_freq; freqs.new = new_freq; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + + read_lock_irqsave(&cpufreq_driver_lock, flags); + policy = per_cpu(cpufreq_cpu_data, cpu); + read_unlock_irqrestore(&cpufreq_driver_lock, flags); + + cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); } diff --git a/drivers/cpufreq/dbx500-cpufreq.c b/drivers/cpufreq/dbx500-cpufreq.c index 72f0c3efa76e..7192a6df94c0 100644 --- a/drivers/cpufreq/dbx500-cpufreq.c +++ b/drivers/cpufreq/dbx500-cpufreq.c @@ -55,8 +55,7 @@ static int dbx500_cpufreq_target(struct cpufreq_policy *policy, return 0; /* pre-change notification */ - for_each_cpu(freqs.cpu, policy->cpus) - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); /* update armss clk frequency */ ret = clk_set_rate(armss_clk, freqs.new * 1000); @@ -68,8 +67,7 @@ static int dbx500_cpufreq_target(struct cpufreq_policy *policy, } /* post change notification */ - for_each_cpu(freqs.cpu, policy->cpus) - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); return 0; } diff --git a/drivers/cpufreq/e_powersaver.c b/drivers/cpufreq/e_powersaver.c index 3fffbe6025cd..37380fb92621 100644 --- 
a/drivers/cpufreq/e_powersaver.c +++ b/drivers/cpufreq/e_powersaver.c @@ -104,7 +104,7 @@ static unsigned int eps_get(unsigned int cpu) } static int eps_set_state(struct eps_cpu_data *centaur, - unsigned int cpu, + struct cpufreq_policy *policy, u32 dest_state) { struct cpufreq_freqs freqs; @@ -112,10 +112,9 @@ static int eps_set_state(struct eps_cpu_data *centaur, int err = 0; int i; - freqs.old = eps_get(cpu); + freqs.old = eps_get(policy->cpu); freqs.new = centaur->fsb * ((dest_state >> 8) & 0xff); - freqs.cpu = cpu; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); /* Wait while CPU is busy */ rdmsr(MSR_IA32_PERF_STATUS, lo, hi); @@ -162,7 +161,7 @@ postchange: current_multiplier); } #endif - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); return err; } @@ -190,7 +189,7 @@ static int eps_target(struct cpufreq_policy *policy, /* Make frequency transition */ dest_state = centaur->freq_table[newstate].index & 0xffff; - ret = eps_set_state(centaur, cpu, dest_state); + ret = eps_set_state(centaur, policy, dest_state); if (ret) printk(KERN_ERR "eps: Timeout!\n"); return ret; diff --git a/drivers/cpufreq/elanfreq.c b/drivers/cpufreq/elanfreq.c index 960671fd3d7e..658d860344b0 100644 --- a/drivers/cpufreq/elanfreq.c +++ b/drivers/cpufreq/elanfreq.c @@ -117,15 +117,15 @@ static unsigned int elanfreq_get_cpu_frequency(unsigned int cpu) * There is no return value. */ -static void elanfreq_set_cpu_state(unsigned int state) +static void elanfreq_set_cpu_state(struct cpufreq_policy *policy, + unsigned int state) { struct cpufreq_freqs freqs; freqs.old = elanfreq_get_cpu_frequency(0); freqs.new = elan_multiplier[state].clock; - freqs.cpu = 0; /* elanfreq.c is UP only driver */ - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); printk(KERN_INFO "elanfreq: attempting to set frequency to %i kHz\n", elan_multiplier[state].clock); @@ -161,7 +161,7 @@ static void elanfreq_set_cpu_state(unsigned int state) udelay(10000); local_irq_enable(); - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); }; @@ -188,7 +188,7 @@ static int elanfreq_target(struct cpufreq_policy *policy, target_freq, relation, &newstate)) return -EINVAL; - elanfreq_set_cpu_state(newstate); + elanfreq_set_cpu_state(policy, newstate); return 0; } diff --git a/drivers/cpufreq/exynos-cpufreq.c b/drivers/cpufreq/exynos-cpufreq.c index 78057a357ddb..c0c4ce53b9e9 100644 --- a/drivers/cpufreq/exynos-cpufreq.c +++ b/drivers/cpufreq/exynos-cpufreq.c @@ -70,7 +70,6 @@ static int exynos_cpufreq_scale(unsigned int target_freq) freqs.old = policy->cur; freqs.new = target_freq; - freqs.cpu = policy->cpu; if (freqs.new == freqs.old) goto out; @@ -105,8 +104,7 @@ static int exynos_cpufreq_scale(unsigned int target_freq) } arm_volt = volt_table[index]; - for_each_cpu(freqs.cpu, policy->cpus) - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); /* When the new frequency is higher than current frequency */ if ((freqs.new > freqs.old) && !safe_arm_volt) { @@ -131,8 +129,7 @@ static int exynos_cpufreq_scale(unsigned int target_freq) exynos_info->set_freq(old_index, index); - for_each_cpu(freqs.cpu, policy->cpus) - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + cpufreq_notify_transition(policy, &freqs, 
CPUFREQ_POSTCHANGE); /* When the new frequency is lower than current frequency */ if ((freqs.new < freqs.old) || diff --git a/drivers/cpufreq/gx-suspmod.c b/drivers/cpufreq/gx-suspmod.c index 456bee058fe6..3dfc99b9ca86 100644 --- a/drivers/cpufreq/gx-suspmod.c +++ b/drivers/cpufreq/gx-suspmod.c @@ -251,14 +251,13 @@ static unsigned int gx_validate_speed(unsigned int khz, u8 *on_duration, * set cpu speed in khz. **/ -static void gx_set_cpuspeed(unsigned int khz) +static void gx_set_cpuspeed(struct cpufreq_policy *policy, unsigned int khz) { u8 suscfg, pmer1; unsigned int new_khz; unsigned long flags; struct cpufreq_freqs freqs; - freqs.cpu = 0; freqs.old = gx_get_cpuspeed(0); new_khz = gx_validate_speed(khz, &gx_params->on_duration, @@ -266,11 +265,9 @@ static void gx_set_cpuspeed(unsigned int khz) freqs.new = new_khz; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); local_irq_save(flags); - - if (new_khz != stock_freq) { /* if new khz == 100% of CPU speed, it is special case */ switch (gx_params->cs55x0->device) { @@ -317,7 +314,7 @@ static void gx_set_cpuspeed(unsigned int khz) gx_params->pci_suscfg = suscfg; - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); pr_debug("suspend modulation w/ duration of ON:%d us, OFF:%d us\n", gx_params->on_duration * 32, gx_params->off_duration * 32); @@ -397,7 +394,7 @@ static int cpufreq_gx_target(struct cpufreq_policy *policy, tmp_freq = gx_validate_speed(tmp_freq, &tmp1, &tmp2); } - gx_set_cpuspeed(tmp_freq); + gx_set_cpuspeed(policy, tmp_freq); return 0; } diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c index 54e336de373b..b78bc35973ba 100644 --- a/drivers/cpufreq/imx6q-cpufreq.c +++ b/drivers/cpufreq/imx6q-cpufreq.c @@ -50,7 +50,7 @@ static int imx6q_set_target(struct cpufreq_policy *policy, struct cpufreq_freqs freqs; struct opp *opp; unsigned long freq_hz, volt, volt_old; - unsigned int index, cpu; + unsigned int index; int ret; ret = cpufreq_frequency_table_target(policy, freq_table, target_freq, @@ -68,10 +68,7 @@ static int imx6q_set_target(struct cpufreq_policy *policy, if (freqs.old == freqs.new) return 0; - for_each_online_cpu(cpu) { - freqs.cpu = cpu; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - } + cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); rcu_read_lock(); opp = opp_find_freq_ceil(cpu_dev, &freq_hz); @@ -166,10 +163,7 @@ static int imx6q_set_target(struct cpufreq_policy *policy, } } - for_each_online_cpu(cpu) { - freqs.cpu = cpu; - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - } + cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); return 0; } diff --git a/drivers/cpufreq/kirkwood-cpufreq.c b/drivers/cpufreq/kirkwood-cpufreq.c index 60524764fe4e..d36ea8dc96eb 100644 --- a/drivers/cpufreq/kirkwood-cpufreq.c +++ b/drivers/cpufreq/kirkwood-cpufreq.c @@ -55,7 +55,8 @@ static unsigned int kirkwood_cpufreq_get_cpu_frequency(unsigned int cpu) return kirkwood_freq_table[0].frequency; } -static void kirkwood_cpufreq_set_cpu_state(unsigned int index) +static void kirkwood_cpufreq_set_cpu_state(struct cpufreq_policy *policy, + unsigned int index) { struct cpufreq_freqs freqs; unsigned int state = kirkwood_freq_table[index].index; @@ -63,9 +64,8 @@ static void kirkwood_cpufreq_set_cpu_state(unsigned int index) freqs.old = kirkwood_cpufreq_get_cpu_frequency(0); freqs.new = kirkwood_freq_table[index].frequency; - freqs.cpu = 0; 
/* Kirkwood is UP */ - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); dev_dbg(priv.dev, "Attempting to set frequency to %i KHz\n", kirkwood_freq_table[index].frequency); @@ -99,7 +99,7 @@ static void kirkwood_cpufreq_set_cpu_state(unsigned int index) local_irq_enable(); } - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); }; static int kirkwood_cpufreq_verify(struct cpufreq_policy *policy) @@ -117,7 +117,7 @@ static int kirkwood_cpufreq_target(struct cpufreq_policy *policy, target_freq, relation, &index)) return -EINVAL; - kirkwood_cpufreq_set_cpu_state(index); + kirkwood_cpufreq_set_cpu_state(policy, index); return 0; } diff --git a/drivers/cpufreq/longhaul.c b/drivers/cpufreq/longhaul.c index 1180d536d1eb..b448638e34de 100644 --- a/drivers/cpufreq/longhaul.c +++ b/drivers/cpufreq/longhaul.c @@ -242,7 +242,8 @@ static void do_powersaver(int cx_address, unsigned int mults_index, * Sets a new clock ratio. */ -static void longhaul_setstate(unsigned int table_index) +static void longhaul_setstate(struct cpufreq_policy *policy, + unsigned int table_index) { unsigned int mults_index; int speed, mult; @@ -267,9 +268,8 @@ static void longhaul_setstate(unsigned int table_index) freqs.old = calc_speed(longhaul_get_cpu_mult()); freqs.new = speed; - freqs.cpu = 0; /* longhaul.c is UP only driver */ - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); pr_debug("Setting to FSB:%dMHz Mult:%d.%dx (%s)\n", fsb, mult/10, mult%10, print_speed(speed/1000)); @@ -386,7 +386,7 @@ retry_loop: } } /* Report true CPU frequency */ - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); if (!bm_timeout) printk(KERN_INFO PFX "Warning: Timeout while waiting for " @@ -648,7 +648,7 @@ static int longhaul_target(struct cpufreq_policy *policy, return 0; if (!can_scale_voltage) - longhaul_setstate(table_index); + longhaul_setstate(policy, table_index); else { /* On test system voltage transitions exceeding single * step up or down were turning motherboard off. 
Both @@ -663,7 +663,7 @@ static int longhaul_target(struct cpufreq_policy *policy, while (i != table_index) { vid = (longhaul_table[i].index >> 8) & 0x1f; if (vid != current_vid) { - longhaul_setstate(i); + longhaul_setstate(policy, i); current_vid = vid; msleep(200); } @@ -672,7 +672,7 @@ static int longhaul_target(struct cpufreq_policy *policy, else i--; } - longhaul_setstate(table_index); + longhaul_setstate(policy, table_index); } longhaul_index = table_index; return 0; @@ -998,15 +998,17 @@ static int __init longhaul_init(void) static void __exit longhaul_exit(void) { + struct cpufreq_policy *policy = cpufreq_cpu_get(0); int i; for (i = 0; i < numscales; i++) { if (mults[i] == maxmult) { - longhaul_setstate(i); + longhaul_setstate(policy, i); break; } } + cpufreq_cpu_put(policy); cpufreq_unregister_driver(&longhaul_driver); kfree(longhaul_table); } diff --git a/drivers/cpufreq/maple-cpufreq.c b/drivers/cpufreq/maple-cpufreq.c index d4c4989823dc..cdd62915efaf 100644 --- a/drivers/cpufreq/maple-cpufreq.c +++ b/drivers/cpufreq/maple-cpufreq.c @@ -158,11 +158,10 @@ static int maple_cpufreq_target(struct cpufreq_policy *policy, freqs.old = maple_cpu_freqs[maple_pmode_cur].frequency; freqs.new = maple_cpu_freqs[newstate].frequency; - freqs.cpu = 0; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); rc = maple_scom_switch_freq(newstate); - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); mutex_unlock(&maple_switch_mutex); diff --git a/drivers/cpufreq/omap-cpufreq.c b/drivers/cpufreq/omap-cpufreq.c index 9128c07bafba..b610edd820b1 100644 --- a/drivers/cpufreq/omap-cpufreq.c +++ b/drivers/cpufreq/omap-cpufreq.c @@ -88,16 +88,12 @@ static int omap_target(struct cpufreq_policy *policy, } freqs.old = omap_getspeed(policy->cpu); - freqs.cpu = policy->cpu; if (freqs.old == freqs.new && policy->cur == freqs.new) return ret; /* notifiers */ - for_each_cpu(i, policy->cpus) { - freqs.cpu = i; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - } + cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); freq = freqs.new * 1000; ret = clk_round_rate(mpu_clk, freq); @@ -157,10 +153,7 @@ static int omap_target(struct cpufreq_policy *policy, done: /* notifiers */ - for_each_cpu(i, policy->cpus) { - freqs.cpu = i; - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - } + cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); return ret; } diff --git a/drivers/cpufreq/p4-clockmod.c b/drivers/cpufreq/p4-clockmod.c index 827629c9aad7..4b2e7737b939 100644 --- a/drivers/cpufreq/p4-clockmod.c +++ b/drivers/cpufreq/p4-clockmod.c @@ -125,10 +125,7 @@ static int cpufreq_p4_target(struct cpufreq_policy *policy, return 0; /* notifiers */ - for_each_cpu(i, policy->cpus) { - freqs.cpu = i; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - } + cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); /* run on each logical CPU, * see section 13.15.3 of IA32 Intel Architecture Software @@ -138,10 +135,7 @@ static int cpufreq_p4_target(struct cpufreq_policy *policy, cpufreq_p4_setdc(i, p4clockmod_table[newstate].index); /* notifiers */ - for_each_cpu(i, policy->cpus) { - freqs.cpu = i; - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - } + cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); return 0; } diff --git a/drivers/cpufreq/pcc-cpufreq.c b/drivers/cpufreq/pcc-cpufreq.c index 503996a94a6a..0de00081a81e 100644 --- 
a/drivers/cpufreq/pcc-cpufreq.c +++ b/drivers/cpufreq/pcc-cpufreq.c @@ -215,8 +215,7 @@ static int pcc_cpufreq_target(struct cpufreq_policy *policy, (pcch_virt_addr + pcc_cpu_data->input_offset)); freqs.new = target_freq; - freqs.cpu = cpu; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); input_buffer = 0x1 | (((target_freq * 100) / (ioread32(&pcch_hdr->nominal) * 1000)) << 8); @@ -237,7 +236,7 @@ static int pcc_cpufreq_target(struct cpufreq_policy *policy, } iowrite16(0, &pcch_hdr->status); - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); pr_debug("target: was SUCCESSFUL for cpu %d\n", cpu); spin_unlock(&pcc_lock); diff --git a/drivers/cpufreq/powernow-k6.c b/drivers/cpufreq/powernow-k6.c index af23e0b9ec92..ea0222a45b7b 100644 --- a/drivers/cpufreq/powernow-k6.c +++ b/drivers/cpufreq/powernow-k6.c @@ -68,7 +68,8 @@ static int powernow_k6_get_cpu_multiplier(void) * * Tries to change the PowerNow! multiplier */ -static void powernow_k6_set_state(unsigned int best_i) +static void powernow_k6_set_state(struct cpufreq_policy *policy, + unsigned int best_i) { unsigned long outvalue = 0, invalue = 0; unsigned long msrval; @@ -81,9 +82,8 @@ static void powernow_k6_set_state(unsigned int best_i) freqs.old = busfreq * powernow_k6_get_cpu_multiplier(); freqs.new = busfreq * clock_ratio[best_i].index; - freqs.cpu = 0; /* powernow-k6.c is UP only driver */ - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); /* we now need to transform best_i to the BVC format, see AMD#23446 */ @@ -98,7 +98,7 @@ static void powernow_k6_set_state(unsigned int best_i) msrval = POWERNOW_IOPORT + 0x0; wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */ - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); return; } @@ -136,7 +136,7 @@ static int powernow_k6_target(struct cpufreq_policy *policy, target_freq, relation, &newstate)) return -EINVAL; - powernow_k6_set_state(newstate); + powernow_k6_set_state(policy, newstate); return 0; } @@ -182,7 +182,7 @@ static int powernow_k6_cpu_exit(struct cpufreq_policy *policy) unsigned int i; for (i = 0; i < 8; i++) { if (i == max_multiplier) - powernow_k6_set_state(i); + powernow_k6_set_state(policy, i); } cpufreq_frequency_table_put_attr(policy->cpu); return 0; diff --git a/drivers/cpufreq/powernow-k7.c b/drivers/cpufreq/powernow-k7.c index 334cc2f1e9f1..53888dacbe58 100644 --- a/drivers/cpufreq/powernow-k7.c +++ b/drivers/cpufreq/powernow-k7.c @@ -248,7 +248,7 @@ static void change_VID(int vid) } -static void change_speed(unsigned int index) +static void change_speed(struct cpufreq_policy *policy, unsigned int index) { u8 fid, vid; struct cpufreq_freqs freqs; @@ -263,15 +263,13 @@ static void change_speed(unsigned int index) fid = powernow_table[index].index & 0xFF; vid = (powernow_table[index].index & 0xFF00) >> 8; - freqs.cpu = 0; - rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val); cfid = fidvidstatus.bits.CFID; freqs.old = fsb * fid_codes[cfid] / 10; freqs.new = powernow_table[index].frequency; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); /* Now do the magic poking into the MSRs. 
*/ @@ -292,7 +290,7 @@ static void change_speed(unsigned int index) if (have_a0 == 1) local_irq_enable(); - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); } @@ -546,7 +544,7 @@ static int powernow_target(struct cpufreq_policy *policy, relation, &newstate)) return -EINVAL; - change_speed(newstate); + change_speed(policy, newstate); return 0; } diff --git a/drivers/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c index d13a13678b5f..52137a323965 100644 --- a/drivers/cpufreq/powernow-k8.c +++ b/drivers/cpufreq/powernow-k8.c @@ -928,9 +928,10 @@ static int get_transition_latency(struct powernow_k8_data *data) static int transition_frequency_fidvid(struct powernow_k8_data *data, unsigned int index) { + struct cpufreq_policy *policy; u32 fid = 0; u32 vid = 0; - int res, i; + int res; struct cpufreq_freqs freqs; pr_debug("cpu %d transition to index %u\n", smp_processor_id(), index); @@ -959,10 +960,10 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data, freqs.old = find_khz_freq_from_fid(data->currfid); freqs.new = find_khz_freq_from_fid(fid); - for_each_cpu(i, data->available_cores) { - freqs.cpu = i; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - } + policy = cpufreq_cpu_get(smp_processor_id()); + cpufreq_cpu_put(policy); + + cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); res = transition_fid_vid(data, fid, vid); if (res) @@ -970,10 +971,7 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data, freqs.new = find_khz_freq_from_fid(data->currfid); - for_each_cpu(i, data->available_cores) { - freqs.cpu = i; - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - } + cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); return res; } diff --git a/drivers/cpufreq/s3c2416-cpufreq.c b/drivers/cpufreq/s3c2416-cpufreq.c index bcc053bc02c4..4f1881eee3f1 100644 --- a/drivers/cpufreq/s3c2416-cpufreq.c +++ b/drivers/cpufreq/s3c2416-cpufreq.c @@ -256,7 +256,6 @@ static int s3c2416_cpufreq_set_target(struct cpufreq_policy *policy, goto out; } - freqs.cpu = 0; freqs.flags = 0; freqs.old = s3c_freq->is_dvs ? 
FREQ_DVS : clk_get_rate(s3c_freq->armclk) / 1000; @@ -274,7 +273,7 @@ static int s3c2416_cpufreq_set_target(struct cpufreq_policy *policy, if (!to_dvs && freqs.old == freqs.new) goto out; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); if (to_dvs) { pr_debug("cpufreq: enter dvs\n"); @@ -287,7 +286,7 @@ static int s3c2416_cpufreq_set_target(struct cpufreq_policy *policy, ret = s3c2416_cpufreq_set_armdiv(s3c_freq, freqs.new); } - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); out: mutex_unlock(&cpufreq_lock); diff --git a/drivers/cpufreq/s3c64xx-cpufreq.c b/drivers/cpufreq/s3c64xx-cpufreq.c index 6f9490b3c356..27cacb524796 100644 --- a/drivers/cpufreq/s3c64xx-cpufreq.c +++ b/drivers/cpufreq/s3c64xx-cpufreq.c @@ -84,7 +84,6 @@ static int s3c64xx_cpufreq_set_target(struct cpufreq_policy *policy, if (ret != 0) return ret; - freqs.cpu = 0; freqs.old = clk_get_rate(armclk) / 1000; freqs.new = s3c64xx_freq_table[i].frequency; freqs.flags = 0; @@ -95,7 +94,7 @@ static int s3c64xx_cpufreq_set_target(struct cpufreq_policy *policy, pr_debug("Transition %d-%dkHz\n", freqs.old, freqs.new); - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); #ifdef CONFIG_REGULATOR if (vddarm && freqs.new > freqs.old) { @@ -117,7 +116,7 @@ static int s3c64xx_cpufreq_set_target(struct cpufreq_policy *policy, goto err; } - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); #ifdef CONFIG_REGULATOR if (vddarm && freqs.new < freqs.old) { @@ -141,7 +140,7 @@ err_clk: if (clk_set_rate(armclk, freqs.old * 1000) < 0) pr_err("Failed to restore original clock rate\n"); err: - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); return ret; } diff --git a/drivers/cpufreq/s5pv210-cpufreq.c b/drivers/cpufreq/s5pv210-cpufreq.c index a484aaea9809..5c7757073793 100644 --- a/drivers/cpufreq/s5pv210-cpufreq.c +++ b/drivers/cpufreq/s5pv210-cpufreq.c @@ -229,7 +229,6 @@ static int s5pv210_target(struct cpufreq_policy *policy, } freqs.new = s5pv210_freq_table[index].frequency; - freqs.cpu = 0; if (freqs.new == freqs.old) goto exit; @@ -256,7 +255,7 @@ static int s5pv210_target(struct cpufreq_policy *policy, goto exit; } - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); /* Check if there need to change PLL */ if ((index == L0) || (priv_index == L0)) @@ -468,7 +467,7 @@ static int s5pv210_target(struct cpufreq_policy *policy, } } - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); if (freqs.new < freqs.old) { regulator_set_voltage(int_regulator, diff --git a/drivers/cpufreq/sc520_freq.c b/drivers/cpufreq/sc520_freq.c index e42e073cd9b8..f740b134d27b 100644 --- a/drivers/cpufreq/sc520_freq.c +++ b/drivers/cpufreq/sc520_freq.c @@ -53,7 +53,8 @@ static unsigned int sc520_freq_get_cpu_frequency(unsigned int cpu) } } -static void sc520_freq_set_cpu_state(unsigned int state) +static void sc520_freq_set_cpu_state(struct cpufreq_policy *policy, + unsigned int state) { struct cpufreq_freqs freqs; @@ -61,9 +62,8 @@ static void sc520_freq_set_cpu_state(unsigned int state) freqs.old = sc520_freq_get_cpu_frequency(0); freqs.new = sc520_freq_table[state].frequency; - freqs.cpu = 
0; /* AMD Elan is UP */ - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); pr_debug("attempting to set frequency to %i kHz\n", sc520_freq_table[state].frequency); @@ -75,7 +75,7 @@ static void sc520_freq_set_cpu_state(unsigned int state) local_irq_enable(); - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); }; static int sc520_freq_verify(struct cpufreq_policy *policy) @@ -93,7 +93,7 @@ static int sc520_freq_target(struct cpufreq_policy *policy, target_freq, relation, &newstate)) return -EINVAL; - sc520_freq_set_cpu_state(newstate); + sc520_freq_set_cpu_state(policy, newstate); return 0; } diff --git a/drivers/cpufreq/spear-cpufreq.c b/drivers/cpufreq/spear-cpufreq.c index 7e4d77327957..156829f4576d 100644 --- a/drivers/cpufreq/spear-cpufreq.c +++ b/drivers/cpufreq/spear-cpufreq.c @@ -121,7 +121,6 @@ static int spear_cpufreq_target(struct cpufreq_policy *policy, target_freq, relation, &index)) return -EINVAL; - freqs.cpu = policy->cpu; freqs.old = spear_cpufreq_get(0); newfreq = spear_cpufreq.freq_tbl[index].frequency * 1000; @@ -158,8 +157,7 @@ static int spear_cpufreq_target(struct cpufreq_policy *policy, freqs.new = newfreq / 1000; freqs.new /= mult; - for_each_cpu(freqs.cpu, policy->cpus) - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); if (mult == 2) ret = spear1340_set_cpu_rate(srcclk, newfreq); @@ -172,8 +170,7 @@ static int spear_cpufreq_target(struct cpufreq_policy *policy, freqs.new = clk_get_rate(spear_cpufreq.clk) / 1000; } - for_each_cpu(freqs.cpu, policy->cpus) - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); return ret; } diff --git a/drivers/cpufreq/speedstep-centrino.c b/drivers/cpufreq/speedstep-centrino.c index 3a953d519f46..3dbbcc3519af 100644 --- a/drivers/cpufreq/speedstep-centrino.c +++ b/drivers/cpufreq/speedstep-centrino.c @@ -457,7 +457,7 @@ static int centrino_target (struct cpufreq_policy *policy, unsigned int msr, oldmsr = 0, h = 0, cpu = policy->cpu; struct cpufreq_freqs freqs; int retval = 0; - unsigned int j, k, first_cpu, tmp; + unsigned int j, first_cpu, tmp; cpumask_var_t covered_cpus; if (unlikely(!zalloc_cpumask_var(&covered_cpus, GFP_KERNEL))) @@ -522,13 +522,8 @@ static int centrino_target (struct cpufreq_policy *policy, pr_debug("target=%dkHz old=%d new=%d msr=%04x\n", target_freq, freqs.old, freqs.new, msr); - for_each_cpu(k, policy->cpus) { - if (!cpu_online(k)) - continue; - freqs.cpu = k; - cpufreq_notify_transition(&freqs, + cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); - } first_cpu = 0; /* all but 16 LSB are reserved, treat them with care */ @@ -544,12 +539,7 @@ static int centrino_target (struct cpufreq_policy *policy, cpumask_set_cpu(j, covered_cpus); } - for_each_cpu(k, policy->cpus) { - if (!cpu_online(k)) - continue; - freqs.cpu = k; - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - } + cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); if (unlikely(retval)) { /* @@ -565,12 +555,8 @@ static int centrino_target (struct cpufreq_policy *policy, tmp = freqs.new; freqs.new = freqs.old; freqs.old = tmp; - for_each_cpu(j, policy->cpus) { - if (!cpu_online(j)) - continue; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - } + cpufreq_notify_transition(policy, &freqs, 
CPUFREQ_PRECHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); } retval = 0; diff --git a/drivers/cpufreq/speedstep-ich.c b/drivers/cpufreq/speedstep-ich.c index e29b59aa68a8..e2e5aa971452 100644 --- a/drivers/cpufreq/speedstep-ich.c +++ b/drivers/cpufreq/speedstep-ich.c @@ -263,7 +263,6 @@ static int speedstep_target(struct cpufreq_policy *policy, { unsigned int newstate = 0, policy_cpu; struct cpufreq_freqs freqs; - int i; if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], target_freq, relation, &newstate)) @@ -272,7 +271,6 @@ static int speedstep_target(struct cpufreq_policy *policy, policy_cpu = cpumask_any_and(policy->cpus, cpu_online_mask); freqs.old = speedstep_get(policy_cpu); freqs.new = speedstep_freqs[newstate].frequency; - freqs.cpu = policy->cpu; pr_debug("transiting from %u to %u kHz\n", freqs.old, freqs.new); @@ -280,18 +278,12 @@ static int speedstep_target(struct cpufreq_policy *policy, if (freqs.old == freqs.new) return 0; - for_each_cpu(i, policy->cpus) { - freqs.cpu = i; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - } + cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); smp_call_function_single(policy_cpu, _speedstep_set_state, &newstate, true); - for_each_cpu(i, policy->cpus) { - freqs.cpu = i; - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - } + cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); return 0; } diff --git a/drivers/cpufreq/speedstep-smi.c b/drivers/cpufreq/speedstep-smi.c index 6a457fcaaad5..f5a6b70ee6c0 100644 --- a/drivers/cpufreq/speedstep-smi.c +++ b/drivers/cpufreq/speedstep-smi.c @@ -252,14 +252,13 @@ static int speedstep_target(struct cpufreq_policy *policy, freqs.old = speedstep_freqs[speedstep_get_state()].frequency; freqs.new = speedstep_freqs[newstate].frequency; - freqs.cpu = 0; /* speedstep.c is UP only driver */ if (freqs.old == freqs.new) return 0; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); speedstep_set_state(newstate); - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); return 0; } diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 4bbc572dd521..037d36ae63e5 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -278,8 +278,8 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data); int cpufreq_unregister_driver(struct cpufreq_driver *driver_data); -void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state); - +void cpufreq_notify_transition(struct cpufreq_policy *policy, + struct cpufreq_freqs *freqs, unsigned int state); static inline void cpufreq_verify_within_limits(struct cpufreq_policy *policy, unsigned int min, unsigned int max) { -- cgit From bb5547acfcd842950b8a22aa83f84af93388b9f2 Mon Sep 17 00:00:00 2001 From: Varun Sethi Date: Fri, 29 Mar 2013 01:23:58 +0530 Subject: iommu/fsl: Make iova dma_addr_t in the iommu_iova_to_phys API. This is required in case of PAMU, as it can support a window size of up to 64G (even on 32bit). 
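As a minimal sketch of why the wider type matters (a hypothetical caller, not part of this patch; the domain pointer and the 36-bit address are assumed for illustration): on a 32-bit kernel an unsigned long iova would truncate anything above 4G, while dma_addr_t preserves it whenever CONFIG_ARCH_DMA_ADDR_T_64BIT is set.

	/* Hypothetical caller sketch, not part of this patch. */
	dma_addr_t iova = 0x900000000ULL;	/* 36-bit IOVA, above 4G */
	phys_addr_t phys = iommu_iova_to_phys(domain, iova);

	if (!phys)
		pr_err("no translation for iova 0x%llx\n",
		       (unsigned long long)iova);

With the old unsigned long argument this example address would have silently truncated to 0 on 32-bit.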
Signed-off-by: Varun Sethi Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 2 +- drivers/iommu/exynos-iommu.c | 2 +- drivers/iommu/intel-iommu.c | 2 +- drivers/iommu/iommu.c | 3 +-- drivers/iommu/msm_iommu.c | 2 +- drivers/iommu/omap-iommu.c | 2 +- drivers/iommu/shmobile-iommu.c | 2 +- drivers/iommu/tegra-gart.c | 2 +- drivers/iommu/tegra-smmu.c | 2 +- include/linux/iommu.h | 9 +++------ 10 files changed, 12 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index b287ca33833d..a7f6b04eaa5e 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -3410,7 +3410,7 @@ static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova, } static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, - unsigned long iova) + dma_addr_t iova) { struct protection_domain *domain = dom->priv; unsigned long offset_mask; diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 238a3caa949a..3f32d64ab87a 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -1027,7 +1027,7 @@ done: } static phys_addr_t exynos_iommu_iova_to_phys(struct iommu_domain *domain, - unsigned long iova) + dma_addr_t iova) { struct exynos_iommu_domain *priv = domain->priv; unsigned long *entry; diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 0099667a397e..6e0b9ffc79b5 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -4111,7 +4111,7 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain, } static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, - unsigned long iova) + dma_addr_t iova) { struct dmar_domain *dmar_domain = domain->priv; struct dma_pte *pte; diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index b972d430d92b..f730ed9d8af9 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -706,8 +706,7 @@ void iommu_detach_group(struct iommu_domain *domain, struct iommu_group *group) } EXPORT_SYMBOL_GPL(iommu_detach_group); -phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, - unsigned long iova) +phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) { if (unlikely(domain->ops->iova_to_phys == NULL)) return 0; diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index 6a8870a31668..8ab4f41090af 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -554,7 +554,7 @@ fail: } static phys_addr_t msm_iommu_iova_to_phys(struct iommu_domain *domain, - unsigned long va) + dma_addr_t va) { struct msm_priv *priv; struct msm_iommu_drvdata *iommu_drvdata; diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index 6ac02fa5910f..e02e5d71745b 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -1219,7 +1219,7 @@ static void omap_iommu_domain_destroy(struct iommu_domain *domain) } static phys_addr_t omap_iommu_iova_to_phys(struct iommu_domain *domain, - unsigned long da) + dma_addr_t da) { struct omap_iommu_domain *omap_domain = domain->priv; struct omap_iommu *oiommu = omap_domain->iommu_dev; diff --git a/drivers/iommu/shmobile-iommu.c b/drivers/iommu/shmobile-iommu.c index b6e8b57cf0a8..d572863dfccd 100644 --- a/drivers/iommu/shmobile-iommu.c +++ b/drivers/iommu/shmobile-iommu.c @@ -296,7 +296,7 @@ done: } static phys_addr_t shmobile_iommu_iova_to_phys(struct iommu_domain *domain, - unsigned long iova) + dma_addr_t iova) { struct shmobile_iommu_domain *sh_domain = 
domain->priv; uint32_t l1entry = 0, l2entry = 0; diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c index 86437575f94d..4aec8be38054 100644 --- a/drivers/iommu/tegra-gart.c +++ b/drivers/iommu/tegra-gart.c @@ -279,7 +279,7 @@ static size_t gart_iommu_unmap(struct iommu_domain *domain, unsigned long iova, } static phys_addr_t gart_iommu_iova_to_phys(struct iommu_domain *domain, - unsigned long iova) + dma_addr_t iova) { struct gart_device *gart = domain->priv; unsigned long pte; diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index b34e5fd7fd9e..bc9b59949d09 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -757,7 +757,7 @@ static size_t smmu_iommu_unmap(struct iommu_domain *domain, unsigned long iova, } static phys_addr_t smmu_iommu_iova_to_phys(struct iommu_domain *domain, - unsigned long iova) + dma_addr_t iova) { struct smmu_as *as = domain->priv; unsigned long *pte; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index ba3b8a98a049..bb0a0fc26729 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -91,8 +91,7 @@ struct iommu_ops { phys_addr_t paddr, size_t size, int prot); size_t (*unmap)(struct iommu_domain *domain, unsigned long iova, size_t size); - phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, - unsigned long iova); + phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, dma_addr_t iova); int (*domain_has_cap)(struct iommu_domain *domain, unsigned long cap); int (*add_device)(struct device *dev); @@ -134,8 +133,7 @@ extern int iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot); extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size); -extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, - unsigned long iova); +extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova); extern int iommu_domain_has_cap(struct iommu_domain *domain, unsigned long cap); extern void iommu_set_fault_handler(struct iommu_domain *domain, @@ -267,8 +265,7 @@ static inline void iommu_domain_window_disable(struct iommu_domain *domain, { } -static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, - unsigned long iova) +static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) { return 0; } -- cgit From 80f97f0f73b82444f714651ea053838d27779dca Mon Sep 17 00:00:00 2001 From: Varun Sethi Date: Fri, 29 Mar 2013 01:24:00 +0530 Subject: iommu/fsl: Add the window permission flag as a parameter to iommu_window_enable API. Each iommu window can have access permissions associated with it. Extended the window_enable API to incorporate window access permissions. In case of PAMU each window can have its specific set of permissions. 
Signed-off-by: Varun Sethi Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 5 +++-- include/linux/iommu.h | 7 ++++--- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index f730ed9d8af9..1d72b4f5b006 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -853,12 +853,13 @@ EXPORT_SYMBOL_GPL(iommu_unmap); int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr, - phys_addr_t paddr, u64 size) + phys_addr_t paddr, u64 size, int prot) { if (unlikely(domain->ops->domain_window_enable == NULL)) return -ENODEV; - return domain->ops->domain_window_enable(domain, wnd_nr, paddr, size); + return domain->ops->domain_window_enable(domain, wnd_nr, paddr, size, + prot); } EXPORT_SYMBOL_GPL(iommu_domain_window_enable); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index bb0a0fc26729..272781073110 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -104,7 +104,7 @@ struct iommu_ops { /* Window handling functions */ int (*domain_window_enable)(struct iommu_domain *domain, u32 wnd_nr, - phys_addr_t paddr, u64 size); + phys_addr_t paddr, u64 size, int prot); void (*domain_window_disable)(struct iommu_domain *domain, u32 wnd_nr); /* Set the numer of window per domain */ int (*domain_set_windows)(struct iommu_domain *domain, u32 w_count); @@ -169,7 +169,8 @@ extern int iommu_domain_set_attr(struct iommu_domain *domain, enum iommu_attr, /* Window handling function prototypes */ extern int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr, - phys_addr_t offset, u64 size); + phys_addr_t offset, u64 size, + int prot); extern void iommu_domain_window_disable(struct iommu_domain *domain, u32 wnd_nr); /** * report_iommu_fault() - report about an IOMMU fault to the IOMMU framework @@ -255,7 +256,7 @@ static inline int iommu_unmap(struct iommu_domain *domain, unsigned long iova, static inline int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr, phys_addr_t paddr, - u64 size) + u64 size, int prot) { return -ENODEV; } -- cgit From 65b3841b9cb5fe1b239f12dbf033f9827d73d032 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 2 Apr 2013 09:35:07 +0000 Subject: of_net.h: Provide empty functions if OF_NET is not configured of_get_mac_address() and of_get_phy_mode() are only provided if OF_NET is configured. While most callers check for the define, not all do, and those who do require #ifdef around the code. For those who don't, the missing check can result in errors such as arch/powerpc/sysdev/tsi108_dev.c:107:3: error: implicit declaration of function 'of_get_mac_address' [-Werror=implicit-function-declaration] arch/powerpc/sysdev/mv64x60_dev.c:253:2: error: implicit declaration of function 'of_get_mac_address' [-Werror=implicit-function-declaration] Provide empty functions if OF_NET is not configured. This is safe because all callers do check the return values. Cc: David Daney Signed-off-by: Guenter Roeck Acked-by: Rob Herring Signed-off-by: David S. 
Miller --- include/linux/of_net.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of_net.h b/include/linux/of_net.h index f47464188710..61bf53b02779 100644 --- a/include/linux/of_net.h +++ b/include/linux/of_net.h @@ -11,6 +11,16 @@ #include extern const int of_get_phy_mode(struct device_node *np); extern const void *of_get_mac_address(struct device_node *np); +#else +static inline const int of_get_phy_mode(struct device_node *np) +{ + return -ENODEV; +} + +static inline const void *of_get_mac_address(struct device_node *np) +{ + return NULL; +} #endif #endif /* __LINUX_OF_NET_H */ -- cgit From 35e1d5f6344ed1bb2fab61ac7934aa0f19908b2c Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Tue, 2 Apr 2013 13:24:05 +0100 Subject: regulator: ab8500-ext: Remove unused REGULATOR_AB8500_EXT guard Before the AB8500 External Regulator driver was mainlined, it used to be conditionally compiled in using the CONFIG_REGULATOR_AB8500_EXT flag. During the review process that capability was removed, but the guard controlling prototyping slipped through the net. This patch cleans it up. Signed-off-by: Lee Jones Signed-off-by: Mark Brown --- include/linux/regulator/ab8500.h | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/regulator/ab8500.h b/include/linux/regulator/ab8500.h index bb0140c9d4f4..44f67e8f1a6d 100644 --- a/include/linux/regulator/ab8500.h +++ b/include/linux/regulator/ab8500.h @@ -320,21 +320,6 @@ struct ab8500_regulator_platform_data { struct regulator_init_data *ext_regulator; }; -/* AB8500 external regulator functions (internal) */ -#ifdef CONFIG_REGULATOR_AB8500_EXT -int ab8500_ext_regulator_init(struct platform_device *pdev); -int ab8500_ext_regulator_exit(struct platform_device *pdev); -#else -inline int ab8500_ext_regulator_init(struct platform_device *pdev) -{ - return 0; -} -inline int ab8500_ext_regulator_exit(struct platform_device *pdev) -{ - return 0; -} -#endif - #ifdef CONFIG_REGULATOR_AB8500_DEBUG int ab8500_regulator_debug_init(struct platform_device *pdev); int ab8500_regulator_debug_exit(struct platform_device *pdev); @@ -349,4 +334,8 @@ static inline int ab8500_regulator_debug_exit(struct platform_device *pdev) } #endif +/* AB8500 external regulator functions. */ +int ab8500_ext_regulator_init(struct platform_device *pdev); +int ab8500_ext_regulator_exit(struct platform_device *pdev); + #endif -- cgit From 3566d40c1a4617461b38c82059bdc41d622faa8b Mon Sep 17 00:00:00 2001 From: James Hogan Date: Mon, 25 Mar 2013 14:35:07 +0000 Subject: clk: fix clk_mux::flags kerneldoc The kerneldoc comment for struct clk_mux documented the non-existent num_clks instead of flags. Correct this. Signed-off-by: James Hogan Signed-off-by: Mike Turquette --- include/linux/clk-provider.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 1f0352802794..b1675074fe7c 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -284,7 +284,7 @@ struct clk *clk_register_divider_table(struct device *dev, const char *name, * @reg: register controlling multiplexer * @shift: shift to multiplexer bit field * @width: width of mutliplexer bit field - * @num_clks: number of parent clocks + * @flags: hardware-specific flags * @lock: register lock * * Clock with multiple selectable parents.
Implements .get_parent, .set_parent -- cgit From 119f5e448d32c11faf22fe81f6f2d78467a47149 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 13 Mar 2013 20:32:13 +0900 Subject: gpio: Renesas R-Car GPIO driver V3 This patch is V3 of a GPIO driver for the R-Car series of SoCs from Renesas. This driver is designed to be reusable between multiple SoCs that share the same basic building block, but so far it has only been used on R-Car H1 (r8a7779). Each driver instance handles 32 GPIOs with individually maskable IRQs. The driver operates on a single I/O memory range, and the 32 GPIOs are hooked up to a single interrupt. In the case of R-Car H1, either external IRQ pins or GPIOs with interrupts can be used for on-board interrupts. For external IRQs 4 pins are supported, and in the case of GPIO there are 202 GPIOs exposed as 202 interrupts, hooked up via 6 driver instances to the GIC and the Cortex-A9 quad. At this point the driver interfaces as a regular platform device driver. In the future DT support will be submitted as an incremental feature patch. Signed-off-by: Magnus Damm Reviewed-by: Linus Walleij Signed-off-by: Simon Horman --- drivers/gpio/Kconfig | 6 + drivers/gpio/Makefile | 1 + drivers/gpio/gpio-rcar.c | 373 ++++++++++++++++++++++++++++++++ include/linux/platform_data/gpio-rcar.h | 25 +++ 4 files changed, 405 insertions(+) create mode 100644 drivers/gpio/gpio-rcar.c create mode 100644 include/linux/platform_data/gpio-rcar.h (limited to 'include/linux') diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index 93aaadf99f28..d766e3cbef18 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -204,6 +204,12 @@ config GPIO_PXA help Say yes here to support the PXA GPIO device +config GPIO_RCAR + tristate "Renesas R-Car GPIO" + depends on ARM + help + Say yes here to support GPIO on Renesas R-Car SoCs. + config GPIO_SPEAR_SPICS bool "ST SPEAr13xx SPI Chip Select as GPIO support" depends on PLAT_SPEAR diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile index 22e07bc9fcb5..b41c74d45287 100644 --- a/drivers/gpio/Makefile +++ b/drivers/gpio/Makefile @@ -57,6 +57,7 @@ obj-$(CONFIG_GPIO_PL061) += gpio-pl061.o obj-$(CONFIG_GPIO_PXA) += gpio-pxa.o obj-$(CONFIG_GPIO_RC5T583) += gpio-rc5t583.o obj-$(CONFIG_GPIO_RDC321X) += gpio-rdc321x.o +obj-$(CONFIG_GPIO_RCAR) += gpio-rcar.o obj-$(CONFIG_PLAT_SAMSUNG) += gpio-samsung.o obj-$(CONFIG_ARCH_SA1100) += gpio-sa1100.o obj-$(CONFIG_GPIO_SCH) += gpio-sch.o diff --git a/drivers/gpio/gpio-rcar.c b/drivers/gpio/gpio-rcar.c new file mode 100644 index 000000000000..581ba56131a7 --- /dev/null +++ b/drivers/gpio/gpio-rcar.c @@ -0,0 +1,373 @@ +/* + * Renesas R-Car GPIO Support + * + * Copyright (C) 2013 Magnus Damm + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct gpio_rcar_priv { + void __iomem *base; + spinlock_t lock; + struct gpio_rcar_config config; + struct platform_device *pdev; + struct gpio_chip gpio_chip; + struct irq_chip irq_chip; + struct irq_domain *irq_domain; +}; + +#define IOINTSEL 0x00 +#define INOUTSEL 0x04 +#define OUTDT 0x08 +#define INDT 0x0c +#define INTDT 0x10 +#define INTCLR 0x14 +#define INTMSK 0x18 +#define MSKCLR 0x1c +#define POSNEG 0x20 +#define EDGLEVEL 0x24 +#define FILONOFF 0x28 + +static inline u32 gpio_rcar_read(struct gpio_rcar_priv *p, int offs) +{ + return ioread32(p->base + offs); +} + +static inline void gpio_rcar_write(struct gpio_rcar_priv *p, int offs, + u32 value) +{ + iowrite32(value, p->base + offs); +} + +static void gpio_rcar_modify_bit(struct gpio_rcar_priv *p, int offs, + int bit, bool value) +{ + u32 tmp = gpio_rcar_read(p, offs); + + if (value) + tmp |= BIT(bit); + else + tmp &= ~BIT(bit); + + gpio_rcar_write(p, offs, tmp); +} + +static void gpio_rcar_irq_disable(struct irq_data *d) +{ + struct gpio_rcar_priv *p = irq_data_get_irq_chip_data(d); + + gpio_rcar_write(p, INTMSK, ~BIT(irqd_to_hwirq(d))); +} + +static void gpio_rcar_irq_enable(struct irq_data *d) +{ + struct gpio_rcar_priv *p = irq_data_get_irq_chip_data(d); + + gpio_rcar_write(p, MSKCLR, BIT(irqd_to_hwirq(d))); +} + +static void gpio_rcar_config_interrupt_input_mode(struct gpio_rcar_priv *p, + unsigned int hwirq, + bool active_high_rising_edge, + bool level_trigger) +{ + unsigned long flags; + + /* follow steps in the GPIO documentation for + * "Setting Edge-Sensitive Interrupt Input Mode" and + * "Setting Level-Sensitive Interrupt Input Mode" + */ + + spin_lock_irqsave(&p->lock, flags); + + /* Configure postive or negative logic in POSNEG */ + gpio_rcar_modify_bit(p, POSNEG, hwirq, !active_high_rising_edge); + + /* Configure edge or level trigger in EDGLEVEL */ + gpio_rcar_modify_bit(p, EDGLEVEL, hwirq, !level_trigger); + + /* Select "Interrupt Input Mode" in IOINTSEL */ + gpio_rcar_modify_bit(p, IOINTSEL, hwirq, true); + + /* Write INTCLR in case of edge trigger */ + if (!level_trigger) + gpio_rcar_write(p, INTCLR, BIT(hwirq)); + + spin_unlock_irqrestore(&p->lock, flags); +} + +static int gpio_rcar_irq_set_type(struct irq_data *d, unsigned int type) +{ + struct gpio_rcar_priv *p = irq_data_get_irq_chip_data(d); + unsigned int hwirq = irqd_to_hwirq(d); + + dev_dbg(&p->pdev->dev, "sense irq = %d, type = %d\n", hwirq, type); + + switch (type & IRQ_TYPE_SENSE_MASK) { + case IRQ_TYPE_LEVEL_HIGH: + gpio_rcar_config_interrupt_input_mode(p, hwirq, true, true); + break; + case IRQ_TYPE_LEVEL_LOW: + gpio_rcar_config_interrupt_input_mode(p, hwirq, false, true); + break; + case IRQ_TYPE_EDGE_RISING: + gpio_rcar_config_interrupt_input_mode(p, hwirq, true, false); + break; + case IRQ_TYPE_EDGE_FALLING: + gpio_rcar_config_interrupt_input_mode(p, hwirq, false, false); + break; + default: + return -EINVAL; + } + return 0; +} + +static irqreturn_t gpio_rcar_irq_handler(int irq, void *dev_id) +{ + struct gpio_rcar_priv *p = dev_id; + u32 pending; + unsigned int offset, irqs_handled = 0; + + while ((pending = gpio_rcar_read(p, INTDT))) { + offset = __ffs(pending); + gpio_rcar_write(p, INTCLR, BIT(offset)); + generic_handle_irq(irq_find_mapping(p->irq_domain, offset)); + irqs_handled++; + } + + return irqs_handled ? 
IRQ_HANDLED : IRQ_NONE; +} + +static inline struct gpio_rcar_priv *gpio_to_priv(struct gpio_chip *chip) +{ + return container_of(chip, struct gpio_rcar_priv, gpio_chip); +} + +static void gpio_rcar_config_general_input_output_mode(struct gpio_chip *chip, + unsigned int gpio, + bool output) +{ + struct gpio_rcar_priv *p = gpio_to_priv(chip); + unsigned long flags; + + /* follow steps in the GPIO documentation for + * "Setting General Output Mode" and + * "Setting General Input Mode" + */ + + spin_lock_irqsave(&p->lock, flags); + + /* Configure postive logic in POSNEG */ + gpio_rcar_modify_bit(p, POSNEG, gpio, false); + + /* Select "General Input/Output Mode" in IOINTSEL */ + gpio_rcar_modify_bit(p, IOINTSEL, gpio, false); + + /* Select Input Mode or Output Mode in INOUTSEL */ + gpio_rcar_modify_bit(p, INOUTSEL, gpio, output); + + spin_unlock_irqrestore(&p->lock, flags); +} + +static int gpio_rcar_direction_input(struct gpio_chip *chip, unsigned offset) +{ + gpio_rcar_config_general_input_output_mode(chip, offset, false); + return 0; +} + +static int gpio_rcar_get(struct gpio_chip *chip, unsigned offset) +{ + return (int)(gpio_rcar_read(gpio_to_priv(chip), INDT) & BIT(offset)); +} + +static void gpio_rcar_set(struct gpio_chip *chip, unsigned offset, int value) +{ + struct gpio_rcar_priv *p = gpio_to_priv(chip); + unsigned long flags; + + spin_lock_irqsave(&p->lock, flags); + gpio_rcar_modify_bit(p, OUTDT, offset, value); + spin_unlock_irqrestore(&p->lock, flags); +} + +static int gpio_rcar_direction_output(struct gpio_chip *chip, unsigned offset, + int value) +{ + /* write GPIO value to output before selecting output mode of pin */ + gpio_rcar_set(chip, offset, value); + gpio_rcar_config_general_input_output_mode(chip, offset, true); + return 0; +} + +static int gpio_rcar_to_irq(struct gpio_chip *chip, unsigned offset) +{ + return irq_create_mapping(gpio_to_priv(chip)->irq_domain, offset); +} + +static int gpio_rcar_irq_domain_map(struct irq_domain *h, unsigned int virq, + irq_hw_number_t hw) +{ + struct gpio_rcar_priv *p = h->host_data; + + dev_dbg(&p->pdev->dev, "map hw irq = %d, virq = %d\n", (int)hw, virq); + + irq_set_chip_data(virq, h->host_data); + irq_set_chip_and_handler(virq, &p->irq_chip, handle_level_irq); + set_irq_flags(virq, IRQF_VALID); /* kill me now */ + return 0; +} + +static struct irq_domain_ops gpio_rcar_irq_domain_ops = { + .map = gpio_rcar_irq_domain_map, +}; + +static int gpio_rcar_probe(struct platform_device *pdev) +{ + struct gpio_rcar_config *pdata = pdev->dev.platform_data; + struct gpio_rcar_priv *p; + struct resource *io, *irq; + struct gpio_chip *gpio_chip; + struct irq_chip *irq_chip; + const char *name = dev_name(&pdev->dev); + int ret; + + p = devm_kzalloc(&pdev->dev, sizeof(*p), GFP_KERNEL); + if (!p) { + dev_err(&pdev->dev, "failed to allocate driver data\n"); + ret = -ENOMEM; + goto err0; + } + + /* deal with driver instance configuration */ + if (pdata) + p->config = *pdata; + + p->pdev = pdev; + platform_set_drvdata(pdev, p); + spin_lock_init(&p->lock); + + io = platform_get_resource(pdev, IORESOURCE_MEM, 0); + irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0); + + if (!io || !irq) { + dev_err(&pdev->dev, "missing IRQ or IOMEM\n"); + ret = -EINVAL; + goto err0; + } + + p->base = devm_ioremap_nocache(&pdev->dev, io->start, + resource_size(io)); + if (!p->base) { + dev_err(&pdev->dev, "failed to remap I/O memory\n"); + ret = -ENXIO; + goto err0; + } + + gpio_chip = &p->gpio_chip; + gpio_chip->direction_input = gpio_rcar_direction_input; + 
gpio_chip->get = gpio_rcar_get; + gpio_chip->direction_output = gpio_rcar_direction_output; + gpio_chip->set = gpio_rcar_set; + gpio_chip->to_irq = gpio_rcar_to_irq; + gpio_chip->label = name; + gpio_chip->owner = THIS_MODULE; + gpio_chip->base = p->config.gpio_base; + gpio_chip->ngpio = p->config.number_of_pins; + + irq_chip = &p->irq_chip; + irq_chip->name = name; + irq_chip->irq_mask = gpio_rcar_irq_disable; + irq_chip->irq_unmask = gpio_rcar_irq_enable; + irq_chip->irq_enable = gpio_rcar_irq_enable; + irq_chip->irq_disable = gpio_rcar_irq_disable; + irq_chip->irq_set_type = gpio_rcar_irq_set_type; + irq_chip->flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_SET_TYPE_MASKED; + + p->irq_domain = irq_domain_add_simple(pdev->dev.of_node, + p->config.number_of_pins, + p->config.irq_base, + &gpio_rcar_irq_domain_ops, p); + if (!p->irq_domain) { + ret = -ENXIO; + dev_err(&pdev->dev, "cannot initialize irq domain\n"); + goto err1; + } + + if (devm_request_irq(&pdev->dev, irq->start, + gpio_rcar_irq_handler, 0, name, p)) { + dev_err(&pdev->dev, "failed to request IRQ\n"); + ret = -ENOENT; + goto err1; + } + + ret = gpiochip_add(gpio_chip); + if (ret) { + dev_err(&pdev->dev, "failed to add GPIO controller\n"); + goto err1; + } + + dev_info(&pdev->dev, "driving %d GPIOs\n", p->config.number_of_pins); + + /* warn in case of mismatch if irq base is specified */ + if (p->config.irq_base) { + ret = irq_find_mapping(p->irq_domain, 0); + if (p->config.irq_base != ret) + dev_warn(&pdev->dev, "irq base mismatch (%u/%u)\n", + p->config.irq_base, ret); + } + + return 0; + +err1: + irq_domain_remove(p->irq_domain); +err0: + return ret; +} + +static int gpio_rcar_remove(struct platform_device *pdev) +{ + struct gpio_rcar_priv *p = platform_get_drvdata(pdev); + int ret; + + ret = gpiochip_remove(&p->gpio_chip); + if (ret) + return ret; + + irq_domain_remove(p->irq_domain); + return 0; +} + +static struct platform_driver gpio_rcar_device_driver = { + .probe = gpio_rcar_probe, + .remove = gpio_rcar_remove, + .driver = { + .name = "gpio_rcar", + } +}; + +module_platform_driver(gpio_rcar_device_driver); + +MODULE_AUTHOR("Magnus Damm"); +MODULE_DESCRIPTION("Renesas R-Car GPIO Driver"); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/platform_data/gpio-rcar.h b/include/linux/platform_data/gpio-rcar.h new file mode 100644 index 000000000000..bebfcd86fb80 --- /dev/null +++ b/include/linux/platform_data/gpio-rcar.h @@ -0,0 +1,25 @@ +/* + * Renesas R-Car GPIO Support + * + * Copyright (C) 2013 Magnus Damm + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __GPIO_RCAR_H__ +#define __GPIO_RCAR_H__ + +struct gpio_rcar_config { + unsigned int gpio_base; + unsigned int irq_base; + unsigned int number_of_pins; +}; + +#endif /* __GPIO_RCAR_H__ */ -- cgit From dc3465a943ed2dd5de37d3d60df5c4e11c49efcb Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Sun, 10 Mar 2013 03:27:00 +0100 Subject: gpio-rcar: Add pinctrl support Register the GPIO pin range, and request and free GPIO pins using the pinctrl API. 
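[Illustration, not part of the series: a minimal sketch of how board code might instantiate one driver instance with this platform data, including the pctl_name field added below. The register base, IRQ number and the "pfc-r8a7779" pin controller name are invented placeholders, not values taken from the patches.]

#include <linux/kernel.h>
#include <linux/ioport.h>
#include <linux/platform_device.h>
#include <linux/platform_data/gpio-rcar.h>

/* Hypothetical register window and interrupt for GPIO bank 1. */
static struct resource gpio1_resources[] = {
	DEFINE_RES_MEM(0xffc41000, 0x2c),
	DEFINE_RES_IRQ(142),
};

static struct gpio_rcar_config gpio1_platform_data = {
	.gpio_base	= 32,			/* bank 1 covers GPIOs 32..63 */
	.irq_base	= 0,			/* let the irq_domain allocate */
	.number_of_pins	= 32,
	.pctl_name	= "pfc-r8a7779",	/* assumed pinctrl device name */
};

static struct platform_device gpio1_device = {
	.name		= "gpio_rcar",		/* matches the platform driver */
	.id		= 1,
	.resource	= gpio1_resources,
	.num_resources	= ARRAY_SIZE(gpio1_resources),
	.dev		= {
		.platform_data	= &gpio1_platform_data,
	},
};

/* Board init code would then call platform_device_register(&gpio1_device);
 * matching happens on the "gpio_rcar" driver name. */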
Signed-off-by: Laurent Pinchart Acked-by: Linus Walleij Signed-off-by: Simon Horman --- drivers/gpio/gpio-rcar.c | 23 +++++++++++++++++++++++ include/linux/platform_data/gpio-rcar.h | 1 + 2 files changed, 24 insertions(+) (limited to 'include/linux') diff --git a/drivers/gpio/gpio-rcar.c b/drivers/gpio/gpio-rcar.c index 581ba56131a7..b4ca450947b8 100644 --- a/drivers/gpio/gpio-rcar.c +++ b/drivers/gpio/gpio-rcar.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -190,6 +191,21 @@ static void gpio_rcar_config_general_input_output_mode(struct gpio_chip *chip, spin_unlock_irqrestore(&p->lock, flags); } +static int gpio_rcar_request(struct gpio_chip *chip, unsigned offset) +{ + return pinctrl_request_gpio(chip->base + offset); +} + +static void gpio_rcar_free(struct gpio_chip *chip, unsigned offset) +{ + pinctrl_free_gpio(chip->base + offset); + + /* Set the GPIO as an input to ensure that the next GPIO request won't + * drive the GPIO pin as an output. + */ + gpio_rcar_config_general_input_output_mode(chip, offset, false); +} + static int gpio_rcar_direction_input(struct gpio_chip *chip, unsigned offset) { gpio_rcar_config_general_input_output_mode(chip, offset, false); @@ -285,6 +301,8 @@ static int gpio_rcar_probe(struct platform_device *pdev) } gpio_chip = &p->gpio_chip; + gpio_chip->request = gpio_rcar_request; + gpio_chip->free = gpio_rcar_free; gpio_chip->direction_input = gpio_rcar_direction_input; gpio_chip->get = gpio_rcar_get; gpio_chip->direction_output = gpio_rcar_direction_output; @@ -337,6 +355,11 @@ static int gpio_rcar_probe(struct platform_device *pdev) p->config.irq_base, ret); } + ret = gpiochip_add_pin_range(gpio_chip, p->config.pctl_name, 0, + gpio_chip->base, gpio_chip->ngpio); + if (ret < 0) + dev_warn(&pdev->dev, "failed to add pin range\n"); + return 0; err1: diff --git a/include/linux/platform_data/gpio-rcar.h b/include/linux/platform_data/gpio-rcar.h index bebfcd86fb80..b253f77a7ddf 100644 --- a/include/linux/platform_data/gpio-rcar.h +++ b/include/linux/platform_data/gpio-rcar.h @@ -20,6 +20,7 @@ struct gpio_rcar_config { unsigned int gpio_base; unsigned int irq_base; unsigned int number_of_pins; + const char *pctl_name; }; #endif /* __GPIO_RCAR_H__ */ -- cgit From d7ca4c755a82eda8f0fc4f72c52130056b28c7d2 Mon Sep 17 00:00:00 2001 From: "Manjunathappa, Prakash" Date: Thu, 28 Mar 2013 18:41:59 +0530 Subject: ARM: davinci: mmc: derive version information from device name Remove specifying mmc controller IP version information via platform data, instead specify device name so that driver derives it from platform_device_id table. Also change the clock node name to match the changed dev_id. Tested on da850-evm to make sure driver loads without clk_get failures. 
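[Illustration, not part of the patch: the mechanism applied above — carrying the controller version in the platform_device_id table instead of in platform data — reduced to a self-contained sketch. Only the two device-name strings come from the patch; the "demo" identifiers are made up.]

#include <linux/module.h>
#include <linux/platform_device.h>

enum { MMC_CTLR_VERSION_1, MMC_CTLR_VERSION_2 };	/* mirrors the driver */

static const struct platform_device_id demo_mmc_devtype[] = {
	{ .name = "dm6441-mmc", .driver_data = MMC_CTLR_VERSION_1 },
	{ .name = "da830-mmc",  .driver_data = MMC_CTLR_VERSION_2 },
	{ /* sentinel */ },
};
MODULE_DEVICE_TABLE(platform, demo_mmc_devtype);

static int demo_mmc_probe(struct platform_device *pdev)
{
	const struct platform_device_id *id = platform_get_device_id(pdev);

	/* id is the table entry the device matched on; its driver_data
	 * replaces the version field formerly passed via platform data. */
	if (id)
		dev_info(&pdev->dev, "controller version %lu\n",
			 id->driver_data);
	return 0;
}

static struct platform_driver demo_mmc_driver = {
	.probe		= demo_mmc_probe,
	.id_table	= demo_mmc_devtype,
	.driver		= { .name = "demo-mmc" },
};
module_platform_driver(demo_mmc_driver);
MODULE_LICENSE("GPL");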
Signed-off-by: Manjunathappa, Prakash Reviewed-by: Sekhar Nori Acked-by: Arnd Bergmann Acked-by: Chris Ball Signed-off-by: Sekhar Nori --- arch/arm/mach-davinci/board-da830-evm.c | 1 - arch/arm/mach-davinci/board-da850-evm.c | 2 -- arch/arm/mach-davinci/board-dm355-evm.c | 1 - arch/arm/mach-davinci/board-dm365-evm.c | 1 - arch/arm/mach-davinci/board-dm644x-evm.c | 1 - arch/arm/mach-davinci/board-neuros-osd2.c | 1 - arch/arm/mach-davinci/board-omapl138-hawk.c | 1 - arch/arm/mach-davinci/board-tnetv107x-evm.c | 1 - arch/arm/mach-davinci/da830.c | 2 +- arch/arm/mach-davinci/da850.c | 4 ++-- arch/arm/mach-davinci/devices-da8xx.c | 4 ++-- arch/arm/mach-davinci/devices-tnetv107x.c | 4 ++-- arch/arm/mach-davinci/devices.c | 6 ++++-- arch/arm/mach-davinci/dm355.c | 4 ++-- arch/arm/mach-davinci/dm365.c | 4 ++-- arch/arm/mach-davinci/dm644x.c | 2 +- arch/arm/mach-davinci/tnetv107x.c | 4 ++-- drivers/mmc/host/davinci_mmc.c | 18 +++++++++++++++++- include/linux/platform_data/mmc-davinci.h | 3 --- 19 files changed, 35 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-davinci/board-da830-evm.c b/arch/arm/mach-davinci/board-da830-evm.c index 6da25eebf911..12e6f756361d 100644 --- a/arch/arm/mach-davinci/board-da830-evm.c +++ b/arch/arm/mach-davinci/board-da830-evm.c @@ -246,7 +246,6 @@ static struct davinci_mmc_config da830_evm_mmc_config = { .wires = 8, .max_freq = 50000000, .caps = MMC_CAP_MMC_HIGHSPEED | MMC_CAP_SD_HIGHSPEED, - .version = MMC_CTLR_VERSION_2, }; static inline void da830_evm_init_mmc(void) diff --git a/arch/arm/mach-davinci/board-da850-evm.c b/arch/arm/mach-davinci/board-da850-evm.c index c2dfe06563df..dcc8710936a5 100644 --- a/arch/arm/mach-davinci/board-da850-evm.c +++ b/arch/arm/mach-davinci/board-da850-evm.c @@ -802,7 +802,6 @@ static struct davinci_mmc_config da850_mmc_config = { .wires = 4, .max_freq = 50000000, .caps = MMC_CAP_MMC_HIGHSPEED | MMC_CAP_SD_HIGHSPEED, - .version = MMC_CTLR_VERSION_2, }; static const short da850_evm_mmcsd0_pins[] __initconst = { @@ -1372,7 +1371,6 @@ static struct davinci_mmc_config da850_wl12xx_mmc_config = { .max_freq = 25000000, .caps = MMC_CAP_4_BIT_DATA | MMC_CAP_NONREMOVABLE | MMC_CAP_POWER_OFF_CARD, - .version = MMC_CTLR_VERSION_2, }; static const short da850_wl12xx_pins[] __initconst = { diff --git a/arch/arm/mach-davinci/board-dm355-evm.c b/arch/arm/mach-davinci/board-dm355-evm.c index 147b8e1a4407..bfdf8b979a64 100644 --- a/arch/arm/mach-davinci/board-dm355-evm.c +++ b/arch/arm/mach-davinci/board-dm355-evm.c @@ -280,7 +280,6 @@ static struct davinci_mmc_config dm355evm_mmc_config = { .wires = 4, .max_freq = 50000000, .caps = MMC_CAP_MMC_HIGHSPEED | MMC_CAP_SD_HIGHSPEED, - .version = MMC_CTLR_VERSION_1, }; /* Don't connect anything to J10 unless you're only using USB host diff --git a/arch/arm/mach-davinci/board-dm365-evm.c b/arch/arm/mach-davinci/board-dm365-evm.c index c2d4958a0cb6..4cfdd9109e19 100644 --- a/arch/arm/mach-davinci/board-dm365-evm.c +++ b/arch/arm/mach-davinci/board-dm365-evm.c @@ -253,7 +253,6 @@ static struct davinci_mmc_config dm365evm_mmc_config = { .wires = 4, .max_freq = 50000000, .caps = MMC_CAP_MMC_HIGHSPEED | MMC_CAP_SD_HIGHSPEED, - .version = MMC_CTLR_VERSION_2, }; static void dm365evm_emac_configure(void) diff --git a/arch/arm/mach-davinci/board-dm644x-evm.c b/arch/arm/mach-davinci/board-dm644x-evm.c index 71735e7797cc..c0206d5f2bf6 100644 --- a/arch/arm/mach-davinci/board-dm644x-evm.c +++ b/arch/arm/mach-davinci/board-dm644x-evm.c @@ -570,7 +570,6 @@ static struct 
davinci_mmc_config dm6446evm_mmc_config = { .get_cd = dm6444evm_mmc_get_cd, .get_ro = dm6444evm_mmc_get_ro, .wires = 4, - .version = MMC_CTLR_VERSION_1 }; static struct i2c_board_info __initdata i2c_info[] = { diff --git a/arch/arm/mach-davinci/board-neuros-osd2.c b/arch/arm/mach-davinci/board-neuros-osd2.c index 1c98107527fa..b70e83c03bed 100644 --- a/arch/arm/mach-davinci/board-neuros-osd2.c +++ b/arch/arm/mach-davinci/board-neuros-osd2.c @@ -164,7 +164,6 @@ static void __init davinci_ntosd2_map_io(void) static struct davinci_mmc_config davinci_ntosd2_mmc_config = { .wires = 4, - .version = MMC_CTLR_VERSION_1 }; diff --git a/arch/arm/mach-davinci/board-omapl138-hawk.c b/arch/arm/mach-davinci/board-omapl138-hawk.c index 5a2bd44da54d..328dbd8a37f5 100644 --- a/arch/arm/mach-davinci/board-omapl138-hawk.c +++ b/arch/arm/mach-davinci/board-omapl138-hawk.c @@ -136,7 +136,6 @@ static struct davinci_mmc_config da850_mmc_config = { .wires = 4, .max_freq = 50000000, .caps = MMC_CAP_MMC_HIGHSPEED | MMC_CAP_SD_HIGHSPEED, - .version = MMC_CTLR_VERSION_2, }; static __init void omapl138_hawk_mmc_init(void) diff --git a/arch/arm/mach-davinci/board-tnetv107x-evm.c b/arch/arm/mach-davinci/board-tnetv107x-evm.c index 4f416023d4e2..ba798370fc96 100644 --- a/arch/arm/mach-davinci/board-tnetv107x-evm.c +++ b/arch/arm/mach-davinci/board-tnetv107x-evm.c @@ -85,7 +85,6 @@ static struct davinci_mmc_config mmc_config = { .wires = 4, .max_freq = 50000000, .caps = MMC_CAP_MMC_HIGHSPEED | MMC_CAP_SD_HIGHSPEED, - .version = MMC_CTLR_VERSION_1, }; static const short sdio1_pins[] __initconst = { diff --git a/arch/arm/mach-davinci/da830.c b/arch/arm/mach-davinci/da830.c index 678a54a64dae..abbaf0270be6 100644 --- a/arch/arm/mach-davinci/da830.c +++ b/arch/arm/mach-davinci/da830.c @@ -394,7 +394,7 @@ static struct clk_lookup da830_clks[] = { CLK(NULL, "tpcc", &tpcc_clk), CLK(NULL, "tptc0", &tptc0_clk), CLK(NULL, "tptc1", &tptc1_clk), - CLK("davinci_mmc.0", NULL, &mmcsd_clk), + CLK("da830-mmc.0", NULL, &mmcsd_clk), CLK(NULL, "uart0", &uart0_clk), CLK(NULL, "uart1", &uart1_clk), CLK(NULL, "uart2", &uart2_clk), diff --git a/arch/arm/mach-davinci/da850.c b/arch/arm/mach-davinci/da850.c index 2a2f60c54ec6..4d6933848abf 100644 --- a/arch/arm/mach-davinci/da850.c +++ b/arch/arm/mach-davinci/da850.c @@ -463,8 +463,8 @@ static struct clk_lookup da850_clks[] = { CLK("davinci_emac.1", NULL, &emac_clk), CLK("davinci-mcasp.0", NULL, &mcasp_clk), CLK("da8xx_lcdc.0", "fck", &lcdc_clk), - CLK("davinci_mmc.0", NULL, &mmcsd0_clk), - CLK("davinci_mmc.1", NULL, &mmcsd1_clk), + CLK("da830-mmc.0", NULL, &mmcsd0_clk), + CLK("da830-mmc.1", NULL, &mmcsd1_clk), CLK(NULL, "aemif", &aemif_clk), CLK(NULL, "usb11", &usb11_clk), CLK(NULL, "usb20", &usb20_clk), diff --git a/arch/arm/mach-davinci/devices-da8xx.c b/arch/arm/mach-davinci/devices-da8xx.c index fc50243b1481..cb97e07db284 100644 --- a/arch/arm/mach-davinci/devices-da8xx.c +++ b/arch/arm/mach-davinci/devices-da8xx.c @@ -664,7 +664,7 @@ static struct resource da8xx_mmcsd0_resources[] = { }; static struct platform_device da8xx_mmcsd0_device = { - .name = "davinci_mmc", + .name = "da830-mmc", .id = 0, .num_resources = ARRAY_SIZE(da8xx_mmcsd0_resources), .resource = da8xx_mmcsd0_resources, @@ -701,7 +701,7 @@ static struct resource da850_mmcsd1_resources[] = { }; static struct platform_device da850_mmcsd1_device = { - .name = "davinci_mmc", + .name = "da830-mmc", .id = 1, .num_resources = ARRAY_SIZE(da850_mmcsd1_resources), .resource = da850_mmcsd1_resources, diff --git 
a/arch/arm/mach-davinci/devices-tnetv107x.c b/arch/arm/mach-davinci/devices-tnetv107x.c index 773ab07a71a0..cfb194df18ed 100644 --- a/arch/arm/mach-davinci/devices-tnetv107x.c +++ b/arch/arm/mach-davinci/devices-tnetv107x.c @@ -218,7 +218,7 @@ static u64 mmc1_dma_mask = DMA_BIT_MASK(32); static struct platform_device mmc_devices[2] = { { - .name = "davinci_mmc", + .name = "dm6441-mmc", .id = 0, .dev = { .dma_mask = &mmc0_dma_mask, @@ -228,7 +228,7 @@ static struct platform_device mmc_devices[2] = { .resource = mmc0_resources }, { - .name = "davinci_mmc", + .name = "dm6441-mmc", .id = 1, .dev = { .dma_mask = &mmc1_dma_mask, diff --git a/arch/arm/mach-davinci/devices.c b/arch/arm/mach-davinci/devices.c index 4c48a36ee567..f6927df2dda8 100644 --- a/arch/arm/mach-davinci/devices.c +++ b/arch/arm/mach-davinci/devices.c @@ -150,7 +150,7 @@ static struct resource mmcsd0_resources[] = { }; static struct platform_device davinci_mmcsd0_device = { - .name = "davinci_mmc", + .name = "dm6441-mmc", .id = 0, .dev = { .dma_mask = &mmcsd0_dma_mask, @@ -187,7 +187,7 @@ static struct resource mmcsd1_resources[] = { }; static struct platform_device davinci_mmcsd1_device = { - .name = "davinci_mmc", + .name = "dm6441-mmc", .id = 1, .dev = { .dma_mask = &mmcsd1_dma_mask, @@ -235,6 +235,7 @@ void __init davinci_setup_mmc(int module, struct davinci_mmc_config *config) mmcsd1_resources[0].end = DM365_MMCSD1_BASE + SZ_4K - 1; mmcsd1_resources[2].start = IRQ_DM365_SDIOINT1; + davinci_mmcsd1_device.name = "da830-mmc"; } else break; @@ -256,6 +257,7 @@ void __init davinci_setup_mmc(int module, struct davinci_mmc_config *config) mmcsd0_resources[0].end = DM365_MMCSD0_BASE + SZ_4K - 1; mmcsd0_resources[2].start = IRQ_DM365_SDIOINT0; + davinci_mmcsd0_device.name = "da830-mmc"; } else if (cpu_is_davinci_dm644x()) { /* REVISIT: should this be in board-init code? 
*/ /* Power-on 3.3V IO cells */ diff --git a/arch/arm/mach-davinci/dm355.c b/arch/arm/mach-davinci/dm355.c index b49c3b77d55e..87e6104f45e6 100644 --- a/arch/arm/mach-davinci/dm355.c +++ b/arch/arm/mach-davinci/dm355.c @@ -361,8 +361,8 @@ static struct clk_lookup dm355_clks[] = { CLK("i2c_davinci.1", NULL, &i2c_clk), CLK("davinci-mcbsp.0", NULL, &asp0_clk), CLK("davinci-mcbsp.1", NULL, &asp1_clk), - CLK("davinci_mmc.0", NULL, &mmcsd0_clk), - CLK("davinci_mmc.1", NULL, &mmcsd1_clk), + CLK("dm6441-mmc.0", NULL, &mmcsd0_clk), + CLK("dm6441-mmc.1", NULL, &mmcsd1_clk), CLK("spi_davinci.0", NULL, &spi0_clk), CLK("spi_davinci.1", NULL, &spi1_clk), CLK("spi_davinci.2", NULL, &spi2_clk), diff --git a/arch/arm/mach-davinci/dm365.c b/arch/arm/mach-davinci/dm365.c index 6c3980540be0..2791df9187b3 100644 --- a/arch/arm/mach-davinci/dm365.c +++ b/arch/arm/mach-davinci/dm365.c @@ -454,8 +454,8 @@ static struct clk_lookup dm365_clks[] = { CLK(NULL, "uart0", &uart0_clk), CLK(NULL, "uart1", &uart1_clk), CLK("i2c_davinci.1", NULL, &i2c_clk), - CLK("davinci_mmc.0", NULL, &mmcsd0_clk), - CLK("davinci_mmc.1", NULL, &mmcsd1_clk), + CLK("da830-mmc.0", NULL, &mmcsd0_clk), + CLK("da830-mmc.1", NULL, &mmcsd1_clk), CLK("spi_davinci.0", NULL, &spi0_clk), CLK("spi_davinci.1", NULL, &spi1_clk), CLK("spi_davinci.2", NULL, &spi2_clk), diff --git a/arch/arm/mach-davinci/dm644x.c b/arch/arm/mach-davinci/dm644x.c index db1dd92e00af..ab6bf54c65c7 100644 --- a/arch/arm/mach-davinci/dm644x.c +++ b/arch/arm/mach-davinci/dm644x.c @@ -310,7 +310,7 @@ static struct clk_lookup dm644x_clks[] = { CLK("i2c_davinci.1", NULL, &i2c_clk), CLK("palm_bk3710", NULL, &ide_clk), CLK("davinci-mcbsp", NULL, &asp_clk), - CLK("davinci_mmc.0", NULL, &mmcsd_clk), + CLK("dm6441-mmc.0", NULL, &mmcsd_clk), CLK(NULL, "spi", &spi_clk), CLK(NULL, "gpio", &gpio_clk), CLK(NULL, "usb", &usb_clk), diff --git a/arch/arm/mach-davinci/tnetv107x.c b/arch/arm/mach-davinci/tnetv107x.c index dc1a209b9b66..3b2a70d43efa 100644 --- a/arch/arm/mach-davinci/tnetv107x.c +++ b/arch/arm/mach-davinci/tnetv107x.c @@ -272,7 +272,7 @@ static struct clk_lookup clks[] = { CLK("tnetv107x-keypad.0", NULL, &clk_keypad), CLK(NULL, "clk_gpio", &clk_gpio), CLK(NULL, "clk_mdio", &clk_mdio), - CLK("davinci_mmc.0", NULL, &clk_sdio0), + CLK("dm6441-mmc.0", NULL, &clk_sdio0), CLK(NULL, "uart0", &clk_uart0), CLK(NULL, "uart1", &clk_uart1), CLK(NULL, "timer0", &clk_timer0), @@ -292,7 +292,7 @@ static struct clk_lookup clks[] = { CLK(NULL, "clk_system", &clk_system), CLK(NULL, "clk_imcop", &clk_imcop), CLK(NULL, "clk_spare", &clk_spare), - CLK("davinci_mmc.1", NULL, &clk_sdio1), + CLK("dm6441-mmc.1", NULL, &clk_sdio1), CLK(NULL, "clk_ddr2_vrst", &clk_ddr2_vrst), CLK(NULL, "clk_ddr2_vctl_rst", &clk_ddr2_vctl_rst), CLK(NULL, NULL, NULL), diff --git a/drivers/mmc/host/davinci_mmc.c b/drivers/mmc/host/davinci_mmc.c index 20636772c09b..b5f1c019ecad 100644 --- a/drivers/mmc/host/davinci_mmc.c +++ b/drivers/mmc/host/davinci_mmc.c @@ -1157,6 +1157,18 @@ static void __init init_mmcsd_host(struct mmc_davinci_host *host) mmc_davinci_reset_ctrl(host, 0); } +static struct platform_device_id davinci_mmc_devtype[] = { + { + .name = "dm6441-mmc", + .driver_data = MMC_CTLR_VERSION_1, + }, { + .name = "da830-mmc", + .driver_data = MMC_CTLR_VERSION_2, + }, + {}, +}; +MODULE_DEVICE_TABLE(platform, davinci_mmc_devtype); + static int __init davinci_mmcsd_probe(struct platform_device *pdev) { struct davinci_mmc_config *pdata = pdev->dev.platform_data; @@ -1165,6 +1177,7 @@ static int __init davinci_mmcsd_probe(struct 
platform_device *pdev) struct resource *r, *mem = NULL; int ret = 0, irq = 0; size_t mem_size; + const struct platform_device_id *id_entry; /* REVISIT: when we're fully converted, fail if pdata is NULL */ @@ -1237,7 +1250,9 @@ static int __init davinci_mmcsd_probe(struct platform_device *pdev) if (pdata && (pdata->wires == 8)) mmc->caps |= (MMC_CAP_4_BIT_DATA | MMC_CAP_8_BIT_DATA); - host->version = pdata->version; + id_entry = platform_get_device_id(pdev); + if (id_entry) + host->version = id_entry->driver_data; mmc->ops = &mmc_davinci_ops; mmc->f_min = 312500; @@ -1408,6 +1423,7 @@ static struct platform_driver davinci_mmcsd_driver = { .pm = davinci_mmcsd_pm_ops, }, .remove = __exit_p(davinci_mmcsd_remove), + .id_table = davinci_mmc_devtype, }; static int __init davinci_mmcsd_init(void) diff --git a/include/linux/platform_data/mmc-davinci.h b/include/linux/platform_data/mmc-davinci.h index 5ba6b22ce338..9cea4ee377b5 100644 --- a/include/linux/platform_data/mmc-davinci.h +++ b/include/linux/platform_data/mmc-davinci.h @@ -23,9 +23,6 @@ struct davinci_mmc_config { /* any additional host capabilities: OR'd in to mmc->f_caps */ u32 caps; - /* Version of the MMC/SD controller */ - u8 version; - /* Number of sg segments */ u8 nr_sg; }; -- cgit From 88af8bbe4ef781031ad3370847553f3b42ba0076 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Sun, 23 Dec 2012 21:10:24 +0100 Subject: usb: gadget: the start of the configfs interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit |# modprobe dummy_hcd num=2 |# modprobe libcomposite |# lsmod |Module Size Used by |libcomposite 31648 0 |dummy_hcd 19871 0 |# mkdir /sys/kernel/config/usb_gadget/oha |# cd /sys/kernel/config/usb_gadget/oha |# mkdir configs/def.1 |# mkdir configs/def.2 |# mkdir functions/acm.ttyS1 |# mkdir strings/0x1 |mkdir: cannot create directory `strings/0x1': Invalid argument |# mkdir strings/0x409 |# mkdir strings/1033 |mkdir: cannot create directory `strings/1033': File exists |# mkdir strings/1032 |# mkdir configs/def.1/strings/0x409 |# mkdir configs/def.2/strings/0x409 |#find . -ls | 975 0 drwxr-xr-x 5 root root 0 Dec 23 17:40 . 
| 978 0 drwxr-xr-x 4 root root 0 Dec 23 17:43 ./strings | 4100 0 drwxr-xr-x 2 root root 0 Dec 23 17:43 ./strings/1032 | 995 0 -rw-r--r-- 1 root root 4096 Dec 23 17:43 ./strings/1032/serialnumber | 996 0 -rw-r--r-- 1 root root 4096 Dec 23 17:43 ./strings/1032/product | 997 0 -rw-r--r-- 1 root root 4096 Dec 23 17:43 ./strings/1032/manufacturer | 2002 0 drwxr-xr-x 2 root root 0 Dec 23 17:41 ./strings/0x409 | 998 0 -rw-r--r-- 1 root root 4096 Dec 23 17:43 ./strings/0x409/serialnumber | 999 0 -rw-r--r-- 1 root root 4096 Dec 23 17:43 ./strings/0x409/product | 1000 0 -rw-r--r-- 1 root root 4096 Dec 23 17:43 ./strings/0x409/manufacturer | 977 0 drwxr-xr-x 4 root root 0 Dec 23 17:41 ./configs | 4081 0 drwxr-xr-x 3 root root 0 Dec 23 17:41 ./configs/def.2 | 4082 0 drwxr-xr-x 3 root root 0 Dec 23 17:42 ./configs/def.2/strings | 2016 0 drwxr-xr-x 2 root root 0 Dec 23 17:42 ./configs/def.2/strings/0x409 | 1001 0 -rw-r--r-- 1 root root 4096 Dec 23 17:43 ./configs/def.2/strings/0x409/configuration | 1002 0 -rw-r--r-- 1 root root 4096 Dec 23 17:43 ./configs/def.2/bmAttributes | 1003 0 -rw-r--r-- 1 root root 4096 Dec 23 17:43 ./configs/def.2/MaxPower | 979 0 drwxr-xr-x 3 root root 0 Dec 23 17:42 ./configs/def.1 | 980 0 drwxr-xr-x 3 root root 0 Dec 23 17:42 ./configs/def.1/strings | 5122 0 drwxr-xr-x 2 root root 0 Dec 23 17:42 ./configs/def.1/strings/0x409 | 1004 0 -rw-r--r-- 1 root root 4096 Dec 23 17:43 ./configs/def.1/strings/0x409/configuration | 1005 0 -rw-r--r-- 1 root root 4096 Dec 23 17:43 ./configs/def.1/bmAttributes | 1006 0 -rw-r--r-- 1 root root 4096 Dec 23 17:43 ./configs/def.1/MaxPower | 976 0 drwxr-xr-x 3 root root 0 Dec 23 17:41 ./functions | 981 0 drwxr-xr-x 2 root root 0 Dec 23 17:41 ./functions/acm.ttyS1 | 1007 0 -r--r--r-- 1 root root 4096 Dec 23 17:43 ./functions/acm.ttyS1/port_num | 1008 0 -rw-r--r-- 1 root root 4096 Dec 23 17:43 ./UDC | 1009 0 -rw-r--r-- 1 root root 4096 Dec 23 17:43 ./bcdUSB | 1010 0 -rw-r--r-- 1 root root 4096 Dec 23 17:43 ./bcdDevice | 1011 0 -rw-r--r-- 1 root root 4096 Dec 23 17:43 ./idProduct | 1012 0 -rw-r--r-- 1 root root 4096 Dec 23 17:43 ./idVendor | 1013 0 -rw-r--r-- 1 root root 4096 Dec 23 17:43 ./bMaxPacketSize0 | 1014 0 -rw-r--r-- 1 root root 4096 Dec 23 17:43 ./bDeviceProtocol | 1015 0 -rw-r--r-- 1 root root 4096 Dec 23 17:43 ./bDeviceSubClass | 1016 0 -rw-r--r-- 1 root root 4096 Dec 23 17:43 ./bDeviceClass |# cat functions/acm.ttyS1/port_num |0 |# ls -lah /dev/ttyGS* |crw-rw---T 1 root dialout 252, 0 Dec 23 17:41 /dev/ttyGS0 | |# echo 0x1234 > idProduct |# echo 0xabcd > idVendor |# echo 1122 > strings/0x409/serialnumber |# echo "The manufacturer" > strings/0x409/manufacturer |# echo 1 > strings/1032/manufacturer |# echo 1sa > strings/1032/product |# echo tada > strings/1032/serialnumber |echo "Primary configuration" > configs/def.1/strings/0x409/configuration |# echo "Secondary configuration" > configs/def.2/strings/0x409/configuration |# ln -s functions/acm.ttyS1 configs/def.1/ |# ln -s functions/acm.ttyS1 configs/def.2/ |find configs/def.1/ -ls | 979 0 drwxr-xr-x 3 root root 0 Dec 23 17:49 configs/def.1/ | 6264 0 lrwxrwxrwx 1 root root 0 Dec 23 17:48 configs/def.1/acm.ttyS1 -> ../../../../usb_gadget/oha/functions/acm.ttyS1 | 980 0 drwxr-xr-x 3 root root 0 Dec 23 17:42 configs/def.1/strings | 5122 0 drwxr-xr-x 2 root root 0 Dec 23 17:49 configs/def.1/strings/0x409 | 6284 0 -rw-r--r-- 1 root root 4096 Dec 23 17:47 configs/def.1/strings/0x409/configuration | 6285 0 -rw-r--r-- 1 root root 4096 Dec 23 17:49 configs/def.1/bmAttributes | 6286 0 -rw-r--r-- 1 
root root 4096 Dec 23 17:49 configs/def.1/MaxPower | |echo 120 > configs/def.1/MaxPower | |# ls -lh /sys/class/udc/ |total 0 |lrwxrwxrwx 1 root root 0 Dec 23 17:50 dummy_udc.0 -> ../../devices/platform/dummy_udc.0/udc/dummy_udc.0 |lrwxrwxrwx 1 root root 0 Dec 23 17:50 dummy_udc.1 -> ../../devices/platform/dummy_udc.1/udc/dummy_udc.1 |# echo dummy_udc.0 > UDC |# lsusb |Bus 001 Device 002: ID abcd:1234 Unknown | |lsusb -d abcd:1234 -v |Device Descriptor: … | idVendor 0xabcd Unknown | idProduct 0x1234 | bcdDevice 3.06 | iManufacturer 1 The manufacturer | iProduct 2 | iSerial 3 1122 | bNumConfigurations 2 … |echo "" > UDC v5…v6 - wired up strings with usb_gstrings_attach() - add UDC attribute. Writing "udc-name" will bind the gadget. Writing an empty string (it should contain \n, since 0-byte writes get optimized away) will unbind the UDC from the gadget. The names of available UDCs can be obtained from /sys/class/udc/ v4…v5 - string rework. This will add a strings folder incl. language code like strings/409/manufacturer as suggested by Alan. - rebased on top of the reworked functions.c, which has usb_function_instance, which is available right after "mkdir acm.instance" and can be directly used for configuration via configfs. v3…v4 - moved functions from the root folder down to the gadget as suggested by Michał - configs now have their own configs folder as suggested by Michał. The folder is still name.bConfigurationValue where name becomes the sConfiguration. Is this useful, or should we just stick to configs/bConfigurationValue/? - added configfs support to the ACM function. The port_num attribute is exported by f_acm. An argument has been added to the USB alloc function to distinguish between "old" (use facm_configure() to configure) and the configfs interface (expose a config_node). The port_num is currently a dumb counter. It will require some function re-work to make it work. scheduled for v5: - symlinking a function into a config. v2…v3 - replaced one ifndef by ifdef as suggested by Michał - strstr()/strchr() in function_make as suggested by Michał - replace [iSerialNumber|iProduct|iManufacturer] with [sSerialNumber|sProduct|sManufacturer] as suggested by Alan - added creation of config descriptors v1…v2 - moved gadgets from configfs' root directory into /udcs/ within our "usb_gadget" folder. Requested by Andrzej & Michał - use a dot as a delimiter between function's name and its instance's name as suggested by Michał - renamed all config_item_type, configfs_group_operations, make_group, drop_item as suggested by Andrzej to remain consistent within this file and within other configfs users - Since configfs.c and functions.c are now part of the udc-core module, the module itself is now called udc. Also added a tiny ifdef around init code because udc-core is subsys init and this is too early for configfs in the built-in case. In the module case, we can only have one init function.
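[A side note on the strings/<langid> behaviour seen in the transcript above: the directory name is parsed as a 16-bit USB LANGID, whose low 10 bits hold the primary language and whose upper 6 bits hold the sublanguage. 0x409 and 1033 are the same number (US English: primary 0x009, sublanguage 1), which is why the second mkdir reports "File exists", and 0x1 is rejected because its sublanguage is 0. A standalone userspace sketch of the same check, mirroring check_user_usb_string() below, for illustration only:]

#include <stdio.h>

/* Low 10 bits: primary language; upper 6 bits: sublanguage. */
static int langid_valid(unsigned int num)
{
	unsigned int primary = num & 0x3ff;
	unsigned int sub = num >> 10;

	/* reserved or out-of-range primary language codes */
	if (primary == 0 || (primary >= 0x62 && primary <= 0xfe) ||
	    primary >= 0x100)
		return 0;
	return sub != 0;	/* a sublanguage must be present */
}

int main(void)
{
	printf("0x409 -> %d\n", langid_valid(0x409));	/* 1: US English */
	printf("1033  -> %d\n", langid_valid(1033));	/* 1: same value as 0x409 */
	printf("0x1   -> %d\n", langid_valid(0x1));	/* 0: sublanguage is 0 */
	return 0;
}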
Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Felipe Balbi --- drivers/usb/gadget/Kconfig | 1 + drivers/usb/gadget/Makefile | 2 +- drivers/usb/gadget/composite.c | 1 + drivers/usb/gadget/configfs.c | 1003 +++++++++++++++++++++++++++++++++++ drivers/usb/gadget/f_acm.c | 55 ++ include/linux/usb/composite.h | 3 + include/linux/usb/gadget_configfs.h | 110 ++++ 7 files changed, 1174 insertions(+), 1 deletion(-) create mode 100644 drivers/usb/gadget/configfs.c create mode 100644 include/linux/usb/gadget_configfs.h (limited to 'include/linux') diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig index e1d3e0803cd5..74a29de8f254 100644 --- a/drivers/usb/gadget/Kconfig +++ b/drivers/usb/gadget/Kconfig @@ -493,6 +493,7 @@ endmenu # composite based drivers config USB_LIBCOMPOSITE tristate + select CONFIGFS_FS depends on USB_GADGET config USB_F_ACM diff --git a/drivers/usb/gadget/Makefile b/drivers/usb/gadget/Makefile index 82fb22511356..96e72433cd31 100644 --- a/drivers/usb/gadget/Makefile +++ b/drivers/usb/gadget/Makefile @@ -6,7 +6,7 @@ ccflags-$(CONFIG_USB_GADGET_DEBUG) := -DDEBUG obj-$(CONFIG_USB_GADGET) += udc-core.o obj-$(CONFIG_USB_LIBCOMPOSITE) += libcomposite.o libcomposite-y := usbstring.o config.o epautoconf.o -libcomposite-y += composite.o functions.o +libcomposite-y += composite.o functions.o configfs.o obj-$(CONFIG_USB_DUMMY_HCD) += dummy_hcd.o obj-$(CONFIG_USB_NET2272) += net2272.o obj-$(CONFIG_USB_NET2280) += net2280.o diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index c0d62b278610..55f4df60f327 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -1637,6 +1637,7 @@ void composite_dev_cleanup(struct usb_composite_dev *cdev) kfree(cdev->req->buf); usb_ep_free_request(cdev->gadget->ep0, cdev->req); } + cdev->next_string_id = 0; device_remove_file(&cdev->gadget->dev, &dev_attr_suspended); } diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c new file mode 100644 index 000000000000..a34633a898a1 --- /dev/null +++ b/drivers/usb/gadget/configfs.c @@ -0,0 +1,1003 @@ +#include +#include +#include +#include +#include +#include + +int check_user_usb_string(const char *name, + struct usb_gadget_strings *stringtab_dev) +{ + unsigned primary_lang; + unsigned sub_lang; + u16 num; + int ret; + + ret = kstrtou16(name, 0, &num); + if (ret) + return ret; + + primary_lang = num & 0x3ff; + sub_lang = num >> 10; + + /* simple sanity check for valid langid */ + switch (primary_lang) { + case 0: + case 0x62 ... 0xfe: + case 0x100 ... 
0x3ff: + return -EINVAL; + } + if (!sub_lang) + return -EINVAL; + + stringtab_dev->language = num; + return 0; +} + +#define MAX_NAME_LEN 40 +#define MAX_USB_STRING_LANGS 2 + +struct gadget_info { + struct config_group group; + struct config_group functions_group; + struct config_group configs_group; + struct config_group strings_group; + struct config_group *default_groups[4]; + + struct mutex lock; + struct usb_gadget_strings *gstrings[MAX_USB_STRING_LANGS + 1]; + struct list_head string_list; + struct list_head available_func; + + const char *udc_name; +#ifdef CONFIG_USB_OTG + struct usb_otg_descriptor otg; +#endif + struct usb_composite_driver composite; + struct usb_composite_dev cdev; +}; + +struct config_usb_cfg { + struct config_group group; + struct config_group strings_group; + struct config_group *default_groups[2]; + struct list_head string_list; + struct usb_configuration c; + struct list_head func_list; + struct usb_gadget_strings *gstrings[MAX_USB_STRING_LANGS + 1]; +}; + +struct gadget_strings { + struct usb_gadget_strings stringtab_dev; + struct usb_string strings[USB_GADGET_FIRST_AVAIL_IDX]; + char *manufacturer; + char *product; + char *serialnumber; + + struct config_group group; + struct list_head list; +}; + +struct gadget_config_name { + struct usb_gadget_strings stringtab_dev; + struct usb_string strings; + char *configuration; + + struct config_group group; + struct list_head list; +}; + +static int usb_string_copy(const char *s, char **s_copy) +{ + int ret; + char *str; + char *copy = *s_copy; + ret = strlen(s); + if (ret > 126) + return -EOVERFLOW; + + str = kstrdup(s, GFP_KERNEL); + if (!str) + return -ENOMEM; + if (str[ret - 1] == '\n') + str[ret - 1] = '\0'; + kfree(copy); + *s_copy = str; + return 0; +} + +CONFIGFS_ATTR_STRUCT(gadget_info); +CONFIGFS_ATTR_STRUCT(config_usb_cfg); + +#define GI_DEVICE_DESC_ITEM_ATTR(name) \ + static struct gadget_info_attribute gadget_cdev_desc_##name = \ + __CONFIGFS_ATTR(name, S_IRUGO | S_IWUSR, \ + gadget_dev_desc_##name##_show, \ + gadget_dev_desc_##name##_store) + +#define GI_DEVICE_DESC_SIMPLE_R_u8(__name) \ + static ssize_t gadget_dev_desc_##__name##_show(struct gadget_info *gi, \ + char *page) \ +{ \ + return sprintf(page, "0x%02x\n", gi->cdev.desc.__name); \ +} + +#define GI_DEVICE_DESC_SIMPLE_R_u16(__name) \ + static ssize_t gadget_dev_desc_##__name##_show(struct gadget_info *gi, \ + char *page) \ +{ \ + return sprintf(page, "0x%04x\n", le16_to_cpup(&gi->cdev.desc.__name)); \ +} + + +#define GI_DEVICE_DESC_SIMPLE_W_u8(_name) \ + static ssize_t gadget_dev_desc_##_name##_store(struct gadget_info *gi, \ + const char *page, size_t len) \ +{ \ + u8 val; \ + int ret; \ + ret = kstrtou8(page, 0, &val); \ + if (ret) \ + return ret; \ + gi->cdev.desc._name = val; \ + return len; \ +} + +#define GI_DEVICE_DESC_SIMPLE_W_u16(_name) \ + static ssize_t gadget_dev_desc_##_name##_store(struct gadget_info *gi, \ + const char *page, size_t len) \ +{ \ + u16 val; \ + int ret; \ + ret = kstrtou16(page, 0, &val); \ + if (ret) \ + return ret; \ + gi->cdev.desc._name = cpu_to_le16p(&val); \ + return len; \ +} + +#define GI_DEVICE_DESC_SIMPLE_RW(_name, _type) \ + GI_DEVICE_DESC_SIMPLE_R_##_type(_name) \ + GI_DEVICE_DESC_SIMPLE_W_##_type(_name) + +GI_DEVICE_DESC_SIMPLE_R_u16(bcdUSB); +GI_DEVICE_DESC_SIMPLE_RW(bDeviceClass, u8); +GI_DEVICE_DESC_SIMPLE_RW(bDeviceSubClass, u8); +GI_DEVICE_DESC_SIMPLE_RW(bDeviceProtocol, u8); +GI_DEVICE_DESC_SIMPLE_RW(bMaxPacketSize0, u8); +GI_DEVICE_DESC_SIMPLE_RW(idVendor, u16); 
+GI_DEVICE_DESC_SIMPLE_RW(idProduct, u16); +GI_DEVICE_DESC_SIMPLE_R_u16(bcdDevice); + +static ssize_t is_valid_bcd(u16 bcd_val) +{ + if ((bcd_val & 0xf) > 9) + return -EINVAL; + if (((bcd_val >> 4) & 0xf) > 9) + return -EINVAL; + if (((bcd_val >> 8) & 0xf) > 9) + return -EINVAL; + if (((bcd_val >> 12) & 0xf) > 9) + return -EINVAL; + return 0; +} + +static ssize_t gadget_dev_desc_bcdDevice_store(struct gadget_info *gi, + const char *page, size_t len) +{ + u16 bcdDevice; + int ret; + + ret = kstrtou16(page, 0, &bcdDevice); + if (ret) + return ret; + ret = is_valid_bcd(bcdDevice); + if (ret) + return ret; + + gi->cdev.desc.bcdDevice = cpu_to_le16(bcdDevice); + return len; +} + +static ssize_t gadget_dev_desc_bcdUSB_store(struct gadget_info *gi, + const char *page, size_t len) +{ + u16 bcdUSB; + int ret; + + ret = kstrtou16(page, 0, &bcdUSB); + if (ret) + return ret; + ret = is_valid_bcd(bcdUSB); + if (ret) + return ret; + + gi->cdev.desc.bcdUSB = cpu_to_le16(bcdUSB); + return len; +} + +static ssize_t gadget_dev_desc_UDC_show(struct gadget_info *gi, char *page) +{ + return sprintf(page, "%s\n", gi->udc_name ?: ""); +} + +static int unregister_gadget(struct gadget_info *gi) +{ + int ret; + + if (!gi->udc_name) + return -ENODEV; + + ret = usb_gadget_unregister_driver(&gi->composite.gadget_driver); + if (ret) + return ret; + kfree(gi->udc_name); + gi->udc_name = NULL; + return 0; +} + +static ssize_t gadget_dev_desc_UDC_store(struct gadget_info *gi, + const char *page, size_t len) +{ + char *name; + int ret; + + name = kstrdup(page, GFP_KERNEL); + if (!name) + return -ENOMEM; + if (name[len - 1] == '\n') + name[len - 1] = '\0'; + + mutex_lock(&gi->lock); + + if (!strlen(name)) { + ret = unregister_gadget(gi); + if (ret) + goto err; + } else { + if (gi->udc_name) { + ret = -EBUSY; + goto err; + } + ret = udc_attach_driver(name, &gi->composite.gadget_driver); + if (ret) + goto err; + gi->udc_name = name; + } + mutex_unlock(&gi->lock); + return len; +err: + kfree(name); + mutex_unlock(&gi->lock); + return ret; +} + +GI_DEVICE_DESC_ITEM_ATTR(bDeviceClass); +GI_DEVICE_DESC_ITEM_ATTR(bDeviceSubClass); +GI_DEVICE_DESC_ITEM_ATTR(bDeviceProtocol); +GI_DEVICE_DESC_ITEM_ATTR(bMaxPacketSize0); +GI_DEVICE_DESC_ITEM_ATTR(idVendor); +GI_DEVICE_DESC_ITEM_ATTR(idProduct); +GI_DEVICE_DESC_ITEM_ATTR(bcdDevice); +GI_DEVICE_DESC_ITEM_ATTR(bcdUSB); +GI_DEVICE_DESC_ITEM_ATTR(UDC); + +static struct configfs_attribute *gadget_root_attrs[] = { + &gadget_cdev_desc_bDeviceClass.attr, + &gadget_cdev_desc_bDeviceSubClass.attr, + &gadget_cdev_desc_bDeviceProtocol.attr, + &gadget_cdev_desc_bMaxPacketSize0.attr, + &gadget_cdev_desc_idVendor.attr, + &gadget_cdev_desc_idProduct.attr, + &gadget_cdev_desc_bcdDevice.attr, + &gadget_cdev_desc_bcdUSB.attr, + &gadget_cdev_desc_UDC.attr, + NULL, +}; + +static inline struct gadget_info *to_gadget_info(struct config_item *item) +{ + return container_of(to_config_group(item), struct gadget_info, group); +} + +static inline struct gadget_strings *to_gadget_strings(struct config_item *item) +{ + return container_of(to_config_group(item), struct gadget_strings, + group); +} + +static inline struct gadget_config_name *to_gadget_config_name( + struct config_item *item) +{ + return container_of(to_config_group(item), struct gadget_config_name, + group); +} + +static inline struct config_usb_cfg *to_config_usb_cfg(struct config_item *item) +{ + return container_of(to_config_group(item), struct config_usb_cfg, + group); +} + +static inline struct usb_function_instance *to_usb_function_instance( + 
struct config_item *item) +{ + return container_of(to_config_group(item), + struct usb_function_instance, group); +} + +static void gadget_info_attr_release(struct config_item *item) +{ + struct gadget_info *gi = to_gadget_info(item); + + WARN_ON(!list_empty(&gi->cdev.configs)); + WARN_ON(!list_empty(&gi->string_list)); + WARN_ON(!list_empty(&gi->available_func)); + kfree(gi->composite.gadget_driver.function); + kfree(gi); +} + +CONFIGFS_ATTR_OPS(gadget_info); + +static struct configfs_item_operations gadget_root_item_ops = { + .release = gadget_info_attr_release, + .show_attribute = gadget_info_attr_show, + .store_attribute = gadget_info_attr_store, +}; + +static void gadget_config_attr_release(struct config_item *item) +{ + struct config_usb_cfg *cfg = to_config_usb_cfg(item); + + WARN_ON(!list_empty(&cfg->c.functions)); + list_del(&cfg->c.list); + kfree(cfg->c.label); + kfree(cfg); +} + +static int config_usb_cfg_link( + struct config_item *usb_cfg_ci, + struct config_item *usb_func_ci) +{ + struct config_usb_cfg *cfg = to_config_usb_cfg(usb_cfg_ci); + struct usb_composite_dev *cdev = cfg->c.cdev; + struct gadget_info *gi = container_of(cdev, struct gadget_info, cdev); + + struct config_group *group = to_config_group(usb_func_ci); + struct usb_function_instance *fi = container_of(group, + struct usb_function_instance, group); + struct usb_function_instance *a_fi; + struct usb_function *f; + int ret; + + mutex_lock(&gi->lock); + /* + * Make sure this function is from within our _this_ gadget and not + * from another gadget or a random directory. + * Also a function instance can only be linked once. + */ + list_for_each_entry(a_fi, &gi->available_func, cfs_list) { + if (a_fi == fi) + break; + } + if (a_fi != fi) { + ret = -EINVAL; + goto out; + } + + list_for_each_entry(f, &cfg->func_list, list) { + if (f->fi == fi) { + ret = -EEXIST; + goto out; + } + } + + f = usb_get_function(fi); + if (IS_ERR(f)) { + ret = PTR_ERR(f); + goto out; + } + + /* stash the function until we bind it to the gadget */ + list_add_tail(&f->list, &cfg->func_list); + ret = 0; +out: + mutex_unlock(&gi->lock); + return ret; +} + +static int config_usb_cfg_unlink( + struct config_item *usb_cfg_ci, + struct config_item *usb_func_ci) +{ + struct config_usb_cfg *cfg = to_config_usb_cfg(usb_cfg_ci); + struct usb_composite_dev *cdev = cfg->c.cdev; + struct gadget_info *gi = container_of(cdev, struct gadget_info, cdev); + + struct config_group *group = to_config_group(usb_func_ci); + struct usb_function_instance *fi = container_of(group, + struct usb_function_instance, group); + struct usb_function *f; + + /* + * ideally I would like to forbid to unlink functions while a gadget is + * bound to an UDC. Since this isn't possible at the moment, we simply + * force an unbind, the function is available here and then we can + * remove the function. 
+ */ + mutex_lock(&gi->lock); + if (gi->udc_name) + unregister_gadget(gi); + WARN_ON(gi->udc_name); + + list_for_each_entry(f, &cfg->func_list, list) { + if (f->fi == fi) { + list_del(&f->list); + usb_put_function(f); + mutex_unlock(&gi->lock); + return 0; + } + } + mutex_unlock(&gi->lock); + __WARN_printf("Unable to locate function to unbind\n"); + return 0; +} + +CONFIGFS_ATTR_OPS(config_usb_cfg); + +static struct configfs_item_operations gadget_config_item_ops = { + .release = gadget_config_attr_release, + .show_attribute = config_usb_cfg_attr_show, + .store_attribute = config_usb_cfg_attr_store, + .allow_link = config_usb_cfg_link, + .drop_link = config_usb_cfg_unlink, +}; + + +static ssize_t gadget_config_desc_MaxPower_show(struct config_usb_cfg *cfg, + char *page) +{ + return sprintf(page, "%u\n", cfg->c.MaxPower); +} + +static ssize_t gadget_config_desc_MaxPower_store(struct config_usb_cfg *cfg, + const char *page, size_t len) +{ + u16 val; + int ret; + ret = kstrtou16(page, 0, &val); + if (ret) + return ret; + if (DIV_ROUND_UP(val, 8) > 0xff) + return -ERANGE; + cfg->c.MaxPower = val; + return len; +} + +static ssize_t gadget_config_desc_bmAttributes_show(struct config_usb_cfg *cfg, + char *page) +{ + return sprintf(page, "0x%02x\n", cfg->c.bmAttributes); +} + +static ssize_t gadget_config_desc_bmAttributes_store(struct config_usb_cfg *cfg, + const char *page, size_t len) +{ + u8 val; + int ret; + ret = kstrtou8(page, 0, &val); + if (ret) + return ret; + if (!(val & USB_CONFIG_ATT_ONE)) + return -EINVAL; + if (val & ~(USB_CONFIG_ATT_ONE | USB_CONFIG_ATT_SELFPOWER | + USB_CONFIG_ATT_WAKEUP)) + return -EINVAL; + cfg->c.bmAttributes = val; + return len; +} + +#define CFG_CONFIG_DESC_ITEM_ATTR(name) \ + static struct config_usb_cfg_attribute gadget_usb_cfg_##name = \ + __CONFIGFS_ATTR(name, S_IRUGO | S_IWUSR, \ + gadget_config_desc_##name##_show, \ + gadget_config_desc_##name##_store) + +CFG_CONFIG_DESC_ITEM_ATTR(MaxPower); +CFG_CONFIG_DESC_ITEM_ATTR(bmAttributes); + +static struct configfs_attribute *gadget_config_attrs[] = { + &gadget_usb_cfg_MaxPower.attr, + &gadget_usb_cfg_bmAttributes.attr, + NULL, +}; + +static struct config_item_type gadget_config_type = { + .ct_item_ops = &gadget_config_item_ops, + .ct_attrs = gadget_config_attrs, + .ct_owner = THIS_MODULE, +}; + +static struct config_item_type gadget_root_type = { + .ct_item_ops = &gadget_root_item_ops, + .ct_attrs = gadget_root_attrs, + .ct_owner = THIS_MODULE, +}; + +static void composite_init_dev(struct usb_composite_dev *cdev) +{ + spin_lock_init(&cdev->lock); + INIT_LIST_HEAD(&cdev->configs); + INIT_LIST_HEAD(&cdev->gstrings); +} + +static struct config_group *function_make( + struct config_group *group, + const char *name) +{ + struct gadget_info *gi; + struct usb_function_instance *fi; + char buf[MAX_NAME_LEN]; + char *func_name; + char *instance_name; + int ret; + + ret = snprintf(buf, MAX_NAME_LEN, "%s", name); + if (ret >= MAX_NAME_LEN) + return ERR_PTR(-ENAMETOOLONG); + + func_name = buf; + instance_name = strchr(func_name, '.'); + if (!instance_name) { + pr_err("Unable to locate . 
in FUNC.INSTANCE\n"); + return ERR_PTR(-EINVAL); + } + *instance_name = '\0'; + instance_name++; + + fi = usb_get_function_instance(func_name); + if (IS_ERR(fi)) + return ERR_PTR(PTR_ERR(fi)); + + ret = config_item_set_name(&fi->group.cg_item, name); + if (ret) { + usb_put_function_instance(fi); + return ERR_PTR(ret); + } + + gi = container_of(group, struct gadget_info, functions_group); + + mutex_lock(&gi->lock); + list_add_tail(&fi->cfs_list, &gi->available_func); + mutex_unlock(&gi->lock); + return &fi->group; +} + +static void function_drop( + struct config_group *group, + struct config_item *item) +{ + struct usb_function_instance *fi = to_usb_function_instance(item); + struct gadget_info *gi; + + gi = container_of(group, struct gadget_info, functions_group); + + mutex_lock(&gi->lock); + list_del(&fi->cfs_list); + mutex_unlock(&gi->lock); + config_item_put(item); +} + +static struct configfs_group_operations functions_ops = { + .make_group = &function_make, + .drop_item = &function_drop, +}; + +static struct config_item_type functions_type = { + .ct_group_ops = &functions_ops, + .ct_owner = THIS_MODULE, +}; + +CONFIGFS_ATTR_STRUCT(gadget_config_name); +GS_STRINGS_RW(gadget_config_name, configuration); + +static struct configfs_attribute *gadget_config_name_langid_attrs[] = { + &gadget_config_name_configuration.attr, + NULL, +}; + +static void gadget_config_name_attr_release(struct config_item *item) +{ + struct gadget_config_name *cn = to_gadget_config_name(item); + + kfree(cn->configuration); + + list_del(&cn->list); + kfree(cn); +} + +USB_CONFIG_STRING_RW_OPS(gadget_config_name); +USB_CONFIG_STRINGS_LANG(gadget_config_name, config_usb_cfg); + +static struct config_group *config_desc_make( + struct config_group *group, + const char *name) +{ + struct gadget_info *gi; + struct config_usb_cfg *cfg; + char buf[MAX_NAME_LEN]; + char *num_str; + u8 num; + int ret; + + gi = container_of(group, struct gadget_info, configs_group); + ret = snprintf(buf, MAX_NAME_LEN, "%s", name); + if (ret >= MAX_NAME_LEN) + return ERR_PTR(-ENAMETOOLONG); + + num_str = strchr(buf, '.'); + if (!num_str) { + pr_err("Unable to locate . 
in name.bConfigurationValue\n"); + return ERR_PTR(-EINVAL); + } + + *num_str = '\0'; + num_str++; + + if (!strlen(buf)) + return ERR_PTR(-EINVAL); + + ret = kstrtou8(num_str, 0, &num); + if (ret) + return ERR_PTR(ret); + + cfg = kzalloc(sizeof(*cfg), GFP_KERNEL); + if (!cfg) + return ERR_PTR(-ENOMEM); + cfg->c.label = kstrdup(buf, GFP_KERNEL); + if (!cfg->c.label) { + ret = -ENOMEM; + goto err; + } + cfg->c.bConfigurationValue = num; + cfg->c.MaxPower = CONFIG_USB_GADGET_VBUS_DRAW; + cfg->c.bmAttributes = USB_CONFIG_ATT_ONE; + INIT_LIST_HEAD(&cfg->string_list); + INIT_LIST_HEAD(&cfg->func_list); + + cfg->group.default_groups = cfg->default_groups; + cfg->default_groups[0] = &cfg->strings_group; + + config_group_init_type_name(&cfg->group, name, + &gadget_config_type); + config_group_init_type_name(&cfg->strings_group, "strings", + &gadget_config_name_strings_type); + + ret = usb_add_config_only(&gi->cdev, &cfg->c); + if (ret) + goto err; + + return &cfg->group; +err: + kfree(cfg->c.label); + kfree(cfg); + return ERR_PTR(ret); +} + +static void config_desc_drop( + struct config_group *group, + struct config_item *item) +{ + config_item_put(item); +} + +static struct configfs_group_operations config_desc_ops = { + .make_group = &config_desc_make, + .drop_item = &config_desc_drop, +}; + +static struct config_item_type config_desc_type = { + .ct_group_ops = &config_desc_ops, + .ct_owner = THIS_MODULE, +}; + +CONFIGFS_ATTR_STRUCT(gadget_strings); +GS_STRINGS_RW(gadget_strings, manufacturer); +GS_STRINGS_RW(gadget_strings, product); +GS_STRINGS_RW(gadget_strings, serialnumber); + +static struct configfs_attribute *gadget_strings_langid_attrs[] = { + &gadget_strings_manufacturer.attr, + &gadget_strings_product.attr, + &gadget_strings_serialnumber.attr, + NULL, +}; + +static void gadget_strings_attr_release(struct config_item *item) +{ + struct gadget_strings *gs = to_gadget_strings(item); + + kfree(gs->manufacturer); + kfree(gs->product); + kfree(gs->serialnumber); + + list_del(&gs->list); + kfree(gs); +} + +USB_CONFIG_STRING_RW_OPS(gadget_strings); +USB_CONFIG_STRINGS_LANG(gadget_strings, gadget_info); + +static int configfs_do_nothing(struct usb_composite_dev *cdev) +{ + __WARN(); + return -EINVAL; +} + +int composite_dev_prepare(struct usb_composite_driver *composite, + struct usb_composite_dev *dev); + +static void purge_configs_funcs(struct gadget_info *gi) +{ + struct usb_configuration *c; + + list_for_each_entry(c, &gi->cdev.configs, list) { + struct usb_function *f, *tmp; + struct config_usb_cfg *cfg; + + cfg = container_of(c, struct config_usb_cfg, c); + + list_for_each_entry_safe(f, tmp, &c->functions, list) { + + list_move_tail(&f->list, &cfg->func_list); + if (f->unbind) { + dev_err(&gi->cdev.gadget->dev, "unbind function" + " '%s'/%p\n", f->name, f); + f->unbind(c, f); + } + } + c->next_interface_id = 0; + c->superspeed = 0; + c->highspeed = 0; + c->fullspeed = 0; + } +} + +static int configfs_composite_bind(struct usb_gadget *gadget, + struct usb_gadget_driver *gdriver) +{ + struct usb_composite_driver *composite = to_cdriver(gdriver); + struct gadget_info *gi = container_of(composite, + struct gadget_info, composite); + struct usb_composite_dev *cdev = &gi->cdev; + struct usb_configuration *c; + struct usb_string *s; + unsigned i; + int ret; + + /* the gi->lock is hold by the caller */ + cdev->gadget = gadget; + set_gadget_data(gadget, cdev); + ret = composite_dev_prepare(composite, cdev); + if (ret) + return ret; + /* and now the gadget bind */ + ret = -EINVAL; + + if 
(list_empty(&gi->cdev.configs)) { + pr_err("Need at least one configuration in %s.\n", + gi->composite.name); + goto err_comp_cleanup; + } + + + list_for_each_entry(c, &gi->cdev.configs, list) { + struct config_usb_cfg *cfg; + + cfg = container_of(c, struct config_usb_cfg, c); + if (list_empty(&cfg->func_list)) { + pr_err("Config %s/%d of %s needs at least one function.\n", + c->label, c->bConfigurationValue, + gi->composite.name); + goto err_comp_cleanup; + } + } + + /* init all strings */ + if (!list_empty(&gi->string_list)) { + struct gadget_strings *gs; + + i = 0; + list_for_each_entry(gs, &gi->string_list, list) { + + gi->gstrings[i] = &gs->stringtab_dev; + gs->stringtab_dev.strings = gs->strings; + gs->strings[USB_GADGET_MANUFACTURER_IDX].s = + gs->manufacturer; + gs->strings[USB_GADGET_PRODUCT_IDX].s = gs->product; + gs->strings[USB_GADGET_SERIAL_IDX].s = gs->serialnumber; + i++; + } + gi->gstrings[i] = NULL; + s = usb_gstrings_attach(&gi->cdev, gi->gstrings, + USB_GADGET_FIRST_AVAIL_IDX); + if (IS_ERR(s)) + goto err_comp_cleanup; + + gi->cdev.desc.iManufacturer = s[USB_GADGET_MANUFACTURER_IDX].id; + gi->cdev.desc.iProduct = s[USB_GADGET_PRODUCT_IDX].id; + gi->cdev.desc.iSerialNumber = s[USB_GADGET_SERIAL_IDX].id; + } + + /* Go through all configs, attach all functions */ + list_for_each_entry(c, &gi->cdev.configs, list) { + struct config_usb_cfg *cfg; + struct usb_function *f; + struct usb_function *tmp; + struct gadget_config_name *cn; + + cfg = container_of(c, struct config_usb_cfg, c); + if (!list_empty(&cfg->string_list)) { + i = 0; + list_for_each_entry(cn, &cfg->string_list, list) { + cfg->gstrings[i] = &cn->stringtab_dev; + cn->stringtab_dev.strings = &cn->strings; + cn->strings.s = cn->configuration; + i++; + } + cfg->gstrings[i] = NULL; + s = usb_gstrings_attach(&gi->cdev, cfg->gstrings, 1); + if (IS_ERR(s)) + goto err_comp_cleanup; + c->iConfiguration = s[0].id; + } + + list_for_each_entry_safe(f, tmp, &cfg->func_list, list) { + list_del(&f->list); + ret = usb_add_function(c, f); + if (ret) + goto err_purge_funcs; + } + usb_ep_autoconfig_reset(cdev->gadget); + } + usb_ep_autoconfig_reset(cdev->gadget); + return 0; + +err_purge_funcs: + purge_configs_funcs(gi); +err_comp_cleanup: + composite_dev_cleanup(cdev); + return ret; +} + +static void configfs_composite_unbind(struct usb_gadget *gadget) +{ + struct usb_composite_dev *cdev; + struct gadget_info *gi; + + /* the gi->lock is held by the caller */ + + cdev = get_gadget_data(gadget); + gi = container_of(cdev, struct gadget_info, cdev); + + purge_configs_funcs(gi); + composite_dev_cleanup(cdev); + usb_ep_autoconfig_reset(cdev->gadget); + cdev->gadget = NULL; + set_gadget_data(gadget, NULL); +} + +static const struct usb_gadget_driver configfs_driver_template = { + .bind = configfs_composite_bind, + .unbind = configfs_composite_unbind, + + .setup = composite_setup, + .disconnect = composite_disconnect, + + .max_speed = USB_SPEED_SUPER, + .driver = { + .owner = THIS_MODULE, + .name = "configfs-gadget", + }, +}; + +static struct config_group *gadgets_make( + struct config_group *group, + const char *name) +{ + struct gadget_info *gi; + + gi = kzalloc(sizeof(*gi), GFP_KERNEL); + if (!gi) + return ERR_PTR(-ENOMEM); + + gi->group.default_groups = gi->default_groups; + gi->group.default_groups[0] = &gi->functions_group; + gi->group.default_groups[1] = &gi->configs_group; + gi->group.default_groups[2] = &gi->strings_group; + + config_group_init_type_name(&gi->functions_group, "functions", + &functions_type); +
config_group_init_type_name(&gi->configs_group, "configs", + &config_desc_type); + config_group_init_type_name(&gi->strings_group, "strings", + &gadget_strings_strings_type); + + gi->composite.bind = configfs_do_nothing; + gi->composite.unbind = configfs_do_nothing; + gi->composite.suspend = NULL; + gi->composite.resume = NULL; + gi->composite.max_speed = USB_SPEED_SUPER; + + mutex_init(&gi->lock); + INIT_LIST_HEAD(&gi->string_list); + INIT_LIST_HEAD(&gi->available_func); + + composite_init_dev(&gi->cdev); + gi->cdev.desc.bLength = USB_DT_DEVICE_SIZE; + gi->cdev.desc.bDescriptorType = USB_DT_DEVICE; + gi->cdev.desc.bcdDevice = cpu_to_le16(get_default_bcdDevice()); + + gi->composite.gadget_driver = configfs_driver_template; + + gi->composite.gadget_driver.function = kstrdup(name, GFP_KERNEL); + gi->composite.name = gi->composite.gadget_driver.function; + + if (!gi->composite.gadget_driver.function) + goto err; + +#ifdef CONFIG_USB_OTG + gi->otg.bLength = sizeof(struct usb_otg_descriptor); + gi->otg.bDescriptorType = USB_DT_OTG; + gi->otg.bmAttributes = USB_OTG_SRP | USB_OTG_HNP; +#endif + + config_group_init_type_name(&gi->group, name, + &gadget_root_type); + return &gi->group; +err: + kfree(gi); + return ERR_PTR(-ENOMEM); +} + +static void gadgets_drop(struct config_group *group, struct config_item *item) +{ + config_item_put(item); +} + +static struct configfs_group_operations gadgets_ops = { + .make_group = &gadgets_make, + .drop_item = &gadgets_drop, +}; + +static struct config_item_type gadgets_type = { + .ct_group_ops = &gadgets_ops, + .ct_owner = THIS_MODULE, +}; + +static struct configfs_subsystem gadget_subsys = { + .su_group = { + .cg_item = { + .ci_namebuf = "usb_gadget", + .ci_type = &gadgets_type, + }, + }, + .su_mutex = __MUTEX_INITIALIZER(gadget_subsys.su_mutex), +}; + +static int __init gadget_cfs_init(void) +{ + int ret; + + config_group_init(&gadget_subsys.su_group); + + ret = configfs_register_subsystem(&gadget_subsys); + return ret; +} +module_init(gadget_cfs_init); + +static void __exit gadget_cfs_exit(void) +{ + configfs_unregister_subsystem(&gadget_subsys); +} +module_exit(gadget_cfs_exit); diff --git a/drivers/usb/gadget/f_acm.c b/drivers/usb/gadget/f_acm.c index ba7daaaad148..4b7e33e5d9c6 100644 --- a/drivers/usb/gadget/f_acm.c +++ b/drivers/usb/gadget/f_acm.c @@ -763,6 +763,59 @@ static struct usb_function *acm_alloc_func(struct usb_function_instance *fi) return &acm->port.func; } +static inline struct f_serial_opts *to_f_serial_opts(struct config_item *item) +{ + return container_of(to_config_group(item), struct f_serial_opts, + func_inst.group); +} + +CONFIGFS_ATTR_STRUCT(f_serial_opts); +static ssize_t f_acm_attr_show(struct config_item *item, + struct configfs_attribute *attr, + char *page) +{ + struct f_serial_opts *opts = to_f_serial_opts(item); + struct f_serial_opts_attribute *f_serial_opts_attr = + container_of(attr, struct f_serial_opts_attribute, attr); + ssize_t ret = 0; + + if (f_serial_opts_attr->show) + ret = f_serial_opts_attr->show(opts, page); + return ret; +} + +static void acm_attr_release(struct config_item *item) +{ + struct f_serial_opts *opts = to_f_serial_opts(item); + + usb_put_function_instance(&opts->func_inst); +} + +static struct configfs_item_operations acm_item_ops = { + .release = acm_attr_release, + .show_attribute = f_acm_attr_show, +}; + +static ssize_t f_acm_port_num_show(struct f_serial_opts *opts, char *page) +{ + return sprintf(page, "%u\n", opts->port_num); +} + +static struct f_serial_opts_attribute f_acm_port_num = + 
__CONFIGFS_ATTR_RO(port_num, f_acm_port_num_show); + + +static struct configfs_attribute *acm_attrs[] = { + &f_acm_port_num.attr, + NULL, +}; + +static struct config_item_type acm_func_type = { + .ct_item_ops = &acm_item_ops, + .ct_attrs = acm_attrs, + .ct_owner = THIS_MODULE, +}; + static void acm_free_instance(struct usb_function_instance *fi) { struct f_serial_opts *opts; @@ -786,6 +839,8 @@ static struct usb_function_instance *acm_alloc_instance(void) kfree(opts); return ERR_PTR(ret); } + config_group_init_type_name(&opts->func_inst.group, "", + &acm_func_type); return &opts->func_inst; } DECLARE_USB_FUNCTION_INIT(acm, acm_alloc_instance, acm_alloc_func); diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h index 8860594d6364..5e61589fc166 100644 --- a/include/linux/usb/composite.h +++ b/include/linux/usb/composite.h @@ -39,6 +39,7 @@ #include #include #include +#include /* * USB function drivers should return USB_GADGET_DELAYED_STATUS if they @@ -464,6 +465,8 @@ struct usb_function_driver { }; struct usb_function_instance { + struct config_group group; + struct list_head cfs_list; struct usb_function_driver *fd; void (*free_func_inst)(struct usb_function_instance *inst); }; diff --git a/include/linux/usb/gadget_configfs.h b/include/linux/usb/gadget_configfs.h new file mode 100644 index 000000000000..d74c0ae989d5 --- /dev/null +++ b/include/linux/usb/gadget_configfs.h @@ -0,0 +1,110 @@ +#ifndef __GADGET_CONFIGFS__ +#define __GADGET_CONFIGFS__ + +#include + +int check_user_usb_string(const char *name, + struct usb_gadget_strings *stringtab_dev); + +#define GS_STRINGS_W(__struct, __name) \ + static ssize_t __struct##_##__name##_store(struct __struct *gs, \ + const char *page, size_t len) \ +{ \ + int ret; \ + \ + ret = usb_string_copy(page, &gs->__name); \ + if (ret) \ + return ret; \ + return len; \ +} + +#define GS_STRINGS_R(__struct, __name) \ + static ssize_t __struct##_##__name##_show(struct __struct *gs, \ + char *page) \ +{ \ + return sprintf(page, "%s\n", gs->__name ?: ""); \ +} + +#define GS_STRING_ITEM_ATTR(struct_name, name) \ + static struct struct_name##_attribute struct_name##_##name = \ + __CONFIGFS_ATTR(name, S_IRUGO | S_IWUSR, \ + struct_name##_##name##_show, \ + struct_name##_##name##_store) + +#define GS_STRINGS_RW(struct_name, _name) \ + GS_STRINGS_R(struct_name, _name) \ + GS_STRINGS_W(struct_name, _name) \ + GS_STRING_ITEM_ATTR(struct_name, _name) + +#define USB_CONFIG_STRING_RW_OPS(struct_in) \ + CONFIGFS_ATTR_OPS(struct_in); \ + \ +static struct configfs_item_operations struct_in##_langid_item_ops = { \ + .release = struct_in##_attr_release, \ + .show_attribute = struct_in##_attr_show, \ + .store_attribute = struct_in##_attr_store, \ +}; \ + \ +static struct config_item_type struct_in##_langid_type = { \ + .ct_item_ops = &struct_in##_langid_item_ops, \ + .ct_attrs = struct_in##_langid_attrs, \ + .ct_owner = THIS_MODULE, \ +} + +#define USB_CONFIG_STRINGS_LANG(struct_in, struct_member) \ + static struct config_group *struct_in##_strings_make( \ + struct config_group *group, \ + const char *name) \ + { \ + struct struct_member *gi; \ + struct struct_in *gs; \ + struct struct_in *new; \ + int langs = 0; \ + int ret; \ + \ + new = kzalloc(sizeof(*new), GFP_KERNEL); \ + if (!new) \ + return ERR_PTR(-ENOMEM); \ + \ + ret = check_user_usb_string(name, &new->stringtab_dev); \ + if (ret) \ + goto err; \ + config_group_init_type_name(&new->group, name, \ + &struct_in##_langid_type); \ + \ + gi = container_of(group, struct struct_member, strings_group); 
\ + ret = -EEXIST; \ + list_for_each_entry(gs, &gi->string_list, list) { \ + if (gs->stringtab_dev.language == new->stringtab_dev.language) \ + goto err; \ + langs++; \ + } \ + ret = -EOVERFLOW; \ + if (langs >= MAX_USB_STRING_LANGS) \ + goto err; \ + \ + list_add_tail(&new->list, &gi->string_list); \ + return &new->group; \ +err: \ + kfree(new); \ + return ERR_PTR(ret); \ +} \ + \ +static void struct_in##_strings_drop( \ + struct config_group *group, \ + struct config_item *item) \ +{ \ + config_item_put(item); \ +} \ + \ +static struct configfs_group_operations struct_in##_strings_ops = { \ + .make_group = &struct_in##_strings_make, \ + .drop_item = &struct_in##_strings_drop, \ +}; \ + \ +static struct config_item_type struct_in##_strings_type = { \ + .ct_group_ops = &struct_in##_strings_ops, \ + .ct_owner = THIS_MODULE, \ +} + +#endif -- cgit From 3451d0243c3cdfd729b36f9684a14659d4895ca3 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 10 Aug 2011 23:21:01 +0200 Subject: nohz: Rename CONFIG_NO_HZ to CONFIG_NO_HZ_COMMON We are planning to convert the dynticks Kconfig options layout into a choice menu. The user must be able to easily pick any of the following implementations: constant periodic tick, idle dynticks, full dynticks. As this implies a mutual exclusion, the two dynticks implementations need to converge on the selection of a common Kconfig option in order to ease the sharing of a common infrastructure. It would thus seem pretty natural to reuse CONFIG_NO_HZ to that end. It already implements all the idle dynticks code and the full dynticks depends on all that code for now. So ideally the choice menu would propose CONFIG_NO_HZ_IDLE and CONFIG_NO_HZ_EXTENDED, then both would select CONFIG_NO_HZ. On the other hand we want to stay backward compatible: if CONFIG_NO_HZ is set in an older config file, we want to enable CONFIG_NO_HZ_IDLE by default. But we can't afford both at the same time or we run into a circular dependency: 1) CONFIG_NO_HZ_IDLE and CONFIG_NO_HZ_EXTENDED both select CONFIG_NO_HZ 2) If CONFIG_NO_HZ is set, we default to CONFIG_NO_HZ_IDLE We might be able to support that from Kconfig/Kbuild but it may not be wise to introduce such a confusing behaviour. So to solve this, create a new CONFIG_NO_HZ_COMMON option which gathers the common code between idle and full dynticks (that common code for now is simply the idle dynticks code) and select it from their referring Kconfig. Then we'll later create CONFIG_NO_HZ_IDLE and map CONFIG_NO_HZ to it for backward compatibility. Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Namhyung Kim Cc: Paul E.
McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- Documentation/RCU/stallwarn.txt | 2 +- Documentation/cpu-freq/governors.txt | 4 ++-- arch/um/include/shared/common-offsets.h | 4 ++-- arch/um/os-Linux/time.c | 2 +- include/linux/sched.h | 8 ++++---- include/linux/tick.h | 8 ++++---- init/Kconfig | 2 +- kernel/hrtimer.c | 4 ++-- kernel/sched/core.c | 18 +++++++++--------- kernel/sched/fair.c | 10 +++++----- kernel/sched/sched.h | 4 ++-- kernel/softirq.c | 2 +- kernel/time/Kconfig | 13 +++++++++---- kernel/time/tick-sched.c | 12 ++++++------ kernel/timer.c | 4 ++-- 15 files changed, 51 insertions(+), 46 deletions(-) (limited to 'include/linux') diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt index 1927151b386b..b336755b71ed 100644 --- a/Documentation/RCU/stallwarn.txt +++ b/Documentation/RCU/stallwarn.txt @@ -176,7 +176,7 @@ o A CPU-bound real-time task in a CONFIG_PREEMPT_RT kernel that o A hardware or software issue shuts off the scheduler-clock interrupt on a CPU that is not in dyntick-idle mode. This problem really has happened, and seems to be most likely to - result in RCU CPU stall warnings for CONFIG_NO_HZ=n kernels. + result in RCU CPU stall warnings for CONFIG_NO_HZ_COMMON=n kernels. o A bug in the RCU implementation. diff --git a/Documentation/cpu-freq/governors.txt b/Documentation/cpu-freq/governors.txt index c7a2eb8450c2..e3e5d9ae50cd 100644 --- a/Documentation/cpu-freq/governors.txt +++ b/Documentation/cpu-freq/governors.txt @@ -131,8 +131,8 @@ sampling_rate_min: The sampling rate is limited by the HW transition latency: transition_latency * 100 Or by kernel restrictions: -If CONFIG_NO_HZ is set, the limit is 10ms fixed. -If CONFIG_NO_HZ is not set or nohz=off boot parameter is used, the +If CONFIG_NO_HZ_COMMON is set, the limit is 10ms fixed. 
+If CONFIG_NO_HZ_COMMON is not set or nohz=off boot parameter is used, the limits depend on the CONFIG_HZ option: HZ=1000: min=20000us (20ms) HZ=250: min=80000us (80ms) diff --git a/arch/um/include/shared/common-offsets.h b/arch/um/include/shared/common-offsets.h index 2df313b6a586..c92306809029 100644 --- a/arch/um/include/shared/common-offsets.h +++ b/arch/um/include/shared/common-offsets.h @@ -30,8 +30,8 @@ DEFINE(UM_NSEC_PER_USEC, NSEC_PER_USEC); #ifdef CONFIG_PRINTK DEFINE(UML_CONFIG_PRINTK, CONFIG_PRINTK); #endif -#ifdef CONFIG_NO_HZ -DEFINE(UML_CONFIG_NO_HZ, CONFIG_NO_HZ); +#ifdef CONFIG_NO_HZ_COMMON +DEFINE(UML_CONFIG_NO_HZ_COMMON, CONFIG_NO_HZ_COMMON); #endif #ifdef CONFIG_UML_X86 DEFINE(UML_CONFIG_UML_X86, CONFIG_UML_X86); diff --git a/arch/um/os-Linux/time.c b/arch/um/os-Linux/time.c index fac388cb464f..e9824d5dd7d5 100644 --- a/arch/um/os-Linux/time.c +++ b/arch/um/os-Linux/time.c @@ -79,7 +79,7 @@ long long os_nsecs(void) return timeval_to_ns(&tv); } -#ifdef UML_CONFIG_NO_HZ +#ifdef UML_CONFIG_NO_HZ_COMMON static int after_sleep_interval(struct timespec *ts) { return 0; diff --git a/include/linux/sched.h b/include/linux/sched.h index 10626e2ee688..1ff9e0a5de27 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -230,7 +230,7 @@ extern void init_idle_bootup_task(struct task_struct *idle); extern int runqueue_is_locked(int cpu); -#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ) +#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) extern void nohz_balance_enter_idle(int cpu); extern void set_cpu_sd_state_idle(void); extern int get_nohz_timer_target(void); @@ -1758,13 +1758,13 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, } #endif -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON void calc_load_enter_idle(void); void calc_load_exit_idle(void); #else static inline void calc_load_enter_idle(void) { } static inline void calc_load_exit_idle(void) { } -#endif /* CONFIG_NO_HZ */ +#endif /* CONFIG_NO_HZ_COMMON */ #ifndef CONFIG_CPUMASK_OFFSTACK static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask) @@ -1850,7 +1850,7 @@ extern void idle_task_exit(void); static inline void idle_task_exit(void) {} #endif -#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP) +#if defined(CONFIG_NO_HZ_COMMON) && defined(CONFIG_SMP) extern void wake_up_nohz_cpu(int cpu); #else static inline void wake_up_nohz_cpu(int cpu) { } diff --git a/include/linux/tick.h b/include/linux/tick.h index 44bfa8aa439f..5e403339ee14 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -82,7 +82,7 @@ extern int tick_program_event(ktime_t expires, int force); extern void tick_setup_sched_timer(void); # endif -# if defined CONFIG_NO_HZ || defined CONFIG_HIGH_RES_TIMERS +# if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS extern void tick_cancel_sched_timer(int cpu); # else static inline void tick_cancel_sched_timer(int cpu) { } @@ -123,7 +123,7 @@ static inline void tick_check_idle(int cpu) { } static inline int tick_oneshot_mode_active(void) { return 0; } #endif /* !CONFIG_GENERIC_CLOCKEVENTS */ -# ifdef CONFIG_NO_HZ +# ifdef CONFIG_NO_HZ_COMMON DECLARE_PER_CPU(struct tick_sched, tick_cpu_sched); static inline int tick_nohz_tick_stopped(void) @@ -138,7 +138,7 @@ extern ktime_t tick_nohz_get_sleep_length(void); extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time); -# else /* !CONFIG_NO_HZ */ +# else /* !CONFIG_NO_HZ_COMMON */ static inline int tick_nohz_tick_stopped(void) { 
return 0; @@ -155,7 +155,7 @@ static inline ktime_t tick_nohz_get_sleep_length(void) } static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; } static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; } -# endif /* !NO_HZ */ +# endif /* !CONFIG_NO_HZ_COMMON */ #ifdef CONFIG_NO_HZ_EXTENDED extern int tick_nohz_extended_cpu(int cpu); diff --git a/init/Kconfig b/init/Kconfig index 8a1dac2f80a9..edc8132584f1 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -580,7 +580,7 @@ config RCU_FANOUT_EXACT config RCU_FAST_NO_HZ bool "Accelerate last non-dyntick-idle CPU's grace periods" - depends on NO_HZ && SMP + depends on NO_HZ_COMMON && SMP default n help This option causes RCU to attempt to accelerate grace periods in diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index cc47812d3feb..ec60482d8b03 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -160,7 +160,7 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer, */ static int hrtimer_get_target(int this_cpu, int pinned) { -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON if (!pinned && get_sysctl_timer_migration() && idle_cpu(this_cpu)) return get_nohz_timer_target(); #endif @@ -1106,7 +1106,7 @@ ktime_t hrtimer_get_remaining(const struct hrtimer *timer) } EXPORT_SYMBOL_GPL(hrtimer_get_remaining); -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /** * hrtimer_get_next_event - get the time until next expiry event * diff --git a/kernel/sched/core.c b/kernel/sched/core.c index e91ee589f793..9bb397da63d6 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -549,7 +549,7 @@ void resched_cpu(int cpu) raw_spin_unlock_irqrestore(&rq->lock, flags); } -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* * In the semi idle case, use the nearest busy cpu for migrating timers * from an idle cpu. This is good for power-savings. @@ -641,14 +641,14 @@ static inline bool got_nohz_idle_kick(void) return idle_cpu(cpu) && test_bit(NOHZ_BALANCE_KICK, nohz_flags(cpu)); } -#else /* CONFIG_NO_HZ */ +#else /* CONFIG_NO_HZ_COMMON */ static inline bool got_nohz_idle_kick(void) { return false; } -#endif /* CONFIG_NO_HZ */ +#endif /* CONFIG_NO_HZ_COMMON */ void sched_avg_update(struct rq *rq) { @@ -2139,7 +2139,7 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active) return load >> FSHIFT; } -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* * Handle NO_HZ for the global load-average. 
* @@ -2365,12 +2365,12 @@ static void calc_global_nohz(void) smp_wmb(); calc_load_idx++; } -#else /* !CONFIG_NO_HZ */ +#else /* !CONFIG_NO_HZ_COMMON */ static inline long calc_load_fold_idle(void) { return 0; } static inline void calc_global_nohz(void) { } -#endif /* CONFIG_NO_HZ */ +#endif /* CONFIG_NO_HZ_COMMON */ /* * calc_load - update the avenrun load estimates 10 ticks after the @@ -2530,7 +2530,7 @@ static void __update_cpu_load(struct rq *this_rq, unsigned long this_load, sched_avg_update(this_rq); } -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* * There is no sane way to deal with nohz on smp when using jiffies because the * cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading @@ -2590,7 +2590,7 @@ void update_cpu_load_nohz(void) } raw_spin_unlock(&this_rq->lock); } -#endif /* CONFIG_NO_HZ */ +#endif /* CONFIG_NO_HZ_COMMON */ /* * Called from scheduler_tick() @@ -7023,7 +7023,7 @@ void __init sched_init(void) INIT_LIST_HEAD(&rq->cfs_tasks); rq_attach_root(rq, &def_root_domain); -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON rq->nohz_flags = 0; #endif #endif diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 539760ef00c4..5c97fca091a7 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5331,7 +5331,7 @@ out_unlock: return 0; } -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* * idle load balancing details * - When one of the busy CPUs notice that there may be an idle rebalancing @@ -5541,9 +5541,9 @@ out: rq->next_balance = next_balance; } -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* - * In CONFIG_NO_HZ case, the idle balance kickee will do the + * In CONFIG_NO_HZ_COMMON case, the idle balance kickee will do the * rebalancing for all the cpus for whom scheduler ticks are stopped. */ static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle) @@ -5686,7 +5686,7 @@ void trigger_load_balance(struct rq *rq, int cpu) if (time_after_eq(jiffies, rq->next_balance) && likely(!on_null_domain(cpu))) raise_softirq(SCHED_SOFTIRQ); -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON if (nohz_kick_needed(rq, cpu) && likely(!on_null_domain(cpu))) nohz_balancer_kick(cpu); #endif @@ -6156,7 +6156,7 @@ __init void init_sched_fair_class(void) #ifdef CONFIG_SMP open_softirq(SCHED_SOFTIRQ, run_rebalance_domains); -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON nohz.next_balance = jiffies; zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT); cpu_notifier(sched_ilb_notifier, 0); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 3bd15a43eebc..889904dd6d77 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -404,7 +404,7 @@ struct rq { #define CPU_LOAD_IDX_MAX 5 unsigned long cpu_load[CPU_LOAD_IDX_MAX]; unsigned long last_load_update_tick; -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON u64 nohz_stamp; unsigned long nohz_flags; #endif @@ -1333,7 +1333,7 @@ extern void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq); extern void account_cfs_bandwidth_used(int enabled, int was_enabled); -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON enum rq_nohz_flag_bits { NOHZ_TICK_STOPPED, NOHZ_BALANCE_KICK, diff --git a/kernel/softirq.c b/kernel/softirq.c index b4d252fd195b..de15813f2a66 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -348,7 +348,7 @@ void irq_exit(void) if (!in_interrupt() && local_softirq_pending()) invoke_softirq(); -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* Make sure that timer wheel updates are propagated */ if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched()) 
tick_nohz_irq_exit(); diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index 726c33e00da2..c88fc43494c9 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -64,16 +64,21 @@ config GENERIC_CMOS_UPDATE if GENERIC_CLOCKEVENTS menu "Timers subsystem" -# Core internal switch. Selected by NO_HZ / HIGH_RES_TIMERS. This is +# Core internal switch. Selected by NO_HZ_COMMON / HIGH_RES_TIMERS. This is # only related to the tick functionality. Oneshot clockevent devices # are supported independ of this. config TICK_ONESHOT bool +config NO_HZ_COMMON + bool + depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS + select TICK_ONESHOT + config NO_HZ bool "Tickless System (Dynamic Ticks)" depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS - select TICK_ONESHOT + select NO_HZ_COMMON help This option enables a tickless system: timer interrupts will only trigger on an as-needed basis both when the system is @@ -81,14 +86,14 @@ config NO_HZ config NO_HZ_EXTENDED bool "Full dynticks system" - # NO_HZ dependency + # NO_HZ_COMMON dependency depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS # RCU_USER_QS depends on HAVE_CONTEXT_TRACKING && SMP # RCU_NOCB_CPU dependency depends on TREE_RCU || TREE_PREEMPT_RCU depends on VIRT_CPU_ACCOUNTING_GEN - select NO_HZ + select NO_HZ_COMMON select RCU_USER_QS select RCU_NOCB_CPU select CONTEXT_TRACKING_FORCE diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 57bb3fe5aaa3..ccfc2086cd4b 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -104,7 +104,7 @@ static void tick_sched_do_timer(ktime_t now) { int cpu = smp_processor_id(); -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* * Check if the do_timer duty was dropped. We don't care about * concurrency: This happens only when the cpu in charge went @@ -124,7 +124,7 @@ static void tick_sched_do_timer(ktime_t now) static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) { -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* * When we are idle and the tick is stopped, we have to touch * the watchdog as we might not schedule for a really long @@ -235,7 +235,7 @@ core_initcall(init_tick_nohz_extended); /* * NOHZ - aka dynamic tick functionality */ -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* * NO HZ enabled ? 
*/ @@ -907,7 +907,7 @@ static inline void tick_check_nohz(int cpu) static inline void tick_nohz_switch_to_nohz(void) { } static inline void tick_check_nohz(int cpu) { } -#endif /* NO_HZ */ +#endif /* CONFIG_NO_HZ_COMMON */ /* * Called from irq_enter to notify about the possible interruption of idle() @@ -992,14 +992,14 @@ void tick_setup_sched_timer(void) now = ktime_get(); } -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON if (tick_nohz_enabled) ts->nohz_mode = NOHZ_MODE_HIGHRES; #endif } #endif /* HIGH_RES_TIMERS */ -#if defined CONFIG_NO_HZ || defined CONFIG_HIGH_RES_TIMERS +#if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS void tick_cancel_sched_timer(int cpu) { struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); diff --git a/kernel/timer.c b/kernel/timer.c index 4e3040b40d16..1b7489fdea41 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -738,7 +738,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, cpu = smp_processor_id(); -#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP) +#if defined(CONFIG_NO_HZ_COMMON) && defined(CONFIG_SMP) if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) cpu = get_nohz_timer_target(); #endif @@ -1188,7 +1188,7 @@ static inline void __run_timers(struct tvec_base *base) spin_unlock_irq(&base->lock); } -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* * Find out when the next timer event is due to happen. This * is used on S/390 to stop all activity when a CPU is idle. -- cgit From 684d5ce4afafddb2ad08f36ea30ca7d7adc88ebe Mon Sep 17 00:00:00 2001 From: Zhenhua HUANG Date: Tue, 2 Apr 2013 13:24:15 +0100 Subject: regulator: ab8500: Introduce aux5, aux6 regulators for AB8540 Introduce aux5, aux6 into ab8540 regulator framework. Signed-off-by: Zhenhua HUANG Signed-off-by: Lee Jones Reviewed-by: Maxime COQUELIN Reviewed-by: David PARIS Reviewed-by: Philippe LANGLAIS Signed-off-by: Mark Brown --- drivers/regulator/ab8500.c | 65 ++++++++++++++++++++++++++++++++++++++++ include/linux/regulator/ab8500.h | 2 ++ 2 files changed, 67 insertions(+) (limited to 'include/linux') diff --git a/drivers/regulator/ab8500.c b/drivers/regulator/ab8500.c index 9cb634807ff8..433cac4396df 100644 --- a/drivers/regulator/ab8500.c +++ b/drivers/regulator/ab8500.c @@ -149,6 +149,21 @@ static const unsigned int ldo_vaux3_ab8540_voltages[] = { 3050000, }; +static const unsigned int ldo_vaux56_ab8540_voltages[] = { + 750000, 760000, 770000, 780000, 790000, 800000, + 810000, 820000, 830000, 840000, 850000, 860000, + 870000, 880000, 890000, 900000, 910000, 920000, + 930000, 940000, 950000, 960000, 970000, 980000, + 990000, 1000000, 1010000, 1020000, 1030000, + 1040000, 1050000, 1060000, 1070000, 1080000, + 1090000, 1100000, 1110000, 1120000, 1130000, + 1140000, 1150000, 1160000, 1170000, 1180000, + 1190000, 1200000, 1210000, 1220000, 1230000, + 1240000, 1250000, 1260000, 1270000, 1280000, + 1290000, 1300000, 1310000, 1320000, 1330000, + 1340000, 1350000, 1360000, 1800000, 2790000, +}; + static const unsigned int ldo_vintcore_voltages[] = { 1200000, 1225000, @@ -1569,6 +1584,54 @@ static struct ab8500_regulator_info .voltage_reg = 0x2f, .voltage_mask = 0x0f, }, + [AB8540_LDO_AUX5] = { + .desc = { + .name = "LDO-AUX5", + .ops = &ab8500_regulator_volt_mode_ops, + .type = REGULATOR_VOLTAGE, + .id = AB8540_LDO_AUX5, + .owner = THIS_MODULE, + .n_voltages = ARRAY_SIZE(ldo_vaux56_ab8540_voltages), + }, + .load_lp_uA = 20000, + /* values for Vaux5Regu register */ + .update_bank = 0x04, + .update_reg = 0x32, + .update_mask = 0x03, + .update_val = 0x01, + 
.update_val_idle = 0x03, + .update_val_normal = 0x01, + /* values for Vaux5SEL register */ + .voltage_bank = 0x04, + .voltage_reg = 0x33, + .voltage_mask = 0x3f, + .voltages = ldo_vaux56_ab8540_voltages, + .voltages_len = ARRAY_SIZE(ldo_vaux56_ab8540_voltages), + }, + [AB8540_LDO_AUX6] = { + .desc = { + .name = "LDO-AUX6", + .ops = &ab8500_regulator_volt_mode_ops, + .type = REGULATOR_VOLTAGE, + .id = AB8540_LDO_AUX6, + .owner = THIS_MODULE, + .n_voltages = ARRAY_SIZE(ldo_vaux56_ab8540_voltages), + }, + .load_lp_uA = 20000, + /* values for Vaux6Regu register */ + .update_bank = 0x04, + .update_reg = 0x35, + .update_mask = 0x03, + .update_val = 0x01, + .update_val_idle = 0x03, + .update_val_normal = 0x01, + /* values for Vaux6SEL register */ + .voltage_bank = 0x04, + .voltage_reg = 0x36, + .voltage_mask = 0x3f, + .voltages = ldo_vaux56_ab8540_voltages, + .voltages_len = ARRAY_SIZE(ldo_vaux56_ab8540_voltages), + }, [AB8540_LDO_INTCORE] = { .desc = { .name = "LDO-INTCORE", @@ -2979,6 +3042,8 @@ static struct of_regulator_match ab8540_regulator_match[] = { { .name = "ab8500_ldo_aux2", .driver_data = (void *) AB8540_LDO_AUX2, }, { .name = "ab8500_ldo_aux3", .driver_data = (void *) AB8540_LDO_AUX3, }, { .name = "ab8500_ldo_aux4", .driver_data = (void *) AB8540_LDO_AUX4, }, + { .name = "ab8500_ldo_aux5", .driver_data = (void *) AB8540_LDO_AUX5, }, + { .name = "ab8500_ldo_aux6", .driver_data = (void *) AB8540_LDO_AUX6, }, { .name = "ab8500_ldo_intcore", .driver_data = (void *) AB8540_LDO_INTCORE, }, { .name = "ab8500_ldo_tvout", .driver_data = (void *) AB8540_LDO_TVOUT, }, { .name = "ab8500_ldo_audio", .driver_data = (void *) AB8540_LDO_AUDIO, }, diff --git a/include/linux/regulator/ab8500.h b/include/linux/regulator/ab8500.h index 44f67e8f1a6d..90b8b5ae9a4e 100644 --- a/include/linux/regulator/ab8500.h +++ b/include/linux/regulator/ab8500.h @@ -74,6 +74,8 @@ enum ab8540_regulator_id { AB8540_LDO_AUX2, AB8540_LDO_AUX3, AB8540_LDO_AUX4, + AB8540_LDO_AUX5, + AB8540_LDO_AUX6, AB8540_LDO_INTCORE, AB8540_LDO_TVOUT, AB8540_LDO_AUDIO, -- cgit From c4e67bbc99ce661808c3ee77b0bb5779d0df11ca Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Tue, 2 Apr 2013 13:24:19 +0100 Subject: ARM: ux500: Pass regulator platform data using the new format Regulator platform data is now passed through a single structure as opposed to the old way where four separate struct elements were required. This patch makes use of the new format.
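[Editor's note: the following sketch is not part of the patch. It shows roughly how a board file might build the single aggregated structure that board-mop500.c points at below; the field names inside struct ab8500_regulator_platform_data are assumptions inferred from the four elements they replace, not copied from the real header.]

/* Hypothetical board-side aggregation under the new single-struct
 * format. Field names (reg_init, num_reg_init, regulator,
 * num_regulator) are assumed for illustration only. */
static struct ab8500_regulator_platform_data ab8500_regulator_plat_data = {
	.reg_init	= ab8500_regulator_reg_init,
	.num_reg_init	= ARRAY_SIZE(ab8500_regulator_reg_init),
	.regulator	= ab8500_regulators,
	.num_regulator	= ARRAY_SIZE(ab8500_regulators),
};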
Signed-off-by: Lee Jones Signed-off-by: Mark Brown --- arch/arm/mach-ux500/board-mop500.c | 5 +---- include/linux/mfd/abx500/ab8500.h | 11 ++--------- 2 files changed, 3 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-ux500/board-mop500.c b/arch/arm/mach-ux500/board-mop500.c index b03457881c4b..b1124bd34a6c 100644 --- a/arch/arm/mach-ux500/board-mop500.c +++ b/arch/arm/mach-ux500/board-mop500.c @@ -198,10 +198,7 @@ static struct platform_device snowball_sbnet_dev = { struct ab8500_platform_data ab8500_platdata = { .irq_base = MOP500_AB8500_IRQ_BASE, - .regulator_reg_init = ab8500_regulator_reg_init, - .num_regulator_reg_init = ARRAY_SIZE(ab8500_regulator_reg_init), - .regulator = ab8500_regulators, - .num_regulator = ARRAY_SIZE(ab8500_regulators), + .regulator = &ab8500_regulator_plat_data, .gpio = &ab8500_gpio_pdata, .codec = &ab8500_codec_pdata, }; diff --git a/include/linux/mfd/abx500/ab8500.h b/include/linux/mfd/abx500/ab8500.h index 9db0bda446a0..84f449475c25 100644 --- a/include/linux/mfd/abx500/ab8500.h +++ b/include/linux/mfd/abx500/ab8500.h @@ -364,8 +364,7 @@ struct ab8500 { const int *irq_reg_offset; }; -struct regulator_reg_init; -struct regulator_init_data; +struct ab8500_regulator_platform_data; struct ab8500_gpio_platform_data; struct ab8500_codec_platform_data; struct ab8500_sysctrl_platform_data; @@ -375,19 +374,13 @@ struct ab8500_sysctrl_platform_data; * @irq_base: start of AB8500 IRQs, AB8500_NR_IRQS will be used * @pm_power_off: Should machine pm power off hook be registered or not * @init: board-specific initialization after detection of ab8500 - * @num_regulator_reg_init: number of regulator init registers - * @regulator_reg_init: regulator init registers - * @num_regulator: number of regulators * @regulator: machine-specific constraints for regulators */ struct ab8500_platform_data { int irq_base; bool pm_power_off; void (*init) (struct ab8500 *); - int num_regulator_reg_init; - struct ab8500_regulator_reg_init *regulator_reg_init; - int num_regulator; - struct regulator_init_data *regulator; + struct ab8500_regulator_platform_data *regulator; struct abx500_gpio_platform_data *gpio; struct ab8500_codec_platform_data *codec; struct ab8500_sysctrl_platform_data *sysctrl; -- cgit From 056b205316cc3dcf8a67cf813a26ff8a72bf3cb9 Mon Sep 17 00:00:00 2001 From: Soren Brinkmann Date: Tue, 2 Apr 2013 15:36:56 -0700 Subject: clk: divider: Introduce CLK_DIVIDER_ALLOW_ZERO flag Dividers which have CLK_DIVIDER_ONE_BASED set have a redundant state, being a divider value of zero. Some hardware implementations allow a zero divider which simply doesn't alter the frequency. I.e., it acts as a divide-by-one, bypassing the divider. This flag is used to handle such HW in the clk-divider model.
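[Editor's note: a usage sketch, not part of the patch. The clock names, MMIO offset, and the shift/width of the divider field are placeholders; only the flag combination is the point.]

/* Registering a one-based divider whose raw value 0 means "bypass"
 * (divide by one), e.g. from a platform's clock-init code. */
static DEFINE_SPINLOCK(periph_div_lock);

clk = clk_register_divider(NULL, "periph_div", "pll_out",
			   0,		/* framework flags */
			   base + 0x10,	/* hypothetical divider register */
			   0, 4,	/* shift, width */
			   CLK_DIVIDER_ONE_BASED | CLK_DIVIDER_ALLOW_ZERO,
			   &periph_div_lock);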
Signed-off-by: Soren Brinkmann Signed-off-by: Mike Turquette --- drivers/clk/clk-divider.c | 5 +++-- include/linux/clk-provider.h | 8 +++++++- 2 files changed, 10 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/clk/clk-divider.c b/drivers/clk/clk-divider.c index 68b402101170..6d9674160430 100644 --- a/drivers/clk/clk-divider.c +++ b/drivers/clk/clk-divider.c @@ -109,8 +109,9 @@ static unsigned long clk_divider_recalc_rate(struct clk_hw *hw, div = _get_div(divider, val); if (!div) { - WARN(1, "%s: Invalid divisor for clock %s\n", __func__, - __clk_get_name(hw->clk)); + WARN(!(divider->flags & CLK_DIVIDER_ALLOW_ZERO), + "%s: Zero divisor and CLK_DIVIDER_ALLOW_ZERO not set\n", + __clk_get_name(hw->clk)); return parent_rate; } diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index b1675074fe7c..9fdfae74d669 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -249,9 +249,14 @@ struct clk_div_table { * CLK_DIVIDER_ONE_BASED - by default the divisor is the value read from the * register plus one. If CLK_DIVIDER_ONE_BASED is set then the divider is * the raw value read from the register, with the value of zero considered - * invalid + * invalid, unless CLK_DIVIDER_ALLOW_ZERO is set. * CLK_DIVIDER_POWER_OF_TWO - clock divisor is 2 raised to the value read from * the hardware register + * CLK_DIVIDER_ALLOW_ZERO - Allow zero divisors. For dividers which have + * CLK_DIVIDER_ONE_BASED set, it is possible to end up with a zero divisor. + * Some hardware implementations gracefully handle this case and allow a + * zero divisor by not modifying their input clock + * (divide by one / bypass). */ struct clk_divider { struct clk_hw hw; @@ -265,6 +270,7 @@ struct clk_divider { #define CLK_DIVIDER_ONE_BASED BIT(0) #define CLK_DIVIDER_POWER_OF_TWO BIT(1) +#define CLK_DIVIDER_ALLOW_ZERO BIT(2) extern const struct clk_ops clk_divider_ops; struct clk *clk_register_divider(struct device *dev, const char *name, -- cgit From d8668fcb0b257d9fdcfbe5c172a99b8d85e1cd82 Mon Sep 17 00:00:00 2001 From: Shan Hai Date: Mon, 18 Mar 2013 10:30:43 +0800 Subject: libata: Use integer return value for atapi_command_packet_set The function returns the type of ATAPI drives, so it should return an integer value. Commit 4dce8ba94c7 (libata: Use 'bool' return value for ata_id_XXX), in v2.6.39, changed the type of the return value from int to bool; the change caused all of the ATAPI class drives to be treated as TYPE_TAPE and the max_sectors of the drives to be set to 65535 because of commit f8d8e5799b7 (libata: increase 128 KB / cmd limit for ATAPI tape drives), since the function would return true for all ATAPI class drives and TYPE_TAPE is defined as 0x01.
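[Editor's note: an illustration of the truncation being fixed, not part of the patch. The 0x05 command packet set (a CD-ROM class device) is a made-up example value.]

/* Word 0 of the IDENTIFY data carries the command packet set in
 * bits 12:8. With a bool return, every non-zero set collapses to 1,
 * and 1 happens to be TYPE_TAPE, so a CD-ROM was treated as a tape
 * drive and had its max_sectors limit raised to 65535. */
u16 config = 0x05 << 8;			/* hypothetical CD-ROM IDENTIFY word */
int  as_int  = (config >> 8) & 0x1f;	/* 0x05: classified correctly */
bool as_bool = (config >> 8) & 0x1f;	/* true, i.e. 1 == TYPE_TAPE: wrong */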
Cc: stable@vger.kernel.org Signed-off-by: Shan Hai Signed-off-by: Jeff Garzik --- include/linux/ata.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ata.h b/include/linux/ata.h index 8f7a3d68371a..ee0bd9524055 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -954,7 +954,7 @@ static inline int atapi_cdb_len(const u16 *dev_id) } } -static inline bool atapi_command_packet_set(const u16 *dev_id) +static inline int atapi_command_packet_set(const u16 *dev_id) { return (dev_id[ATA_ID_CONFIG] >> 8) & 0x1f; } -- cgit From a32450e127fc6e5ca6d958ceb3cfea4d30a00846 Mon Sep 17 00:00:00 2001 From: Shan Hai Date: Mon, 18 Mar 2013 10:30:44 +0800 Subject: libata: Set max sector to 65535 for Slimtype DVD A DS8A8SH drive The Slimtype DVD A DS8A8SH drive locks up when its max sector count is smaller than 65535, and the backtrace below is observed when it locks up: INFO: task flush-8:32:1130 blocked for more than 120 seconds. "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. flush-8:32 D ffffffff8180cf60 0 1130 2 0x00000000 ffff880273aef618 0000000000000046 0000000000000005 ffff880273aee000 ffff880273aee000 ffff880273aeffd8 ffff880273aee010 ffff880273aee000 ffff880273aeffd8 ffff880273aee000 ffff88026e842ea0 ffff880274a10000 Call Trace: [] schedule+0x5d/0x70 [] io_schedule+0x8c/0xd0 [] get_request+0x731/0x7d0 [] ? cfq_allow_merge+0x50/0x90 [] ? wake_up_bit+0x40/0x40 [] ? bio_attempt_back_merge+0x33/0x110 [] blk_queue_bio+0x23a/0x3f0 [] generic_make_request+0xc6/0x120 [] submit_bio+0x138/0x160 [] ? bio_alloc_bioset+0x96/0x120 [] submit_bh+0x1f1/0x220 [] __block_write_full_page+0x228/0x340 [] ? attach_nobh_buffers+0xc0/0xc0 [] ? I_BDEV+0x10/0x10 [] ? I_BDEV+0x10/0x10 [] block_write_full_page_endio+0xe6/0x100 [] block_write_full_page+0x15/0x20 [] blkdev_writepage+0x18/0x20 [] __writepage+0x17/0x40 [] write_cache_pages+0x34a/0x4a0 [] ? set_page_dirty+0x70/0x70 [] generic_writepages+0x51/0x80 [] do_writepages+0x20/0x50 [] __writeback_single_inode+0xa6/0x2b0 [] writeback_sb_inodes+0x311/0x4d0 [] __writeback_inodes_wb+0x86/0xd0 [] wb_writeback+0x1a3/0x330 [] ? _raw_spin_lock_irqsave+0x3f/0x50 [] ? get_nr_inodes+0x52/0x70 [] wb_do_writeback+0x1dc/0x260 [] ? schedule_timeout+0x204/0x240 [] bdi_writeback_thread+0x102/0x2b0 [] ? wb_do_writeback+0x260/0x260 [] kthread+0xc0/0xd0 [] ? kthread_worker_fn+0x1b0/0x1b0 [] ret_from_fork+0x7c/0xb0 [] ? kthread_worker_fn+0x1b0/0x1b0 The above trace was triggered by "dd if=/dev/zero of=/dev/sr0 bs=2048 count=32768" It was previously working by accident, since another bug introduced by 4dce8ba94c7 (libata: Use 'bool' return value for ata_id_XXX) caused all drives to use maxsect=65535.
Cc: stable@vger.kernel.org Signed-off-by: Shan Hai Signed-off-by: Jeff Garzik --- drivers/ata/libata-core.c | 4 ++++ include/linux/libata.h | 1 + 2 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 497adea1f0d6..0075944a64dc 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -2439,6 +2439,9 @@ int ata_dev_configure(struct ata_device *dev) dev->max_sectors = min_t(unsigned int, ATA_MAX_SECTORS_128, dev->max_sectors); + if (dev->horkage & ATA_HORKAGE_MAX_SEC_LBA48) + dev->max_sectors = ATA_MAX_SECTORS_LBA48; + if (ap->ops->dev_config) ap->ops->dev_config(dev); @@ -4100,6 +4103,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { /* Weird ATAPI devices */ { "TORiSAN DVD-ROM DRD-N216", NULL, ATA_HORKAGE_MAX_SEC_128 }, { "QUANTUM DAT DAT72-000", NULL, ATA_HORKAGE_ATAPI_MOD16_DMA }, + { "Slimtype DVD A DS8A8SH", NULL, ATA_HORKAGE_MAX_SEC_LBA48 }, /* Devices we expect to fail diagnostics */ diff --git a/include/linux/libata.h b/include/linux/libata.h index 91c9d109e5f1..eae7a053dc51 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -398,6 +398,7 @@ enum { ATA_HORKAGE_NOSETXFER = (1 << 14), /* skip SETXFER, SATA only */ ATA_HORKAGE_BROKEN_FPDMA_AA = (1 << 15), /* skip AA */ ATA_HORKAGE_DUMP_ID = (1 << 16), /* dump IDENTIFY data */ + ATA_HORKAGE_MAX_SEC_LBA48 = (1 << 17), /* Set max sects to 65535 */ /* DMA mask for user DMA control: User visible values; DO NOT renumber */ -- cgit From d76a3a77113db020d9bb1e894822869410450bd9 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 3 Apr 2013 22:02:52 -0400 Subject: ext4/jbd2: don't wait (forever) for stale tid caused by wraparound In the case where an inode has a very stale transaction id (tid) in i_datasync_tid or i_sync_tid, it's possible that after a very large (2**31) number of transactions, the tid number space might wrap, causing tid_geq()'s calculations to fail. Commit deeeaf13 "jbd2: fix fsync() tid wraparound bug", later modified by commit e7b04ac0 "jbd2: don't wake kjournald unnecessarily", attempted to fix this problem, but it only avoided kjournald spinning forever by fixing the logic in jbd2_log_start_commit(). Unfortunately, in the codepaths in fs/ext4/fsync.c and fs/ext4/inode.c that might call jbd2_log_start_commit() with a stale tid, those functions will subsequently call jbd2_log_wait_commit() with the same stale tid, and then wait for a very long time. To fix this, we replace the calls to jbd2_log_start_commit() and jbd2_log_wait_commit() with a call to a new function, jbd2_complete_transaction(), which will correctly handle stale tids. As a bonus, jbd2_complete_transaction() will avoid locking j_state_lock for writing unless a commit needs to be started. This should have a small (but probably not measurable) improvement for ext4's scalability.
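[Editor's note: a sketch of the wraparound arithmetic, not part of the patch. tid_geq() below mirrors the comparison helper in include/linux/jbd2.h; the concrete numbers are invented.]

typedef unsigned int tid_t;

/* Valid only while the two tids are less than 2^31 apart: beyond
 * that, the signed difference flips sign. */
static inline int tid_geq(tid_t x, tid_t y)
{
	int difference = (x - y);
	return (difference >= 0);
}

/* After ~2^31 commits, an ancient tid of 10 no longer compares as
 * completed: tid_geq(0x8000000a, 10) returns 0, so code that waits
 * until the commit sequence reaches that stale tid sleeps forever.
 * jbd2_complete_transaction() sidesteps this by checking the
 * running/committing transactions instead of comparing raw tids. */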
Signed-off-by: "Theodore Ts'o" Reported-by: Ben Hutchings Reported-by: George Barnett Cc: stable@vger.kernel.org --- fs/ext4/fsync.c | 3 +-- fs/ext4/inode.c | 3 +-- fs/jbd2/journal.c | 31 +++++++++++++++++++++++++++++++ include/linux/jbd2.h | 1 + 4 files changed, 34 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 3278e64e57b6..e0ba8a408def 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -166,8 +166,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) if (journal->j_flags & JBD2_BARRIER && !jbd2_trans_will_send_data_barrier(journal, commit_tid)) needs_barrier = true; - jbd2_log_start_commit(journal, commit_tid); - ret = jbd2_log_wait_commit(journal, commit_tid); + ret = jbd2_complete_transaction(journal, commit_tid); if (needs_barrier) { err = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); if (!ret) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 56ebd662033b..addba9e0a1a4 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -210,8 +210,7 @@ void ext4_evict_inode(struct inode *inode) journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; tid_t commit_tid = EXT4_I(inode)->i_datasync_tid; - jbd2_log_start_commit(journal, commit_tid); - jbd2_log_wait_commit(journal, commit_tid); + jbd2_complete_transaction(journal, commit_tid); filemap_write_and_wait(&inode->i_data); } truncate_inode_pages(&inode->i_data, 0); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index ed10991ab006..886ec2faa9b4 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -709,6 +709,37 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid) return err; } +/* + * When this function returns, the transaction corresponding to tid + * will have completed. If the transaction is currently running, start + * committing that transaction before waiting for it to complete. If + * the transaction id is stale, it is by definition already completed, + * so just return SUCCESS.
+ */ +int jbd2_complete_transaction(journal_t *journal, tid_t tid) +{ + int need_to_wait = 1; + + read_lock(&journal->j_state_lock); + if (journal->j_running_transaction && + journal->j_running_transaction->t_tid == tid) { + if (journal->j_commit_request != tid) { + /* transaction not yet started, so request it */ + read_unlock(&journal->j_state_lock); + jbd2_log_start_commit(journal, tid); + goto wait_commit; + } + } else if (!(journal->j_committing_transaction && + journal->j_committing_transaction->t_tid == tid)) + need_to_wait = 0; + read_unlock(&journal->j_state_lock); + if (!need_to_wait) + return 0; +wait_commit: + return jbd2_log_wait_commit(journal, tid); +} +EXPORT_SYMBOL(jbd2_complete_transaction); + /* * Log buffer allocation routines: */ diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 50e5a5e6a712..f0289754b464 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1200,6 +1200,7 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t tid); int jbd2_journal_start_commit(journal_t *journal, tid_t *tid); int jbd2_journal_force_commit_nested(journal_t *journal); int jbd2_log_wait_commit(journal_t *journal, tid_t tid); +int jbd2_complete_transaction(journal_t *journal, tid_t tid); int jbd2_log_do_checkpoint(journal_t *journal); int jbd2_trans_will_send_data_barrier(journal_t *journal, tid_t tid); -- cgit From 794446c6946513c684d448205fbd76fa35f38b72 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Wed, 3 Apr 2013 22:06:52 -0400 Subject: jbd2: fix race between jbd2_journal_remove_checkpoint and ->j_commit_callback The following race is possible: [kjournald2] other_task jbd2_journal_commit_transaction() j_state = T_FINISHED; spin_unlock(&journal->j_list_lock); ->jbd2_journal_remove_checkpoint() ->jbd2_journal_free_transaction(); ->kmem_cache_free(transaction) ->j_commit_callback(journal, transaction); -> USE_AFTER_FREE WARNING: at lib/list_debug.c:62 __list_del_entry+0x1c0/0x250() Hardware name: list_del corruption. prev->next should be ffff88019a4ec198, but was 6b6b6b6b6b6b6b6b Modules linked in: cpufreq_ondemand acpi_cpufreq freq_table mperf coretemp kvm_intel kvm crc32c_intel ghash_clmulni_intel microcode sg xhci_hcd button sd_mod crc_t10dif aesni_intel ablk_helper cryptd lrw aes_x86_64 xts gf128mul ahci libahci pata_acpi ata_generic dm_mirror dm_region_hash dm_log dm_mod Pid: 16400, comm: jbd2/dm-1-8 Tainted: G W 3.8.0-rc3+ #107 Call Trace: [] warn_slowpath_common+0xad/0xf0 [] warn_slowpath_fmt+0x46/0x50 [] ? ext4_journal_commit_callback+0x99/0xc0 [] __list_del_entry+0x1c0/0x250 [] ext4_journal_commit_callback+0x6f/0xc0 [] jbd2_journal_commit_transaction+0x23a6/0x2570 [] ? try_to_del_timer_sync+0x82/0xa0 [] ? del_timer_sync+0x91/0x1e0 [] kjournald2+0x19f/0x6a0 [] ? wake_up_bit+0x40/0x40 [] ? bit_spin_lock+0x80/0x80 [] kthread+0x10e/0x120 [] ? __init_kthread_worker+0x70/0x70 [] ret_from_fork+0x7c/0xb0 [] ? __init_kthread_worker+0x70/0x70 In order to demonstrate this issue, one should mount ext4 with the -o discard mount option on an SSD disk. This makes the callback take longer, and the race window becomes wider.
In order to fix this, we should mark the transaction as finished only after the callbacks have completed. Signed-off-by: Dmitry Monakhov Signed-off-by: "Theodore Ts'o" Cc: stable@vger.kernel.org --- fs/jbd2/commit.c | 50 ++++++++++++++++++++++++++++---------------------- include/linux/jbd2.h | 1 + 2 files changed, 29 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 750c70148eff..0f53946f13c1 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -382,7 +382,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) int space_left = 0; int first_tag = 0; int tag_flag; - int i, to_free = 0; + int i; int tag_bytes = journal_tag_bytes(journal); struct buffer_head *cbh = NULL; /* For transactional checksums */ __u32 crc32_sum = ~0; @@ -1134,7 +1134,7 @@ restart_loop: journal->j_stats.run.rs_blocks_logged += stats.run.rs_blocks_logged; spin_unlock(&journal->j_history_lock); - commit_transaction->t_state = T_FINISHED; + commit_transaction->t_state = T_COMMIT_CALLBACK; J_ASSERT(commit_transaction == journal->j_committing_transaction); journal->j_commit_sequence = commit_transaction->t_tid; journal->j_committing_transaction = NULL; @@ -1149,38 +1149,44 @@ restart_loop: journal->j_average_commit_time*3) / 4; else journal->j_average_commit_time = commit_time; + write_unlock(&journal->j_state_lock); - if (commit_transaction->t_checkpoint_list == NULL && - commit_transaction->t_checkpoint_io_list == NULL) { - __jbd2_journal_drop_transaction(journal, commit_transaction); - to_free = 1; + if (journal->j_checkpoint_transactions == NULL) { + journal->j_checkpoint_transactions = commit_transaction; + commit_transaction->t_cpnext = commit_transaction; + commit_transaction->t_cpprev = commit_transaction; } else { - if (journal->j_checkpoint_transactions == NULL) { - journal->j_checkpoint_transactions = commit_transaction; - commit_transaction->t_cpnext = commit_transaction; - commit_transaction->t_cpprev = commit_transaction; - } else { - commit_transaction->t_cpnext = - journal->j_checkpoint_transactions; - commit_transaction->t_cpprev = - commit_transaction->t_cpnext->t_cpprev; - commit_transaction->t_cpnext->t_cpprev = - commit_transaction; - commit_transaction->t_cpprev->t_cpnext = + commit_transaction->t_cpnext = + journal->j_checkpoint_transactions; + commit_transaction->t_cpprev = + commit_transaction->t_cpnext->t_cpprev; + commit_transaction->t_cpnext->t_cpprev = + commit_transaction; + commit_transaction->t_cpprev->t_cpnext = commit_transaction; - } } spin_unlock(&journal->j_list_lock); - + /* Drop all spin_locks because commit_callback may block.
+ * __journal_remove_checkpoint() can not destroy transaction + * under us because it is not marked as T_FINISHED yet */ if (journal->j_commit_callback) journal->j_commit_callback(journal, commit_transaction); trace_jbd2_end_commit(journal, commit_transaction); jbd_debug(1, "JBD2: commit %d complete, head %d\n", journal->j_commit_sequence, journal->j_tail_sequence); - if (to_free) - jbd2_journal_free_transaction(commit_transaction); + write_lock(&journal->j_state_lock); + spin_lock(&journal->j_list_lock); + commit_transaction->t_state = T_FINISHED; + /* Recheck checkpoint lists after j_list_lock was dropped */ + if (commit_transaction->t_checkpoint_list == NULL && + commit_transaction->t_checkpoint_io_list == NULL) { + __jbd2_journal_drop_transaction(journal, commit_transaction); + jbd2_journal_free_transaction(commit_transaction); + } + spin_unlock(&journal->j_list_lock); + write_unlock(&journal->j_state_lock); wake_up(&journal->j_wait_done_commit); } diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index f0289754b464..f9fe88957b7a 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -480,6 +480,7 @@ struct transaction_s T_COMMIT, T_COMMIT_DFLUSH, T_COMMIT_JFLUSH, + T_COMMIT_CALLBACK, T_FINISHED } t_state; -- cgit From c31ad081e8734aab3fb45d2f32e9969994dd076e Mon Sep 17 00:00:00 2001 From: Arve Hjønnevåg Date: Tue, 22 May 2012 16:33:23 -0700 Subject: pstore/ram: Allow specifying ecc parameters in platform data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow specifying ecc parameters in platform data Signed-off-by: Arve Hjønnevåg [jstultz: Tweaked commit subject & add commit message] Signed-off-by: John Stultz Acked-by: Kees Cook Signed-off-by: Anton Vorontsov --- fs/pstore/ram.c | 15 ++++++----- fs/pstore/ram_core.c | 64 ++++++++++++++++++++++++---------------------- include/linux/pstore_ram.h | 14 +++++++--- 3 files changed, 52 insertions(+), 41 deletions(-) (limited to 'include/linux') diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 38babb3a9384..a5ee252c264e 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -83,7 +83,7 @@ struct ramoops_context { size_t console_size; size_t ftrace_size; int dump_oops; - int ecc_size; + struct persistent_ram_ecc_info ecc_info; unsigned int max_dump_cnt; unsigned int dump_write_cnt; unsigned int dump_read_cnt; @@ -322,7 +322,8 @@ static int ramoops_init_przs(struct device *dev, struct ramoops_context *cxt, for (i = 0; i < cxt->max_dump_cnt; i++) { size_t sz = cxt->record_size; - cxt->przs[i] = persistent_ram_new(*paddr, sz, 0, cxt->ecc_size); + cxt->przs[i] = persistent_ram_new(*paddr, sz, 0, + &cxt->ecc_info); if (IS_ERR(cxt->przs[i])) { err = PTR_ERR(cxt->przs[i]); dev_err(dev, "failed to request mem region (0x%zx@0x%llx): %d\n", @@ -352,7 +353,7 @@ static int ramoops_init_prz(struct device *dev, struct ramoops_context *cxt, return -ENOMEM; } - *prz = persistent_ram_new(*paddr, sz, sig, cxt->ecc_size); + *prz = persistent_ram_new(*paddr, sz, sig, &cxt->ecc_info); if (IS_ERR(*prz)) { int err = PTR_ERR(*prz); @@ -406,7 +407,7 @@ static int ramoops_probe(struct platform_device *pdev) cxt->console_size = pdata->console_size; cxt->ftrace_size = pdata->ftrace_size; cxt->dump_oops = pdata->dump_oops; - cxt->ecc_size = pdata->ecc_size; + cxt->ecc_info = pdata->ecc_info; paddr = cxt->phys_addr; @@ -464,9 +465,9 @@ static int ramoops_probe(struct platform_device *pdev) record_size = pdata->record_size; dump_oops = pdata->dump_oops; - pr_info("attached 0x%lx@0x%llx, ecc: %d\n", + 
pr_info("attached 0x%lx@0x%llx, ecc: %d/%d\n", cxt->size, (unsigned long long)cxt->phys_addr, - cxt->ecc_size); + cxt->ecc_info.ecc_size, cxt->ecc_info.block_size); return 0; @@ -538,7 +539,7 @@ static void ramoops_register_dummy(void) * For backwards compatibility ramoops.ecc=1 means 16 bytes ECC * (using 1 byte for ECC isn't much of use anyway). */ - dummy_data->ecc_size = ramoops_ecc == 1 ? 16 : ramoops_ecc; + dummy_data->ecc_info.ecc_size = ramoops_ecc == 1 ? 16 : ramoops_ecc; dummy = platform_device_register_data(NULL, "ramoops", -1, dummy_data, sizeof(struct ramoops_platform_data)); diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c index e5afa222c213..c6f641c10179 100644 --- a/fs/pstore/ram_core.c +++ b/fs/pstore/ram_core.c @@ -82,12 +82,12 @@ static void notrace persistent_ram_encode_rs8(struct persistent_ram_zone *prz, uint8_t *data, size_t len, uint8_t *ecc) { int i; - uint16_t par[prz->ecc_size]; + uint16_t par[prz->ecc_info.ecc_size]; /* Initialize the parity buffer */ memset(par, 0, sizeof(par)); encode_rs8(prz->rs_decoder, data, len, par, 0); - for (i = 0; i < prz->ecc_size; i++) + for (i = 0; i < prz->ecc_info.ecc_size; i++) ecc[i] = par[i]; } @@ -95,9 +95,9 @@ static int persistent_ram_decode_rs8(struct persistent_ram_zone *prz, void *data, size_t len, uint8_t *ecc) { int i; - uint16_t par[prz->ecc_size]; + uint16_t par[prz->ecc_info.ecc_size]; - for (i = 0; i < prz->ecc_size; i++) + for (i = 0; i < prz->ecc_info.ecc_size; i++) par[i] = ecc[i]; return decode_rs8(prz->rs_decoder, data, par, len, NULL, 0, NULL, 0, NULL); @@ -110,15 +110,15 @@ static void notrace persistent_ram_update_ecc(struct persistent_ram_zone *prz, uint8_t *buffer_end = buffer->data + prz->buffer_size; uint8_t *block; uint8_t *par; - int ecc_block_size = prz->ecc_block_size; - int ecc_size = prz->ecc_size; - int size = prz->ecc_block_size; + int ecc_block_size = prz->ecc_info.block_size; + int ecc_size = prz->ecc_info.ecc_size; + int size = ecc_block_size; - if (!prz->ecc_size) + if (!ecc_size) return; block = buffer->data + (start & ~(ecc_block_size - 1)); - par = prz->par_buffer + (start / ecc_block_size) * prz->ecc_size; + par = prz->par_buffer + (start / ecc_block_size) * ecc_size; do { if (block + ecc_block_size > buffer_end) @@ -133,7 +133,7 @@ static void persistent_ram_update_header_ecc(struct persistent_ram_zone *prz) { struct persistent_ram_buffer *buffer = prz->buffer; - if (!prz->ecc_size) + if (!prz->ecc_info.ecc_size) return; persistent_ram_encode_rs8(prz, (uint8_t *)buffer, sizeof(*buffer), @@ -146,14 +146,14 @@ static void persistent_ram_ecc_old(struct persistent_ram_zone *prz) uint8_t *block; uint8_t *par; - if (!prz->ecc_size) + if (!prz->ecc_info.ecc_size) return; block = buffer->data; par = prz->par_buffer; while (block < buffer->data + buffer_size(prz)) { int numerr; - int size = prz->ecc_block_size; + int size = prz->ecc_info.block_size; if (block + size > buffer->data + prz->buffer_size) size = buffer->data + prz->buffer_size - block; numerr = persistent_ram_decode_rs8(prz, block, size, par); @@ -166,45 +166,49 @@ static void persistent_ram_ecc_old(struct persistent_ram_zone *prz) block); prz->bad_blocks++; } - block += prz->ecc_block_size; - par += prz->ecc_size; + block += prz->ecc_info.block_size; + par += prz->ecc_info.ecc_size; } } static int persistent_ram_init_ecc(struct persistent_ram_zone *prz, - int ecc_size) + struct persistent_ram_ecc_info *ecc_info) { int numerr; struct persistent_ram_buffer *buffer = prz->buffer; int ecc_blocks; size_t ecc_total; - int 
ecc_symsize = 8; - int ecc_poly = 0x11d; - if (!ecc_size) + if (!ecc_info || !ecc_info->ecc_size) return 0; - prz->ecc_block_size = 128; - prz->ecc_size = ecc_size; + prz->ecc_info.block_size = ecc_info->block_size ?: 128; + prz->ecc_info.ecc_size = ecc_info->ecc_size ?: 16; + prz->ecc_info.symsize = ecc_info->symsize ?: 8; + prz->ecc_info.poly = ecc_info->poly ?: 0x11d; - ecc_blocks = DIV_ROUND_UP(prz->buffer_size - prz->ecc_size, - prz->ecc_block_size + prz->ecc_size); - ecc_total = (ecc_blocks + 1) * prz->ecc_size; + ecc_blocks = DIV_ROUND_UP(prz->buffer_size - prz->ecc_info.ecc_size, + prz->ecc_info.block_size + + prz->ecc_info.ecc_size); + ecc_total = (ecc_blocks + 1) * prz->ecc_info.ecc_size; if (ecc_total >= prz->buffer_size) { pr_err("%s: invalid ecc_size %u (total %zu, buffer size %zu)\n", - __func__, prz->ecc_size, ecc_total, prz->buffer_size); + __func__, prz->ecc_info.ecc_size, + ecc_total, prz->buffer_size); return -EINVAL; } prz->buffer_size -= ecc_total; prz->par_buffer = buffer->data + prz->buffer_size; - prz->par_header = prz->par_buffer + ecc_blocks * prz->ecc_size; + prz->par_header = prz->par_buffer + + ecc_blocks * prz->ecc_info.ecc_size; /* * first consecutive root is 0 * primitive element to generate roots = 1 */ - prz->rs_decoder = init_rs(ecc_symsize, ecc_poly, 0, 1, prz->ecc_size); + prz->rs_decoder = init_rs(prz->ecc_info.symsize, prz->ecc_info.poly, + 0, 1, prz->ecc_info.ecc_size); if (prz->rs_decoder == NULL) { pr_info("persistent_ram: init_rs failed\n"); return -EINVAL; @@ -392,11 +396,11 @@ static int persistent_ram_buffer_map(phys_addr_t start, phys_addr_t size, } static int persistent_ram_post_init(struct persistent_ram_zone *prz, u32 sig, - int ecc_size) + struct persistent_ram_ecc_info *ecc_info) { int ret; - ret = persistent_ram_init_ecc(prz, ecc_size); + ret = persistent_ram_init_ecc(prz, ecc_info); if (ret) return ret; @@ -445,7 +449,7 @@ void persistent_ram_free(struct persistent_ram_zone *prz) } struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size, - u32 sig, int ecc_size) + u32 sig, struct persistent_ram_ecc_info *ecc_info) { struct persistent_ram_zone *prz; int ret = -ENOMEM; @@ -460,7 +464,7 @@ struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size, if (ret) goto err; - ret = persistent_ram_post_init(prz, sig, ecc_size); + ret = persistent_ram_post_init(prz, sig, ecc_info); if (ret) goto err; diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h index cb6ab5feab67..9974975d40db 100644 --- a/include/linux/pstore_ram.h +++ b/include/linux/pstore_ram.h @@ -26,6 +26,13 @@ struct persistent_ram_buffer; struct rs_control; +struct persistent_ram_ecc_info { + int block_size; + int ecc_size; + int symsize; + int poly; +}; + struct persistent_ram_zone { phys_addr_t paddr; size_t size; @@ -39,15 +46,14 @@ struct persistent_ram_zone { struct rs_control *rs_decoder; int corrected_bytes; int bad_blocks; - int ecc_block_size; - int ecc_size; + struct persistent_ram_ecc_info ecc_info; char *old_log; size_t old_log_size; }; struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size, - u32 sig, int ecc_size); + u32 sig, struct persistent_ram_ecc_info *ecc_info); void persistent_ram_free(struct persistent_ram_zone *prz); void persistent_ram_zap(struct persistent_ram_zone *prz); @@ -74,7 +80,7 @@ struct ramoops_platform_data { unsigned long console_size; unsigned long ftrace_size; int dump_oops; - int ecc_size; + struct persistent_ram_ecc_info ecc_info; }; #endif -- cgit From 
3daf37260e965aa4bb060db99c2ed10b28109e04 Mon Sep 17 00:00:00 2001 From: Tony Prisk Date: Sat, 23 Mar 2013 17:02:15 +1300 Subject: of: Add support for reading a u32 from a multi-value property. This patch adds an of_property_read_u32_index() function to allow reading a single indexed u32 value from a property containing multiple u32 values. Signed-off-by: Tony Prisk Reviewed-by: Stephen Warren Acked-by: Linus Walleij Acked-by: Rob Herring --- drivers/of/base.c | 33 +++++++++++++++++++++++++++++++++ include/linux/of.h | 9 +++++++++ 2 files changed, 42 insertions(+) (limited to 'include/linux') diff --git a/drivers/of/base.c b/drivers/of/base.c index 321d3ef05006..f6c89ed38db9 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -745,6 +745,39 @@ struct device_node *of_find_node_by_phandle(phandle handle) } EXPORT_SYMBOL(of_find_node_by_phandle); +/** + * of_property_read_u32_index - Find and read a u32 from a multi-value property. + * + * @np: device node from which the property value is to be read. + * @propname: name of the property to be searched. + * @index: index of the u32 in the list of values + * @out_value: pointer to return value, modified only if no error. + * + * Search for a property in a device node and read nth 32-bit value from + * it. Returns 0 on success, -EINVAL if the property does not exist, + * -ENODATA if property does not have a value, and -EOVERFLOW if the + * property data isn't large enough. + * + * The out_value is modified only if a valid u32 value can be decoded. + */ +int of_property_read_u32_index(const struct device_node *np, + const char *propname, + u32 index, u32 *out_value) +{ + struct property *prop = of_find_property(np, propname, NULL); + + if (!prop) + return -EINVAL; + if (!prop->value) + return -ENODATA; + if (((index + 1) * sizeof(*out_value)) > prop->length) + return -EOVERFLOW; + + *out_value = be32_to_cpup(((__be32 *)prop->value) + index); + return 0; +} +EXPORT_SYMBOL_GPL(of_property_read_u32_index); + /** * of_property_read_u8_array - Find and read an array of u8 from a property. * diff --git a/include/linux/of.h b/include/linux/of.h index a0f129284948..c0747a44eaff 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -235,6 +235,9 @@ extern struct device_node *of_find_node_with_property( extern struct property *of_find_property(const struct device_node *np, const char *name, int *lenp); +extern int of_property_read_u32_index(const struct device_node *np, + const char *propname, + u32 index, u32 *out_value); extern int of_property_read_u8_array(const struct device_node *np, const char *propname, u8 *out_values, size_t sz); extern int of_property_read_u16_array(const struct device_node *np, @@ -394,6 +397,12 @@ static inline struct device_node *of_find_compatible_node( return NULL; } +static inline int of_property_read_u32_index(const struct device_node *np, + const char *propname, u32 index, u32 *out_value) +{ + return -ENOSYS; +} + static inline int of_property_read_u8_array(const struct device_node *np, const char *propname, u8 *out_values, size_t sz) { -- cgit From 3af4ae1e4b57a663fff9cfe711c84fefb6ec966f Mon Sep 17 00:00:00 2001 From: Tony Prisk Date: Wed, 3 Apr 2013 07:20:34 +1300 Subject: video: vt8500: Remove unused platform_data/video-vt8500lcdfb.h With the conversion to devicetree only for arch-vt8500, this header is no longer required. This patch removes the #include from the two framebuffer drivers that used it, and the header file. 
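As a usage sketch only (not code from this patch): with the platform data gone, a devicetree-only probe would pull equivalent values from DT properties, for example via the of_property_read_u32_index() helper introduced in the previous patch. The property name and function below are invented for illustration:

#include <linux/of.h>

/*
 * Hypothetical example; "display-timings-raw" and this function are
 * not from the vt8500 driver. Reads the third u32 cell of a
 * multi-value property such as:
 *     display-timings-raw = <800 480 36000000>;
 * Returns 0, or -EINVAL/-ENODATA/-EOVERFLOW as documented above.
 */
static int example_read_pixclock(struct device_node *np, u32 *pixclk)
{
	return of_property_read_u32_index(np, "display-timings-raw", 2, pixclk);
}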
Signed-off-by: Tony Prisk Reviewed-by: Jean-Christophe Plagniol-Villard Signed-off-by: Tomi Valkeinen --- drivers/video/vt8500lcdfb.c | 2 -- drivers/video/wm8505fb.c | 2 -- include/linux/platform_data/video-vt8500lcdfb.h | 31 ------------------------- 3 files changed, 35 deletions(-) delete mode 100644 include/linux/platform_data/video-vt8500lcdfb.h (limited to 'include/linux') diff --git a/drivers/video/vt8500lcdfb.c b/drivers/video/vt8500lcdfb.c index aa2579c2364a..2ff2312a16ac 100644 --- a/drivers/video/vt8500lcdfb.c +++ b/drivers/video/vt8500lcdfb.c @@ -30,8 +30,6 @@ #include #include -#include <linux/platform_data/video-vt8500lcdfb.h> - #include "vt8500lcdfb.h" #include "wmt_ge_rops.h" diff --git a/drivers/video/wm8505fb.c b/drivers/video/wm8505fb.c index 4dd0580f96fd..fe9afd60a018 100644 --- a/drivers/video/wm8505fb.c +++ b/drivers/video/wm8505fb.c @@ -32,8 +32,6 @@ #include #include -#include <linux/platform_data/video-vt8500lcdfb.h> - #include "wm8505fb_regs.h" #include "wmt_ge_rops.h" diff --git a/include/linux/platform_data/video-vt8500lcdfb.h b/include/linux/platform_data/video-vt8500lcdfb.h deleted file mode 100644 index 7f399c370fe0..000000000000 --- a/include/linux/platform_data/video-vt8500lcdfb.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * VT8500/WM8505 Frame Buffer platform data definitions - * - * Copyright (C) 2010 Ed Spiridonov - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#ifndef _VT8500FB_H -#define _VT8500FB_H - -#include <linux/fb.h> - -struct vt8500fb_platform_data { - struct fb_videomode mode; - u32 xres_virtual; - u32 yres_virtual; - u32 bpp; - unsigned long video_mem_phys; - void *video_mem_virt; - unsigned long video_mem_len; -}; - -#endif /* _VT8500FB_H */ -- cgit From 0908ad6e56b5a6e86745680bc324bdbfac64d0b6 Mon Sep 17 00:00:00 2001 From: Ananth N Mavinakayanahalli Date: Fri, 22 Mar 2013 20:46:27 +0530 Subject: uprobes: Add trap variant helper Some architectures like powerpc have multiple variants of the trap instruction. Introduce an additional helper is_trap_insn() for run-time handling of non-uprobe traps on such architectures. While there, change is_swbp_at_addr() to is_trap_at_addr() for reading clarity. With this change, the uprobe registration path will supersede any trap instruction inserted at the requested location, while taking care of delivering the SIGTRAP for cases where the trap notification came in for an address without a uprobe. See [1] for a more detailed explanation. [1] https://lists.ozlabs.org/pipermail/linuxppc-dev/2013-March/104771.html This change was suggested by Oleg Nesterov.
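To illustrate the intended use (a sketch under stated assumptions, not code from this patch), an architecture with several trap encodings could override the weak helper along these lines:

#include <linux/uprobes.h>

/*
 * Hypothetical arch override (sketch only): the generic is_trap_insn()
 * below is declared __weak, so an architecture's own definition takes
 * precedence. ARCH_INSN_IS_TRAP() is a made-up stand-in for the
 * architecture's real trap-opcode tests, not an existing macro.
 */
bool is_trap_insn(uprobe_opcode_t *insn)
{
	/* the uprobes breakpoint instruction is itself a trap variant */
	if (is_swbp_insn(insn))
		return true;

	return ARCH_INSN_IS_TRAP(*insn);
}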
Signed-off-by: Ananth N Mavinakayanahalli Acked-by: Srikar Dronamraju Signed-off-by: Oleg Nesterov --- include/linux/uprobes.h | 1 + kernel/events/uprobes.c | 34 +++++++++++++++++++++++++++++----- 2 files changed, 30 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 02b83db8e2c5..19612881399a 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -100,6 +100,7 @@ struct uprobes_state { extern int __weak set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr); extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr); extern bool __weak is_swbp_insn(uprobe_opcode_t *insn); +extern bool __weak is_trap_insn(uprobe_opcode_t *insn); extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool); extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 26bc2e24e9e3..ca9012930ce7 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -173,6 +173,20 @@ bool __weak is_swbp_insn(uprobe_opcode_t *insn) return *insn == UPROBE_SWBP_INSN; } +/** + * is_trap_insn - check if instruction is breakpoint instruction. + * @insn: instruction to be checked. + * Default implementation of is_trap_insn + * Returns true if @insn is a breakpoint instruction. + * + * This function is needed for the case where an architecture has multiple + * trap instructions (like powerpc). + */ +bool __weak is_trap_insn(uprobe_opcode_t *insn) +{ + return is_swbp_insn(insn); +} + static void copy_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t *opcode) { void *kaddr = kmap_atomic(page); @@ -185,6 +199,15 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t uprobe_opcode_t old_opcode; bool is_swbp; + /* + * Note: We only check if the old_opcode is UPROBE_SWBP_INSN here. + * We do not check if it is any other 'trap variant' which could + * be conditional trap instruction such as the one powerpc supports. + * + * The logic is that we do not care if the underlying instruction + * is a trap variant; uprobes always wins over any other (gdb) + * breakpoint. + */ copy_opcode(page, vaddr, &old_opcode); is_swbp = is_swbp_insn(&old_opcode); @@ -204,7 +227,7 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t * Expect the breakpoint instruction to be the smallest size instruction for * the architecture. If an arch has variable length instruction and the * breakpoint instruction is not of the smallest length instruction - * supported by that architecture then we need to modify is_swbp_at_addr and + * supported by that architecture then we need to modify is_trap_at_addr and * write_opcode accordingly. This would never be a problem for archs that * have fixed length instructions. 
*/ @@ -550,7 +573,7 @@ static int prepare_uprobe(struct uprobe *uprobe, struct file *file, goto out; ret = -ENOTSUPP; - if (is_swbp_insn((uprobe_opcode_t *)uprobe->arch.insn)) + if (is_trap_insn((uprobe_opcode_t *)uprobe->arch.insn)) goto out; ret = arch_uprobe_analyze_insn(&uprobe->arch, mm, vaddr); @@ -1431,7 +1454,7 @@ static void mmf_recalc_uprobes(struct mm_struct *mm) clear_bit(MMF_HAS_UPROBES, &mm->flags); } -static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr) +static int is_trap_at_addr(struct mm_struct *mm, unsigned long vaddr) { struct page *page; uprobe_opcode_t opcode; @@ -1452,7 +1475,8 @@ static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr) copy_opcode(page, vaddr, &opcode); put_page(page); out: - return is_swbp_insn(&opcode); + /* This needs to return true for any variant of the trap insn */ + return is_trap_insn(&opcode); } static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp) @@ -1472,7 +1496,7 @@ static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp) } if (!uprobe) - *is_swbp = is_swbp_at_addr(mm, bp_vaddr); + *is_swbp = is_trap_at_addr(mm, bp_vaddr); } else { *is_swbp = -EFAULT; } -- cgit From 4aa02c7cbb6816913554dc18ff750a70a4ace796 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Wed, 13 Mar 2013 14:03:12 +0800 Subject: video: mxsfb: remove fb_phys/fb_size from platform_data There are no in-tree users of mxsfb_platform_data fb_phys/fb_size. With CMA support in the kernel, there is no real need for a platform to reserve memory and pass an address and size into the driver via platform_data. So let's remove fb_phys/fb_size from mxsfb_platform_data to ease full device tree adoption. Signed-off-by: Shawn Guo --- drivers/video/mxsfb.c | 39 +++++++-------------------------------- include/linux/mxsfb.h | 9 --------- 2 files changed, 7 insertions(+), 41 deletions(-) (limited to 'include/linux') diff --git a/drivers/video/mxsfb.c b/drivers/video/mxsfb.c index 69fb3f1d1e12..9e8740bade32 100644 --- a/drivers/video/mxsfb.c +++ b/drivers/video/mxsfb.c @@ -168,7 +168,6 @@ struct mxsfb_info { unsigned ld_intf_width; unsigned dotclk_delay; const struct mxsfb_devdata *devdata; - int mapped; u32 sync; }; @@ -686,7 +685,7 @@ static int mxsfb_init_fbinfo(struct mxsfb_info *host) struct mxsfb_platform_data *pdata = host->pdev->dev.platform_data; dma_addr_t fb_phys; void *fb_virt; - unsigned fb_size = pdata->fb_size; + unsigned fb_size; fb_info->fbops = &mxsfb_ops; fb_info->flags = FBINFO_FLAG_DEFAULT | FBINFO_READS_FAST; @@ -706,30 +705,12 @@ static int mxsfb_init_fbinfo(struct mxsfb_info *host) host->ld_intf_width = pdata->ld_intf_width; /* Memory allocation for framebuffer */ - if (pdata->fb_phys) { - if (!fb_size) - return -EINVAL; - - fb_phys = pdata->fb_phys; + fb_size = SZ_2M; + fb_virt = alloc_pages_exact(fb_size, GFP_DMA); + if (!fb_virt) + return -ENOMEM; - if (!request_mem_region(fb_phys, fb_size, host->pdev->name)) - return -ENOMEM; - - fb_virt = ioremap(fb_phys, fb_size); - if (!fb_virt) { - release_mem_region(fb_phys, fb_size); - return -ENOMEM; - } - host->mapped = 1; - } else { - if (!fb_size) - fb_size = SZ_2M; /* default */ - fb_virt = alloc_pages_exact(fb_size, GFP_DMA); - if (!fb_virt) - return -ENOMEM; - - fb_phys = virt_to_phys(fb_virt); - } + fb_phys = virt_to_phys(fb_virt); fb_info->fix.smem_start = fb_phys; fb_info->screen_base = fb_virt; @@ -745,13 +726,7 @@ static void mxsfb_free_videomem(struct mxsfb_info *host) { struct fb_info *fb_info = &host->fb_info; -
iounmap(fb_info->screen_base); - release_mem_region(fb_info->fix.smem_start, - fb_info->screen_size); - } else { - free_pages_exact(fb_info->screen_base, fb_info->fix.smem_len); - } + free_pages_exact(fb_info->screen_base, fb_info->fix.smem_len); } static struct platform_device_id mxsfb_devtype[] = { diff --git a/include/linux/mxsfb.h b/include/linux/mxsfb.h index f80af8674342..93696404ee55 100644 --- a/include/linux/mxsfb.h +++ b/include/linux/mxsfb.h @@ -35,15 +35,6 @@ struct mxsfb_platform_data { unsigned dotclk_delay; /* refer manual HW_LCDIF_VDCTRL4 register */ unsigned ld_intf_width; /* refer STMLCDIF_* macros */ - - unsigned fb_size; /* Size of the video memory. If zero a - * default will be used - */ - unsigned long fb_phys; /* physical address for the video memory. If - * zero the framebuffer memory will be dynamically - * allocated. If specified,fb_size must also be specified. - * fb_phys must be unused by Linux. - */ u32 sync; /* sync mask, contains MXSFB specifics not * carried in fb_info->var.sync */ -- cgit From 36f3e99649baa77b2d22e385b2ea09e8f308c905 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Wed, 13 Mar 2013 14:28:19 +0800 Subject: video: mxsfb: remove dotclk_delay from platform_data There are no in-tree mxsfb users using mxsfb_platform_data dotclk_delay. Let's remove it from mxsfb_platform_data to ease full device tree adoption of the mxsfb driver. If a platform/board later needs to configure this parameter, we can add it to the device tree bindings. Signed-off-by: Shawn Guo --- drivers/video/mxsfb.c | 1 - include/linux/mxsfb.h | 1 - 2 files changed, 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/video/mxsfb.c b/drivers/video/mxsfb.c index 9e8740bade32..a89901c7f5e9 100644 --- a/drivers/video/mxsfb.c +++ b/drivers/video/mxsfb.c @@ -701,7 +701,6 @@ static int mxsfb_init_fbinfo(struct mxsfb_info *host) var->accel_flags = 0; var->vmode = FB_VMODE_NONINTERLACED; - host->dotclk_delay = pdata->dotclk_delay; host->ld_intf_width = pdata->ld_intf_width; /* Memory allocation for framebuffer */ diff --git a/include/linux/mxsfb.h b/include/linux/mxsfb.h index 93696404ee55..b78465cdb26e 100644 --- a/include/linux/mxsfb.h +++ b/include/linux/mxsfb.h @@ -33,7 +33,6 @@ struct mxsfb_platform_data { unsigned default_bpp; - unsigned dotclk_delay; /* refer manual HW_LCDIF_VDCTRL4 register */ unsigned ld_intf_width; /* refer STMLCDIF_* macros */ u32 sync; /* sync mask, contains MXSFB specifics not * carried in fb_info->var.sync -- cgit From c8b5cfc8797203d5e952592be00a5acacf5cef6a Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Thu, 14 Mar 2013 13:21:56 +0800 Subject: video: mxsfb: remove mxsfb_platform_data None of the mxsfb users uses mxsfb_platform_data now. Let's remove it from the mxsfb driver. As a result, include/linux/mxsfb.h gets deleted, with a few macros moved into mxsfb.c. Along with the change, the typo "FAILING" in a macro name is fixed to "FALLING". Signed-off-by: Shawn Guo --- drivers/video/mxsfb.c | 41 +++++++++++++++++------------------------ include/linux/mxsfb.h | 42 ------------------------------------------ 2 files changed, 17 insertions(+), 66 deletions(-) delete mode 100644 include/linux/mxsfb.h (limited to 'include/linux') diff --git a/drivers/video/mxsfb.c b/drivers/video/mxsfb.c index e5ceba54d22f..eac7c1ace7a5 100644 --- a/drivers/video/mxsfb.c +++ b/drivers/video/mxsfb.c @@ -49,7 +49,7 @@ #include #include #include -#include <linux/mxsfb.h> +#include #include