From cf1c4a094f46ace5bf00078e2db73491ddf018fe Mon Sep 17 00:00:00 2001 From: Josh Hunt Date: Tue, 19 Feb 2013 11:35:59 -0800 Subject: netfilter: ipset: timeout values corrupted on set resize If a resize is triggered on a set with timeouts enabled, the timeout values will get corrupted when copying them to the new set. This occured b/c the wrong timeout value is supplied to type_pf_elem_tadd(). This also adds simple debug statement similar to the one in type_pf_resize(). Signed-off-by: Josh Hunt Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set_ahash.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfilter/ipset/ip_set_ahash.h b/include/linux/netfilter/ipset/ip_set_ahash.h index ef9acd3c8450..01d25e6fc792 100644 --- a/include/linux/netfilter/ipset/ip_set_ahash.h +++ b/include/linux/netfilter/ipset/ip_set_ahash.h @@ -854,6 +854,8 @@ type_pf_tresize(struct ip_set *set, bool retried) retry: ret = 0; htable_bits++; + pr_debug("attempt to resize set %s from %u to %u, t %p\n", + set->name, orig->htable_bits, htable_bits, orig); if (!htable_bits) { /* In case we have plenty of memory :-) */ pr_warning("Cannot increase the hashsize of set %s further\n", @@ -873,7 +875,7 @@ retry: data = ahash_tdata(n, j); m = hbucket(t, HKEY(data, h->initval, htable_bits)); ret = type_pf_elem_tadd(m, data, AHASH_MAX(h), 0, - type_pf_data_timeout(data)); + ip_set_timeout_get(type_pf_data_timeout(data))); if (ret < 0) { read_unlock_bh(&set->lock); ahash_destroy(t); -- cgit From 4d4c4e24cf48400a24d33feffc7cca4f4e8cabe1 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 22 Feb 2013 00:05:07 +0100 Subject: irq: Remove IRQ_EXIT_OFFSET workaround The IRQ_EXIT_OFFSET trick was used to make sure the irq doesn't get preempted after we substract the HARDIRQ_OFFSET until we are entirely done with any code in irq_exit(). This workaround was necessary because some archs may call irq_exit() with irqs enabled and there is still some code in the end of this function that is not covered by the HARDIRQ_OFFSET but want to stay non-preemptible. Now that irq are always disabled in irq_exit(), the whole code is guaranteed not to be preempted. We can thus remove this hack. Signed-off-by: Frederic Weisbecker Cc: Linus Torvalds Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Paul E. McKenney --- include/linux/hardirq.h | 2 -- kernel/softirq.c | 3 +-- 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 29eb805ea4a6..c1d6555d2567 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -118,10 +118,8 @@ #ifdef CONFIG_PREEMPT_COUNT # define preemptible() (preempt_count() == 0 && !irqs_disabled()) -# define IRQ_EXIT_OFFSET (HARDIRQ_OFFSET-1) #else # define preemptible() 0 -# define IRQ_EXIT_OFFSET HARDIRQ_OFFSET #endif #if defined(CONFIG_SMP) || defined(CONFIG_GENERIC_HARDIRQS) diff --git a/kernel/softirq.c b/kernel/softirq.c index 24a921bcf04f..f42ff97e1f8f 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -343,7 +343,7 @@ void irq_exit(void) account_irq_exit_time(current); trace_hardirq_exit(); - sub_preempt_count(IRQ_EXIT_OFFSET); + sub_preempt_count(HARDIRQ_OFFSET); if (!in_interrupt() && local_softirq_pending()) invoke_softirq(); @@ -353,7 +353,6 @@ void irq_exit(void) tick_nohz_irq_exit(); #endif rcu_irq_exit(); - sched_preempt_enable_no_resched(); #ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED local_irq_restore(flags); #endif -- cgit From 46c498c2cdee5efe44f617bcd4f388179be36115 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 26 Feb 2013 18:44:33 +0100 Subject: stop_machine: Mark per cpu stopper enabled early commit 14e568e78 (stop_machine: Use smpboot threads) introduced the following regression: Before this commit the stopper enabled bit was set in the online notifier. CPU0 CPU1 cpu_up cpu online hotplug_notifier(ONLINE) stopper(CPU1)->enabled = true; ... stop_machine() The conversion to smpboot threads moved the enablement to the wakeup path of the parked thread. The majority of users seem to have the following working order: CPU0 CPU1 cpu_up cpu online unpark_threads() wakeup(stopper[CPU1]) .... stopper thread runs stopper(CPU1)->enabled = true; stop_machine() But Konrad and Sander have observed: CPU0 CPU1 cpu_up cpu online unpark_threads() wakeup(stopper[CPU1]) .... stop_machine() stopper thread runs stopper(CPU1)->enabled = true; Now the stop machinery kicks CPU0 into the stop loop, where it gets stuck forever because the queue code saw stopper(CPU1)->enabled == false, so CPU0 waits for CPU1 to enter stomp_machine, but the CPU1 stopper work got discarded due to enabled == false. Add a pre_unpark function to the smpboot thread descriptor and call it before waking the thread. This fixes the problem at hand, but the stop_machine code should be more robust. The stopper->enabled flag smells fishy at best. Thanks to Konrad for going through a loop of debug patches and providing the information to decode this issue. Reported-and-tested-by: Konrad Rzeszutek Wilk Reported-and-tested-by: Sander Eikelenboom Cc: Srivatsa S. Bhat Cc: Rusty Russell Link: http://lkml.kernel.org/r/alpine.LFD.2.02.1302261843240.22263@ionos Signed-off-by: Thomas Gleixner --- include/linux/smpboot.h | 4 ++++ kernel/smpboot.c | 2 ++ kernel/stop_machine.c | 2 +- 3 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/smpboot.h b/include/linux/smpboot.h index c65dee059913..13e929679550 100644 --- a/include/linux/smpboot.h +++ b/include/linux/smpboot.h @@ -24,6 +24,9 @@ struct smpboot_thread_data; * parked (cpu offline) * @unpark: Optional unpark function, called when the thread is * unparked (cpu online) + * @pre_unpark: Optional unpark function, called before the thread is + * unparked (cpu online). This is not guaranteed to be + * called on the target cpu of the thread. Careful! * @selfparking: Thread is not parked by the park function. * @thread_comm: The base name of the thread */ @@ -37,6 +40,7 @@ struct smp_hotplug_thread { void (*cleanup)(unsigned int cpu, bool online); void (*park)(unsigned int cpu); void (*unpark)(unsigned int cpu); + void (*pre_unpark)(unsigned int cpu); bool selfparking; const char *thread_comm; }; diff --git a/kernel/smpboot.c b/kernel/smpboot.c index d4abac261779..8eaed9aa9cf0 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c @@ -209,6 +209,8 @@ static void smpboot_unpark_thread(struct smp_hotplug_thread *ht, unsigned int cp { struct task_struct *tsk = *per_cpu_ptr(ht->store, cpu); + if (ht->pre_unpark) + ht->pre_unpark(cpu); kthread_unpark(tsk); } diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 95d178c62d5a..c09f2955ae30 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -336,7 +336,7 @@ static struct smp_hotplug_thread cpu_stop_threads = { .create = cpu_stop_create, .setup = cpu_stop_unpark, .park = cpu_stop_park, - .unpark = cpu_stop_unpark, + .pre_unpark = cpu_stop_unpark, .selfparking = true, }; -- cgit From 79ffef1fe213851f44bfccf037170a140e929f85 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 27 Feb 2013 07:05:03 +0000 Subject: tcp: avoid wakeups for pure ACK TCP prequeue mechanism purpose is to let incoming packets being processed by the thread currently blocked in tcp_recvmsg(), instead of behalf of the softirq handler, to better adapt flow control on receiver host capacity to schedule the consumer. But in typical request/answer workloads, we send request, then block to receive the answer. And before the actual answer, TCP stack receives the ACK packets acknowledging the request. Processing pure ACK on behalf of the thread blocked in tcp_recvmsg() is a waste of resources, as thread has to immediately sleep again because it got no payload. This patch avoids the extra context switches and scheduler overhead. Before patch : a:~# echo 0 >/proc/sys/net/ipv4/tcp_low_latency a:~# perf stat ./super_netperf 300 -t TCP_RR -l 10 -H 7.7.7.84 -- -r 8k,8k 231676 Performance counter stats for './super_netperf 300 -t TCP_RR -l 10 -H 7.7.7.84 -- -r 8k,8k': 116251.501765 task-clock # 11.369 CPUs utilized 5,025,463 context-switches # 0.043 M/sec 1,074,511 CPU-migrations # 0.009 M/sec 216,923 page-faults # 0.002 M/sec 311,636,972,396 cycles # 2.681 GHz 260,507,138,069 stalled-cycles-frontend # 83.59% frontend cycles idle 155,590,092,840 stalled-cycles-backend # 49.93% backend cycles idle 100,101,255,411 instructions # 0.32 insns per cycle # 2.60 stalled cycles per insn 16,535,930,999 branches # 142.243 M/sec 646,483,591 branch-misses # 3.91% of all branches 10.225482774 seconds time elapsed After patch : a:~# echo 0 >/proc/sys/net/ipv4/tcp_low_latency a:~# perf stat ./super_netperf 300 -t TCP_RR -l 10 -H 7.7.7.84 -- -r 8k,8k 233297 Performance counter stats for './super_netperf 300 -t TCP_RR -l 10 -H 7.7.7.84 -- -r 8k,8k': 91084.870855 task-clock # 8.887 CPUs utilized 2,485,916 context-switches # 0.027 M/sec 815,520 CPU-migrations # 0.009 M/sec 216,932 page-faults # 0.002 M/sec 245,195,022,629 cycles # 2.692 GHz 202,635,777,041 stalled-cycles-frontend # 82.64% frontend cycles idle 124,280,372,407 stalled-cycles-backend # 50.69% backend cycles idle 83,457,289,618 instructions # 0.34 insns per cycle # 2.43 stalled cycles per insn 13,431,472,361 branches # 147.461 M/sec 504,470,665 branch-misses # 3.76% of all branches 10.249594448 seconds time elapsed Signed-off-by: Eric Dumazet Cc: Neal Cardwell Cc: Tom Herbert Cc: Yuchung Cheng Cc: Andi Kleen Signed-off-by: David S. Miller --- include/net/tcp.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 23f2e98d4b65..cf0694d4ad60 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1045,6 +1045,10 @@ static inline bool tcp_prequeue(struct sock *sk, struct sk_buff *skb) if (sysctl_tcp_low_latency || !tp->ucopy.task) return false; + if (skb->len <= tcp_hdrlen(skb) && + skb_queue_len(&tp->ucopy.prequeue) == 0) + return false; + __skb_queue_tail(&tp->ucopy.prequeue, skb); tp->ucopy.memory += skb->truesize; if (tp->ucopy.memory > sk->sk_rcvbuf) { -- cgit From 5838b032fd69ae47565ddc50062decf9055e1628 Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Thu, 28 Feb 2013 18:12:47 -0600 Subject: regulator: core: update kernel documentation for regulator_desc commit df367931 (regulator: core: Provide regmap get/set bypass operations) introduced regulator_[gs]et_bypass_regmap However structure documentation for regulator_desc needs an update. ./scripts/kernel-doc include/linux/regulator/driver.h >/dev/null generates: Warning(include/linux/regulator/driver.h:233): No description found for parameter 'bypass_reg' Warning(include/linux/regulator/driver.h:233): No description found for parameter 'bypass_mask' Cc: Liam Girdwood Cc: Mark Brown Cc: linux-kernel@vger.kernel.org Signed-off-by: Nishanth Menon Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 23070fd83872..7df93f52db08 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -199,6 +199,8 @@ enum regulator_type { * output when using regulator_set_voltage_sel_regmap * @enable_reg: Register for control when using regmap enable/disable ops * @enable_mask: Mask for control when using regmap enable/disable ops + * @bypass_reg: Register for control when using regmap set_bypass + * @bypass_mask: Mask for control when using regmap set_bypass * * @enable_time: Time taken for initial enable of regulator (in uS). */ -- cgit From 7f78e0351394052e1a6293e175825eb5c7869507 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 2 Mar 2013 19:39:14 -0800 Subject: fs: Limit sys_mount to only request filesystem modules. Modify the request_module to prefix the file system type with "fs-" and add aliases to all of the filesystems that can be built as modules to match. A common practice is to build all of the kernel code and leave code that is not commonly needed as modules, with the result that many users are exposed to any bug anywhere in the kernel. Looking for filesystems with a fs- prefix limits the pool of possible modules that can be loaded by mount to just filesystems trivially making things safer with no real cost. Using aliases means user space can control the policy of which filesystem modules are auto-loaded by editing /etc/modprobe.d/*.conf with blacklist and alias directives. Allowing simple, safe, well understood work-arounds to known problematic software. This also addresses a rare but unfortunate problem where the filesystem name is not the same as it's module name and module auto-loading would not work. While writing this patch I saw a handful of such cases. The most significant being autofs that lives in the module autofs4. This is relevant to user namespaces because we can reach the request module in get_fs_type() without having any special permissions, and people get uncomfortable when a user specified string (in this case the filesystem type) goes all of the way to request_module. After having looked at this issue I don't think there is any particular reason to perform any filtering or permission checks beyond making it clear in the module request that we want a filesystem module. The common pattern in the kernel is to call request_module() without regards to the users permissions. In general all a filesystem module does once loaded is call register_filesystem() and go to sleep. Which means there is not much attack surface exposed by loading a filesytem module unless the filesystem is mounted. In a user namespace filesystems are not mounted unless .fs_flags = FS_USERNS_MOUNT, which most filesystems do not set today. Acked-by: Serge Hallyn Acked-by: Kees Cook Reported-by: Kees Cook Signed-off-by: "Eric W. Biederman" --- arch/ia64/kernel/perfmon.c | 1 + arch/powerpc/platforms/cell/spufs/inode.c | 1 + arch/s390/hypfs/inode.c | 1 + drivers/firmware/efivars.c | 1 + drivers/infiniband/hw/ipath/ipath_fs.c | 1 + drivers/infiniband/hw/qib/qib_fs.c | 1 + drivers/misc/ibmasm/ibmasmfs.c | 1 + drivers/mtd/mtdchar.c | 1 + drivers/oprofile/oprofilefs.c | 1 + drivers/staging/ccg/f_fs.c | 1 + drivers/usb/gadget/f_fs.c | 1 + drivers/usb/gadget/inode.c | 1 + drivers/xen/xenfs/super.c | 1 + fs/9p/vfs_super.c | 1 + fs/adfs/super.c | 1 + fs/affs/super.c | 1 + fs/afs/super.c | 1 + fs/autofs4/init.c | 1 + fs/befs/linuxvfs.c | 1 + fs/bfs/inode.c | 1 + fs/binfmt_misc.c | 1 + fs/btrfs/super.c | 1 + fs/ceph/super.c | 1 + fs/coda/inode.c | 1 + fs/configfs/mount.c | 1 + fs/cramfs/inode.c | 1 + fs/debugfs/inode.c | 1 + fs/devpts/inode.c | 1 + fs/ecryptfs/main.c | 1 + fs/efs/super.c | 1 + fs/exofs/super.c | 1 + fs/ext2/super.c | 1 + fs/ext3/super.c | 1 + fs/ext4/super.c | 5 +++-- fs/f2fs/super.c | 1 + fs/fat/namei_msdos.c | 1 + fs/fat/namei_vfat.c | 1 + fs/filesystems.c | 2 +- fs/freevxfs/vxfs_super.c | 2 +- fs/fuse/control.c | 1 + fs/fuse/inode.c | 2 ++ fs/gfs2/ops_fstype.c | 4 +++- fs/hfs/super.c | 1 + fs/hfsplus/super.c | 1 + fs/hppfs/hppfs.c | 1 + fs/hugetlbfs/inode.c | 1 + fs/isofs/inode.c | 3 +-- fs/jffs2/super.c | 1 + fs/jfs/super.c | 1 + fs/logfs/super.c | 1 + fs/minix/inode.c | 1 + fs/ncpfs/inode.c | 1 + fs/nfs/super.c | 3 ++- fs/nfsd/nfsctl.c | 1 + fs/nilfs2/super.c | 1 + fs/ntfs/super.c | 1 + fs/ocfs2/dlmfs/dlmfs.c | 1 + fs/omfs/inode.c | 1 + fs/openpromfs/inode.c | 1 + fs/qnx4/inode.c | 1 + fs/qnx6/inode.c | 1 + fs/reiserfs/super.c | 1 + fs/romfs/super.c | 1 + fs/sysv/super.c | 3 ++- fs/ubifs/super.c | 1 + fs/ufs/super.c | 1 + fs/xfs/xfs_super.c | 1 + include/linux/fs.h | 2 ++ net/sunrpc/rpc_pipe.c | 4 +--- 69 files changed, 77 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 433f5e8a2cd1..2eda28414abb 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -619,6 +619,7 @@ static struct file_system_type pfm_fs_type = { .mount = pfmfs_mount, .kill_sb = kill_anon_super, }; +MODULE_ALIAS_FS("pfmfs"); DEFINE_PER_CPU(unsigned long, pfm_syst_info); DEFINE_PER_CPU(struct task_struct *, pmu_owner); diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 863184b182f4..3f3bb4cdbbec 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -749,6 +749,7 @@ static struct file_system_type spufs_type = { .mount = spufs_mount, .kill_sb = kill_litter_super, }; +MODULE_ALIAS_FS("spufs"); static int __init spufs_init(void) { diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 8538015ed4a0..5f7d7ba2874c 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -456,6 +456,7 @@ static struct file_system_type hypfs_type = { .mount = hypfs_mount, .kill_sb = hypfs_kill_super }; +MODULE_ALIAS_FS("s390_hypfs"); static const struct super_operations hypfs_s_ops = { .statfs = simple_statfs, diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c index 7320bf891706..3edade07b249 100644 --- a/drivers/firmware/efivars.c +++ b/drivers/firmware/efivars.c @@ -1234,6 +1234,7 @@ static struct file_system_type efivarfs_type = { .mount = efivarfs_mount, .kill_sb = efivarfs_kill_sb, }; +MODULE_ALIAS_FS("efivarfs"); /* * Handle negative dentry. diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c index a479375a8fd8..e0c404bdc4a8 100644 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -410,6 +410,7 @@ static struct file_system_type ipathfs_fs_type = { .mount = ipathfs_mount, .kill_sb = ipathfs_kill_super, }; +MODULE_ALIAS_FS("ipathfs"); int __init ipath_init_ipathfs(void) { diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index 644bd6f6467c..f247fc6e6182 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -604,6 +604,7 @@ static struct file_system_type qibfs_fs_type = { .mount = qibfs_mount, .kill_sb = qibfs_kill_super, }; +MODULE_ALIAS_FS("ipathfs"); int __init qib_init_qibfs(void) { diff --git a/drivers/misc/ibmasm/ibmasmfs.c b/drivers/misc/ibmasm/ibmasmfs.c index 6673e578b3e9..ce5b75616b45 100644 --- a/drivers/misc/ibmasm/ibmasmfs.c +++ b/drivers/misc/ibmasm/ibmasmfs.c @@ -110,6 +110,7 @@ static struct file_system_type ibmasmfs_type = { .mount = ibmasmfs_mount, .kill_sb = kill_litter_super, }; +MODULE_ALIAS_FS("ibmasmfs"); static int ibmasmfs_fill_super (struct super_block *sb, void *data, int silent) { diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index 82c06165d3d2..92ab30ab00dc 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -1238,6 +1238,7 @@ static struct file_system_type mtd_inodefs_type = { .mount = mtd_inodefs_mount, .kill_sb = kill_anon_super, }; +MODULE_ALIAS_FS("mtd_inodefs"); static int __init init_mtdchar(void) { diff --git a/drivers/oprofile/oprofilefs.c b/drivers/oprofile/oprofilefs.c index 445ffda715ad..7c12d9c2b230 100644 --- a/drivers/oprofile/oprofilefs.c +++ b/drivers/oprofile/oprofilefs.c @@ -276,6 +276,7 @@ static struct file_system_type oprofilefs_type = { .mount = oprofilefs_mount, .kill_sb = kill_litter_super, }; +MODULE_ALIAS_FS("oprofilefs"); int __init oprofilefs_register(void) diff --git a/drivers/staging/ccg/f_fs.c b/drivers/staging/ccg/f_fs.c index 8adc79d1b402..f6373dade7fb 100644 --- a/drivers/staging/ccg/f_fs.c +++ b/drivers/staging/ccg/f_fs.c @@ -1223,6 +1223,7 @@ static struct file_system_type ffs_fs_type = { .mount = ffs_fs_mount, .kill_sb = ffs_fs_kill_sb, }; +MODULE_ALIAS_FS("functionfs"); /* Driver's main init/cleanup functions *************************************/ diff --git a/drivers/usb/gadget/f_fs.c b/drivers/usb/gadget/f_fs.c index 38388d7844fc..c377ff84bf2c 100644 --- a/drivers/usb/gadget/f_fs.c +++ b/drivers/usb/gadget/f_fs.c @@ -1235,6 +1235,7 @@ static struct file_system_type ffs_fs_type = { .mount = ffs_fs_mount, .kill_sb = ffs_fs_kill_sb, }; +MODULE_ALIAS_FS("functionfs"); /* Driver's main init/cleanup functions *************************************/ diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c index 8ac840f25ba9..e2b2e9cf254a 100644 --- a/drivers/usb/gadget/inode.c +++ b/drivers/usb/gadget/inode.c @@ -2105,6 +2105,7 @@ static struct file_system_type gadgetfs_type = { .mount = gadgetfs_mount, .kill_sb = gadgetfs_kill_sb, }; +MODULE_ALIAS_FS("gadgetfs"); /*----------------------------------------------------------------------*/ diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c index ec0abb6df3c3..71679875f056 100644 --- a/drivers/xen/xenfs/super.c +++ b/drivers/xen/xenfs/super.c @@ -75,6 +75,7 @@ static struct file_system_type xenfs_type = { .mount = xenfs_mount, .kill_sb = kill_litter_super, }; +MODULE_ALIAS_FS("xenfs"); static int __init xenfs_init(void) { diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 91dad63e5a2d..2756dcd5de6e 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -365,3 +365,4 @@ struct file_system_type v9fs_fs_type = { .owner = THIS_MODULE, .fs_flags = FS_RENAME_DOES_D_MOVE, }; +MODULE_ALIAS_FS("9p"); diff --git a/fs/adfs/super.c b/fs/adfs/super.c index d57122935793..0ff4bae2c2a2 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -524,6 +524,7 @@ static struct file_system_type adfs_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("adfs"); static int __init init_adfs_fs(void) { diff --git a/fs/affs/super.c b/fs/affs/super.c index b84dc7352502..45161a832bbc 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -622,6 +622,7 @@ static struct file_system_type affs_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("affs"); static int __init init_affs_fs(void) { diff --git a/fs/afs/super.c b/fs/afs/super.c index 7c31ec399575..c4861557e385 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -45,6 +45,7 @@ struct file_system_type afs_fs_type = { .kill_sb = afs_kill_super, .fs_flags = 0, }; +MODULE_ALIAS_FS("afs"); static const struct super_operations afs_super_ops = { .statfs = afs_statfs, diff --git a/fs/autofs4/init.c b/fs/autofs4/init.c index cddc74b9cdb2..b3db517e89ec 100644 --- a/fs/autofs4/init.c +++ b/fs/autofs4/init.c @@ -26,6 +26,7 @@ static struct file_system_type autofs_fs_type = { .mount = autofs_mount, .kill_sb = autofs4_kill_sb, }; +MODULE_ALIAS_FS("autofs"); static int __init init_autofs4_fs(void) { diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index c8f4e25eb9e2..8615ee89ab55 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -951,6 +951,7 @@ static struct file_system_type befs_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("befs"); static int __init init_befs_fs(void) diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 737aaa3f7090..5e376bb93419 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -473,6 +473,7 @@ static struct file_system_type bfs_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("bfs"); static int __init init_bfs_fs(void) { diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index fecbbf3f8ff2..751df5e4f61a 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -720,6 +720,7 @@ static struct file_system_type bm_fs_type = { .mount = bm_mount, .kill_sb = kill_litter_super, }; +MODULE_ALIAS_FS("binfmt_misc"); static int __init init_misc_binfmt(void) { diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 68a29a1ea068..f6b88595f858 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1558,6 +1558,7 @@ static struct file_system_type btrfs_fs_type = { .kill_sb = btrfs_kill_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("btrfs"); /* * used by btrfsctl to scan devices when no FS is mounted diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 9fe17c6c2876..6ddc0bca56b2 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -952,6 +952,7 @@ static struct file_system_type ceph_fs_type = { .kill_sb = ceph_kill_sb, .fs_flags = FS_RENAME_DOES_D_MOVE, }; +MODULE_ALIAS_FS("ceph"); #define _STRINGIFY(x) #x #define STRINGIFY(x) _STRINGIFY(x) diff --git a/fs/coda/inode.c b/fs/coda/inode.c index dada9d0abede..4dcc0d81a7aa 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -329,4 +329,5 @@ struct file_system_type coda_fs_type = { .kill_sb = kill_anon_super, .fs_flags = FS_BINARY_MOUNTDATA, }; +MODULE_ALIAS_FS("coda"); diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c index aee0a7ebbd8e..7f26c3cf75ae 100644 --- a/fs/configfs/mount.c +++ b/fs/configfs/mount.c @@ -114,6 +114,7 @@ static struct file_system_type configfs_fs_type = { .mount = configfs_do_mount, .kill_sb = kill_litter_super, }; +MODULE_ALIAS_FS("configfs"); struct dentry *configfs_pin_fs(void) { diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index 3ceb9ec976e1..35b1c7bd18b7 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -573,6 +573,7 @@ static struct file_system_type cramfs_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("cramfs"); static int __init init_cramfs_fs(void) { diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 0c4f80b447fb..4888cb3fdef7 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -299,6 +299,7 @@ static struct file_system_type debug_fs_type = { .mount = debug_mount, .kill_sb = kill_litter_super, }; +MODULE_ALIAS_FS("debugfs"); static struct dentry *__create_file(const char *name, umode_t mode, struct dentry *parent, void *data, diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 073d30b9d1ac..79b662985efe 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -510,6 +510,7 @@ static struct file_system_type devpts_fs_type = { .fs_flags = FS_USERNS_MOUNT | FS_USERNS_DEV_MOUNT, #endif }; +MODULE_ALIAS_FS("devpts"); /* * The normal naming convention is simply /dev/pts/; this conforms diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 4e0886c9e5c4..e924cf45aad9 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -629,6 +629,7 @@ static struct file_system_type ecryptfs_fs_type = { .kill_sb = ecryptfs_kill_block_super, .fs_flags = 0 }; +MODULE_ALIAS_FS("ecryptfs"); /** * inode_info_init_once diff --git a/fs/efs/super.c b/fs/efs/super.c index 2002431ef9a0..c6f57a74a559 100644 --- a/fs/efs/super.c +++ b/fs/efs/super.c @@ -33,6 +33,7 @@ static struct file_system_type efs_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("efs"); static struct pt_types sgi_pt_types[] = { {0x00, "SGI vh"}, diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 5e59280d42d7..9d9763328734 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -1010,6 +1010,7 @@ static struct file_system_type exofs_type = { .mount = exofs_mount, .kill_sb = generic_shutdown_super, }; +MODULE_ALIAS_FS("exofs"); static int __init init_exofs(void) { diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 7f68c8114026..288534920fe5 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -1536,6 +1536,7 @@ static struct file_system_type ext2_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("ext2"); static int __init init_ext2_fs(void) { diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 5546ca225ffe..1d6e2ed85322 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -3068,6 +3068,7 @@ static struct file_system_type ext3_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("ext3"); static int __init init_ext3_fs(void) { diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 5e6c87836193..34e855219231 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -90,6 +90,7 @@ static struct file_system_type ext2_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("ext2"); #define IS_EXT2_SB(sb) ((sb)->s_bdev->bd_holder == &ext2_fs_type) #else #define IS_EXT2_SB(sb) (0) @@ -104,6 +105,7 @@ static struct file_system_type ext3_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("ext3"); #define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type) #else #define IS_EXT3_SB(sb) (0) @@ -5152,7 +5154,6 @@ static inline int ext2_feature_set_ok(struct super_block *sb) return 0; return 1; } -MODULE_ALIAS("ext2"); #else static inline void register_as_ext2(void) { } static inline void unregister_as_ext2(void) { } @@ -5185,7 +5186,6 @@ static inline int ext3_feature_set_ok(struct super_block *sb) return 0; return 1; } -MODULE_ALIAS("ext3"); #else static inline void register_as_ext3(void) { } static inline void unregister_as_ext3(void) { } @@ -5199,6 +5199,7 @@ static struct file_system_type ext4_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("ext4"); static int __init ext4_init_feat_adverts(void) { diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 8c117649a035..fea6e582a2ed 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -687,6 +687,7 @@ static struct file_system_type f2fs_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("f2fs"); static int __init init_inodecache(void) { diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index e2cfda94a28d..081b759cff83 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -668,6 +668,7 @@ static struct file_system_type msdos_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("msdos"); static int __init init_msdos_fs(void) { diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index ac959d655e7d..2da952036a3d 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -1073,6 +1073,7 @@ static struct file_system_type vfat_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("vfat"); static int __init init_vfat_fs(void) { diff --git a/fs/filesystems.c b/fs/filesystems.c index da165f6adcbf..92567d95ba6a 100644 --- a/fs/filesystems.c +++ b/fs/filesystems.c @@ -273,7 +273,7 @@ struct file_system_type *get_fs_type(const char *name) int len = dot ? dot - name : strlen(name); fs = __get_fs_type(name, len); - if (!fs && (request_module("%.*s", len, name) == 0)) + if (!fs && (request_module("fs-%.*s", len, name) == 0)) fs = __get_fs_type(name, len); if (dot && fs && !(fs->fs_flags & FS_HAS_SUBTYPE)) { diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c index fed2c8afb3a9..455074308069 100644 --- a/fs/freevxfs/vxfs_super.c +++ b/fs/freevxfs/vxfs_super.c @@ -52,7 +52,6 @@ MODULE_AUTHOR("Christoph Hellwig"); MODULE_DESCRIPTION("Veritas Filesystem (VxFS) driver"); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_ALIAS("vxfs"); /* makes mount -t vxfs autoload the module */ static void vxfs_put_super(struct super_block *); @@ -258,6 +257,7 @@ static struct file_system_type vxfs_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("vxfs"); /* makes mount -t vxfs autoload the module */ static int __init vxfs_init(void) diff --git a/fs/fuse/control.c b/fs/fuse/control.c index b7978b9f75ef..a0b0855d00a9 100644 --- a/fs/fuse/control.c +++ b/fs/fuse/control.c @@ -341,6 +341,7 @@ static struct file_system_type fuse_ctl_fs_type = { .mount = fuse_ctl_mount, .kill_sb = fuse_ctl_kill_sb, }; +MODULE_ALIAS_FS("fusectl"); int __init fuse_ctl_init(void) { diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index df00993ed108..137185c3884f 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1117,6 +1117,7 @@ static struct file_system_type fuse_fs_type = { .mount = fuse_mount, .kill_sb = fuse_kill_sb_anon, }; +MODULE_ALIAS_FS("fuse"); #ifdef CONFIG_BLOCK static struct dentry *fuse_mount_blk(struct file_system_type *fs_type, @@ -1146,6 +1147,7 @@ static struct file_system_type fuseblk_fs_type = { .kill_sb = fuse_kill_sb_blk, .fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE, }; +MODULE_ALIAS_FS("fuseblk"); static inline int register_fuseblk(void) { diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 1b612be4b873..60ede2a0f43f 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "gfs2.h" #include "incore.h" @@ -1425,6 +1426,7 @@ struct file_system_type gfs2_fs_type = { .kill_sb = gfs2_kill_sb, .owner = THIS_MODULE, }; +MODULE_ALIAS_FS("gfs2"); struct file_system_type gfs2meta_fs_type = { .name = "gfs2meta", @@ -1432,4 +1434,4 @@ struct file_system_type gfs2meta_fs_type = { .mount = gfs2_mount_meta, .owner = THIS_MODULE, }; - +MODULE_ALIAS_FS("gfs2meta"); diff --git a/fs/hfs/super.c b/fs/hfs/super.c index e93ddaadfd1e..bbaaa8a4ee64 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -466,6 +466,7 @@ static struct file_system_type hfs_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("hfs"); static void hfs_init_once(void *p) { diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 974c26f96fae..7b87284e46dc 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -654,6 +654,7 @@ static struct file_system_type hfsplus_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("hfsplus"); static void hfsplus_init_once(void *p) { diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c index 74f55703be49..126d3c2e2dee 100644 --- a/fs/hppfs/hppfs.c +++ b/fs/hppfs/hppfs.c @@ -748,6 +748,7 @@ static struct file_system_type hppfs_type = { .kill_sb = kill_anon_super, .fs_flags = 0, }; +MODULE_ALIAS_FS("hppfs"); static int __init init_hppfs(void) { diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 7f94e0cbc69c..84e3d856e91d 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -896,6 +896,7 @@ static struct file_system_type hugetlbfs_fs_type = { .mount = hugetlbfs_mount, .kill_sb = kill_litter_super, }; +MODULE_ALIAS_FS("hugetlbfs"); static struct vfsmount *hugetlbfs_vfsmount[HUGE_MAX_HSTATE]; diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 67ce52507d7d..a67f16e846a2 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -1556,6 +1556,7 @@ static struct file_system_type iso9660_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("iso9660"); static int __init init_iso9660_fs(void) { @@ -1593,5 +1594,3 @@ static void __exit exit_iso9660_fs(void) module_init(init_iso9660_fs) module_exit(exit_iso9660_fs) MODULE_LICENSE("GPL"); -/* Actual filesystem name is iso9660, as requested in filesystems.c */ -MODULE_ALIAS("iso9660"); diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index d3d8799e2187..0defb1cc2a35 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -356,6 +356,7 @@ static struct file_system_type jffs2_fs_type = { .mount = jffs2_mount, .kill_sb = jffs2_kill_sb, }; +MODULE_ALIAS_FS("jffs2"); static int __init init_jffs2_fs(void) { diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 060ba638becb..2003e830ed1c 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -833,6 +833,7 @@ static struct file_system_type jfs_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("jfs"); static void init_once(void *foo) { diff --git a/fs/logfs/super.c b/fs/logfs/super.c index 345c24b8a6f8..54360293bcb5 100644 --- a/fs/logfs/super.c +++ b/fs/logfs/super.c @@ -608,6 +608,7 @@ static struct file_system_type logfs_fs_type = { .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("logfs"); static int __init logfs_init(void) { diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 99541cceb584..df122496f328 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -660,6 +660,7 @@ static struct file_system_type minix_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("minix"); static int __init init_minix_fs(void) { diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 7dafd6899a62..26910c8154da 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -1051,6 +1051,7 @@ static struct file_system_type ncp_fs_type = { .kill_sb = kill_anon_super, .fs_flags = FS_BINARY_MOUNTDATA, }; +MODULE_ALIAS_FS("ncpfs"); static int __init init_ncp_fs(void) { diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 17b32b722457..95cdcb208dfb 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -294,6 +294,7 @@ struct file_system_type nfs_fs_type = { .kill_sb = nfs_kill_super, .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA, }; +MODULE_ALIAS_FS("nfs"); EXPORT_SYMBOL_GPL(nfs_fs_type); struct file_system_type nfs_xdev_fs_type = { @@ -333,6 +334,7 @@ struct file_system_type nfs4_fs_type = { .kill_sb = nfs_kill_super, .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA, }; +MODULE_ALIAS_FS("nfs4"); EXPORT_SYMBOL_GPL(nfs4_fs_type); static int __init register_nfs4_fs(void) @@ -2717,6 +2719,5 @@ module_param(send_implementation_id, ushort, 0644); MODULE_PARM_DESC(send_implementation_id, "Send implementation ID with NFSv4.1 exchange_id"); MODULE_PARM_DESC(nfs4_unique_id, "nfs_client_id4 uniquifier string"); -MODULE_ALIAS("nfs4"); #endif /* CONFIG_NFS_V4 */ diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 13a21c8fca49..f33455b4d957 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1090,6 +1090,7 @@ static struct file_system_type nfsd_fs_type = { .mount = nfsd_mount, .kill_sb = nfsd_umount, }; +MODULE_ALIAS_FS("nfsd"); #ifdef CONFIG_PROC_FS static int create_proc_exports_entry(void) diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 3c991dc84f2f..c7d1f9f18b09 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -1361,6 +1361,7 @@ struct file_system_type nilfs_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("nilfs2"); static void nilfs_inode_init_once(void *obj) { diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 4a8289f8b16c..82650d52d916 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -3079,6 +3079,7 @@ static struct file_system_type ntfs_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("ntfs"); /* Stable names for the slab caches. */ static const char ntfs_index_ctx_cache_name[] = "ntfs_index_ctx_cache"; diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index 4c5fc8d77dc2..12bafb7265ce 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c @@ -640,6 +640,7 @@ static struct file_system_type dlmfs_fs_type = { .mount = dlmfs_mount, .kill_sb = kill_litter_super, }; +MODULE_ALIAS_FS("ocfs2_dlmfs"); static int __init init_dlmfs_fs(void) { diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c index 25d715c7c87a..d8b0afde2179 100644 --- a/fs/omfs/inode.c +++ b/fs/omfs/inode.c @@ -572,6 +572,7 @@ static struct file_system_type omfs_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("omfs"); static int __init init_omfs_fs(void) { diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index ae47fa7efb9d..75885ffde44e 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c @@ -432,6 +432,7 @@ static struct file_system_type openprom_fs_type = { .mount = openprom_mount, .kill_sb = kill_anon_super, }; +MODULE_ALIAS_FS("openpromfs"); static void op_inode_init_once(void *data) { diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index 43098bb5723a..2e8caa62da78 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -412,6 +412,7 @@ static struct file_system_type qnx4_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("qnx4"); static int __init init_qnx4_fs(void) { diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c index 57199a52a351..8d941edfefa1 100644 --- a/fs/qnx6/inode.c +++ b/fs/qnx6/inode.c @@ -672,6 +672,7 @@ static struct file_system_type qnx6_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("qnx6"); static int __init init_qnx6_fs(void) { diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 418bdc3a57da..194113b1b11b 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -2434,6 +2434,7 @@ struct file_system_type reiserfs_fs_type = { .kill_sb = reiserfs_kill_sb, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("reiserfs"); MODULE_DESCRIPTION("ReiserFS journaled filesystem"); MODULE_AUTHOR("Hans Reiser "); diff --git a/fs/romfs/super.c b/fs/romfs/super.c index 7e8d3a80bdab..15cbc41ee365 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c @@ -599,6 +599,7 @@ static struct file_system_type romfs_fs_type = { .kill_sb = romfs_kill_sb, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("romfs"); /* * inode storage initialiser diff --git a/fs/sysv/super.c b/fs/sysv/super.c index a38e87bdd78d..a39938b1feea 100644 --- a/fs/sysv/super.c +++ b/fs/sysv/super.c @@ -545,6 +545,7 @@ static struct file_system_type sysv_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("sysv"); static struct file_system_type v7_fs_type = { .owner = THIS_MODULE, @@ -553,6 +554,7 @@ static struct file_system_type v7_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("v7"); static int __init init_sysv_fs(void) { @@ -586,5 +588,4 @@ static void __exit exit_sysv_fs(void) module_init(init_sysv_fs) module_exit(exit_sysv_fs) -MODULE_ALIAS("v7"); MODULE_LICENSE("GPL"); diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index ddc0f6ae65e9..ac838b844936 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -2174,6 +2174,7 @@ static struct file_system_type ubifs_fs_type = { .mount = ubifs_mount, .kill_sb = kill_ubifs_super, }; +MODULE_ALIAS_FS("ubifs"); /* * Inode slab cache constructor. diff --git a/fs/ufs/super.c b/fs/ufs/super.c index dc8e3a861d0f..329f2f53b7ed 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -1500,6 +1500,7 @@ static struct file_system_type ufs_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("ufs"); static int __init init_ufs_fs(void) { diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index c407121873b4..ea341cea68cb 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1561,6 +1561,7 @@ static struct file_system_type xfs_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("xfs"); STATIC int __init xfs_init_zones(void) diff --git a/include/linux/fs.h b/include/linux/fs.h index 74a907b8b950..2c28271ab9d4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1825,6 +1825,8 @@ struct file_system_type { struct lock_class_key i_mutex_dir_key; }; +#define MODULE_ALIAS_FS(NAME) MODULE_ALIAS("fs-" NAME) + extern struct dentry *mount_ns(struct file_system_type *fs_type, int flags, void *data, int (*fill_super)(struct super_block *, void *, int)); extern struct dentry *mount_bdev(struct file_system_type *fs_type, diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 7b9b40224a27..a0f48a51e14e 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -1174,6 +1174,7 @@ static struct file_system_type rpc_pipe_fs_type = { .mount = rpc_mount, .kill_sb = rpc_kill_sb, }; +MODULE_ALIAS_FS("rpc_pipefs"); static void init_once(void *foo) @@ -1218,6 +1219,3 @@ void unregister_rpc_pipefs(void) kmem_cache_destroy(rpc_inode_cachep); unregister_filesystem(&rpc_pipe_fs_type); } - -/* Make 'mount -t rpc_pipefs ...' autoload this module. */ -MODULE_ALIAS("rpc_pipefs"); -- cgit From 290502bee239062499297916bb7d21d205e99d62 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 28 Feb 2013 00:39:37 -0800 Subject: eCryptfs: allow userspace messaging to be disabled When the userspace messaging (for the less common case of userspace key wrap/unwrap via ecryptfsd) is not needed, allow eCryptfs to build with it removed. This saves on kernel code size and reduces potential attack surface by removing the /dev/ecryptfs node. Signed-off-by: Kees Cook Signed-off-by: Tyler Hicks --- fs/ecryptfs/Kconfig | 8 ++++++++ fs/ecryptfs/Makefile | 7 +++++-- fs/ecryptfs/ecryptfs_kernel.h | 40 ++++++++++++++++++++++++++++++++++++++-- fs/ecryptfs/keystore.c | 4 ++-- include/linux/ecryptfs.h | 12 ++---------- 5 files changed, 55 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/fs/ecryptfs/Kconfig b/fs/ecryptfs/Kconfig index cc16562654de..1f63120b669a 100644 --- a/fs/ecryptfs/Kconfig +++ b/fs/ecryptfs/Kconfig @@ -12,3 +12,11 @@ config ECRYPT_FS To compile this file system support as a module, choose M here: the module will be called ecryptfs. + +config ECRYPT_FS_MESSAGING + bool "Enable notifications for userspace key wrap/unwrap" + depends on ECRYPT_FS + help + Enables the /dev/ecryptfs entry for use by ecryptfsd. This allows + for userspace to wrap/unwrap file encryption keys by other + backends, like OpenSSL. diff --git a/fs/ecryptfs/Makefile b/fs/ecryptfs/Makefile index 2cc9ee4ad2eb..49678a69947d 100644 --- a/fs/ecryptfs/Makefile +++ b/fs/ecryptfs/Makefile @@ -1,7 +1,10 @@ # -# Makefile for the Linux 2.6 eCryptfs +# Makefile for the Linux eCryptfs # obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o -ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o read_write.o crypto.o keystore.o messaging.o miscdev.o kthread.o debug.o +ecryptfs-y := dentry.o file.o inode.o main.o super.o mmap.o read_write.o \ + crypto.o keystore.o kthread.o debug.o + +ecryptfs-$(CONFIG_ECRYPT_FS_MESSAGING) += messaging.o miscdev.o diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index cfb4b9fed520..a9df69efadc2 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -172,6 +172,19 @@ ecryptfs_get_key_payload_data(struct key *key) #define ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE 24 #define ECRYPTFS_ENCRYPTED_DENTRY_NAME_LEN (18 + 1 + 4 + 1 + 32) +#ifdef CONFIG_ECRYPT_FS_MESSAGING +# define ECRYPTFS_VERSIONING_MASK_MESSAGING (ECRYPTFS_VERSIONING_DEVMISC \ + | ECRYPTFS_VERSIONING_PUBKEY) +#else +# define ECRYPTFS_VERSIONING_MASK_MESSAGING 0 +#endif + +#define ECRYPTFS_VERSIONING_MASK (ECRYPTFS_VERSIONING_PASSPHRASE \ + | ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH \ + | ECRYPTFS_VERSIONING_XATTR \ + | ECRYPTFS_VERSIONING_MULTKEY \ + | ECRYPTFS_VERSIONING_MASK_MESSAGING \ + | ECRYPTFS_VERSIONING_FILENAME_ENCRYPTION) struct ecryptfs_key_sig { struct list_head crypt_stat_list; char keysig[ECRYPTFS_SIG_SIZE_HEX + 1]; @@ -399,7 +412,9 @@ struct ecryptfs_daemon { struct hlist_node euid_chain; }; +#ifdef CONFIG_ECRYPT_FS_MESSAGING extern struct mutex ecryptfs_daemon_hash_mux; +#endif static inline size_t ecryptfs_lower_header_size(struct ecryptfs_crypt_stat *crypt_stat) @@ -604,6 +619,7 @@ int ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); int ecryptfs_read_xattr_region(char *page_virt, struct inode *ecryptfs_inode); +#ifdef CONFIG_ECRYPT_FS_MESSAGING int ecryptfs_process_response(struct ecryptfs_daemon *daemon, struct ecryptfs_message *msg, u32 seq); int ecryptfs_send_message(char *data, int data_len, @@ -612,6 +628,24 @@ int ecryptfs_wait_for_response(struct ecryptfs_msg_ctx *msg_ctx, struct ecryptfs_message **emsg); int ecryptfs_init_messaging(void); void ecryptfs_release_messaging(void); +#else +static inline int ecryptfs_init_messaging(void) +{ + return 0; +} +static inline void ecryptfs_release_messaging(void) +{ } +static inline int ecryptfs_send_message(char *data, int data_len, + struct ecryptfs_msg_ctx **msg_ctx) +{ + return -ENOTCONN; +} +static inline int ecryptfs_wait_for_response(struct ecryptfs_msg_ctx *msg_ctx, + struct ecryptfs_message **emsg) +{ + return -ENOMSG; +} +#endif void ecryptfs_write_header_metadata(char *virt, @@ -649,12 +683,11 @@ int ecryptfs_read_lower_page_segment(struct page *page_for_ecryptfs, size_t offset_in_page, size_t size, struct inode *ecryptfs_inode); struct page *ecryptfs_get_locked_page(struct inode *inode, loff_t index); -int ecryptfs_exorcise_daemon(struct ecryptfs_daemon *daemon); -int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon); int ecryptfs_parse_packet_length(unsigned char *data, size_t *size, size_t *length_size); int ecryptfs_write_packet_length(char *dest, size_t size, size_t *packet_size_length); +#ifdef CONFIG_ECRYPT_FS_MESSAGING int ecryptfs_init_ecryptfs_miscdev(void); void ecryptfs_destroy_ecryptfs_miscdev(void); int ecryptfs_send_miscdev(char *data, size_t data_size, @@ -663,6 +696,9 @@ int ecryptfs_send_miscdev(char *data, size_t data_size, void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx); int ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, struct file *file); +int ecryptfs_exorcise_daemon(struct ecryptfs_daemon *daemon); +int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon); +#endif int ecryptfs_init_kthread(void); void ecryptfs_destroy_kthread(void); int ecryptfs_privileged_open(struct file **lower_file, diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index 5aceff202dc0..7d52806c2119 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -1168,7 +1168,7 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok, rc = ecryptfs_send_message(payload, payload_len, &msg_ctx); if (rc) { ecryptfs_printk(KERN_ERR, "Error sending message to " - "ecryptfsd\n"); + "ecryptfsd: %d\n", rc); goto out; } rc = ecryptfs_wait_for_response(msg_ctx, &msg); @@ -1988,7 +1988,7 @@ pki_encrypt_session_key(struct key *auth_tok_key, rc = ecryptfs_send_message(payload, payload_len, &msg_ctx); if (rc) { ecryptfs_printk(KERN_ERR, "Error sending message to " - "ecryptfsd\n"); + "ecryptfsd: %d\n", rc); goto out; } rc = ecryptfs_wait_for_response(msg_ctx, &msg); diff --git a/include/linux/ecryptfs.h b/include/linux/ecryptfs.h index 2224a8c0cb64..8d5ab998a222 100644 --- a/include/linux/ecryptfs.h +++ b/include/linux/ecryptfs.h @@ -6,9 +6,8 @@ #define ECRYPTFS_VERSION_MINOR 0x04 #define ECRYPTFS_SUPPORTED_FILE_VERSION 0x03 /* These flags indicate which features are supported by the kernel - * module; userspace tools such as the mount helper read - * ECRYPTFS_VERSIONING_MASK from a sysfs handle in order to determine - * how to behave. */ + * module; userspace tools such as the mount helper read the feature + * bits from a sysfs handle in order to determine how to behave. */ #define ECRYPTFS_VERSIONING_PASSPHRASE 0x00000001 #define ECRYPTFS_VERSIONING_PUBKEY 0x00000002 #define ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH 0x00000004 @@ -19,13 +18,6 @@ #define ECRYPTFS_VERSIONING_HMAC 0x00000080 #define ECRYPTFS_VERSIONING_FILENAME_ENCRYPTION 0x00000100 #define ECRYPTFS_VERSIONING_GCM 0x00000200 -#define ECRYPTFS_VERSIONING_MASK (ECRYPTFS_VERSIONING_PASSPHRASE \ - | ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH \ - | ECRYPTFS_VERSIONING_PUBKEY \ - | ECRYPTFS_VERSIONING_XATTR \ - | ECRYPTFS_VERSIONING_MULTKEY \ - | ECRYPTFS_VERSIONING_DEVMISC \ - | ECRYPTFS_VERSIONING_FILENAME_ENCRYPTION) #define ECRYPTFS_MAX_PASSWORD_LENGTH 64 #define ECRYPTFS_MAX_PASSPHRASE_BYTES ECRYPTFS_MAX_PASSWORD_LENGTH #define ECRYPTFS_SALT_SIZE 8 -- cgit From 53540098b23c3884b4a0b4f220b9d977bc496af3 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 3 Mar 2013 22:35:20 +0100 Subject: ACPI / glue: Add .match() callback to struct acpi_bus_type USB uses the .find_bridge() callback from struct acpi_bus_type incorrectly, because as a result of the way it is used by USB every device in the system that doesn't have a bus type or parent is passed to usb_acpi_find_device() for inspection. What USB actually needs, though, is to call usb_acpi_find_device() for USB ports that don't have a bus type defined, but have usb_port_device_type as their device type, as well as for USB devices. To fix that replace the struct bus_type pointer in struct acpi_bus_type used for matching devices to specific subsystems with a .match() callback to be used for this purpose and update the users of struct acpi_bus_type, including USB, accordingly. Define the .match() callback routine for USB, usb_acpi_bus_match(), in such a way that it will cover both USB devices and USB ports and remove the now redundant .find_bridge() callback pointer from usb_acpi_bus. Signed-off-by: Rafael J. Wysocki Acked-by: Greg Kroah-Hartman Acked-by: Yinghai Lu Acked-by: Jeff Garzik --- drivers/acpi/glue.c | 39 +++++++++++++-------------------------- drivers/ata/libata-acpi.c | 1 + drivers/pci/pci-acpi.c | 8 +++++++- drivers/pnp/pnpacpi/core.c | 8 +++++++- drivers/scsi/scsi_lib.c | 7 ++++++- drivers/usb/core/usb-acpi.c | 9 +++++++-- include/acpi/acpi_bus.h | 3 ++- 7 files changed, 43 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c index ef6f155469b5..b94d14721af3 100644 --- a/drivers/acpi/glue.c +++ b/drivers/acpi/glue.c @@ -36,12 +36,11 @@ int register_acpi_bus_type(struct acpi_bus_type *type) { if (acpi_disabled) return -ENODEV; - if (type && type->bus && type->find_device) { + if (type && type->match && type->find_device) { down_write(&bus_type_sem); list_add_tail(&type->list, &bus_type_list); up_write(&bus_type_sem); - printk(KERN_INFO PREFIX "bus type %s registered\n", - type->bus->name); + printk(KERN_INFO PREFIX "bus type %s registered\n", type->name); return 0; } return -ENODEV; @@ -56,24 +55,21 @@ int unregister_acpi_bus_type(struct acpi_bus_type *type) down_write(&bus_type_sem); list_del_init(&type->list); up_write(&bus_type_sem); - printk(KERN_INFO PREFIX "ACPI bus type %s unregistered\n", - type->bus->name); + printk(KERN_INFO PREFIX "bus type %s unregistered\n", + type->name); return 0; } return -ENODEV; } EXPORT_SYMBOL_GPL(unregister_acpi_bus_type); -static struct acpi_bus_type *acpi_get_bus_type(struct bus_type *type) +static struct acpi_bus_type *acpi_get_bus_type(struct device *dev) { struct acpi_bus_type *tmp, *ret = NULL; - if (!type) - return NULL; - down_read(&bus_type_sem); list_for_each_entry(tmp, &bus_type_list, list) { - if (tmp->bus == type) { + if (tmp->match(dev)) { ret = tmp; break; } @@ -261,26 +257,17 @@ err: static int acpi_platform_notify(struct device *dev) { - struct acpi_bus_type *type; + struct acpi_bus_type *type = acpi_get_bus_type(dev); acpi_handle handle; int ret; ret = acpi_bind_one(dev, NULL); - if (ret && (!dev->bus || !dev->parent)) { - /* bridge devices genernally haven't bus or parent */ - ret = acpi_find_bridge_device(dev, &handle); - if (!ret) { - ret = acpi_bind_one(dev, handle); - if (ret) - goto out; - } - } - - type = acpi_get_bus_type(dev->bus); if (ret) { - if (!type || !type->find_device) { - DBG("No ACPI bus support for %s\n", dev_name(dev)); - ret = -EINVAL; + if (!type) { + ret = acpi_find_bridge_device(dev, &handle); + if (!ret) + ret = acpi_bind_one(dev, handle); + goto out; } @@ -316,7 +303,7 @@ static int acpi_platform_notify_remove(struct device *dev) { struct acpi_bus_type *type; - type = acpi_get_bus_type(dev->bus); + type = acpi_get_bus_type(dev); if (type && type->cleanup) type->cleanup(dev); diff --git a/drivers/ata/libata-acpi.c b/drivers/ata/libata-acpi.c index 0ea1018280bd..c832a5ca09ad 100644 --- a/drivers/ata/libata-acpi.c +++ b/drivers/ata/libata-acpi.c @@ -1150,6 +1150,7 @@ static int ata_acpi_find_dummy(struct device *dev, acpi_handle *handle) } static struct acpi_bus_type ata_acpi_bus = { + .name = "ATA", .find_bridge = ata_acpi_find_dummy, .find_device = ata_acpi_find_device, }; diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index 39c937f9b426..dee5dddaa292 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -331,8 +331,14 @@ static void pci_acpi_cleanup(struct device *dev) } } +static bool pci_acpi_bus_match(struct device *dev) +{ + return dev->bus == &pci_bus_type; +} + static struct acpi_bus_type acpi_pci_bus = { - .bus = &pci_bus_type, + .name = "PCI", + .match = pci_acpi_bus_match, .find_device = acpi_pci_find_device, .setup = pci_acpi_setup, .cleanup = pci_acpi_cleanup, diff --git a/drivers/pnp/pnpacpi/core.c b/drivers/pnp/pnpacpi/core.c index 8813fc03aa09..55cd459a3908 100644 --- a/drivers/pnp/pnpacpi/core.c +++ b/drivers/pnp/pnpacpi/core.c @@ -353,8 +353,14 @@ static int __init acpi_pnp_find_device(struct device *dev, acpi_handle * handle) /* complete initialization of a PNPACPI device includes having * pnpdev->dev.archdata.acpi_handle point to its ACPI sibling. */ +static bool acpi_pnp_bus_match(struct device *dev) +{ + return dev->bus == &pnp_bus_type; +} + static struct acpi_bus_type __initdata acpi_pnp_bus = { - .bus = &pnp_bus_type, + .name = "PNP", + .match = acpi_pnp_bus_match, .find_device = acpi_pnp_find_device, }; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 765398c063c7..c31187d79343 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -71,9 +71,14 @@ struct kmem_cache *scsi_sdb_cache; #ifdef CONFIG_ACPI #include +static bool acpi_scsi_bus_match(struct device *dev) +{ + return dev->bus == &scsi_bus_type; +} + int scsi_register_acpi_bus_type(struct acpi_bus_type *bus) { - bus->bus = &scsi_bus_type; + bus->match = acpi_scsi_bus_match; return register_acpi_bus_type(bus); } EXPORT_SYMBOL_GPL(scsi_register_acpi_bus_type); diff --git a/drivers/usb/core/usb-acpi.c b/drivers/usb/core/usb-acpi.c index cef4252bb31a..b6f4bad3f756 100644 --- a/drivers/usb/core/usb-acpi.c +++ b/drivers/usb/core/usb-acpi.c @@ -210,9 +210,14 @@ static int usb_acpi_find_device(struct device *dev, acpi_handle *handle) return 0; } +static bool usb_acpi_bus_match(struct device *dev) +{ + return is_usb_device(dev) || is_usb_port(dev); +} + static struct acpi_bus_type usb_acpi_bus = { - .bus = &usb_bus_type, - .find_bridge = usb_acpi_find_device, + .name = "USB", + .match = usb_acpi_bus_match, .find_device = usb_acpi_find_device, }; diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index e65278f560c4..c751d7de3a5f 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -437,7 +437,8 @@ void acpi_remove_dir(struct acpi_device *); */ struct acpi_bus_type { struct list_head list; - struct bus_type *bus; + const char *name; + bool (*match)(struct device *dev); /* For general devices under the bus */ int (*find_device) (struct device *, acpi_handle *); /* For bridges, such as PCI root bridge, IDE controller */ -- cgit From 924144818cf0edc5d9d70d3a44e7cbbf4544796c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 3 Mar 2013 22:35:44 +0100 Subject: ACPI / glue: Drop .find_bridge() callback from struct acpi_bus_type After PCI and USB have stopped using the .find_bridge() callback in struct acpi_bus_type, the only remaining user of it is SATA, but SATA only pretends to be a user, because it points that callback to a stub always returning -ENODEV. For this reason, drop the SATA's dummy .find_bridge() callback and remove .find_bridge(), which is not used any more, from struct acpi_bus_type entirely. Signed-off-by: Rafael J. Wysocki Acked-by: Yinghai Lu Acked-by: Jeff Garzik --- drivers/acpi/glue.c | 26 +------------------------- drivers/ata/libata-acpi.c | 6 ------ include/acpi/acpi_bus.h | 3 --- 3 files changed, 1 insertion(+), 34 deletions(-) (limited to 'include') diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c index b94d14721af3..40a84cc6740c 100644 --- a/drivers/acpi/glue.c +++ b/drivers/acpi/glue.c @@ -78,22 +78,6 @@ static struct acpi_bus_type *acpi_get_bus_type(struct device *dev) return ret; } -static int acpi_find_bridge_device(struct device *dev, acpi_handle * handle) -{ - struct acpi_bus_type *tmp; - int ret = -ENODEV; - - down_read(&bus_type_sem); - list_for_each_entry(tmp, &bus_type_list, list) { - if (tmp->find_bridge && !tmp->find_bridge(dev, handle)) { - ret = 0; - break; - } - } - up_read(&bus_type_sem); - return ret; -} - static acpi_status do_acpi_find_child(acpi_handle handle, u32 lvl_not_used, void *addr_p, void **ret_p) { @@ -262,15 +246,7 @@ static int acpi_platform_notify(struct device *dev) int ret; ret = acpi_bind_one(dev, NULL); - if (ret) { - if (!type) { - ret = acpi_find_bridge_device(dev, &handle); - if (!ret) - ret = acpi_bind_one(dev, handle); - - goto out; - } - + if (ret && type) { ret = type->find_device(dev, &handle); if (ret) { DBG("Unable to get handle for %s\n", dev_name(dev)); diff --git a/drivers/ata/libata-acpi.c b/drivers/ata/libata-acpi.c index c832a5ca09ad..beea3115577e 100644 --- a/drivers/ata/libata-acpi.c +++ b/drivers/ata/libata-acpi.c @@ -1144,14 +1144,8 @@ static int ata_acpi_find_device(struct device *dev, acpi_handle *handle) return -ENODEV; } -static int ata_acpi_find_dummy(struct device *dev, acpi_handle *handle) -{ - return -ENODEV; -} - static struct acpi_bus_type ata_acpi_bus = { .name = "ATA", - .find_bridge = ata_acpi_find_dummy, .find_device = ata_acpi_find_device, }; diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index c751d7de3a5f..22ba56e834e2 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -439,10 +439,7 @@ struct acpi_bus_type { struct list_head list; const char *name; bool (*match)(struct device *dev); - /* For general devices under the bus */ int (*find_device) (struct device *, acpi_handle *); - /* For bridges, such as PCI root bridge, IDE controller */ - int (*find_bridge) (struct device *, acpi_handle *); void (*setup)(struct device *); void (*cleanup)(struct device *); }; -- cgit From 13bcf01b33e6e19a7fe7ff396f9ed02803e225ec Mon Sep 17 00:00:00 2001 From: Christopher Harvey Date: Thu, 7 Mar 2013 10:42:25 -0500 Subject: drm: Documentation typo fixes Signed-off-by: Christopher Harvey Signed-off-by: Dave Airlie --- include/drm/drm_crtc.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index 8839b3a24660..e3e0d651c6ca 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -443,12 +443,12 @@ struct drm_crtc { * @dpms: set power state (see drm_crtc_funcs above) * @save: save connector state * @restore: restore connector state - * @reset: reset connector after state has been invalidate (e.g. resume) + * @reset: reset connector after state has been invalidated (e.g. resume) * @detect: is this connector active? * @fill_modes: fill mode list for this connector - * @set_property: property for this connector may need update + * @set_property: property for this connector may need an update * @destroy: make object go away - * @force: notify the driver the connector is forced on + * @force: notify the driver that the connector is forced on * * Each CRTC may have one or more connectors attached to it. The functions * below allow the core DRM code to control connectors, enumerate available modes, -- cgit From 22dfab7fd7fd5a8a2c5556ca0a8fd35fc959abc8 Mon Sep 17 00:00:00 2001 From: Daniel Kurtz Date: Thu, 7 Mar 2013 19:43:33 -0800 Subject: Input: atmel_mxt_ts - Support for touchpad variant This same driver can be used by atmel based touchscreens and touchpads (buttonpads). Platform data may specify a device is a touchpad using the is_tp flag. This will cause the driver to perform some touchpad specific initializations, such as: * register input device name "Atmel maXTouch Touchpad" instead of Touchscreen. * register BTN_LEFT & BTN_TOOL_* event types. * register axis resolution (as a fixed constant, for now) * register BUTTONPAD property * process GPIO buttons using reportid T19 Input event GPIO mapping is done by the platform data key_map array. key_map[x] should contain the KEY or BTN code to send when processing GPIOx from T19. To specify a GPIO as not an input source, populate with KEY_RESERVED, or 0. Signed-off-by: Daniel Kurtz Signed-off-by: Benson Leung Signed-off-by: Nick Dyer Tested-by: Olof Johansson Signed-off-by: Linus Torvalds --- drivers/input/touchscreen/atmel_mxt_ts.c | 64 +++++++++++++++++++++++++++++++- include/linux/i2c/atmel_mxt_ts.h | 5 +++ 2 files changed, 67 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c index d04f810cb1dd..8ed279c56d27 100644 --- a/drivers/input/touchscreen/atmel_mxt_ts.c +++ b/drivers/input/touchscreen/atmel_mxt_ts.c @@ -181,6 +181,12 @@ #define MXT_FWRESET_TIME 175 /* msec */ +/* MXT_SPT_GPIOPWM_T19 field */ +#define MXT_GPIO0_MASK 0x04 +#define MXT_GPIO1_MASK 0x08 +#define MXT_GPIO2_MASK 0x10 +#define MXT_GPIO3_MASK 0x20 + /* Command to unlock bootloader */ #define MXT_UNLOCK_CMD_MSB 0xaa #define MXT_UNLOCK_CMD_LSB 0xdc @@ -212,6 +218,8 @@ /* Touchscreen absolute values */ #define MXT_MAX_AREA 0xff +#define MXT_PIXELS_PER_MM 20 + struct mxt_info { u8 family_id; u8 variant_id; @@ -243,6 +251,8 @@ struct mxt_data { const struct mxt_platform_data *pdata; struct mxt_object *object_table; struct mxt_info info; + bool is_tp; + unsigned int irq; unsigned int max_x; unsigned int max_y; @@ -251,6 +261,7 @@ struct mxt_data { u8 T6_reportid; u8 T9_reportid_min; u8 T9_reportid_max; + u8 T19_reportid; }; static bool mxt_object_readable(unsigned int type) @@ -502,6 +513,21 @@ static int mxt_write_object(struct mxt_data *data, return mxt_write_reg(data->client, reg + offset, val); } +static void mxt_input_button(struct mxt_data *data, struct mxt_message *message) +{ + struct input_dev *input = data->input_dev; + bool button; + int i; + + /* Active-low switch */ + for (i = 0; i < MXT_NUM_GPIO; i++) { + if (data->pdata->key_map[i] == KEY_RESERVED) + continue; + button = !(message->message[0] & MXT_GPIO0_MASK << i); + input_report_key(input, data->pdata->key_map[i], button); + } +} + static void mxt_input_touchevent(struct mxt_data *data, struct mxt_message *message, int id) { @@ -585,6 +611,9 @@ static irqreturn_t mxt_interrupt(int irq, void *dev_id) int id = reportid - data->T9_reportid_min; mxt_input_touchevent(data, &message, id); update_input = true; + } else if (message.reportid == data->T19_reportid) { + mxt_input_button(data, &message); + update_input = true; } else { mxt_dump_message(dev, &message); } @@ -764,6 +793,9 @@ static int mxt_get_object_table(struct mxt_data *data) data->T9_reportid_min = min_id; data->T9_reportid_max = max_id; break; + case MXT_SPT_GPIOPWM_T19: + data->T19_reportid = min_id; + break; } } @@ -777,7 +809,7 @@ static void mxt_free_object_table(struct mxt_data *data) data->T6_reportid = 0; data->T9_reportid_min = 0; data->T9_reportid_max = 0; - + data->T19_reportid = 0; } static int mxt_initialize(struct mxt_data *data) @@ -1115,9 +1147,13 @@ static int mxt_probe(struct i2c_client *client, goto err_free_mem; } - input_dev->name = "Atmel maXTouch Touchscreen"; + data->is_tp = pdata && pdata->is_tp; + + input_dev->name = (data->is_tp) ? "Atmel maXTouch Touchpad" : + "Atmel maXTouch Touchscreen"; snprintf(data->phys, sizeof(data->phys), "i2c-%u-%04x/input0", client->adapter->nr, client->addr); + input_dev->phys = data->phys; input_dev->id.bustype = BUS_I2C; @@ -1140,6 +1176,29 @@ static int mxt_probe(struct i2c_client *client, __set_bit(EV_KEY, input_dev->evbit); __set_bit(BTN_TOUCH, input_dev->keybit); + if (data->is_tp) { + int i; + __set_bit(INPUT_PROP_POINTER, input_dev->propbit); + __set_bit(INPUT_PROP_BUTTONPAD, input_dev->propbit); + + for (i = 0; i < MXT_NUM_GPIO; i++) + if (pdata->key_map[i] != KEY_RESERVED) + __set_bit(pdata->key_map[i], input_dev->keybit); + + __set_bit(BTN_TOOL_FINGER, input_dev->keybit); + __set_bit(BTN_TOOL_DOUBLETAP, input_dev->keybit); + __set_bit(BTN_TOOL_TRIPLETAP, input_dev->keybit); + __set_bit(BTN_TOOL_QUADTAP, input_dev->keybit); + __set_bit(BTN_TOOL_QUINTTAP, input_dev->keybit); + + input_abs_set_res(input_dev, ABS_X, MXT_PIXELS_PER_MM); + input_abs_set_res(input_dev, ABS_Y, MXT_PIXELS_PER_MM); + input_abs_set_res(input_dev, ABS_MT_POSITION_X, + MXT_PIXELS_PER_MM); + input_abs_set_res(input_dev, ABS_MT_POSITION_Y, + MXT_PIXELS_PER_MM); + } + /* For single touch */ input_set_abs_params(input_dev, ABS_X, 0, data->max_x, 0, 0); @@ -1258,6 +1317,7 @@ static SIMPLE_DEV_PM_OPS(mxt_pm_ops, mxt_suspend, mxt_resume); static const struct i2c_device_id mxt_id[] = { { "qt602240_ts", 0 }, { "atmel_mxt_ts", 0 }, + { "atmel_mxt_tp", 0 }, { "mXT224", 0 }, { } }; diff --git a/include/linux/i2c/atmel_mxt_ts.h b/include/linux/i2c/atmel_mxt_ts.h index f027f7a63511..99e379b74398 100644 --- a/include/linux/i2c/atmel_mxt_ts.h +++ b/include/linux/i2c/atmel_mxt_ts.h @@ -15,6 +15,9 @@ #include +/* For key_map array */ +#define MXT_NUM_GPIO 4 + /* Orient */ #define MXT_NORMAL 0x0 #define MXT_DIAGONAL 0x1 @@ -39,6 +42,8 @@ struct mxt_platform_data { unsigned int voltage; unsigned char orient; unsigned long irqflags; + bool is_tp; + const unsigned int key_map[MXT_NUM_GPIO]; }; #endif /* __LINUX_ATMEL_MXT_TS_H */ -- cgit From 49d0de082c31de34cc896c14eec5f1c2ade0415a Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Thu, 14 Feb 2013 16:42:34 -0800 Subject: rcu: Fix hlist_bl_set_first_rcu() annotation Abhi noticed that we were getting a complaint from the RCU subsystem about access of an RCU protected list under the write side bit lock. This commit adds additional annotation to check both the RCU read lock and the write side bit lock before printing a message. Reported by: Abhijith Das Signed-off-by: Steven Whitehouse Tested-by: Abhijith Das Signed-off-by: Paul E. McKenney --- include/linux/list_bl.h | 5 +++++ include/linux/rculist_bl.h | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/list_bl.h b/include/linux/list_bl.h index 31f9d75adc5b..2eb88556c5c5 100644 --- a/include/linux/list_bl.h +++ b/include/linux/list_bl.h @@ -125,6 +125,11 @@ static inline void hlist_bl_unlock(struct hlist_bl_head *b) __bit_spin_unlock(0, (unsigned long *)b); } +static inline bool hlist_bl_is_locked(struct hlist_bl_head *b) +{ + return bit_spin_is_locked(0, (unsigned long *)b); +} + /** * hlist_bl_for_each_entry - iterate over list of given type * @tpos: the type * to use as a loop cursor. diff --git a/include/linux/rculist_bl.h b/include/linux/rculist_bl.h index cf1244fbf3b6..4f216c59e7db 100644 --- a/include/linux/rculist_bl.h +++ b/include/linux/rculist_bl.h @@ -20,7 +20,7 @@ static inline void hlist_bl_set_first_rcu(struct hlist_bl_head *h, static inline struct hlist_bl_node *hlist_bl_first_rcu(struct hlist_bl_head *h) { return (struct hlist_bl_node *) - ((unsigned long)rcu_dereference(h->first) & ~LIST_BL_LOCKMASK); + ((unsigned long)rcu_dereference_check(h->first, hlist_bl_is_locked(h)) & ~LIST_BL_LOCKMASK); } /** -- cgit From 09c7b890622d72b5e004cc249bbe610e8b928ddf Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 8 Feb 2013 15:55:02 -0800 Subject: rcu: Add event tracing for no-CBs CPUs' grace periods Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/trace/events/rcu.h | 55 ++++++++++++++++++++++++++++++++++++++++++++++ kernel/rcutree_plugin.h | 30 +++++++++++++++++++++++++ 2 files changed, 85 insertions(+) (limited to 'include') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index 1918e832da4f..cdfed6d386eb 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -71,6 +71,58 @@ TRACE_EVENT(rcu_grace_period, __entry->rcuname, __entry->gpnum, __entry->gpevent) ); +/* + * Tracepoint for no-callbacks grace-period events. The caller should + * pull the data from the rcu_node structure, other than rcuname, which + * comes from the rcu_state structure, and event, which is one of the + * following: + * + * "Startleaf": Request a nocb grace period based on leaf-node data. + * "Startedleaf": Leaf-node start proved sufficient. + * "Startedleafroot": Leaf-node start proved sufficient after checking root. + * "Startedroot": Requested a nocb grace period based on root-node data. + * "StartWait": Start waiting for the requested grace period. + * "ResumeWait": Resume waiting after signal. + * "EndWait": Complete wait. + * "Cleanup": Clean up rcu_node structure after previous GP. + * "CleanupMore": Clean up, and another no-CB GP is needed. + */ +TRACE_EVENT(rcu_nocb_grace_period, + + TP_PROTO(char *rcuname, unsigned long gpnum, unsigned long completed, + unsigned long c, u8 level, int grplo, int grphi, + char *gpevent), + + TP_ARGS(rcuname, gpnum, completed, c, level, grplo, grphi, gpevent), + + TP_STRUCT__entry( + __field(char *, rcuname) + __field(unsigned long, gpnum) + __field(unsigned long, completed) + __field(unsigned long, c) + __field(u8, level) + __field(int, grplo) + __field(int, grphi) + __field(char *, gpevent) + ), + + TP_fast_assign( + __entry->rcuname = rcuname; + __entry->gpnum = gpnum; + __entry->completed = completed; + __entry->c = c; + __entry->level = level; + __entry->grplo = grplo; + __entry->grphi = grphi; + __entry->gpevent = gpevent; + ), + + TP_printk("%s %lu %lu %lu %u %d %d %s", + __entry->rcuname, __entry->gpnum, __entry->completed, + __entry->c, __entry->level, __entry->grplo, __entry->grphi, + __entry->gpevent) +); + /* * Tracepoint for grace-period-initialization events. These are * distinguished by the type of RCU, the new grace-period number, the @@ -601,6 +653,9 @@ TRACE_EVENT(rcu_barrier, #define trace_rcu_grace_period(rcuname, gpnum, gpevent) do { } while (0) #define trace_rcu_grace_period_init(rcuname, gpnum, level, grplo, grphi, \ qsmask) do { } while (0) +#define trace_rcu_nocb_grace_period(rcuname, gpnum, completed, c, \ + level, grplo, grphi, event) \ + do { } while (0) #define trace_rcu_preempt_task(rcuname, pid, gpnum) do { } while (0) #define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0) #define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, \ diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 7225a5a14cef..e32236e83dda 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -2201,6 +2201,9 @@ static int rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) wake_up_all(&rnp->nocb_gp_wq[c & 0x1]); rnp->n_nocb_gp_requests[c & 0x1] = 0; needmore = rnp->n_nocb_gp_requests[(c + 1) & 0x1]; + trace_rcu_nocb_grace_period(rsp->name, rnp->gpnum, rnp->completed, + c, rnp->level, rnp->grplo, rnp->grphi, + needmore ? "CleanupMore" : "Cleanup"); return needmore; } @@ -2347,6 +2350,9 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) /* Count our request for a grace period. */ rnp->n_nocb_gp_requests[c & 0x1]++; + trace_rcu_nocb_grace_period(rdp->rsp->name, rnp->gpnum, rnp->completed, + c, rnp->level, rnp->grplo, rnp->grphi, + "Startleaf"); if (rnp->gpnum != rnp->completed) { @@ -2355,6 +2361,10 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) * is in progress, so we are done. When this grace * period ends, our request will be acted upon. */ + trace_rcu_nocb_grace_period(rdp->rsp->name, + rnp->gpnum, rnp->completed, c, + rnp->level, rnp->grplo, rnp->grphi, + "Startedleaf"); raw_spin_unlock_irqrestore(&rnp->lock, flags); } else { @@ -2366,6 +2376,11 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) if (rnp != rnp_root) raw_spin_lock(&rnp_root->lock); /* irqs disabled. */ if (rnp_root->gpnum != rnp_root->completed) { + trace_rcu_nocb_grace_period(rdp->rsp->name, + rnp->gpnum, rnp->completed, + c, rnp->level, + rnp->grplo, rnp->grphi, + "Startedleafroot"); raw_spin_unlock(&rnp_root->lock); /* irqs disabled. */ } else { @@ -2381,6 +2396,11 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) c = rnp_root->completed + 1; rnp->n_nocb_gp_requests[c & 0x1]++; rnp_root->n_nocb_gp_requests[c & 0x1]++; + trace_rcu_nocb_grace_period(rdp->rsp->name, + rnp->gpnum, rnp->completed, + c, rnp->level, + rnp->grplo, rnp->grphi, + "Startedroot"); local_save_flags(flags1); rcu_start_gp(rdp->rsp, flags1); /* Rlses ->lock. */ } @@ -2396,6 +2416,9 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) * Wait for the grace period. Do so interruptibly to avoid messing * up the load average. */ + trace_rcu_nocb_grace_period(rdp->rsp->name, rnp->gpnum, rnp->completed, + c, rnp->level, rnp->grplo, rnp->grphi, + "StartWait"); for (;;) { wait_event_interruptible( rnp->nocb_gp_wq[c & 0x1], @@ -2403,7 +2426,14 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) if (likely(d)) break; flush_signals(current); + trace_rcu_nocb_grace_period(rdp->rsp->name, + rnp->gpnum, rnp->completed, c, + rnp->level, rnp->grplo, rnp->grphi, + "ResumeWait"); } + trace_rcu_nocb_grace_period(rdp->rsp->name, rnp->gpnum, rnp->completed, + c, rnp->level, rnp->grplo, rnp->grphi, + "EndWait"); smp_mb(); /* Ensure that CB invocation happens after GP end. */ } -- cgit From c0f4dfd4f90f1667d234d21f15153ea09a2eaa66 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 28 Dec 2012 11:30:36 -0800 Subject: rcu: Make RCU_FAST_NO_HZ take advantage of numbered callbacks Because RCU callbacks are now associated with the number of the grace period that they must wait for, CPUs can now take advance callbacks corresponding to grace periods that ended while a given CPU was in dyntick-idle mode. This eliminates the need to try forcing the RCU state machine while entering idle, thus reducing the CPU intensiveness of RCU_FAST_NO_HZ, which should increase its energy efficiency. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- Documentation/kernel-parameters.txt | 28 ++- include/linux/rcupdate.h | 1 + init/Kconfig | 17 +- kernel/rcutree.c | 28 +-- kernel/rcutree.h | 12 +- kernel/rcutree_plugin.h | 374 ++++++++++-------------------------- kernel/rcutree_trace.c | 2 - 7 files changed, 149 insertions(+), 313 deletions(-) (limited to 'include') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index a17ba16c8fc8..22303b2e74bc 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2490,6 +2490,17 @@ bytes respectively. Such letter suffixes can also be entirely omitted. leaf rcu_node structure. Useful for very large systems. + rcutree.jiffies_till_first_fqs= [KNL,BOOT] + Set delay from grace-period initialization to + first attempt to force quiescent states. + Units are jiffies, minimum value is zero, + and maximum value is HZ. + + rcutree.jiffies_till_next_fqs= [KNL,BOOT] + Set delay between subsequent attempts to force + quiescent states. Units are jiffies, minimum + value is one, and maximum value is HZ. + rcutree.qhimark= [KNL,BOOT] Set threshold of queued RCU callbacks over which batch limiting is disabled. @@ -2504,16 +2515,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted. rcutree.rcu_cpu_stall_timeout= [KNL,BOOT] Set timeout for RCU CPU stall warning messages. - rcutree.jiffies_till_first_fqs= [KNL,BOOT] - Set delay from grace-period initialization to - first attempt to force quiescent states. - Units are jiffies, minimum value is zero, - and maximum value is HZ. + rcutree.rcu_idle_gp_delay= [KNL,BOOT] + Set wakeup interval for idle CPUs that have + RCU callbacks (RCU_FAST_NO_HZ=y). - rcutree.jiffies_till_next_fqs= [KNL,BOOT] - Set delay between subsequent attempts to force - quiescent states. Units are jiffies, minimum - value is one, and maximum value is HZ. + rcutree.rcu_idle_lazy_gp_delay= [KNL,BOOT] + Set wakeup interval for idle CPUs that have + only "lazy" RCU callbacks (RCU_FAST_NO_HZ=y). + Lazy RCU callbacks are those which RCU can + prove do nothing more than free memory. rcutorture.fqs_duration= [KNL,BOOT] Set duration of force_quiescent_state bursts. diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index b758ce17b309..9ed2c9a4de45 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -80,6 +80,7 @@ extern void do_trace_rcu_torture_read(char *rcutorturename, #define UINT_CMP_LT(a, b) (UINT_MAX / 2 < (a) - (b)) #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) #define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) +#define ulong2long(a) (*(long *)(&(a))) /* Exported common interfaces */ diff --git a/init/Kconfig b/init/Kconfig index 717584064a7e..a3a2304fa6d2 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -582,13 +582,16 @@ config RCU_FAST_NO_HZ depends on NO_HZ && SMP default n help - This option causes RCU to attempt to accelerate grace periods in - order to allow CPUs to enter dynticks-idle state more quickly. - On the other hand, this option increases the overhead of the - dynticks-idle checking, thus degrading scheduling latency. - - Say Y if energy efficiency is critically important, and you don't - care about real-time response. + This option permits CPUs to enter dynticks-idle state even if + they have RCU callbacks queued, and prevents RCU from waking + these CPUs up more than roughly once every four jiffies (by + default, you can adjust this using the rcutree.rcu_idle_gp_delay + parameter), thus improving energy efficiency. On the other + hand, this option increases the duration of RCU grace periods, + for example, slowing down synchronize_rcu(). + + Say Y if energy efficiency is critically important, and you + don't care about increased grace-period durations. Say N if you are unsure. diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 2015bce749f9..7b1d7769872a 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -2640,19 +2640,27 @@ static int rcu_pending(int cpu) } /* - * Check to see if any future RCU-related work will need to be done - * by the current CPU, even if none need be done immediately, returning - * 1 if so. + * Return true if the specified CPU has any callback. If all_lazy is + * non-NULL, store an indication of whether all callbacks are lazy. + * (If there are no callbacks, all of them are deemed to be lazy.) */ -static int rcu_cpu_has_callbacks(int cpu) +static int rcu_cpu_has_callbacks(int cpu, bool *all_lazy) { + bool al = true; + bool hc = false; + struct rcu_data *rdp; struct rcu_state *rsp; - /* RCU callbacks either ready or pending? */ - for_each_rcu_flavor(rsp) - if (per_cpu_ptr(rsp->rda, cpu)->nxtlist) - return 1; - return 0; + for_each_rcu_flavor(rsp) { + rdp = per_cpu_ptr(rsp->rda, cpu); + if (rdp->qlen != rdp->qlen_lazy) + al = false; + if (rdp->nxtlist) + hc = true; + } + if (all_lazy) + *all_lazy = al; + return hc; } /* @@ -2871,7 +2879,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; atomic_set(&rdp->dynticks->dynticks, (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1); - rcu_prepare_for_idle_init(cpu); raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ /* Add CPU to rcu_node bitmasks. */ @@ -2945,7 +2952,6 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, */ for_each_rcu_flavor(rsp) rcu_cleanup_dying_cpu(rsp); - rcu_cleanup_after_idle(cpu); break; case CPU_DEAD: case CPU_DEAD_FROZEN: diff --git a/kernel/rcutree.h b/kernel/rcutree.h index b6c2335efbdf..96a27f922e92 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -88,18 +88,13 @@ struct rcu_dynticks { int dynticks_nmi_nesting; /* Track NMI nesting level. */ atomic_t dynticks; /* Even value for idle, else odd. */ #ifdef CONFIG_RCU_FAST_NO_HZ - int dyntick_drain; /* Prepare-for-idle state variable. */ - unsigned long dyntick_holdoff; - /* No retries for the jiffy of failure. */ - struct timer_list idle_gp_timer; - /* Wake up CPU sleeping with callbacks. */ - unsigned long idle_gp_timer_expires; - /* When to wake up CPU (for repost). */ - bool idle_first_pass; /* First pass of attempt to go idle? */ + bool all_lazy; /* Are all CPU's CBs lazy? */ unsigned long nonlazy_posted; /* # times non-lazy CBs posted to CPU. */ unsigned long nonlazy_posted_snap; /* idle-period nonlazy_posted snapshot. */ + unsigned long last_accelerate; + /* Last jiffy CBs were accelerated. */ int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */ #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */ }; @@ -521,7 +516,6 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, struct rcu_node *rnp); #endif /* #ifdef CONFIG_RCU_BOOST */ static void __cpuinit rcu_prepare_kthreads(int cpu); -static void rcu_prepare_for_idle_init(int cpu); static void rcu_cleanup_after_idle(int cpu); static void rcu_prepare_for_idle(int cpu); static void rcu_idle_count_callbacks_posted(void); diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 28185ad18df3..d318f9f18be5 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -1543,14 +1543,7 @@ static void __cpuinit rcu_prepare_kthreads(int cpu) int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) { *delta_jiffies = ULONG_MAX; - return rcu_cpu_has_callbacks(cpu); -} - -/* - * Because we do not have RCU_FAST_NO_HZ, don't bother initializing for it. - */ -static void rcu_prepare_for_idle_init(int cpu) -{ + return rcu_cpu_has_callbacks(cpu, NULL); } /* @@ -1587,16 +1580,6 @@ static void rcu_idle_count_callbacks_posted(void) * * The following three proprocessor symbols control this state machine: * - * RCU_IDLE_FLUSHES gives the maximum number of times that we will attempt - * to satisfy RCU. Beyond this point, it is better to incur a periodic - * scheduling-clock interrupt than to loop through the state machine - * at full power. - * RCU_IDLE_OPT_FLUSHES gives the number of RCU_IDLE_FLUSHES that are - * optional if RCU does not need anything immediately from this - * CPU, even if this CPU still has RCU callbacks queued. The first - * times through the state machine are mandatory: we need to give - * the state machine a chance to communicate a quiescent state - * to the RCU core. * RCU_IDLE_GP_DELAY gives the number of jiffies that a CPU is permitted * to sleep in dyntick-idle mode with RCU callbacks pending. This * is sized to be roughly one RCU grace period. Those energy-efficiency @@ -1612,15 +1595,9 @@ static void rcu_idle_count_callbacks_posted(void) * adjustment, they can be converted into kernel config parameters, though * making the state machine smarter might be a better option. */ -#define RCU_IDLE_FLUSHES 5 /* Number of dyntick-idle tries. */ -#define RCU_IDLE_OPT_FLUSHES 3 /* Optional dyntick-idle tries. */ #define RCU_IDLE_GP_DELAY 4 /* Roughly one grace period. */ #define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */ -static int rcu_idle_flushes = RCU_IDLE_FLUSHES; -module_param(rcu_idle_flushes, int, 0644); -static int rcu_idle_opt_flushes = RCU_IDLE_OPT_FLUSHES; -module_param(rcu_idle_opt_flushes, int, 0644); static int rcu_idle_gp_delay = RCU_IDLE_GP_DELAY; module_param(rcu_idle_gp_delay, int, 0644); static int rcu_idle_lazy_gp_delay = RCU_IDLE_LAZY_GP_DELAY; @@ -1629,178 +1606,97 @@ module_param(rcu_idle_lazy_gp_delay, int, 0644); extern int tick_nohz_enabled; /* - * Does the specified flavor of RCU have non-lazy callbacks pending on - * the specified CPU? Both RCU flavor and CPU are specified by the - * rcu_data structure. - */ -static bool __rcu_cpu_has_nonlazy_callbacks(struct rcu_data *rdp) -{ - return rdp->qlen != rdp->qlen_lazy; -} - -#ifdef CONFIG_TREE_PREEMPT_RCU - -/* - * Are there non-lazy RCU-preempt callbacks? (There cannot be if there - * is no RCU-preempt in the kernel.) + * Try to advance callbacks for all flavors of RCU on the current CPU. + * Afterwards, if there are any callbacks ready for immediate invocation, + * return true. */ -static bool rcu_preempt_cpu_has_nonlazy_callbacks(int cpu) +static bool rcu_try_advance_all_cbs(void) { - struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu); - - return __rcu_cpu_has_nonlazy_callbacks(rdp); -} - -#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ + bool cbs_ready = false; + struct rcu_data *rdp; + struct rcu_node *rnp; + struct rcu_state *rsp; -static bool rcu_preempt_cpu_has_nonlazy_callbacks(int cpu) -{ - return 0; -} + for_each_rcu_flavor(rsp) { + rdp = this_cpu_ptr(rsp->rda); + rnp = rdp->mynode; -#endif /* else #ifdef CONFIG_TREE_PREEMPT_RCU */ + /* + * Don't bother checking unless a grace period has + * completed since we last checked and there are + * callbacks not yet ready to invoke. + */ + if (rdp->completed != rnp->completed && + rdp->nxttail[RCU_DONE_TAIL] != rdp->nxttail[RCU_NEXT_TAIL]) + rcu_process_gp_end(rsp, rdp); -/* - * Does any flavor of RCU have non-lazy callbacks on the specified CPU? - */ -static bool rcu_cpu_has_nonlazy_callbacks(int cpu) -{ - return __rcu_cpu_has_nonlazy_callbacks(&per_cpu(rcu_sched_data, cpu)) || - __rcu_cpu_has_nonlazy_callbacks(&per_cpu(rcu_bh_data, cpu)) || - rcu_preempt_cpu_has_nonlazy_callbacks(cpu); + if (cpu_has_callbacks_ready_to_invoke(rdp)) + cbs_ready = true; + } + return cbs_ready; } /* - * Allow the CPU to enter dyntick-idle mode if either: (1) There are no - * callbacks on this CPU, (2) this CPU has not yet attempted to enter - * dyntick-idle mode, or (3) this CPU is in the process of attempting to - * enter dyntick-idle mode. Otherwise, if we have recently tried and failed - * to enter dyntick-idle mode, we refuse to try to enter it. After all, - * it is better to incur scheduling-clock interrupts than to spin - * continuously for the same time duration! + * Allow the CPU to enter dyntick-idle mode unless it has callbacks ready + * to invoke. If the CPU has callbacks, try to advance them. Tell the + * caller to set the timeout based on whether or not there are non-lazy + * callbacks. * - * The delta_jiffies argument is used to store the time when RCU is - * going to need the CPU again if it still has callbacks. The reason - * for this is that rcu_prepare_for_idle() might need to post a timer, - * but if so, it will do so after tick_nohz_stop_sched_tick() has set - * the wakeup time for this CPU. This means that RCU's timer can be - * delayed until the wakeup time, which defeats the purpose of posting - * a timer. + * The caller must have disabled interrupts. */ -int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) +int rcu_needs_cpu(int cpu, unsigned long *dj) { struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); - /* Flag a new idle sojourn to the idle-entry state machine. */ - rdtp->idle_first_pass = 1; + /* Snapshot to detect later posting of non-lazy callback. */ + rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted; + /* If no callbacks, RCU doesn't need the CPU. */ - if (!rcu_cpu_has_callbacks(cpu)) { - *delta_jiffies = ULONG_MAX; + if (!rcu_cpu_has_callbacks(cpu, &rdtp->all_lazy)) { + *dj = ULONG_MAX; return 0; } - if (rdtp->dyntick_holdoff == jiffies) { - /* RCU recently tried and failed, so don't try again. */ - *delta_jiffies = 1; + + /* Attempt to advance callbacks. */ + if (rcu_try_advance_all_cbs()) { + /* Some ready to invoke, so initiate later invocation. */ + invoke_rcu_core(); return 1; } - /* Set up for the possibility that RCU will post a timer. */ - if (rcu_cpu_has_nonlazy_callbacks(cpu)) { - *delta_jiffies = round_up(rcu_idle_gp_delay + jiffies, - rcu_idle_gp_delay) - jiffies; + rdtp->last_accelerate = jiffies; + + /* Request timer delay depending on laziness, and round. */ + if (rdtp->all_lazy) { + *dj = round_up(rcu_idle_gp_delay + jiffies, + rcu_idle_gp_delay) - jiffies; } else { - *delta_jiffies = jiffies + rcu_idle_lazy_gp_delay; - *delta_jiffies = round_jiffies(*delta_jiffies) - jiffies; + *dj = round_jiffies(rcu_idle_lazy_gp_delay + jiffies) - jiffies; } return 0; } /* - * Handler for smp_call_function_single(). The only point of this - * handler is to wake the CPU up, so the handler does only tracing. - */ -void rcu_idle_demigrate(void *unused) -{ - trace_rcu_prep_idle("Demigrate"); -} - -/* - * Timer handler used to force CPU to start pushing its remaining RCU - * callbacks in the case where it entered dyntick-idle mode with callbacks - * pending. The hander doesn't really need to do anything because the - * real work is done upon re-entry to idle, or by the next scheduling-clock - * interrupt should idle not be re-entered. - * - * One special case: the timer gets migrated without awakening the CPU - * on which the timer was scheduled on. In this case, we must wake up - * that CPU. We do so with smp_call_function_single(). - */ -static void rcu_idle_gp_timer_func(unsigned long cpu_in) -{ - int cpu = (int)cpu_in; - - trace_rcu_prep_idle("Timer"); - if (cpu != smp_processor_id()) - smp_call_function_single(cpu, rcu_idle_demigrate, NULL, 0); - else - WARN_ON_ONCE(1); /* Getting here can hang the system... */ -} - -/* - * Initialize the timer used to pull CPUs out of dyntick-idle mode. - */ -static void rcu_prepare_for_idle_init(int cpu) -{ - struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); - - rdtp->dyntick_holdoff = jiffies - 1; - setup_timer(&rdtp->idle_gp_timer, rcu_idle_gp_timer_func, cpu); - rdtp->idle_gp_timer_expires = jiffies - 1; - rdtp->idle_first_pass = 1; -} - -/* - * Clean up for exit from idle. Because we are exiting from idle, there - * is no longer any point to ->idle_gp_timer, so cancel it. This will - * do nothing if this timer is not active, so just cancel it unconditionally. - */ -static void rcu_cleanup_after_idle(int cpu) -{ - struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); - - del_timer(&rdtp->idle_gp_timer); - trace_rcu_prep_idle("Cleanup after idle"); - rdtp->tick_nohz_enabled_snap = ACCESS_ONCE(tick_nohz_enabled); -} - -/* - * Check to see if any RCU-related work can be done by the current CPU, - * and if so, schedule a softirq to get it done. This function is part - * of the RCU implementation; it is -not- an exported member of the RCU API. - * - * The idea is for the current CPU to clear out all work required by the - * RCU core for the current grace period, so that this CPU can be permitted - * to enter dyntick-idle mode. In some cases, it will need to be awakened - * at the end of the grace period by whatever CPU ends the grace period. - * This allows CPUs to go dyntick-idle more quickly, and to reduce the - * number of wakeups by a modest integer factor. - * - * Because it is not legal to invoke rcu_process_callbacks() with irqs - * disabled, we do one pass of force_quiescent_state(), then do a - * invoke_rcu_core() to cause rcu_process_callbacks() to be invoked - * later. The ->dyntick_drain field controls the sequencing. + * Prepare a CPU for idle from an RCU perspective. The first major task + * is to sense whether nohz mode has been enabled or disabled via sysfs. + * The second major task is to check to see if a non-lazy callback has + * arrived at a CPU that previously had only lazy callbacks. The third + * major task is to accelerate (that is, assign grace-period numbers to) + * any recently arrived callbacks. * * The caller must have disabled interrupts. */ static void rcu_prepare_for_idle(int cpu) { - struct timer_list *tp; + struct rcu_data *rdp; struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); + struct rcu_node *rnp; + struct rcu_state *rsp; int tne; /* Handle nohz enablement switches conservatively. */ tne = ACCESS_ONCE(tick_nohz_enabled); if (tne != rdtp->tick_nohz_enabled_snap) { - if (rcu_cpu_has_callbacks(cpu)) + if (rcu_cpu_has_callbacks(cpu, NULL)) invoke_rcu_core(); /* force nohz to see update. */ rdtp->tick_nohz_enabled_snap = tne; return; @@ -1808,125 +1704,56 @@ static void rcu_prepare_for_idle(int cpu) if (!tne) return; - /* Adaptive-tick mode, where usermode execution is idle to RCU. */ - if (!is_idle_task(current)) { - rdtp->dyntick_holdoff = jiffies - 1; - if (rcu_cpu_has_nonlazy_callbacks(cpu)) { - trace_rcu_prep_idle("User dyntick with callbacks"); - rdtp->idle_gp_timer_expires = - round_up(jiffies + rcu_idle_gp_delay, - rcu_idle_gp_delay); - } else if (rcu_cpu_has_callbacks(cpu)) { - rdtp->idle_gp_timer_expires = - round_jiffies(jiffies + rcu_idle_lazy_gp_delay); - trace_rcu_prep_idle("User dyntick with lazy callbacks"); - } else { - return; - } - tp = &rdtp->idle_gp_timer; - mod_timer_pinned(tp, rdtp->idle_gp_timer_expires); + /* If this is a no-CBs CPU, no callbacks, just return. */ + if (is_nocb_cpu(cpu)) return; - } /* - * If this is an idle re-entry, for example, due to use of - * RCU_NONIDLE() or the new idle-loop tracing API within the idle - * loop, then don't take any state-machine actions, unless the - * momentary exit from idle queued additional non-lazy callbacks. - * Instead, repost the ->idle_gp_timer if this CPU has callbacks - * pending. + * If a non-lazy callback arrived at a CPU having only lazy + * callbacks, invoke RCU core for the side-effect of recalculating + * idle duration on re-entry to idle. */ - if (!rdtp->idle_first_pass && - (rdtp->nonlazy_posted == rdtp->nonlazy_posted_snap)) { - if (rcu_cpu_has_callbacks(cpu)) { - tp = &rdtp->idle_gp_timer; - mod_timer_pinned(tp, rdtp->idle_gp_timer_expires); - } + if (rdtp->all_lazy && + rdtp->nonlazy_posted != rdtp->nonlazy_posted_snap) { + invoke_rcu_core(); return; } - rdtp->idle_first_pass = 0; - rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted - 1; /* - * If there are no callbacks on this CPU, enter dyntick-idle mode. - * Also reset state to avoid prejudicing later attempts. + * If we have not yet accelerated this jiffy, accelerate all + * callbacks on this CPU. */ - if (!rcu_cpu_has_callbacks(cpu)) { - rdtp->dyntick_holdoff = jiffies - 1; - rdtp->dyntick_drain = 0; - trace_rcu_prep_idle("No callbacks"); + if (rdtp->last_accelerate == jiffies) return; + rdtp->last_accelerate = jiffies; + for_each_rcu_flavor(rsp) { + rdp = per_cpu_ptr(rsp->rda, cpu); + if (!*rdp->nxttail[RCU_DONE_TAIL]) + continue; + rnp = rdp->mynode; + raw_spin_lock(&rnp->lock); /* irqs already disabled. */ + rcu_accelerate_cbs(rsp, rnp, rdp); + raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ } +} - /* - * If in holdoff mode, just return. We will presumably have - * refrained from disabling the scheduling-clock tick. - */ - if (rdtp->dyntick_holdoff == jiffies) { - trace_rcu_prep_idle("In holdoff"); - return; - } +/* + * Clean up for exit from idle. Attempt to advance callbacks based on + * any grace periods that elapsed while the CPU was idle, and if any + * callbacks are now ready to invoke, initiate invocation. + */ +static void rcu_cleanup_after_idle(int cpu) +{ + struct rcu_data *rdp; + struct rcu_state *rsp; - /* Check and update the ->dyntick_drain sequencing. */ - if (rdtp->dyntick_drain <= 0) { - /* First time through, initialize the counter. */ - rdtp->dyntick_drain = rcu_idle_flushes; - } else if (rdtp->dyntick_drain <= rcu_idle_opt_flushes && - !rcu_pending(cpu) && - !local_softirq_pending()) { - /* Can we go dyntick-idle despite still having callbacks? */ - rdtp->dyntick_drain = 0; - rdtp->dyntick_holdoff = jiffies; - if (rcu_cpu_has_nonlazy_callbacks(cpu)) { - trace_rcu_prep_idle("Dyntick with callbacks"); - rdtp->idle_gp_timer_expires = - round_up(jiffies + rcu_idle_gp_delay, - rcu_idle_gp_delay); - } else { - rdtp->idle_gp_timer_expires = - round_jiffies(jiffies + rcu_idle_lazy_gp_delay); - trace_rcu_prep_idle("Dyntick with lazy callbacks"); - } - tp = &rdtp->idle_gp_timer; - mod_timer_pinned(tp, rdtp->idle_gp_timer_expires); - rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted; - return; /* Nothing more to do immediately. */ - } else if (--(rdtp->dyntick_drain) <= 0) { - /* We have hit the limit, so time to give up. */ - rdtp->dyntick_holdoff = jiffies; - trace_rcu_prep_idle("Begin holdoff"); - invoke_rcu_core(); /* Force the CPU out of dyntick-idle. */ + if (is_nocb_cpu(cpu)) return; - } - - /* - * Do one step of pushing the remaining RCU callbacks through - * the RCU core state machine. - */ -#ifdef CONFIG_TREE_PREEMPT_RCU - if (per_cpu(rcu_preempt_data, cpu).nxtlist) { - rcu_preempt_qs(cpu); - force_quiescent_state(&rcu_preempt_state); - } -#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ - if (per_cpu(rcu_sched_data, cpu).nxtlist) { - rcu_sched_qs(cpu); - force_quiescent_state(&rcu_sched_state); - } - if (per_cpu(rcu_bh_data, cpu).nxtlist) { - rcu_bh_qs(cpu); - force_quiescent_state(&rcu_bh_state); - } - - /* - * If RCU callbacks are still pending, RCU still needs this CPU. - * So try forcing the callbacks through the grace period. - */ - if (rcu_cpu_has_callbacks(cpu)) { - trace_rcu_prep_idle("More callbacks"); - invoke_rcu_core(); - } else { - trace_rcu_prep_idle("Callbacks drained"); + rcu_try_advance_all_cbs(); + for_each_rcu_flavor(rsp) { + rdp = per_cpu_ptr(rsp->rda, cpu); + if (cpu_has_callbacks_ready_to_invoke(rdp)) + invoke_rcu_core(); } } @@ -2034,16 +1861,13 @@ early_initcall(rcu_register_oom_notifier); static void print_cpu_stall_fast_no_hz(char *cp, int cpu) { struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); - struct timer_list *tltp = &rdtp->idle_gp_timer; - char c; + unsigned long nlpd = rdtp->nonlazy_posted - rdtp->nonlazy_posted_snap; - c = rdtp->dyntick_holdoff == jiffies ? 'H' : '.'; - if (timer_pending(tltp)) - sprintf(cp, "drain=%d %c timer=%lu", - rdtp->dyntick_drain, c, tltp->expires - jiffies); - else - sprintf(cp, "drain=%d %c timer not pending", - rdtp->dyntick_drain, c); + sprintf(cp, "last_accelerate: %04lx/%04lx, nonlazy_posted: %ld, %c%c", + rdtp->last_accelerate & 0xffff, jiffies & 0xffff, + ulong2long(nlpd), + rdtp->all_lazy ? 'L' : '.', + rdtp->tick_nohz_enabled_snap ? '.' : 'D'); } #else /* #ifdef CONFIG_RCU_FAST_NO_HZ */ diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index 0d095dcaa670..49099e81c87b 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c @@ -46,8 +46,6 @@ #define RCU_TREE_NONCORE #include "rcutree.h" -#define ulong2long(a) (*(long *)(&(a))) - static int r_open(struct inode *inode, struct file *file, const struct seq_operations *op) { -- cgit From bd9f0686fc8c9a01c6850b1c611d1c9ad80b86d6 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 29 Dec 2012 21:51:20 -0800 Subject: rcu: Repurpose no-CBs event tracing to future-GP events Dyntick-idle CPUs need to be able to pre-announce their need for grace periods. This can be done using something similar to the mechanism used by no-CB CPUs to announce their need for grace periods. This commit moves in this direction by renaming the no-CBs grace-period event tracing to suit the new future-grace-period needs. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/trace/events/rcu.h | 16 ++++++------ kernel/rcutree_plugin.h | 62 ++++++++++++++++++++++++---------------------- 2 files changed, 40 insertions(+), 38 deletions(-) (limited to 'include') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index cdfed6d386eb..59ebcc89f148 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -72,10 +72,10 @@ TRACE_EVENT(rcu_grace_period, ); /* - * Tracepoint for no-callbacks grace-period events. The caller should - * pull the data from the rcu_node structure, other than rcuname, which - * comes from the rcu_state structure, and event, which is one of the - * following: + * Tracepoint for future grace-period events, including those for no-callbacks + * CPUs. The caller should pull the data from the rcu_node structure, + * other than rcuname, which comes from the rcu_state structure, and event, + * which is one of the following: * * "Startleaf": Request a nocb grace period based on leaf-node data. * "Startedleaf": Leaf-node start proved sufficient. @@ -87,7 +87,7 @@ TRACE_EVENT(rcu_grace_period, * "Cleanup": Clean up rcu_node structure after previous GP. * "CleanupMore": Clean up, and another no-CB GP is needed. */ -TRACE_EVENT(rcu_nocb_grace_period, +TRACE_EVENT(rcu_future_grace_period, TP_PROTO(char *rcuname, unsigned long gpnum, unsigned long completed, unsigned long c, u8 level, int grplo, int grphi, @@ -653,9 +653,9 @@ TRACE_EVENT(rcu_barrier, #define trace_rcu_grace_period(rcuname, gpnum, gpevent) do { } while (0) #define trace_rcu_grace_period_init(rcuname, gpnum, level, grplo, grphi, \ qsmask) do { } while (0) -#define trace_rcu_nocb_grace_period(rcuname, gpnum, completed, c, \ - level, grplo, grphi, event) \ - do { } while (0) +#define trace_rcu_future_grace_period(rcuname, gpnum, completed, c, \ + level, grplo, grphi, event) \ + do { } while (0) #define trace_rcu_preempt_task(rcuname, pid, gpnum) do { } while (0) #define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0) #define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, \ diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index d318f9f18be5..df50502eca2c 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -2034,9 +2034,9 @@ static int rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) wake_up_all(&rnp->nocb_gp_wq[c & 0x1]); rnp->n_nocb_gp_requests[c & 0x1] = 0; needmore = rnp->n_nocb_gp_requests[(c + 1) & 0x1]; - trace_rcu_nocb_grace_period(rsp->name, rnp->gpnum, rnp->completed, - c, rnp->level, rnp->grplo, rnp->grphi, - needmore ? "CleanupMore" : "Cleanup"); + trace_rcu_future_grace_period(rsp->name, rnp->gpnum, rnp->completed, + c, rnp->level, rnp->grplo, rnp->grphi, + needmore ? "CleanupMore" : "Cleanup"); return needmore; } @@ -2183,9 +2183,9 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) /* Count our request for a grace period. */ rnp->n_nocb_gp_requests[c & 0x1]++; - trace_rcu_nocb_grace_period(rdp->rsp->name, rnp->gpnum, rnp->completed, - c, rnp->level, rnp->grplo, rnp->grphi, - "Startleaf"); + trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum, + rnp->completed, c, rnp->level, + rnp->grplo, rnp->grphi, "Startleaf"); if (rnp->gpnum != rnp->completed) { @@ -2194,10 +2194,10 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) * is in progress, so we are done. When this grace * period ends, our request will be acted upon. */ - trace_rcu_nocb_grace_period(rdp->rsp->name, - rnp->gpnum, rnp->completed, c, - rnp->level, rnp->grplo, rnp->grphi, - "Startedleaf"); + trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum, + rnp->completed, c, rnp->level, + rnp->grplo, rnp->grphi, + "Startedleaf"); raw_spin_unlock_irqrestore(&rnp->lock, flags); } else { @@ -2209,11 +2209,12 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) if (rnp != rnp_root) raw_spin_lock(&rnp_root->lock); /* irqs disabled. */ if (rnp_root->gpnum != rnp_root->completed) { - trace_rcu_nocb_grace_period(rdp->rsp->name, - rnp->gpnum, rnp->completed, - c, rnp->level, - rnp->grplo, rnp->grphi, - "Startedleafroot"); + trace_rcu_future_grace_period(rdp->rsp->name, + rnp->gpnum, + rnp->completed, + c, rnp->level, + rnp->grplo, rnp->grphi, + "Startedleafroot"); raw_spin_unlock(&rnp_root->lock); /* irqs disabled. */ } else { @@ -2229,11 +2230,12 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) c = rnp_root->completed + 1; rnp->n_nocb_gp_requests[c & 0x1]++; rnp_root->n_nocb_gp_requests[c & 0x1]++; - trace_rcu_nocb_grace_period(rdp->rsp->name, - rnp->gpnum, rnp->completed, - c, rnp->level, - rnp->grplo, rnp->grphi, - "Startedroot"); + trace_rcu_future_grace_period(rdp->rsp->name, + rnp->gpnum, + rnp->completed, + c, rnp->level, + rnp->grplo, rnp->grphi, + "Startedroot"); local_save_flags(flags1); rcu_start_gp(rdp->rsp, flags1); /* Rlses ->lock. */ } @@ -2249,9 +2251,9 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) * Wait for the grace period. Do so interruptibly to avoid messing * up the load average. */ - trace_rcu_nocb_grace_period(rdp->rsp->name, rnp->gpnum, rnp->completed, - c, rnp->level, rnp->grplo, rnp->grphi, - "StartWait"); + trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum, + rnp->completed, c, rnp->level, + rnp->grplo, rnp->grphi, "StartWait"); for (;;) { wait_event_interruptible( rnp->nocb_gp_wq[c & 0x1], @@ -2259,14 +2261,14 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) if (likely(d)) break; flush_signals(current); - trace_rcu_nocb_grace_period(rdp->rsp->name, - rnp->gpnum, rnp->completed, c, - rnp->level, rnp->grplo, rnp->grphi, - "ResumeWait"); + trace_rcu_future_grace_period(rdp->rsp->name, + rnp->gpnum, rnp->completed, c, + rnp->level, rnp->grplo, + rnp->grphi, "ResumeWait"); } - trace_rcu_nocb_grace_period(rdp->rsp->name, rnp->gpnum, rnp->completed, - c, rnp->level, rnp->grplo, rnp->grphi, - "EndWait"); + trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum, + rnp->completed, c, rnp->level, + rnp->grplo, rnp->grphi, "EndWait"); smp_mb(); /* Ensure that CB invocation happens after GP end. */ } -- cgit