From 07fca0e57fca925032526349f4370f97ed580cc9 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Sat, 10 Jul 2010 08:35:00 +0200 Subject: tracing: Properly align linker defined symbols We define a number of symbols in the linker scipt like this: __start_syscalls_metadata = .; *(__syscalls_metadata) But we do not know the alignment of "." when we assign the __start_syscalls_metadata symbol. gcc started to uses bigger alignment for structs (32 bytes), so we saw situations where the linker due to alignment constraints increased the value of "." after the symbol assignment. This resulted in boot fails. Fix this by forcing a 32 byte alignment of "." before the assignment. This patch introduces the forced alignment for ftrace_events and syscalls_metadata. It may be required in more places. Reported-by: Zeev Tarantov Signed-off-by: Sam Ravnborg LKML-Reference: <20100710063459.GA14596@merkur.ravnborg.org> Cc: Frederic Weisbecker Signed-off-by: Steven Rostedt --- include/asm-generic/vmlinux.lds.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 48c5299cbf26..4b5902ad0d5d 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -63,6 +63,12 @@ /* Align . to a 8 byte boundary equals to maximum function alignment. */ #define ALIGN_FUNCTION() . = ALIGN(8) +/* + * Align to a 32 byte boundary equal to the + * alignment gcc 4.5 uses for a struct + */ +#define STRUCT_ALIGN() . = ALIGN(32) + /* The actual configuration determine if the init/exit sections * are handled as text/data or they can be discarded (which * often happens at runtime) @@ -166,7 +172,11 @@ LIKELY_PROFILE() \ BRANCH_PROFILE() \ TRACE_PRINTKS() \ + \ + STRUCT_ALIGN(); \ FTRACE_EVENTS() \ + \ + STRUCT_ALIGN(); \ TRACE_SYSCALLS() /* -- cgit v1.2.3-73-gaa49b From 8a35747a5d13b99e076b0222729e0caa48cb69b6 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 21 Jul 2010 21:44:31 +0000 Subject: macvtap: Limit packet queue length Mark Wagner reported OOM symptoms when sending UDP traffic over a macvtap link to a kvm receiver. This appears to be caused by the fact that macvtap packet queues are unlimited in length. This means that if the receiver can't keep up with the rate of flow, then we will hit OOM. Of course it gets worse if the OOM killer then decides to kill the receiver. This patch imposes a cap on the packet queue length, in the same way as the tuntap driver, using the device TX queue length. Please note that macvtap currently has no way of giving congestion notification, that means the software device TX queue cannot be used and packets will always be dropped once the macvtap driver queue fills up. This shouldn't be a great problem for the scenario where macvtap is used to feed a kvm receiver, as the traffic is most likely external in origin so congestion notification can't be applied anyway. Of course, if anybody decides to complain about guest-to-guest UDP packet loss down the track, then we may have to revisit this. Incidentally, this patch also fixes a real memory leak when macvtap_get_queue fails. Chris Wright noticed that for this patch to work, we need a non-zero TX queue length. This patch includes his work to change the default macvtap TX queue length to 500. Reported-by: Mark Wagner Signed-off-by: Herbert Xu Acked-by: Chris Wright Acked-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/macvlan.c | 10 ++++++++-- drivers/net/macvtap.c | 18 ++++++++++++++++-- include/linux/if_macvlan.h | 2 ++ 3 files changed, 26 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 87e8d4cb4057..f15fe2cf72ae 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -499,7 +499,7 @@ static const struct net_device_ops macvlan_netdev_ops = { .ndo_validate_addr = eth_validate_addr, }; -static void macvlan_setup(struct net_device *dev) +void macvlan_common_setup(struct net_device *dev) { ether_setup(dev); @@ -508,6 +508,12 @@ static void macvlan_setup(struct net_device *dev) dev->destructor = free_netdev; dev->header_ops = &macvlan_hard_header_ops, dev->ethtool_ops = &macvlan_ethtool_ops; +} +EXPORT_SYMBOL_GPL(macvlan_common_setup); + +static void macvlan_setup(struct net_device *dev) +{ + macvlan_common_setup(dev); dev->tx_queue_len = 0; } @@ -705,7 +711,6 @@ int macvlan_link_register(struct rtnl_link_ops *ops) /* common fields */ ops->priv_size = sizeof(struct macvlan_dev); ops->get_tx_queues = macvlan_get_tx_queues; - ops->setup = macvlan_setup; ops->validate = macvlan_validate; ops->maxtype = IFLA_MACVLAN_MAX; ops->policy = macvlan_policy; @@ -719,6 +724,7 @@ EXPORT_SYMBOL_GPL(macvlan_link_register); static struct rtnl_link_ops macvlan_link_ops = { .kind = "macvlan", + .setup = macvlan_setup, .newlink = macvlan_newlink, .dellink = macvlan_dellink, }; diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index a8a94e2f6ddc..ff02b836c3c4 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -180,11 +180,18 @@ static int macvtap_forward(struct net_device *dev, struct sk_buff *skb) { struct macvtap_queue *q = macvtap_get_queue(dev, skb); if (!q) - return -ENOLINK; + goto drop; + + if (skb_queue_len(&q->sk.sk_receive_queue) >= dev->tx_queue_len) + goto drop; skb_queue_tail(&q->sk.sk_receive_queue, skb); wake_up_interruptible_poll(sk_sleep(&q->sk), POLLIN | POLLRDNORM | POLLRDBAND); - return 0; + return NET_RX_SUCCESS; + +drop: + kfree_skb(skb); + return NET_RX_DROP; } /* @@ -235,8 +242,15 @@ static void macvtap_dellink(struct net_device *dev, macvlan_dellink(dev, head); } +static void macvtap_setup(struct net_device *dev) +{ + macvlan_common_setup(dev); + dev->tx_queue_len = TUN_READQ_SIZE; +} + static struct rtnl_link_ops macvtap_link_ops __read_mostly = { .kind = "macvtap", + .setup = macvtap_setup, .newlink = macvtap_newlink, .dellink = macvtap_dellink, }; diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index 9ea047aca795..1ffaeffeff74 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -67,6 +67,8 @@ static inline void macvlan_count_rx(const struct macvlan_dev *vlan, } } +extern void macvlan_common_setup(struct net_device *dev); + extern int macvlan_common_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], int (*receive)(struct sk_buff *skb), -- cgit v1.2.3-73-gaa49b From 718be4aaf3613cf7c2d097f925abc3d3553c0605 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 22 Jul 2010 16:54:27 -0400 Subject: ACPI: skip checking BM_STS if the BIOS doesn't ask for it It turns out that there is a bit in the _CST for Intel FFH C3 that tells the OS if we should be checking BM_STS or not. Linux has been unconditionally checking BM_STS. If the chip-set is configured to enable BM_STS, it can retard or completely prevent entry into deep C-states -- as illustrated by turbostat: http://userweb.kernel.org/~lenb/acpi/utils/pmtools/turbostat/ ref: Intel Processor Vendor-Specific ACPI Interface Specification table 4 "_CST FFH GAS Field Encoding" Bit 1: Set to 1 if OSPM should use Bus Master avoidance for this C-state https://bugzilla.kernel.org/show_bug.cgi?id=15886 Signed-off-by: Len Brown --- arch/x86/kernel/acpi/cstate.c | 9 +++++++++ drivers/acpi/processor_idle.c | 2 +- include/acpi/processor.h | 3 ++- 3 files changed, 12 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index 2e837f5080fe..fb7a5f052e2b 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -145,6 +145,15 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu, percpu_entry->states[cx->index].eax = cx->address; percpu_entry->states[cx->index].ecx = MWAIT_ECX_INTERRUPT_BREAK; } + + /* + * For _CST FFH on Intel, if GAS.access_size bit 1 is cleared, + * then we should skip checking BM_STS for this C-state. + * ref: "Intel Processor Vendor-Specific ACPI Interface Specification" + */ + if ((c->x86_vendor == X86_VENDOR_INTEL) && !(reg->access_size & 0x2)) + cx->bm_sts_skip = 1; + return retval; } EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe); diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index b1b385692f46..b351342f1faf 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -947,7 +947,7 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev, if (acpi_idle_suspend) return(acpi_idle_enter_c1(dev, state)); - if (acpi_idle_bm_check()) { + if (!cx->bm_sts_skip && acpi_idle_bm_check()) { if (dev->safe_state) { dev->last_state = dev->safe_state; return dev->safe_state->enter(dev, dev->safe_state); diff --git a/include/acpi/processor.h b/include/acpi/processor.h index da565a48240e..a68ca8a11a53 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -48,7 +48,7 @@ struct acpi_power_register { u8 space_id; u8 bit_width; u8 bit_offset; - u8 reserved; + u8 access_size; u64 address; } __attribute__ ((packed)); @@ -63,6 +63,7 @@ struct acpi_processor_cx { u32 power; u32 usage; u64 time; + u8 bm_sts_skip; char desc[ACPI_CX_DESC_LEN]; }; -- cgit v1.2.3-73-gaa49b From da5e37efe8704fc2b354626467f80f73c5e3c020 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Tue, 13 Jul 2010 11:39:42 +0200 Subject: vmlinux.lds: fix .data..init_task output section (fix popwerpc boot) The .data..init_task output section was missing a load offset causing a popwerpc target to fail to boot. Sean MacLennan tracked it down to the definition of INIT_TASK_DATA_SECTION(). There are only two users of INIT_TASK_DATA_SECTION() in the kernel today: cris and popwerpc. cris do not support relocatable kernels and is thus not impacted by this change. Fix INIT_TASK_DATA_SECTION() to specify load offset like all other output sections. Reported-by: Sean MacLennan Signed-off-by: Sam Ravnborg Signed-off-by: Benjamin Herrenschmidt --- include/asm-generic/vmlinux.lds.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 48c5299cbf26..cdfff74e9739 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -435,7 +435,7 @@ */ #define INIT_TASK_DATA_SECTION(align) \ . = ALIGN(align); \ - .data..init_task : { \ + .data..init_task : AT(ADDR(.data..init_task) - LOAD_OFFSET) { \ INIT_TASK_DATA(align) \ } -- cgit v1.2.3-73-gaa49b From 72ad5d77fb981963edae15eee8196c80238f5ed0 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 23 Jul 2010 22:59:09 +0200 Subject: ACPI / Sleep: Allow the NVS saving to be skipped during suspend to RAM Commit 2a6b69765ad794389f2fc3e14a0afa1a995221c2 (ACPI: Store NVS state even when entering suspend to RAM) caused the ACPI suspend code save the NVS area during suspend and restore it during resume unconditionally, although it is known that some systems need to use acpi_sleep=s4_nonvs for hibernation to work. To allow the affected systems to avoid saving and restoring the NVS area during suspend to RAM and resume, introduce kernel command line option acpi_sleep=nonvs and make acpi_sleep=s4_nonvs work as its alias temporarily (add acpi_sleep=s4_nonvs to the feature removal file). Addresses https://bugzilla.kernel.org/show_bug.cgi?id=16396 . Signed-off-by: Rafael J. Wysocki Reported-and-tested-by: tomas m Signed-off-by: Len Brown --- Documentation/feature-removal-schedule.txt | 7 ++++++ Documentation/kernel-parameters.txt | 4 ++-- arch/x86/kernel/acpi/sleep.c | 9 ++++++-- drivers/acpi/sleep.c | 35 +++++++++++++++--------------- include/linux/acpi.h | 2 +- 5 files changed, 34 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index c268783bc4e7..1571c0c83dba 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -647,3 +647,10 @@ Who: Stefan Richter ---------------------------- +What: The acpi_sleep=s4_nonvs command line option +When: 2.6.37 +Files: arch/x86/kernel/acpi/sleep.c +Why: superseded by acpi_sleep=nonvs +Who: Rafael J. Wysocki + +---------------------------- diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 4ddb58df081e..2b2407d9a6d0 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -254,8 +254,8 @@ and is between 256 and 4096 characters. It is defined in the file control method, with respect to putting devices into low power states, to be enforced (the ACPI 2.0 ordering of _PTS is used by default). - s4_nonvs prevents the kernel from saving/restoring the - ACPI NVS memory during hibernation. + nonvs prevents the kernel from saving/restoring the + ACPI NVS memory during suspend/hibernation and resume. sci_force_enable causes the kernel to set SCI_EN directly on resume from S1/S3 (which is against the ACPI spec, but some broken systems don't work without it). diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 82e508677b91..fcc3c61fdecc 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -157,9 +157,14 @@ static int __init acpi_sleep_setup(char *str) #ifdef CONFIG_HIBERNATION if (strncmp(str, "s4_nohwsig", 10) == 0) acpi_no_s4_hw_signature(); - if (strncmp(str, "s4_nonvs", 8) == 0) - acpi_s4_no_nvs(); + if (strncmp(str, "s4_nonvs", 8) == 0) { + pr_warning("ACPI: acpi_sleep=s4_nonvs is deprecated, " + "please use acpi_sleep=nonvs instead"); + acpi_nvs_nosave(); + } #endif + if (strncmp(str, "nonvs", 5) == 0) + acpi_nvs_nosave(); if (strncmp(str, "old_ordering", 12) == 0) acpi_old_suspend_ordering(); str = strchr(str, ','); diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 5b7c52e4a00f..2862c781b372 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -81,6 +81,20 @@ static int acpi_sleep_prepare(u32 acpi_state) #ifdef CONFIG_ACPI_SLEEP static u32 acpi_target_sleep_state = ACPI_STATE_S0; +/* + * The ACPI specification wants us to save NVS memory regions during hibernation + * and to restore them during the subsequent resume. Windows does that also for + * suspend to RAM. However, it is known that this mechanism does not work on + * all machines, so we allow the user to disable it with the help of the + * 'acpi_sleep=nonvs' kernel command line option. + */ +static bool nvs_nosave; + +void __init acpi_nvs_nosave(void) +{ + nvs_nosave = true; +} + /* * ACPI 1.0 wants us to execute _PTS before suspending devices, so we allow the * user to request that behavior by using the 'acpi_old_suspend_ordering' @@ -197,8 +211,7 @@ static int acpi_suspend_begin(suspend_state_t pm_state) u32 acpi_state = acpi_suspend_states[pm_state]; int error = 0; - error = suspend_nvs_alloc(); - + error = nvs_nosave ? 0 : suspend_nvs_alloc(); if (error) return error; @@ -388,20 +401,6 @@ static struct dmi_system_id __initdata acpisleep_dmi_table[] = { #endif /* CONFIG_SUSPEND */ #ifdef CONFIG_HIBERNATION -/* - * The ACPI specification wants us to save NVS memory regions during hibernation - * and to restore them during the subsequent resume. However, it is not certain - * if this mechanism is going to work on all machines, so we allow the user to - * disable this mechanism using the 'acpi_sleep=s4_nonvs' kernel command line - * option. - */ -static bool s4_no_nvs; - -void __init acpi_s4_no_nvs(void) -{ - s4_no_nvs = true; -} - static unsigned long s4_hardware_signature; static struct acpi_table_facs *facs; static bool nosigcheck; @@ -415,7 +414,7 @@ static int acpi_hibernation_begin(void) { int error; - error = s4_no_nvs ? 0 : suspend_nvs_alloc(); + error = nvs_nosave ? 0 : suspend_nvs_alloc(); if (!error) { acpi_target_sleep_state = ACPI_STATE_S4; acpi_sleep_tts_switch(acpi_target_sleep_state); @@ -510,7 +509,7 @@ static int acpi_hibernation_begin_old(void) error = acpi_sleep_prepare(ACPI_STATE_S4); if (!error) { - if (!s4_no_nvs) + if (!nvs_nosave) error = suspend_nvs_alloc(); if (!error) acpi_target_sleep_state = ACPI_STATE_S4; diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 224a38c960d4..ccf94dc5acdf 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -253,7 +253,7 @@ int acpi_resources_are_enforced(void); #ifdef CONFIG_PM_SLEEP void __init acpi_no_s4_hw_signature(void); void __init acpi_old_suspend_ordering(void); -void __init acpi_s4_no_nvs(void); +void __init acpi_nvs_nosave(void); #endif /* CONFIG_PM_SLEEP */ struct acpi_osc_context { -- cgit v1.2.3-73-gaa49b From 3b87956ea645fb4de7e59c7d0aa94de04be72615 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Thu, 22 Jul 2010 18:45:04 +0000 Subject: net sched: fix race in mirred device removal This fixes hang when target device of mirred packet classifier action is removed. If a mirror or redirection action is configured to cause packets to go to another device, the classifier holds a ref count, but was assuming the adminstrator cleaned up all redirections before removing. The fix is to add a notifier and cleanup during unregister. The new list is implicitly protected by RTNL mutex because it is held during filter add/delete as well as notifier. Signed-off-by: Stephen Hemminger Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/tc_act/tc_mirred.h | 1 + net/sched/act_mirred.c | 43 +++++++++++++++++++++++++++++++++++++++--- 2 files changed, 41 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h index ceac661cdfd5..cfe2943690ff 100644 --- a/include/net/tc_act/tc_mirred.h +++ b/include/net/tc_act/tc_mirred.h @@ -9,6 +9,7 @@ struct tcf_mirred { int tcfm_ifindex; int tcfm_ok_push; struct net_device *tcfm_dev; + struct list_head tcfm_list; }; #define to_mirred(pc) \ container_of(pc, struct tcf_mirred, common) diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index c0b6863e3b87..1980b71c283f 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -33,6 +33,7 @@ static struct tcf_common *tcf_mirred_ht[MIRRED_TAB_MASK + 1]; static u32 mirred_idx_gen; static DEFINE_RWLOCK(mirred_lock); +static LIST_HEAD(mirred_list); static struct tcf_hashinfo mirred_hash_info = { .htab = tcf_mirred_ht, @@ -47,7 +48,9 @@ static inline int tcf_mirred_release(struct tcf_mirred *m, int bind) m->tcf_bindcnt--; m->tcf_refcnt--; if(!m->tcf_bindcnt && m->tcf_refcnt <= 0) { - dev_put(m->tcfm_dev); + list_del(&m->tcfm_list); + if (m->tcfm_dev) + dev_put(m->tcfm_dev); tcf_hash_destroy(&m->common, &mirred_hash_info); return 1; } @@ -134,8 +137,10 @@ static int tcf_mirred_init(struct nlattr *nla, struct nlattr *est, m->tcfm_ok_push = ok_push; } spin_unlock_bh(&m->tcf_lock); - if (ret == ACT_P_CREATED) + if (ret == ACT_P_CREATED) { + list_add(&m->tcfm_list, &mirred_list); tcf_hash_insert(pc, &mirred_hash_info); + } return ret; } @@ -162,9 +167,14 @@ static int tcf_mirred(struct sk_buff *skb, struct tc_action *a, m->tcf_tm.lastuse = jiffies; dev = m->tcfm_dev; + if (!dev) { + printk_once(KERN_NOTICE "tc mirred: target device is gone\n"); + goto out; + } + if (!(dev->flags & IFF_UP)) { if (net_ratelimit()) - pr_notice("tc mirred to Houston: device %s is gone!\n", + pr_notice("tc mirred to Houston: device %s is down\n", dev->name); goto out; } @@ -232,6 +242,28 @@ nla_put_failure: return -1; } +static int mirred_device_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = ptr; + struct tcf_mirred *m; + + if (event == NETDEV_UNREGISTER) + list_for_each_entry(m, &mirred_list, tcfm_list) { + if (m->tcfm_dev == dev) { + dev_put(dev); + m->tcfm_dev = NULL; + } + } + + return NOTIFY_DONE; +} + +static struct notifier_block mirred_device_notifier = { + .notifier_call = mirred_device_event, +}; + + static struct tc_action_ops act_mirred_ops = { .kind = "mirred", .hinfo = &mirred_hash_info, @@ -252,12 +284,17 @@ MODULE_LICENSE("GPL"); static int __init mirred_init_module(void) { + int err = register_netdevice_notifier(&mirred_device_notifier); + if (err) + return err; + pr_info("Mirror/redirect action on\n"); return tcf_register_action(&act_mirred_ops); } static void __exit mirred_cleanup_module(void) { + unregister_netdevice_notifier(&mirred_device_notifier); tcf_unregister_action(&act_mirred_ops); } -- cgit v1.2.3-73-gaa49b From 7d14831e21060fbfbfe8453460ac19205f4ce1c2 Mon Sep 17 00:00:00 2001 From: Anuj Aggarwal Date: Mon, 12 Jul 2010 17:54:06 +0530 Subject: regulator: tps6507x: allow driver to use DEFDCDC{2,3}_HIGH register Acked-by: Mark Brown In TPS6507x, depending on the status of DEFDCDC{2,3} pin either DEFDCDC{2,3}_LOW or DEFDCDC{2,3}_HIGH register needs to be read or programmed to change the output voltage. The current driver assumes DEFDCDC{2,3} pins are always tied low and thus operates only on DEFDCDC{2,3}_LOW register. This need not always be the case (as is found on OMAP-L138 EVM). Unfortunately, software cannot read the status of DEFDCDC{2,3} pins. So, this information is passed through platform data depending on how the board is wired. Signed-off-by: Anuj Aggarwal Signed-off-by: Sekhar Nori Signed-off-by: Liam Girdwood --- drivers/regulator/tps6507x-regulator.c | 36 +++++++++++++++++++++++++++------- include/linux/regulator/tps6507x.h | 32 ++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+), 7 deletions(-) create mode 100644 include/linux/regulator/tps6507x.h (limited to 'include') diff --git a/drivers/regulator/tps6507x-regulator.c b/drivers/regulator/tps6507x-regulator.c index 14b4576281c5..8152d65220f5 100644 --- a/drivers/regulator/tps6507x-regulator.c +++ b/drivers/regulator/tps6507x-regulator.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -101,9 +102,12 @@ struct tps_info { unsigned max_uV; u8 table_len; const u16 *table; + + /* Does DCDC high or the low register defines output voltage? */ + bool defdcdc_default; }; -static const struct tps_info tps6507x_pmic_regs[] = { +static struct tps_info tps6507x_pmic_regs[] = { { .name = "VDCDC1", .min_uV = 725000, @@ -145,7 +149,7 @@ struct tps6507x_pmic { struct regulator_desc desc[TPS6507X_NUM_REGULATOR]; struct tps6507x_dev *mfd; struct regulator_dev *rdev[TPS6507X_NUM_REGULATOR]; - const struct tps_info *info[TPS6507X_NUM_REGULATOR]; + struct tps_info *info[TPS6507X_NUM_REGULATOR]; struct mutex io_lock; }; static inline int tps6507x_pmic_read(struct tps6507x_pmic *tps, u8 reg) @@ -341,10 +345,16 @@ static int tps6507x_pmic_dcdc_get_voltage(struct regulator_dev *dev) reg = TPS6507X_REG_DEFDCDC1; break; case TPS6507X_DCDC_2: - reg = TPS6507X_REG_DEFDCDC2_LOW; + if (tps->info[dcdc]->defdcdc_default) + reg = TPS6507X_REG_DEFDCDC2_HIGH; + else + reg = TPS6507X_REG_DEFDCDC2_LOW; break; case TPS6507X_DCDC_3: - reg = TPS6507X_REG_DEFDCDC3_LOW; + if (tps->info[dcdc]->defdcdc_default) + reg = TPS6507X_REG_DEFDCDC3_HIGH; + else + reg = TPS6507X_REG_DEFDCDC3_LOW; break; default: return -EINVAL; @@ -370,10 +380,16 @@ static int tps6507x_pmic_dcdc_set_voltage(struct regulator_dev *dev, reg = TPS6507X_REG_DEFDCDC1; break; case TPS6507X_DCDC_2: - reg = TPS6507X_REG_DEFDCDC2_LOW; + if (tps->info[dcdc]->defdcdc_default) + reg = TPS6507X_REG_DEFDCDC2_HIGH; + else + reg = TPS6507X_REG_DEFDCDC2_LOW; break; case TPS6507X_DCDC_3: - reg = TPS6507X_REG_DEFDCDC3_LOW; + if (tps->info[dcdc]->defdcdc_default) + reg = TPS6507X_REG_DEFDCDC3_HIGH; + else + reg = TPS6507X_REG_DEFDCDC3_LOW; break; default: return -EINVAL; @@ -532,7 +548,7 @@ int tps6507x_pmic_probe(struct platform_device *pdev) { struct tps6507x_dev *tps6507x_dev = dev_get_drvdata(pdev->dev.parent); static int desc_id; - const struct tps_info *info = &tps6507x_pmic_regs[0]; + struct tps_info *info = &tps6507x_pmic_regs[0]; struct regulator_init_data *init_data; struct regulator_dev *rdev; struct tps6507x_pmic *tps; @@ -569,6 +585,12 @@ int tps6507x_pmic_probe(struct platform_device *pdev) for (i = 0; i < TPS6507X_NUM_REGULATOR; i++, info++, init_data++) { /* Register the regulators */ tps->info[i] = info; + if (init_data->driver_data) { + struct tps6507x_reg_platform_data *data = + init_data->driver_data; + tps->info[i]->defdcdc_default = data->defdcdc_default; + } + tps->desc[i].name = info->name; tps->desc[i].id = desc_id++; tps->desc[i].n_voltages = num_voltages[i]; diff --git a/include/linux/regulator/tps6507x.h b/include/linux/regulator/tps6507x.h new file mode 100644 index 000000000000..4892f591bab1 --- /dev/null +++ b/include/linux/regulator/tps6507x.h @@ -0,0 +1,32 @@ +/* + * tps6507x.h -- Voltage regulation for the Texas Instruments TPS6507X + * + * Copyright (C) 2010 Texas Instruments, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef REGULATOR_TPS6507X +#define REGULATOR_TPS6507X + +/** + * tps6507x_reg_platform_data - platform data for tps6507x + * @defdcdc_default: Defines whether DCDC high or the low register controls + * output voltage by default. Valid for DCDC2 and DCDC3 outputs only. + */ +struct tps6507x_reg_platform_data { + bool defdcdc_default; +}; + +#endif -- cgit v1.2.3-73-gaa49b From de09a9771a5346029f4d11e4ac886be7f9bfdd75 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 29 Jul 2010 12:45:49 +0100 Subject: CRED: Fix get_task_cred() and task_state() to not resurrect dead credentials It's possible for get_task_cred() as it currently stands to 'corrupt' a set of credentials by incrementing their usage count after their replacement by the task being accessed. What happens is that get_task_cred() can race with commit_creds(): TASK_1 TASK_2 RCU_CLEANER -->get_task_cred(TASK_2) rcu_read_lock() __cred = __task_cred(TASK_2) -->commit_creds() old_cred = TASK_2->real_cred TASK_2->real_cred = ... put_cred(old_cred) call_rcu(old_cred) [__cred->usage == 0] get_cred(__cred) [__cred->usage == 1] rcu_read_unlock() -->put_cred_rcu() [__cred->usage == 1] panic() However, since a tasks credentials are generally not changed very often, we can reasonably make use of a loop involving reading the creds pointer and using atomic_inc_not_zero() to attempt to increment it if it hasn't already hit zero. If successful, we can safely return the credentials in the knowledge that, even if the task we're accessing has released them, they haven't gone to the RCU cleanup code. We then change task_state() in procfs to use get_task_cred() rather than calling get_cred() on the result of __task_cred(), as that suffers from the same problem. Without this change, a BUG_ON in __put_cred() or in put_cred_rcu() can be tripped when it is noticed that the usage count is not zero as it ought to be, for example: kernel BUG at kernel/cred.c:168! invalid opcode: 0000 [#1] SMP last sysfs file: /sys/kernel/mm/ksm/run CPU 0 Pid: 2436, comm: master Not tainted 2.6.33.3-85.fc13.x86_64 #1 0HR330/OptiPlex 745 RIP: 0010:[] [] __put_cred+0xc/0x45 RSP: 0018:ffff88019e7e9eb8 EFLAGS: 00010202 RAX: 0000000000000001 RBX: ffff880161514480 RCX: 00000000ffffffff RDX: 00000000ffffffff RSI: ffff880140c690c0 RDI: ffff880140c690c0 RBP: ffff88019e7e9eb8 R08: 00000000000000d0 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000040 R12: ffff880140c690c0 R13: ffff88019e77aea0 R14: 00007fff336b0a5c R15: 0000000000000001 FS: 00007f12f50d97c0(0000) GS:ffff880007400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f8f461bc000 CR3: 00000001b26ce000 CR4: 00000000000006f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process master (pid: 2436, threadinfo ffff88019e7e8000, task ffff88019e77aea0) Stack: ffff88019e7e9ec8 ffffffff810698cd ffff88019e7e9ef8 ffffffff81069b45 <0> ffff880161514180 ffff880161514480 ffff880161514180 0000000000000000 <0> ffff88019e7e9f28 ffffffff8106aace 0000000000000001 0000000000000246 Call Trace: [] put_cred+0x13/0x15 [] commit_creds+0x16b/0x175 [] set_current_groups+0x47/0x4e [] sys_setgroups+0xf6/0x105 [] system_call_fastpath+0x16/0x1b Code: 48 8d 71 ff e8 7e 4e 15 00 85 c0 78 0b 8b 75 ec 48 89 df e8 ef 4a 15 00 48 83 c4 18 5b c9 c3 55 8b 07 8b 07 48 89 e5 85 c0 74 04 <0f> 0b eb fe 65 48 8b 04 25 00 cc 00 00 48 3b b8 58 04 00 00 75 RIP [] __put_cred+0xc/0x45 RSP ---[ end trace df391256a100ebdd ]--- Signed-off-by: David Howells Acked-by: Jiri Olsa Signed-off-by: Linus Torvalds --- fs/proc/array.c | 2 +- include/linux/cred.h | 21 +-------------------- kernel/cred.c | 25 +++++++++++++++++++++++++ 3 files changed, 27 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/fs/proc/array.c b/fs/proc/array.c index 9b58d38bc911..fff6572676ae 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -176,7 +176,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, if (tracer) tpid = task_pid_nr_ns(tracer, ns); } - cred = get_cred((struct cred *) __task_cred(p)); + cred = get_task_cred(p); seq_printf(m, "State:\t%s\n" "Tgid:\t%d\n" diff --git a/include/linux/cred.h b/include/linux/cred.h index 75c0fa881308..ce40cbc791e2 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -153,6 +153,7 @@ struct cred { extern void __put_cred(struct cred *); extern void exit_creds(struct task_struct *); extern int copy_creds(struct task_struct *, unsigned long); +extern const struct cred *get_task_cred(struct task_struct *); extern struct cred *cred_alloc_blank(void); extern struct cred *prepare_creds(void); extern struct cred *prepare_exec_creds(void); @@ -281,26 +282,6 @@ static inline void put_cred(const struct cred *_cred) #define __task_cred(task) \ ((const struct cred *)(rcu_dereference_check((task)->real_cred, rcu_read_lock_held() || lockdep_tasklist_lock_is_held()))) -/** - * get_task_cred - Get another task's objective credentials - * @task: The task to query - * - * Get the objective credentials of a task, pinning them so that they can't go - * away. Accessing a task's credentials directly is not permitted. - * - * The caller must make sure task doesn't go away, either by holding a ref on - * task or by holding tasklist_lock to prevent it from being unlinked. - */ -#define get_task_cred(task) \ -({ \ - struct cred *__cred; \ - rcu_read_lock(); \ - __cred = (struct cred *) __task_cred((task)); \ - get_cred(__cred); \ - rcu_read_unlock(); \ - __cred; \ -}) - /** * get_current_cred - Get the current task's subjective credentials * diff --git a/kernel/cred.c b/kernel/cred.c index a2d5504fbcc2..60bc8b1e32e6 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -209,6 +209,31 @@ void exit_creds(struct task_struct *tsk) } } +/** + * get_task_cred - Get another task's objective credentials + * @task: The task to query + * + * Get the objective credentials of a task, pinning them so that they can't go + * away. Accessing a task's credentials directly is not permitted. + * + * The caller must also make sure task doesn't get deleted, either by holding a + * ref on task or by holding tasklist_lock to prevent it from being unlinked. + */ +const struct cred *get_task_cred(struct task_struct *task) +{ + const struct cred *cred; + + rcu_read_lock(); + + do { + cred = __task_cred((task)); + BUG_ON(!cred); + } while (!atomic_inc_not_zero(&((struct cred *)cred)->usage)); + + rcu_read_unlock(); + return cred; +} + /* * Allocate blank credentials, such that the credentials can be filled in at a * later date without risk of ENOMEM. -- cgit v1.2.3-73-gaa49b From 8f92054e7ca1d3a3ae50fb42d2253ac8730d9b2a Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 29 Jul 2010 12:45:55 +0100 Subject: CRED: Fix __task_cred()'s lockdep check and banner comment Fix __task_cred()'s lockdep check by removing the following validation condition: lockdep_tasklist_lock_is_held() as commit_creds() does not take the tasklist_lock, and nor do most of the functions that call it, so this check is pointless and it can prevent detection of the RCU lock not being held if the tasklist_lock is held. Instead, add the following validation condition: task->exit_state >= 0 to permit the access if the target task is dead and therefore unable to change its own credentials. Fix __task_cred()'s comment to: (1) discard the bit that says that the caller must prevent the target task from being deleted. That shouldn't need saying. (2) Add a comment indicating the result of __task_cred() should not be passed directly to get_cred(), but rather than get_task_cred() should be used instead. Also put a note into the documentation to enforce this point there too. Signed-off-by: David Howells Acked-by: Jiri Olsa Cc: Paul E. McKenney Signed-off-by: Linus Torvalds --- Documentation/credentials.txt | 3 +++ include/linux/cred.h | 15 ++++++++++----- include/linux/sched.h | 1 + 3 files changed, 14 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/Documentation/credentials.txt b/Documentation/credentials.txt index a2db35287003..995baf379c07 100644 --- a/Documentation/credentials.txt +++ b/Documentation/credentials.txt @@ -417,6 +417,9 @@ reference on them using: This does all the RCU magic inside of it. The caller must call put_cred() on the credentials so obtained when they're finished with. + [*] Note: The result of __task_cred() should not be passed directly to + get_cred() as this may race with commit_cred(). + There are a couple of convenience functions to access bits of another task's credentials, hiding the RCU magic from the caller: diff --git a/include/linux/cred.h b/include/linux/cred.h index ce40cbc791e2..4d2c39573f36 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -274,13 +274,18 @@ static inline void put_cred(const struct cred *_cred) * @task: The task to query * * Access the objective credentials of a task. The caller must hold the RCU - * readlock. + * readlock or the task must be dead and unable to change its own credentials. * - * The caller must make sure task doesn't go away, either by holding a ref on - * task or by holding tasklist_lock to prevent it from being unlinked. + * The result of this function should not be passed directly to get_cred(); + * rather get_task_cred() should be used instead. */ -#define __task_cred(task) \ - ((const struct cred *)(rcu_dereference_check((task)->real_cred, rcu_read_lock_held() || lockdep_tasklist_lock_is_held()))) +#define __task_cred(task) \ + ({ \ + const struct task_struct *__t = (task); \ + rcu_dereference_check(__t->real_cred, \ + rcu_read_lock_held() || \ + task_is_dead(__t)); \ + }) /** * get_current_cred - Get the current task's subjective credentials diff --git a/include/linux/sched.h b/include/linux/sched.h index 747fcaedddb7..0478888c6899 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -214,6 +214,7 @@ extern char ___assert_task_state[1 - 2*!!( #define task_is_traced(task) ((task->state & __TASK_TRACED) != 0) #define task_is_stopped(task) ((task->state & __TASK_STOPPED) != 0) +#define task_is_dead(task) ((task)->exit_state != 0) #define task_is_stopped_or_traced(task) \ ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0) #define task_contributes_to_load(task) \ -- cgit v1.2.3-73-gaa49b From b608b283a962caaa280756bc8563016a71712acf Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 30 Jul 2010 15:31:54 -0400 Subject: NFS: kswapd must not block in nfs_release_page See https://bugzilla.kernel.org/show_bug.cgi?id=16056 If other processes are blocked waiting for kswapd to free up some memory so that they can make progress, then we cannot allow kswapd to block on those processes. Signed-off-by: Trond Myklebust Cc: stable@kernel.org --- fs/nfs/file.c | 13 +++++++++++-- fs/nfs/write.c | 4 ++-- include/linux/nfs_fs.h | 1 + 3 files changed, 14 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 36a5e74f51b4..f036153d9f50 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -493,11 +494,19 @@ static void nfs_invalidate_page(struct page *page, unsigned long offset) */ static int nfs_release_page(struct page *page, gfp_t gfp) { + struct address_space *mapping = page->mapping; + dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page); /* Only do I/O if gfp is a superset of GFP_KERNEL */ - if ((gfp & GFP_KERNEL) == GFP_KERNEL) - nfs_wb_page(page->mapping->host, page); + if (mapping && (gfp & GFP_KERNEL) == GFP_KERNEL) { + int how = FLUSH_SYNC; + + /* Don't let kswapd deadlock waiting for OOM RPC calls */ + if (current_is_kswapd()) + how = 0; + nfs_commit_inode(mapping->host, how); + } /* If PagePrivate() is set, then the page is not freeable */ if (PagePrivate(page)) return 0; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 91679e2631ee..0a6c65a1f9d7 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1379,7 +1379,7 @@ static const struct rpc_call_ops nfs_commit_ops = { .rpc_release = nfs_commit_release, }; -static int nfs_commit_inode(struct inode *inode, int how) +int nfs_commit_inode(struct inode *inode, int how) { LIST_HEAD(head); int may_wait = how & FLUSH_SYNC; @@ -1443,7 +1443,7 @@ out_mark_dirty: return ret; } #else -static int nfs_commit_inode(struct inode *inode, int how) +int nfs_commit_inode(struct inode *inode, int how) { return 0; } diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 77c2ae53431c..f6e2455f13d1 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -493,6 +493,7 @@ extern int nfs_wb_all(struct inode *inode); extern int nfs_wb_page(struct inode *inode, struct page* page); extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +extern int nfs_commit_inode(struct inode *, int); extern struct nfs_write_data *nfs_commitdata_alloc(void); extern void nfs_commit_free(struct nfs_write_data *wdata); #endif -- cgit v1.2.3-73-gaa49b From 77a63f3d1e0a3e7ede8d10f569e8481b13ff47c5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 1 Aug 2010 13:40:40 -0400 Subject: NFS: Fix a typo in include/linux/nfs_fs.h nfs_commit_inode() needs to be defined irrespectively of whether or not we are supporting NFSv3 and NFSv4. Allow the compiler to optimise away code in the NFSv2-only case by converting it into an inlined stub function. Reported-and-tested-by: Ingo Molnar Signed-off-by: Trond Myklebust Signed-off-by: Linus Torvalds --- fs/nfs/write.c | 5 ----- include/linux/nfs_fs.h | 6 ++++++ 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index bb72ad34d51d..9f81bdd91c55 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1454,11 +1454,6 @@ out_mark_dirty: return ret; } #else -int nfs_commit_inode(struct inode *inode, int how) -{ - return 0; -} - static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc) { return 0; diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index f6e2455f13d1..bad4d121b16e 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -496,6 +496,12 @@ extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); extern int nfs_commit_inode(struct inode *, int); extern struct nfs_write_data *nfs_commitdata_alloc(void); extern void nfs_commit_free(struct nfs_write_data *wdata); +#else +static inline int +nfs_commit_inode(struct inode *inode, int how) +{ + return 0; +} #endif static inline int -- cgit v1.2.3-73-gaa49b