From ef217b1f08fdfe94cd75308f5487bc311640105d Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Wed, 1 Nov 2017 10:45:43 -0700 Subject: drm/vmwgfx: constify vmw_fence_ops vmw_fence_ops are not supposed to change at runtime. Functions "dma_fence_init" working with const vmw_fence_ops provided by . So mark the non-const structs as const. Signed-off-by: Arvind Yadav Reviewed-by: Thomas Hellstrom --- drivers/gpu/drm/vmwgfx/vmwgfx_fence.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c index 3bbad22b3748..d6b1c509ae01 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c @@ -224,7 +224,7 @@ out: return ret; } -static struct dma_fence_ops vmw_fence_ops = { +static const struct dma_fence_ops vmw_fence_ops = { .get_driver_name = vmw_fence_get_driver_name, .get_timeline_name = vmw_fence_get_timeline_name, .enable_signaling = vmw_fence_enable_signaling, -- cgit From cef75036c40408ba3bc308bcb00a3d440da713fc Mon Sep 17 00:00:00 2001 From: Sinclair Yeh Date: Wed, 1 Nov 2017 10:47:05 -0700 Subject: drm/vmwgfx: Fix Ubuntu 17.10 Wayland black screen issue This is an extension of Commit 7c20d213dd3c ("drm/vmwgfx: Work around mode set failure in 2D VMs") With Wayland desktop and atomic mode set, during the mode setting process there is a moment when two framebuffer sized surfaces are being pinned. This was not an issue with Xorg. Since this only happens during a mode change, there should be no performance impact by increasing allowable mem_size. Signed-off-by: Sinclair Yeh Reviewed-by: Thomas Hellstrom Cc: stable@vger.kernel.org --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index e84fee3ec4f3..184340d486c3 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -721,7 +721,7 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset) * allocation taken by fbdev */ if (!(dev_priv->capabilities & SVGA_CAP_3D)) - mem_size *= 2; + mem_size *= 3; dev_priv->max_mob_pages = mem_size * 1024 / PAGE_SIZE; dev_priv->prim_bb_mem = -- cgit From f5ce817951f38023588b2b8308beca79abe20507 Mon Sep 17 00:00:00 2001 From: Jussi Laako Date: Wed, 1 Nov 2017 23:32:33 +0200 Subject: ALSA: usb-audio: support new Amanero Combo384 firmware version Support DSD_U32_BE sample format on new Amanero Combo384 firmware version on older VID/PID. Fixes: 3eff682d765b ("ALSA: usb-audio: Support both DSD LE/BE Amanero firmware versions") Signed-off-by: Jussi Laako Cc: Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 4f5f18f22974..20624320b753 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1375,6 +1375,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip, case 0x199: return SNDRV_PCM_FMTBIT_DSD_U32_LE; case 0x19b: + case 0x203: return SNDRV_PCM_FMTBIT_DSD_U32_BE; default: break; -- cgit From cb79a180f2e7eb51de5a4848652893197637bccb Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 1 Nov 2017 20:30:49 +0100 Subject: xfrm: defer daddr pointer assignment after spi parsing syzbot reports: BUG: KASAN: use-after-free in __xfrm_state_lookup+0x695/0x6b0 Read of size 4 at addr ffff8801d434e538 by task syzkaller647520/2991 [..] __xfrm_state_lookup+0x695/0x6b0 net/xfrm/xfrm_state.c:833 xfrm_state_lookup+0x8a/0x160 net/xfrm/xfrm_state.c:1592 xfrm_input+0x8e5/0x22f0 net/xfrm/xfrm_input.c:302 The use-after-free is the ipv4 destination address, which points to an skb head area that has been reallocated: pskb_expand_head+0x36b/0x1210 net/core/skbuff.c:1494 __pskb_pull_tail+0x14a/0x17c0 net/core/skbuff.c:1877 pskb_may_pull include/linux/skbuff.h:2102 [inline] xfrm_parse_spi+0x3d3/0x4d0 net/xfrm/xfrm_input.c:170 xfrm_input+0xce2/0x22f0 net/xfrm/xfrm_input.c:291 so the real bug is that xfrm_parse_spi() uses pskb_may_pull, but for now do smaller workaround that makes xfrm_input fetch daddr after spi parsing. Reported-by: syzbot Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_input.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 8ac9d32fb79d..1c6051cb7733 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -265,8 +265,6 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) goto lock; } - daddr = (xfrm_address_t *)(skb_network_header(skb) + - XFRM_SPI_SKB_CB(skb)->daddroff); family = XFRM_SPI_SKB_CB(skb)->family; /* if tunnel is present override skb->mark value with tunnel i_key */ @@ -293,6 +291,8 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) goto drop; } + daddr = (xfrm_address_t *)(skb_network_header(skb) + + XFRM_SPI_SKB_CB(skb)->daddroff); do { if (skb->sp->len == XFRM_MAX_DEPTH) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR); -- cgit From cf37966751747727629fe51fd4a1d4edd8457c60 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 2 Nov 2017 16:46:01 +0100 Subject: xfrm: do unconditional template resolution before pcpu cache check Stephen Smalley says: Since 4.14-rc1, the selinux-testsuite has been encountering sporadic failures during testing of labeled IPSEC. git bisect pointed to commit ec30d ("xfrm: add xdst pcpu cache"). The xdst pcpu cache is only checking that the policies are the same, but does not validate that the policy, state, and flow match with respect to security context labeling. As a result, the wrong SA could be used and the receiver could end up performing permission checking and providing SO_PEERSEC or SCM_SECURITY values for the wrong security context. This fix makes it so that we always do the template resolution, and then checks that the found states match those in the pcpu bundle. This has the disadvantage of doing a bit more work (lookup in state hash table) if we can reuse the xdst entry (we only avoid xdst alloc/free) but we don't add a lot of extra work in case we can't reuse. xfrm_pol_dead() check is removed, reasoning is that xfrm_tmpl_resolve does all needed checks. Cc: Paul Moore Fixes: ec30d78c14a813db39a647b6a348b428 ("xfrm: add xdst pcpu cache") Reported-by: Stephen Smalley Tested-by: Stephen Smalley Signed-off-by: Florian Westphal Acked-by: Paul Moore Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 42 ++++++++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 8cafb3c0a4ac..a2e531bf4f97 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1787,19 +1787,23 @@ void xfrm_policy_cache_flush(void) put_online_cpus(); } -static bool xfrm_pol_dead(struct xfrm_dst *xdst) +static bool xfrm_xdst_can_reuse(struct xfrm_dst *xdst, + struct xfrm_state * const xfrm[], + int num) { - unsigned int num_pols = xdst->num_pols; - unsigned int pol_dead = 0, i; + const struct dst_entry *dst = &xdst->u.dst; + int i; - for (i = 0; i < num_pols; i++) - pol_dead |= xdst->pols[i]->walk.dead; + if (xdst->num_xfrms != num) + return false; - /* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */ - if (pol_dead) - xdst->u.dst.obsolete = DST_OBSOLETE_DEAD; + for (i = 0; i < num; i++) { + if (!dst || dst->xfrm != xfrm[i]) + return false; + dst = dst->child; + } - return pol_dead; + return xfrm_bundle_ok(xdst); } static struct xfrm_dst * @@ -1813,26 +1817,28 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, struct dst_entry *dst; int err; + /* Try to instantiate a bundle */ + err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family); + if (err <= 0) { + if (err != 0 && err != -EAGAIN) + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); + return ERR_PTR(err); + } + xdst = this_cpu_read(xfrm_last_dst); if (xdst && xdst->u.dst.dev == dst_orig->dev && xdst->num_pols == num_pols && - !xfrm_pol_dead(xdst) && memcmp(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols) == 0 && - xfrm_bundle_ok(xdst)) { + xfrm_xdst_can_reuse(xdst, xfrm, err)) { dst_hold(&xdst->u.dst); + while (err > 0) + xfrm_state_put(xfrm[--err]); return xdst; } old = xdst; - /* Try to instantiate a bundle */ - err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family); - if (err <= 0) { - if (err != 0 && err != -EAGAIN) - XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); - return ERR_PTR(err); - } dst = xfrm_bundle_create(pols[0], xfrm, err, fl, dst_orig); if (IS_ERR(dst)) { -- cgit From c9f3f813d462c72dbe412cee6a5cbacf13c4ad5e Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Thu, 2 Nov 2017 08:10:17 +0100 Subject: xfrm: Fix stack-out-of-bounds read in xfrm_state_find. When we do tunnel or beet mode, we pass saddr and daddr from the template to xfrm_state_find(), this is ok. On transport mode, we pass the addresses from the flowi, assuming that the IP addresses (and address family) don't change during transformation. This assumption is wrong in the IPv4 mapped IPv6 case, packet is IPv4 and template is IPv6. Fix this by using the addresses from the template unconditionally. Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index a2e531bf4f97..6eb228a70131 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1361,36 +1361,29 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl, struct net *net = xp_net(policy); int nx; int i, error; - xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family); - xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family); xfrm_address_t tmp; for (nx = 0, i = 0; i < policy->xfrm_nr; i++) { struct xfrm_state *x; - xfrm_address_t *remote = daddr; - xfrm_address_t *local = saddr; + xfrm_address_t *local; + xfrm_address_t *remote; struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i]; - if (tmpl->mode == XFRM_MODE_TUNNEL || - tmpl->mode == XFRM_MODE_BEET) { - remote = &tmpl->id.daddr; - local = &tmpl->saddr; - if (xfrm_addr_any(local, tmpl->encap_family)) { - error = xfrm_get_saddr(net, fl->flowi_oif, - &tmp, remote, - tmpl->encap_family, 0); - if (error) - goto fail; - local = &tmp; - } + remote = &tmpl->id.daddr; + local = &tmpl->saddr; + if (xfrm_addr_any(local, tmpl->encap_family)) { + error = xfrm_get_saddr(net, fl->flowi_oif, + &tmp, remote, + tmpl->encap_family, 0); + if (error) + goto fail; + local = &tmp; } x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family); if (x && x->km.state == XFRM_STATE_VALID) { xfrm[nx++] = x; - daddr = remote; - saddr = local; continue; } if (x) { -- cgit From 24de79e5008a928beb2c7ccc2396f15065613363 Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Thu, 2 Nov 2017 19:26:22 +0530 Subject: cxgb4: update latest firmware version supported Change t4fw_version.h to update latest firmware version number to 1.16.63.0. Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h index f2d623a7aee0..123e2c1b65f5 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h @@ -37,7 +37,7 @@ #define T4FW_VERSION_MAJOR 0x01 #define T4FW_VERSION_MINOR 0x10 -#define T4FW_VERSION_MICRO 0x2D +#define T4FW_VERSION_MICRO 0x3F #define T4FW_VERSION_BUILD 0x00 #define T4FW_MIN_VERSION_MAJOR 0x01 @@ -46,7 +46,7 @@ #define T5FW_VERSION_MAJOR 0x01 #define T5FW_VERSION_MINOR 0x10 -#define T5FW_VERSION_MICRO 0x2D +#define T5FW_VERSION_MICRO 0x3F #define T5FW_VERSION_BUILD 0x00 #define T5FW_MIN_VERSION_MAJOR 0x00 @@ -55,7 +55,7 @@ #define T6FW_VERSION_MAJOR 0x01 #define T6FW_VERSION_MINOR 0x10 -#define T6FW_VERSION_MICRO 0x2D +#define T6FW_VERSION_MICRO 0x3F #define T6FW_VERSION_BUILD 0x00 #define T6FW_MIN_VERSION_MAJOR 0x00 -- cgit From 2b5ec1a5f9738ee7bf8f5ec0526e75e00362c48f Mon Sep 17 00:00:00 2001 From: Ye Yin Date: Thu, 26 Oct 2017 16:57:05 +0800 Subject: netfilter/ipvs: clear ipvs_property flag when SKB net namespace changed When run ipvs in two different network namespace at the same host, and one ipvs transport network traffic to the other network namespace ipvs. 'ipvs_property' flag will make the second ipvs take no effect. So we should clear 'ipvs_property' when SKB network namespace changed. Fixes: 621e84d6f373 ("dev: introduce skb_scrub_packet()") Signed-off-by: Ye Yin Signed-off-by: Wei Zhou Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- include/linux/skbuff.h | 7 +++++++ net/core/skbuff.c | 1 + 2 files changed, 8 insertions(+) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 72299ef00061..d448a4804aea 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3770,6 +3770,13 @@ static inline void nf_reset_trace(struct sk_buff *skb) #endif } +static inline void ipvs_reset(struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_IP_VS) + skb->ipvs_property = 0; +#endif +} + /* Note: This doesn't put any conntrack and bridge info in dst. */ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src, bool copy) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 24656076906d..e140ba49b30a 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4864,6 +4864,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet) if (!xnet) return; + ipvs_reset(skb); skb_orphan(skb); skb->mark = 0; } -- cgit From baedf68a068ca29624f241426843635920f16e1d Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Thu, 2 Nov 2017 21:26:59 +0100 Subject: net: usb: asix: fill null-ptr-deref in asix_suspend When asix_suspend() is called dev->driver_priv might not have been assigned a value, so we need to check that it's not NULL. Found by syzkaller. kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] PREEMPT SMP KASAN Modules linked in: CPU: 0 PID: 24 Comm: kworker/0:1 Not tainted 4.14.0-rc4-43422-geccacdd69a8c #400 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Workqueue: usb_hub_wq hub_event task: ffff88006bb36300 task.stack: ffff88006bba8000 RIP: 0010:asix_suspend+0x76/0xc0 drivers/net/usb/asix_devices.c:629 RSP: 0018:ffff88006bbae718 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: ffff880061ba3b80 RCX: 1ffff1000c34d644 RDX: 0000000000000001 RSI: 0000000000000402 RDI: 0000000000000008 RBP: ffff88006bbae738 R08: 1ffff1000d775cad R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffff8800630a8b40 R13: 0000000000000000 R14: 0000000000000402 R15: ffff880061ba3b80 FS: 0000000000000000(0000) GS:ffff88006c600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ff33cf89000 CR3: 0000000061c0a000 CR4: 00000000000006f0 Call Trace: usb_suspend_interface drivers/usb/core/driver.c:1209 usb_suspend_both+0x27f/0x7e0 drivers/usb/core/driver.c:1314 usb_runtime_suspend+0x41/0x120 drivers/usb/core/driver.c:1852 __rpm_callback+0x339/0xb60 drivers/base/power/runtime.c:334 rpm_callback+0x106/0x220 drivers/base/power/runtime.c:461 rpm_suspend+0x465/0x1980 drivers/base/power/runtime.c:596 __pm_runtime_suspend+0x11e/0x230 drivers/base/power/runtime.c:1009 pm_runtime_put_sync_autosuspend ./include/linux/pm_runtime.h:251 usb_new_device+0xa37/0x1020 drivers/usb/core/hub.c:2487 hub_port_connect drivers/usb/core/hub.c:4903 hub_port_connect_change drivers/usb/core/hub.c:5009 port_event drivers/usb/core/hub.c:5115 hub_event+0x194d/0x3740 drivers/usb/core/hub.c:5195 process_one_work+0xc7f/0x1db0 kernel/workqueue.c:2119 worker_thread+0x221/0x1850 kernel/workqueue.c:2253 kthread+0x3a1/0x470 kernel/kthread.c:231 ret_from_fork+0x2a/0x40 arch/x86/entry/entry_64.S:431 Code: 8d 7c 24 20 48 89 fa 48 c1 ea 03 80 3c 02 00 75 5b 48 b8 00 00 00 00 00 fc ff df 4d 8b 6c 24 20 49 8d 7d 08 48 89 fa 48 c1 ea 03 <80> 3c 02 00 75 34 4d 8b 6d 08 4d 85 ed 74 0b e8 26 2b 51 fd 4c RIP: asix_suspend+0x76/0xc0 RSP: ffff88006bbae718 ---[ end trace dfc4f5649284342c ]--- Signed-off-by: Andrey Konovalov Signed-off-by: David S. Miller --- drivers/net/usb/asix_devices.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index b2ff88e69a81..743416be84f3 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -626,7 +626,7 @@ static int asix_suspend(struct usb_interface *intf, pm_message_t message) struct usbnet *dev = usb_get_intfdata(intf); struct asix_common_private *priv = dev->driver_priv; - if (priv->suspend) + if (priv && priv->suspend) priv->suspend(dev); return usbnet_suspend(intf, message); -- cgit From d62d813c0d714a2d0aaf3d796a7a51ae60bf5470 Mon Sep 17 00:00:00 2001 From: Chris Redpath Date: Fri, 3 Nov 2017 13:36:42 +0000 Subject: cpufreq: schedutil: Examine the correct CPU when we update util After commit 674e75411fc2 (sched: cpufreq: Allow remote cpufreq callbacks) we stopped to always read the utilization for the CPU we are running the governor on, and instead we read it for the CPU which we've been told has updated utilization. This is stored in sugov_cpu->cpu. The value is set in sugov_register() but we clear it in sugov_start() which leads to always looking at the utilization of CPU0 instead of the correct one. Fix this by consolidating the initialization code into sugov_start(). Fixes: 674e75411fc2 (sched: cpufreq: Allow remote cpufreq callbacks) Signed-off-by: Chris Redpath Reviewed-by: Patrick Bellasi Reviewed-by: Brendan Jackman Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- kernel/sched/cpufreq_schedutil.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 9209d83ecdcf..ba0da243fdd8 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -649,6 +649,7 @@ static int sugov_start(struct cpufreq_policy *policy) struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu); memset(sg_cpu, 0, sizeof(*sg_cpu)); + sg_cpu->cpu = cpu; sg_cpu->sg_policy = sg_policy; sg_cpu->flags = SCHED_CPUFREQ_RT; sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq; @@ -714,11 +715,6 @@ struct cpufreq_governor *cpufreq_default_governor(void) static int __init sugov_register(void) { - int cpu; - - for_each_possible_cpu(cpu) - per_cpu(sugov_cpu, cpu).cpu = cpu; - return cpufreq_register_governor(&schedutil_gov); } fs_initcall(sugov_register); -- cgit From 8f7dc9ae4a7aece9fbc3e6637bdfa38b36bcdf09 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 3 Nov 2017 16:49:00 +0100 Subject: l2tp: don't use l2tp_tunnel_find() in l2tp_ip and l2tp_ip6 Using l2tp_tunnel_find() in l2tp_ip_recv() is wrong for two reasons: * It doesn't take a reference on the returned tunnel, which makes the call racy wrt. concurrent tunnel deletion. * The lookup is only based on the tunnel identifier, so it can return a tunnel that doesn't match the packet's addresses or protocol. For example, a packet sent to an L2TPv3 over IPv6 tunnel can be delivered to an L2TPv2 over UDPv4 tunnel. This is worse than a simple cross-talk: when delivering the packet to an L2TP over UDP tunnel, the corresponding socket is UDP, where ->sk_backlog_rcv() is NULL. Calling sk_receive_skb() will then crash the kernel by trying to execute this callback. And l2tp_tunnel_find() isn't even needed here. __l2tp_ip_bind_lookup() properly checks the socket binding and connection settings. It was used as a fallback mechanism for finding tunnels that didn't have their data path registered yet. But it's not limited to this case and can be used to replace l2tp_tunnel_find() in the general case. Fix l2tp_ip6 in the same way. Fixes: 0d76751fad77 ("l2tp: Add L2TPv3 IP encapsulation (no UDP) support") Fixes: a32e0eec7042 ("l2tp: introduce L2TPv3 IP encapsulation support for IPv6") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_ip.c | 24 +++++++++--------------- net/l2tp/l2tp_ip6.c | 24 +++++++++--------------- 2 files changed, 18 insertions(+), 30 deletions(-) diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 4d322c1b7233..e4280b6568b4 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -123,6 +123,7 @@ static int l2tp_ip_recv(struct sk_buff *skb) unsigned char *ptr, *optr; struct l2tp_session *session; struct l2tp_tunnel *tunnel = NULL; + struct iphdr *iph; int length; if (!pskb_may_pull(skb, 4)) @@ -178,24 +179,17 @@ pass_up: goto discard; tunnel_id = ntohl(*(__be32 *) &skb->data[4]); - tunnel = l2tp_tunnel_find(net, tunnel_id); - if (tunnel) { - sk = tunnel->sock; - sock_hold(sk); - } else { - struct iphdr *iph = (struct iphdr *) skb_network_header(skb); - - read_lock_bh(&l2tp_ip_lock); - sk = __l2tp_ip_bind_lookup(net, iph->daddr, iph->saddr, - inet_iif(skb), tunnel_id); - if (!sk) { - read_unlock_bh(&l2tp_ip_lock); - goto discard; - } + iph = (struct iphdr *)skb_network_header(skb); - sock_hold(sk); + read_lock_bh(&l2tp_ip_lock); + sk = __l2tp_ip_bind_lookup(net, iph->daddr, iph->saddr, inet_iif(skb), + tunnel_id); + if (!sk) { read_unlock_bh(&l2tp_ip_lock); + goto discard; } + sock_hold(sk); + read_unlock_bh(&l2tp_ip_lock); if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_put; diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 88b397c30d86..8bcaa975b432 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -136,6 +136,7 @@ static int l2tp_ip6_recv(struct sk_buff *skb) unsigned char *ptr, *optr; struct l2tp_session *session; struct l2tp_tunnel *tunnel = NULL; + struct ipv6hdr *iph; int length; if (!pskb_may_pull(skb, 4)) @@ -192,24 +193,17 @@ pass_up: goto discard; tunnel_id = ntohl(*(__be32 *) &skb->data[4]); - tunnel = l2tp_tunnel_find(net, tunnel_id); - if (tunnel) { - sk = tunnel->sock; - sock_hold(sk); - } else { - struct ipv6hdr *iph = ipv6_hdr(skb); - - read_lock_bh(&l2tp_ip6_lock); - sk = __l2tp_ip6_bind_lookup(net, &iph->daddr, &iph->saddr, - inet6_iif(skb), tunnel_id); - if (!sk) { - read_unlock_bh(&l2tp_ip6_lock); - goto discard; - } + iph = ipv6_hdr(skb); - sock_hold(sk); + read_lock_bh(&l2tp_ip6_lock); + sk = __l2tp_ip6_bind_lookup(net, &iph->daddr, &iph->saddr, + inet6_iif(skb), tunnel_id); + if (!sk) { read_unlock_bh(&l2tp_ip6_lock); + goto discard; } + sock_hold(sk); + read_unlock_bh(&l2tp_ip6_lock); if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_put; -- cgit From d09b9e60e06d431b008a878c4b1d48d6cce816ef Mon Sep 17 00:00:00 2001 From: Priyaranjan Jha Date: Fri, 3 Nov 2017 17:46:55 -0700 Subject: tcp: fix DSACK-based undo on non-duplicate ACK Fixes DSACK-based undo when sender is in Open State and an ACK advances snd_una. Example scenario: - Sender goes into recovery and makes some spurious rtx. - It comes out of recovery and enters into open state. - It sends some more packets, let's say 4. - The receiver sends an ACK for the first two, but this ACK is lost. - The sender receives ack for first two, and DSACK for previous spurious rtx. Signed-off-by: Priyaranjan Jha Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Acked-by: Yousuk Seung Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 5a87a00641d3..b2fc7163bd40 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -115,7 +115,7 @@ int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2; #define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) #define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) -#define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE) +#define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE|FLAG_DSACKING_ACK) #define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED) #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH) -- cgit From 9b7d869ee5a77ed4a462372bb89af622e705bfb8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 5 Nov 2017 10:07:43 +0100 Subject: ALSA: timer: Limit max instances per timer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently we allow unlimited number of timer instances, and it may bring the system hogging way too much CPU when too many timer instances are opened and processed concurrently. This may end up with a soft-lockup report as triggered by syzkaller, especially when hrtimer backend is deployed. Since such insane number of instances aren't demanded by the normal use case of ALSA sequencer and it merely opens a risk only for abuse, this patch introduces the upper limit for the number of instances per timer backend. As default, it's set to 1000, but for the fine-grained timer like hrtimer, it's set to 100. Reported-by: syzbot Tested-by: Jérôme Glisse Cc: Signed-off-by: Takashi Iwai --- include/sound/timer.h | 2 ++ sound/core/hrtimer.c | 1 + sound/core/timer.c | 67 +++++++++++++++++++++++++++++++++++++++++---------- 3 files changed, 57 insertions(+), 13 deletions(-) diff --git a/include/sound/timer.h b/include/sound/timer.h index c4d76ff056c6..7ae226ab6990 100644 --- a/include/sound/timer.h +++ b/include/sound/timer.h @@ -90,6 +90,8 @@ struct snd_timer { struct list_head ack_list_head; struct list_head sack_list_head; /* slow ack list head */ struct tasklet_struct task_queue; + int max_instances; /* upper limit of timer instances */ + int num_instances; /* current number of timer instances */ }; struct snd_timer_instance { diff --git a/sound/core/hrtimer.c b/sound/core/hrtimer.c index 1ac0c423903e..6e47b823bcaa 100644 --- a/sound/core/hrtimer.c +++ b/sound/core/hrtimer.c @@ -159,6 +159,7 @@ static int __init snd_hrtimer_init(void) timer->hw = hrtimer_hw; timer->hw.resolution = resolution; timer->hw.ticks = NANO_SEC / resolution; + timer->max_instances = 100; /* lower the limit */ err = snd_timer_global_register(timer); if (err < 0) { diff --git a/sound/core/timer.c b/sound/core/timer.c index 6cdd04a45962..15e82a656d96 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -180,7 +180,7 @@ static void snd_timer_request(struct snd_timer_id *tid) * * call this with register_mutex down. */ -static void snd_timer_check_slave(struct snd_timer_instance *slave) +static int snd_timer_check_slave(struct snd_timer_instance *slave) { struct snd_timer *timer; struct snd_timer_instance *master; @@ -190,16 +190,21 @@ static void snd_timer_check_slave(struct snd_timer_instance *slave) list_for_each_entry(master, &timer->open_list_head, open_list) { if (slave->slave_class == master->slave_class && slave->slave_id == master->slave_id) { + if (master->timer->num_instances >= + master->timer->max_instances) + return -EBUSY; list_move_tail(&slave->open_list, &master->slave_list_head); + master->timer->num_instances++; spin_lock_irq(&slave_active_lock); slave->master = master; slave->timer = master->timer; spin_unlock_irq(&slave_active_lock); - return; + return 0; } } } + return 0; } /* @@ -208,7 +213,7 @@ static void snd_timer_check_slave(struct snd_timer_instance *slave) * * call this with register_mutex down. */ -static void snd_timer_check_master(struct snd_timer_instance *master) +static int snd_timer_check_master(struct snd_timer_instance *master) { struct snd_timer_instance *slave, *tmp; @@ -216,7 +221,11 @@ static void snd_timer_check_master(struct snd_timer_instance *master) list_for_each_entry_safe(slave, tmp, &snd_timer_slave_list, open_list) { if (slave->slave_class == master->slave_class && slave->slave_id == master->slave_id) { + if (master->timer->num_instances >= + master->timer->max_instances) + return -EBUSY; list_move_tail(&slave->open_list, &master->slave_list_head); + master->timer->num_instances++; spin_lock_irq(&slave_active_lock); spin_lock(&master->timer->lock); slave->master = master; @@ -228,8 +237,11 @@ static void snd_timer_check_master(struct snd_timer_instance *master) spin_unlock_irq(&slave_active_lock); } } + return 0; } +static int snd_timer_close_locked(struct snd_timer_instance *timeri); + /* * open a timer instance * when opening a master, the slave id must be here given. @@ -240,6 +252,7 @@ int snd_timer_open(struct snd_timer_instance **ti, { struct snd_timer *timer; struct snd_timer_instance *timeri = NULL; + int err; if (tid->dev_class == SNDRV_TIMER_CLASS_SLAVE) { /* open a slave instance */ @@ -259,10 +272,14 @@ int snd_timer_open(struct snd_timer_instance **ti, timeri->slave_id = tid->device; timeri->flags |= SNDRV_TIMER_IFLG_SLAVE; list_add_tail(&timeri->open_list, &snd_timer_slave_list); - snd_timer_check_slave(timeri); + err = snd_timer_check_slave(timeri); + if (err < 0) { + snd_timer_close_locked(timeri); + timeri = NULL; + } mutex_unlock(®ister_mutex); *ti = timeri; - return 0; + return err; } /* open a master instance */ @@ -288,6 +305,10 @@ int snd_timer_open(struct snd_timer_instance **ti, return -EBUSY; } } + if (timer->num_instances >= timer->max_instances) { + mutex_unlock(®ister_mutex); + return -EBUSY; + } timeri = snd_timer_instance_new(owner, timer); if (!timeri) { mutex_unlock(®ister_mutex); @@ -314,25 +335,27 @@ int snd_timer_open(struct snd_timer_instance **ti, } list_add_tail(&timeri->open_list, &timer->open_list_head); - snd_timer_check_master(timeri); + timer->num_instances++; + err = snd_timer_check_master(timeri); + if (err < 0) { + snd_timer_close_locked(timeri); + timeri = NULL; + } mutex_unlock(®ister_mutex); *ti = timeri; - return 0; + return err; } EXPORT_SYMBOL(snd_timer_open); /* * close a timer instance + * call this with register_mutex down. */ -int snd_timer_close(struct snd_timer_instance *timeri) +static int snd_timer_close_locked(struct snd_timer_instance *timeri) { struct snd_timer *timer = NULL; struct snd_timer_instance *slave, *tmp; - if (snd_BUG_ON(!timeri)) - return -ENXIO; - - mutex_lock(®ister_mutex); list_del(&timeri->open_list); /* force to stop the timer */ @@ -340,6 +363,7 @@ int snd_timer_close(struct snd_timer_instance *timeri) timer = timeri->timer; if (timer) { + timer->num_instances--; /* wait, until the active callback is finished */ spin_lock_irq(&timer->lock); while (timeri->flags & SNDRV_TIMER_IFLG_CALLBACK) { @@ -355,6 +379,7 @@ int snd_timer_close(struct snd_timer_instance *timeri) list_for_each_entry_safe(slave, tmp, &timeri->slave_list_head, open_list) { list_move_tail(&slave->open_list, &snd_timer_slave_list); + timer->num_instances--; slave->master = NULL; slave->timer = NULL; list_del_init(&slave->ack_list); @@ -382,9 +407,24 @@ int snd_timer_close(struct snd_timer_instance *timeri) module_put(timer->module); } - mutex_unlock(®ister_mutex); return 0; } + +/* + * close a timer instance + */ +int snd_timer_close(struct snd_timer_instance *timeri) +{ + int err; + + if (snd_BUG_ON(!timeri)) + return -ENXIO; + + mutex_lock(®ister_mutex); + err = snd_timer_close_locked(timeri); + mutex_unlock(®ister_mutex); + return err; +} EXPORT_SYMBOL(snd_timer_close); unsigned long snd_timer_resolution(struct snd_timer_instance *timeri) @@ -856,6 +896,7 @@ int snd_timer_new(struct snd_card *card, char *id, struct snd_timer_id *tid, spin_lock_init(&timer->lock); tasklet_init(&timer->task_queue, snd_timer_tasklet, (unsigned long)timer); + timer->max_instances = 1000; /* default limit per timer */ if (card != NULL) { timer->module = card->module; err = snd_device_new(card, SNDRV_DEV_TIMER, timer, &ops); -- cgit From b9dd05c7002ee0ca8b676428b2268c26399b5e31 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 2 Nov 2017 18:44:28 +0100 Subject: ARM: 8720/1: ensure dump_instr() checks addr_limit When CONFIG_DEBUG_USER is enabled, it's possible for a user to deliberately trigger dump_instr() with a chosen kernel address. Let's avoid problems resulting from this by using get_user() rather than __get_user(), ensuring that we don't erroneously access kernel memory. So that we can use the same code to dump user instructions and kernel instructions, the common dumping code is factored out to __dump_instr(), with the fs manipulated appropriately in dump_instr() around calls to this. Signed-off-by: Mark Rutland Cc: stable@vger.kernel.org Signed-off-by: Russell King --- arch/arm/kernel/traps.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 948c648fea00..0fcd82f01388 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -154,30 +154,26 @@ static void dump_mem(const char *lvl, const char *str, unsigned long bottom, set_fs(fs); } -static void dump_instr(const char *lvl, struct pt_regs *regs) +static void __dump_instr(const char *lvl, struct pt_regs *regs) { unsigned long addr = instruction_pointer(regs); const int thumb = thumb_mode(regs); const int width = thumb ? 4 : 8; - mm_segment_t fs; char str[sizeof("00000000 ") * 5 + 2 + 1], *p = str; int i; /* - * We need to switch to kernel mode so that we can use __get_user - * to safely read from kernel space. Note that we now dump the - * code first, just in case the backtrace kills us. + * Note that we now dump the code first, just in case the backtrace + * kills us. */ - fs = get_fs(); - set_fs(KERNEL_DS); for (i = -4; i < 1 + !!thumb; i++) { unsigned int val, bad; if (thumb) - bad = __get_user(val, &((u16 *)addr)[i]); + bad = get_user(val, &((u16 *)addr)[i]); else - bad = __get_user(val, &((u32 *)addr)[i]); + bad = get_user(val, &((u32 *)addr)[i]); if (!bad) p += sprintf(p, i == 0 ? "(%0*x) " : "%0*x ", @@ -188,8 +184,20 @@ static void dump_instr(const char *lvl, struct pt_regs *regs) } } printk("%sCode: %s\n", lvl, str); +} - set_fs(fs); +static void dump_instr(const char *lvl, struct pt_regs *regs) +{ + mm_segment_t fs; + + if (!user_mode(regs)) { + fs = get_fs(); + set_fs(KERNEL_DS); + __dump_instr(lvl, regs); + set_fs(fs); + } else { + __dump_instr(lvl, regs); + } } #ifdef CONFIG_ARM_UNWIND -- cgit From 3510c7aa069aa83a2de6dab2b41401a198317bdc Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 6 Nov 2017 20:16:50 +0100 Subject: ALSA: seq: Avoid invalid lockdep class warning The recent fix for adding rwsem nesting annotation was using the given "hop" argument as the lock subclass key. Although the idea itself works, it may trigger a kernel warning like: BUG: looking up invalid subclass: 8 .... since the lockdep has a smaller number of subclasses (8) than we currently allow for the hops there (10). The current definition is merely a sanity check for avoiding the too deep delivery paths, and the 8 hops are already enough. So, as a quick fix, just follow the max hops as same as the max lockdep subclasses. Fixes: 1f20f9ff57ca ("ALSA: seq: Fix nested rwsem annotation for lockdep splat") Reported-by: syzbot Cc: Signed-off-by: Takashi Iwai --- include/sound/seq_kernel.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/sound/seq_kernel.h b/include/sound/seq_kernel.h index feb58d455560..4b9ee3009aa0 100644 --- a/include/sound/seq_kernel.h +++ b/include/sound/seq_kernel.h @@ -49,7 +49,8 @@ typedef union snd_seq_timestamp snd_seq_timestamp_t; #define SNDRV_SEQ_DEFAULT_CLIENT_EVENTS 200 /* max delivery path length */ -#define SNDRV_SEQ_MAX_HOPS 10 +/* NOTE: this shouldn't be greater than MAX_LOCKDEP_SUBCLASSES */ +#define SNDRV_SEQ_MAX_HOPS 8 /* max size of event size */ #define SNDRV_SEQ_MAX_EVENT_LEN 0x3fffffff -- cgit From 71630b7a832f699d6a6764ae75797e4e743ae348 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 6 Nov 2017 23:56:57 +0100 Subject: ACPI / PM: Blacklist Low Power S0 Idle _DSM for Dell XPS13 9360 At least one Dell XPS13 9360 is reported to have serious issues with the Low Power S0 Idle _DSM interface and since this machine model generally can do ACPI S3 just fine, add a blacklist entry to disable that interface for Dell XPS13 9360. Fixes: 8110dd281e15 (ACPI / sleep: EC-based wakeup from suspend-to-idle on recent systems) Link: https://bugzilla.kernel.org/show_bug.cgi?id=196907 Reported-by: Paul Menzel Tested-by: Paul Menzel Signed-off-by: Rafael J. Wysocki Cc: 4.13+ # 4.13+ --- drivers/acpi/sleep.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 6804ddab3052..8082871b409a 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -160,6 +160,14 @@ static int __init init_nvs_nosave(const struct dmi_system_id *d) return 0; } +static bool acpi_sleep_no_lps0; + +static int __init init_no_lps0(const struct dmi_system_id *d) +{ + acpi_sleep_no_lps0 = true; + return 0; +} + static const struct dmi_system_id acpisleep_dmi_table[] __initconst = { { .callback = init_old_suspend_ordering, @@ -343,6 +351,19 @@ static const struct dmi_system_id acpisleep_dmi_table[] __initconst = { DMI_MATCH(DMI_PRODUCT_NAME, "80E3"), }, }, + /* + * https://bugzilla.kernel.org/show_bug.cgi?id=196907 + * Some Dell XPS13 9360 cannot do suspend-to-idle using the Low Power + * S0 Idle firmware interface. + */ + { + .callback = init_no_lps0, + .ident = "Dell XPS13 9360", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "XPS 13 9360"), + }, + }, {}, }; @@ -485,6 +506,7 @@ static void acpi_pm_end(void) } #else /* !CONFIG_ACPI_SLEEP */ #define acpi_target_sleep_state ACPI_STATE_S0 +#define acpi_sleep_no_lps0 (false) static inline void acpi_sleep_dmi_check(void) {} #endif /* CONFIG_ACPI_SLEEP */ @@ -863,6 +885,12 @@ static int lps0_device_attach(struct acpi_device *adev, if (lps0_device_handle) return 0; + if (acpi_sleep_no_lps0) { + acpi_handle_info(adev->handle, + "Low Power S0 Idle interface disabled\n"); + return 0; + } + if (!(acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0)) return 0; -- cgit From 76ce7cfe35ef58f34e6ba85327afb5fbf6c3ff9b Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Fri, 27 Oct 2017 20:11:00 -0400 Subject: x86/smpboot: Make optimization of delay calibration work correctly If the TSC has constant frequency then the delay calibration can be skipped when it has been calibrated for a package already. This is checked in calibrate_delay_is_known(), but that function is buggy in two aspects: It returns 'false' if (!tsc_disabled && !cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC) which is obviously the reverse of the intended check and the check for the sibling mask cannot work either because the topology links have not been set up yet. Correct the condition and move the call to set_cpu_sibling_map() before invoking calibrate_delay() so the sibling check works correctly. [ tglx: Rewrote changelong ] Fixes: c25323c07345 ("x86/tsc: Use topology functions") Signed-off-by: Pavel Tatashin Signed-off-by: Thomas Gleixner Cc: peterz@infradead.org Cc: bob.picco@oracle.com Cc: steven.sistare@oracle.com Cc: daniel.m.jordan@oracle.com Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20171028001100.26603-1-pasha.tatashin@oracle.com --- arch/x86/kernel/smpboot.c | 11 ++++++----- arch/x86/kernel/tsc.c | 8 +++----- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index ad59edd84de7..65a0ccdc3050 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -193,6 +193,12 @@ static void smp_callin(void) */ smp_store_cpu_info(cpuid); + /* + * The topology information must be up to date before + * calibrate_delay() and notify_cpu_starting(). + */ + set_cpu_sibling_map(raw_smp_processor_id()); + /* * Get our bogomips. * Update loops_per_jiffy in cpu_data. Previous call to @@ -203,11 +209,6 @@ static void smp_callin(void) cpu_data(cpuid).loops_per_jiffy = loops_per_jiffy; pr_debug("Stack at about %p\n", &cpuid); - /* - * This must be done before setting cpu_online_mask - * or calling notify_cpu_starting. - */ - set_cpu_sibling_map(raw_smp_processor_id()); wmb(); notify_cpu_starting(cpuid); diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 796d96bb0821..ad2b925a808e 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -1346,12 +1346,10 @@ void __init tsc_init(void) unsigned long calibrate_delay_is_known(void) { int sibling, cpu = smp_processor_id(); - struct cpumask *mask = topology_core_cpumask(cpu); + int constant_tsc = cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC); + const struct cpumask *mask = topology_core_cpumask(cpu); - if (!tsc_disabled && !cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC)) - return 0; - - if (!mask) + if (tsc_disabled || !constant_tsc || !mask) return 0; sibling = cpumask_any_but(mask, cpu); -- cgit From 132d358b183ac6ad8b3fea32ad5e0663456d18d1 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 7 Nov 2017 16:05:24 +0100 Subject: ALSA: seq: Fix OSS sysex delivery in OSS emulation The SYSEX event delivery in OSS sequencer emulation assumed that the event is encoded in the variable-length data with the straight buffering. This was the normal behavior in the past, but during the development, the chained buffers were introduced for carrying more data, while the OSS code was left intact. As a result, when a SYSEX event with the chained buffer data is passed to OSS sequencer port, it may end up with the wrong memory access, as if it were having a too large buffer. This patch addresses the bug, by applying the buffer data expansion by the generic snd_seq_dump_var_event() helper function. Reported-by: syzbot Reported-by: Mark Salyzyn Cc: Signed-off-by: Takashi Iwai --- sound/core/seq/oss/seq_oss_midi.c | 4 +--- sound/core/seq/oss/seq_oss_readq.c | 29 +++++++++++++++++++++++++++++ sound/core/seq/oss/seq_oss_readq.h | 2 ++ 3 files changed, 32 insertions(+), 3 deletions(-) diff --git a/sound/core/seq/oss/seq_oss_midi.c b/sound/core/seq/oss/seq_oss_midi.c index aaff9ee32695..b30b2139e3f0 100644 --- a/sound/core/seq/oss/seq_oss_midi.c +++ b/sound/core/seq/oss/seq_oss_midi.c @@ -612,9 +612,7 @@ send_midi_event(struct seq_oss_devinfo *dp, struct snd_seq_event *ev, struct seq if (!dp->timer->running) len = snd_seq_oss_timer_start(dp->timer); if (ev->type == SNDRV_SEQ_EVENT_SYSEX) { - if ((ev->flags & SNDRV_SEQ_EVENT_LENGTH_MASK) == SNDRV_SEQ_EVENT_LENGTH_VARIABLE) - snd_seq_oss_readq_puts(dp->readq, mdev->seq_device, - ev->data.ext.ptr, ev->data.ext.len); + snd_seq_oss_readq_sysex(dp->readq, mdev->seq_device, ev); } else { len = snd_midi_event_decode(mdev->coder, msg, sizeof(msg), ev); if (len > 0) diff --git a/sound/core/seq/oss/seq_oss_readq.c b/sound/core/seq/oss/seq_oss_readq.c index 046cb586fb2f..06b21226b4e7 100644 --- a/sound/core/seq/oss/seq_oss_readq.c +++ b/sound/core/seq/oss/seq_oss_readq.c @@ -117,6 +117,35 @@ snd_seq_oss_readq_puts(struct seq_oss_readq *q, int dev, unsigned char *data, in return 0; } +/* + * put MIDI sysex bytes; the event buffer may be chained, thus it has + * to be expanded via snd_seq_dump_var_event(). + */ +struct readq_sysex_ctx { + struct seq_oss_readq *readq; + int dev; +}; + +static int readq_dump_sysex(void *ptr, void *buf, int count) +{ + struct readq_sysex_ctx *ctx = ptr; + + return snd_seq_oss_readq_puts(ctx->readq, ctx->dev, buf, count); +} + +int snd_seq_oss_readq_sysex(struct seq_oss_readq *q, int dev, + struct snd_seq_event *ev) +{ + struct readq_sysex_ctx ctx = { + .readq = q, + .dev = dev + }; + + if ((ev->flags & SNDRV_SEQ_EVENT_LENGTH_MASK) != SNDRV_SEQ_EVENT_LENGTH_VARIABLE) + return 0; + return snd_seq_dump_var_event(ev, readq_dump_sysex, &ctx); +} + /* * copy an event to input queue: * return zero if enqueued diff --git a/sound/core/seq/oss/seq_oss_readq.h b/sound/core/seq/oss/seq_oss_readq.h index f1463f1f449e..8d033ca2d23f 100644 --- a/sound/core/seq/oss/seq_oss_readq.h +++ b/sound/core/seq/oss/seq_oss_readq.h @@ -44,6 +44,8 @@ void snd_seq_oss_readq_delete(struct seq_oss_readq *q); void snd_seq_oss_readq_clear(struct seq_oss_readq *readq); unsigned int snd_seq_oss_readq_poll(struct seq_oss_readq *readq, struct file *file, poll_table *wait); int snd_seq_oss_readq_puts(struct seq_oss_readq *readq, int dev, unsigned char *data, int len); +int snd_seq_oss_readq_sysex(struct seq_oss_readq *q, int dev, + struct snd_seq_event *ev); int snd_seq_oss_readq_put_event(struct seq_oss_readq *readq, union evrec *ev); int snd_seq_oss_readq_put_timestamp(struct seq_oss_readq *readq, unsigned long curt, int seq_mode); int snd_seq_oss_readq_pick(struct seq_oss_readq *q, union evrec *rec); -- cgit From a817e73fe693f0718b6210f4b959478877fb2e2f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 7 Nov 2017 09:04:32 -0800 Subject: Revert "scsi: make 'state' device attribute pollable" This reverts commit 8a97712e5314aefe16b3ffb4583a34deaa49de04. This commit added a call to sysfs_notify() from within scsi_device_set_state(), which in turn turns out to make libata very unhappy, because ata_eh_detach_dev() does spin_lock_irqsave(ap->lock, flags); .. if (ata_scsi_offline_dev(dev)) { dev->flags |= ATA_DFLAG_DETACHED; ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG; } and ata_scsi_offline_dev() then does that scsi_device_set_state() to set it offline. So now we called sysfs_notify() from within a spinlocked region, which really doesn't work. The 0day robot reported this as: BUG: sleeping function called from invalid context at kernel/locking/mutex.c:238 because sysfs_notify() ends up calling kernfs_find_and_get_ns() which then does mutex_lock(&kernfs_mutex).. The pollability of the device state isn't critical, so revert this all for now, and maybe we'll do it differently in the future. Reported-by: Fengguang Wu Acked-by: Tejun Heo Acked-by: Martin K. Petersen Acked-by: Hannes Reinecke Signed-off-by: Linus Torvalds --- drivers/scsi/scsi_lib.c | 3 --- drivers/scsi/scsi_transport_srp.c | 5 +---- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index ad3ea24f0885..bcc1694cebcd 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -2685,7 +2685,6 @@ scsi_device_set_state(struct scsi_device *sdev, enum scsi_device_state state) } sdev->sdev_state = state; - sysfs_notify(&sdev->sdev_gendev.kobj, NULL, "state"); return 0; illegal: @@ -3109,7 +3108,6 @@ int scsi_internal_device_unblock_nowait(struct scsi_device *sdev, case SDEV_BLOCK: case SDEV_TRANSPORT_OFFLINE: sdev->sdev_state = new_state; - sysfs_notify(&sdev->sdev_gendev.kobj, NULL, "state"); break; case SDEV_CREATED_BLOCK: if (new_state == SDEV_TRANSPORT_OFFLINE || @@ -3117,7 +3115,6 @@ int scsi_internal_device_unblock_nowait(struct scsi_device *sdev, sdev->sdev_state = new_state; else sdev->sdev_state = SDEV_CREATED; - sysfs_notify(&sdev->sdev_gendev.kobj, NULL, "state"); break; case SDEV_CANCEL: case SDEV_OFFLINE: diff --git a/drivers/scsi/scsi_transport_srp.c b/drivers/scsi/scsi_transport_srp.c index 4f6f01cf9968..36f6190931bc 100644 --- a/drivers/scsi/scsi_transport_srp.c +++ b/drivers/scsi/scsi_transport_srp.c @@ -556,11 +556,8 @@ int srp_reconnect_rport(struct srp_rport *rport) */ shost_for_each_device(sdev, shost) { mutex_lock(&sdev->state_mutex); - if (sdev->sdev_state == SDEV_OFFLINE) { + if (sdev->sdev_state == SDEV_OFFLINE) sdev->sdev_state = SDEV_RUNNING; - sysfs_notify(&sdev->sdev_gendev.kobj, - NULL, "state"); - } mutex_unlock(&sdev->state_mutex); } } else if (rport->state == SRP_RPORT_RUNNING) { -- cgit From fbc3edf7d7731d7a22c483c679700589bab936a3 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 7 Nov 2017 17:37:24 +0100 Subject: drivers/ide-cd: Handle missing driver data during status check gracefully The 0day bot reports the below failure which happens occasionally, with their randconfig testing (once every ~100 boots). The Code points at the private pointer ->driver_data being NULL, which hints at a race of sorts where the private driver_data descriptor has disappeared by the time we get to run the workqueue. So let's check that pointer before we continue with issuing the command to the drive. This fix is of the brown paper bag nature but considering that IDE is long deprecated, let's do that so that random testing which happens to enable CONFIG_IDE during randconfig builds, doesn't fail because of this. Besides, failing the TEST_UNIT_READY command because the drive private data is gone is something which we could simply do anyway, to denote that there was a problem communicating with the device. BUG: unable to handle kernel NULL pointer dereference at 000001c0 IP: cdrom_check_status *pde = 00000000 Oops: 0000 [#1] SMP CPU: 1 PID: 155 Comm: kworker/1:2 Not tainted 4.14.0-rc8 #127 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014 Workqueue: events_freezable_power_ disk_events_workfn task: 4fe90980 task.stack: 507ac000 EIP: cdrom_check_status+0x2c/0x90 EFLAGS: 00210246 CPU: 1 EAX: 00000000 EBX: 4fefec00 ECX: 00000000 EDX: 00000000 ESI: 00000003 EDI: ffffffff EBP: 467a9340 ESP: 507aded0 DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 CR0: 80050033 CR2: 000001c0 CR3: 06e0f000 CR4: 00000690 Call Trace: ? ide_cdrom_check_events_real ? cdrom_check_events ? disk_check_events ? process_one_work ? process_one_work ? worker_thread ? kthread ? process_one_work ? __kthread_create_on_node ? ret_from_fork Code: 53 83 ec 14 89 c3 89 d1 be 03 00 00 00 65 a1 14 00 00 00 89 44 24 10 31 c0 8b 43 18 c7 44 24 04 00 00 00 00 c7 04 24 00 00 00 00 <8a> 80 c0 01 00 00 c7 44 24 08 00 00 00 00 83 e0 03 c7 44 24 0c EIP: cdrom_check_status+0x2c/0x90 SS:ESP: 0068:507aded0 CR2: 00000000000001c0 ---[ end trace 2410e586dd8f88b2 ]--- Reported-and-tested-by: Fengguang Wu Signed-off-by: Borislav Petkov Cc: "David S. Miller" Cc: Jens Axboe Cc: Bart Van Assche Signed-off-by: Linus Torvalds --- drivers/ide/ide-cd.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index a7355ab3bb22..6ff0be8cbdc9 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -867,11 +867,16 @@ static void msf_from_bcd(struct atapi_msf *msf) int cdrom_check_status(ide_drive_t *drive, struct request_sense *sense) { struct cdrom_info *info = drive->driver_data; - struct cdrom_device_info *cdi = &info->devinfo; + struct cdrom_device_info *cdi; unsigned char cmd[BLK_MAX_CDB]; ide_debug_log(IDE_DBG_FUNC, "enter"); + if (!info) + return -EIO; + + cdi = &info->devinfo; + memset(cmd, 0, BLK_MAX_CDB); cmd[0] = GPCMD_TEST_UNIT_READY; -- cgit From ea4b3afe1eac8f88bb453798a084fba47a1f155a Mon Sep 17 00:00:00 2001 From: Jaedon Shin Date: Fri, 16 Jun 2017 20:03:01 +0900 Subject: MIPS: BMIPS: Fix missing cbr address Fix NULL pointer access in BMIPS3300 RAC flush. Fixes: 738a3f79027b ("MIPS: BMIPS: Add early CPU initialization code") Signed-off-by: Jaedon Shin Reviewed-by: Florian Fainelli Cc: Kevin Cernekee Cc: linux-mips@linux-mips.org Cc: # 4.7+ Patchwork: https://patchwork.linux-mips.org/patch/16423/ Signed-off-by: James Hogan --- arch/mips/kernel/smp-bmips.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c index 406072e26752..87dcac2447c8 100644 --- a/arch/mips/kernel/smp-bmips.c +++ b/arch/mips/kernel/smp-bmips.c @@ -591,11 +591,11 @@ void __init bmips_cpu_setup(void) /* Flush and enable RAC */ cfg = __raw_readl(cbr + BMIPS_RAC_CONFIG); - __raw_writel(cfg | 0x100, BMIPS_RAC_CONFIG); + __raw_writel(cfg | 0x100, cbr + BMIPS_RAC_CONFIG); __raw_readl(cbr + BMIPS_RAC_CONFIG); cfg = __raw_readl(cbr + BMIPS_RAC_CONFIG); - __raw_writel(cfg | 0xf, BMIPS_RAC_CONFIG); + __raw_writel(cfg | 0xf, cbr + BMIPS_RAC_CONFIG); __raw_readl(cbr + BMIPS_RAC_CONFIG); cfg = __raw_readl(cbr + BMIPS_RAC_ADDRESS_RANGE); -- cgit From c18475a41a13504928fc57e423f42e264844c5ad Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 7 Nov 2017 18:37:38 -0600 Subject: MAINTAINERS: Remove Stephen Bates as Microsemi Switchtec maintainer Just sent an email there and received an autoreply because he no longer works there. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Bjorn Helgaas --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 2281af4b41b6..e981225f74f4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10314,7 +10314,6 @@ F: drivers/pci/host/vmd.c PCI DRIVER FOR MICROSEMI SWITCHTEC M: Kurt Schwemmer -M: Stephen Bates M: Logan Gunthorpe L: linux-pci@vger.kernel.org S: Maintained -- cgit From e806fa6bad186d7d353509b5a909abcf3a1e968b Mon Sep 17 00:00:00 2001 From: Gabriele Paoloni Date: Tue, 7 Nov 2017 18:37:54 -0600 Subject: MAINTAINERS: Remove Gabriele Paoloni as HiSilicon PCI maintainer Gabriele is now moving to a different role, so remove him as HiSilicon PCI maintainer. Signed-off-by: Gabriele Paoloni [bhelgaas: Thanks for all your help, Gabriele, and best wishes!] Signed-off-by: Bjorn Helgaas Acked-by: Zhou Wang --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index e981225f74f4..db412a627d96 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10448,7 +10448,6 @@ F: drivers/pci/host/pci-thunder-* PCIE DRIVER FOR HISILICON M: Zhou Wang -M: Gabriele Paoloni L: linux-pci@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/pci/hisilicon-pcie.txt -- cgit From 13c249a94f525fe4c757d28854049780b25605c4 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sat, 4 Nov 2017 12:33:47 +0000 Subject: net: mvpp2: Prevent userspace from changing TX affinities The mvpp2 driver can't cope at all with the TX affinities being changed from userspace, and spit an endless stream of [ 91.779920] mvpp2 f4000000.ethernet eth2: wrong cpu on the end of Tx processing [ 91.779930] mvpp2 f4000000.ethernet eth2: wrong cpu on the end of Tx processing [ 91.780402] mvpp2 f4000000.ethernet eth2: wrong cpu on the end of Tx processing [ 91.780406] mvpp2 f4000000.ethernet eth2: wrong cpu on the end of Tx processing [ 91.780415] mvpp2 f4000000.ethernet eth2: wrong cpu on the end of Tx processing [ 91.780418] mvpp2 f4000000.ethernet eth2: wrong cpu on the end of Tx processing rendering the box completely useless (I've measured around 600k interrupts/s on a 8040 box) once irqbalance kicks in and start doing its job. Obviously, the driver was never designed with this in mind. So let's work around the problem by preventing userspace from interacting with these interrupts altogether. Signed-off-by: Marc Zyngier Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index a37af5813f33..fcf9ba5eb8d1 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -6747,6 +6747,9 @@ static int mvpp2_irqs_init(struct mvpp2_port *port) for (i = 0; i < port->nqvecs; i++) { struct mvpp2_queue_vector *qv = port->qvecs + i; + if (qv->type == MVPP2_QUEUE_VECTOR_PRIVATE) + irq_set_status_flags(qv->irq, IRQ_NO_BALANCING); + err = request_irq(qv->irq, mvpp2_isr, 0, port->dev->name, qv); if (err) goto err; @@ -6776,6 +6779,7 @@ static void mvpp2_irqs_deinit(struct mvpp2_port *port) struct mvpp2_queue_vector *qv = port->qvecs + i; irq_set_affinity_hint(qv->irq, NULL); + irq_clear_status_flags(qv->irq, IRQ_NO_BALANCING); free_irq(qv->irq, qv); } } -- cgit From 39a4b86f0de4ce5024985a56fc39b16194b04313 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Sat, 4 Nov 2017 22:54:53 -0500 Subject: net/mlx5e/core/en_fs: fix pointer dereference after free in mlx5e_execute_l2_action hn is being kfree'd in mlx5e_del_l2_from_hash and then dereferenced by accessing hn->ai.addr Fix this by copying the MAC address into a local variable for its safe use in all possible execution paths within function mlx5e_execute_l2_action. Addresses-Coverity-ID: 1417789 Fixes: eeb66cdb6826 ("net/mlx5: Separate between E-Switch and MPFS") Signed-off-by: Gustavo A. R. Silva Acked-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_fs.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index 850cdc980ab5..4837045ffba3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -365,21 +365,24 @@ static void mlx5e_execute_l2_action(struct mlx5e_priv *priv, struct mlx5e_l2_hash_node *hn) { u8 action = hn->action; + u8 mac_addr[ETH_ALEN]; int l2_err = 0; + ether_addr_copy(mac_addr, hn->ai.addr); + switch (action) { case MLX5E_ACTION_ADD: mlx5e_add_l2_flow_rule(priv, &hn->ai, MLX5E_FULLMATCH); - if (!is_multicast_ether_addr(hn->ai.addr)) { - l2_err = mlx5_mpfs_add_mac(priv->mdev, hn->ai.addr); + if (!is_multicast_ether_addr(mac_addr)) { + l2_err = mlx5_mpfs_add_mac(priv->mdev, mac_addr); hn->mpfs = !l2_err; } hn->action = MLX5E_ACTION_NONE; break; case MLX5E_ACTION_DEL: - if (!is_multicast_ether_addr(hn->ai.addr) && hn->mpfs) - l2_err = mlx5_mpfs_del_mac(priv->mdev, hn->ai.addr); + if (!is_multicast_ether_addr(mac_addr) && hn->mpfs) + l2_err = mlx5_mpfs_del_mac(priv->mdev, mac_addr); mlx5e_del_l2_flow_rule(priv, &hn->ai); mlx5e_del_l2_from_hash(hn); break; @@ -387,7 +390,7 @@ static void mlx5e_execute_l2_action(struct mlx5e_priv *priv, if (l2_err) netdev_warn(priv->netdev, "MPFS, failed to %s mac %pM, err(%d)\n", - action == MLX5E_ACTION_ADD ? "add" : "del", hn->ai.addr, l2_err); + action == MLX5E_ACTION_ADD ? "add" : "del", mac_addr, l2_err); } static void mlx5e_sync_netdev_addr(struct mlx5e_priv *priv) -- cgit From cdea6a30c2689cc33b34c6691b57cca277f0c5dc Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Tue, 7 Nov 2017 16:19:24 -0800 Subject: Input: elan_i2c - add ELAN060C to the ACPI table ELAN060C touchpad uses elan_i2c as its driver. It can be found on Lenovo ideapad 320-14AST. BugLink: https://bugs.launchpad.net/bugs/1727544 Signed-off-by: Kai-Heng Feng Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elan_i2c_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index 6d6b092e2da9..d6135900da64 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -1258,6 +1258,7 @@ static const struct acpi_device_id elan_acpi_id[] = { { "ELAN0605", 0 }, { "ELAN0609", 0 }, { "ELAN060B", 0 }, + { "ELAN060C", 0 }, { "ELAN0611", 0 }, { "ELAN1000", 0 }, { } -- cgit From eca253912f734927005500da7d646536064b3ed3 Mon Sep 17 00:00:00 2001 From: Martin Kepplinger Date: Tue, 7 Nov 2017 16:20:49 -0800 Subject: Input: tsc200x-core - set INPUT_PROP_DIRECT If INPUT_PROP_DIRECT is set, userspace doesn't have to fall back to old ways of identifying touchscreen devices. In order to identify a touchscreen device, Android for example, seems to already depend on INPUT_PROP_DIRECT to be present in drivers. udev still checks for either BTN_TOUCH or INPUT_PROP_DIRECT. Checking for BTN_TOUCH however can quite easily lead to false positives; it's a code that not only touchscreen device drivers use. According to the documentation, touchscreen drivers should have this property set and in order to make life easy for userspace, let's set it. Signed-off-by: Martin Kepplinger Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/tsc200x-core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/touchscreen/tsc200x-core.c b/drivers/input/touchscreen/tsc200x-core.c index 88ea5e1b72ae..abf27578beb1 100644 --- a/drivers/input/touchscreen/tsc200x-core.c +++ b/drivers/input/touchscreen/tsc200x-core.c @@ -531,6 +531,7 @@ int tsc200x_probe(struct device *dev, int irq, const struct input_id *tsc_id, input_set_drvdata(input_dev, ts); + __set_bit(INPUT_PROP_DIRECT, input_dev->propbit); input_set_capability(input_dev, EV_KEY, BTN_TOUCH); input_set_abs_params(input_dev, ABS_X, -- cgit From 26dd633e437dca218547ccbeacc71fe8a620b6f6 Mon Sep 17 00:00:00 2001 From: Yiannis Marangos Date: Tue, 7 Nov 2017 16:23:11 -0800 Subject: Input: synaptics-rmi4 - RMI4 can also use SMBUS version 3 Some Synaptics devices, such as LEN0073, use SMBUS version 3. Signed-off-by: Yiannis Marangos Acked-by: Benjamin Tissoires Signed-off-by: Dmitry Torokhov --- drivers/input/rmi4/rmi_smbus.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/input/rmi4/rmi_smbus.c b/drivers/input/rmi4/rmi_smbus.c index 225025a0940c..b6ccf39c6a7b 100644 --- a/drivers/input/rmi4/rmi_smbus.c +++ b/drivers/input/rmi4/rmi_smbus.c @@ -312,7 +312,7 @@ static int rmi_smb_probe(struct i2c_client *client, rmi_smb->xport.dev = &client->dev; rmi_smb->xport.pdata = *pdata; rmi_smb->xport.pdata.irq = client->irq; - rmi_smb->xport.proto_name = "smb2"; + rmi_smb->xport.proto_name = "smb"; rmi_smb->xport.ops = &rmi_smb_ops; smbus_version = rmi_smb_get_version(rmi_smb); @@ -322,7 +322,7 @@ static int rmi_smb_probe(struct i2c_client *client, rmi_dbg(RMI_DEBUG_XPORT, &client->dev, "Smbus version is %d", smbus_version); - if (smbus_version != 2) { + if (smbus_version != 2 && smbus_version != 3) { dev_err(&client->dev, "Unrecognized SMB version %d\n", smbus_version); return -ENODEV; -- cgit From b5f862180d7011d9575d0499fa37f0f25b423b12 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Mon, 6 Nov 2017 09:01:57 +0800 Subject: bonding: discard lowest hash bit for 802.3ad layer3+4 After commit 07f4c90062f8 ("tcp/dccp: try to not exhaust ip_local_port_range in connect()"), we will try to use even ports for connect(). Then if an application (seen clearly with iperf) opens multiple streams to the same destination IP and port, each stream will be given an even source port. So the bonding driver's simple xmit_hash_policy based on layer3+4 addressing will always hash all these streams to the same interface. And the total throughput will limited to a single slave. Change the tcp code will impact the whole tcp behavior, only for bonding usage. Paolo Abeni suggested fix this by changing the bonding code only, which should be more reasonable, and less impact. Fix this by discarding the lowest hash bit because it contains little entropy. After the fix we can re-balance between slaves. Signed-off-by: Paolo Abeni Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index c99dc59d729b..76e8054bfc4e 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3253,7 +3253,7 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb) hash ^= (hash >> 16); hash ^= (hash >> 8); - return hash; + return hash >> 1; } /*-------------------------- Device entry points ----------------------------*/ -- cgit From 38c53af853069adf87181684370d7b8866d6387b Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 8 Nov 2017 14:44:04 +1100 Subject: KVM: PPC: Book3S HV: Fix exclusion between HPT resizing and other HPT updates Commit 5e9859699aba ("KVM: PPC: Book3S HV: Outline of KVM-HV HPT resizing implementation", 2016-12-20) added code that tries to exclude any use or update of the hashed page table (HPT) while the HPT resizing code is iterating through all the entries in the HPT. It does this by taking the kvm->lock mutex, clearing the kvm->arch.hpte_setup_done flag and then sending an IPI to all CPUs in the host. The idea is that any VCPU task that tries to enter the guest will see that the hpte_setup_done flag is clear and therefore call kvmppc_hv_setup_htab_rma, which also takes the kvm->lock mutex and will therefore block until we release kvm->lock. However, any VCPU that is already in the guest, or is handling a hypervisor page fault or hypercall, can re-enter the guest without rechecking the hpte_setup_done flag. The IPI will cause a guest exit of any VCPUs that are currently in the guest, but does not prevent those VCPU tasks from immediately re-entering the guest. The result is that after resize_hpt_rehash_hpte() has made a HPTE absent, a hypervisor page fault can occur and make that HPTE present again. This includes updating the rmap array for the guest real page, meaning that we now have a pointer in the rmap array which connects with pointers in the old rev array but not the new rev array. In fact, if the HPT is being reduced in size, the pointer in the rmap array could point outside the bounds of the new rev array. If that happens, we can get a host crash later on such as this one: [91652.628516] Unable to handle kernel paging request for data at address 0xd0000000157fb10c [91652.628668] Faulting instruction address: 0xc0000000000e2640 [91652.628736] Oops: Kernel access of bad area, sig: 11 [#1] [91652.628789] LE SMP NR_CPUS=1024 NUMA PowerNV [91652.628847] Modules linked in: binfmt_misc vhost_net vhost tap xt_CHECKSUM ipt_MASQUERADE nf_nat_masquerade_ipv4 ip6t_rpfilter ip6t_REJECT nf_reject_ipv6 nf_conntrack_ipv6 nf_defrag_ipv6 xt_conntrack ip_set nfnetlink ebtable_nat ebtable_broute bridge stp llc ip6table_mangle ip6table_security ip6table_raw iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack libcrc32c iptable_mangle iptable_security iptable_raw ebtable_filter ebtables ip6table_filter ip6_tables ses enclosure scsi_transport_sas i2c_opal ipmi_powernv ipmi_devintf i2c_core ipmi_msghandler powernv_op_panel nfsd auth_rpcgss oid_registry nfs_acl lockd grace sunrpc kvm_hv kvm_pr kvm scsi_dh_alua dm_service_time dm_multipath tg3 ptp pps_core [last unloaded: stap_552b612747aec2da355051e464fa72a1_14259] [91652.629566] CPU: 136 PID: 41315 Comm: CPU 21/KVM Tainted: G O 4.14.0-1.rc4.dev.gitb27fc5c.el7.centos.ppc64le #1 [91652.629684] task: c0000007a419e400 task.stack: c0000000028d8000 [91652.629750] NIP: c0000000000e2640 LR: d00000000c36e498 CTR: c0000000000e25f0 [91652.629829] REGS: c0000000028db5d0 TRAP: 0300 Tainted: G O (4.14.0-1.rc4.dev.gitb27fc5c.el7.centos.ppc64le) [91652.629932] MSR: 900000010280b033 CR: 44022422 XER: 00000000 [91652.630034] CFAR: d00000000c373f84 DAR: d0000000157fb10c DSISR: 40000000 SOFTE: 1 [91652.630034] GPR00: d00000000c36e498 c0000000028db850 c000000001403900 c0000007b7960000 [91652.630034] GPR04: d0000000117fb100 d000000007ab00d8 000000000033bb10 0000000000000000 [91652.630034] GPR08: fffffffffffffe7f 801001810073bb10 d00000000e440000 d00000000c373f70 [91652.630034] GPR12: c0000000000e25f0 c00000000fdb9400 f000000003b24680 0000000000000000 [91652.630034] GPR16: 00000000000004fb 00007ff7081a0000 00000000000ec91a 000000000033bb10 [91652.630034] GPR20: 0000000000010000 00000000001b1190 0000000000000001 0000000000010000 [91652.630034] GPR24: c0000007b7ab8038 d0000000117fb100 0000000ec91a1190 c000001e6a000000 [91652.630034] GPR28: 00000000033bb100 000000000073bb10 c0000007b7960000 d0000000157fb100 [91652.630735] NIP [c0000000000e2640] kvmppc_add_revmap_chain+0x50/0x120 [91652.630806] LR [d00000000c36e498] kvmppc_book3s_hv_page_fault+0xbb8/0xc40 [kvm_hv] [91652.630884] Call Trace: [91652.630913] [c0000000028db850] [c0000000028db8b0] 0xc0000000028db8b0 (unreliable) [91652.630996] [c0000000028db8b0] [d00000000c36e498] kvmppc_book3s_hv_page_fault+0xbb8/0xc40 [kvm_hv] [91652.631091] [c0000000028db9e0] [d00000000c36a078] kvmppc_vcpu_run_hv+0xdf8/0x1300 [kvm_hv] [91652.631179] [c0000000028dbb30] [d00000000c2248c4] kvmppc_vcpu_run+0x34/0x50 [kvm] [91652.631266] [c0000000028dbb50] [d00000000c220d54] kvm_arch_vcpu_ioctl_run+0x114/0x2a0 [kvm] [91652.631351] [c0000000028dbbd0] [d00000000c2139d8] kvm_vcpu_ioctl+0x598/0x7a0 [kvm] [91652.631433] [c0000000028dbd40] [c0000000003832e0] do_vfs_ioctl+0xd0/0x8c0 [91652.631501] [c0000000028dbde0] [c000000000383ba4] SyS_ioctl+0xd4/0x130 [91652.631569] [c0000000028dbe30] [c00000000000b8e0] system_call+0x58/0x6c [91652.631635] Instruction dump: [91652.631676] fba1ffe8 fbc1fff0 fbe1fff8 f8010010 f821ffa1 2fa70000 793d0020 e9432110 [91652.631814] 7bbf26e4 7c7e1b78 7feafa14 409e0094 <807f000c> 786326e4 7c6a1a14 93a40008 [91652.631959] ---[ end trace ac85ba6db72e5b2e ]--- To fix this, we tighten up the way that the hpte_setup_done flag is checked to ensure that it does provide the guarantee that the resizing code needs. In kvmppc_run_core(), we check the hpte_setup_done flag after disabling interrupts and refuse to enter the guest if it is clear (for a HPT guest). The code that checks hpte_setup_done and calls kvmppc_hv_setup_htab_rma() is moved from kvmppc_vcpu_run_hv() to a point inside the main loop in kvmppc_run_vcpu(), ensuring that we don't just spin endlessly calling kvmppc_run_core() while hpte_setup_done is clear, but instead have a chance to block on the kvm->lock mutex. Finally we also check hpte_setup_done inside the region in kvmppc_book3s_hv_page_fault() where the HPTE is locked and we are about to update the HPTE, and bail out if it is clear. If another CPU is inside kvm_vm_ioctl_resize_hpt_commit) and has cleared hpte_setup_done, then we know that either we are looking at a HPTE that resize_hpt_rehash_hpte() has not yet processed, which is OK, or else we will see hpte_setup_done clear and refuse to update it, because of the full barrier formed by the unlock of the HPTE in resize_hpt_rehash_hpte() combined with the locking of the HPTE in kvmppc_book3s_hv_page_fault(). Fixes: 5e9859699aba ("KVM: PPC: Book3S HV: Outline of KVM-HV HPT resizing implementation") Cc: stable@vger.kernel.org # v4.10+ Reported-by: Satheesh Rajendran Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_64_mmu_hv.c | 10 ++++++++++ arch/powerpc/kvm/book3s_hv.c | 29 +++++++++++++++++++---------- 2 files changed, 29 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 7c62967d672c..59247af5fd45 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -646,6 +646,16 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, hnow_v = hpte_new_to_old_v(hnow_v, hnow_r); hnow_r = hpte_new_to_old_r(hnow_r); } + + /* + * If the HPT is being resized, don't update the HPTE, + * instead let the guest retry after the resize operation is complete. + * The synchronization for hpte_setup_done test vs. set is provided + * by the HPTE lock. + */ + if (!kvm->arch.hpte_setup_done) + goto out_unlock; + if ((hnow_v & ~HPTE_V_HVLOCK) != hpte[0] || hnow_r != hpte[1] || rev->guest_rpte != hpte[2]) /* HPTE has been changed under us; let the guest retry */ diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 73bf1ebfa78f..8d43cf205d34 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2705,11 +2705,14 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) * Hard-disable interrupts, and check resched flag and signals. * If we need to reschedule or deliver a signal, clean up * and return without going into the guest(s). + * If the hpte_setup_done flag has been cleared, don't go into the + * guest because that means a HPT resize operation is in progress. */ local_irq_disable(); hard_irq_disable(); if (lazy_irq_pending() || need_resched() || - recheck_signals(&core_info)) { + recheck_signals(&core_info) || + (!kvm_is_radix(vc->kvm) && !vc->kvm->arch.hpte_setup_done)) { local_irq_enable(); vc->vcore_state = VCORE_INACTIVE; /* Unlock all except the primary vcore */ @@ -3078,7 +3081,7 @@ out: static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) { - int n_ceded, i; + int n_ceded, i, r; struct kvmppc_vcore *vc; struct kvm_vcpu *v; @@ -3132,6 +3135,20 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE && !signal_pending(current)) { + /* See if the HPT and VRMA are ready to go */ + if (!kvm_is_radix(vcpu->kvm) && + !vcpu->kvm->arch.hpte_setup_done) { + spin_unlock(&vc->lock); + r = kvmppc_hv_setup_htab_rma(vcpu); + spin_lock(&vc->lock); + if (r) { + kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; + kvm_run->fail_entry.hardware_entry_failure_reason = 0; + vcpu->arch.ret = r; + break; + } + } + if (vc->vcore_state == VCORE_PREEMPT && vc->runner == NULL) kvmppc_vcore_end_preempt(vc); @@ -3249,13 +3266,6 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) /* Order vcpus_running vs. hpte_setup_done, see kvmppc_alloc_reset_hpt */ smp_mb(); - /* On the first time here, set up HTAB and VRMA */ - if (!kvm_is_radix(vcpu->kvm) && !vcpu->kvm->arch.hpte_setup_done) { - r = kvmppc_hv_setup_htab_rma(vcpu); - if (r) - goto out; - } - flush_all_to_thread(current); /* Save userspace EBB and other register values */ @@ -3303,7 +3313,6 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) } mtspr(SPRN_VRSAVE, user_vrsave); - out: vcpu->arch.state = KVMPPC_VCPU_NOTREADY; atomic_dec(&vcpu->kvm->arch.vcpus_running); return r; -- cgit From 2cb80187ba065d7decad7c6614e35e07aec8a974 Mon Sep 17 00:00:00 2001 From: Bjørn Mork Date: Mon, 6 Nov 2017 15:37:22 +0100 Subject: net: cdc_ether: fix divide by 0 on bad descriptors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Setting dev->hard_mtu to 0 will cause a divide error in usbnet_probe. Protect against devices with bogus CDC Ethernet functional descriptors by ignoring a zero wMaxSegmentSize. Signed-off-by: Bjørn Mork Acked-by: Oliver Neukum Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ether.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 3e7a3ac3a362..05dca3e5c93d 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -230,7 +230,7 @@ skip: goto bad_desc; } - if (header.usb_cdc_ether_desc) { + if (header.usb_cdc_ether_desc && info->ether->wMaxSegmentSize) { dev->hard_mtu = le16_to_cpu(info->ether->wMaxSegmentSize); /* because of Zaurus, we may be ignoring the host * side link address we were given. -- cgit From 7fd078337201cf7468f53c3d9ef81ff78cb6df3b Mon Sep 17 00:00:00 2001 From: Bjørn Mork Date: Mon, 6 Nov 2017 15:32:18 +0100 Subject: net: qmi_wwan: fix divide by 0 on bad descriptors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A CDC Ethernet functional descriptor with wMaxSegmentSize = 0 will cause a divide error in usbnet_probe: divide error: 0000 [#1] PREEMPT SMP KASAN Modules linked in: CPU: 0 PID: 24 Comm: kworker/0:1 Not tainted 4.14.0-rc8-44453-g1fdc1a82c34f #56 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Workqueue: usb_hub_wq hub_event task: ffff88006bef5c00 task.stack: ffff88006bf60000 RIP: 0010:usbnet_update_max_qlen+0x24d/0x390 drivers/net/usb/usbnet.c:355 RSP: 0018:ffff88006bf67508 EFLAGS: 00010246 RAX: 00000000000163c8 RBX: ffff8800621fce40 RCX: ffff8800621fcf34 RDX: 0000000000000000 RSI: ffffffff837ecb7a RDI: ffff8800621fcf34 RBP: ffff88006bf67520 R08: ffff88006bef5c00 R09: ffffed000c43f881 R10: ffffed000c43f880 R11: ffff8800621fc406 R12: 0000000000000003 R13: ffffffff85c71de0 R14: 0000000000000000 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff88006ca00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ffe9c0d6dac CR3: 00000000614f4000 CR4: 00000000000006f0 Call Trace: usbnet_probe+0x18b5/0x2790 drivers/net/usb/usbnet.c:1783 qmi_wwan_probe+0x133/0x220 drivers/net/usb/qmi_wwan.c:1338 usb_probe_interface+0x324/0x940 drivers/usb/core/driver.c:361 really_probe drivers/base/dd.c:413 driver_probe_device+0x522/0x740 drivers/base/dd.c:557 Fix by simply ignoring the bogus descriptor, as it is optional for QMI devices anyway. Fixes: 423ce8caab7e ("net: usb: qmi_wwan: New driver for Huawei QMI based WWAN devices") Reported-by: Andrey Konovalov Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/qmi_wwan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 8c3733608271..a4f229edcceb 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -681,7 +681,7 @@ static int qmi_wwan_bind(struct usbnet *dev, struct usb_interface *intf) } /* errors aren't fatal - we can live with the dynamic address */ - if (cdc_ether) { + if (cdc_ether && cdc_ether->wMaxSegmentSize) { dev->hard_mtu = le16_to_cpu(cdc_ether->wMaxSegmentSize); usbnet_get_ethernet_addr(dev, cdc_ether->iMACAddress); } -- cgit From b7e732fa3171318418524b776b841b4024933b2b Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Mon, 6 Nov 2017 20:50:35 -0800 Subject: qrtr: Move to postcore_initcall Registering qrtr with module_init makes the ability of typical platform code to create AF_QIPCRTR socket during probe a matter of link order luck. Moving qrtr to postcore_initcall() avoids this. Signed-off-by: Bjorn Andersson Signed-off-by: David S. Miller --- net/qrtr/qrtr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index c2f5c13550c0..78418f38464a 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -1085,7 +1085,7 @@ static int __init qrtr_proto_init(void) return 0; } -module_init(qrtr_proto_init); +postcore_initcall(qrtr_proto_init); static void __exit qrtr_proto_fini(void) { -- cgit From 055db6957e4735b16cd2fa94a5bbfb754c9b8023 Mon Sep 17 00:00:00 2001 From: Jay Vosburgh Date: Tue, 7 Nov 2017 19:50:07 +0900 Subject: bonding: fix slave stuck in BOND_LINK_FAIL state The bonding miimon logic has a flaw, in that a failure of the rtnl_trylock can cause a slave to become permanently stuck in BOND_LINK_FAIL state. The sequence of events to cause this is as follows: 1) bond_miimon_inspect finds that a slave's link is down, and so calls bond_propose_link_state, setting slave->new_link_state to BOND_LINK_FAIL, then sets slave->new_link to BOND_LINK_DOWN and returns non-zero. 2) In bond_mii_monitor, the rtnl_trylock fails, and the timer is rescheduled. No change is committed. 3) bond_miimon_inspect is called again, but this time the slave from step 1 has recovered. slave->new_link is reset to NOCHANGE, and, as slave->link was never changed, the switch enters the BOND_LINK_UP case, and does nothing. The pending BOND_LINK_FAIL state from step 1 remains pending, as new_link_state is not reset. 4) The state from step 3 persists until another slave changes link state and causes bond_miimon_inspect to return non-zero. At this point, the BOND_LINK_FAIL state change on the slave from steps 1-3 is committed, and the slave will remain stuck in BOND_LINK_FAIL state even though it is actually link up. The remedy for this is to initialize new_link_state on each entry to bond_miimon_inspect, as is already done with new_link. Fixes: fb9eb899a6dc ("bonding: handle link transition from FAIL to UP correctly") Reported-by: Alex Sidorenko Reviewed-by: Jarod Wilson Signed-off-by: Jay Vosburgh Acked-by: Mahesh Bandewar Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 76e8054bfc4e..b2db581131b2 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2042,6 +2042,7 @@ static int bond_miimon_inspect(struct bonding *bond) bond_for_each_slave_rcu(bond, slave, iter) { slave->new_link = BOND_LINK_NOCHANGE; + slave->link_new_state = slave->link; link_state = bond_check_dev_link(bond, slave->dev, 0); -- cgit From 0de0add10e587effa880c741c9413c874f16be91 Mon Sep 17 00:00:00 2001 From: Kristian Evensen Date: Tue, 7 Nov 2017 13:47:56 +0100 Subject: qmi_wwan: Add missing skb_reset_mac_header-call MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we receive a packet on a QMI device in raw IP mode, we should call skb_reset_mac_header() to ensure that skb->mac_header contains a valid offset in the packet. While it shouldn't really matter, the packets have no MAC header and the interface is configured as-such, it seems certain parts of the network stack expects a "good" value in skb->mac_header. Without the skb_reset_mac_header() call added in this patch, for example shaping traffic (using tc) triggers the following oops on the first received packet: [ 303.642957] skbuff: skb_under_panic: text:8f137918 len:177 put:67 head:8e4b0f00 data:8e4b0eff tail:0x8e4b0fb0 end:0x8e4b1520 dev:wwan0 [ 303.655045] Kernel bug detected[#1]: [ 303.658622] CPU: 1 PID: 1002 Comm: logd Not tainted 4.9.58 #0 [ 303.664339] task: 8fdf05e0 task.stack: 8f15c000 [ 303.668844] $ 0 : 00000000 00000001 0000007a 00000000 [ 303.674062] $ 4 : 8149a2fc 8149a2fc 8149ce20 00000000 [ 303.679284] $ 8 : 00000030 3878303a 31623465 20303235 [ 303.684510] $12 : ded731e3 2626a277 00000000 03bd0000 [ 303.689747] $16 : 8ef62b40 00000043 8f137918 804db5fc [ 303.694978] $20 : 00000001 00000004 8fc13800 00000003 [ 303.700215] $24 : 00000001 8024ab10 [ 303.705442] $28 : 8f15c000 8fc19cf0 00000043 802cc920 [ 303.710664] Hi : 00000000 [ 303.713533] Lo : 74e58000 [ 303.716436] epc : 802cc920 skb_panic+0x58/0x5c [ 303.721046] ra : 802cc920 skb_panic+0x58/0x5c [ 303.725639] Status: 11007c03 KERNEL EXL IE [ 303.729823] Cause : 50800024 (ExcCode 09) [ 303.733817] PrId : 0001992f (MIPS 1004Kc) [ 303.737892] Modules linked in: rt2800pci rt2800mmio rt2800lib qcserial ppp_async option usb_wwan rt2x00pci rt2x00mmio rt2x00lib rndis_host qmi_wwan ppp_generic nf_nat_pptp nf_conntrack_pptp nf_conntrack_ipv6 mt76x2i Process logd (pid: 1002, threadinfo=8f15c000, task=8fdf05e0, tls=77b3eee4) [ 303.962509] Stack : 00000000 80408990 8f137918 000000b1 00000043 8e4b0f00 8e4b0eff 8e4b0fb0 [ 303.970871] 8e4b1520 8fec1800 00000043 802cd2a4 6e000045 00000043 00000000 8ef62000 [ 303.979219] 8eef5d00 8ef62b40 8fea7300 8f137918 00000000 00000000 0002bb01 793e5664 [ 303.987568] 8ef08884 00000001 8fea7300 00000002 8fc19e80 8eef5d00 00000006 00000003 [ 303.995934] 00000000 8030ba90 00000003 77ab3fd0 8149dc80 8004d1bc 8f15c000 8f383700 [ 304.004324] ... [ 304.006767] Call Trace: [ 304.009241] [<802cc920>] skb_panic+0x58/0x5c [ 304.013504] [<802cd2a4>] skb_push+0x78/0x90 [ 304.017783] [<8f137918>] 0x8f137918 [ 304.021269] Code: 00602825 0c02a3b4 24842888 <000c000d> 8c870060 8c8200a0 0007382b 00070336 8c88005c [ 304.031034] [ 304.032805] ---[ end trace b778c482b3f0bda9 ]--- [ 304.041384] Kernel panic - not syncing: Fatal exception in interrupt [ 304.051975] Rebooting in 3 seconds.. While the oops is for a 4.9-kernel, I was able to trigger the same oops with net-next as of yesterday. Fixes: 32f7adf633b9 ("net: qmi_wwan: support "raw IP" mode") Signed-off-by: Kristian Evensen Acked-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index a4f229edcceb..8d4a6f7cba61 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -499,6 +499,7 @@ static int qmi_wwan_rx_fixup(struct usbnet *dev, struct sk_buff *skb) return 1; } if (rawip) { + skb_reset_mac_header(skb); skb->dev = dev->net; /* normally set by eth_type_trans */ skb->protocol = proto; return 1; -- cgit From 881125bfe65bb772f34f4fcb04a35dfe117e186a Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 7 Nov 2017 20:19:34 -0600 Subject: x86/unwind: Disable KASAN checking in the ORC unwinder Fengguang reported a KASAN warning: Kprobe smoke test: started ================================================================== BUG: KASAN: stack-out-of-bounds in deref_stack_reg+0xb5/0x11a Read of size 8 at addr ffff8800001c7cd8 by task swapper/1 CPU: 0 PID: 1 Comm: swapper Not tainted 4.14.0-rc8 #26 Call Trace: <#DB> ... save_trace+0xd9/0x1d3 mark_lock+0x5f7/0xdc3 __lock_acquire+0x6b4/0x38ef lock_acquire+0x1a1/0x2aa _raw_spin_lock_irqsave+0x46/0x55 kretprobe_table_lock+0x1a/0x42 pre_handler_kretprobe+0x3f5/0x521 kprobe_int3_handler+0x19c/0x25f do_int3+0x61/0x142 int3+0x30/0x60 [...] The ORC unwinder got confused by some kprobes changes, which isn't surprising since the runtime code no longer matches vmlinux and the stack was modified for kretprobes. Until we have a way for generated code to register changes with the unwinder, these types of warnings are inevitable. So just disable KASAN checks for stack accesses in the ORC unwinder. Reported-by: Fengguang Wu Signed-off-by: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thiago Jung Bauermann Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20171108021934.zbl6unh5hpugybc5@treble Signed-off-by: Ingo Molnar --- arch/x86/kernel/unwind_orc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index b95007e7c1b3..a3f973b2c97a 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -279,7 +279,7 @@ static bool deref_stack_reg(struct unwind_state *state, unsigned long addr, if (!stack_access_ok(state, addr, sizeof(long))) return false; - *val = READ_ONCE_TASK_STACK(state->task, *(unsigned long *)addr); + *val = READ_ONCE_NOCHECK(*(unsigned long *)addr); return true; } -- cgit From a743bbeef27b9176987ec0cb7f906ab0ab52d1da Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 7 Nov 2017 18:53:07 +0100 Subject: x86/oprofile/ppro: Do not use __this_cpu*() in preemptible context The warning below says it all: BUG: using __this_cpu_read() in preemptible [00000000] code: swapper/0/1 caller is __this_cpu_preempt_check CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.14.0-rc8 #4 Call Trace: dump_stack check_preemption_disabled ? do_early_param __this_cpu_preempt_check arch_perfmon_init op_nmi_init ? alloc_pci_root_info oprofile_arch_init oprofile_init do_one_initcall ... These accessors should not have been used in the first place: it is PPro so no mixed silicon revisions and thus it can simply use boot_cpu_data. Reported-by: Fengguang Wu Tested-by: Fengguang Wu Fix-creation-mandated-by: Linus Torvalds Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: Robert Richter Cc: x86@kernel.org Cc: stable@vger.kernel.org --- arch/x86/oprofile/op_model_ppro.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 350f7096baac..7913b6921959 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -212,8 +212,8 @@ static void arch_perfmon_setup_counters(void) eax.full = cpuid_eax(0xa); /* Workaround for BIOS bugs in 6/15. Taken from perfmon2 */ - if (eax.split.version_id == 0 && __this_cpu_read(cpu_info.x86) == 6 && - __this_cpu_read(cpu_info.x86_model) == 15) { + if (eax.split.version_id == 0 && boot_cpu_data.x86 == 6 && + boot_cpu_data.x86_model == 15) { eax.split.version_id = 2; eax.split.num_counters = 2; eax.split.bit_width = 40; -- cgit From e6b03ab63b4d270e0249f96536fde632409dc1dc Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Sun, 29 Oct 2017 16:27:19 +0100 Subject: MIPS: AR7: Defer registration of GPIO When called from prom init code, ar7_gpio_init() will fail as it will call gpiochip_add() which relies on a working kmalloc() to alloc the gpio_desc array and kmalloc is not useable yet at prom init time. Move ar7_gpio_init() to ar7_register_devices() (a device_initcall) where kmalloc works. Fixes: 14e85c0e69d5 ("gpio: remove gpio_descs global array") Signed-off-by: Jonas Gorski Reviewed-by: Florian Fainelli Cc: Ralf Baechle Cc: Greg Kroah-Hartman Cc: Yoshihiro YUNOMAE Cc: Nicolas Schichan Cc: linux-mips@linux-mips.org Cc: linux-serial@vger.kernel.org Cc: # 3.19+ Patchwork: https://patchwork.linux-mips.org/patch/17542/ Signed-off-by: James Hogan --- arch/mips/ar7/platform.c | 4 ++++ arch/mips/ar7/prom.c | 2 -- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/mips/ar7/platform.c b/arch/mips/ar7/platform.c index df7acea3747a..f2c9f90c5f13 100644 --- a/arch/mips/ar7/platform.c +++ b/arch/mips/ar7/platform.c @@ -653,6 +653,10 @@ static int __init ar7_register_devices(void) u32 val; int res; + res = ar7_gpio_init(); + if (res) + pr_warn("unable to register gpios: %d\n", res); + res = ar7_register_uarts(); if (res) pr_err("unable to setup uart(s): %d\n", res); diff --git a/arch/mips/ar7/prom.c b/arch/mips/ar7/prom.c index 4fd83336131a..dd53987a690f 100644 --- a/arch/mips/ar7/prom.c +++ b/arch/mips/ar7/prom.c @@ -246,8 +246,6 @@ void __init prom_init(void) ar7_init_cmdline(fw_arg0, (char **)fw_arg1); ar7_init_env((struct env_var *)fw_arg2); console_config(); - - ar7_gpio_init(); } #define PORT(offset) (KSEG1ADDR(AR7_REGS_UART0 + (offset * 4))) -- cgit From 624f5ab8720b3371367327a822c267699c1823b8 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 7 Nov 2017 22:29:02 +0000 Subject: KEYS: fix NULL pointer dereference during ASN.1 parsing [ver #2] syzkaller reported a NULL pointer dereference in asn1_ber_decoder(). It can be reproduced by the following command, assuming CONFIG_PKCS7_TEST_KEY=y: keyctl add pkcs7_test desc '' @s The bug is that if the data buffer is empty, an integer underflow occurs in the following check: if (unlikely(dp >= datalen - 1)) goto data_overrun_error; This results in the NULL data pointer being dereferenced. Fix it by checking for 'datalen - dp < 2' instead. Also fix the similar check for 'dp >= datalen - n' later in the same function. That one possibly could result in a buffer overread. The NULL pointer dereference was reproducible using the "pkcs7_test" key type but not the "asymmetric" key type because the "asymmetric" key type checks for a 0-length payload before calling into the ASN.1 decoder but the "pkcs7_test" key type does not. The bug report was: BUG: unable to handle kernel NULL pointer dereference at (null) IP: asn1_ber_decoder+0x17f/0xe60 lib/asn1_decoder.c:233 PGD 7b708067 P4D 7b708067 PUD 7b6ee067 PMD 0 Oops: 0000 [#1] SMP Modules linked in: CPU: 0 PID: 522 Comm: syz-executor1 Not tainted 4.14.0-rc8 #7 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.3-20171021_125229-anatol 04/01/2014 task: ffff9b6b3798c040 task.stack: ffff9b6b37970000 RIP: 0010:asn1_ber_decoder+0x17f/0xe60 lib/asn1_decoder.c:233 RSP: 0018:ffff9b6b37973c78 EFLAGS: 00010216 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 000000000000021c RDX: ffffffff814a04ed RSI: ffffb1524066e000 RDI: ffffffff910759e0 RBP: ffff9b6b37973d60 R08: 0000000000000001 R09: ffff9b6b3caa4180 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000002 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 FS: 00007f10ed1f2700(0000) GS:ffff9b6b3ea00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 000000007b6f3000 CR4: 00000000000006f0 Call Trace: pkcs7_parse_message+0xee/0x240 crypto/asymmetric_keys/pkcs7_parser.c:139 verify_pkcs7_signature+0x33/0x180 certs/system_keyring.c:216 pkcs7_preparse+0x41/0x70 crypto/asymmetric_keys/pkcs7_key_type.c:63 key_create_or_update+0x180/0x530 security/keys/key.c:855 SYSC_add_key security/keys/keyctl.c:122 [inline] SyS_add_key+0xbf/0x250 security/keys/keyctl.c:62 entry_SYSCALL_64_fastpath+0x1f/0xbe RIP: 0033:0x4585c9 RSP: 002b:00007f10ed1f1bd8 EFLAGS: 00000216 ORIG_RAX: 00000000000000f8 RAX: ffffffffffffffda RBX: 00007f10ed1f2700 RCX: 00000000004585c9 RDX: 0000000020000000 RSI: 0000000020008ffb RDI: 0000000020008000 RBP: 0000000000000000 R08: ffffffffffffffff R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000216 R12: 00007fff1b2260ae R13: 00007fff1b2260af R14: 00007f10ed1f2700 R15: 0000000000000000 Code: dd ca ff 48 8b 45 88 48 83 e8 01 4c 39 f0 0f 86 a8 07 00 00 e8 53 dd ca ff 49 8d 46 01 48 89 85 58 ff ff ff 48 8b 85 60 ff ff ff <42> 0f b6 0c 30 89 c8 88 8d 75 ff ff ff 83 e0 1f 89 8d 28 ff ff RIP: asn1_ber_decoder+0x17f/0xe60 lib/asn1_decoder.c:233 RSP: ffff9b6b37973c78 CR2: 0000000000000000 Fixes: 42d5ec27f873 ("X.509: Add an ASN.1 decoder") Reported-by: syzbot Cc: # v3.7+ Signed-off-by: Eric Biggers Signed-off-by: David Howells Signed-off-by: James Morris --- lib/asn1_decoder.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/asn1_decoder.c b/lib/asn1_decoder.c index fef5d2e114be..1ef0cec38d78 100644 --- a/lib/asn1_decoder.c +++ b/lib/asn1_decoder.c @@ -228,7 +228,7 @@ next_op: hdr = 2; /* Extract a tag from the data */ - if (unlikely(dp >= datalen - 1)) + if (unlikely(datalen - dp < 2)) goto data_overrun_error; tag = data[dp++]; if (unlikely((tag & 0x1f) == ASN1_LONG_TAG)) @@ -274,7 +274,7 @@ next_op: int n = len - 0x80; if (unlikely(n > 2)) goto length_too_long; - if (unlikely(dp >= datalen - n)) + if (unlikely(n > datalen - dp)) goto data_overrun_error; hdr += n; for (len = 0; n > 0; n--) { -- cgit From 6b7be529634bbfbf6395f23217a66d731fbed0a0 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 8 Nov 2017 08:49:49 -0600 Subject: MAINTAINERS: Add Lorenzo Pieralisi for PCI host bridge drivers Add Lorenzo Pieralisi as maintainer for PCI native host bridge drivers and the endpoint driver framework. Signed-off-by: Bjorn Helgaas Acked-by: Lorenzo Pieralisi --- MAINTAINERS | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index db412a627d96..6ce341e86fec 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10378,6 +10378,7 @@ F: drivers/pci/dwc/*keystone* PCI ENDPOINT SUBSYSTEM M: Kishon Vijay Abraham I +M: Lorenzo Pieralisi L: linux-pci@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/kishon/pci-endpoint.git S: Supported @@ -10429,6 +10430,15 @@ F: include/linux/pci* F: arch/x86/pci/ F: arch/x86/kernel/quirks.c +PCI NATIVE HOST BRIDGE AND ENDPOINT DRIVERS +M: Lorenzo Pieralisi +L: linux-pci@vger.kernel.org +Q: http://patchwork.ozlabs.org/project/linux-pci/list/ +T: git git://git.kernel.org/pub/scm/linux/kernel/git/lpieralisi/pci.git/ +S: Supported +F: drivers/pci/host/ +F: drivers/pci/dwc/ + PCIE DRIVER FOR AXIS ARTPEC M: Niklas Cassel M: Jesper Nilsson -- cgit From b084116f8587b222a2c5ef6dcd846f40f24b9420 Mon Sep 17 00:00:00 2001 From: Oswald Buddenhagen Date: Sun, 29 Oct 2017 16:27:20 +0100 Subject: MIPS: AR7: Ensure that serial ports are properly set up Without UPF_FIXED_TYPE, the data from the PORT_AR7 uart_config entry is never copied, resulting in a dead port. Fixes: 154615d55459 ("MIPS: AR7: Use correct UART port type") Signed-off-by: Oswald Buddenhagen [jonas.gorski: add Fixes tag] Signed-off-by: Jonas Gorski Reviewed-by: Florian Fainelli Cc: Ralf Baechle Cc: Greg Kroah-Hartman Cc: Yoshihiro YUNOMAE Cc: Nicolas Schichan Cc: Oswald Buddenhagen Cc: linux-mips@linux-mips.org Cc: linux-serial@vger.kernel.org Cc: Patchwork: https://patchwork.linux-mips.org/patch/17543/ Signed-off-by: James Hogan --- arch/mips/ar7/platform.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/ar7/platform.c b/arch/mips/ar7/platform.c index f2c9f90c5f13..4674f1efbe7a 100644 --- a/arch/mips/ar7/platform.c +++ b/arch/mips/ar7/platform.c @@ -575,6 +575,7 @@ static int __init ar7_register_uarts(void) uart_port.type = PORT_AR7; uart_port.uartclk = clk_get_rate(bus_clk) / 2; uart_port.iotype = UPIO_MEM32; + uart_port.flags = UPF_FIXED_TYPE; uart_port.regshift = 2; uart_port.line = 0; -- cgit From 40a4884512d5bf9ed3a9cb162b8a7ec3306b6cfc Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 31 Oct 2017 10:23:25 +0000 Subject: drm/i915: Reject unknown syncobj flags We have to reject unknown flags for uAPI considerations, and also because the curent implementation limits their i915 storage space to two bits. v2: (Chris Wilson) * Fix fail in ABI check. * Added unknown flags and BUILD_BUG_ON. v3: * Use ARCH_KMALLOC_MINALIGN instead of alignof. (Chris Wilson) Signed-off-by: Tvrtko Ursulin Fixes: cf6e7bac6357 ("drm/i915: Add support for drm syncobjs") Cc: Jason Ekstrand Cc: Chris Wilson Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: David Airlie Cc: intel-gfx@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171031102326.9738-1-tvrtko.ursulin@linux.intel.com (cherry picked from commit ebcaa1ff8b59097805d548fe7a676f194625c033) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 8 ++++++++ include/uapi/drm/i915_drm.h | 1 + 2 files changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 4ac454ae54d7..83876a1c8d98 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -2094,6 +2094,11 @@ get_fence_array(struct drm_i915_gem_execbuffer2 *args, goto err; } + if (fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) { + err = -EINVAL; + goto err; + } + syncobj = drm_syncobj_find(file, fence.handle); if (!syncobj) { DRM_DEBUG("Invalid syncobj handle provided\n"); @@ -2101,6 +2106,9 @@ get_fence_array(struct drm_i915_gem_execbuffer2 *args, goto err; } + BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) & + ~__I915_EXEC_FENCE_UNKNOWN_FLAGS); + fences[n] = ptr_pack_bits(syncobj, fence.flags, 2); } diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 6598fb76d2c2..9816590d3ad2 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -829,6 +829,7 @@ struct drm_i915_gem_exec_fence { #define I915_EXEC_FENCE_WAIT (1<<0) #define I915_EXEC_FENCE_SIGNAL (1<<1) +#define __I915_EXEC_FENCE_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SIGNAL << 1)) __u32 flags; }; -- cgit From 423a8a942e95493b73228ba6a3f176dcc7f35fa9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 6 Nov 2017 21:11:28 +0000 Subject: drm/i915: Deconstruct struct sgt_dma initialiser MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gcc-4.4 complains about: struct sgt_dma iter = { .sg = vma->pages->sgl, .dma = sg_dma_address(iter.sg), .max = iter.dma + iter.sg->length, }; drivers/gpu/drm/i915/i915_gem_gtt.c: In function ‘gen8_ppgtt_insert_4lvl’: drivers/gpu/drm/i915/i915_gem_gtt.c:938: error: ‘iter.sg’ is used uninitialized in this function drivers/gpu/drm/i915/i915_gem_gtt.c:939: error: ‘iter.dma’ is used uninitialized in this function and worse generates invalid code that triggers a GPF: BUG: unable to handle kernel NULL pointer dereference at 0000000000000010 IP: gen8_ppgtt_insert_4lvl+0x1b/0x1e0 [i915] PGD 0 Oops: 0000 [#1] SMP Modules linked in: snd_aloop nf_conntrack_ipv6 nf_defrag_ipv6 nf_log_ipv6 ip6table_filter ip6_tables ctr ccm xt_state nf_log_ipv4 nf_log_common xt_LOG xt_limit xt_recent xt_owner xt_addrtype iptable_filter ipt_MASQUERADE nf_nat_masquerade_ipv4 iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack libcrc32c ip_tables dm_mod vhost_net macvtap macvlan vhost tun kvm_intel kvm irqbypass uas usb_storage hid_multitouch btusb btrtl uvcvideo videobuf2_v4l2 videobuf2_core videodev media videobuf2_vmalloc videobuf2_memops sg ppdev dell_wmi sparse_keymap mei_wdt sd_mod iTCO_wdt iTCO_vendor_support rtsx_pci_ms memstick rtsx_pci_sdmmc mmc_core dell_smm_hwmon hwmon dell_laptop dell_smbios dcdbas joydev input_leds hci_uart btintel btqca btbcm bluetooth parport_pc parport i2c_hid intel_lpss_acpi intel_lpss pcspkr wmi int3400_thermal acpi_thermal_rel dell_rbtn mei_me mei snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic ahci libahci acpi_pad xhci_pci xhci_hcd snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_seq snd_seq_device snd_pcm snd_timer snd soundcore int3403_thermal arc4 e1000e ptp pps_core i2c_i801 iwlmvm mac80211 rtsx_pci iwlwifi cfg80211 rfkill intel_pch_thermal processor_thermal_device int340x_thermal_zone intel_soc_dts_iosf i915 video fjes CPU: 2 PID: 2408 Comm: X Not tainted 4.10.0-rc5+ #1 Hardware name: Dell Inc. Latitude E7470/0T6HHJ, BIOS 1.11.3 11/09/2016 task: ffff880219fe4740 task.stack: ffffc90005f98000 RIP: 0010:gen8_ppgtt_insert_4lvl+0x1b/0x1e0 [i915] RSP: 0018:ffffc90005f9b8c8 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff8802167d8000 RCX: 0000000000000001 RDX: 00000000ffff7000 RSI: ffff880219f94140 RDI: ffff880228444000 RBP: ffffc90005f9b948 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000080 R13: 0000000000000001 R14: ffffc90005f9bcd7 R15: ffff88020c9a83c0 FS: 00007fb53e1ee920(0000) GS:ffff88024dd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000010 CR3: 000000022ef95000 CR4: 00000000003406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ppgtt_bind_vma+0x40/0x50 [i915] i915_vma_bind+0xcb/0x1c0 [i915] __i915_vma_do_pin+0x6e/0xd0 [i915] i915_gem_execbuffer_reserve_vma+0x162/0x1d0 [i915] i915_gem_execbuffer_reserve+0x4fc/0x510 [i915] ? __kmalloc+0x134/0x250 ? i915_gem_wait_for_error+0x25/0x100 [i915] ? i915_gem_wait_for_error+0x25/0x100 [i915] i915_gem_do_execbuffer+0x2df/0xa00 [i915] ? drm_malloc_gfp.clone.0+0x42/0x80 [i915] ? path_put+0x22/0x30 ? __check_object_size+0x62/0x1f0 ? terminate_walk+0x44/0x90 i915_gem_execbuffer2+0x95/0x1e0 [i915] drm_ioctl+0x243/0x490 ? handle_pte_fault+0x1d7/0x220 ? i915_gem_do_execbuffer+0xa00/0xa00 [i915] ? handle_mm_fault+0x10d/0x2a0 vfs_ioctl+0x18/0x30 do_vfs_ioctl+0x14b/0x3f0 SyS_ioctl+0x92/0xa0 entry_SYSCALL_64_fastpath+0x1a/0xa9 RIP: 0033:0x7fb53b4fcb77 RSP: 002b:00007ffe0c572898 EFLAGS: 00003246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00007fb53e17c038 RCX: 00007fb53b4fcb77 RDX: 00007ffe0c572900 RSI: 0000000040406469 RDI: 000000000000000b RBP: 00007fb5376d67e0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000028 R11: 0000000000003246 R12: 0000000000000000 R13: 0000000000000000 R14: 000055eecb314d00 R15: 000055eecb315460 Code: 0f 84 5d ff ff ff eb a2 0f 1f 84 00 00 00 00 00 55 48 89 e5 41 57 41 56 41 55 41 54 53 48 83 ec 58 0f 1f 44 00 00 31 c0 89 4d b0 <4c> 8b 60 10 44 8b 70 0c 48 89 d0 4c 8b 2e 48 c1 e8 27 25 ff 01 RIP: gen8_ppgtt_insert_4lvl+0x1b/0x1e0 [i915] RSP: ffffc90005f9b8c8 CR2: 0000000000000010 Recent gccs, such as 4.9, 6.3 or 7.2, do not generate the warning nor do they explode on use. If we manually create the struct using locals from the stack, this should eliminate this issue, and does not alter code generation with gcc-7.2. Fixes: 894ccebee2b0 ("drm/i915: Micro-optimise gen8_ppgtt_insert_entries()") Reported-by: Kelly French Signed-off-by: Chris Wilson Cc: Kelly French Cc: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20171106211128.12538-1-chris@chris-wilson.co.uk Tested-by: Kelly French Reviewed-by: Tvrtko Ursulin (cherry picked from commit 5684514ba9dc6d7aa932cc53d97d866b2386221f) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_gem_gtt.c | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index e2410eb5d96e..ad524cb0f6fc 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -832,10 +832,14 @@ static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm, } } -struct sgt_dma { +static inline struct sgt_dma { struct scatterlist *sg; dma_addr_t dma, max; -}; +} sgt_dma(struct i915_vma *vma) { + struct scatterlist *sg = vma->pages->sgl; + dma_addr_t addr = sg_dma_address(sg); + return (struct sgt_dma) { sg, addr, addr + sg->length }; +} struct gen8_insert_pte { u16 pml4e; @@ -916,11 +920,7 @@ static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm, u32 unused) { struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - struct sgt_dma iter = { - .sg = vma->pages->sgl, - .dma = sg_dma_address(iter.sg), - .max = iter.dma + iter.sg->length, - }; + struct sgt_dma iter = sgt_dma(vma); struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start); gen8_ppgtt_insert_pte_entries(ppgtt, &ppgtt->pdp, &iter, &idx, @@ -933,11 +933,7 @@ static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm, u32 unused) { struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - struct sgt_dma iter = { - .sg = vma->pages->sgl, - .dma = sg_dma_address(iter.sg), - .max = iter.dma + iter.sg->length, - }; + struct sgt_dma iter = sgt_dma(vma); struct i915_page_directory_pointer **pdps = ppgtt->pml4.pdps; struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start); @@ -1632,13 +1628,10 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, unsigned act_pt = first_entry / GEN6_PTES; unsigned act_pte = first_entry % GEN6_PTES; const u32 pte_encode = vm->pte_encode(0, cache_level, flags); - struct sgt_dma iter; + struct sgt_dma iter = sgt_dma(vma); gen6_pte_t *vaddr; vaddr = kmap_atomic_px(ppgtt->pd.page_table[act_pt]); - iter.sg = vma->pages->sgl; - iter.dma = sg_dma_address(iter.sg); - iter.max = iter.dma + iter.sg->length; do { vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma); -- cgit From f7dc4c9a855a13dbb33294c9fc94f17af03f6291 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Wed, 8 Nov 2017 08:09:52 -0800 Subject: apparmor: fix off-by-one comparison on MAXMAPPED_SIG This came in yesterday, and I have verified our regression tests were missing this and it can cause an oops. Please apply. There is a an off-by-one comparision on sig against MAXMAPPED_SIG that can lead to a read outside the sig_map array if sig is MAXMAPPED_SIG. Fix this. Verified that the check is an out of bounds case that can cause an oops. Revised: add comparison fix to second case Fixes: cd1dbf76b23d ("apparmor: add the ability to mediate signals") Signed-off-by: Colin Ian King Signed-off-by: John Johansen Signed-off-by: Linus Torvalds --- security/apparmor/ipc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/security/apparmor/ipc.c b/security/apparmor/ipc.c index 66fb9ede9447..7ca0032e7ba9 100644 --- a/security/apparmor/ipc.c +++ b/security/apparmor/ipc.c @@ -128,7 +128,7 @@ static inline int map_signal_num(int sig) return SIGUNKNOWN; else if (sig >= SIGRTMIN) return sig - SIGRTMIN + 128; /* rt sigs mapped to 128 */ - else if (sig <= MAXMAPPED_SIG) + else if (sig < MAXMAPPED_SIG) return sig_map[sig]; return SIGUNKNOWN; } @@ -163,7 +163,7 @@ static void audit_signal_cb(struct audit_buffer *ab, void *va) audit_signal_mask(ab, aad(sa)->denied); } } - if (aad(sa)->signal <= MAXMAPPED_SIG) + if (aad(sa)->signal < MAXMAPPED_SIG) audit_log_format(ab, " signal=%s", sig_names[aad(sa)->signal]); else audit_log_format(ab, " signal=rtmin+%d", -- cgit From d0cd64b02aa854d68ce517cb7da1fe4e4fff2653 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 8 Nov 2017 11:28:45 -0800 Subject: x86/idt: Remove X86_TRAP_BP initialization in idt_setup_traps() Commit b70543a0b2b6("x86/idt: Move regular trap init to tables") moves regular trap init for each trap vector into a table based initialization. It introduced the initialization for vector X86_TRAP_BP which was not in the code which it replaced. This breaks uprobe functionality for x86_32; the probed program segfaults instead of handling the probe proper. The reason for this is that TRAP_BP is set up as system interrupt gate (DPL3) in the early IDT and then replaced by a regular interrupt gate (DPL0) in idt_setup_traps(). The DPL0 restriction causes the int3 trap to fail with a #GP resulting in a SIGSEGV of the probed program. On 64bit this does not cause a problem because the IDT entry is replaced with a system interrupt gate (DPL3) with interrupt stack afterwards. Remove X86_TRAP_BP from the def_idts table which is used in idt_setup_traps(). Remove a redundant entry for X86_TRAP_NMI in def_idts while at it. Tested on both x86_64 and x86_32. [ tglx: Amended changelog with a description of the root cause ] Fixes: b70543a0b2b6("x86/idt: Move regular trap init to tables") Reported-and-tested-by: Yonghong Song Signed-off-by: Yonghong Song Signed-off-by: Thomas Gleixner Cc: a.p.zijlstra@chello.nl Cc: ast@fb.com Cc: oleg@redhat.com Cc: luto@kernel.org Cc: kernel-team@fb.com Link: https://lkml.kernel.org/r/20171108192845.552709-1-yhs@fb.com --- arch/x86/kernel/idt.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 6107ee1cb8d5..014cb2fc47ff 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -92,8 +92,6 @@ static const __initdata struct idt_data def_idts[] = { INTG(X86_TRAP_DF, double_fault), #endif INTG(X86_TRAP_DB, debug), - INTG(X86_TRAP_NMI, nmi), - INTG(X86_TRAP_BP, int3), #ifdef CONFIG_X86_MCE INTG(X86_TRAP_MC, &machine_check), -- cgit From c0f3ea1589394deac2d840c685f57c69e4ac4243 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 8 Nov 2017 12:51:04 -0800 Subject: stop using '%pK' for /proc/kallsyms pointer values Not only is it annoying to have one single flag for all pointers, as if that was a global choice and all kernel pointers are the same, but %pK can't get the 'access' vs 'open' time check right anyway. So make the /proc/kallsyms pointer value code use logic specific to that particular file. We do continue to honor kptr_restrict, but the default (which is unrestricted) is changed to instead take expected users into account, and restrict access by default. Right now the only actual expected user is kernel profiling, which has a separate sysctl flag for kernel profile access. There may be others. Signed-off-by: Linus Torvalds --- kernel/kallsyms.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 2 deletions(-) diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 127e7cfafa55..51b49ed452e4 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -480,6 +480,7 @@ struct kallsym_iter { char name[KSYM_NAME_LEN]; char module_name[MODULE_NAME_LEN]; int exported; + int show_value; }; static int get_ksymbol_mod(struct kallsym_iter *iter) @@ -580,14 +581,23 @@ static void s_stop(struct seq_file *m, void *p) { } +#ifndef CONFIG_64BIT +# define KALLSYM_FMT "%08lx" +#else +# define KALLSYM_FMT "%016lx" +#endif + static int s_show(struct seq_file *m, void *p) { + unsigned long value; struct kallsym_iter *iter = m->private; /* Some debugging symbols have no name. Ignore them. */ if (!iter->name[0]) return 0; + value = iter->show_value ? iter->value : 0; + if (iter->module_name[0]) { char type; @@ -597,10 +607,10 @@ static int s_show(struct seq_file *m, void *p) */ type = iter->exported ? toupper(iter->type) : tolower(iter->type); - seq_printf(m, "%pK %c %s\t[%s]\n", (void *)iter->value, + seq_printf(m, KALLSYM_FMT " %c %s\t[%s]\n", value, type, iter->name, iter->module_name); } else - seq_printf(m, "%pK %c %s\n", (void *)iter->value, + seq_printf(m, KALLSYM_FMT " %c %s\n", value, iter->type, iter->name); return 0; } @@ -612,6 +622,40 @@ static const struct seq_operations kallsyms_op = { .show = s_show }; +static inline int kallsyms_for_perf(void) +{ +#ifdef CONFIG_PERF_EVENTS + extern int sysctl_perf_event_paranoid; + if (sysctl_perf_event_paranoid <= 1) + return 1; +#endif + return 0; +} + +/* + * We show kallsyms information even to normal users if we've enabled + * kernel profiling and are explicitly not paranoid (so kptr_restrict + * is clear, and sysctl_perf_event_paranoid isn't set). + * + * Otherwise, require CAP_SYSLOG (assuming kptr_restrict isn't set to + * block even that). + */ +static int kallsyms_show_value(void) +{ + switch (kptr_restrict) { + case 0: + if (kallsyms_for_perf()) + return 1; + /* fallthrough */ + case 1: + if (has_capability_noaudit(current, CAP_SYSLOG)) + return 1; + /* fallthrough */ + default: + return 0; + } +} + static int kallsyms_open(struct inode *inode, struct file *file) { /* @@ -625,6 +669,7 @@ static int kallsyms_open(struct inode *inode, struct file *file) return -ENOMEM; reset_iter(iter, 0); + iter->show_value = kallsyms_show_value(); return 0; } -- cgit From 87df26175e67c26ccdd3a002fbbb8cde78e28a19 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Wed, 8 Nov 2017 21:18:18 +0100 Subject: x86/mm: Unbreak modules that rely on external PAGE_KERNEL availability Commit 7744ccdbc16f0 ("x86/mm: Add Secure Memory Encryption (SME) support") as a side-effect made PAGE_KERNEL all of a sudden unavailable to modules which can't make use of EXPORT_SYMBOL_GPL() symbols. This is because once SME is enabled, sme_me_mask (which is introduced as EXPORT_SYMBOL_GPL) makes its way to PAGE_KERNEL through _PAGE_ENC, causing imminent build failure for all the modules which make use of all the EXPORT-SYMBOL()-exported API (such as vmap(), __vmalloc(), remap_pfn_range(), ...). Exporting (as EXPORT_SYMBOL()) interfaces (and having done so for ages) that take pgprot_t argument, while making it impossible to -- all of a sudden -- pass PAGE_KERNEL to it, feels rather incosistent. Restore the original behavior and make it possible to pass PAGE_KERNEL to all its EXPORT_SYMBOL() consumers. [ This is all so not wonderful. We shouldn't need that "sme_me_mask" access at all in all those places that really don't care about that level of detail, and just want _PAGE_KERNEL or whatever. We have some similar issues with _PAGE_CACHE_WP and _PAGE_NOCACHE, both of which hide a "cachemode2protval()" call, and which also ends up using another EXPORT_SYMBOL(), but at least that only triggers for the much more rare cases. Maybe we could move these dynamic page table bits to be generated much deeper down in the VM layer, instead of hiding them in the macros that everybody uses. So this all would merit some cleanup. But not today. - Linus ] Cc: Tom Lendacky Signed-off-by: Jiri Kosina Despised-by: Thomas Gleixner Signed-off-by: Linus Torvalds --- arch/x86/mm/mem_encrypt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index 16c5f37933a2..0286327e65fa 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -40,7 +40,7 @@ static char sme_cmdline_off[] __initdata = "off"; * section is later cleared. */ u64 sme_me_mask __section(.data) = 0; -EXPORT_SYMBOL_GPL(sme_me_mask); +EXPORT_SYMBOL(sme_me_mask); /* Buffer used for early in-place encryption by BSP, no locking needed */ static char sme_early_buffer[PAGE_SIZE] __aligned(PAGE_SIZE); -- cgit From 1a8e6b48fbf534028ce4031d0d035e7e72779cef Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 9 Nov 2017 09:21:44 +0900 Subject: Revert "net: usb: asix: fill null-ptr-deref in asix_suspend" This reverts commit baedf68a068ca29624f241426843635920f16e1d. There is an updated version of this fix which covers the problem more thoroughly. Signed-off-by: David S. Miller --- drivers/net/usb/asix_devices.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index 743416be84f3..b2ff88e69a81 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -626,7 +626,7 @@ static int asix_suspend(struct usb_interface *intf, pm_message_t message) struct usbnet *dev = usb_get_intfdata(intf); struct asix_common_private *priv = dev->driver_priv; - if (priv && priv->suspend) + if (priv->suspend) priv->suspend(dev); return usbnet_suspend(intf, message); -- cgit From 8f5624629105589bcc23d0e51cc01bd8103d09a5 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 6 Nov 2017 13:26:46 +0100 Subject: net: usb: asix: fill null-ptr-deref in asix_suspend When asix_suspend() is called dev->driver_priv might not have been assigned a value, so we need to check that it's not NULL. Similar issue is present in asix_resume(), this patch fixes it as well. Found by syzkaller. kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] PREEMPT SMP KASAN Modules linked in: CPU: 0 PID: 24 Comm: kworker/0:1 Not tainted 4.14.0-rc4-43422-geccacdd69a8c #400 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Workqueue: usb_hub_wq hub_event task: ffff88006bb36300 task.stack: ffff88006bba8000 RIP: 0010:asix_suspend+0x76/0xc0 drivers/net/usb/asix_devices.c:629 RSP: 0018:ffff88006bbae718 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: ffff880061ba3b80 RCX: 1ffff1000c34d644 RDX: 0000000000000001 RSI: 0000000000000402 RDI: 0000000000000008 RBP: ffff88006bbae738 R08: 1ffff1000d775cad R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffff8800630a8b40 R13: 0000000000000000 R14: 0000000000000402 R15: ffff880061ba3b80 FS: 0000000000000000(0000) GS:ffff88006c600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ff33cf89000 CR3: 0000000061c0a000 CR4: 00000000000006f0 Call Trace: usb_suspend_interface drivers/usb/core/driver.c:1209 usb_suspend_both+0x27f/0x7e0 drivers/usb/core/driver.c:1314 usb_runtime_suspend+0x41/0x120 drivers/usb/core/driver.c:1852 __rpm_callback+0x339/0xb60 drivers/base/power/runtime.c:334 rpm_callback+0x106/0x220 drivers/base/power/runtime.c:461 rpm_suspend+0x465/0x1980 drivers/base/power/runtime.c:596 __pm_runtime_suspend+0x11e/0x230 drivers/base/power/runtime.c:1009 pm_runtime_put_sync_autosuspend ./include/linux/pm_runtime.h:251 usb_new_device+0xa37/0x1020 drivers/usb/core/hub.c:2487 hub_port_connect drivers/usb/core/hub.c:4903 hub_port_connect_change drivers/usb/core/hub.c:5009 port_event drivers/usb/core/hub.c:5115 hub_event+0x194d/0x3740 drivers/usb/core/hub.c:5195 process_one_work+0xc7f/0x1db0 kernel/workqueue.c:2119 worker_thread+0x221/0x1850 kernel/workqueue.c:2253 kthread+0x3a1/0x470 kernel/kthread.c:231 ret_from_fork+0x2a/0x40 arch/x86/entry/entry_64.S:431 Code: 8d 7c 24 20 48 89 fa 48 c1 ea 03 80 3c 02 00 75 5b 48 b8 00 00 00 00 00 fc ff df 4d 8b 6c 24 20 49 8d 7d 08 48 89 fa 48 c1 ea 03 <80> 3c 02 00 75 34 4d 8b 6d 08 4d 85 ed 74 0b e8 26 2b 51 fd 4c RIP: asix_suspend+0x76/0xc0 RSP: ffff88006bbae718 ---[ end trace dfc4f5649284342c ]--- Signed-off-by: Andrey Konovalov Signed-off-by: David S. Miller --- drivers/net/usb/asix_devices.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index b2ff88e69a81..3d4f7959dabb 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -626,7 +626,7 @@ static int asix_suspend(struct usb_interface *intf, pm_message_t message) struct usbnet *dev = usb_get_intfdata(intf); struct asix_common_private *priv = dev->driver_priv; - if (priv->suspend) + if (priv && priv->suspend) priv->suspend(dev); return usbnet_suspend(intf, message); @@ -678,7 +678,7 @@ static int asix_resume(struct usb_interface *intf) struct usbnet *dev = usb_get_intfdata(intf); struct asix_common_private *priv = dev->driver_priv; - if (priv->resume) + if (priv && priv->resume) priv->resume(dev); return usbnet_resume(intf); -- cgit From c7e460ce55724d4e4e22d3126e5c47273819c53a Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 6 Nov 2017 13:47:18 -0800 Subject: Revert "net_sched: hold netns refcnt for each action" This reverts commit ceffcc5e254b450e6159f173e4538215cebf1b59. If we hold that refcnt, the netns can never be destroyed until all actions are destroyed by user, this breaks our netns design which we expect all actions are destroyed when we destroy the whole netns. Cc: Lucas Bates Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/act_api.h | 4 +--- net/sched/act_api.c | 2 -- net/sched/act_bpf.c | 2 +- net/sched/act_connmark.c | 2 +- net/sched/act_csum.c | 2 +- net/sched/act_gact.c | 2 +- net/sched/act_ife.c | 2 +- net/sched/act_ipt.c | 4 ++-- net/sched/act_mirred.c | 2 +- net/sched/act_nat.c | 2 +- net/sched/act_pedit.c | 2 +- net/sched/act_police.c | 2 +- net/sched/act_sample.c | 2 +- net/sched/act_simple.c | 2 +- net/sched/act_skbedit.c | 2 +- net/sched/act_skbmod.c | 2 +- net/sched/act_tunnel_key.c | 2 +- net/sched/act_vlan.c | 2 +- 18 files changed, 18 insertions(+), 22 deletions(-) diff --git a/include/net/act_api.h b/include/net/act_api.h index 1e6df0eb058f..a10a3b1813f3 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -14,7 +14,6 @@ struct tcf_idrinfo { spinlock_t lock; struct idr action_idr; - struct net *net; }; struct tc_action_ops; @@ -106,7 +105,7 @@ struct tc_action_net { static inline int tc_action_net_init(struct tc_action_net *tn, - const struct tc_action_ops *ops, struct net *net) + const struct tc_action_ops *ops) { int err = 0; @@ -114,7 +113,6 @@ int tc_action_net_init(struct tc_action_net *tn, if (!tn->idrinfo) return -ENOMEM; tn->ops = ops; - tn->idrinfo->net = net; spin_lock_init(&tn->idrinfo->lock); idr_init(&tn->idrinfo->action_idr); return err; diff --git a/net/sched/act_api.c b/net/sched/act_api.c index ca2ff0b3123f..8f2c63514956 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -78,7 +78,6 @@ static void tcf_idr_remove(struct tcf_idrinfo *idrinfo, struct tc_action *p) spin_lock_bh(&idrinfo->lock); idr_remove_ext(&idrinfo->action_idr, p->tcfa_index); spin_unlock_bh(&idrinfo->lock); - put_net(idrinfo->net); gen_kill_estimator(&p->tcfa_rate_est); free_tcf(p); } @@ -337,7 +336,6 @@ err3: p->idrinfo = idrinfo; p->ops = ops; INIT_LIST_HEAD(&p->list); - get_net(idrinfo->net); *a = p; return 0; } diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 9bce8cc84cbb..c0c707eb2c96 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -398,7 +398,7 @@ static __net_init int bpf_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, bpf_net_id); - return tc_action_net_init(tn, &act_bpf_ops, net); + return tc_action_net_init(tn, &act_bpf_ops); } static void __net_exit bpf_exit_net(struct net *net) diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 34e52d01a5dd..10b7a8855a6c 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -206,7 +206,7 @@ static __net_init int connmark_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, connmark_net_id); - return tc_action_net_init(tn, &act_connmark_ops, net); + return tc_action_net_init(tn, &act_connmark_ops); } static void __net_exit connmark_exit_net(struct net *net) diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 35171df2ebef..1c40caadcff9 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -626,7 +626,7 @@ static __net_init int csum_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, csum_net_id); - return tc_action_net_init(tn, &act_csum_ops, net); + return tc_action_net_init(tn, &act_csum_ops); } static void __net_exit csum_exit_net(struct net *net) diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index ef7f7f39d26d..e29a48ef7fc3 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -232,7 +232,7 @@ static __net_init int gact_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, gact_net_id); - return tc_action_net_init(tn, &act_gact_ops, net); + return tc_action_net_init(tn, &act_gact_ops); } static void __net_exit gact_exit_net(struct net *net) diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index f65e4b5058e0..8ccd35825b6b 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -818,7 +818,7 @@ static __net_init int ife_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, ife_net_id); - return tc_action_net_init(tn, &act_ife_ops, net); + return tc_action_net_init(tn, &act_ife_ops); } static void __net_exit ife_exit_net(struct net *net) diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index dbdf3b2470d5..d9e399a7e3d5 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -334,7 +334,7 @@ static __net_init int ipt_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, ipt_net_id); - return tc_action_net_init(tn, &act_ipt_ops, net); + return tc_action_net_init(tn, &act_ipt_ops); } static void __net_exit ipt_exit_net(struct net *net) @@ -384,7 +384,7 @@ static __net_init int xt_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, xt_net_id); - return tc_action_net_init(tn, &act_xt_ops, net); + return tc_action_net_init(tn, &act_xt_ops); } static void __net_exit xt_exit_net(struct net *net) diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 84759cfd5a33..416627c66f08 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -343,7 +343,7 @@ static __net_init int mirred_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, mirred_net_id); - return tc_action_net_init(tn, &act_mirred_ops, net); + return tc_action_net_init(tn, &act_mirred_ops); } static void __net_exit mirred_exit_net(struct net *net) diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 7eeaaf9217b6..c365d01b99c8 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -307,7 +307,7 @@ static __net_init int nat_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, nat_net_id); - return tc_action_net_init(tn, &act_nat_ops, net); + return tc_action_net_init(tn, &act_nat_ops); } static void __net_exit nat_exit_net(struct net *net) diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index b3d82c334a5f..491fe5deb09e 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -450,7 +450,7 @@ static __net_init int pedit_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, pedit_net_id); - return tc_action_net_init(tn, &act_pedit_ops, net); + return tc_action_net_init(tn, &act_pedit_ops); } static void __net_exit pedit_exit_net(struct net *net) diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 9ec42b26e4b9..3bb2ebf9e9ae 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -331,7 +331,7 @@ static __net_init int police_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, police_net_id); - return tc_action_net_init(tn, &act_police_ops, net); + return tc_action_net_init(tn, &act_police_ops); } static void __net_exit police_exit_net(struct net *net) diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index e69a1e3a39bf..8b5abcd2f32f 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -240,7 +240,7 @@ static __net_init int sample_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, sample_net_id); - return tc_action_net_init(tn, &act_sample_ops, net); + return tc_action_net_init(tn, &act_sample_ops); } static void __net_exit sample_exit_net(struct net *net) diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index a8d0ea95f894..e7b57e5071a3 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -201,7 +201,7 @@ static __net_init int simp_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, simp_net_id); - return tc_action_net_init(tn, &act_simp_ops, net); + return tc_action_net_init(tn, &act_simp_ops); } static void __net_exit simp_exit_net(struct net *net) diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index fbac62472e09..59949d61f20d 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -238,7 +238,7 @@ static __net_init int skbedit_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, skbedit_net_id); - return tc_action_net_init(tn, &act_skbedit_ops, net); + return tc_action_net_init(tn, &act_skbedit_ops); } static void __net_exit skbedit_exit_net(struct net *net) diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index 8e12d8897d2f..b642ad3d39dd 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -263,7 +263,7 @@ static __net_init int skbmod_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, skbmod_net_id); - return tc_action_net_init(tn, &act_skbmod_ops, net); + return tc_action_net_init(tn, &act_skbmod_ops); } static void __net_exit skbmod_exit_net(struct net *net) diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index c33faa373cf2..30c96274c638 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -322,7 +322,7 @@ static __net_init int tunnel_key_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); - return tc_action_net_init(tn, &act_tunnel_key_ops, net); + return tc_action_net_init(tn, &act_tunnel_key_ops); } static void __net_exit tunnel_key_exit_net(struct net *net) diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 115fc33cc6d8..16eb067a8d8f 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -269,7 +269,7 @@ static __net_init int vlan_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, vlan_net_id); - return tc_action_net_init(tn, &act_vlan_ops, net); + return tc_action_net_init(tn, &act_vlan_ops); } static void __net_exit vlan_exit_net(struct net *net) -- cgit From e4b95c41df36befcfd117210900cd790bc2cd048 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 6 Nov 2017 13:47:19 -0800 Subject: net_sched: introduce tcf_exts_get_net() and tcf_exts_put_net() Instead of holding netns refcnt in tc actions, we can minimize the holding time by saving it in struct tcf_exts instead. This means we can just hold netns refcnt right before call_rcu() and release it after tcf_exts_destroy() is done. However, because on netns cleanup path we call tcf_proto_destroy() too, obviously we can not hold netns for a zero refcnt, in this case we have to do cleanup synchronously. It is fine for RCU too, the caller cleanup_net() already waits for a grace period. For other cases, refcnt is non-zero and we can safely grab it as normal and release it after we are done. This patch provides two new API for each filter to use: tcf_exts_get_net() and tcf_exts_put_net(). And all filters now can use the following pattern: void __destroy_filter() { tcf_exts_destroy(); tcf_exts_put_net(); // <== release netns refcnt kfree(); } void some_work() { rtnl_lock(); __destroy_filter(); rtnl_unlock(); } void some_rcu_callback() { tcf_queue_work(some_work); } if (tcf_exts_get_net()) // <== hold netns refcnt call_rcu(some_rcu_callback); else __destroy_filter(); Cc: Lucas Bates Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 24 ++++++++++++++++++++++++ net/sched/cls_api.c | 1 + 2 files changed, 25 insertions(+) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 70ca2437740e..8826747ef83e 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -94,6 +94,7 @@ struct tcf_exts { __u32 type; /* for backward compat(TCA_OLD_COMPAT) */ int nr_actions; struct tc_action **actions; + struct net *net; #endif /* Map to export classifier specific extension TLV types to the * generic extensions API. Unsupported extensions must be set to 0. @@ -107,6 +108,7 @@ static inline int tcf_exts_init(struct tcf_exts *exts, int action, int police) #ifdef CONFIG_NET_CLS_ACT exts->type = 0; exts->nr_actions = 0; + exts->net = NULL; exts->actions = kcalloc(TCA_ACT_MAX_PRIO, sizeof(struct tc_action *), GFP_KERNEL); if (!exts->actions) @@ -117,6 +119,28 @@ static inline int tcf_exts_init(struct tcf_exts *exts, int action, int police) return 0; } +/* Return false if the netns is being destroyed in cleanup_net(). Callers + * need to do cleanup synchronously in this case, otherwise may race with + * tc_action_net_exit(). Return true for other cases. + */ +static inline bool tcf_exts_get_net(struct tcf_exts *exts) +{ +#ifdef CONFIG_NET_CLS_ACT + exts->net = maybe_get_net(exts->net); + return exts->net != NULL; +#else + return true; +#endif +} + +static inline void tcf_exts_put_net(struct tcf_exts *exts) +{ +#ifdef CONFIG_NET_CLS_ACT + if (exts->net) + put_net(exts->net); +#endif +} + static inline void tcf_exts_to_list(const struct tcf_exts *exts, struct list_head *actions) { diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index b2d310745487..ecbb019efcbd 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -927,6 +927,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, exts->actions[i++] = act; exts->nr_actions = i; } + exts->net = net; } #else if ((exts->action && tb[exts->action]) || -- cgit From 0b2a59894b7657fab46b50f176bd772aa495044f Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 6 Nov 2017 13:47:20 -0800 Subject: cls_basic: use tcf_exts_get_net() before call_rcu() Hold netns refcnt before call_rcu() and release it after the tcf_exts_destroy() is done. Note, on ->destroy() path we have to respect the return value of tcf_exts_get_net(), on other paths it should always return true, so we don't need to care. Cc: Lucas Bates Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_basic.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index f177649a2419..e43c56d5b96a 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -85,16 +85,21 @@ static int basic_init(struct tcf_proto *tp) return 0; } +static void __basic_delete_filter(struct basic_filter *f) +{ + tcf_exts_destroy(&f->exts); + tcf_em_tree_destroy(&f->ematches); + tcf_exts_put_net(&f->exts); + kfree(f); +} + static void basic_delete_filter_work(struct work_struct *work) { struct basic_filter *f = container_of(work, struct basic_filter, work); rtnl_lock(); - tcf_exts_destroy(&f->exts); - tcf_em_tree_destroy(&f->ematches); + __basic_delete_filter(f); rtnl_unlock(); - - kfree(f); } static void basic_delete_filter(struct rcu_head *head) @@ -113,7 +118,10 @@ static void basic_destroy(struct tcf_proto *tp) list_for_each_entry_safe(f, n, &head->flist, link) { list_del_rcu(&f->link); tcf_unbind_filter(tp, &f->res); - call_rcu(&f->rcu, basic_delete_filter); + if (tcf_exts_get_net(&f->exts)) + call_rcu(&f->rcu, basic_delete_filter); + else + __basic_delete_filter(f); } kfree_rcu(head, rcu); } @@ -125,6 +133,7 @@ static int basic_delete(struct tcf_proto *tp, void *arg, bool *last) list_del_rcu(&f->link); tcf_unbind_filter(tp, &f->res); + tcf_exts_get_net(&f->exts); call_rcu(&f->rcu, basic_delete_filter); *last = list_empty(&head->flist); return 0; @@ -219,6 +228,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb, if (fold) { list_replace_rcu(&fold->link, &fnew->link); tcf_unbind_filter(tp, &fold->res); + tcf_exts_get_net(&fold->exts); call_rcu(&fold->rcu, basic_delete_filter); } else { list_add_rcu(&fnew->link, &head->flist); -- cgit From aae2c35ec89252639a97769fa72dbbf8f1cc3107 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 6 Nov 2017 13:47:21 -0800 Subject: cls_bpf: use tcf_exts_get_net() before call_rcu() Hold netns refcnt before call_rcu() and release it after the tcf_exts_destroy() is done. Note, on ->destroy() path we have to respect the return value of tcf_exts_get_net(), on other paths it should always return true, so we don't need to care. Cc: Lucas Bates Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_bpf.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 037a3ae86829..990eb4d91d54 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -249,6 +249,7 @@ static int cls_bpf_init(struct tcf_proto *tp) static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog) { tcf_exts_destroy(&prog->exts); + tcf_exts_put_net(&prog->exts); if (cls_bpf_is_ebpf(prog)) bpf_prog_put(prog->filter); @@ -282,7 +283,10 @@ static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog) cls_bpf_stop_offload(tp, prog); list_del_rcu(&prog->link); tcf_unbind_filter(tp, &prog->res); - call_rcu(&prog->rcu, cls_bpf_delete_prog_rcu); + if (tcf_exts_get_net(&prog->exts)) + call_rcu(&prog->rcu, cls_bpf_delete_prog_rcu); + else + __cls_bpf_delete_prog(prog); } static int cls_bpf_delete(struct tcf_proto *tp, void *arg, bool *last) @@ -516,6 +520,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, if (oldprog) { list_replace_rcu(&oldprog->link, &prog->link); tcf_unbind_filter(tp, &oldprog->res); + tcf_exts_get_net(&oldprog->exts); call_rcu(&oldprog->rcu, cls_bpf_delete_prog_rcu); } else { list_add_rcu(&prog->link, &head->plist); -- cgit From ed1481681414e4d4264ad46864d5c1da5ff6ccb1 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 6 Nov 2017 13:47:22 -0800 Subject: cls_cgroup: use tcf_exts_get_net() before call_rcu() Hold netns refcnt before call_rcu() and release it after the tcf_exts_destroy() is done. Note, on ->destroy() path we have to respect the return value of tcf_exts_get_net(), on other paths it should always return true, so we don't need to care. Cc: Lucas Bates Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_cgroup.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index a97e069bee89..309d5899265f 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -60,15 +60,21 @@ static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = { [TCA_CGROUP_EMATCHES] = { .type = NLA_NESTED }, }; +static void __cls_cgroup_destroy(struct cls_cgroup_head *head) +{ + tcf_exts_destroy(&head->exts); + tcf_em_tree_destroy(&head->ematches); + tcf_exts_put_net(&head->exts); + kfree(head); +} + static void cls_cgroup_destroy_work(struct work_struct *work) { struct cls_cgroup_head *head = container_of(work, struct cls_cgroup_head, work); rtnl_lock(); - tcf_exts_destroy(&head->exts); - tcf_em_tree_destroy(&head->ematches); - kfree(head); + __cls_cgroup_destroy(head); rtnl_unlock(); } @@ -124,8 +130,10 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, goto errout; rcu_assign_pointer(tp->root, new); - if (head) + if (head) { + tcf_exts_get_net(&head->exts); call_rcu(&head->rcu, cls_cgroup_destroy_rcu); + } return 0; errout: tcf_exts_destroy(&new->exts); @@ -138,8 +146,12 @@ static void cls_cgroup_destroy(struct tcf_proto *tp) struct cls_cgroup_head *head = rtnl_dereference(tp->root); /* Head can still be NULL due to cls_cgroup_init(). */ - if (head) - call_rcu(&head->rcu, cls_cgroup_destroy_rcu); + if (head) { + if (tcf_exts_get_net(&head->exts)) + call_rcu(&head->rcu, cls_cgroup_destroy_rcu); + else + __cls_cgroup_destroy(head); + } } static int cls_cgroup_delete(struct tcf_proto *tp, void *arg, bool *last) -- cgit From 22f7cec93f0af86c4b66bf34a977da9d7cef076e Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 6 Nov 2017 13:47:23 -0800 Subject: cls_flow: use tcf_exts_get_net() before call_rcu() Hold netns refcnt before call_rcu() and release it after the tcf_exts_destroy() is done. Note, on ->destroy() path we have to respect the return value of tcf_exts_get_net(), on other paths it should always return true, so we don't need to care. Cc: Lucas Bates Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_flow.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 67f3a2af6aab..85f765cff697 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -372,15 +372,21 @@ static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = { [TCA_FLOW_PERTURB] = { .type = NLA_U32 }, }; -static void flow_destroy_filter_work(struct work_struct *work) +static void __flow_destroy_filter(struct flow_filter *f) { - struct flow_filter *f = container_of(work, struct flow_filter, work); - - rtnl_lock(); del_timer_sync(&f->perturb_timer); tcf_exts_destroy(&f->exts); tcf_em_tree_destroy(&f->ematches); + tcf_exts_put_net(&f->exts); kfree(f); +} + +static void flow_destroy_filter_work(struct work_struct *work) +{ + struct flow_filter *f = container_of(work, struct flow_filter, work); + + rtnl_lock(); + __flow_destroy_filter(f); rtnl_unlock(); } @@ -552,8 +558,10 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, *arg = fnew; - if (fold) + if (fold) { + tcf_exts_get_net(&fold->exts); call_rcu(&fold->rcu, flow_destroy_filter); + } return 0; err2: @@ -570,6 +578,7 @@ static int flow_delete(struct tcf_proto *tp, void *arg, bool *last) struct flow_filter *f = arg; list_del_rcu(&f->list); + tcf_exts_get_net(&f->exts); call_rcu(&f->rcu, flow_destroy_filter); *last = list_empty(&head->filters); return 0; @@ -594,7 +603,10 @@ static void flow_destroy(struct tcf_proto *tp) list_for_each_entry_safe(f, next, &head->filters, list) { list_del_rcu(&f->list); - call_rcu(&f->rcu, flow_destroy_filter); + if (tcf_exts_get_net(&f->exts)) + call_rcu(&f->rcu, flow_destroy_filter); + else + __flow_destroy_filter(f); } kfree_rcu(head, rcu); } -- cgit From 0dadc117ac8bc78d8144e862ac8ad23f342f9ea8 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 6 Nov 2017 13:47:24 -0800 Subject: cls_flower: use tcf_exts_get_net() before call_rcu() Hold netns refcnt before call_rcu() and release it after the tcf_exts_destroy() is done. Note, on ->destroy() path we have to respect the return value of tcf_exts_get_net(), on other paths it should always return true, so we don't need to care. Cc: Lucas Bates Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_flower.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 5b5722c8b32c..7a838d1c1c00 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -218,13 +218,19 @@ static int fl_init(struct tcf_proto *tp) return 0; } +static void __fl_destroy_filter(struct cls_fl_filter *f) +{ + tcf_exts_destroy(&f->exts); + tcf_exts_put_net(&f->exts); + kfree(f); +} + static void fl_destroy_filter_work(struct work_struct *work) { struct cls_fl_filter *f = container_of(work, struct cls_fl_filter, work); rtnl_lock(); - tcf_exts_destroy(&f->exts); - kfree(f); + __fl_destroy_filter(f); rtnl_unlock(); } @@ -318,7 +324,10 @@ static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f) if (!tc_skip_hw(f->flags)) fl_hw_destroy_filter(tp, f); tcf_unbind_filter(tp, &f->res); - call_rcu(&f->rcu, fl_destroy_filter); + if (tcf_exts_get_net(&f->exts)) + call_rcu(&f->rcu, fl_destroy_filter); + else + __fl_destroy_filter(f); } static void fl_destroy_sleepable(struct work_struct *work) @@ -988,6 +997,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, idr_replace_ext(&head->handle_idr, fnew, fnew->handle); list_replace_rcu(&fold->list, &fnew->list); tcf_unbind_filter(tp, &fold->res); + tcf_exts_get_net(&fold->exts); call_rcu(&fold->rcu, fl_destroy_filter); } else { list_add_tail_rcu(&fnew->list, &head->filters); -- cgit From d5f984f5af1d926bc9c7a7f90e7a1e1e313a8ba7 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 6 Nov 2017 13:47:25 -0800 Subject: cls_fw: use tcf_exts_get_net() before call_rcu() Hold netns refcnt before call_rcu() and release it after the tcf_exts_destroy() is done. Note, on ->destroy() path we have to respect the return value of tcf_exts_get_net(), on other paths it should always return true, so we don't need to care. Cc: Lucas Bates Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_fw.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 99183b8621ec..7f45e5ab8afc 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -122,13 +122,19 @@ static int fw_init(struct tcf_proto *tp) return 0; } +static void __fw_delete_filter(struct fw_filter *f) +{ + tcf_exts_destroy(&f->exts); + tcf_exts_put_net(&f->exts); + kfree(f); +} + static void fw_delete_filter_work(struct work_struct *work) { struct fw_filter *f = container_of(work, struct fw_filter, work); rtnl_lock(); - tcf_exts_destroy(&f->exts); - kfree(f); + __fw_delete_filter(f); rtnl_unlock(); } @@ -154,7 +160,10 @@ static void fw_destroy(struct tcf_proto *tp) RCU_INIT_POINTER(head->ht[h], rtnl_dereference(f->next)); tcf_unbind_filter(tp, &f->res); - call_rcu(&f->rcu, fw_delete_filter); + if (tcf_exts_get_net(&f->exts)) + call_rcu(&f->rcu, fw_delete_filter); + else + __fw_delete_filter(f); } } kfree_rcu(head, rcu); @@ -179,6 +188,7 @@ static int fw_delete(struct tcf_proto *tp, void *arg, bool *last) if (pfp == f) { RCU_INIT_POINTER(*fp, rtnl_dereference(f->next)); tcf_unbind_filter(tp, &f->res); + tcf_exts_get_net(&f->exts); call_rcu(&f->rcu, fw_delete_filter); ret = 0; break; @@ -299,6 +309,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, RCU_INIT_POINTER(fnew->next, rtnl_dereference(pfp->next)); rcu_assign_pointer(*fp, fnew); tcf_unbind_filter(tp, &f->res); + tcf_exts_get_net(&f->exts); call_rcu(&f->rcu, fw_delete_filter); *arg = fnew; -- cgit From 57767e785321a427b8cdd282db2b8b33cd218ffa Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 6 Nov 2017 13:47:26 -0800 Subject: cls_matchall: use tcf_exts_get_net() before call_rcu() Hold netns refcnt before call_rcu() and release it after the tcf_exts_destroy() is done. Cc: Lucas Bates Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_matchall.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index c33f711b9019..3684153cd8a9 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -44,13 +44,19 @@ static int mall_init(struct tcf_proto *tp) return 0; } +static void __mall_destroy(struct cls_mall_head *head) +{ + tcf_exts_destroy(&head->exts); + tcf_exts_put_net(&head->exts); + kfree(head); +} + static void mall_destroy_work(struct work_struct *work) { struct cls_mall_head *head = container_of(work, struct cls_mall_head, work); rtnl_lock(); - tcf_exts_destroy(&head->exts); - kfree(head); + __mall_destroy(head); rtnl_unlock(); } @@ -109,7 +115,10 @@ static void mall_destroy(struct tcf_proto *tp) if (tc_should_offload(dev, head->flags)) mall_destroy_hw_filter(tp, head, (unsigned long) head); - call_rcu(&head->rcu, mall_destroy_rcu); + if (tcf_exts_get_net(&head->exts)) + call_rcu(&head->rcu, mall_destroy_rcu); + else + __mall_destroy(head); } static void *mall_get(struct tcf_proto *tp, u32 handle) -- cgit From 3fd51de5e3ba447624a08a8ba29f90d94f0fe909 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 6 Nov 2017 13:47:27 -0800 Subject: cls_route: use tcf_exts_get_net() before call_rcu() Hold netns refcnt before call_rcu() and release it after the tcf_exts_destroy() is done. Note, on ->destroy() path we have to respect the return value of tcf_exts_get_net(), on other paths it should always return true, so we don't need to care. Cc: Lucas Bates Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_route.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 4b14ccd8b8f2..ac9a5b8825b9 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -257,13 +257,19 @@ static int route4_init(struct tcf_proto *tp) return 0; } +static void __route4_delete_filter(struct route4_filter *f) +{ + tcf_exts_destroy(&f->exts); + tcf_exts_put_net(&f->exts); + kfree(f); +} + static void route4_delete_filter_work(struct work_struct *work) { struct route4_filter *f = container_of(work, struct route4_filter, work); rtnl_lock(); - tcf_exts_destroy(&f->exts); - kfree(f); + __route4_delete_filter(f); rtnl_unlock(); } @@ -297,7 +303,10 @@ static void route4_destroy(struct tcf_proto *tp) next = rtnl_dereference(f->next); RCU_INIT_POINTER(b->ht[h2], next); tcf_unbind_filter(tp, &f->res); - call_rcu(&f->rcu, route4_delete_filter); + if (tcf_exts_get_net(&f->exts)) + call_rcu(&f->rcu, route4_delete_filter); + else + __route4_delete_filter(f); } } RCU_INIT_POINTER(head->table[h1], NULL); @@ -338,6 +347,7 @@ static int route4_delete(struct tcf_proto *tp, void *arg, bool *last) /* Delete it */ tcf_unbind_filter(tp, &f->res); + tcf_exts_get_net(&f->exts); call_rcu(&f->rcu, route4_delete_filter); /* Strip RTNL protected tree */ @@ -541,6 +551,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, *arg = f; if (fold) { tcf_unbind_filter(tp, &fold->res); + tcf_exts_get_net(&fold->exts); call_rcu(&fold->rcu, route4_delete_filter); } return 0; -- cgit From 96585063a27f0704dcf7a09f8b78edd6a8973965 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 6 Nov 2017 13:47:28 -0800 Subject: cls_rsvp: use tcf_exts_get_net() before call_rcu() Hold netns refcnt before call_rcu() and release it after the tcf_exts_destroy() is done. Note, on ->destroy() path we have to respect the return value of tcf_exts_get_net(), on other paths it should always return true, so we don't need to care. Cc: Lucas Bates Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_rsvp.h | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index bdbc541787f8..cf325625c99d 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -285,13 +285,19 @@ static int rsvp_init(struct tcf_proto *tp) return -ENOBUFS; } +static void __rsvp_delete_filter(struct rsvp_filter *f) +{ + tcf_exts_destroy(&f->exts); + tcf_exts_put_net(&f->exts); + kfree(f); +} + static void rsvp_delete_filter_work(struct work_struct *work) { struct rsvp_filter *f = container_of(work, struct rsvp_filter, work); rtnl_lock(); - tcf_exts_destroy(&f->exts); - kfree(f); + __rsvp_delete_filter(f); rtnl_unlock(); } @@ -310,7 +316,10 @@ static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f) * grace period, since converted-to-rcu actions are relying on that * in cleanup() callback */ - call_rcu(&f->rcu, rsvp_delete_filter_rcu); + if (tcf_exts_get_net(&f->exts)) + call_rcu(&f->rcu, rsvp_delete_filter_rcu); + else + __rsvp_delete_filter(f); } static void rsvp_destroy(struct tcf_proto *tp) -- cgit From f2b751053ee9314e82c178f6ca0fee7e160fac95 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 6 Nov 2017 13:47:29 -0800 Subject: cls_tcindex: use tcf_exts_get_net() before call_rcu() Hold netns refcnt before call_rcu() and release it after the tcf_exts_destroy() is done. Note, on ->destroy() path we have to respect the return value of tcf_exts_get_net(), on other paths it should always return true, so we don't need to care. Cc: Lucas Bates Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_tcindex.c | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index beaa95e09c25..a76937ee0b2d 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -139,13 +139,19 @@ static int tcindex_init(struct tcf_proto *tp) return 0; } +static void __tcindex_destroy_rexts(struct tcindex_filter_result *r) +{ + tcf_exts_destroy(&r->exts); + tcf_exts_put_net(&r->exts); +} + static void tcindex_destroy_rexts_work(struct work_struct *work) { struct tcindex_filter_result *r; r = container_of(work, struct tcindex_filter_result, work); rtnl_lock(); - tcf_exts_destroy(&r->exts); + __tcindex_destroy_rexts(r); rtnl_unlock(); } @@ -158,14 +164,20 @@ static void tcindex_destroy_rexts(struct rcu_head *head) tcf_queue_work(&r->work); } +static void __tcindex_destroy_fexts(struct tcindex_filter *f) +{ + tcf_exts_destroy(&f->result.exts); + tcf_exts_put_net(&f->result.exts); + kfree(f); +} + static void tcindex_destroy_fexts_work(struct work_struct *work) { struct tcindex_filter *f = container_of(work, struct tcindex_filter, work); rtnl_lock(); - tcf_exts_destroy(&f->result.exts); - kfree(f); + __tcindex_destroy_fexts(f); rtnl_unlock(); } @@ -210,10 +222,17 @@ found: * grace period, since converted-to-rcu actions are relying on that * in cleanup() callback */ - if (f) - call_rcu(&f->rcu, tcindex_destroy_fexts); - else - call_rcu(&r->rcu, tcindex_destroy_rexts); + if (f) { + if (tcf_exts_get_net(&f->result.exts)) + call_rcu(&f->rcu, tcindex_destroy_fexts); + else + __tcindex_destroy_fexts(f); + } else { + if (tcf_exts_get_net(&r->exts)) + call_rcu(&r->rcu, tcindex_destroy_rexts); + else + __tcindex_destroy_rexts(r); + } *last = false; return 0; -- cgit From 35c55fc156d85a396a975fc17636f560fc02fd65 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 6 Nov 2017 13:47:30 -0800 Subject: cls_u32: use tcf_exts_get_net() before call_rcu() Hold netns refcnt before call_rcu() and release it after the tcf_exts_destroy() is done. Note, on ->destroy() path we have to respect the return value of tcf_exts_get_net(), on other paths it should always return true, so we don't need to care. Cc: Lucas Bates Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_u32.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index dadd1b344497..b58eccb21f03 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -399,6 +399,7 @@ static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n, bool free_pf) { tcf_exts_destroy(&n->exts); + tcf_exts_put_net(&n->exts); if (n->ht_down) n->ht_down->refcnt--; #ifdef CONFIG_CLS_U32_PERF @@ -476,6 +477,7 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key) RCU_INIT_POINTER(*kp, key->next); tcf_unbind_filter(tp, &key->res); + tcf_exts_get_net(&key->exts); call_rcu(&key->rcu, u32_delete_key_freepf_rcu); return 0; } @@ -588,7 +590,10 @@ static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) rtnl_dereference(n->next)); tcf_unbind_filter(tp, &n->res); u32_remove_hw_knode(tp, n->handle); - call_rcu(&n->rcu, u32_delete_key_freepf_rcu); + if (tcf_exts_get_net(&n->exts)) + call_rcu(&n->rcu, u32_delete_key_freepf_rcu); + else + u32_destroy_key(n->tp, n, true); } } } @@ -949,6 +954,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, u32_replace_knode(tp, tp_c, new); tcf_unbind_filter(tp, &n->res); + tcf_exts_get_net(&n->exts); call_rcu(&n->rcu, u32_delete_key_rcu); return 0; } -- cgit From 75ee94b20b46459e3d29f5ac2c3af3cebdeef777 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Thu, 9 Nov 2017 08:48:08 +0800 Subject: ALSA: hda - fix headset mic problem for Dell machines with alc274 Confirmed with Kailang of Realtek, the pin 0x19 is for Headset Mic, and the pin 0x1a is for Headphone Mic, he suggested to apply ALC269_FIXUP_DELL1_MIC_NO_PRESENCE to fix this problem. And we verified applying this FIXUP can fix this problem. Cc: Cc: Kailang Yang Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 546d515f3c1f..dce0682c5001 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6544,6 +6544,11 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x14, 0x90170110}, {0x1b, 0x90a70130}, {0x21, 0x03211020}), + SND_HDA_PIN_QUIRK(0x10ec0274, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE, + {0x12, 0xb7a60130}, + {0x13, 0xb8a61140}, + {0x16, 0x90170110}, + {0x21, 0x04211020}), SND_HDA_PIN_QUIRK(0x10ec0280, 0x103c, "HP", ALC280_FIXUP_HP_GPIO4, {0x12, 0x90a60130}, {0x14, 0x90170110}, -- cgit From 1e37f2f84680fa7f8394fd444b6928e334495ccc Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 6 Nov 2017 11:33:36 +0100 Subject: rbd: use GFP_NOIO for parent stat and data requests rbd_img_obj_exists_submit() and rbd_img_obj_parent_read_full() are on the writeback path for cloned images -- we attempt a stat on the parent object to see if it exists and potentially read it in to call copyup. GFP_NOIO should be used instead of GFP_KERNEL here. Cc: stable@vger.kernel.org Link: http://tracker.ceph.com/issues/22014 Signed-off-by: Ilya Dryomov Reviewed-by: David Disseldorp --- drivers/block/rbd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index b640ad8a6d20..adc877dfef5c 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -2692,7 +2692,7 @@ static int rbd_img_obj_parent_read_full(struct rbd_obj_request *obj_request) * from the parent. */ page_count = (u32)calc_pages_for(0, length); - pages = ceph_alloc_page_vector(page_count, GFP_KERNEL); + pages = ceph_alloc_page_vector(page_count, GFP_NOIO); if (IS_ERR(pages)) { result = PTR_ERR(pages); pages = NULL; @@ -2827,7 +2827,7 @@ static int rbd_img_obj_exists_submit(struct rbd_obj_request *obj_request) */ size = sizeof (__le64) + sizeof (__le32) + sizeof (__le32); page_count = (u32)calc_pages_for(0, size); - pages = ceph_alloc_page_vector(page_count, GFP_KERNEL); + pages = ceph_alloc_page_vector(page_count, GFP_NOIO); if (IS_ERR(pages)) { ret = PTR_ERR(pages); goto fail_stat_request; -- cgit From b6af53b7d6fa40262c16753fe2781a3e792d5e1b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 8 Nov 2017 16:43:09 +0100 Subject: perf tools: Add "reject" option for parse-events.l Arnaldo reported broken builds in some distros using a newer flex release, 2.6.4, found in Alpine Linux 3.6 and Edge, with flex not spotting the REJECT macro: CC /tmp/build/perf/util/parse-events-flex.o util/parse-events.l: In function 'parse_events_lex': /tmp/build/perf/util/parse-events-flex.c:4734:16: error: \ 'reject_used_but_not_detected' undeclared (first use in this function) It's happening because we put the REJECT under another USER_REJECT macro in following commit: 9445464bb831 perf tools: Unwind properly location after REJECT Fortunately flex provides option for force it to use REJECT, adding it to parse-events.l. Reported-by: Arnaldo Carvalho de Melo Reported-by: Markus Trippelsdorf Signed-off-by: Jiri Olsa Reviewed-by: Andi Kleen Tested-by: Arnaldo Carvalho de Melo Cc: Namhyung Kim Fixes: 9445464bb831 ("perf tools: Unwind properly location after REJECT") Link: http://lkml.kernel.org/n/tip-7kdont984mw12ijk7rji6b8p@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.l | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 6680e4fb7967..dc5f40e86e51 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -5,6 +5,7 @@ %option stack %option bison-locations %option yylineno +%option reject %{ #include -- cgit From a271bfaf30ce8f7f57c6b65c47bff6d306d0b758 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 9 Nov 2017 10:02:10 +0100 Subject: perf tools: Fix eBPF event specification parsing Looks like I've reached the new level of stupidity, adding missing braces. Committer testing: Given the following eBPF C filter, that will add a record when it returns true, i.e. when the tv_nsec variable is > 2000ns, should be built and installed via sys_bpf(), but fails to do so before this patch: # cat filter.c #include #define SEC(NAME) __attribute__((section(NAME), used)) SEC("func=hrtimer_nanosleep rqtp->tv_nsec") int func(void *ctx, int err, long nsec) { return nsec > 1000; } char _license[] SEC("license") = "GPL"; int _version SEC("version") = LINUX_VERSION_CODE; # # perf trace -e nanosleep,filter.c usleep 1 invalid or unsupported event: 'filter.c' Run 'perf list' for a list of valid events Usage: perf trace [] [] or: perf trace [] -- [] or: perf trace record [] [] or: perf trace record [] -- [] -e, --event event/syscall selector. use 'perf list' to list available events # And works again after it is applied, the nothing is inserted when the co # perf trace -e *sleep,filter.c usleep 1 0.000 ( 0.066 ms): usleep/23994 nanosleep(rqtp: 0x7ffead94a0d0) = 0 # perf trace -e *sleep,filter.c usleep 2 0.000 ( 0.008 ms): usleep/24378 nanosleep(rqtp: 0x7fffa021ba50) ... 0.008 ( ): perf_bpf_probe:func:(ffffffffb410cb30) tv_nsec=2000) 0.000 ( 0.066 ms): usleep/24378 ... [continued]: nanosleep()) = 0 # The intent of 9445464bb831 is kept: # perf stat -e 'cpu/uops_executed.core,krava/' true event syntax error: '..cuted.core,krava/' \___ unknown term valid terms: cmask,pc,event,edge,in_tx,any,ldlat,inv,umask,in_tx_cp,offcore_rsp,config,config1,config2,name,period Run 'perf list' for a list of valid events Usage: perf stat [] [] -e, --event event selector. use 'perf list' to list available events # # perf stat -e 'cpu/uops_executed.core,period=1/' true Performance counter stats for 'true': 808,332 cpu/uops_executed.core,period=1/ 0.002997237 seconds time elapsed # Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: Namhyung Kim Fixes: 9445464bb831 ("perf tools: Unwind properly location after REJECT") Link: http://lkml.kernel.org/n/tip-diea0ihbwpxfw6938huv3whj@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.l | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index dc5f40e86e51..025729510525 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -340,8 +340,8 @@ r{num_raw_hex} { return raw(yyscanner); } {num_hex} { return value(yyscanner, 16); } {modifier_event} { return str(yyscanner, PE_MODIFIER_EVENT); } -{bpf_object} { if (!isbpf(yyscanner)) USER_REJECT; return str(yyscanner, PE_BPF_OBJECT); } -{bpf_source} { if (!isbpf(yyscanner)) USER_REJECT; return str(yyscanner, PE_BPF_SOURCE); } +{bpf_object} { if (!isbpf(yyscanner)) { USER_REJECT }; return str(yyscanner, PE_BPF_OBJECT); } +{bpf_source} { if (!isbpf(yyscanner)) { USER_REJECT }; return str(yyscanner, PE_BPF_SOURCE); } {name} { return pmu_str_check(yyscanner); } "/" { BEGIN(config); return '/'; } - { return '-'; } -- cgit From 33974a414ce2324554f75dbd204ff0868f499e32 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 7 Nov 2017 16:22:45 -0800 Subject: perf trace: Call machine__exit() at exit Otherwise 'perf trace' leaves a temporary file /tmp/perf-vdso.so-XXXXXX. $ perf trace -o log true $ ls -l /tmp/perf-vdso.* -rw------- 1 root root 8192 Nov 8 03:08 /tmp/perf-vdso.so-5bCpD0 Signed-off-by: Andrei Vagin Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Vasily Averin Link: http://lkml.kernel.org/r/20171108002246.8924-1-avagin@openvz.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 771ddab94bb0..d5d7fff1c211 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1138,6 +1138,14 @@ static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist) return err; } +static void trace__symbols__exit(struct trace *trace) +{ + machine__exit(trace->host); + trace->host = NULL; + + symbol__exit(); +} + static int syscall__alloc_arg_fmts(struct syscall *sc, int nr_args) { int idx; @@ -2481,6 +2489,8 @@ out_disable: } out_delete_evlist: + trace__symbols__exit(trace); + perf_evlist__delete(evlist); trace->evlist = NULL; trace->live = false; -- cgit From be739f4b5ddece74ef25e2304b17a7fd24575e9b Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 7 Nov 2017 13:38:04 +0300 Subject: x86/mm: Fix ELF_ET_DYN_BASE for 5-level paging On machines with 5-level paging we don't want to allocate mapping above 47-bit unless user explicitly asked for it. See b569bab78d8d ("x86/mm: Prepare to expose larger address space to userspace") for details. c715b72c1ba4 ("mm: revert x86_64 and arm64 ELF_ET_DYN_BASE base changes") broke the behaviour. After the commit elf binary and heap got mapped above 47-bits. Use DEFAULT_MAP_WINDOW instead of TASK_SIZE to determine ELF_ET_DYN_BASE so it's forced to be below 47-bits unconditionally. Fixes: c715b72c1ba4 ("mm: revert x86_64 and arm64 ELF_ET_DYN_BASE base changes") Signed-off-by: Kirill A. Shutemov Signed-off-by: Thomas Gleixner Acked-by: Michal Hocko Cc: Kees Cook Cc: Nicholas Piggin Cc: linux-mm@kvack.org Cc: Andrew Morton Link: https://lkml.kernel.org/r/20171107103804.47341-1-kirill.shutemov@linux.intel.com --- arch/x86/include/asm/elf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index c1a125e47ff3..3a091cea36c5 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -253,7 +253,7 @@ extern int force_personality32; * space open for things that want to use the area for 32-bit pointers. */ #define ELF_ET_DYN_BASE (mmap_is_ia32() ? 0x000400000UL : \ - (TASK_SIZE / 3 * 2)) + (DEFAULT_MAP_WINDOW / 3 * 2)) /* This yields a mask that user programs can use to figure out what instruction set this CPU supports. This could be done in user space, -- cgit From e609a6b85182f8a479d265839d3ff9aad2e67b18 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 9 Nov 2017 13:38:18 -0800 Subject: sysctl: add register_sysctl() dummy helper register_sysctl() has been around for five years with commit fea478d4101a ("sysctl: Add register_sysctl for normal sysctl users") but now that arm64 started using it, I ran into a compile error: arch/arm64/kernel/armv8_deprecated.c: In function 'register_insn_emulation_sysctl': arch/arm64/kernel/armv8_deprecated.c:257:2: error: implicit declaration of function 'register_sysctl' This adds a inline function like we already have for register_sysctl_paths() and register_sysctl_table(). Link: http://lkml.kernel.org/r/20171106133700.558647-1-arnd@arndb.de Fixes: 38b9aeb32fa7 ("arm64: Port deprecated instruction emulation to new sysctl interface") Signed-off-by: Arnd Bergmann Reviewed-by: Dave Martin Acked-by: Kees Cook Acked-by: Will Deacon Cc: Catalin Marinas Cc: "Luis R. Rodriguez" Cc: Alex Benne Cc: Arnd Bergmann Cc: "Eric W. Biederman" Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sysctl.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 74f91eefeccf..b769ecfcc3bd 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -213,6 +213,11 @@ static inline struct ctl_table_header *register_sysctl_paths( return NULL; } +static inline struct ctl_table_header *register_sysctl(const char *path, struct ctl_table *table) +{ + return NULL; +} + static inline void unregister_sysctl_table(struct ctl_table_header * table) { } -- cgit From 60fdb44a23cbc5d8db224577e14ea667d7c53478 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Thu, 9 Nov 2017 13:38:21 -0800 Subject: MAINTAINERS: update TPM driver infrastructure changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [akpm@linux-foundation.org: alpha-sort CREDITS, per Randy] Link: http://lkml.kernel.org/r/20170915223811.21368-1-jarkko.sakkinen@linux.intel.com Signed-off-by: Jarkko Sakkinen Cc: Marcel Selhorst Cc: Ashley Lai Cc: Mimi Zohar Cc: Jarkko Sakkinen Cc: Boris Brezillon Cc: Borislav Petkov Cc: Håvard Skinnemoen Cc: Martin Kepplinger Cc: Pavel Machek Cc: Geert Uytterhoeven Cc: Hans-Christian Noren Egtvedt Cc: Arnaldo Carvalho de Melo Cc: Gertjan van Wingerde Cc: Greg Kroah-Hartman Cc: David S. Miller Cc: Mauro Carvalho Chehab Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- CREDITS | 9 ++++++++- MAINTAINERS | 13 ++----------- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/CREDITS b/CREDITS index 9fbd2c77b546..a3ec0c744172 100644 --- a/CREDITS +++ b/CREDITS @@ -2113,6 +2113,10 @@ S: J. Obrechtstr 23 S: NL-5216 GP 's-Hertogenbosch S: The Netherlands +N: Ashley Lai +E: ashleydlai@gmail.com +D: IBM VTPM driver + N: Savio Lam E: lam836@cs.cuhk.hk D: Author of the dialog utility, foundation @@ -3333,6 +3337,10 @@ S: Braunschweiger Strasse 79 S: 31134 Hildesheim S: Germany +N: Marcel Selhorst +E: tpmdd@selhorst.net +D: TPM driver + N: Darren Senn E: sinster@darkwater.com D: Whatever I notice needs doing (so far: itimers, /proc) @@ -4128,7 +4136,6 @@ D: MD driver D: EISA/sysfs subsystem S: France - # Don't add your name here, unless you really _are_ after Marc # alphabetically. Leonard used to be very proud of being the # last entry, and he'll get positively pissed if he can't even diff --git a/MAINTAINERS b/MAINTAINERS index a7995c737728..89e5ba530aa3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13603,23 +13603,14 @@ F: drivers/platform/x86/toshiba-wmi.c TPM DEVICE DRIVER M: Peter Huewe -M: Marcel Selhorst M: Jarkko Sakkinen R: Jason Gunthorpe -W: http://tpmdd.sourceforge.net -L: tpmdd-devel@lists.sourceforge.net (moderated for non-subscribers) -Q: https://patchwork.kernel.org/project/tpmdd-devel/list/ +L: linux-integrity@vger.kernel.org +Q: https://patchwork.kernel.org/project/linux-integrity/list/ T: git git://git.infradead.org/users/jjs/linux-tpmdd.git S: Maintained F: drivers/char/tpm/ -TPM IBM_VTPM DEVICE DRIVER -M: Ashley Lai -W: http://tpmdd.sourceforge.net -L: tpmdd-devel@lists.sourceforge.net (moderated for non-subscribers) -S: Maintained -F: drivers/char/tpm/tpm_ibmvtpm* - TRACING M: Steven Rostedt M: Ingo Molnar -- cgit From 1cb483a5cc84b497afb51a6c5dfb5a38a0b67086 Mon Sep 17 00:00:00 2001 From: Håkon Bugge Date: Tue, 7 Nov 2017 16:33:34 +0100 Subject: rds: ib: Fix NULL pointer dereference in debug code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit rds_ib_recv_refill() is a function that refills an IB receive queue. It can be called from both the CQE handler (tasklet) and a worker thread. Just after the call to ib_post_recv(), a debug message is printed with rdsdebug(): ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr); rdsdebug("recv %p ibinc %p page %p addr %lu ret %d\n", recv, recv->r_ibinc, sg_page(&recv->r_frag->f_sg), (long) ib_sg_dma_address( ic->i_cm_id->device, &recv->r_frag->f_sg), ret); Now consider an invocation of rds_ib_recv_refill() from the worker thread, which is preemptible. Further, assume that the worker thread is preempted between the ib_post_recv() and rdsdebug() statements. Then, if the preemption is due to a receive CQE event, the rds_ib_recv_cqe_handler() will be invoked. This function processes receive completions, including freeing up data structures, such as the recv->r_frag. In this scenario, rds_ib_recv_cqe_handler() will process the receive WR posted above. That implies, that the recv->r_frag has been freed before the above rdsdebug() statement has been executed. When it is later executed, we will have a NULL pointer dereference: [ 4088.068008] BUG: unable to handle kernel NULL pointer dereference at 0000000000000020 [ 4088.076754] IP: rds_ib_recv_refill+0x87/0x620 [rds_rdma] [ 4088.082686] PGD 0 P4D 0 [ 4088.085515] Oops: 0000 [#1] SMP [ 4088.089015] Modules linked in: rds_rdma(OE) rds(OE) rpcsec_gss_krb5(E) nfsv4(E) dns_resolver(E) nfs(E) fscache(E) mlx4_ib(E) ib_ipoib(E) rdma_ucm(E) ib_ucm(E) ib_uverbs(E) ib_umad(E) rdma_cm(E) ib_cm(E) iw_cm(E) ib_core(E) binfmt_misc(E) sb_edac(E) intel_powerclamp(E) coretemp(E) kvm_intel(E) kvm(E) irqbypass(E) crct10dif_pclmul(E) crc32_pclmul(E) ghash_clmulni_intel(E) pcbc(E) aesni_intel(E) crypto_simd(E) iTCO_wdt(E) glue_helper(E) iTCO_vendor_support(E) sg(E) cryptd(E) pcspkr(E) ipmi_si(E) ipmi_devintf(E) ipmi_msghandler(E) shpchp(E) ioatdma(E) i2c_i801(E) wmi(E) lpc_ich(E) mei_me(E) mei(E) mfd_core(E) nfsd(E) auth_rpcgss(E) nfs_acl(E) lockd(E) grace(E) sunrpc(E) ip_tables(E) ext4(E) mbcache(E) jbd2(E) fscrypto(E) mgag200(E) i2c_algo_bit(E) drm_kms_helper(E) syscopyarea(E) sysfillrect(E) sysimgblt(E) [ 4088.168486] fb_sys_fops(E) ahci(E) ixgbe(E) libahci(E) ttm(E) mdio(E) ptp(E) pps_core(E) drm(E) sd_mod(E) libata(E) crc32c_intel(E) mlx4_core(E) i2c_core(E) dca(E) megaraid_sas(E) dm_mirror(E) dm_region_hash(E) dm_log(E) dm_mod(E) [last unloaded: rds] [ 4088.193442] CPU: 20 PID: 1244 Comm: kworker/20:2 Tainted: G OE 4.14.0-rc7.master.20171105.ol7.x86_64 #1 [ 4088.205097] Hardware name: Oracle Corporation ORACLE SERVER X5-2L/ASM,MOBO TRAY,2U, BIOS 31110000 03/03/2017 [ 4088.216074] Workqueue: ib_cm cm_work_handler [ib_cm] [ 4088.221614] task: ffff885fa11d0000 task.stack: ffffc9000e598000 [ 4088.228224] RIP: 0010:rds_ib_recv_refill+0x87/0x620 [rds_rdma] [ 4088.234736] RSP: 0018:ffffc9000e59bb68 EFLAGS: 00010286 [ 4088.240568] RAX: 0000000000000000 RBX: ffffc9002115d050 RCX: ffffc9002115d050 [ 4088.248535] RDX: ffffffffa0521380 RSI: ffffffffa0522158 RDI: ffffffffa0525580 [ 4088.256498] RBP: ffffc9000e59bbf8 R08: 0000000000000005 R09: 0000000000000000 [ 4088.264465] R10: 0000000000000339 R11: 0000000000000001 R12: 0000000000000000 [ 4088.272433] R13: ffff885f8c9d8000 R14: ffffffff81a0a060 R15: ffff884676268000 [ 4088.280397] FS: 0000000000000000(0000) GS:ffff885fbec80000(0000) knlGS:0000000000000000 [ 4088.289434] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 4088.295846] CR2: 0000000000000020 CR3: 0000000001e09005 CR4: 00000000001606e0 [ 4088.303816] Call Trace: [ 4088.306557] rds_ib_cm_connect_complete+0xe0/0x220 [rds_rdma] [ 4088.312982] ? __dynamic_pr_debug+0x8c/0xb0 [ 4088.317664] ? __queue_work+0x142/0x3c0 [ 4088.321944] rds_rdma_cm_event_handler+0x19e/0x250 [rds_rdma] [ 4088.328370] cma_ib_handler+0xcd/0x280 [rdma_cm] [ 4088.333522] cm_process_work+0x25/0x120 [ib_cm] [ 4088.338580] cm_work_handler+0xd6b/0x17aa [ib_cm] [ 4088.343832] process_one_work+0x149/0x360 [ 4088.348307] worker_thread+0x4d/0x3e0 [ 4088.352397] kthread+0x109/0x140 [ 4088.355996] ? rescuer_thread+0x380/0x380 [ 4088.360467] ? kthread_park+0x60/0x60 [ 4088.364563] ret_from_fork+0x25/0x30 [ 4088.368548] Code: 48 89 45 90 48 89 45 98 eb 4d 0f 1f 44 00 00 48 8b 43 08 48 89 d9 48 c7 c2 80 13 52 a0 48 c7 c6 58 21 52 a0 48 c7 c7 80 55 52 a0 <4c> 8b 48 20 44 89 64 24 08 48 8b 40 30 49 83 e1 fc 48 89 04 24 [ 4088.389612] RIP: rds_ib_recv_refill+0x87/0x620 [rds_rdma] RSP: ffffc9000e59bb68 [ 4088.397772] CR2: 0000000000000020 [ 4088.401505] ---[ end trace fe922e6ccf004431 ]--- This bug was provoked by compiling rds out-of-tree with EXTRA_CFLAGS="-DRDS_DEBUG -DDEBUG" and inserting an artificial delay between the rdsdebug() and ib_ib_port_recv() statements: /* XXX when can this fail? */ ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr); + if (can_wait) + usleep_range(1000, 5000); rdsdebug("recv %p ibinc %p page %p addr %lu ret %d\n", recv, recv->r_ibinc, sg_page(&recv->r_frag->f_sg), (long) ib_sg_dma_address( The fix is simply to move the rdsdebug() statement up before the ib_post_recv() and remove the printing of ret, which is taken care of anyway by the non-debug code. Signed-off-by: Håkon Bugge Reviewed-by: Knut Omang Reviewed-by: Wei Lin Guay Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/ib_recv.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index 9722bf839d9d..b4e421aa9727 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -410,14 +410,14 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp) break; } - /* XXX when can this fail? */ - ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr); - rdsdebug("recv %p ibinc %p page %p addr %lu ret %d\n", recv, + rdsdebug("recv %p ibinc %p page %p addr %lu\n", recv, recv->r_ibinc, sg_page(&recv->r_frag->f_sg), (long) ib_sg_dma_address( ic->i_cm_id->device, - &recv->r_frag->f_sg), - ret); + &recv->r_frag->f_sg)); + + /* XXX when can this fail? */ + ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr); if (ret) { rds_ib_conn_error(conn, "recv post on " "%pI4 returned %d, disconnecting and " -- cgit From b8cce68bf1f1b773ac1a535707f968512b3c1e5f Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Sun, 29 Oct 2017 22:40:56 -0500 Subject: net/mlx5: Loop over temp list to release delay events list_splice_init initializing waiting_events_list after splicing it to temp list, therefore we should loop over temp list to fire the events. Fixes: 4ca637a20a52 ("net/mlx5: Delay events till mlx5 interface's add complete for pci resume") Signed-off-by: Huy Nguyen Signed-off-by: Feras Daoud Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index fc281712869b..17b723218b0c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -93,7 +93,7 @@ static void delayed_event_release(struct mlx5_device_context *dev_ctx, list_splice_init(&priv->waiting_events_list, &temp); if (!dev_ctx->context) goto out; - list_for_each_entry_safe(de, n, &priv->waiting_events_list, list) + list_for_each_entry_safe(de, n, &temp, list) dev_ctx->intf->event(dev, dev_ctx->context, de->event, de->param); out: -- cgit From d2aa060d40fa060e963f9a356d43481e43ba3dac Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Tue, 26 Sep 2017 15:11:56 -0500 Subject: net/mlx5: Cancel health poll before sending panic teardown command After the panic teardown firmware command, health_care detects the error in PCI bus and calls the mlx5_pci_err_detected. This health_care flow is no longer needed because the panic teardown firmware command will bring down the PCI bus communication with the HCA. The solution is to cancel the health care timer and its pending workqueue request before sending panic teardown firmware command. Kernel trace: mlx5_core 0033:01:00.0: Shutdown was called mlx5_core 0033:01:00.0: health_care:154:(pid 9304): handling bad device here mlx5_core 0033:01:00.0: mlx5_handle_bad_state:114:(pid 9304): NIC state 1 mlx5_core 0033:01:00.0: mlx5_pci_err_detected was called mlx5_core 0033:01:00.0: mlx5_enter_error_state:96:(pid 9304): start mlx5_3:mlx5_ib_event:3061:(pid 9304): warning: event on port 0 mlx5_core 0033:01:00.0: mlx5_enter_error_state:104:(pid 9304): end Unable to handle kernel paging request for data at address 0x0000003f Faulting instruction address: 0xc0080000434b8c80 Fixes: 8812c24d28f4 ('net/mlx5: Add fast unload support in shutdown flow') Signed-off-by: Huy Nguyen Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 0d2c8dcd6eae..06562c9a6b9c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1482,9 +1482,16 @@ static int mlx5_try_fast_unload(struct mlx5_core_dev *dev) return -EAGAIN; } + /* Panic tear down fw command will stop the PCI bus communication + * with the HCA, so the health polll is no longer needed. + */ + mlx5_drain_health_wq(dev); + mlx5_stop_health_poll(dev); + ret = mlx5_cmd_force_teardown_hca(dev); if (ret) { mlx5_core_dbg(dev, "Firmware couldn't do fast unload error: %d\n", ret); + mlx5_start_health_poll(dev); return ret; } -- cgit From 2a8d6065e7b90ad9d5540650944d802b0f86bdfe Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 31 Oct 2017 15:34:00 -0700 Subject: net/mlx5e: Fix napi poll with zero budget napi->poll can be called with budget 0, e.g. in netpoll scenarios where the caller only wants to poll TX rings (poll_one_napi@net/core/netpoll.c). The below commit changed RX polling from "while" loop to "do {} while", which caused to ignore the initial budget and handle at least one RX packet. This fixes the following warning: [ 2852.049194] mlx5e_napi_poll+0x0/0x260 [mlx5_core] exceeded budget in poll [ 2852.049195] ------------[ cut here ]------------ [ 2852.049195] WARNING: CPU: 0 PID: 25691 at net/core/netpoll.c:171 netpoll_poll_dev+0x18a/0x1a0 Fixes: 4b7dfc992514 ("net/mlx5e: Early-return on empty completion queues") Signed-off-by: Saeed Mahameed Reviewed-by: Tariq Toukan Reported-by: Martin KaFai Lau Tested-by: Martin KaFai Lau Cc: kernel-team@fb.com Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index e906b754415c..ab92298eafc3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -49,7 +49,7 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel, napi); bool busy = false; - int work_done; + int work_done = 0; int i; for (i = 0; i < c->num_tc; i++) @@ -58,15 +58,17 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) if (c->xdp) busy |= mlx5e_poll_xdpsq_cq(&c->rq.xdpsq.cq); - work_done = mlx5e_poll_rx_cq(&c->rq.cq, budget); - busy |= work_done == budget; + if (likely(budget)) { /* budget=0 means: don't poll rx rings */ + work_done = mlx5e_poll_rx_cq(&c->rq.cq, budget); + busy |= work_done == budget; + } busy |= c->rq.post_wqes(&c->rq); if (busy) { if (likely(mlx5e_channel_no_affinity_change(c))) return budget; - if (work_done == budget) + if (budget && work_done == budget) work_done--; } -- cgit From 2e50b2619538ea0224c037f6fa746023089e0654 Mon Sep 17 00:00:00 2001 From: Inbar Karmy Date: Sun, 15 Oct 2017 17:30:59 +0300 Subject: net/mlx5e: Set page to null in case dma mapping fails Currently, when dma mapping fails, put_page is called, but the page is not set to null. Later, in the page_reuse treatment in mlx5e_free_rx_descs(), mlx5e_page_release() is called for the second time, improperly doing dma_unmap (for a non-mapped address) and an extra put_page. Prevent this by nullifying the page pointer when dma_map fails. Fixes: accd58833237 ("net/mlx5e: Introduce RX Page-Reuse") Signed-off-by: Inbar Karmy Reviewed-by: Tariq Toukan Cc: kernel-team@fb.com Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 15a1687483cc..91b1b0938931 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -215,22 +215,20 @@ static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq, static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info) { - struct page *page; - if (mlx5e_rx_cache_get(rq, dma_info)) return 0; - page = dev_alloc_pages(rq->buff.page_order); - if (unlikely(!page)) + dma_info->page = dev_alloc_pages(rq->buff.page_order); + if (unlikely(!dma_info->page)) return -ENOMEM; - dma_info->addr = dma_map_page(rq->pdev, page, 0, + dma_info->addr = dma_map_page(rq->pdev, dma_info->page, 0, RQ_PAGE_SIZE(rq), rq->buff.map_dir); if (unlikely(dma_mapping_error(rq->pdev, dma_info->addr))) { - put_page(page); + put_page(dma_info->page); + dma_info->page = NULL; return -ENOMEM; } - dma_info->page = page; return 0; } -- cgit From d1c61e6d79ea0d4d53dc18bcd2db30ef2d99cfa7 Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Thu, 12 Jan 2017 17:11:45 +0200 Subject: net/mlx5e: Increase Striding RQ minimum size limit to 4 multi-packet WQEs This is to prevent the case of working with a single MPWQE (1 WQE is always reserved as RQ is linked-list). When the WQE is fully consumed, HW should still have available buffer in order not to drop packets. Fixes: 461017cb006a ("net/mlx5e: Support RX multi-packet WQE (Striding RQ)") Signed-off-by: Eugenia Emantayev Reviewed-by: Tariq Toukan Cc: kernel-team@fb.com Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index cc13d3dbd366..13b5ef9d8703 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -67,7 +67,7 @@ #define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE 0xa #define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE 0xd -#define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW 0x1 +#define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW 0x2 #define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW 0x3 #define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW 0x6 -- cgit From b8347c2196492f4e1cccde3d92fda1cc2cc7de7e Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Mon, 24 Jul 2017 13:04:28 +0300 Subject: x86/debug: Handle warnings before the notifier chain, to fix KGDB crash Commit: 9a93848fe787 ("x86/debug: Implement __WARN() using UD0") turned warnings into UD0, but the fixup code only runs after the notify_die() chain. This is a problem, in particular, with kgdb, which kicks in as if it was a BUG(). Fix this by running the fixup code before the notifier chain in the invalid op handler path. Signed-off-by: Alexander Shishkin Tested-by: Ilya Dryomov Acked-by: Daniel Thompson Acked-by: Thomas Gleixner Cc: Jason Wessel Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Richard Weinberger Cc: # v4.12+ Link: http://lkml.kernel.org/r/20170724100428.19173-1-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 67db4f43309e..5a6b8f809792 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -209,9 +209,6 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str, if (fixup_exception(regs, trapnr)) return 0; - if (fixup_bug(regs, trapnr)) - return 0; - tsk->thread.error_code = error_code; tsk->thread.trap_nr = trapnr; die(str, regs, error_code); @@ -292,6 +289,13 @@ static void do_error_trap(struct pt_regs *regs, long error_code, char *str, RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); + /* + * WARN*()s end up here; fix them up before we call the + * notifier chain. + */ + if (!user_mode(regs) && fixup_bug(regs, trapnr)) + return; + if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) != NOTIFY_STOP) { cond_local_irq_enable(regs); -- cgit From fb5f0b3ef69b95e665e4bbe8a3de7201f09f1071 Mon Sep 17 00:00:00 2001 From: Richard Schütz Date: Sun, 29 Oct 2017 13:03:22 +0100 Subject: can: c_can: don't indicate triple sampling support for D_CAN MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The D_CAN controller doesn't provide a triple sampling mode, so don't set the CAN_CTRLMODE_3_SAMPLES flag in ctrlmode_supported. Currently enabling triple sampling is a no-op. Signed-off-by: Richard Schütz Cc: linux-stable # >= v3.6 Signed-off-by: Marc Kleine-Budde --- drivers/net/can/c_can/c_can_pci.c | 1 - drivers/net/can/c_can/c_can_platform.c | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/net/can/c_can/c_can_pci.c b/drivers/net/can/c_can/c_can_pci.c index cf7c18947189..d065c0e2d18e 100644 --- a/drivers/net/can/c_can/c_can_pci.c +++ b/drivers/net/can/c_can/c_can_pci.c @@ -178,7 +178,6 @@ static int c_can_pci_probe(struct pci_dev *pdev, break; case BOSCH_D_CAN: priv->regs = reg_map_d_can; - priv->can.ctrlmode_supported |= CAN_CTRLMODE_3_SAMPLES; break; default: ret = -EINVAL; diff --git a/drivers/net/can/c_can/c_can_platform.c b/drivers/net/can/c_can/c_can_platform.c index 46a746ee80bb..b5145a7f874c 100644 --- a/drivers/net/can/c_can/c_can_platform.c +++ b/drivers/net/can/c_can/c_can_platform.c @@ -320,7 +320,6 @@ static int c_can_plat_probe(struct platform_device *pdev) break; case BOSCH_D_CAN: priv->regs = reg_map_d_can; - priv->can.ctrlmode_supported |= CAN_CTRLMODE_3_SAMPLES; priv->read_reg = c_can_plat_read_reg_aligned_to_16bit; priv->write_reg = c_can_plat_write_reg_aligned_to_16bit; priv->read_reg32 = d_can_plat_read_reg32; -- cgit From 4dcf924c2eda0c47a5c53b7703e3dc65ddaa8920 Mon Sep 17 00:00:00 2001 From: Gerhard Bertelsmann Date: Mon, 6 Nov 2017 18:16:56 +0100 Subject: can: sun4i: handle overrun in RX FIFO SUN4Is CAN IP has a 64 byte deep FIFO buffer. If the buffer is not drained fast enough (overrun) it's getting mangled. Already received frames are dropped - the data can't be restored. Signed-off-by: Gerhard Bertelsmann Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/sun4i_can.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/sun4i_can.c b/drivers/net/can/sun4i_can.c index b0c80859f746..1ac2090a1721 100644 --- a/drivers/net/can/sun4i_can.c +++ b/drivers/net/can/sun4i_can.c @@ -539,6 +539,13 @@ static int sun4i_can_err(struct net_device *dev, u8 isrc, u8 status) } stats->rx_over_errors++; stats->rx_errors++; + + /* reset the CAN IP by entering reset mode + * ignoring timeout error + */ + set_reset_mode(dev); + set_normal_mode(dev); + /* clear bit */ sun4i_can_write_cmdreg(priv, SUN4I_CMD_CLEAR_OR_FLAG); } @@ -653,8 +660,9 @@ static irqreturn_t sun4i_can_interrupt(int irq, void *dev_id) netif_wake_queue(dev); can_led_event(dev, CAN_LED_EVENT_TX); } - if (isrc & SUN4I_INT_RBUF_VLD) { - /* receive interrupt */ + if ((isrc & SUN4I_INT_RBUF_VLD) && + !(isrc & SUN4I_INT_DATA_OR)) { + /* receive interrupt - don't read if overrun occurred */ while (status & SUN4I_STA_RBUF_RDY) { /* RX buffer is not empty */ sun4i_can_rx(dev); -- cgit From 4cbdd0ee67191481ec57ceed94febdfef95c9f25 Mon Sep 17 00:00:00 2001 From: Stephane Grosjean Date: Thu, 9 Nov 2017 14:42:14 +0100 Subject: can: peak: Add support for new PCIe/M2 CAN FD interfaces This adds support for the following PEAK-System CAN FD interfaces: PCAN-cPCIe FD CAN FD Interface for cPCI Serial (2 or 4 channels) PCAN-PCIe/104-Express CAN FD Interface for PCIe/104-Express (1, 2 or 4 ch.) PCAN-miniPCIe FD CAN FD Interface for PCIe Mini (1, 2 or 4 channels) PCAN-PCIe FD OEM CAN FD Interface for PCIe OEM version (1, 2 or 4 ch.) PCAN-M.2 CAN FD Interface for M.2 (1 or 2 channels) Like the PCAN-PCIe FD interface, all of these boards run the same IP Core that is able to handle CAN FD (see also http://www.peak-system.com). Signed-off-by: Stephane Grosjean Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/peak_canfd/peak_pciefd_main.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/peak_canfd/peak_pciefd_main.c b/drivers/net/can/peak_canfd/peak_pciefd_main.c index 51c2d182a33a..b4efd711f824 100644 --- a/drivers/net/can/peak_canfd/peak_pciefd_main.c +++ b/drivers/net/can/peak_canfd/peak_pciefd_main.c @@ -29,14 +29,19 @@ #include "peak_canfd_user.h" MODULE_AUTHOR("Stephane Grosjean "); -MODULE_DESCRIPTION("Socket-CAN driver for PEAK PCAN PCIe FD family cards"); -MODULE_SUPPORTED_DEVICE("PEAK PCAN PCIe FD CAN cards"); +MODULE_DESCRIPTION("Socket-CAN driver for PEAK PCAN PCIe/M.2 FD family cards"); +MODULE_SUPPORTED_DEVICE("PEAK PCAN PCIe/M.2 FD CAN cards"); MODULE_LICENSE("GPL v2"); #define PCIEFD_DRV_NAME "peak_pciefd" #define PEAK_PCI_VENDOR_ID 0x001c /* The PCI device and vendor IDs */ #define PEAK_PCIEFD_ID 0x0013 /* for PCIe slot cards */ +#define PCAN_CPCIEFD_ID 0x0014 /* for Compact-PCI Serial slot cards */ +#define PCAN_PCIE104FD_ID 0x0017 /* for PCIe-104 Express slot cards */ +#define PCAN_MINIPCIEFD_ID 0x0018 /* for mini-PCIe slot cards */ +#define PCAN_PCIEFD_OEM_ID 0x0019 /* for PCIe slot OEM cards */ +#define PCAN_M2_ID 0x001a /* for M2 slot cards */ /* PEAK PCIe board access description */ #define PCIEFD_BAR0_SIZE (64 * 1024) @@ -203,6 +208,11 @@ struct pciefd_board { /* supported device ids. */ static const struct pci_device_id peak_pciefd_tbl[] = { {PEAK_PCI_VENDOR_ID, PEAK_PCIEFD_ID, PCI_ANY_ID, PCI_ANY_ID,}, + {PEAK_PCI_VENDOR_ID, PCAN_CPCIEFD_ID, PCI_ANY_ID, PCI_ANY_ID,}, + {PEAK_PCI_VENDOR_ID, PCAN_PCIE104FD_ID, PCI_ANY_ID, PCI_ANY_ID,}, + {PEAK_PCI_VENDOR_ID, PCAN_MINIPCIEFD_ID, PCI_ANY_ID, PCI_ANY_ID,}, + {PEAK_PCI_VENDOR_ID, PCAN_PCIEFD_OEM_ID, PCI_ANY_ID, PCI_ANY_ID,}, + {PEAK_PCI_VENDOR_ID, PCAN_M2_ID, PCI_ANY_ID, PCI_ANY_ID,}, {0,} }; -- cgit From 7ec318feeed10a64c0359ec4d10889cb4defa39a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 7 Nov 2017 15:15:04 -0800 Subject: tcp: gso: avoid refcount_t warning from tcp_gso_segment() When a GSO skb of truesize O is segmented into 2 new skbs of truesize N1 and N2, we want to transfer socket ownership to the new fresh skbs. In order to avoid expensive atomic operations on a cache line subject to cache bouncing, we replace the sequence : refcount_add(N1, &sk->sk_wmem_alloc); refcount_add(N2, &sk->sk_wmem_alloc); // repeated by number of segments refcount_sub(O, &sk->sk_wmem_alloc); by a single refcount_add(sum_of(N) - O, &sk->sk_wmem_alloc); Problem is : In some pathological cases, sum(N) - O might be a negative number, and syzkaller bot was apparently able to trigger this trace [1] atomic_t was ok with this construct, but we need to take care of the negative delta with refcount_t [1] refcount_t: saturated; leaking memory. ------------[ cut here ]------------ WARNING: CPU: 0 PID: 8404 at lib/refcount.c:77 refcount_add_not_zero+0x198/0x200 lib/refcount.c:77 Kernel panic - not syncing: panic_on_warn set ... CPU: 0 PID: 8404 Comm: syz-executor2 Not tainted 4.14.0-rc5-mm1+ #20 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:16 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:52 panic+0x1e4/0x41c kernel/panic.c:183 __warn+0x1c4/0x1e0 kernel/panic.c:546 report_bug+0x211/0x2d0 lib/bug.c:183 fixup_bug+0x40/0x90 arch/x86/kernel/traps.c:177 do_trap_no_signal arch/x86/kernel/traps.c:211 [inline] do_trap+0x260/0x390 arch/x86/kernel/traps.c:260 do_error_trap+0x120/0x390 arch/x86/kernel/traps.c:297 do_invalid_op+0x1b/0x20 arch/x86/kernel/traps.c:310 invalid_op+0x18/0x20 arch/x86/entry/entry_64.S:905 RIP: 0010:refcount_add_not_zero+0x198/0x200 lib/refcount.c:77 RSP: 0018:ffff8801c606e3a0 EFLAGS: 00010282 RAX: 0000000000000026 RBX: 0000000000001401 RCX: 0000000000000000 RDX: 0000000000000026 RSI: ffffc900036fc000 RDI: ffffed0038c0dc68 RBP: ffff8801c606e430 R08: 0000000000000001 R09: 0000000000000000 R10: ffff8801d97f5eba R11: 0000000000000000 R12: ffff8801d5acf73c R13: 1ffff10038c0dc75 R14: 00000000ffffffff R15: 00000000fffff72f refcount_add+0x1b/0x60 lib/refcount.c:101 tcp_gso_segment+0x10d0/0x16b0 net/ipv4/tcp_offload.c:155 tcp4_gso_segment+0xd4/0x310 net/ipv4/tcp_offload.c:51 inet_gso_segment+0x60c/0x11c0 net/ipv4/af_inet.c:1271 skb_mac_gso_segment+0x33f/0x660 net/core/dev.c:2749 __skb_gso_segment+0x35f/0x7f0 net/core/dev.c:2821 skb_gso_segment include/linux/netdevice.h:3971 [inline] validate_xmit_skb+0x4ba/0xb20 net/core/dev.c:3074 __dev_queue_xmit+0xe49/0x2070 net/core/dev.c:3497 dev_queue_xmit+0x17/0x20 net/core/dev.c:3538 neigh_hh_output include/net/neighbour.h:471 [inline] neigh_output include/net/neighbour.h:479 [inline] ip_finish_output2+0xece/0x1460 net/ipv4/ip_output.c:229 ip_finish_output+0x85e/0xd10 net/ipv4/ip_output.c:317 NF_HOOK_COND include/linux/netfilter.h:238 [inline] ip_output+0x1cc/0x860 net/ipv4/ip_output.c:405 dst_output include/net/dst.h:459 [inline] ip_local_out+0x95/0x160 net/ipv4/ip_output.c:124 ip_queue_xmit+0x8c6/0x18e0 net/ipv4/ip_output.c:504 tcp_transmit_skb+0x1ab7/0x3840 net/ipv4/tcp_output.c:1137 tcp_write_xmit+0x663/0x4de0 net/ipv4/tcp_output.c:2341 __tcp_push_pending_frames+0xa0/0x250 net/ipv4/tcp_output.c:2513 tcp_push_pending_frames include/net/tcp.h:1722 [inline] tcp_data_snd_check net/ipv4/tcp_input.c:5050 [inline] tcp_rcv_established+0x8c7/0x18a0 net/ipv4/tcp_input.c:5497 tcp_v4_do_rcv+0x2ab/0x7d0 net/ipv4/tcp_ipv4.c:1460 sk_backlog_rcv include/net/sock.h:909 [inline] __release_sock+0x124/0x360 net/core/sock.c:2264 release_sock+0xa4/0x2a0 net/core/sock.c:2776 tcp_sendmsg+0x3a/0x50 net/ipv4/tcp.c:1462 inet_sendmsg+0x11f/0x5e0 net/ipv4/af_inet.c:763 sock_sendmsg_nosec net/socket.c:632 [inline] sock_sendmsg+0xca/0x110 net/socket.c:642 ___sys_sendmsg+0x31c/0x890 net/socket.c:2048 __sys_sendmmsg+0x1e6/0x5f0 net/socket.c:2138 Fixes: 14afee4b6092 ("net: convert sock.sk_wmem_alloc from atomic_t to refcount_t") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller --- net/ipv4/tcp_offload.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 11f69bbf9307..b6a2aa1dcf56 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -149,11 +149,19 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, * is freed by GSO engine */ if (copy_destructor) { + int delta; + swap(gso_skb->sk, skb->sk); swap(gso_skb->destructor, skb->destructor); sum_truesize += skb->truesize; - refcount_add(sum_truesize - gso_skb->truesize, - &skb->sk->sk_wmem_alloc); + delta = sum_truesize - gso_skb->truesize; + /* In some pathological cases, delta can be negative. + * We need to either use refcount_add() or refcount_sub_and_test() + */ + if (likely(delta >= 0)) + refcount_add(delta, &skb->sk->sk_wmem_alloc); + else + WARN_ON_ONCE(refcount_sub_and_test(-delta, &skb->sk->sk_wmem_alloc)); } delta = htonl(oldlen + (skb_tail_pointer(skb) - -- cgit From 0eb96bf754d7fa6635aa0b0f6650c74b8a6b1cc9 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Tue, 7 Nov 2017 15:33:43 -0800 Subject: tcp: fix tcp_fastretrans_alert warning This patch fixes the cause of an WARNING indicatng TCP has pending retransmission in Open state in tcp_fastretrans_alert(). The root cause is a bad interaction between path mtu probing, if enabled, and the RACK loss detection. Upong receiving a SACK above the sequence of the MTU probing packet, RACK could mark the probe packet lost in tcp_fastretrans_alert(), prior to calling tcp_simple_retransmit(). tcp_simple_retransmit() only enters Loss state if it newly marks the probe packet lost. If the probe packet is already identified as lost by RACK, the sender remains in Open state with some packets marked lost and retransmitted. Then the next SACK would trigger the warning. The likely scenario is that the probe packet was lost due to its size or network congestion. The actual impact of this warning is small by potentially entering fast recovery an ACK later. The simple fix is always entering recovery (Loss) state if some packet is marked lost during path MTU probing. Fixes: a0370b3f3f2c ("tcp: enable RACK loss detection to trigger recovery") Reported-by: Oleksandr Natalenko Reported-by: Alexei Starovoitov Reported-by: Roman Gushchin Signed-off-by: Yuchung Cheng Reviewed-by: Eric Dumazet Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index b2fc7163bd40..b6bb3cdfad09 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2615,7 +2615,6 @@ void tcp_simple_retransmit(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; unsigned int mss = tcp_current_mss(sk); - u32 prior_lost = tp->lost_out; tcp_for_write_queue(skb, sk) { if (skb == tcp_send_head(sk)) @@ -2632,7 +2631,7 @@ void tcp_simple_retransmit(struct sock *sk) tcp_clear_retrans_hints_partial(tp); - if (prior_lost == tp->lost_out) + if (!tp->lost_out) return; if (tcp_is_reno(tp)) -- cgit From 4f7116757b4bd99e4ef2636c7d957a6d63035d11 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Fri, 10 Nov 2017 11:22:39 +0100 Subject: can: ifi: Fix transmitter delay calculation The CANFD transmitter delay calculation formula was updated in the latest software drop from IFI and improves the behavior of the IFI CANFD core during bitrate switching. Use the new formula to improve stability of the CANFD operation. Signed-off-by: Marek Vasut Cc: Markus Marb Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/ifi_canfd/ifi_canfd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/can/ifi_canfd/ifi_canfd.c b/drivers/net/can/ifi_canfd/ifi_canfd.c index 4d1fe8d95042..2772d05ff11c 100644 --- a/drivers/net/can/ifi_canfd/ifi_canfd.c +++ b/drivers/net/can/ifi_canfd/ifi_canfd.c @@ -670,9 +670,9 @@ static void ifi_canfd_set_bittiming(struct net_device *ndev) priv->base + IFI_CANFD_FTIME); /* Configure transmitter delay */ - tdc = (dbt->brp * (dbt->phase_seg1 + 1)) & IFI_CANFD_TDELAY_MASK; - writel(IFI_CANFD_TDELAY_EN | IFI_CANFD_TDELAY_ABS | tdc, - priv->base + IFI_CANFD_TDELAY); + tdc = dbt->brp * (dbt->prop_seg + dbt->phase_seg1); + tdc &= IFI_CANFD_TDELAY_MASK; + writel(IFI_CANFD_TDELAY_EN | tdc, priv->base + IFI_CANFD_TDELAY); } static void ifi_canfd_set_filter(struct net_device *ndev, const u32 id, -- cgit From ea0ee33988778fb73e4f45e7c73fb735787e2f32 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 10 Nov 2017 11:19:11 -0800 Subject: Revert "x86: CPU: Fix up "cpu MHz" in /proc/cpuinfo" This reverts commit 941f5f0f6ef5338814145cf2b813cf1f98873e2f. Sadly, it turns out that we really can't just do the cross-CPU IPI to all CPU's to get their proper frequencies, because it's much too expensive on systems with lots of cores. So we'll have to revert this for now, and revisit it using a smarter model (probably doing one system-wide IPI at open time, and doing all the frequency calculations in parallel). Reported-by: WANG Chao Reported-by: Ingo Molnar Cc: Rafael J Wysocki Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- arch/x86/kernel/cpu/Makefile | 2 +- arch/x86/kernel/cpu/aperfmperf.c | 11 ++++------- arch/x86/kernel/cpu/proc.c | 4 +--- 3 files changed, 6 insertions(+), 11 deletions(-) diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 236999c54edc..c60922a66385 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -22,7 +22,7 @@ obj-y += common.o obj-y += rdrand.o obj-y += match.o obj-y += bugs.o -obj-y += aperfmperf.o +obj-$(CONFIG_CPU_FREQ) += aperfmperf.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c index 957813e0180d..0ee83321a313 100644 --- a/arch/x86/kernel/cpu/aperfmperf.c +++ b/arch/x86/kernel/cpu/aperfmperf.c @@ -42,6 +42,10 @@ static void aperfmperf_snapshot_khz(void *dummy) s64 time_delta = ktime_ms_delta(now, s->time); unsigned long flags; + /* Don't bother re-computing within the cache threshold time. */ + if (time_delta < APERFMPERF_CACHE_THRESHOLD_MS) + return; + local_irq_save(flags); rdmsrl(MSR_IA32_APERF, aperf); rdmsrl(MSR_IA32_MPERF, mperf); @@ -70,7 +74,6 @@ static void aperfmperf_snapshot_khz(void *dummy) unsigned int arch_freq_get_on_cpu(int cpu) { - s64 time_delta; unsigned int khz; if (!cpu_khz) @@ -79,12 +82,6 @@ unsigned int arch_freq_get_on_cpu(int cpu) if (!static_cpu_has(X86_FEATURE_APERFMPERF)) return 0; - /* Don't bother re-computing within the cache threshold time. */ - time_delta = ktime_ms_delta(ktime_get(), per_cpu(samples.time, cpu)); - khz = per_cpu(samples.khz, cpu); - if (khz && time_delta < APERFMPERF_CACHE_THRESHOLD_MS) - return khz; - smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, 1); khz = per_cpu(samples.khz, cpu); if (khz) diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 4378a729b933..6b7e17bf0b71 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -78,10 +78,8 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "microcode\t: 0x%x\n", c->microcode); if (cpu_has(c, X86_FEATURE_TSC)) { - unsigned int freq = arch_freq_get_on_cpu(cpu); + unsigned int freq = cpufreq_quick_get(cpu); - if (!freq) - freq = cpufreq_quick_get(cpu); if (!freq) freq = cpu_khz; seq_printf(m, "cpu MHz\t\t: %u.%03u\n", -- cgit From 085c17ff6bacccb45201098e71f95675e08f0e17 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Fri, 10 Nov 2017 20:05:24 +0000 Subject: .mailmap: Add Maciej W. Rozycki's Imagination e-mail address Following my recent transition from Imagination Technologies to the=20 reincarnated MIPS company add a .mailmap mapping for my work address, so that `scripts/get_maintainer.pl' gets it right for past commits. Signed-off-by: Maciej W. Rozycki Signed-off-by: Linus Torvalds --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index 4757d361fd33..c021f29779a7 100644 --- a/.mailmap +++ b/.mailmap @@ -102,6 +102,7 @@ Leonid I Ananiev Linas Vepstas Linus Lüssing Linus Lüssing +Maciej W. Rozycki Marcin Nowakowski Mark Brown Martin Kepplinger -- cgit From b0b38a1c6684b10dd0462bef4fef038917115012 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Wed, 8 Nov 2017 10:49:56 -0500 Subject: net: dsa: return after mdb prepare phase The current code does not return after successfully preparing the MDB addition on every ports member of a multicast group. Fix this. Fixes: a1a6b7ea7f2d ("net: dsa: add cross-chip multicast support") Reported-by: Egil Hjelmeland Signed-off-by: Vivien Didelot Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- net/dsa/switch.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/dsa/switch.c b/net/dsa/switch.c index e6c06aa349a6..73746fa148f1 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -133,6 +133,8 @@ static int dsa_switch_mdb_add(struct dsa_switch *ds, if (err) return err; } + + return 0; } for_each_set_bit(port, group, ds->num_ports) -- cgit From 2118df93b5e1742931da358c2b8804c46fd64490 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Wed, 8 Nov 2017 10:50:10 -0500 Subject: net: dsa: return after vlan prepare phase The current code does not return after successfully preparing the VLAN addition on every ports member of a it. Fix this. Fixes: 1ca4aa9cd4cc ("net: dsa: check VLAN capability of every switch") Signed-off-by: Vivien Didelot Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- net/dsa/switch.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/dsa/switch.c b/net/dsa/switch.c index 73746fa148f1..1e2929f4290a 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -182,6 +182,8 @@ static int dsa_switch_vlan_add(struct dsa_switch *ds, if (err) return err; } + + return 0; } for_each_set_bit(port, members, ds->num_ports) -- cgit From 505ee76761062a1872b024140e886b7136a6c1d5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 11 Nov 2017 09:06:57 +0100 Subject: tooling/headers: Sync the tools/include/uapi/drm/i915_drm.h UAPI header Last minute upstream update to one of the UAPI headers - sync it with tooling, to address this warning: Warning: Kernel ABI header at 'tools/include/uapi/drm/i915_drm.h' differs from latest version at 'include/uapi/drm/i915_drm.h' Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Peter Zijlstra Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- tools/include/uapi/drm/i915_drm.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h index 6598fb76d2c2..9816590d3ad2 100644 --- a/tools/include/uapi/drm/i915_drm.h +++ b/tools/include/uapi/drm/i915_drm.h @@ -829,6 +829,7 @@ struct drm_i915_gem_exec_fence { #define I915_EXEC_FENCE_WAIT (1<<0) #define I915_EXEC_FENCE_SIGNAL (1<<1) +#define __I915_EXEC_FENCE_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SIGNAL << 1)) __u32 flags; }; -- cgit From 052d41c01b3a2e3371d66de569717353af489d63 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 9 Nov 2017 16:43:13 -0800 Subject: vlan: fix a use-after-free in vlan_device_event() After refcnt reaches zero, vlan_vid_del() could free dev->vlan_info via RCU: RCU_INIT_POINTER(dev->vlan_info, NULL); call_rcu(&vlan_info->rcu, vlan_info_rcu_free); However, the pointer 'grp' still points to that memory since it is set before vlan_vid_del(): vlan_info = rtnl_dereference(dev->vlan_info); if (!vlan_info) goto out; grp = &vlan_info->grp; Depends on when that RCU callback is scheduled, we could trigger a use-after-free in vlan_group_for_each_dev() right following this vlan_vid_del(). Fix it by moving vlan_vid_del() before setting grp. This is also symmetric to the vlan_vid_add() we call in vlan_device_event(). Reported-by: Fengguang Wu Fixes: efc73f4bbc23 ("net: Fix memory leak - vlan_info struct") Cc: Alexander Duyck Cc: Linus Torvalds Cc: Girish Moodalbail Signed-off-by: Cong Wang Reviewed-by: Girish Moodalbail Tested-by: Fengguang Wu Signed-off-by: David S. Miller --- net/8021q/vlan.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 9649579b5b9f..4a72ee4e2ae9 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -376,6 +376,9 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, dev->name); vlan_vid_add(dev, htons(ETH_P_8021Q), 0); } + if (event == NETDEV_DOWN && + (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) + vlan_vid_del(dev, htons(ETH_P_8021Q), 0); vlan_info = rtnl_dereference(dev->vlan_info); if (!vlan_info) @@ -423,9 +426,6 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, struct net_device *tmp; LIST_HEAD(close_list); - if (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER) - vlan_vid_del(dev, htons(ETH_P_8021Q), 0); - /* Put all VLANs for this dev in the down state too. */ vlan_group_for_each_dev(grp, i, vlandev) { flgs = vlandev->flags; -- cgit From bebc6082da0a9f5d47a1ea2edc099bf671058bd4 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 12 Nov 2017 10:46:13 -0800 Subject: Linux 4.14 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index bee2033e7d1d..ccd981892ef2 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 4 PATCHLEVEL = 14 SUBLEVEL = 0 -EXTRAVERSION = -rc8 +EXTRAVERSION = NAME = Fearless Coyote # *DOCUMENTATION* -- cgit From ba1029c9cbc5af834467bf264e58f9d1074735fb Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 12 Nov 2017 11:21:45 -0800 Subject: modpost: detect modules without a MODULE_LICENSE Partially revert commit 2fa365682943 ("kbuild: soften MODULE_LICENSE check") so that modpost detects modules that do not have a MODULE_LICENSE. Sam's commit also changed the fatal error to a warning, which I am leaving as is. This gives advance notice of when a module has no license and will taint the kernel if the module is loaded. This produces the following warnings on x86_64 allmodconfig: MODPOST 6520 modules WARNING: modpost: missing MODULE_LICENSE() in drivers/auxdisplay/img-ascii-lcd.o WARNING: modpost: missing MODULE_LICENSE() in drivers/gpio/gpio-ath79.o WARNING: modpost: missing MODULE_LICENSE() in drivers/gpio/gpio-iop.o WARNING: modpost: missing MODULE_LICENSE() in drivers/iio/accel/kxsd9-i2c.o WARNING: modpost: missing MODULE_LICENSE() in drivers/iio/adc/qcom-vadc-common.o WARNING: modpost: missing MODULE_LICENSE() in drivers/media/platform/mtk-vcodec/mtk-vcodec-common.o WARNING: modpost: missing MODULE_LICENSE() in drivers/media/platform/soc_camera/soc_scale_crop.o WARNING: modpost: missing MODULE_LICENSE() in drivers/mtd/nand/denali_pci.o WARNING: modpost: missing MODULE_LICENSE() in drivers/net/phy/cortina.o WARNING: modpost: missing MODULE_LICENSE() in drivers/pinctrl/pxa/pinctrl-pxa2xx.o WARNING: modpost: missing MODULE_LICENSE() in drivers/power/reset/zx-reboot.o WARNING: modpost: missing MODULE_LICENSE() in drivers/rpmsg/qcom_glink_native.o WARNING: modpost: missing MODULE_LICENSE() in drivers/staging/comedi/drivers/ni_atmio.o WARNING: modpost: missing MODULE_LICENSE() in net/9p/9pnet_xen.o WARNING: modpost: missing MODULE_LICENSE() in sound/soc/codecs/snd-soc-pcm512x-spi.o Signed-off-by: Randy Dunlap Cc: Sam Ravnborg Signed-off-by: Linus Torvalds --- scripts/mod/modpost.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 98314b400a95..f51cf977c65b 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -1963,7 +1963,7 @@ static void read_symbols(char *modname) } license = get_modinfo(info.modinfo, info.modinfo_len, "license"); - if (info.modinfo && !license && !is_vmlinux(modname)) + if (!license && !is_vmlinux(modname)) warn("modpost: missing MODULE_LICENSE() in %s\n" "see include/linux/module.h for " "more information\n", modname); -- cgit From 277642dcca765a1955d4c753a5a315ff7f2eb09d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 12 Nov 2017 17:00:53 -0800 Subject: modules: make sysfs attribute files readable by owner only This code goes back to the historical bitkeeper tree commit 3f7b0672086 ("Module section offsets in /sys/module"), where Jonathan Corbet wanted to show people how to debug loadable modules. See https://lwn.net/Articles/88052/ from June 2004. To expose the required load address information, Jonathan added the sections subdirectory for every module in /sys/modules, and made them S_IRUGO - readable by everybody. It was a more innocent time, plus those S_IRxxx macro names are a lot more confusing than the octal numbers are, so maybe it wasn't even intentional. But here we are, thirteen years later, and I'll just change it to S_IRUSR instead. Let's see if anybody even notices. Cc: Jonathan Corbet Signed-off-by: Linus Torvalds --- kernel/module.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/module.c b/kernel/module.c index de66ec825992..fdb3a6aca363 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1516,7 +1516,7 @@ static void add_sect_attrs(struct module *mod, const struct load_info *info) sattr->mattr.show = module_sect_show; sattr->mattr.store = NULL; sattr->mattr.attr.name = sattr->name; - sattr->mattr.attr.mode = S_IRUGO; + sattr->mattr.attr.mode = S_IRUSR; *(gattr++) = &(sattr++)->mattr.attr; } *gattr = NULL; -- cgit From 516fb7f2e73dcc303fb97fc3593209fcacf2d982 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 12 Nov 2017 18:44:23 -0800 Subject: /proc/module: use the same logic as /proc/kallsyms for address exposure The (alleged) users of the module addresses are the same: kernel profiling. So just expose the same helper and format macros, and unify the logic. Signed-off-by: Linus Torvalds --- include/linux/kallsyms.h | 8 ++++++++ kernel/kallsyms.c | 8 +------- kernel/module.c | 20 ++++++++++++++++++-- 3 files changed, 27 insertions(+), 9 deletions(-) diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index 11dd93e42580..0a777c5216b1 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -14,6 +14,14 @@ #define KSYM_SYMBOL_LEN (sizeof("%s+%#lx/%#lx [%s]") + (KSYM_NAME_LEN - 1) + \ 2*(BITS_PER_LONG*3/10) + (MODULE_NAME_LEN - 1) + 1) +/* How and when do we show kallsyms values? */ +extern int kallsyms_show_value(void); +#ifndef CONFIG_64BIT +# define KALLSYM_FMT "%08lx" +#else +# define KALLSYM_FMT "%016lx" +#endif + struct module; #ifdef CONFIG_KALLSYMS diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 51b49ed452e4..1e6ae66c6244 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -581,12 +581,6 @@ static void s_stop(struct seq_file *m, void *p) { } -#ifndef CONFIG_64BIT -# define KALLSYM_FMT "%08lx" -#else -# define KALLSYM_FMT "%016lx" -#endif - static int s_show(struct seq_file *m, void *p) { unsigned long value; @@ -640,7 +634,7 @@ static inline int kallsyms_for_perf(void) * Otherwise, require CAP_SYSLOG (assuming kptr_restrict isn't set to * block even that). */ -static int kallsyms_show_value(void) +int kallsyms_show_value(void) { switch (kptr_restrict) { case 0: diff --git a/kernel/module.c b/kernel/module.c index fdb3a6aca363..0122747ba150 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -4147,6 +4147,7 @@ static int m_show(struct seq_file *m, void *p) { struct module *mod = list_entry(p, struct module, list); char buf[MODULE_FLAGS_BUF_SIZE]; + unsigned long value; /* We always ignore unformed modules. */ if (mod->state == MODULE_STATE_UNFORMED) @@ -4162,7 +4163,8 @@ static int m_show(struct seq_file *m, void *p) mod->state == MODULE_STATE_COMING ? "Loading" : "Live"); /* Used by oprofile and other similar tools. */ - seq_printf(m, " 0x%pK", mod->core_layout.base); + value = m->private ? 0 : (unsigned long)mod->core_layout.base; + seq_printf(m, " 0x" KALLSYM_FMT, value); /* Taints info */ if (mod->taints) @@ -4184,9 +4186,23 @@ static const struct seq_operations modules_op = { .show = m_show }; +/* + * This also sets the "private" pointer to non-NULL if the + * kernel pointers should be hidden (so you can just test + * "m->private" to see if you should keep the values private). + * + * We use the same logic as for /proc/kallsyms. + */ static int modules_open(struct inode *inode, struct file *file) { - return seq_open(file, &modules_op); + int err = seq_open(file, &modules_op); + + if (!err) { + struct seq_file *m = file->private_data; + m->private = kallsyms_show_value() ? NULL : (void *)8ul; + } + + return 0; } static const struct file_operations proc_modules_operations = { -- cgit