From 833b45de69a6016c4b0cebe6765d526a31a81580 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 30 Sep 2019 18:48:44 +0200 Subject: kvm: x86, powerpc: do not allow clearing largepages debugfs entry The largepages debugfs entry is incremented/decremented as shadow pages are created or destroyed. Clearing it will result in an underflow, which is harmless to KVM but ugly (and could be misinterpreted by tools that use debugfs information), so make this particular statistic read-only. Cc: kvm-ppc@vger.kernel.org Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index fcb46b3374c6..719fc3e15ea4 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1090,6 +1090,7 @@ enum kvm_stat_kind { struct kvm_stat_data { int offset; + int mode; struct kvm *kvm; }; @@ -1097,6 +1098,7 @@ struct kvm_stats_debugfs_item { const char *name; int offset; enum kvm_stat_kind kind; + int mode; }; extern struct kvm_stats_debugfs_item debugfs_entries[]; extern struct dentry *kvm_debugfs_dir; -- cgit From f5a1a536fa14895ccff4e94e6a5af90901ce86aa Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Tue, 1 Oct 2019 11:10:52 +1000 Subject: lib: introduce copy_struct_from_user() helper A common pattern for syscall extensions is increasing the size of a struct passed from userspace, such that the zero-value of the new fields result in the old kernel behaviour (allowing for a mix of userspace and kernel vintages to operate on one another in most cases). While this interface exists for communication in both directions, only one interface is straightforward to have reasonable semantics for (userspace passing a struct to the kernel). For kernel returns to userspace, what the correct semantics are (whether there should be an error if userspace is unaware of a new extension) is very syscall-dependent and thus probably cannot be unified between syscalls (a good example of this problem is [1]). Previously there was no common lib/ function that implemented the necessary extension-checking semantics (and different syscalls implemented them slightly differently or incompletely[2]). Future patches replace common uses of this pattern to make use of copy_struct_from_user(). Some in-kernel selftests that insure that the handling of alignment and various byte patterns are all handled identically to memchr_inv() usage. [1]: commit 1251201c0d34 ("sched/core: Fix uclamp ABI bug, clean up and robustify sched_read_attr() ABI logic and code") [2]: For instance {sched_setattr,perf_event_open,clone3}(2) all do do similar checks to copy_struct_from_user() while rt_sigprocmask(2) always rejects differently-sized struct arguments. Suggested-by: Rasmus Villemoes Signed-off-by: Aleksa Sarai Reviewed-by: Kees Cook Reviewed-by: Christian Brauner Link: https://lore.kernel.org/r/20191001011055.19283-2-cyphar@cyphar.com Signed-off-by: Christian Brauner --- include/linux/bitops.h | 7 +++ include/linux/uaccess.h | 70 +++++++++++++++++++++++++ lib/strnlen_user.c | 8 +-- lib/test_user_copy.c | 136 +++++++++++++++++++++++++++++++++++++++++++++--- lib/usercopy.c | 55 ++++++++++++++++++++ 5 files changed, 263 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index cf074bce3eb3..c94a9ff9f082 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -4,6 +4,13 @@ #include #include +/* Set bits in the first 'n' bytes when loaded from memory */ +#ifdef __LITTLE_ENDIAN +# define aligned_byte_mask(n) ((1UL << 8*(n))-1) +#else +# define aligned_byte_mask(n) (~0xffUL << (BITS_PER_LONG - 8 - 8*(n))) +#endif + #define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE) #define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_TYPE(long)) diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 70bbdc38dc37..e47d0522a1f4 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -231,6 +231,76 @@ __copy_from_user_inatomic_nocache(void *to, const void __user *from, #endif /* ARCH_HAS_NOCACHE_UACCESS */ +extern __must_check int check_zeroed_user(const void __user *from, size_t size); + +/** + * copy_struct_from_user: copy a struct from userspace + * @dst: Destination address, in kernel space. This buffer must be @ksize + * bytes long. + * @ksize: Size of @dst struct. + * @src: Source address, in userspace. + * @usize: (Alleged) size of @src struct. + * + * Copies a struct from userspace to kernel space, in a way that guarantees + * backwards-compatibility for struct syscall arguments (as long as future + * struct extensions are made such that all new fields are *appended* to the + * old struct, and zeroed-out new fields have the same meaning as the old + * struct). + * + * @ksize is just sizeof(*dst), and @usize should've been passed by userspace. + * The recommended usage is something like the following: + * + * SYSCALL_DEFINE2(foobar, const struct foo __user *, uarg, size_t, usize) + * { + * int err; + * struct foo karg = {}; + * + * if (usize > PAGE_SIZE) + * return -E2BIG; + * if (usize < FOO_SIZE_VER0) + * return -EINVAL; + * + * err = copy_struct_from_user(&karg, sizeof(karg), uarg, usize); + * if (err) + * return err; + * + * // ... + * } + * + * There are three cases to consider: + * * If @usize == @ksize, then it's copied verbatim. + * * If @usize < @ksize, then the userspace has passed an old struct to a + * newer kernel. The rest of the trailing bytes in @dst (@ksize - @usize) + * are to be zero-filled. + * * If @usize > @ksize, then the userspace has passed a new struct to an + * older kernel. The trailing bytes unknown to the kernel (@usize - @ksize) + * are checked to ensure they are zeroed, otherwise -E2BIG is returned. + * + * Returns (in all cases, some data may have been copied): + * * -E2BIG: (@usize > @ksize) and there are non-zero trailing bytes in @src. + * * -EFAULT: access to userspace failed. + */ +static __always_inline __must_check int +copy_struct_from_user(void *dst, size_t ksize, const void __user *src, + size_t usize) +{ + size_t size = min(ksize, usize); + size_t rest = max(ksize, usize) - size; + + /* Deal with trailing bytes. */ + if (usize < ksize) { + memset(dst + size, 0, rest); + } else if (usize > ksize) { + int ret = check_zeroed_user(src + size, rest); + if (ret <= 0) + return ret ?: -E2BIG; + } + /* Copy the interoperable parts of the struct. */ + if (copy_from_user(dst, src, size)) + return -EFAULT; + return 0; +} + /* * probe_kernel_read(): safely attempt to read from a location * @dst: pointer to the buffer that shall take the data diff --git a/lib/strnlen_user.c b/lib/strnlen_user.c index 28ff554a1be8..6c0005d5dd5c 100644 --- a/lib/strnlen_user.c +++ b/lib/strnlen_user.c @@ -3,16 +3,10 @@ #include #include #include +#include #include -/* Set bits in the first 'n' bytes when loaded from memory */ -#ifdef __LITTLE_ENDIAN -# define aligned_byte_mask(n) ((1ul << 8*(n))-1) -#else -# define aligned_byte_mask(n) (~0xfful << (BITS_PER_LONG - 8 - 8*(n))) -#endif - /* * Do a strnlen, return length of string *with* final '\0'. * 'count' is the user-supplied count, while 'max' is the diff --git a/lib/test_user_copy.c b/lib/test_user_copy.c index 67bcd5dfd847..950ee88cd6ac 100644 --- a/lib/test_user_copy.c +++ b/lib/test_user_copy.c @@ -31,14 +31,133 @@ # define TEST_U64 #endif -#define test(condition, msg) \ -({ \ - int cond = (condition); \ - if (cond) \ - pr_warn("%s\n", msg); \ - cond; \ +#define test(condition, msg, ...) \ +({ \ + int cond = (condition); \ + if (cond) \ + pr_warn("[%d] " msg "\n", __LINE__, ##__VA_ARGS__); \ + cond; \ }) +static bool is_zeroed(void *from, size_t size) +{ + return memchr_inv(from, 0x0, size) == NULL; +} + +static int test_check_nonzero_user(char *kmem, char __user *umem, size_t size) +{ + int ret = 0; + size_t start, end, i; + size_t zero_start = size / 4; + size_t zero_end = size - zero_start; + + /* + * We conduct a series of check_nonzero_user() tests on a block of memory + * with the following byte-pattern (trying every possible [start,end] + * pair): + * + * [ 00 ff 00 ff ... 00 00 00 00 ... ff 00 ff 00 ] + * + * And we verify that check_nonzero_user() acts identically to memchr_inv(). + */ + + memset(kmem, 0x0, size); + for (i = 1; i < zero_start; i += 2) + kmem[i] = 0xff; + for (i = zero_end; i < size; i += 2) + kmem[i] = 0xff; + + ret |= test(copy_to_user(umem, kmem, size), + "legitimate copy_to_user failed"); + + for (start = 0; start <= size; start++) { + for (end = start; end <= size; end++) { + size_t len = end - start; + int retval = check_zeroed_user(umem + start, len); + int expected = is_zeroed(kmem + start, len); + + ret |= test(retval != expected, + "check_nonzero_user(=%d) != memchr_inv(=%d) mismatch (start=%zu, end=%zu)", + retval, expected, start, end); + } + } + + return ret; +} + +static int test_copy_struct_from_user(char *kmem, char __user *umem, + size_t size) +{ + int ret = 0; + char *umem_src = NULL, *expected = NULL; + size_t ksize, usize; + + umem_src = kmalloc(size, GFP_KERNEL); + if (ret |= test(umem_src == NULL, "kmalloc failed")) + goto out_free; + + expected = kmalloc(size, GFP_KERNEL); + if (ret |= test(expected == NULL, "kmalloc failed")) + goto out_free; + + /* Fill umem with a fixed byte pattern. */ + memset(umem_src, 0x3e, size); + ret |= test(copy_to_user(umem, umem_src, size), + "legitimate copy_to_user failed"); + + /* Check basic case -- (usize == ksize). */ + ksize = size; + usize = size; + + memcpy(expected, umem_src, ksize); + + memset(kmem, 0x0, size); + ret |= test(copy_struct_from_user(kmem, ksize, umem, usize), + "copy_struct_from_user(usize == ksize) failed"); + ret |= test(memcmp(kmem, expected, ksize), + "copy_struct_from_user(usize == ksize) gives unexpected copy"); + + /* Old userspace case -- (usize < ksize). */ + ksize = size; + usize = size / 2; + + memcpy(expected, umem_src, usize); + memset(expected + usize, 0x0, ksize - usize); + + memset(kmem, 0x0, size); + ret |= test(copy_struct_from_user(kmem, ksize, umem, usize), + "copy_struct_from_user(usize < ksize) failed"); + ret |= test(memcmp(kmem, expected, ksize), + "copy_struct_from_user(usize < ksize) gives unexpected copy"); + + /* New userspace (-E2BIG) case -- (usize > ksize). */ + ksize = size / 2; + usize = size; + + memset(kmem, 0x0, size); + ret |= test(copy_struct_from_user(kmem, ksize, umem, usize) != -E2BIG, + "copy_struct_from_user(usize > ksize) didn't give E2BIG"); + + /* New userspace (success) case -- (usize > ksize). */ + ksize = size / 2; + usize = size; + + memcpy(expected, umem_src, ksize); + ret |= test(clear_user(umem + ksize, usize - ksize), + "legitimate clear_user failed"); + + memset(kmem, 0x0, size); + ret |= test(copy_struct_from_user(kmem, ksize, umem, usize), + "copy_struct_from_user(usize > ksize) failed"); + ret |= test(memcmp(kmem, expected, ksize), + "copy_struct_from_user(usize > ksize) gives unexpected copy"); + +out_free: + kfree(expected); + kfree(umem_src); + return ret; +} + static int __init test_user_copy_init(void) { int ret = 0; @@ -106,6 +225,11 @@ static int __init test_user_copy_init(void) #endif #undef test_legit + /* Test usage of check_nonzero_user(). */ + ret |= test_check_nonzero_user(kmem, usermem, 2 * PAGE_SIZE); + /* Test usage of copy_struct_from_user(). */ + ret |= test_copy_struct_from_user(kmem, usermem, 2 * PAGE_SIZE); + /* * Invalid usage: none of these copies should succeed. */ diff --git a/lib/usercopy.c b/lib/usercopy.c index c2bfbcaeb3dc..cbb4d9ec00f2 100644 --- a/lib/usercopy.c +++ b/lib/usercopy.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include /* out-of-line parts */ @@ -31,3 +32,57 @@ unsigned long _copy_to_user(void __user *to, const void *from, unsigned long n) } EXPORT_SYMBOL(_copy_to_user); #endif + +/** + * check_zeroed_user: check if a userspace buffer only contains zero bytes + * @from: Source address, in userspace. + * @size: Size of buffer. + * + * This is effectively shorthand for "memchr_inv(from, 0, size) == NULL" for + * userspace addresses (and is more efficient because we don't care where the + * first non-zero byte is). + * + * Returns: + * * 0: There were non-zero bytes present in the buffer. + * * 1: The buffer was full of zero bytes. + * * -EFAULT: access to userspace failed. + */ +int check_zeroed_user(const void __user *from, size_t size) +{ + unsigned long val; + uintptr_t align = (uintptr_t) from % sizeof(unsigned long); + + if (unlikely(size == 0)) + return 1; + + from -= align; + size += align; + + if (!user_access_begin(from, size)) + return -EFAULT; + + unsafe_get_user(val, (unsigned long __user *) from, err_fault); + if (align) + val &= ~aligned_byte_mask(align); + + while (size > sizeof(unsigned long)) { + if (unlikely(val)) + goto done; + + from += sizeof(unsigned long); + size -= sizeof(unsigned long); + + unsafe_get_user(val, (unsigned long __user *) from, err_fault); + } + + if (size < sizeof(unsigned long)) + val &= aligned_byte_mask(size); + +done: + user_access_end(); + return (val == 0); +err_fault: + user_access_end(); + return -EFAULT; +} +EXPORT_SYMBOL(check_zeroed_user); -- cgit From 895b5c9f206eb7d25dc1360a8ccfc5958895eb89 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 29 Sep 2019 20:54:03 +0200 Subject: netfilter: drop bridge nf reset from nf_reset commit 174e23810cd31 ("sk_buff: drop all skb extensions on free and skb scrubbing") made napi recycle always drop skb extensions. The additional skb_ext_del() that is performed via nf_reset on napi skb recycle is not needed anymore. Most nf_reset() calls in the stack are there so queued skb won't block 'rmmod nf_conntrack' indefinitely. This removes the skb_ext_del from nf_reset, and renames it to a more fitting nf_reset_ct(). In a few selected places, add a call to skb_ext_reset to make sure that no active extensions remain. I am submitting this for "net", because we're still early in the release cycle. The patch applies to net-next too, but I think the rename causes needless divergence between those trees. Suggested-by: Eric Dumazet Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- drivers/net/ppp/pptp.c | 4 ++-- drivers/net/tun.c | 2 +- drivers/net/virtio_net.c | 2 +- drivers/net/vrf.c | 8 ++++---- drivers/net/wireless/mac80211_hwsim.c | 4 ++-- drivers/staging/octeon/ethernet-tx.c | 6 ++---- include/linux/skbuff.h | 5 +---- net/batman-adv/soft-interface.c | 2 +- net/core/skbuff.c | 2 +- net/dccp/ipv4.c | 2 +- net/ipv4/ip_input.c | 2 +- net/ipv4/ipmr.c | 4 ++-- net/ipv4/netfilter/nf_dup_ipv4.c | 2 +- net/ipv4/raw.c | 2 +- net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/udp.c | 4 ++-- net/ipv6/ip6_input.c | 2 +- net/ipv6/netfilter/nf_dup_ipv6.c | 2 +- net/ipv6/raw.c | 2 +- net/l2tp/l2tp_core.c | 2 +- net/l2tp/l2tp_eth.c | 2 +- net/l2tp/l2tp_ip.c | 2 +- net/l2tp/l2tp_ip6.c | 2 +- net/netfilter/ipvs/ip_vs_xmit.c | 2 +- net/openvswitch/vport-internal_dev.c | 2 +- net/packet/af_packet.c | 4 ++-- net/sctp/input.c | 2 +- net/xfrm/xfrm_input.c | 2 +- net/xfrm/xfrm_interface.c | 2 +- net/xfrm/xfrm_output.c | 2 +- net/xfrm/xfrm_policy.c | 2 +- 31 files changed, 40 insertions(+), 45 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c index 734de7de03f7..e1fabb3e3246 100644 --- a/drivers/net/ppp/pptp.c +++ b/drivers/net/ppp/pptp.c @@ -238,7 +238,7 @@ static int pptp_xmit(struct ppp_channel *chan, struct sk_buff *skb) skb_dst_drop(skb); skb_dst_set(skb, &rt->dst); - nf_reset(skb); + nf_reset_ct(skb); skb->ip_summed = CHECKSUM_NONE; ip_select_ident(net, skb, NULL); @@ -358,7 +358,7 @@ static int pptp_rcv(struct sk_buff *skb) po = lookup_chan(htons(header->call_id), iph->saddr); if (po) { skb_dst_drop(skb); - nf_reset(skb); + nf_reset_ct(skb); return sk_receive_skb(sk_pppox(po), skb, 0); } drop: diff --git a/drivers/net/tun.c b/drivers/net/tun.c index aab0be40d443..812dc3a65efb 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1104,7 +1104,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) */ skb_orphan(skb); - nf_reset(skb); + nf_reset_ct(skb); if (ptr_ring_produce(&tfile->tx_ring, skb)) goto drop; diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index ba98e0971b84..5a635f028bdc 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1585,7 +1585,7 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) /* Don't wait up for transmitted skbs to be freed. */ if (!use_napi) { skb_orphan(skb); - nf_reset(skb); + nf_reset_ct(skb); } /* If running out of space, stop queue to avoid getting packets that we diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index a4b38a980c3c..ee52bde058df 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -366,7 +366,7 @@ static int vrf_finish_output6(struct net *net, struct sock *sk, struct neighbour *neigh; int ret; - nf_reset(skb); + nf_reset_ct(skb); skb->protocol = htons(ETH_P_IPV6); skb->dev = dev; @@ -459,7 +459,7 @@ static struct sk_buff *vrf_ip6_out_direct(struct net_device *vrf_dev, /* reset skb device */ if (likely(err == 1)) - nf_reset(skb); + nf_reset_ct(skb); else skb = NULL; @@ -560,7 +560,7 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s bool is_v6gw = false; int ret = -EINVAL; - nf_reset(skb); + nf_reset_ct(skb); /* Be paranoid, rather than too clever. */ if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { @@ -670,7 +670,7 @@ static struct sk_buff *vrf_ip_out_direct(struct net_device *vrf_dev, /* reset skb device */ if (likely(err == 1)) - nf_reset(skb); + nf_reset_ct(skb); else skb = NULL; diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 635956024e88..45c73a6f09a1 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -1261,8 +1261,8 @@ static bool mac80211_hwsim_tx_frame_no_nl(struct ieee80211_hw *hw, skb_orphan(skb); skb_dst_drop(skb); skb->mark = 0; - secpath_reset(skb); - nf_reset(skb); + skb_ext_reset(skb); + nf_reset_ct(skb); /* * Get absolute mactime here so all HWs RX at the "same time", and diff --git a/drivers/staging/octeon/ethernet-tx.c b/drivers/staging/octeon/ethernet-tx.c index c64728fc21f2..a62057555d1b 100644 --- a/drivers/staging/octeon/ethernet-tx.c +++ b/drivers/staging/octeon/ethernet-tx.c @@ -349,10 +349,8 @@ int cvm_oct_xmit(struct sk_buff *skb, struct net_device *dev) */ dst_release(skb_dst(skb)); skb_dst_set(skb, NULL); -#ifdef CONFIG_XFRM - secpath_reset(skb); -#endif - nf_reset(skb); + skb_ext_reset(skb); + nf_reset_ct(skb); #ifdef CONFIG_NET_SCHED skb->tc_index = 0; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index e7d3b1a513ef..4351577b14d7 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4160,15 +4160,12 @@ static inline void __skb_ext_copy(struct sk_buff *d, const struct sk_buff *s) {} static inline void skb_ext_copy(struct sk_buff *dst, const struct sk_buff *s) {} #endif /* CONFIG_SKB_EXTENSIONS */ -static inline void nf_reset(struct sk_buff *skb) +static inline void nf_reset_ct(struct sk_buff *skb) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) nf_conntrack_put(skb_nfct(skb)); skb->_nfct = 0; #endif -#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) - skb_ext_del(skb, SKB_EXT_BRIDGE_NF); -#endif } static inline void nf_reset_trace(struct sk_buff *skb) diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index a1146cb10919..9cbed6f5a85a 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -436,7 +436,7 @@ void batadv_interface_rx(struct net_device *soft_iface, /* clean the netfilter state now that the batman-adv header has been * removed */ - nf_reset(skb); + nf_reset_ct(skb); if (unlikely(!pskb_may_pull(skb, ETH_HLEN))) goto dropped; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 01d65206f4fb..529133611ea2 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -5120,7 +5120,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet) skb->ignore_df = 0; skb_dst_drop(skb); skb_ext_reset(skb); - nf_reset(skb); + nf_reset_ct(skb); nf_reset_trace(skb); #ifdef CONFIG_NET_SWITCHDEV diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index b685bc82f8d0..d9b4200ed12d 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -871,7 +871,7 @@ lookup: if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; - nf_reset(skb); + nf_reset_ct(skb); return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4, refcounted); diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 1e2392b7c64e..c59a78a267c3 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -199,7 +199,7 @@ resubmit: kfree_skb(skb); return; } - nf_reset(skb); + nf_reset_ct(skb); } ret = INDIRECT_CALL_2(ipprot->handler, tcp_v4_rcv, udp_rcv, skb); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 313470f6bb14..716d5472c022 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1794,7 +1794,7 @@ static void ip_encap(struct net *net, struct sk_buff *skb, ip_send_check(iph); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - nf_reset(skb); + nf_reset_ct(skb); } static inline int ipmr_forward_finish(struct net *net, struct sock *sk, @@ -2140,7 +2140,7 @@ int ip_mr_input(struct sk_buff *skb) mroute_sk = rcu_dereference(mrt->mroute_sk); if (mroute_sk) { - nf_reset(skb); + nf_reset_ct(skb); raw_rcv(mroute_sk, skb); return 0; } diff --git a/net/ipv4/netfilter/nf_dup_ipv4.c b/net/ipv4/netfilter/nf_dup_ipv4.c index af3fbf76dbd3..6cc5743c553a 100644 --- a/net/ipv4/netfilter/nf_dup_ipv4.c +++ b/net/ipv4/netfilter/nf_dup_ipv4.c @@ -65,7 +65,7 @@ void nf_dup_ipv4(struct net *net, struct sk_buff *skb, unsigned int hooknum, #if IS_ENABLED(CONFIG_NF_CONNTRACK) /* Avoid counting cloned packets towards the original connection. */ - nf_reset(skb); + nf_reset_ct(skb); nf_ct_set(skb, NULL, IP_CT_UNTRACKED); #endif /* diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 80da5a66d5d7..3183413ebc6c 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -332,7 +332,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb) kfree_skb(skb); return NET_RX_DROP; } - nf_reset(skb); + nf_reset_ct(skb); skb_push(skb, skb->data - skb_network_header(skb)); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 2ee45e3755e9..bf124b1742df 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1916,7 +1916,7 @@ process: if (tcp_v4_inbound_md5_hash(sk, skb)) goto discard_and_relse; - nf_reset(skb); + nf_reset_ct(skb); if (tcp_filter(sk, skb)) goto discard_and_relse; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index cf755156a684..e8443cc5c1ab 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1969,7 +1969,7 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) */ if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) goto drop; - nf_reset(skb); + nf_reset_ct(skb); if (static_branch_unlikely(&udp_encap_needed_key) && up->encap_type) { int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); @@ -2298,7 +2298,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) goto drop; - nf_reset(skb); + nf_reset_ct(skb); /* No socket. Drop packet silently, if checksum is wrong */ if (udp_lib_checksum_complete(skb)) diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index d432d0011c16..7e5df23cbe7b 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -371,7 +371,7 @@ resubmit_final: /* Free reference early: we don't need it any more, and it may hold ip_conntrack module loaded indefinitely. */ - nf_reset(skb); + nf_reset_ct(skb); skb_postpull_rcsum(skb, skb_network_header(skb), skb_network_header_len(skb)); diff --git a/net/ipv6/netfilter/nf_dup_ipv6.c b/net/ipv6/netfilter/nf_dup_ipv6.c index e6c9da9866b1..a0a2de30be3e 100644 --- a/net/ipv6/netfilter/nf_dup_ipv6.c +++ b/net/ipv6/netfilter/nf_dup_ipv6.c @@ -54,7 +54,7 @@ void nf_dup_ipv6(struct net *net, struct sk_buff *skb, unsigned int hooknum, return; #if IS_ENABLED(CONFIG_NF_CONNTRACK) - nf_reset(skb); + nf_reset_ct(skb); nf_ct_set(skb, NULL, IP_CT_UNTRACKED); #endif if (hooknum == NF_INET_PRE_ROUTING || diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 6e1888ee4036..a77f6b7d3a7c 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -215,7 +215,7 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) /* Not releasing hash table! */ if (clone) { - nf_reset(clone); + nf_reset_ct(clone); rawv6_rcv(sk, clone); } } diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 105e5a7092e7..f82ea12bac37 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1078,7 +1078,7 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | IPSKB_REROUTED); - nf_reset(skb); + nf_reset_ct(skb); bh_lock_sock(sk); if (sock_owned_by_user(sk)) { diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index bd3f39349d40..fd5ac2788e45 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -151,7 +151,7 @@ static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb, skb->ip_summed = CHECKSUM_NONE; skb_dst_drop(skb); - nf_reset(skb); + nf_reset_ct(skb); rcu_read_lock(); dev = rcu_dereference(spriv->dev); diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 622833317dcb..0d7c887a2b75 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -193,7 +193,7 @@ pass_up: if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_put; - nf_reset(skb); + nf_reset_ct(skb); return sk_receive_skb(sk, skb, 1); diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 687e23a8b326..802f19aba7e3 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -206,7 +206,7 @@ pass_up: if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_put; - nf_reset(skb); + nf_reset_ct(skb); return sk_receive_skb(sk, skb, 1); diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 9c464d24beec..888d3068a492 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -613,7 +613,7 @@ static inline int ip_vs_tunnel_xmit_prepare(struct sk_buff *skb, if (unlikely(cp->flags & IP_VS_CONN_F_NFCT)) ret = ip_vs_confirm_conntrack(skb); if (ret == NF_ACCEPT) { - nf_reset(skb); + nf_reset_ct(skb); skb_forward_csum(skb); } return ret; diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index d2437b5b2f6a..21c90d3a7ebf 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -237,7 +237,7 @@ static netdev_tx_t internal_dev_recv(struct sk_buff *skb) } skb_dst_drop(skb); - nf_reset(skb); + nf_reset_ct(skb); secpath_reset(skb); skb->pkt_type = PACKET_HOST; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index e2742b006d25..82a50e850245 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1821,7 +1821,7 @@ static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, skb_dst_drop(skb); /* drop conntrack reference */ - nf_reset(skb); + nf_reset_ct(skb); spkt = &PACKET_SKB_CB(skb)->sa.pkt; @@ -2121,7 +2121,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, skb_dst_drop(skb); /* drop conntrack reference */ - nf_reset(skb); + nf_reset_ct(skb); spin_lock(&sk->sk_receive_queue.lock); po->stats.stats1.tp_packets++; diff --git a/net/sctp/input.c b/net/sctp/input.c index 1008cdc44dd6..5a070fb5b278 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -201,7 +201,7 @@ int sctp_rcv(struct sk_buff *skb) if (!xfrm_policy_check(sk, XFRM_POLICY_IN, skb, family)) goto discard_release; - nf_reset(skb); + nf_reset_ct(skb); if (sk_filter(sk, skb)) goto discard_release; diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 6088bc2dc11e..9b599ed66d97 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -706,7 +706,7 @@ resume: if (err) goto drop; - nf_reset(skb); + nf_reset_ct(skb); if (decaps) { sp = skb_sec_path(skb); diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index 2ab4859df55a..0f5131bc3342 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -185,7 +185,7 @@ static void xfrmi_scrub_packet(struct sk_buff *skb, bool xnet) skb->skb_iif = 0; skb->ignore_df = 0; skb_dst_drop(skb); - nf_reset(skb); + nf_reset_ct(skb); nf_reset_trace(skb); if (!xnet) diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 9499b35feb92..b1db55b50ba1 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -502,7 +502,7 @@ int xfrm_output_resume(struct sk_buff *skb, int err) struct net *net = xs_net(skb_dst(skb)->xfrm); while (likely((err = xfrm_output_one(skb, err)) == 0)) { - nf_reset(skb); + nf_reset_ct(skb); err = skb_dst(skb)->ops->local_out(net, skb->sk, skb); if (unlikely(err != 1)) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 21e939235b39..f2d1e573ea55 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2808,7 +2808,7 @@ static void xfrm_policy_queue_process(struct timer_list *t) continue; } - nf_reset(skb); + nf_reset_ct(skb); skb_dst_drop(skb); skb_dst_set(skb, dst); -- cgit From 3e8db7e56082156a37b71d7334860c10fcea8025 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 1 Oct 2019 21:58:19 +0300 Subject: net: dsa: sja1105: Fix sleeping while atomic in .port_hwtstamp_set Currently this stack trace can be seen with CONFIG_DEBUG_ATOMIC_SLEEP=y: [ 41.568348] BUG: sleeping function called from invalid context at kernel/locking/mutex.c:909 [ 41.576757] in_atomic(): 1, irqs_disabled(): 0, pid: 208, name: ptp4l [ 41.583212] INFO: lockdep is turned off. [ 41.587123] CPU: 1 PID: 208 Comm: ptp4l Not tainted 5.3.0-rc6-01445-ge950f2d4bc7f-dirty #1827 [ 41.599873] [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [ 41.607584] [] (show_stack) from [] (dump_stack+0xd4/0x100) [ 41.614863] [] (dump_stack) from [] (___might_sleep+0x1c8/0x2b4) [ 41.622574] [] (___might_sleep) from [] (__mutex_lock+0x48/0xab8) [ 41.630368] [] (__mutex_lock) from [] (mutex_lock_nested+0x1c/0x24) [ 41.638340] [] (mutex_lock_nested) from [] (sja1105_static_config_reload+0x30/0x27c) [ 41.647779] [] (sja1105_static_config_reload) from [] (sja1105_hwtstamp_set+0x108/0x1cc) [ 41.657562] [] (sja1105_hwtstamp_set) from [] (dev_ifsioc+0x18c/0x330) [ 41.665788] [] (dev_ifsioc) from [] (dev_ioctl+0x320/0x6e8) [ 41.673064] [] (dev_ioctl) from [] (sock_ioctl+0x334/0x5e8) [ 41.680340] [] (sock_ioctl) from [] (do_vfs_ioctl+0xb0/0xa10) [ 41.687789] [] (do_vfs_ioctl) from [] (ksys_ioctl+0x34/0x58) [ 41.695151] [] (ksys_ioctl) from [] (ret_fast_syscall+0x0/0x28) [ 41.702768] Exception stack(0xe8495fa8 to 0xe8495ff0) [ 41.707796] 5fa0: beff4a8c 00000001 00000011 000089b0 beff4a8c beff4a80 [ 41.715933] 5fc0: beff4a8c 00000001 0000000c 00000036 b6fa98c8 004e19c1 00000001 00000000 [ 41.724069] 5fe0: 004dcedc beff4a6c 004c0738 b6e7af4c [ 41.729860] BUG: scheduling while atomic: ptp4l/208/0x00000002 [ 41.735682] INFO: lockdep is turned off. Enabling RX timestamping will logically disturb the fastpath (processing of meta frames). Replace bool hwts_rx_en with a bit that is checked atomically from the fastpath and temporarily unset from the sleepable context during a change of the RX timestamping process (a destructive operation anyways, requires switch reset). If found unset, the fastpath (net/dsa/tag_sja1105.c) will just drop any received meta frame and not take the meta_lock at all. Fixes: a602afd200f5 ("net: dsa: sja1105: Expose PTP timestamping ioctls to userspace") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/sja1105/sja1105_main.c | 19 +++++++++++-------- include/linux/dsa/sja1105.h | 4 +++- net/dsa/tag_sja1105.c | 12 +++++++++++- 3 files changed, 25 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index ea2e7f4f96d0..7687ddcae159 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -1897,7 +1897,9 @@ static int sja1105_set_ageing_time(struct dsa_switch *ds, return sja1105_static_config_reload(priv); } -/* Caller must hold priv->tagger_data.meta_lock */ +/* Must be called only with priv->tagger_data.state bit + * SJA1105_HWTS_RX_EN cleared + */ static int sja1105_change_rxtstamping(struct sja1105_private *priv, bool on) { @@ -1954,16 +1956,17 @@ static int sja1105_hwtstamp_set(struct dsa_switch *ds, int port, break; } - if (rx_on != priv->tagger_data.hwts_rx_en) { - spin_lock(&priv->tagger_data.meta_lock); + if (rx_on != test_bit(SJA1105_HWTS_RX_EN, &priv->tagger_data.state)) { + clear_bit(SJA1105_HWTS_RX_EN, &priv->tagger_data.state); + rc = sja1105_change_rxtstamping(priv, rx_on); - spin_unlock(&priv->tagger_data.meta_lock); if (rc < 0) { dev_err(ds->dev, "Failed to change RX timestamping: %d\n", rc); - return -EFAULT; + return rc; } - priv->tagger_data.hwts_rx_en = rx_on; + if (rx_on) + set_bit(SJA1105_HWTS_RX_EN, &priv->tagger_data.state); } if (copy_to_user(ifr->ifr_data, &config, sizeof(config))) @@ -1982,7 +1985,7 @@ static int sja1105_hwtstamp_get(struct dsa_switch *ds, int port, config.tx_type = HWTSTAMP_TX_ON; else config.tx_type = HWTSTAMP_TX_OFF; - if (priv->tagger_data.hwts_rx_en) + if (test_bit(SJA1105_HWTS_RX_EN, &priv->tagger_data.state)) config.rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT; else config.rx_filter = HWTSTAMP_FILTER_NONE; @@ -2031,7 +2034,7 @@ static bool sja1105_port_rxtstamp(struct dsa_switch *ds, int port, struct sja1105_private *priv = ds->priv; struct sja1105_tagger_data *data = &priv->tagger_data; - if (!data->hwts_rx_en) + if (!test_bit(SJA1105_HWTS_RX_EN, &data->state)) return false; /* We need to read the full PTP clock to reconstruct the Rx diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h index 79435cfc20eb..897e799dbcb9 100644 --- a/include/linux/dsa/sja1105.h +++ b/include/linux/dsa/sja1105.h @@ -31,6 +31,8 @@ #define SJA1105_META_SMAC 0x222222222222ull #define SJA1105_META_DMAC 0x0180C200000Eull +#define SJA1105_HWTS_RX_EN 0 + /* Global tagger data: each struct sja1105_port has a reference to * the structure defined in struct sja1105_private. */ @@ -42,7 +44,7 @@ struct sja1105_tagger_data { * from taggers running on multiple ports on SMP systems */ spinlock_t meta_lock; - bool hwts_rx_en; + unsigned long state; }; struct sja1105_skb_cb { diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c index 9c9aff3e52cf..63ef2a14c934 100644 --- a/net/dsa/tag_sja1105.c +++ b/net/dsa/tag_sja1105.c @@ -156,7 +156,11 @@ static struct sk_buff /* Step 1: A timestampable frame was received. * Buffer it until we get its meta frame. */ - if (is_link_local && sp->data->hwts_rx_en) { + if (is_link_local) { + if (!test_bit(SJA1105_HWTS_RX_EN, &sp->data->state)) + /* Do normal processing. */ + return skb; + spin_lock(&sp->data->meta_lock); /* Was this a link-local frame instead of the meta * that we were expecting? @@ -187,6 +191,12 @@ static struct sk_buff } else if (is_meta) { struct sk_buff *stampable_skb; + /* Drop the meta frame if we're not in the right state + * to process it. + */ + if (!test_bit(SJA1105_HWTS_RX_EN, &sp->data->state)) + return NULL; + spin_lock(&sp->data->meta_lock); stampable_skb = sp->data->stampable_skb; -- cgit From 4cf6c57e61fee954f7b7685de31b80ec26843d27 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 4 Oct 2019 17:05:58 +0100 Subject: net: phy: fix write to mii-ctrl1000 register When userspace writes to the MII_ADVERTISE register, we update phylib's advertising mask and trigger a renegotiation. However, writing to the MII_CTRL1000 register, which contains the gigabit advertisement, does neither. This can lead to phylib's copy of the advertisement becoming de-synced with the values in the PHY register set, which can result in incorrect negotiation resolution. Fixes: 5502b218e001 ("net: phy: use phy_resolve_aneg_linkmode in genphy_read_status") Reviewed-by: Andrew Lunn Signed-off-by: Russell King Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 5 +++++ include/linux/mii.h | 9 +++++++++ 2 files changed, 14 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 7c92afd36bbe..119e6f466056 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -457,6 +457,11 @@ int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd) val); change_autoneg = true; break; + case MII_CTRL1000: + mii_ctrl1000_mod_linkmode_adv_t(phydev->advertising, + val); + change_autoneg = true; + break; default: /* do nothing */ break; diff --git a/include/linux/mii.h b/include/linux/mii.h index 5cd824c1c0ca..4ce8901a1af6 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -455,6 +455,15 @@ static inline void mii_lpa_mod_linkmode_lpa_t(unsigned long *lp_advertising, lp_advertising, lpa & LPA_LPACK); } +static inline void mii_ctrl1000_mod_linkmode_adv_t(unsigned long *advertising, + u32 ctrl1000) +{ + linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, advertising, + ctrl1000 & ADVERTISE_1000HALF); + linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, advertising, + ctrl1000 & ADVERTISE_1000FULL); +} + /** * linkmode_adv_to_lcl_adv_t * @advertising:pointer to linkmode advertising -- cgit From 8d3dc3ac9dd6801c732a72ca6979698c38451b4f Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 4 Oct 2019 17:06:04 +0100 Subject: net: phy: extract link partner advertisement reading Move reading the link partner advertisement out of genphy_read_status() into its own separate function. This will allow re-use of this code by PHY drivers that are able to read the resolved status from the PHY. Tested-by: tinywrkb Signed-off-by: Russell King Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 65 +++++++++++++++++++++++++++----------------- include/linux/phy.h | 1 + 2 files changed, 41 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index d347ddcac45b..9d2bbb13293e 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1783,32 +1783,9 @@ done: } EXPORT_SYMBOL(genphy_update_link); -/** - * genphy_read_status - check the link status and update current link state - * @phydev: target phy_device struct - * - * Description: Check the link, then figure out the current state - * by comparing what we advertise with what the link partner - * advertises. Start by checking the gigabit possibilities, - * then move on to 10/100. - */ -int genphy_read_status(struct phy_device *phydev) +int genphy_read_lpa(struct phy_device *phydev) { - int lpa, lpagb, err, old_link = phydev->link; - - /* Update the link, but return if there was an error */ - err = genphy_update_link(phydev); - if (err) - return err; - - /* why bother the PHY if nothing can have changed */ - if (phydev->autoneg == AUTONEG_ENABLE && old_link && phydev->link) - return 0; - - phydev->speed = SPEED_UNKNOWN; - phydev->duplex = DUPLEX_UNKNOWN; - phydev->pause = 0; - phydev->asym_pause = 0; + int lpa, lpagb; if (phydev->autoneg == AUTONEG_ENABLE && phydev->autoneg_complete) { if (phydev->is_gigabit_capable) { @@ -1838,6 +1815,44 @@ int genphy_read_status(struct phy_device *phydev) return lpa; mii_lpa_mod_linkmode_lpa_t(phydev->lp_advertising, lpa); + } + + return 0; +} +EXPORT_SYMBOL(genphy_read_lpa); + +/** + * genphy_read_status - check the link status and update current link state + * @phydev: target phy_device struct + * + * Description: Check the link, then figure out the current state + * by comparing what we advertise with what the link partner + * advertises. Start by checking the gigabit possibilities, + * then move on to 10/100. + */ +int genphy_read_status(struct phy_device *phydev) +{ + int err, old_link = phydev->link; + + /* Update the link, but return if there was an error */ + err = genphy_update_link(phydev); + if (err) + return err; + + /* why bother the PHY if nothing can have changed */ + if (phydev->autoneg == AUTONEG_ENABLE && old_link && phydev->link) + return 0; + + phydev->speed = SPEED_UNKNOWN; + phydev->duplex = DUPLEX_UNKNOWN; + phydev->pause = 0; + phydev->asym_pause = 0; + + err = genphy_read_lpa(phydev); + if (err < 0) + return err; + + if (phydev->autoneg == AUTONEG_ENABLE && phydev->autoneg_complete) { phy_resolve_aneg_linkmode(phydev); } else if (phydev->autoneg == AUTONEG_DISABLE) { int bmcr = phy_read(phydev, MII_BMCR); diff --git a/include/linux/phy.h b/include/linux/phy.h index a7ecbe0e55aa..7abee820d05c 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1076,6 +1076,7 @@ int genphy_config_eee_advert(struct phy_device *phydev); int __genphy_config_aneg(struct phy_device *phydev, bool changed); int genphy_aneg_done(struct phy_device *phydev); int genphy_update_link(struct phy_device *phydev); +int genphy_read_lpa(struct phy_device *phydev); int genphy_read_status(struct phy_device *phydev); int genphy_suspend(struct phy_device *phydev); int genphy_resume(struct phy_device *phydev); -- cgit From 2d880b8709c013d47472f85a9d42ea1aca3bce47 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 4 Oct 2019 17:06:09 +0100 Subject: net: phy: extract pause mode Extract the update of phylib's software pause mode state from genphy_read_status(), so that we can re-use this functionality with PHYs that have alternative ways to read the negotiation results. Tested-by: tinywrkb Reviewed-by: Andrew Lunn Signed-off-by: Russell King Signed-off-by: David S. Miller --- drivers/net/phy/phy-core.c | 20 +++++++++++++------- include/linux/phy.h | 1 + 2 files changed, 14 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c index 369903d9b6ec..9412669b579c 100644 --- a/drivers/net/phy/phy-core.c +++ b/drivers/net/phy/phy-core.c @@ -283,6 +283,18 @@ void of_set_phy_eee_broken(struct phy_device *phydev) phydev->eee_broken_modes = broken; } +void phy_resolve_aneg_pause(struct phy_device *phydev) +{ + if (phydev->duplex == DUPLEX_FULL) { + phydev->pause = linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT, + phydev->lp_advertising); + phydev->asym_pause = linkmode_test_bit( + ETHTOOL_LINK_MODE_Asym_Pause_BIT, + phydev->lp_advertising); + } +} +EXPORT_SYMBOL_GPL(phy_resolve_aneg_pause); + /** * phy_resolve_aneg_linkmode - resolve the advertisements into phy settings * @phydev: The phy_device struct @@ -305,13 +317,7 @@ void phy_resolve_aneg_linkmode(struct phy_device *phydev) break; } - if (phydev->duplex == DUPLEX_FULL) { - phydev->pause = linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT, - phydev->lp_advertising); - phydev->asym_pause = linkmode_test_bit( - ETHTOOL_LINK_MODE_Asym_Pause_BIT, - phydev->lp_advertising); - } + phy_resolve_aneg_pause(phydev); } EXPORT_SYMBOL_GPL(phy_resolve_aneg_linkmode); diff --git a/include/linux/phy.h b/include/linux/phy.h index 7abee820d05c..9a0e981df502 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -678,6 +678,7 @@ static inline bool phy_is_started(struct phy_device *phydev) return phydev->state >= PHY_UP; } +void phy_resolve_aneg_pause(struct phy_device *phydev); void phy_resolve_aneg_linkmode(struct phy_device *phydev); /** -- cgit