From ee0a4dc9f317fb9a97f20037d219802ca8de939b Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 8 Mar 2022 17:28:38 +0100 Subject: Revert "netfilter: conntrack: tag conntracks picked up in local out hook" This was a prerequisite for the ill-fated "netfilter: nat: force port remap to prevent shadowing well-known ports". As this has been reverted, this change can be backed out too. Signed-off-by: Florian Westphal --- include/net/netfilter/nf_conntrack.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 8731d5bcb47d..b08b70989d2c 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -97,7 +97,6 @@ struct nf_conn { unsigned long status; u16 cpu; - u16 local_origin:1; possible_net_t ct_net; #if IS_ENABLED(CONFIG_NF_NAT) -- cgit From c993ee0f9f81caf5767a50d1faeba39a0dc82af2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 11 Mar 2022 13:23:31 +0000 Subject: watch_queue: Fix filter limit check In watch_queue_set_filter(), there are a couple of places where we check that the filter type value does not exceed what the type_filter bitmap can hold. One place calculates the number of bits by: if (tf[i].type >= sizeof(wfilter->type_filter) * 8) which is fine, but the second does: if (tf[i].type >= sizeof(wfilter->type_filter) * BITS_PER_LONG) which is not. This can lead to a couple of out-of-bounds writes due to a too-large type: (1) __set_bit() on wfilter->type_filter (2) Writing more elements in wfilter->filters[] than we allocated. Fix this by just using the proper WATCH_TYPE__NR instead, which is the number of types we actually know about. The bug may cause an oops looking something like: BUG: KASAN: slab-out-of-bounds in watch_queue_set_filter+0x659/0x740 Write of size 4 at addr ffff88800d2c66bc by task watch_queue_oob/611 ... Call Trace: dump_stack_lvl+0x45/0x59 print_address_description.constprop.0+0x1f/0x150 ... kasan_report.cold+0x7f/0x11b ... watch_queue_set_filter+0x659/0x740 ... __x64_sys_ioctl+0x127/0x190 do_syscall_64+0x43/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae Allocated by task 611: kasan_save_stack+0x1e/0x40 __kasan_kmalloc+0x81/0xa0 watch_queue_set_filter+0x23a/0x740 __x64_sys_ioctl+0x127/0x190 do_syscall_64+0x43/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae The buggy address belongs to the object at ffff88800d2c66a0 which belongs to the cache kmalloc-32 of size 32 The buggy address is located 28 bytes inside of 32-byte region [ffff88800d2c66a0, ffff88800d2c66c0) Fixes: c73be61cede5 ("pipe: Add general notification queue support") Reported-by: Jann Horn Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- include/linux/watch_queue.h | 3 ++- kernel/watch_queue.c | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/watch_queue.h b/include/linux/watch_queue.h index c994d1b2cdba..3b9a40ae8bdb 100644 --- a/include/linux/watch_queue.h +++ b/include/linux/watch_queue.h @@ -28,7 +28,8 @@ struct watch_type_filter { struct watch_filter { union { struct rcu_head rcu; - unsigned long type_filter[2]; /* Bitmask of accepted types */ + /* Bitmask of accepted types */ + DECLARE_BITMAP(type_filter, WATCH_TYPE__NR); }; u32 nr_filters; /* Number of filters */ struct watch_type_filter filters[]; diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c index 9c9eb20dd2c5..427b0318e303 100644 --- a/kernel/watch_queue.c +++ b/kernel/watch_queue.c @@ -320,7 +320,7 @@ long watch_queue_set_filter(struct pipe_inode_info *pipe, tf[i].info_mask & WATCH_INFO_LENGTH) goto err_filter; /* Ignore any unknown types */ - if (tf[i].type >= sizeof(wfilter->type_filter) * 8) + if (tf[i].type >= WATCH_TYPE__NR) continue; nr_filter++; } @@ -336,7 +336,7 @@ long watch_queue_set_filter(struct pipe_inode_info *pipe, q = wfilter->filters; for (i = 0; i < filter.nr_filters; i++) { - if (tf[i].type >= sizeof(wfilter->type_filter) * BITS_PER_LONG) + if (tf[i].type >= WATCH_TYPE__NR) continue; q->type = tf[i].type; -- cgit From 413a4a6b0b5553f2423d210f65e98c211b99c3f8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 11 Mar 2022 16:02:18 +0000 Subject: cachefiles: Fix volume coherency attribute A network filesystem may set coherency data on a volume cookie, and if given, cachefiles will store this in an xattr on the directory in the cache corresponding to the volume. The function that sets the xattr just stores the contents of the volume coherency buffer directly into the xattr, with nothing added; the checking function, on the other hand, has a cut'n'paste error whereby it tries to interpret the xattr contents as would be the xattr on an ordinary file (using the cachefiles_xattr struct). This results in a failure to match the coherency data because the buffer ends up being shifted by 18 bytes. Fix this by defining a structure specifically for the volume xattr and making both the setting and checking functions use it. Since the volume coherency doesn't work if used, take the opportunity to insert a reserved field for future use, set it to 0 and check that it is 0. Log mismatch through the appropriate tracepoint. Note that this only affects cifs; 9p, afs, ceph and nfs don't use the volume coherency data at the moment. Fixes: 32e150037dce ("fscache, cachefiles: Store the volume coherency data") Reported-by: Rohith Surabattula Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: Steve French cc: linux-cifs@vger.kernel.org cc: linux-cachefs@redhat.com Signed-off-by: Linus Torvalds --- fs/cachefiles/xattr.c | 23 ++++++++++++++++++++--- include/trace/events/cachefiles.h | 2 ++ 2 files changed, 22 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c index 83f41bd0c3a9..35465109d9c4 100644 --- a/fs/cachefiles/xattr.c +++ b/fs/cachefiles/xattr.c @@ -28,6 +28,11 @@ struct cachefiles_xattr { static const char cachefiles_xattr_cache[] = XATTR_USER_PREFIX "CacheFiles.cache"; +struct cachefiles_vol_xattr { + __be32 reserved; /* Reserved, should be 0 */ + __u8 data[]; /* netfs volume coherency data */ +} __packed; + /* * set the state xattr on a cache file */ @@ -185,6 +190,7 @@ void cachefiles_prepare_to_write(struct fscache_cookie *cookie) */ bool cachefiles_set_volume_xattr(struct cachefiles_volume *volume) { + struct cachefiles_vol_xattr *buf; unsigned int len = volume->vcookie->coherency_len; const void *p = volume->vcookie->coherency; struct dentry *dentry = volume->dentry; @@ -192,10 +198,17 @@ bool cachefiles_set_volume_xattr(struct cachefiles_volume *volume) _enter("%x,#%d", volume->vcookie->debug_id, len); + len += sizeof(*buf); + buf = kmalloc(len, GFP_KERNEL); + if (!buf) + return false; + buf->reserved = cpu_to_be32(0); + memcpy(buf->data, p, len); + ret = cachefiles_inject_write_error(); if (ret == 0) ret = vfs_setxattr(&init_user_ns, dentry, cachefiles_xattr_cache, - p, len, 0); + buf, len, 0); if (ret < 0) { trace_cachefiles_vfs_error(NULL, d_inode(dentry), ret, cachefiles_trace_setxattr_error); @@ -209,6 +222,7 @@ bool cachefiles_set_volume_xattr(struct cachefiles_volume *volume) cachefiles_coherency_vol_set_ok); } + kfree(buf); _leave(" = %d", ret); return ret == 0; } @@ -218,7 +232,7 @@ bool cachefiles_set_volume_xattr(struct cachefiles_volume *volume) */ int cachefiles_check_volume_xattr(struct cachefiles_volume *volume) { - struct cachefiles_xattr *buf; + struct cachefiles_vol_xattr *buf; struct dentry *dentry = volume->dentry; unsigned int len = volume->vcookie->coherency_len; const void *p = volume->vcookie->coherency; @@ -228,6 +242,7 @@ int cachefiles_check_volume_xattr(struct cachefiles_volume *volume) _enter(""); + len += sizeof(*buf); buf = kmalloc(len, GFP_KERNEL); if (!buf) return -ENOMEM; @@ -245,7 +260,9 @@ int cachefiles_check_volume_xattr(struct cachefiles_volume *volume) "Failed to read xattr with error %zd", xlen); } why = cachefiles_coherency_vol_check_xattr; - } else if (memcmp(buf->data, p, len) != 0) { + } else if (buf->reserved != cpu_to_be32(0)) { + why = cachefiles_coherency_vol_check_resv; + } else if (memcmp(buf->data, p, len - sizeof(*buf)) != 0) { why = cachefiles_coherency_vol_check_cmp; } else { why = cachefiles_coherency_vol_check_ok; diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h index c6f5aa74db89..2c530637e10a 100644 --- a/include/trace/events/cachefiles.h +++ b/include/trace/events/cachefiles.h @@ -56,6 +56,7 @@ enum cachefiles_coherency_trace { cachefiles_coherency_set_ok, cachefiles_coherency_vol_check_cmp, cachefiles_coherency_vol_check_ok, + cachefiles_coherency_vol_check_resv, cachefiles_coherency_vol_check_xattr, cachefiles_coherency_vol_set_fail, cachefiles_coherency_vol_set_ok, @@ -139,6 +140,7 @@ enum cachefiles_error_trace { EM(cachefiles_coherency_set_ok, "SET ok ") \ EM(cachefiles_coherency_vol_check_cmp, "VOL BAD cmp ") \ EM(cachefiles_coherency_vol_check_ok, "VOL OK ") \ + EM(cachefiles_coherency_vol_check_resv, "VOL BAD resv") \ EM(cachefiles_coherency_vol_check_xattr,"VOL BAD xatt") \ EM(cachefiles_coherency_vol_set_fail, "VOL SET fail") \ E_(cachefiles_coherency_vol_set_ok, "VOL SET ok ") -- cgit From 8e6ed963763fe21429eabfc76c69ce2b0163a3dd Mon Sep 17 00:00:00 2001 From: Jiyong Park Date: Fri, 11 Mar 2022 11:00:16 +0900 Subject: vsock: each transport cycles only on its own sockets When iterating over sockets using vsock_for_each_connected_socket, make sure that a transport filters out sockets that don't belong to the transport. There actually was an issue caused by this; in a nested VM configuration, destroying the nested VM (which often involves the closing of /dev/vhost-vsock if there was h2g connections to the nested VM) kills not only the h2g connections, but also all existing g2h connections to the (outmost) host which are totally unrelated. Tested: Executed the following steps on Cuttlefish (Android running on a VM) [1]: (1) Enter into an `adb shell` session - to have a g2h connection inside the VM, (2) open and then close /dev/vhost-vsock by `exec 3< /dev/vhost-vsock && exec 3<&-`, (3) observe that the adb session is not reset. [1] https://android.googlesource.com/device/google/cuttlefish/ Fixes: c0cfa2d8a788 ("vsock: add multi-transports support") Reviewed-by: Stefano Garzarella Acked-by: Michael S. Tsirkin Signed-off-by: Jiyong Park Link: https://lore.kernel.org/r/20220311020017.1509316-1-jiyong@google.com Signed-off-by: Jakub Kicinski --- drivers/vhost/vsock.c | 3 ++- include/net/af_vsock.h | 3 ++- net/vmw_vsock/af_vsock.c | 9 +++++++-- net/vmw_vsock/virtio_transport.c | 7 +++++-- net/vmw_vsock/vmci_transport.c | 5 ++++- 5 files changed, 20 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 37f0b4274113..e6c9d41db1de 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -753,7 +753,8 @@ static int vhost_vsock_dev_release(struct inode *inode, struct file *file) /* Iterating over all connections for all CIDs to find orphans is * inefficient. Room for improvement here. */ - vsock_for_each_connected_socket(vhost_vsock_reset_orphans); + vsock_for_each_connected_socket(&vhost_transport.transport, + vhost_vsock_reset_orphans); /* Don't check the owner, because we are in the release path, so we * need to stop the vsock device in any case. diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index ab207677e0a8..f742e50207fb 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -205,7 +205,8 @@ struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr); struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, struct sockaddr_vm *dst); void vsock_remove_sock(struct vsock_sock *vsk); -void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)); +void vsock_for_each_connected_socket(struct vsock_transport *transport, + void (*fn)(struct sock *sk)); int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk); bool vsock_find_cid(unsigned int cid); diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 38baeb189d4e..f04abf662ec6 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -334,7 +334,8 @@ void vsock_remove_sock(struct vsock_sock *vsk) } EXPORT_SYMBOL_GPL(vsock_remove_sock); -void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)) +void vsock_for_each_connected_socket(struct vsock_transport *transport, + void (*fn)(struct sock *sk)) { int i; @@ -343,8 +344,12 @@ void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)) for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++) { struct vsock_sock *vsk; list_for_each_entry(vsk, &vsock_connected_table[i], - connected_table) + connected_table) { + if (vsk->transport != transport) + continue; + fn(sk_vsock(vsk)); + } } spin_unlock_bh(&vsock_table_lock); diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index fb3302fff627..5afc194a58bb 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -24,6 +24,7 @@ static struct workqueue_struct *virtio_vsock_workqueue; static struct virtio_vsock __rcu *the_virtio_vsock; static DEFINE_MUTEX(the_virtio_vsock_mutex); /* protects the_virtio_vsock */ +static struct virtio_transport virtio_transport; /* forward declaration */ struct virtio_vsock { struct virtio_device *vdev; @@ -384,7 +385,8 @@ static void virtio_vsock_event_handle(struct virtio_vsock *vsock, switch (le32_to_cpu(event->id)) { case VIRTIO_VSOCK_EVENT_TRANSPORT_RESET: virtio_vsock_update_guest_cid(vsock); - vsock_for_each_connected_socket(virtio_vsock_reset_sock); + vsock_for_each_connected_socket(&virtio_transport.transport, + virtio_vsock_reset_sock); break; } } @@ -662,7 +664,8 @@ static void virtio_vsock_remove(struct virtio_device *vdev) synchronize_rcu(); /* Reset all connected sockets when the device disappear */ - vsock_for_each_connected_socket(virtio_vsock_reset_sock); + vsock_for_each_connected_socket(&virtio_transport.transport, + virtio_vsock_reset_sock); /* Stop all work handlers to make sure no one is accessing the device, * so we can safely call virtio_reset_device(). diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 7aef34e32bdf..b17dc9745188 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -75,6 +75,8 @@ static u32 vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID; static int PROTOCOL_OVERRIDE = -1; +static struct vsock_transport vmci_transport; /* forward declaration */ + /* Helper function to convert from a VMCI error code to a VSock error code. */ static s32 vmci_transport_error_to_vsock_error(s32 vmci_error) @@ -882,7 +884,8 @@ static void vmci_transport_qp_resumed_cb(u32 sub_id, const struct vmci_event_data *e_data, void *client_data) { - vsock_for_each_connected_socket(vmci_transport_handle_detach); + vsock_for_each_connected_socket(&vmci_transport, + vmci_transport_handle_detach); } static void vmci_transport_recv_pkt_work(struct work_struct *work) -- cgit From 4ee06de7729d795773145692e246a06448b1eb7a Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 15 Mar 2022 10:20:08 +0100 Subject: net: handle ARPHRD_PIMREG in dev_is_mac_header_xmit() This kind of interface doesn't have a mac header. This patch fixes bpf_redirect() to a PIM interface. Fixes: 27b29f63058d ("bpf: add bpf_redirect() helper") Signed-off-by: Nicolas Dichtel Link: https://lore.kernel.org/r/20220315092008.31423-1-nicolas.dichtel@6wind.com Signed-off-by: Jakub Kicinski --- include/linux/if_arp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h index b712217f7030..1ed52441972f 100644 --- a/include/linux/if_arp.h +++ b/include/linux/if_arp.h @@ -52,6 +52,7 @@ static inline bool dev_is_mac_header_xmit(const struct net_device *dev) case ARPHRD_VOID: case ARPHRD_NONE: case ARPHRD_RAWIP: + case ARPHRD_PIMREG: return false; default: return true; -- cgit