From 19d8f1ad12fd746e60707a58d954980013c7a35a Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 4 Sep 2018 21:53:52 +0200 Subject: if_link: add IFLA_TARGET_NETNSID alias This adds IFLA_TARGET_NETNSID as an alias for IFLA_IF_NETNSID for RTM_*LINK requests. The new name is clearer and also aligns with the newly introduced IFA_TARGET_NETNSID propert for RTM_*ADDR requests. Signed-off-by: Christian Brauner Suggested-by: Nicolas Dichtel Cc: Jiri Benc Signed-off-by: David S. Miller --- tools/include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/include/uapi/linux') diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index cf01b6824244..1c73d63068b1 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -161,6 +161,7 @@ enum { IFLA_EVENT, IFLA_NEW_NETNSID, IFLA_IF_NETNSID, + IFLA_TARGET_NETNSID = IFLA_IF_NETNSID, /* new alias */ IFLA_CARRIER_UP_COUNT, IFLA_CARRIER_DOWN_COUNT, IFLA_NEW_IFINDEX, -- cgit From 52b7b7843d9523ebc3c60c51c7afc4a45cc10aad Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 5 Sep 2018 16:58:03 -0700 Subject: tools/bpf: sync kernel uapi header if_link.h to tools Among others, this header will be used later for bpftool net support. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- tools/include/uapi/linux/if_link.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'tools/include/uapi/linux') diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index cf01b6824244..43391e2d1153 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -164,6 +164,8 @@ enum { IFLA_CARRIER_UP_COUNT, IFLA_CARRIER_DOWN_COUNT, IFLA_NEW_IFINDEX, + IFLA_MIN_MTU, + IFLA_MAX_MTU, __IFLA_MAX }; @@ -334,6 +336,7 @@ enum { IFLA_BRPORT_GROUP_FWD_MASK, IFLA_BRPORT_NEIGH_SUPPRESS, IFLA_BRPORT_ISOLATED, + IFLA_BRPORT_BACKUP_PORT, __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) @@ -459,6 +462,16 @@ enum { #define IFLA_MACSEC_MAX (__IFLA_MACSEC_MAX - 1) +/* XFRM section */ +enum { + IFLA_XFRM_UNSPEC, + IFLA_XFRM_LINK, + IFLA_XFRM_IF_ID, + __IFLA_XFRM_MAX +}; + +#define IFLA_XFRM_MAX (__IFLA_XFRM_MAX - 1) + enum macsec_validation_type { MACSEC_VALIDATE_DISABLED = 0, MACSEC_VALIDATE_CHECK = 1, @@ -920,6 +933,7 @@ enum { XDP_ATTACHED_DRV, XDP_ATTACHED_SKB, XDP_ATTACHED_HW, + XDP_ATTACHED_MULTI, }; enum { @@ -928,6 +942,9 @@ enum { IFLA_XDP_ATTACHED, IFLA_XDP_FLAGS, IFLA_XDP_PROG_ID, + IFLA_XDP_DRV_PROG_ID, + IFLA_XDP_SKB_PROG_ID, + IFLA_XDP_HW_PROG_ID, __IFLA_XDP_MAX, }; -- cgit From 0ee03d936cbb300309ed6154ac1cc12b63e9785f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 11 Sep 2018 10:57:13 -0300 Subject: tools headers uapi: Update tools's copy of linux/perf_event.h To get the changes in: 09121255c784 ("perf/UAPI: Clearly mark __PERF_SAMPLE_CALLCHAIN_EARLY as internal use") This cures the following warning during perf's build: Warning: Kernel ABI header at 'tools/include/uapi/linux/perf_event.h' differs from latest version at 'include/uapi/linux/perf_event.h' diff -u tools/include/uapi/linux/perf_event.h include/uapi/linux/perf_event.h Cc: Peter Zijlstra Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-2vvwh2o19orn56di0ksrtgzr@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/perf_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/include/uapi/linux') diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index eeb787b1c53c..f35eb72739c0 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -144,7 +144,7 @@ enum perf_event_sample_format { PERF_SAMPLE_MAX = 1U << 20, /* non-ABI */ - __PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, + __PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, /* non-ABI; internal use */ }; /* -- cgit From 0210c156d7fd330bce1c2c842bee9d27f1c5dfeb Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 11 Sep 2018 11:18:58 -0300 Subject: tools headers uapi: Update tools's copies of kvm headers To get the changes in: a449938297e5 ("KVM: s390: Add huge page enablement control") 8fcc4b5923af ("kvm: nVMX: Introduce KVM_CAP_NESTED_STATE") be26b3a73413 ("arm64: KVM: export the capability to set guest SError syndrome") b7b27facc7b5 ("arm/arm64: KVM: Add KVM_GET/SET_VCPU_EVENTS") b0960b9569db ("KVM: arm: Add 32bit get/set events support") a3da7b4a3be5 ("KVM: s390: add etoken support for guests") This makes 'perf trace' automagically get aware of these new ioctls: $ cp include/uapi/linux/kvm.h tools/include/uapi/linux/kvm.h $ tools/perf/trace/beauty/kvm_ioctl.sh > /tmp/after $ diff -u /tmp/before /tmp/after --- /tmp/before 2018-09-11 11:18:29.173207586 -0300 +++ /tmp/after 2018-09-11 11:18:38.488200446 -0300 @@ -84,6 +84,8 @@ [0xbb] = "MEMORY_ENCRYPT_REG_REGION", [0xbc] = "MEMORY_ENCRYPT_UNREG_REGION", [0xbd] = "HYPERV_EVENTFD", + [0xbe] = "GET_NESTED_STATE", + [0xbf] = "SET_NESTED_STATE", [0xe0] = "CREATE_DEVICE", [0xe1] = "SET_DEVICE_ATTR", [0xe2] = "G And cures the following warning during perf's build: Warning: Kernel ABI header at 'tools/include/uapi/linux/kvm.h' differs from latest version at 'include/uapi/linux/kvm.h' diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h Cc: Adrian Hunter Cc: Christian Borntraeger Cc: Cornelia Huck Cc: David Ahern Cc: David Hildenbrand Cc: Dongjiu Geng Cc: Eduardo Habkost Cc: James Morse Cc: Janosch Frank Cc: Jim Mattson Cc: Jiri Olsa Cc: Marc Zyngier Cc: Namhyung Kim Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-2vvwh2o19orn56di0ksrtgzr@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/arm/include/uapi/asm/kvm.h | 13 ++++++++++++ tools/arch/arm64/include/uapi/asm/kvm.h | 13 ++++++++++++ tools/arch/s390/include/uapi/asm/kvm.h | 5 ++++- tools/arch/x86/include/uapi/asm/kvm.h | 37 +++++++++++++++++++++++++++++++++ tools/include/uapi/linux/kvm.h | 6 ++++++ 5 files changed, 73 insertions(+), 1 deletion(-) (limited to 'tools/include/uapi/linux') diff --git a/tools/arch/arm/include/uapi/asm/kvm.h b/tools/arch/arm/include/uapi/asm/kvm.h index 16e006f708ca..4602464ebdfb 100644 --- a/tools/arch/arm/include/uapi/asm/kvm.h +++ b/tools/arch/arm/include/uapi/asm/kvm.h @@ -27,6 +27,7 @@ #define __KVM_HAVE_GUEST_DEBUG #define __KVM_HAVE_IRQ_LINE #define __KVM_HAVE_READONLY_MEM +#define __KVM_HAVE_VCPU_EVENTS #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 @@ -125,6 +126,18 @@ struct kvm_sync_regs { struct kvm_arch_memory_slot { }; +/* for KVM_GET/SET_VCPU_EVENTS */ +struct kvm_vcpu_events { + struct { + __u8 serror_pending; + __u8 serror_has_esr; + /* Align it to 8 bytes */ + __u8 pad[6]; + __u64 serror_esr; + } exception; + __u32 reserved[12]; +}; + /* If you need to interpret the index values, here is the key: */ #define KVM_REG_ARM_COPROC_MASK 0x000000000FFF0000 #define KVM_REG_ARM_COPROC_SHIFT 16 diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index 4e76630dd655..97c3478ee6e7 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -39,6 +39,7 @@ #define __KVM_HAVE_GUEST_DEBUG #define __KVM_HAVE_IRQ_LINE #define __KVM_HAVE_READONLY_MEM +#define __KVM_HAVE_VCPU_EVENTS #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 @@ -154,6 +155,18 @@ struct kvm_sync_regs { struct kvm_arch_memory_slot { }; +/* for KVM_GET/SET_VCPU_EVENTS */ +struct kvm_vcpu_events { + struct { + __u8 serror_pending; + __u8 serror_has_esr; + /* Align it to 8 bytes */ + __u8 pad[6]; + __u64 serror_esr; + } exception; + __u32 reserved[12]; +}; + /* If you need to interpret the index values, here is the key: */ #define KVM_REG_ARM_COPROC_MASK 0x000000000FFF0000 #define KVM_REG_ARM_COPROC_SHIFT 16 diff --git a/tools/arch/s390/include/uapi/asm/kvm.h b/tools/arch/s390/include/uapi/asm/kvm.h index 4cdaa55fabfe..9a50f02b9894 100644 --- a/tools/arch/s390/include/uapi/asm/kvm.h +++ b/tools/arch/s390/include/uapi/asm/kvm.h @@ -4,7 +4,7 @@ /* * KVM s390 specific structures and definitions * - * Copyright IBM Corp. 2008 + * Copyright IBM Corp. 2008, 2018 * * Author(s): Carsten Otte * Christian Borntraeger @@ -225,6 +225,7 @@ struct kvm_guest_debug_arch { #define KVM_SYNC_FPRS (1UL << 8) #define KVM_SYNC_GSCB (1UL << 9) #define KVM_SYNC_BPBC (1UL << 10) +#define KVM_SYNC_ETOKEN (1UL << 11) /* length and alignment of the sdnx as a power of two */ #define SDNXC 8 #define SDNXL (1UL << SDNXC) @@ -258,6 +259,8 @@ struct kvm_sync_regs { struct { __u64 reserved1[2]; __u64 gscb[4]; + __u64 etoken; + __u64 etoken_extension; }; }; }; diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index c535c2fdea13..86299efa804a 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -378,4 +378,41 @@ struct kvm_sync_regs { #define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0) #define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1) +#define KVM_STATE_NESTED_GUEST_MODE 0x00000001 +#define KVM_STATE_NESTED_RUN_PENDING 0x00000002 + +#define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001 +#define KVM_STATE_NESTED_SMM_VMXON 0x00000002 + +struct kvm_vmx_nested_state { + __u64 vmxon_pa; + __u64 vmcs_pa; + + struct { + __u16 flags; + } smm; +}; + +/* for KVM_CAP_NESTED_STATE */ +struct kvm_nested_state { + /* KVM_STATE_* flags */ + __u16 flags; + + /* 0 for VMX, 1 for SVM. */ + __u16 format; + + /* 128 for SVM, 128 + VMCS size for VMX. */ + __u32 size; + + union { + /* VMXON, VMCS */ + struct kvm_vmx_nested_state vmx; + + /* Pad the header to 128 bytes. */ + __u8 pad[120]; + }; + + __u8 data[0]; +}; + #endif /* _ASM_X86_KVM_H */ diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index b6270a3b38e9..07548de5c988 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -949,6 +949,9 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_GET_MSR_FEATURES 153 #define KVM_CAP_HYPERV_EVENTFD 154 #define KVM_CAP_HYPERV_TLBFLUSH 155 +#define KVM_CAP_S390_HPAGE_1M 156 +#define KVM_CAP_NESTED_STATE 157 +#define KVM_CAP_ARM_INJECT_SERROR_ESR 158 #ifdef KVM_CAP_IRQ_ROUTING @@ -1391,6 +1394,9 @@ struct kvm_enc_region { /* Available with KVM_CAP_HYPERV_EVENTFD */ #define KVM_HYPERV_EVENTFD _IOW(KVMIO, 0xbd, struct kvm_hyperv_eventfd) +/* Available with KVM_CAP_NESTED_STATE */ +#define KVM_GET_NESTED_STATE _IOWR(KVMIO, 0xbe, struct kvm_nested_state) +#define KVM_SET_NESTED_STATE _IOW(KVMIO, 0xbf, struct kvm_nested_state) /* Secure Encrypted Virtualization command */ enum sev_cmd_id { -- cgit From 7f28785c41f4d5635e69c183b3de8ea19093ccef Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 11 Sep 2018 13:12:40 -0300 Subject: tools headers uapi: Update tools's copy of linux/vhost.h To get the changes in: c48300c92ad9 ("vhost: fix VHOST_GET_BACKEND_FEATURES ioctl request definition") This makes 'perf trace' and other tools in the future using its beautifiers in a libbeauty.so library be able to translate these new ioctl to strings: $ tools/perf/trace/beauty/vhost_virtio_ioctl.sh > /tmp/after $ diff -u /tmp/before /tmp/after --- /tmp/before 2018-09-11 13:10:57.923038244 -0300 +++ /tmp/after 2018-09-11 13:11:20.329012685 -0300 @@ -15,6 +15,7 @@ [0x22] = "SET_VRING_ERR", [0x23] = "SET_VRING_BUSYLOOP_TIMEOUT", [0x24] = "GET_VRING_BUSYLOOP_TIMEOUT", + [0x25] = "SET_BACKEND_FEATURES", [0x30] = "NET_SET_BACKEND", [0x40] = "SCSI_SET_ENDPOINT", [0x41] = "SCSI_CLEAR_ENDPOINT", @@ -27,4 +28,5 @@ static const char *vhost_virtio_ioctl_read_cmds[] = { [0x00] = "GET_FEATURES", [0x12] = "GET_VRING_BASE", + [0x26] = "GET_BACKEND_FEATURES", }; $ We'll also use this to be able to express syscall filters using symbolic these symbolic names, something like: # perf trace --all-cpus -e ioctl(cmd=*GET_FEATURES) This silences the following warning during perf's build: Warning: Kernel ABI header at 'tools/include/uapi/linux/vhost.h' differs from latest version at 'include/uapi/linux/vhost.h' diff -u tools/include/uapi/linux/vhost.h include/uapi/linux/vhost.h Cc: Adrian Hunter Cc: David Ahern Cc: David S. Miller Cc: Gleb Fotengauer-Malinovskiy Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-35x71oei2hdui9u0tarpimbq@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/vhost.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'tools/include/uapi/linux') diff --git a/tools/include/uapi/linux/vhost.h b/tools/include/uapi/linux/vhost.h index c51f8e5cc608..84c3de89696a 100644 --- a/tools/include/uapi/linux/vhost.h +++ b/tools/include/uapi/linux/vhost.h @@ -65,6 +65,7 @@ struct vhost_iotlb_msg { }; #define VHOST_IOTLB_MSG 0x1 +#define VHOST_IOTLB_MSG_V2 0x2 struct vhost_msg { int type; @@ -74,6 +75,15 @@ struct vhost_msg { }; }; +struct vhost_msg_v2 { + __u32 type; + __u32 reserved; + union { + struct vhost_iotlb_msg iotlb; + __u8 padding[64]; + }; +}; + struct vhost_memory_region { __u64 guest_phys_addr; __u64 memory_size; /* bytes */ @@ -160,6 +170,14 @@ struct vhost_memory { #define VHOST_GET_VRING_BUSYLOOP_TIMEOUT _IOW(VHOST_VIRTIO, 0x24, \ struct vhost_vring_state) +/* Set or get vhost backend capability */ + +/* Use message type V2 */ +#define VHOST_BACKEND_F_IOTLB_MSG_V2 0x1 + +#define VHOST_SET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x25, __u64) +#define VHOST_GET_BACKEND_FEATURES _IOR(VHOST_VIRTIO, 0x26, __u64) + /* VHOST_NET specific defines */ /* Attach virtio net ring to a raw socket, or tap device. -- cgit From 5db48a8d01319620d390bf6d9da5410be14f98e3 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 11 Sep 2018 14:10:52 -0300 Subject: tools headers uapi: Update tools's copy of linux/if_link.h To get the changes in: 3e7a50ceb11e ("net: report min and max mtu network device settings") 2756f68c3149 ("net: bridge: add support for backup port") a25717d2b604 ("xdp: support simultaneous driver and hw XDP attachment") 4f91da26c811 ("xdp: add per mode attributes for attached programs") f203b76d7809 ("xfrm: Add virtual xfrm interfaces") Silencing this libbpf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/if_link.h' differs from latest version at 'include/uapi/linux/if_link.h' Cc: Adrian Hunter Cc: Daniel Borkmann Cc: David Ahern Cc: David S. Miller Cc: Jakub Kicinski Cc: Jiri Olsa Cc: Namhyung Kim Cc: Nikolay Aleksandrov Cc: Steffen Klassert Cc: Stephen Hemminger Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-xd9ztioa894zemv8ag8kg64u@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/if_link.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'tools/include/uapi/linux') diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index cf01b6824244..43391e2d1153 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -164,6 +164,8 @@ enum { IFLA_CARRIER_UP_COUNT, IFLA_CARRIER_DOWN_COUNT, IFLA_NEW_IFINDEX, + IFLA_MIN_MTU, + IFLA_MAX_MTU, __IFLA_MAX }; @@ -334,6 +336,7 @@ enum { IFLA_BRPORT_GROUP_FWD_MASK, IFLA_BRPORT_NEIGH_SUPPRESS, IFLA_BRPORT_ISOLATED, + IFLA_BRPORT_BACKUP_PORT, __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) @@ -459,6 +462,16 @@ enum { #define IFLA_MACSEC_MAX (__IFLA_MACSEC_MAX - 1) +/* XFRM section */ +enum { + IFLA_XFRM_UNSPEC, + IFLA_XFRM_LINK, + IFLA_XFRM_IF_ID, + __IFLA_XFRM_MAX +}; + +#define IFLA_XFRM_MAX (__IFLA_XFRM_MAX - 1) + enum macsec_validation_type { MACSEC_VALIDATE_DISABLED = 0, MACSEC_VALIDATE_CHECK = 1, @@ -920,6 +933,7 @@ enum { XDP_ATTACHED_DRV, XDP_ATTACHED_SKB, XDP_ATTACHED_HW, + XDP_ATTACHED_MULTI, }; enum { @@ -928,6 +942,9 @@ enum { IFLA_XDP_ATTACHED, IFLA_XDP_FLAGS, IFLA_XDP_PROG_ID, + IFLA_XDP_DRV_PROG_ID, + IFLA_XDP_SKB_PROG_ID, + IFLA_XDP_HW_PROG_ID, __IFLA_XDP_MAX, }; -- cgit From 52d0d404d39dd9eac71a181615d6ca15e23d8e38 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 12 Sep 2018 10:04:21 +0800 Subject: geneve: add ttl inherit support Similar with commit 72f6d71e491e6 ("vxlan: add ttl inherit support"), currently ttl == 0 means "use whatever default value" on geneve instead of inherit inner ttl. To respect compatibility with old behavior, let's add a new IFLA_GENEVE_TTL_INHERIT for geneve ttl inherit support. Reported-by: Jianlin Shi Suggested-by: Jiri Benc Signed-off-by: Hangbin Liu Reviewed-by: Jiri Benc Signed-off-by: David S. Miller --- drivers/net/geneve.c | 41 ++++++++++++++++++++++++++++++-------- include/uapi/linux/if_link.h | 1 + tools/include/uapi/linux/if_link.h | 1 + 3 files changed, 35 insertions(+), 8 deletions(-) (limited to 'tools/include/uapi/linux') diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 6acb6b5718b9..6625fabe2c88 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -69,6 +69,7 @@ struct geneve_dev { struct gro_cells gro_cells; bool collect_md; bool use_udp6_rx_checksums; + bool ttl_inherit; }; struct geneve_sock { @@ -843,7 +844,11 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, ttl = key->ttl; } else { tos = ip_tunnel_ecn_encap(fl4.flowi4_tos, ip_hdr(skb), skb); - ttl = key->ttl ? : ip4_dst_hoplimit(&rt->dst); + if (geneve->ttl_inherit) + ttl = ip_tunnel_get_ttl(ip_hdr(skb), skb); + else + ttl = key->ttl; + ttl = ttl ? : ip4_dst_hoplimit(&rt->dst); } df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; @@ -889,7 +894,11 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, } else { prio = ip_tunnel_ecn_encap(ip6_tclass(fl6.flowlabel), ip_hdr(skb), skb); - ttl = key->ttl ? : ip6_dst_hoplimit(dst); + if (geneve->ttl_inherit) + ttl = ip_tunnel_get_ttl(ip_hdr(skb), skb); + else + ttl = key->ttl; + ttl = ttl ? : ip6_dst_hoplimit(dst); } err = geneve_build_skb(dst, skb, info, xnet, sizeof(struct ipv6hdr)); if (unlikely(err)) @@ -1091,6 +1100,7 @@ static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = { [IFLA_GENEVE_UDP_CSUM] = { .type = NLA_U8 }, [IFLA_GENEVE_UDP_ZERO_CSUM6_TX] = { .type = NLA_U8 }, [IFLA_GENEVE_UDP_ZERO_CSUM6_RX] = { .type = NLA_U8 }, + [IFLA_GENEVE_TTL_INHERIT] = { .type = NLA_U8 }, }; static int geneve_validate(struct nlattr *tb[], struct nlattr *data[], @@ -1170,7 +1180,8 @@ static bool geneve_dst_addr_equal(struct ip_tunnel_info *a, static int geneve_configure(struct net *net, struct net_device *dev, struct netlink_ext_ack *extack, const struct ip_tunnel_info *info, - bool metadata, bool ipv6_rx_csum) + bool metadata, bool ipv6_rx_csum, + bool ttl_inherit) { struct geneve_net *gn = net_generic(net, geneve_net_id); struct geneve_dev *t, *geneve = netdev_priv(dev); @@ -1219,6 +1230,7 @@ static int geneve_configure(struct net *net, struct net_device *dev, geneve->info = *info; geneve->collect_md = metadata; geneve->use_udp6_rx_checksums = ipv6_rx_csum; + geneve->ttl_inherit = ttl_inherit; err = register_netdevice(dev); if (err) @@ -1237,7 +1249,8 @@ static void init_tnl_info(struct ip_tunnel_info *info, __u16 dst_port) static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack, struct ip_tunnel_info *info, bool *metadata, - bool *use_udp6_rx_checksums, bool changelink) + bool *use_udp6_rx_checksums, bool *ttl_inherit, + bool changelink) { int attrtype; @@ -1315,6 +1328,9 @@ static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[], if (data[IFLA_GENEVE_TTL]) info->key.ttl = nla_get_u8(data[IFLA_GENEVE_TTL]); + if (data[IFLA_GENEVE_TTL_INHERIT]) + *ttl_inherit = true; + if (data[IFLA_GENEVE_TOS]) info->key.tos = nla_get_u8(data[IFLA_GENEVE_TOS]); @@ -1438,17 +1454,18 @@ static int geneve_newlink(struct net *net, struct net_device *dev, { bool use_udp6_rx_checksums = false; struct ip_tunnel_info info; + bool ttl_inherit = false; bool metadata = false; int err; init_tnl_info(&info, GENEVE_UDP_PORT); err = geneve_nl2info(tb, data, extack, &info, &metadata, - &use_udp6_rx_checksums, false); + &use_udp6_rx_checksums, &ttl_inherit, false); if (err) return err; err = geneve_configure(net, dev, extack, &info, metadata, - use_udp6_rx_checksums); + use_udp6_rx_checksums, ttl_inherit); if (err) return err; @@ -1511,6 +1528,7 @@ static int geneve_changelink(struct net_device *dev, struct nlattr *tb[], struct ip_tunnel_info info; bool metadata; bool use_udp6_rx_checksums; + bool ttl_inherit; int err; /* If the geneve device is configured for metadata (or externally @@ -1523,8 +1541,9 @@ static int geneve_changelink(struct net_device *dev, struct nlattr *tb[], memcpy(&info, &geneve->info, sizeof(info)); metadata = geneve->collect_md; use_udp6_rx_checksums = geneve->use_udp6_rx_checksums; + ttl_inherit = geneve->ttl_inherit; err = geneve_nl2info(tb, data, extack, &info, &metadata, - &use_udp6_rx_checksums, true); + &use_udp6_rx_checksums, &ttl_inherit, true); if (err) return err; @@ -1537,6 +1556,7 @@ static int geneve_changelink(struct net_device *dev, struct nlattr *tb[], geneve->info = info; geneve->collect_md = metadata; geneve->use_udp6_rx_checksums = use_udp6_rx_checksums; + geneve->ttl_inherit = ttl_inherit; geneve_unquiesce(geneve, gs4, gs6); return 0; @@ -1562,6 +1582,7 @@ static size_t geneve_get_size(const struct net_device *dev) nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_CSUM */ nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_TX */ nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_RX */ + nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TTL_INHERIT */ 0; } @@ -1569,6 +1590,7 @@ static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct geneve_dev *geneve = netdev_priv(dev); struct ip_tunnel_info *info = &geneve->info; + bool ttl_inherit = geneve->ttl_inherit; bool metadata = geneve->collect_md; __u8 tmp_vni[3]; __u32 vni; @@ -1614,6 +1636,9 @@ static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev) goto nla_put_failure; #endif + if (nla_put_u8(skb, IFLA_GENEVE_TTL_INHERIT, ttl_inherit)) + goto nla_put_failure; + return 0; nla_put_failure: @@ -1650,7 +1675,7 @@ struct net_device *geneve_dev_create_fb(struct net *net, const char *name, return dev; init_tnl_info(&info, dst_port); - err = geneve_configure(net, dev, NULL, &info, true, true); + err = geneve_configure(net, dev, NULL, &info, true, true, false); if (err) { free_netdev(dev); return ERR_PTR(err); diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 29d49b989acd..58faab897201 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -555,6 +555,7 @@ enum { IFLA_GENEVE_UDP_ZERO_CSUM6_TX, IFLA_GENEVE_UDP_ZERO_CSUM6_RX, IFLA_GENEVE_LABEL, + IFLA_GENEVE_TTL_INHERIT, __IFLA_GENEVE_MAX }; #define IFLA_GENEVE_MAX (__IFLA_GENEVE_MAX - 1) diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index 1c73d63068b1..141cbfdc5865 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -542,6 +542,7 @@ enum { IFLA_GENEVE_UDP_ZERO_CSUM6_TX, IFLA_GENEVE_UDP_ZERO_CSUM6_RX, IFLA_GENEVE_LABEL, + IFLA_GENEVE_TTL_INHERIT, __IFLA_GENEVE_MAX }; #define IFLA_GENEVE_MAX (__IFLA_GENEVE_MAX - 1) -- cgit From 2f965e3fcd4b4159a5ea832d4b5e9ff2550fa571 Mon Sep 17 00:00:00 2001 From: Petar Penkov Date: Fri, 14 Sep 2018 07:46:19 -0700 Subject: bpf: sync bpf.h uapi with tools/ This patch syncs tools/include/uapi/linux/bpf.h with the flow dissector definitions from include/uapi/linux/bpf.h Signed-off-by: Petar Penkov Signed-off-by: Willem de Bruijn Signed-off-by: Alexei Starovoitov --- tools/include/uapi/linux/bpf.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'tools/include/uapi/linux') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 66917a4eba27..aa5ccd2385ed 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -152,6 +152,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_LWT_SEG6LOCAL, BPF_PROG_TYPE_LIRC_MODE2, BPF_PROG_TYPE_SK_REUSEPORT, + BPF_PROG_TYPE_FLOW_DISSECTOR, }; enum bpf_attach_type { @@ -172,6 +173,7 @@ enum bpf_attach_type { BPF_CGROUP_UDP4_SENDMSG, BPF_CGROUP_UDP6_SENDMSG, BPF_LIRC_MODE2, + BPF_FLOW_DISSECTOR, __MAX_BPF_ATTACH_TYPE }; @@ -2333,6 +2335,7 @@ struct __sk_buff { /* ... here. */ __u32 data_meta; + struct bpf_flow_keys *flow_keys; }; struct bpf_tunnel_key { @@ -2778,4 +2781,27 @@ enum bpf_task_fd_type { BPF_FD_TYPE_URETPROBE, /* filename + offset */ }; +struct bpf_flow_keys { + __u16 nhoff; + __u16 thoff; + __u16 addr_proto; /* ETH_P_* of valid addrs */ + __u8 is_frag; + __u8 is_first_frag; + __u8 is_encap; + __u8 ip_proto; + __be16 n_proto; + __be16 sport; + __be16 dport; + union { + struct { + __be32 ipv4_src; + __be32 ipv4_dst; + }; + struct { + __u32 ipv6_src[4]; /* in6_addr; network order */ + __u32 ipv6_dst[4]; /* in6_addr; network order */ + }; + }; +}; + #endif /* _UAPI__LINUX_BPF_H__ */ -- cgit From 25025e0aab2f91a93a1557b299a79814559b4dc2 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 28 Sep 2018 14:45:48 +0000 Subject: bpf: sync include/uapi/linux/bpf.h to tools/include/uapi/linux/bpf.h The sync is required due to the appearance of a new map type: BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, which implements per-cpu cgroup local storage. Signed-off-by: Roman Gushchin Acked-by: Song Liu Cc: Daniel Borkmann Cc: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- tools/include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/include/uapi/linux') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index aa5ccd2385ed..e2070d819e04 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -127,6 +127,7 @@ enum bpf_map_type { BPF_MAP_TYPE_SOCKHASH, BPF_MAP_TYPE_CGROUP_STORAGE, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, + BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, }; enum bpf_prog_type { -- cgit From 6acc9b432e6714d72d7d77ec7c27f6f8358d0c71 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Tue, 2 Oct 2018 13:35:36 -0700 Subject: bpf: Add helper to retrieve socket in BPF This patch adds new BPF helper functions, bpf_sk_lookup_tcp() and bpf_sk_lookup_udp() which allows BPF programs to find out if there is a socket listening on this host, and returns a socket pointer which the BPF program can then access to determine, for instance, whether to forward or drop traffic. bpf_sk_lookup_xxx() may take a reference on the socket, so when a BPF program makes use of this function, it must subsequently pass the returned pointer into the newly added sk_release() to return the reference. By way of example, the following pseudocode would filter inbound connections at XDP if there is no corresponding service listening for the traffic: struct bpf_sock_tuple tuple; struct bpf_sock_ops *sk; populate_tuple(ctx, &tuple); // Extract the 5tuple from the packet sk = bpf_sk_lookup_tcp(ctx, &tuple, sizeof tuple, netns, 0); if (!sk) { // Couldn't find a socket listening for this traffic. Drop. return TC_ACT_SHOT; } bpf_sk_release(sk, 0); return TC_ACT_OK; Signed-off-by: Joe Stringer Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 93 +++++++++++++++++- kernel/bpf/verifier.c | 8 +- net/core/filter.c | 151 ++++++++++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 93 +++++++++++++++++- tools/testing/selftests/bpf/bpf_helpers.h | 12 +++ 5 files changed, 354 insertions(+), 3 deletions(-) (limited to 'tools/include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e2070d819e04..f9187b41dff6 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2144,6 +2144,77 @@ union bpf_attr { * request in the skb. * Return * 0 on success, or a negative error in case of failure. + * + * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags) + * Description + * Look for TCP socket matching *tuple*, optionally in a child + * network namespace *netns*. The return value must be checked, + * and if non-NULL, released via **bpf_sk_release**\ (). + * + * The *ctx* should point to the context of the program, such as + * the skb or socket (depending on the hook in use). This is used + * to determine the base network namespace for the lookup. + * + * *tuple_size* must be one of: + * + * **sizeof**\ (*tuple*\ **->ipv4**) + * Look for an IPv4 socket. + * **sizeof**\ (*tuple*\ **->ipv6**) + * Look for an IPv6 socket. + * + * If the *netns* is zero, then the socket lookup table in the + * netns associated with the *ctx* will be used. For the TC hooks, + * this in the netns of the device in the skb. For socket hooks, + * this in the netns of the socket. If *netns* is non-zero, then + * it specifies the ID of the netns relative to the netns + * associated with the *ctx*. + * + * All values for *flags* are reserved for future usage, and must + * be left at zero. + * + * This helper is available only if the kernel was compiled with + * **CONFIG_NET** configuration option. + * Return + * Pointer to *struct bpf_sock*, or NULL in case of failure. + * + * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags) + * Description + * Look for UDP socket matching *tuple*, optionally in a child + * network namespace *netns*. The return value must be checked, + * and if non-NULL, released via **bpf_sk_release**\ (). + * + * The *ctx* should point to the context of the program, such as + * the skb or socket (depending on the hook in use). This is used + * to determine the base network namespace for the lookup. + * + * *tuple_size* must be one of: + * + * **sizeof**\ (*tuple*\ **->ipv4**) + * Look for an IPv4 socket. + * **sizeof**\ (*tuple*\ **->ipv6**) + * Look for an IPv6 socket. + * + * If the *netns* is zero, then the socket lookup table in the + * netns associated with the *ctx* will be used. For the TC hooks, + * this in the netns of the device in the skb. For socket hooks, + * this in the netns of the socket. If *netns* is non-zero, then + * it specifies the ID of the netns relative to the netns + * associated with the *ctx*. + * + * All values for *flags* are reserved for future usage, and must + * be left at zero. + * + * This helper is available only if the kernel was compiled with + * **CONFIG_NET** configuration option. + * Return + * Pointer to *struct bpf_sock*, or NULL in case of failure. + * + * int bpf_sk_release(struct bpf_sock *sk) + * Description + * Release the reference held by *sock*. *sock* must be a non-NULL + * pointer that was returned from bpf_sk_lookup_xxx\ (). + * Return + * 0 on success, or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2229,7 +2300,10 @@ union bpf_attr { FN(get_current_cgroup_id), \ FN(get_local_storage), \ FN(sk_select_reuseport), \ - FN(skb_ancestor_cgroup_id), + FN(skb_ancestor_cgroup_id), \ + FN(sk_lookup_tcp), \ + FN(sk_lookup_udp), \ + FN(sk_release), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -2399,6 +2473,23 @@ struct bpf_sock { */ }; +struct bpf_sock_tuple { + union { + struct { + __be32 saddr; + __be32 daddr; + __be16 sport; + __be16 dport; + } ipv4; + struct { + __be32 saddr[4]; + __be32 daddr[4]; + __be16 sport; + __be16 dport; + } ipv6; + }; +}; + #define XDP_PACKET_HEADROOM 256 /* User return codes for XDP prog type. diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index cd0d8bc00bd1..73c81bef6ae8 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -153,6 +153,12 @@ static const struct bpf_verifier_ops * const bpf_verifier_ops[] = { * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type * passes through a NULL-check conditional. For the branch wherein the state is * changed to CONST_IMM, the verifier releases the reference. + * + * For each helper function that allocates a reference, such as + * bpf_sk_lookup_tcp(), there is a corresponding release function, such as + * bpf_sk_release(). When a reference type passes into the release function, + * the verifier also releases the reference. If any unchecked or unreleased + * reference remains at the end of the program, the verifier rejects it. */ /* verifier_state + insn_idx are pushed to stack when branch is encountered */ @@ -300,7 +306,7 @@ static bool arg_type_is_refcounted(enum bpf_arg_type type) */ static bool is_release_function(enum bpf_func_id func_id) { - return false; + return func_id == BPF_FUNC_sk_release; } /* string representation of 'enum bpf_reg_type' */ diff --git a/net/core/filter.c b/net/core/filter.c index b2cb186252e4..591c698bc517 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -58,13 +58,17 @@ #include #include #include +#include #include #include #include +#include +#include #include #include #include #include +#include #include #include #include @@ -4813,6 +4817,141 @@ static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = { }; #endif /* CONFIG_IPV6_SEG6_BPF */ +struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple, + struct sk_buff *skb, u8 family, u8 proto) +{ + int dif = skb->dev->ifindex; + bool refcounted = false; + struct sock *sk = NULL; + + if (family == AF_INET) { + __be32 src4 = tuple->ipv4.saddr; + __be32 dst4 = tuple->ipv4.daddr; + int sdif = inet_sdif(skb); + + if (proto == IPPROTO_TCP) + sk = __inet_lookup(net, &tcp_hashinfo, skb, 0, + src4, tuple->ipv4.sport, + dst4, tuple->ipv4.dport, + dif, sdif, &refcounted); + else + sk = __udp4_lib_lookup(net, src4, tuple->ipv4.sport, + dst4, tuple->ipv4.dport, + dif, sdif, &udp_table, skb); +#if IS_ENABLED(CONFIG_IPV6) + } else { + struct in6_addr *src6 = (struct in6_addr *)&tuple->ipv6.saddr; + struct in6_addr *dst6 = (struct in6_addr *)&tuple->ipv6.daddr; + int sdif = inet6_sdif(skb); + + if (proto == IPPROTO_TCP) + sk = __inet6_lookup(net, &tcp_hashinfo, skb, 0, + src6, tuple->ipv6.sport, + dst6, tuple->ipv6.dport, + dif, sdif, &refcounted); + else + sk = __udp6_lib_lookup(net, src6, tuple->ipv6.sport, + dst6, tuple->ipv6.dport, + dif, sdif, &udp_table, skb); +#endif + } + + if (unlikely(sk && !refcounted && !sock_flag(sk, SOCK_RCU_FREE))) { + WARN_ONCE(1, "Found non-RCU, unreferenced socket!"); + sk = NULL; + } + return sk; +} + +/* bpf_sk_lookup performs the core lookup for different types of sockets, + * taking a reference on the socket if it doesn't have the flag SOCK_RCU_FREE. + * Returns the socket as an 'unsigned long' to simplify the casting in the + * callers to satisfy BPF_CALL declarations. + */ +static unsigned long +bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, + u8 proto, u64 netns_id, u64 flags) +{ + struct net *caller_net; + struct sock *sk = NULL; + u8 family = AF_UNSPEC; + struct net *net; + + family = len == sizeof(tuple->ipv4) ? AF_INET : AF_INET6; + if (unlikely(family == AF_UNSPEC || netns_id > U32_MAX || flags)) + goto out; + + if (skb->dev) + caller_net = dev_net(skb->dev); + else + caller_net = sock_net(skb->sk); + if (netns_id) { + net = get_net_ns_by_id(caller_net, netns_id); + if (unlikely(!net)) + goto out; + sk = sk_lookup(net, tuple, skb, family, proto); + put_net(net); + } else { + net = caller_net; + sk = sk_lookup(net, tuple, skb, family, proto); + } + + if (sk) + sk = sk_to_full_sk(sk); +out: + return (unsigned long) sk; +} + +BPF_CALL_5(bpf_sk_lookup_tcp, struct sk_buff *, skb, + struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) +{ + return bpf_sk_lookup(skb, tuple, len, IPPROTO_TCP, netns_id, flags); +} + +static const struct bpf_func_proto bpf_sk_lookup_tcp_proto = { + .func = bpf_sk_lookup_tcp, + .gpl_only = false, + .pkt_access = true, + .ret_type = RET_PTR_TO_SOCKET_OR_NULL, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_PTR_TO_MEM, + .arg3_type = ARG_CONST_SIZE, + .arg4_type = ARG_ANYTHING, + .arg5_type = ARG_ANYTHING, +}; + +BPF_CALL_5(bpf_sk_lookup_udp, struct sk_buff *, skb, + struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) +{ + return bpf_sk_lookup(skb, tuple, len, IPPROTO_UDP, netns_id, flags); +} + +static const struct bpf_func_proto bpf_sk_lookup_udp_proto = { + .func = bpf_sk_lookup_udp, + .gpl_only = false, + .pkt_access = true, + .ret_type = RET_PTR_TO_SOCKET_OR_NULL, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_PTR_TO_MEM, + .arg3_type = ARG_CONST_SIZE, + .arg4_type = ARG_ANYTHING, + .arg5_type = ARG_ANYTHING, +}; + +BPF_CALL_1(bpf_sk_release, struct sock *, sk) +{ + if (!sock_flag(sk, SOCK_RCU_FREE)) + sock_gen_put(sk); + return 0; +} + +static const struct bpf_func_proto bpf_sk_release_proto = { + .func = bpf_sk_release, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_SOCKET, +}; + bool bpf_helper_changes_pkt_data(void *func) { if (func == bpf_skb_vlan_push || @@ -5019,6 +5158,12 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_skb_ancestor_cgroup_id: return &bpf_skb_ancestor_cgroup_id_proto; #endif + case BPF_FUNC_sk_lookup_tcp: + return &bpf_sk_lookup_tcp_proto; + case BPF_FUNC_sk_lookup_udp: + return &bpf_sk_lookup_udp_proto; + case BPF_FUNC_sk_release: + return &bpf_sk_release_proto; default: return bpf_base_func_proto(func_id); } @@ -5119,6 +5264,12 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sk_redirect_hash_proto; case BPF_FUNC_get_local_storage: return &bpf_get_local_storage_proto; + case BPF_FUNC_sk_lookup_tcp: + return &bpf_sk_lookup_tcp_proto; + case BPF_FUNC_sk_lookup_udp: + return &bpf_sk_lookup_udp_proto; + case BPF_FUNC_sk_release: + return &bpf_sk_release_proto; default: return bpf_base_func_proto(func_id); } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index e2070d819e04..f9187b41dff6 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2144,6 +2144,77 @@ union bpf_attr { * request in the skb. * Return * 0 on success, or a negative error in case of failure. + * + * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags) + * Description + * Look for TCP socket matching *tuple*, optionally in a child + * network namespace *netns*. The return value must be checked, + * and if non-NULL, released via **bpf_sk_release**\ (). + * + * The *ctx* should point to the context of the program, such as + * the skb or socket (depending on the hook in use). This is used + * to determine the base network namespace for the lookup. + * + * *tuple_size* must be one of: + * + * **sizeof**\ (*tuple*\ **->ipv4**) + * Look for an IPv4 socket. + * **sizeof**\ (*tuple*\ **->ipv6**) + * Look for an IPv6 socket. + * + * If the *netns* is zero, then the socket lookup table in the + * netns associated with the *ctx* will be used. For the TC hooks, + * this in the netns of the device in the skb. For socket hooks, + * this in the netns of the socket. If *netns* is non-zero, then + * it specifies the ID of the netns relative to the netns + * associated with the *ctx*. + * + * All values for *flags* are reserved for future usage, and must + * be left at zero. + * + * This helper is available only if the kernel was compiled with + * **CONFIG_NET** configuration option. + * Return + * Pointer to *struct bpf_sock*, or NULL in case of failure. + * + * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags) + * Description + * Look for UDP socket matching *tuple*, optionally in a child + * network namespace *netns*. The return value must be checked, + * and if non-NULL, released via **bpf_sk_release**\ (). + * + * The *ctx* should point to the context of the program, such as + * the skb or socket (depending on the hook in use). This is used + * to determine the base network namespace for the lookup. + * + * *tuple_size* must be one of: + * + * **sizeof**\ (*tuple*\ **->ipv4**) + * Look for an IPv4 socket. + * **sizeof**\ (*tuple*\ **->ipv6**) + * Look for an IPv6 socket. + * + * If the *netns* is zero, then the socket lookup table in the + * netns associated with the *ctx* will be used. For the TC hooks, + * this in the netns of the device in the skb. For socket hooks, + * this in the netns of the socket. If *netns* is non-zero, then + * it specifies the ID of the netns relative to the netns + * associated with the *ctx*. + * + * All values for *flags* are reserved for future usage, and must + * be left at zero. + * + * This helper is available only if the kernel was compiled with + * **CONFIG_NET** configuration option. + * Return + * Pointer to *struct bpf_sock*, or NULL in case of failure. + * + * int bpf_sk_release(struct bpf_sock *sk) + * Description + * Release the reference held by *sock*. *sock* must be a non-NULL + * pointer that was returned from bpf_sk_lookup_xxx\ (). + * Return + * 0 on success, or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2229,7 +2300,10 @@ union bpf_attr { FN(get_current_cgroup_id), \ FN(get_local_storage), \ FN(sk_select_reuseport), \ - FN(skb_ancestor_cgroup_id), + FN(skb_ancestor_cgroup_id), \ + FN(sk_lookup_tcp), \ + FN(sk_lookup_udp), \ + FN(sk_release), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -2399,6 +2473,23 @@ struct bpf_sock { */ }; +struct bpf_sock_tuple { + union { + struct { + __be32 saddr; + __be32 daddr; + __be16 sport; + __be16 dport; + } ipv4; + struct { + __be32 saddr[4]; + __be32 daddr[4]; + __be16 sport; + __be16 dport; + } ipv6; + }; +}; + #define XDP_PACKET_HEADROOM 256 /* User return codes for XDP prog type. diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index e4be7730222d..1d407b3494f9 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -143,6 +143,18 @@ static unsigned long long (*bpf_skb_cgroup_id)(void *ctx) = (void *) BPF_FUNC_skb_cgroup_id; static unsigned long long (*bpf_skb_ancestor_cgroup_id)(void *ctx, int level) = (void *) BPF_FUNC_skb_ancestor_cgroup_id; +static struct bpf_sock *(*bpf_sk_lookup_tcp)(void *ctx, + struct bpf_sock_tuple *tuple, + int size, unsigned int netns_id, + unsigned long long flags) = + (void *) BPF_FUNC_sk_lookup_tcp; +static struct bpf_sock *(*bpf_sk_lookup_udp)(void *ctx, + struct bpf_sock_tuple *tuple, + int size, unsigned int netns_id, + unsigned long long flags) = + (void *) BPF_FUNC_sk_lookup_udp; +static int (*bpf_sk_release)(struct bpf_sock *sk) = + (void *) BPF_FUNC_sk_release; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions -- cgit From 25fe15e54fe5e15b4963fe101f7cd8bad4f11393 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 8 Oct 2018 12:09:14 -0300 Subject: tools headers uapi: Sync kvm.h copy To pick up the changes introduced in: 6fbbde9a1969 ("KVM: x86: Control guest reads of MSR_PLATFORM_INFO") That is not yet used in tools such as 'perf trace'. The type of the change in this file, a simple integer parameter to the KVM_CHECK_EXTENSION ioctl should be easier to implement tho, adding to the libbeauty TODO list. This silences this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/kvm.h' differs from latest version at 'include/uapi/linux/kvm.h' diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h Cc: Adrian Hunter Cc: David Ahern Cc: Drew Schmitt Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paolo Bonzini Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-67h1bio5bihi1q6dy7hgwwx8@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/include/uapi/linux') diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 07548de5c988..251be353f950 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -952,6 +952,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_S390_HPAGE_1M 156 #define KVM_CAP_NESTED_STATE 157 #define KVM_CAP_ARM_INJECT_SERROR_ESR 158 +#define KVM_CAP_MSR_PLATFORM_INFO 159 #ifdef KVM_CAP_IRQ_ROUTING -- cgit From 421f4292f46e871cdcf8bdc3e6fdfcefe2e81a9d Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 16 Oct 2018 15:59:36 +0200 Subject: bpf, tls: add tls header to tools infrastructure Andrey reported a build error for the BPF kselftest suite when compiled on a machine which does not have tls related header bits installed natively: test_sockmap.c:120:23: fatal error: linux/tls.h: No such file or directory #include ^ compilation terminated. Fix it by adding the header to the tools include infrastructure and add definitions such as SOL_TLS that could potentially be missing. Fixes: e9dd904708c4 ("bpf: add tls support for testing in test_sockmap") Reported-by: Andrey Ignatov Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- tools/include/uapi/linux/tls.h | 78 ++++++++++++++++++++++++++++++ tools/testing/selftests/bpf/test_sockmap.c | 13 +++-- 2 files changed, 86 insertions(+), 5 deletions(-) create mode 100644 tools/include/uapi/linux/tls.h (limited to 'tools/include/uapi/linux') diff --git a/tools/include/uapi/linux/tls.h b/tools/include/uapi/linux/tls.h new file mode 100644 index 000000000000..ff02287495ac --- /dev/null +++ b/tools/include/uapi/linux/tls.h @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */ +/* + * Copyright (c) 2016-2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _UAPI_LINUX_TLS_H +#define _UAPI_LINUX_TLS_H + +#include + +/* TLS socket options */ +#define TLS_TX 1 /* Set transmit parameters */ +#define TLS_RX 2 /* Set receive parameters */ + +/* Supported versions */ +#define TLS_VERSION_MINOR(ver) ((ver) & 0xFF) +#define TLS_VERSION_MAJOR(ver) (((ver) >> 8) & 0xFF) + +#define TLS_VERSION_NUMBER(id) ((((id##_VERSION_MAJOR) & 0xFF) << 8) | \ + ((id##_VERSION_MINOR) & 0xFF)) + +#define TLS_1_2_VERSION_MAJOR 0x3 +#define TLS_1_2_VERSION_MINOR 0x3 +#define TLS_1_2_VERSION TLS_VERSION_NUMBER(TLS_1_2) + +/* Supported ciphers */ +#define TLS_CIPHER_AES_GCM_128 51 +#define TLS_CIPHER_AES_GCM_128_IV_SIZE 8 +#define TLS_CIPHER_AES_GCM_128_KEY_SIZE 16 +#define TLS_CIPHER_AES_GCM_128_SALT_SIZE 4 +#define TLS_CIPHER_AES_GCM_128_TAG_SIZE 16 +#define TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE 8 + +#define TLS_SET_RECORD_TYPE 1 +#define TLS_GET_RECORD_TYPE 2 + +struct tls_crypto_info { + __u16 version; + __u16 cipher_type; +}; + +struct tls12_crypto_info_aes_gcm_128 { + struct tls_crypto_info info; + unsigned char iv[TLS_CIPHER_AES_GCM_128_IV_SIZE]; + unsigned char key[TLS_CIPHER_AES_GCM_128_KEY_SIZE]; + unsigned char salt[TLS_CIPHER_AES_GCM_128_SALT_SIZE]; + unsigned char rec_seq[TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE]; +}; + +#endif /* _UAPI_LINUX_TLS_H */ diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index 10a5fa83c75a..7cb69ce6dfa2 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -43,6 +44,13 @@ int running; static void running_handler(int a); +#ifndef TCP_ULP +# define TCP_ULP 31 +#endif +#ifndef SOL_TLS +# define SOL_TLS 282 +#endif + /* randomly selected ports for testing on lo */ #define S1_PORT 10000 #define S2_PORT 10001 @@ -114,11 +122,6 @@ static void usage(char *argv[]) printf("\n"); } -#define TCP_ULP 31 -#define TLS_TX 1 -#define TLS_RX 2 -#include - char *sock_to_string(int s) { if (s == c1) -- cgit From c939989d74e27f44229a51d8fe96b5c297965bfa Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 16 Oct 2018 18:50:10 +0200 Subject: tools/headers: update kvm.h Pick up the latest kvm.h definitions. Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- tools/include/uapi/linux/kvm.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'tools/include/uapi/linux') diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 07548de5c988..2875ce85b322 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -719,6 +719,7 @@ struct kvm_ppc_one_seg_page_size { #define KVM_PPC_PAGE_SIZES_REAL 0x00000001 #define KVM_PPC_1T_SEGMENTS 0x00000002 +#define KVM_PPC_NO_HASH 0x00000004 struct kvm_ppc_smmu_info { __u64 flags; @@ -952,6 +953,11 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_S390_HPAGE_1M 156 #define KVM_CAP_NESTED_STATE 157 #define KVM_CAP_ARM_INJECT_SERROR_ESR 158 +#define KVM_CAP_MSR_PLATFORM_INFO 159 +#define KVM_CAP_PPC_NESTED_HV 160 +#define KVM_CAP_HYPERV_SEND_IPI 161 +#define KVM_CAP_COALESCED_PIO 162 +#define KVM_CAP_HYPERV_ENLIGHTENED_VMCS 163 #ifdef KVM_CAP_IRQ_ROUTING -- cgit From b55cbc8d9b44aaee94f19e995a5f241d453763ee Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 17 Oct 2018 16:24:48 +0200 Subject: bpf: fix doc of bpf_skb_adjust_room() in uapi len_diff is signed. Fixes: fa15601ab31e ("bpf: add documentation for eBPF helpers (33-41)") CC: Quentin Monnet Signed-off-by: Nicolas Dichtel Reviewed-by: Quentin Monnet Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 2 +- tools/include/uapi/linux/bpf.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f9187b41dff6..5e46f6732781 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1433,7 +1433,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_adjust_room(struct sk_buff *skb, u32 len_diff, u32 mode, u64 flags) + * int bpf_skb_adjust_room(struct sk_buff *skb, s32 len_diff, u32 mode, u64 flags) * Description * Grow or shrink the room for data in the packet associated to * *skb* by *len_diff*, and according to the selected *mode*. diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index f9187b41dff6..5e46f6732781 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1433,7 +1433,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_adjust_room(struct sk_buff *skb, u32 len_diff, u32 mode, u64 flags) + * int bpf_skb_adjust_room(struct sk_buff *skb, s32 len_diff, u32 mode, u64 flags) * Description * Grow or shrink the room for data in the packet associated to * *skb* by *len_diff*, and according to the selected *mode*. -- cgit From da4e1b15f67613eca7ae998abd996cfee7cff1ba Mon Sep 17 00:00:00 2001 From: Mauricio Vasquez B Date: Thu, 18 Oct 2018 15:16:36 +0200 Subject: Sync uapi/bpf.h to tools/include Sync both files. Signed-off-by: Mauricio Vasquez B Acked-by: Song Liu Signed-off-by: Alexei Starovoitov --- tools/include/uapi/linux/bpf.h | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) (limited to 'tools/include/uapi/linux') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 5e46f6732781..a2fb333290dc 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -103,6 +103,7 @@ enum bpf_cmd { BPF_BTF_LOAD, BPF_BTF_GET_FD_BY_ID, BPF_TASK_FD_QUERY, + BPF_MAP_LOOKUP_AND_DELETE_ELEM, }; enum bpf_map_type { @@ -128,6 +129,8 @@ enum bpf_map_type { BPF_MAP_TYPE_CGROUP_STORAGE, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, + BPF_MAP_TYPE_QUEUE, + BPF_MAP_TYPE_STACK, }; enum bpf_prog_type { @@ -462,6 +465,28 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * + * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags) + * Description + * Push an element *value* in *map*. *flags* is one of: + * + * **BPF_EXIST** + * If the queue/stack is full, the oldest element is removed to + * make room for this. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_map_pop_elem(struct bpf_map *map, void *value) + * Description + * Pop an element from *map*. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_map_peek_elem(struct bpf_map *map, void *value) + * Description + * Get an element from *map* without removing it. + * Return + * 0 on success, or a negative error in case of failure. + * * int bpf_probe_read(void *dst, u32 size, const void *src) * Description * For tracing programs, safely attempt to read *size* bytes from @@ -2303,7 +2328,10 @@ union bpf_attr { FN(skb_ancestor_cgroup_id), \ FN(sk_lookup_tcp), \ FN(sk_lookup_udp), \ - FN(sk_release), + FN(sk_release), \ + FN(map_push_elem), \ + FN(map_pop_elem), \ + FN(map_peek_elem), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call -- cgit From f908d26b2c41d9a924371099c4979e4b5d385165 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Fri, 19 Oct 2018 19:56:50 -0700 Subject: bpf: libbpf support for msg_push_data Add support for new bpf_msg_push_data in libbpf. Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann --- tools/include/uapi/linux/bpf.h | 20 +++++++++++++++++++- tools/testing/selftests/bpf/bpf_helpers.h | 2 ++ 2 files changed, 21 insertions(+), 1 deletion(-) (limited to 'tools/include/uapi/linux') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index a2fb333290dc..852dc17ab47a 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2240,6 +2240,23 @@ union bpf_attr { * pointer that was returned from bpf_sk_lookup_xxx\ (). * Return * 0 on success, or a negative error in case of failure. + * + * int bpf_msg_push_data(struct sk_buff *skb, u32 start, u32 len, u64 flags) + * Description + * For socket policies, insert *len* bytes into msg at offset + * *start*. + * + * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a + * *msg* it may want to insert metadata or options into the msg. + * This can later be read and used by any of the lower layer BPF + * hooks. + * + * This helper may fail if under memory pressure (a malloc + * fails) in these cases BPF programs will get an appropriate + * error and BPF programs will need to handle them. + * + * Return + * 0 on success, or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2331,7 +2348,8 @@ union bpf_attr { FN(sk_release), \ FN(map_push_elem), \ FN(map_pop_elem), \ - FN(map_peek_elem), + FN(map_peek_elem), \ + FN(msg_push_data), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index 6407a3df0f3b..686e57ce40f4 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -111,6 +111,8 @@ static int (*bpf_msg_cork_bytes)(void *ctx, int len) = (void *) BPF_FUNC_msg_cork_bytes; static int (*bpf_msg_pull_data)(void *ctx, int start, int end, int flags) = (void *) BPF_FUNC_msg_pull_data; +static int (*bpf_msg_push_data)(void *ctx, int start, int end, int flags) = + (void *) BPF_FUNC_msg_push_data; static int (*bpf_bind)(void *ctx, void *addr, int addr_len) = (void *) BPF_FUNC_bind; static int (*bpf_xdp_adjust_tail)(void *ctx, int offset) = -- cgit From f443f38c5789ece6ebe59ae21c27bf861e61c4e2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 24 Oct 2018 14:31:12 -0300 Subject: tools include uapi: Grab a copy of linux/fs.h We'll use it to create tables for the 'flags' argument to the 'mount' and 'umount' syscalls. Add it to check_headers.sh so that when a new protocol gets added we get a notification during the build process. Cc: Adrian Hunter Cc: Benjamin Peterson Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-yacf9jvkwfwg2g95r2us3xb3@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/fs.h | 393 ++++++++++++++++++++++++++++++++++++++++++ tools/perf/check-headers.sh | 1 + 2 files changed, 394 insertions(+) create mode 100644 tools/include/uapi/linux/fs.h (limited to 'tools/include/uapi/linux') diff --git a/tools/include/uapi/linux/fs.h b/tools/include/uapi/linux/fs.h new file mode 100644 index 000000000000..73e01918f996 --- /dev/null +++ b/tools/include/uapi/linux/fs.h @@ -0,0 +1,393 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_LINUX_FS_H +#define _UAPI_LINUX_FS_H + +/* + * This file has definitions for some important file table structures + * and constants and structures used by various generic file system + * ioctl's. Please do not make any changes in this file before + * sending patches for review to linux-fsdevel@vger.kernel.org and + * linux-api@vger.kernel.org. + */ + +#include +#include +#include + +/* + * It's silly to have NR_OPEN bigger than NR_FILE, but you can change + * the file limit at runtime and only root can increase the per-process + * nr_file rlimit, so it's safe to set up a ridiculously high absolute + * upper limit on files-per-process. + * + * Some programs (notably those using select()) may have to be + * recompiled to take full advantage of the new limits.. + */ + +/* Fixed constants first: */ +#undef NR_OPEN +#define INR_OPEN_CUR 1024 /* Initial setting for nfile rlimits */ +#define INR_OPEN_MAX 4096 /* Hard limit for nfile rlimits */ + +#define BLOCK_SIZE_BITS 10 +#define BLOCK_SIZE (1<string tables with: $ tools/perf/trace/beauty/mount_flags.sh static const char *mount_flags[] = { [4096 ? (ilog2(4096) + 1) : 0] = "BIND", [30 + 1] = "ACTIVE", [31 + 1] = "NOUSER", }; $ # trace -e mount,umount mount --bind /proc /mnt 1.228 ( 2.581 ms): mount/1068 mount(dev_name: /mnt, dir_name: 0x55f011c354a0, type: 0x55f011c38170, flags: BIND) = 0 # trace -e mount,umount umount /proc /mnt umount: /proc: target is busy. 1.587 ( 0.010 ms): umount/1070 umount2(name: /proc) = -1 EBUSY Device or resource busy 1.799 (12.660 ms): umount/1070 umount2(name: /mnt) = 0 # Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Cc: Jason A. Donenfeld Cc: Herbert Xu Link: https://lkml.kernel.org/n/tip-c00bqzclscgah26z2g5zxm73@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/fs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/include/uapi/linux') diff --git a/tools/include/uapi/linux/fs.h b/tools/include/uapi/linux/fs.h index 73e01918f996..a441ea1bfe6d 100644 --- a/tools/include/uapi/linux/fs.h +++ b/tools/include/uapi/linux/fs.h @@ -279,8 +279,8 @@ struct fsxattr { #define FS_ENCRYPTION_MODE_AES_256_CTS 4 #define FS_ENCRYPTION_MODE_AES_128_CBC 5 #define FS_ENCRYPTION_MODE_AES_128_CTS 6 -#define FS_ENCRYPTION_MODE_SPECK128_256_XTS 7 -#define FS_ENCRYPTION_MODE_SPECK128_256_CTS 8 +#define FS_ENCRYPTION_MODE_SPECK128_256_XTS 7 /* Removed, do not use. */ +#define FS_ENCRYPTION_MODE_SPECK128_256_CTS 8 /* Removed, do not use. */ struct fscrypt_policy { __u8 version; -- cgit From 685626dc26bd9cead850d06520708acbd16bcfda Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 30 Oct 2018 16:50:08 -0300 Subject: tools include uapi: Update linux/mmap.h copy To pick up the changes from: 20916d4636a9 ("mm/hugetlb: add mmap() encodings for 32MB and 512MB page sizes") That do not entail changes in in tools, this just shows that we have to consider bits [26:31] of flags to beautify that in tools like 'perf trace' This silences this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/mman.h' differs from latest version at 'include/uapi/linux/mman.h' diff -u tools/include/uapi/linux/mman.h include/uapi/linux/mman.h Cc: Anshuman Khandual Cc: Greg Kroah-Hartman Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-3rvc39lon93kgt5pl31d8g4x@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/mman.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools/include/uapi/linux') diff --git a/tools/include/uapi/linux/mman.h b/tools/include/uapi/linux/mman.h index bfd5938fede6..d0f515d53299 100644 --- a/tools/include/uapi/linux/mman.h +++ b/tools/include/uapi/linux/mman.h @@ -28,7 +28,9 @@ #define MAP_HUGE_2MB HUGETLB_FLAG_ENCODE_2MB #define MAP_HUGE_8MB HUGETLB_FLAG_ENCODE_8MB #define MAP_HUGE_16MB HUGETLB_FLAG_ENCODE_16MB +#define MAP_HUGE_32MB HUGETLB_FLAG_ENCODE_32MB #define MAP_HUGE_256MB HUGETLB_FLAG_ENCODE_256MB +#define MAP_HUGE_512MB HUGETLB_FLAG_ENCODE_512MB #define MAP_HUGE_1GB HUGETLB_FLAG_ENCODE_1GB #define MAP_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB #define MAP_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB -- cgit From 827758129a0f84fbd0b2dda15e14a77a7604803d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 30 Oct 2018 17:01:46 -0300 Subject: tools headers: Sync the various kvm.h header copies For powerpc, s390, x86 and the main uapi linux/kvm.h header, none of them entail changes in tooling. Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-avn7iy8f4tcm2y40sbsdk31m@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/powerpc/include/uapi/asm/kvm.h | 1 + tools/arch/s390/include/uapi/asm/kvm.h | 2 ++ tools/arch/x86/include/uapi/asm/kvm.h | 6 ++---- tools/include/uapi/linux/kvm.h | 21 +++++++++++++++++++-- 4 files changed, 24 insertions(+), 6 deletions(-) (limited to 'tools/include/uapi/linux') diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h index 1b32b56a03d3..8c876c166ef2 100644 --- a/tools/arch/powerpc/include/uapi/asm/kvm.h +++ b/tools/arch/powerpc/include/uapi/asm/kvm.h @@ -634,6 +634,7 @@ struct kvm_ppc_cpu_char { #define KVM_REG_PPC_DEC_EXPIRY (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbe) #define KVM_REG_PPC_ONLINE (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbf) +#define KVM_REG_PPC_PTCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc0) /* Transactional Memory checkpointed state: * This is all GPRs, all VSX regs and a subset of SPRs diff --git a/tools/arch/s390/include/uapi/asm/kvm.h b/tools/arch/s390/include/uapi/asm/kvm.h index 9a50f02b9894..16511d97e8dc 100644 --- a/tools/arch/s390/include/uapi/asm/kvm.h +++ b/tools/arch/s390/include/uapi/asm/kvm.h @@ -160,6 +160,8 @@ struct kvm_s390_vm_cpu_subfunc { #define KVM_S390_VM_CRYPTO_ENABLE_DEA_KW 1 #define KVM_S390_VM_CRYPTO_DISABLE_AES_KW 2 #define KVM_S390_VM_CRYPTO_DISABLE_DEA_KW 3 +#define KVM_S390_VM_CRYPTO_ENABLE_APIE 4 +#define KVM_S390_VM_CRYPTO_DISABLE_APIE 5 /* kvm attributes for migration mode */ #define KVM_S390_VM_MIGRATION_STOP 0 diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 8a6eff9c27f3..dabfcf7c3941 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -300,10 +300,7 @@ struct kvm_vcpu_events { __u8 injected; __u8 nr; __u8 has_error_code; - union { - __u8 pad; - __u8 pending; - }; + __u8 pending; __u32 error_code; } exception; struct { @@ -387,6 +384,7 @@ struct kvm_sync_regs { #define KVM_STATE_NESTED_GUEST_MODE 0x00000001 #define KVM_STATE_NESTED_RUN_PENDING 0x00000002 +#define KVM_STATE_NESTED_EVMCS 0x00000004 #define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001 #define KVM_STATE_NESTED_SMM_VMXON 0x00000002 diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 2875ce85b322..2b7a652c9fa4 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -420,13 +420,19 @@ struct kvm_run { struct kvm_coalesced_mmio_zone { __u64 addr; __u32 size; - __u32 pad; + union { + __u32 pad; + __u32 pio; + }; }; struct kvm_coalesced_mmio { __u64 phys_addr; __u32 len; - __u32 pad; + union { + __u32 pad; + __u32 pio; + }; __u8 data[8]; }; @@ -751,6 +757,15 @@ struct kvm_ppc_resize_hpt { #define KVM_S390_SIE_PAGE_OFFSET 1 +/* + * On arm64, machine type can be used to request the physical + * address size for the VM. Bits[7-0] are reserved for the guest + * PA size shift (i.e, log2(PA_Size)). For backward compatibility, + * value 0 implies the default IPA size, 40bits. + */ +#define KVM_VM_TYPE_ARM_IPA_SIZE_MASK 0xffULL +#define KVM_VM_TYPE_ARM_IPA_SIZE(x) \ + ((x) & KVM_VM_TYPE_ARM_IPA_SIZE_MASK) /* * ioctls for /dev/kvm fds: */ @@ -958,6 +973,8 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_HYPERV_SEND_IPI 161 #define KVM_CAP_COALESCED_PIO 162 #define KVM_CAP_HYPERV_ENLIGHTENED_VMCS 163 +#define KVM_CAP_EXCEPTION_PAYLOAD 164 +#define KVM_CAP_ARM_VM_IPA_SIZE 165 #ifdef KVM_CAP_IRQ_ROUTING -- cgit From d45a57fff0a657045a77b395ae713ffae0cb4e46 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 30 Oct 2018 17:04:47 -0300 Subject: tools headers uapi: Update linux/netlink.h header copy Picking the changes from: 89d35528d17d ("netlink: Add new socket option to enable strict checking on dumps") To silence this build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/netlink.h' differs from latest version at 'include/uapi/linux/netlink.h' Cc: Alexei Starovoitov Cc: David Ahern Cc: David S. Miller Cc: Eric Leblond Link: https://lkml.kernel.org/n/tip-1xymkfjpmhxfzrs46t8z8mjw@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/netlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/include/uapi/linux') diff --git a/tools/include/uapi/linux/netlink.h b/tools/include/uapi/linux/netlink.h index 776bc92e9118..486ed1f0c0bc 100644 --- a/tools/include/uapi/linux/netlink.h +++ b/tools/include/uapi/linux/netlink.h @@ -155,6 +155,7 @@ enum nlmsgerr_attrs { #define NETLINK_LIST_MEMBERSHIPS 9 #define NETLINK_CAP_ACK 10 #define NETLINK_EXT_ACK 11 +#define NETLINK_DUMP_STRICT_CHK 12 struct nl_pktinfo { __u32 group; -- cgit From 76b0b801782b34b3028dcef3de36cb634e3908a8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 30 Oct 2018 17:06:57 -0300 Subject: tools headers uapi: Update linux/if_link.h header copy To pick the changes from: 9163a0fc1f0c ("net: bridge: add support for per-port vlan stats") And silence this build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/if_link.h' differs from latest version at 'include/uapi/linux/if_link.h' Cc: Alexei Starovoitov Cc: David S. Miller Cc: Eric Leblond Cc: Nikolay Aleksandrov Link: https://lkml.kernel.org/n/tip-7p53ghippywz7fqkwo3nkzet@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/include/uapi/linux') diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index 58faab897201..1debfa42cba1 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -287,6 +287,7 @@ enum { IFLA_BR_MCAST_STATS_ENABLED, IFLA_BR_MCAST_IGMP_VERSION, IFLA_BR_MCAST_MLD_VERSION, + IFLA_BR_VLAN_STATS_PER_PORT, __IFLA_BR_MAX, }; -- cgit From 4f8f382e635707ddaddf8269a116e4f8cc8835c0 Mon Sep 17 00:00:00 2001 From: David Miller Date: Tue, 30 Oct 2018 22:24:04 -0700 Subject: perf tools: Don't clone maps from parent when synthesizing forks When synthesizing FORK events, we are trying to create thread objects for the already running tasks on the machine. Normally, for a kernel FORK event, we want to clone the parent's maps because that is what the kernel just did. But when synthesizing, this should not be done. If we do, we end up with overlapping maps as we process the sythesized MMAP2 events that get delivered shortly thereafter. Use the FORK event misc flags in an internal way to signal this situation, so we can elide the map clone when appropriate. Signed-off-by: David S. Miller Cc: Don Zickus Cc: Jiri Olsa Cc: Joe Mario Link: http://lkml.kernel.org/r/20181030.222404.2085088822877051075.davem@davemloft.net [ Added comment about flag use in machine__process_fork_event(), use ternary op in thread__clone_map_groups() as suggested by Jiri ] Signed-off-by: Arnaldo Carvalho de Melo --- include/uapi/linux/perf_event.h | 2 ++ tools/include/uapi/linux/perf_event.h | 2 ++ tools/perf/util/event.c | 1 + tools/perf/util/machine.c | 19 ++++++++++++++++++- tools/perf/util/thread.c | 13 +++++-------- tools/perf/util/thread.h | 2 +- 6 files changed, 29 insertions(+), 10 deletions(-) (limited to 'tools/include/uapi/linux') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index f35eb72739c0..9de8780ac8d9 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -646,10 +646,12 @@ struct perf_event_mmap_page { * * PERF_RECORD_MISC_MMAP_DATA - PERF_RECORD_MMAP* events * PERF_RECORD_MISC_COMM_EXEC - PERF_RECORD_COMM event + * PERF_RECORD_MISC_FORK_EXEC - PERF_RECORD_FORK event (perf internal) * PERF_RECORD_MISC_SWITCH_OUT - PERF_RECORD_SWITCH* events */ #define PERF_RECORD_MISC_MMAP_DATA (1 << 13) #define PERF_RECORD_MISC_COMM_EXEC (1 << 13) +#define PERF_RECORD_MISC_FORK_EXEC (1 << 13) #define PERF_RECORD_MISC_SWITCH_OUT (1 << 13) /* * These PERF_RECORD_MISC_* flags below are safely reused diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index f35eb72739c0..9de8780ac8d9 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -646,10 +646,12 @@ struct perf_event_mmap_page { * * PERF_RECORD_MISC_MMAP_DATA - PERF_RECORD_MMAP* events * PERF_RECORD_MISC_COMM_EXEC - PERF_RECORD_COMM event + * PERF_RECORD_MISC_FORK_EXEC - PERF_RECORD_FORK event (perf internal) * PERF_RECORD_MISC_SWITCH_OUT - PERF_RECORD_SWITCH* events */ #define PERF_RECORD_MISC_MMAP_DATA (1 << 13) #define PERF_RECORD_MISC_COMM_EXEC (1 << 13) +#define PERF_RECORD_MISC_FORK_EXEC (1 << 13) #define PERF_RECORD_MISC_SWITCH_OUT (1 << 13) /* * These PERF_RECORD_MISC_* flags below are safely reused diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index bc646185f8d9..e9c108a6b1c3 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -308,6 +308,7 @@ static int perf_event__synthesize_fork(struct perf_tool *tool, event->fork.pid = tgid; event->fork.tid = pid; event->fork.header.type = PERF_RECORD_FORK; + event->fork.header.misc = PERF_RECORD_MISC_FORK_EXEC; event->fork.header.size = (sizeof(event->fork) + machine->id_hdr_size); diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 8ee8ab39d8ac..8f36ce813bc5 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1708,6 +1708,7 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event struct thread *parent = machine__findnew_thread(machine, event->fork.ppid, event->fork.ptid); + bool do_maps_clone = true; int err = 0; if (dump_trace) @@ -1736,9 +1737,25 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event thread = machine__findnew_thread(machine, event->fork.pid, event->fork.tid); + /* + * When synthesizing FORK events, we are trying to create thread + * objects for the already running tasks on the machine. + * + * Normally, for a kernel FORK event, we want to clone the parent's + * maps because that is what the kernel just did. + * + * But when synthesizing, this should not be done. If we do, we end up + * with overlapping maps as we process the sythesized MMAP2 events that + * get delivered shortly thereafter. + * + * Use the FORK event misc flags in an internal way to signal this + * situation, so we can elide the map clone when appropriate. + */ + if (event->fork.header.misc & PERF_RECORD_MISC_FORK_EXEC) + do_maps_clone = false; if (thread == NULL || parent == NULL || - thread__fork(thread, parent, sample->time) < 0) { + thread__fork(thread, parent, sample->time, do_maps_clone) < 0) { dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n"); err = -1; } diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 2048d393ece6..3d9ed7d0e281 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -330,7 +330,8 @@ static int thread__prepare_access(struct thread *thread) } static int thread__clone_map_groups(struct thread *thread, - struct thread *parent) + struct thread *parent, + bool do_maps_clone) { /* This is new thread, we share map groups for process. */ if (thread->pid_ == parent->pid_) @@ -341,15 +342,11 @@ static int thread__clone_map_groups(struct thread *thread, thread->pid_, thread->tid, parent->pid_, parent->tid); return 0; } - /* But this one is new process, copy maps. */ - if (map_groups__clone(thread, parent->mg) < 0) - return -ENOMEM; - - return 0; + return do_maps_clone ? map_groups__clone(thread, parent->mg) : 0; } -int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp) +int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bool do_maps_clone) { if (parent->comm_set) { const char *comm = thread__comm_str(parent); @@ -362,7 +359,7 @@ int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp) } thread->ppid = parent->tid; - return thread__clone_map_groups(thread, parent); + return thread__clone_map_groups(thread, parent, do_maps_clone); } void thread__find_cpumode_addr_location(struct thread *thread, u64 addr, diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 36c09a9904e6..30e2b4c165fe 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -89,7 +89,7 @@ struct comm *thread__comm(const struct thread *thread); struct comm *thread__exec_comm(const struct thread *thread); const char *thread__comm_str(const struct thread *thread); int thread__insert_map(struct thread *thread, struct map *map); -int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp); +int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bool do_maps_clone); size_t thread__fprintf(struct thread *thread, FILE *fp); struct thread *thread__main_thread(struct machine *machine, struct thread *thread); -- cgit