aboutsummaryrefslogtreecommitdiff
path: root/tools/include
diff options
context:
space:
mode:
Diffstat (limited to 'tools/include')
-rw-r--r--tools/include/linux/filter.h20
-rw-r--r--tools/include/linux/numa.h16
-rw-r--r--tools/include/linux/rbtree.h52
-rw-r--r--tools/include/linux/rbtree_augmented.h60
-rw-r--r--tools/include/nolibc/nolibc.h2263
-rw-r--r--tools/include/uapi/asm-generic/unistd.h4
-rw-r--r--tools/include/uapi/asm/bitsperlong.h4
-rw-r--r--tools/include/uapi/drm/i915_drm.h8
-rw-r--r--tools/include/uapi/linux/bpf.h116
-rw-r--r--tools/include/uapi/linux/ethtool.h51
-rw-r--r--tools/include/uapi/linux/fs.h60
-rw-r--r--tools/include/uapi/linux/if_link.h20
-rw-r--r--tools/include/uapi/linux/if_xdp.h78
-rw-r--r--tools/include/uapi/linux/in.h10
-rw-r--r--tools/include/uapi/linux/kvm.h19
-rw-r--r--tools/include/uapi/linux/mount.h58
-rw-r--r--tools/include/uapi/linux/perf_event.h55
-rw-r--r--tools/include/uapi/linux/pkt_sched.h1163
-rw-r--r--tools/include/uapi/linux/prctl.h9
-rw-r--r--tools/include/uapi/linux/tc_act/tc_bpf.h2
-rw-r--r--tools/include/uapi/linux/vhost.h113
21 files changed, 3982 insertions, 199 deletions
diff --git a/tools/include/linux/filter.h b/tools/include/linux/filter.h
index af55acf73e75..cce0b02c0e28 100644
--- a/tools/include/linux/filter.h
+++ b/tools/include/linux/filter.h
@@ -199,6 +199,16 @@
.off = OFF, \
.imm = 0 })
+/* Like BPF_JMP_REG, but with 32-bit wide operands for comparison. */
+
+#define BPF_JMP32_REG(OP, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP32 | BPF_OP(OP) | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
/* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */
#define BPF_JMP_IMM(OP, DST, IMM, OFF) \
@@ -209,6 +219,16 @@
.off = OFF, \
.imm = IMM })
+/* Like BPF_JMP_IMM, but with 32-bit wide operands for comparison. */
+
+#define BPF_JMP32_IMM(OP, DST, IMM, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP32 | BPF_OP(OP) | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = OFF, \
+ .imm = IMM })
+
/* Unconditional jumps, goto pc + off16 */
#define BPF_JMP_A(OFF) \
diff --git a/tools/include/linux/numa.h b/tools/include/linux/numa.h
new file mode 100644
index 000000000000..110b0e5d0fb0
--- /dev/null
+++ b/tools/include/linux/numa.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_NUMA_H
+#define _LINUX_NUMA_H
+
+
+#ifdef CONFIG_NODES_SHIFT
+#define NODES_SHIFT CONFIG_NODES_SHIFT
+#else
+#define NODES_SHIFT 0
+#endif
+
+#define MAX_NUMNODES (1 << NODES_SHIFT)
+
+#define NUMA_NO_NODE (-1)
+
+#endif /* _LINUX_NUMA_H */
diff --git a/tools/include/linux/rbtree.h b/tools/include/linux/rbtree.h
index 112582253dd0..8e9ed4786269 100644
--- a/tools/include/linux/rbtree.h
+++ b/tools/include/linux/rbtree.h
@@ -43,13 +43,28 @@ struct rb_root {
struct rb_node *rb_node;
};
+/*
+ * Leftmost-cached rbtrees.
+ *
+ * We do not cache the rightmost node based on footprint
+ * size vs number of potential users that could benefit
+ * from O(1) rb_last(). Just not worth it, users that want
+ * this feature can always implement the logic explicitly.
+ * Furthermore, users that want to cache both pointers may
+ * find it a bit asymmetric, but that's ok.
+ */
+struct rb_root_cached {
+ struct rb_root rb_root;
+ struct rb_node *rb_leftmost;
+};
#define rb_parent(r) ((struct rb_node *)((r)->__rb_parent_color & ~3))
#define RB_ROOT (struct rb_root) { NULL, }
+#define RB_ROOT_CACHED (struct rb_root_cached) { {NULL, }, NULL }
#define rb_entry(ptr, type, member) container_of(ptr, type, member)
-#define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL)
+#define RB_EMPTY_ROOT(root) (READ_ONCE((root)->rb_node) == NULL)
/* 'empty' nodes are nodes that are known not to be inserted in an rbtree */
#define RB_EMPTY_NODE(node) \
@@ -68,6 +83,12 @@ extern struct rb_node *rb_prev(const struct rb_node *);
extern struct rb_node *rb_first(const struct rb_root *);
extern struct rb_node *rb_last(const struct rb_root *);
+extern void rb_insert_color_cached(struct rb_node *,
+ struct rb_root_cached *, bool);
+extern void rb_erase_cached(struct rb_node *node, struct rb_root_cached *);
+/* Same as rb_first(), but O(1) */
+#define rb_first_cached(root) (root)->rb_leftmost
+
/* Postorder iteration - always visit the parent after its children */
extern struct rb_node *rb_first_postorder(const struct rb_root *);
extern struct rb_node *rb_next_postorder(const struct rb_node *);
@@ -75,6 +96,8 @@ extern struct rb_node *rb_next_postorder(const struct rb_node *);
/* Fast replacement of a single node without remove/rebalance/add/rebalance */
extern void rb_replace_node(struct rb_node *victim, struct rb_node *new,
struct rb_root *root);
+extern void rb_replace_node_cached(struct rb_node *victim, struct rb_node *new,
+ struct rb_root_cached *root);
static inline void rb_link_node(struct rb_node *node, struct rb_node *parent,
struct rb_node **rb_link)
@@ -90,12 +113,29 @@ static inline void rb_link_node(struct rb_node *node, struct rb_node *parent,
____ptr ? rb_entry(____ptr, type, member) : NULL; \
})
-
-/*
- * Handy for checking that we are not deleting an entry that is
- * already in a list, found in block/{blk-throttle,cfq-iosched}.c,
- * probably should be moved to lib/rbtree.c...
+/**
+ * rbtree_postorder_for_each_entry_safe - iterate in post-order over rb_root of
+ * given type allowing the backing memory of @pos to be invalidated
+ *
+ * @pos: the 'type *' to use as a loop cursor.
+ * @n: another 'type *' to use as temporary storage
+ * @root: 'rb_root *' of the rbtree.
+ * @field: the name of the rb_node field within 'type'.
+ *
+ * rbtree_postorder_for_each_entry_safe() provides a similar guarantee as
+ * list_for_each_entry_safe() and allows the iteration to continue independent
+ * of changes to @pos by the body of the loop.
+ *
+ * Note, however, that it cannot handle other modifications that re-order the
+ * rbtree it is iterating over. This includes calling rb_erase() on @pos, as
+ * rb_erase() may rebalance the tree, causing us to miss some nodes.
*/
+#define rbtree_postorder_for_each_entry_safe(pos, n, root, field) \
+ for (pos = rb_entry_safe(rb_first_postorder(root), typeof(*pos), field); \
+ pos && ({ n = rb_entry_safe(rb_next_postorder(&pos->field), \
+ typeof(*pos), field); 1; }); \
+ pos = n)
+
static inline void rb_erase_init(struct rb_node *n, struct rb_root *root)
{
rb_erase(n, root);
diff --git a/tools/include/linux/rbtree_augmented.h b/tools/include/linux/rbtree_augmented.h
index 43be941db695..d008e1404580 100644
--- a/tools/include/linux/rbtree_augmented.h
+++ b/tools/include/linux/rbtree_augmented.h
@@ -44,7 +44,9 @@ struct rb_augment_callbacks {
void (*rotate)(struct rb_node *old, struct rb_node *new);
};
-extern void __rb_insert_augmented(struct rb_node *node, struct rb_root *root,
+extern void __rb_insert_augmented(struct rb_node *node,
+ struct rb_root *root,
+ bool newleft, struct rb_node **leftmost,
void (*augment_rotate)(struct rb_node *old, struct rb_node *new));
/*
* Fixup the rbtree and update the augmented information when rebalancing.
@@ -60,7 +62,16 @@ static inline void
rb_insert_augmented(struct rb_node *node, struct rb_root *root,
const struct rb_augment_callbacks *augment)
{
- __rb_insert_augmented(node, root, augment->rotate);
+ __rb_insert_augmented(node, root, false, NULL, augment->rotate);
+}
+
+static inline void
+rb_insert_augmented_cached(struct rb_node *node,
+ struct rb_root_cached *root, bool newleft,
+ const struct rb_augment_callbacks *augment)
+{
+ __rb_insert_augmented(node, &root->rb_root,
+ newleft, &root->rb_leftmost, augment->rotate);
}
#define RB_DECLARE_CALLBACKS(rbstatic, rbname, rbstruct, rbfield, \
@@ -93,7 +104,9 @@ rbname ## _rotate(struct rb_node *rb_old, struct rb_node *rb_new) \
old->rbaugmented = rbcompute(old); \
} \
rbstatic const struct rb_augment_callbacks rbname = { \
- rbname ## _propagate, rbname ## _copy, rbname ## _rotate \
+ .propagate = rbname ## _propagate, \
+ .copy = rbname ## _copy, \
+ .rotate = rbname ## _rotate \
};
@@ -126,11 +139,11 @@ __rb_change_child(struct rb_node *old, struct rb_node *new,
{
if (parent) {
if (parent->rb_left == old)
- parent->rb_left = new;
+ WRITE_ONCE(parent->rb_left, new);
else
- parent->rb_right = new;
+ WRITE_ONCE(parent->rb_right, new);
} else
- root->rb_node = new;
+ WRITE_ONCE(root->rb_node, new);
}
extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root,
@@ -138,12 +151,17 @@ extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root,
static __always_inline struct rb_node *
__rb_erase_augmented(struct rb_node *node, struct rb_root *root,
+ struct rb_node **leftmost,
const struct rb_augment_callbacks *augment)
{
- struct rb_node *child = node->rb_right, *tmp = node->rb_left;
+ struct rb_node *child = node->rb_right;
+ struct rb_node *tmp = node->rb_left;
struct rb_node *parent, *rebalance;
unsigned long pc;
+ if (leftmost && node == *leftmost)
+ *leftmost = rb_next(node);
+
if (!tmp) {
/*
* Case 1: node to erase has no more than 1 child (easy!)
@@ -170,6 +188,7 @@ __rb_erase_augmented(struct rb_node *node, struct rb_root *root,
tmp = parent;
} else {
struct rb_node *successor = child, *child2;
+
tmp = child->rb_left;
if (!tmp) {
/*
@@ -183,6 +202,7 @@ __rb_erase_augmented(struct rb_node *node, struct rb_root *root,
*/
parent = successor;
child2 = successor->rb_right;
+
augment->copy(node, successor);
} else {
/*
@@ -204,19 +224,23 @@ __rb_erase_augmented(struct rb_node *node, struct rb_root *root,
successor = tmp;
tmp = tmp->rb_left;
} while (tmp);
- parent->rb_left = child2 = successor->rb_right;
- successor->rb_right = child;
+ child2 = successor->rb_right;
+ WRITE_ONCE(parent->rb_left, child2);
+ WRITE_ONCE(successor->rb_right, child);
rb_set_parent(child, successor);
+
augment->copy(node, successor);
augment->propagate(parent, successor);
}
- successor->rb_left = tmp = node->rb_left;
+ tmp = node->rb_left;
+ WRITE_ONCE(successor->rb_left, tmp);
rb_set_parent(tmp, successor);
pc = node->__rb_parent_color;
tmp = __rb_parent(pc);
__rb_change_child(node, successor, tmp, root);
+
if (child2) {
successor->__rb_parent_color = pc;
rb_set_parent_color(child2, parent, RB_BLACK);
@@ -237,9 +261,21 @@ static __always_inline void
rb_erase_augmented(struct rb_node *node, struct rb_root *root,
const struct rb_augment_callbacks *augment)
{
- struct rb_node *rebalance = __rb_erase_augmented(node, root, augment);
+ struct rb_node *rebalance = __rb_erase_augmented(node, root,
+ NULL, augment);
if (rebalance)
__rb_erase_color(rebalance, root, augment->rotate);
}
-#endif /* _TOOLS_LINUX_RBTREE_AUGMENTED_H */
+static __always_inline void
+rb_erase_augmented_cached(struct rb_node *node, struct rb_root_cached *root,
+ const struct rb_augment_callbacks *augment)
+{
+ struct rb_node *rebalance = __rb_erase_augmented(node, &root->rb_root,
+ &root->rb_leftmost,
+ augment);
+ if (rebalance)
+ __rb_erase_color(rebalance, &root->rb_root, augment->rotate);
+}
+
+#endif /* _TOOLS_LINUX_RBTREE_AUGMENTED_H */
diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h
new file mode 100644
index 000000000000..1708e9f9f8aa
--- /dev/null
+++ b/tools/include/nolibc/nolibc.h
@@ -0,0 +1,2263 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/* nolibc.h
+ * Copyright (C) 2017-2018 Willy Tarreau <w@1wt.eu>
+ */
+
+/*
+ * This file is designed to be used as a libc alternative for minimal programs
+ * with very limited requirements. It consists of a small number of syscall and
+ * type definitions, and the minimal startup code needed to call main().
+ * All syscalls are declared as static functions so that they can be optimized
+ * away by the compiler when not used.
+ *
+ * Syscalls are split into 3 levels:
+ * - The lower level is the arch-specific syscall() definition, consisting in
+ * assembly code in compound expressions. These are called my_syscall0() to
+ * my_syscall6() depending on the number of arguments. The MIPS
+ * implementation is limited to 5 arguments. All input arguments are cast
+ * to a long stored in a register. These expressions always return the
+ * syscall's return value as a signed long value which is often either a
+ * pointer or the negated errno value.
+ *
+ * - The second level is mostly architecture-independent. It is made of
+ * static functions called sys_<name>() which rely on my_syscallN()
+ * depending on the syscall definition. These functions are responsible
+ * for exposing the appropriate types for the syscall arguments (int,
+ * pointers, etc) and for setting the appropriate return type (often int).
+ * A few of them are architecture-specific because the syscalls are not all
+ * mapped exactly the same among architectures. For example, some archs do
+ * not implement select() and need pselect6() instead, so the sys_select()
+ * function will have to abstract this.
+ *
+ * - The third level is the libc call definition. It exposes the lower raw
+ * sys_<name>() calls in a way that looks like what a libc usually does,
+ * takes care of specific input values, and of setting errno upon error.
+ * There can be minor variations compared to standard libc calls. For
+ * example the open() call always takes 3 args here.
+ *
+ * The errno variable is declared static and unused. This way it can be
+ * optimized away if not used. However this means that a program made of
+ * multiple C files may observe different errno values (one per C file). For
+ * the type of programs this project targets it usually is not a problem. The
+ * resulting program may even be reduced by defining the NOLIBC_IGNORE_ERRNO
+ * macro, in which case the errno value will never be assigned.
+ *
+ * Some stdint-like integer types are defined. These are valid on all currently
+ * supported architectures, because signs are enforced, ints are assumed to be
+ * 32 bits, longs the size of a pointer and long long 64 bits. If more
+ * architectures have to be supported, this may need to be adapted.
+ *
+ * Some macro definitions like the O_* values passed to open(), and some
+ * structures like the sys_stat struct depend on the architecture.
+ *
+ * The definitions start with the architecture-specific parts, which are picked
+ * based on what the compiler knows about the target architecture, and are
+ * completed with the generic code. Since it is the compiler which sets the
+ * target architecture, cross-compiling normally works out of the box without
+ * having to specify anything.
+ *
+ * Finally some very common libc-level functions are provided. It is the case
+ * for a few functions usually found in string.h, ctype.h, or stdlib.h. Nothing
+ * is currently provided regarding stdio emulation.
+ *
+ * The macro NOLIBC is always defined, so that it is possible for a program to
+ * check this macro to know if it is being built against and decide to disable
+ * some features or simply not to include some standard libc files.
+ *
+ * Ideally this file should be split in multiple files for easier long term
+ * maintenance, but provided as a single file as it is now, it's quite
+ * convenient to use. Maybe some variations involving a set of includes at the
+ * top could work.
+ *
+ * A simple static executable may be built this way :
+ * $ gcc -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib \
+ * -static -include nolibc.h -lgcc -o hello hello.c
+ *
+ * A very useful calling convention table may be found here :
+ * http://man7.org/linux/man-pages/man2/syscall.2.html
+ *
+ * This doc is quite convenient though not necessarily up to date :
+ * https://w3challs.com/syscalls/
+ *
+ */
+
+/* Some archs (at least aarch64) don't expose the regular syscalls anymore by
+ * default, either because they have an "_at" replacement, or because there are
+ * more modern alternatives. For now we'd rather still use them.
+ */
+#define __ARCH_WANT_SYSCALL_NO_AT
+#define __ARCH_WANT_SYSCALL_NO_FLAGS
+#define __ARCH_WANT_SYSCALL_DEPRECATED
+
+#include <asm/unistd.h>
+#include <asm/ioctls.h>
+#include <asm/errno.h>
+#include <linux/fs.h>
+#include <linux/loop.h>
+
+#define NOLIBC
+
+/* this way it will be removed if unused */
+static int errno;
+
+#ifndef NOLIBC_IGNORE_ERRNO
+#define SET_ERRNO(v) do { errno = (v); } while (0)
+#else
+#define SET_ERRNO(v) do { } while (0)
+#endif
+
+/* errno codes all ensure that they will not conflict with a valid pointer
+ * because they all correspond to the highest addressable memry page.
+ */
+#define MAX_ERRNO 4095
+
+/* Declare a few quite common macros and types that usually are in stdlib.h,
+ * stdint.h, ctype.h, unistd.h and a few other common locations.
+ */
+
+#define NULL ((void *)0)
+
+/* stdint types */
+typedef unsigned char uint8_t;
+typedef signed char int8_t;
+typedef unsigned short uint16_t;
+typedef signed short int16_t;
+typedef unsigned int uint32_t;
+typedef signed int int32_t;
+typedef unsigned long long uint64_t;
+typedef signed long long int64_t;
+typedef unsigned long size_t;
+typedef signed long ssize_t;
+typedef unsigned long uintptr_t;
+typedef signed long intptr_t;
+typedef signed long ptrdiff_t;
+
+/* for stat() */
+typedef unsigned int dev_t;
+typedef unsigned long ino_t;
+typedef unsigned int mode_t;
+typedef signed int pid_t;
+typedef unsigned int uid_t;
+typedef unsigned int gid_t;
+typedef unsigned long nlink_t;
+typedef signed long off_t;
+typedef signed long blksize_t;
+typedef signed long blkcnt_t;
+typedef signed long time_t;
+
+/* for poll() */
+struct pollfd {
+ int fd;
+ short int events;
+ short int revents;
+};
+
+/* for select() */
+struct timeval {
+ long tv_sec;
+ long tv_usec;
+};
+
+/* for pselect() */
+struct timespec {
+ long tv_sec;
+ long tv_nsec;
+};
+
+/* for gettimeofday() */
+struct timezone {
+ int tz_minuteswest;
+ int tz_dsttime;
+};
+
+/* for getdents64() */
+struct linux_dirent64 {
+ uint64_t d_ino;
+ int64_t d_off;
+ unsigned short d_reclen;
+ unsigned char d_type;
+ char d_name[];
+};
+
+/* commonly an fd_set represents 256 FDs */
+#define FD_SETSIZE 256
+typedef struct { uint32_t fd32[FD_SETSIZE/32]; } fd_set;
+
+/* needed by wait4() */
+struct rusage {
+ struct timeval ru_utime;
+ struct timeval ru_stime;
+ long ru_maxrss;
+ long ru_ixrss;
+ long ru_idrss;
+ long ru_isrss;
+ long ru_minflt;
+ long ru_majflt;
+ long ru_nswap;
+ long ru_inblock;
+ long ru_oublock;
+ long ru_msgsnd;
+ long ru_msgrcv;
+ long ru_nsignals;
+ long ru_nvcsw;
+ long ru_nivcsw;
+};
+
+/* stat flags (WARNING, octal here) */
+#define S_IFDIR 0040000
+#define S_IFCHR 0020000
+#define S_IFBLK 0060000
+#define S_IFREG 0100000
+#define S_IFIFO 0010000
+#define S_IFLNK 0120000
+#define S_IFSOCK 0140000
+#define S_IFMT 0170000
+
+#define S_ISDIR(mode) (((mode) & S_IFDIR) == S_IFDIR)
+#define S_ISCHR(mode) (((mode) & S_IFCHR) == S_IFCHR)
+#define S_ISBLK(mode) (((mode) & S_IFBLK) == S_IFBLK)
+#define S_ISREG(mode) (((mode) & S_IFREG) == S_IFREG)
+#define S_ISFIFO(mode) (((mode) & S_IFIFO) == S_IFIFO)
+#define S_ISLNK(mode) (((mode) & S_IFLNK) == S_IFLNK)
+#define S_ISSOCK(mode) (((mode) & S_IFSOCK) == S_IFSOCK)
+
+#define DT_UNKNOWN 0
+#define DT_FIFO 1
+#define DT_CHR 2
+#define DT_DIR 4
+#define DT_BLK 6
+#define DT_REG 8
+#define DT_LNK 10
+#define DT_SOCK 12
+
+/* all the *at functions */
+#ifndef AT_FDWCD
+#define AT_FDCWD -100
+#endif
+
+/* lseek */
+#define SEEK_SET 0
+#define SEEK_CUR 1
+#define SEEK_END 2
+
+/* reboot */
+#define LINUX_REBOOT_MAGIC1 0xfee1dead
+#define LINUX_REBOOT_MAGIC2 0x28121969
+#define LINUX_REBOOT_CMD_HALT 0xcdef0123
+#define LINUX_REBOOT_CMD_POWER_OFF 0x4321fedc
+#define LINUX_REBOOT_CMD_RESTART 0x01234567
+#define LINUX_REBOOT_CMD_SW_SUSPEND 0xd000fce2
+
+
+/* The format of the struct as returned by the libc to the application, which
+ * significantly differs from the format returned by the stat() syscall flavours.
+ */
+struct stat {
+ dev_t st_dev; /* ID of device containing file */
+ ino_t st_ino; /* inode number */
+ mode_t st_mode; /* protection */
+ nlink_t st_nlink; /* number of hard links */
+ uid_t st_uid; /* user ID of owner */
+ gid_t st_gid; /* group ID of owner */
+ dev_t st_rdev; /* device ID (if special file) */
+ off_t st_size; /* total size, in bytes */
+ blksize_t st_blksize; /* blocksize for file system I/O */
+ blkcnt_t st_blocks; /* number of 512B blocks allocated */
+ time_t st_atime; /* time of last access */
+ time_t st_mtime; /* time of last modification */
+ time_t st_ctime; /* time of last status change */
+};
+
+#define WEXITSTATUS(status) (((status) & 0xff00) >> 8)
+#define WIFEXITED(status) (((status) & 0x7f) == 0)
+
+
+/* Below comes the architecture-specific code. For each architecture, we have
+ * the syscall declarations and the _start code definition. This is the only
+ * global part. On all architectures the kernel puts everything in the stack
+ * before jumping to _start just above us, without any return address (_start
+ * is not a function but an entry pint). So at the stack pointer we find argc.
+ * Then argv[] begins, and ends at the first NULL. Then we have envp which
+ * starts and ends with a NULL as well. So envp=argv+argc+1.
+ */
+
+#if defined(__x86_64__)
+/* Syscalls for x86_64 :
+ * - registers are 64-bit
+ * - syscall number is passed in rax
+ * - arguments are in rdi, rsi, rdx, r10, r8, r9 respectively
+ * - the system call is performed by calling the syscall instruction
+ * - syscall return comes in rax
+ * - rcx and r8..r11 may be clobbered, others are preserved.
+ * - the arguments are cast to long and assigned into the target registers
+ * which are then simply passed as registers to the asm code, so that we
+ * don't have to experience issues with register constraints.
+ * - the syscall number is always specified last in order to allow to force
+ * some registers before (gcc refuses a %-register at the last position).
+ */
+
+#define my_syscall0(num) \
+({ \
+ long _ret; \
+ register long _num asm("rax") = (num); \
+ \
+ asm volatile ( \
+ "syscall\n" \
+ : "=a" (_ret) \
+ : "0"(_num) \
+ : "rcx", "r8", "r9", "r10", "r11", "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#define my_syscall1(num, arg1) \
+({ \
+ long _ret; \
+ register long _num asm("rax") = (num); \
+ register long _arg1 asm("rdi") = (long)(arg1); \
+ \
+ asm volatile ( \
+ "syscall\n" \
+ : "=a" (_ret) \
+ : "r"(_arg1), \
+ "0"(_num) \
+ : "rcx", "r8", "r9", "r10", "r11", "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#define my_syscall2(num, arg1, arg2) \
+({ \
+ long _ret; \
+ register long _num asm("rax") = (num); \
+ register long _arg1 asm("rdi") = (long)(arg1); \
+ register long _arg2 asm("rsi") = (long)(arg2); \
+ \
+ asm volatile ( \
+ "syscall\n" \
+ : "=a" (_ret) \
+ : "r"(_arg1), "r"(_arg2), \
+ "0"(_num) \
+ : "rcx", "r8", "r9", "r10", "r11", "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#define my_syscall3(num, arg1, arg2, arg3) \
+({ \
+ long _ret; \
+ register long _num asm("rax") = (num); \
+ register long _arg1 asm("rdi") = (long)(arg1); \
+ register long _arg2 asm("rsi") = (long)(arg2); \
+ register long _arg3 asm("rdx") = (long)(arg3); \
+ \
+ asm volatile ( \
+ "syscall\n" \
+ : "=a" (_ret) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), \
+ "0"(_num) \
+ : "rcx", "r8", "r9", "r10", "r11", "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#define my_syscall4(num, arg1, arg2, arg3, arg4) \
+({ \
+ long _ret; \
+ register long _num asm("rax") = (num); \
+ register long _arg1 asm("rdi") = (long)(arg1); \
+ register long _arg2 asm("rsi") = (long)(arg2); \
+ register long _arg3 asm("rdx") = (long)(arg3); \
+ register long _arg4 asm("r10") = (long)(arg4); \
+ \
+ asm volatile ( \
+ "syscall\n" \
+ : "=a" (_ret), "=r"(_arg4) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \
+ "0"(_num) \
+ : "rcx", "r8", "r9", "r11", "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
+({ \
+ long _ret; \
+ register long _num asm("rax") = (num); \
+ register long _arg1 asm("rdi") = (long)(arg1); \
+ register long _arg2 asm("rsi") = (long)(arg2); \
+ register long _arg3 asm("rdx") = (long)(arg3); \
+ register long _arg4 asm("r10") = (long)(arg4); \
+ register long _arg5 asm("r8") = (long)(arg5); \
+ \
+ asm volatile ( \
+ "syscall\n" \
+ : "=a" (_ret), "=r"(_arg4), "=r"(_arg5) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+ "0"(_num) \
+ : "rcx", "r9", "r11", "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \
+({ \
+ long _ret; \
+ register long _num asm("rax") = (num); \
+ register long _arg1 asm("rdi") = (long)(arg1); \
+ register long _arg2 asm("rsi") = (long)(arg2); \
+ register long _arg3 asm("rdx") = (long)(arg3); \
+ register long _arg4 asm("r10") = (long)(arg4); \
+ register long _arg5 asm("r8") = (long)(arg5); \
+ register long _arg6 asm("r9") = (long)(arg6); \
+ \
+ asm volatile ( \
+ "syscall\n" \
+ : "=a" (_ret), "=r"(_arg4), "=r"(_arg5) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+ "r"(_arg6), "0"(_num) \
+ : "rcx", "r11", "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+/* startup code */
+asm(".section .text\n"
+ ".global _start\n"
+ "_start:\n"
+ "pop %rdi\n" // argc (first arg, %rdi)
+ "mov %rsp, %rsi\n" // argv[] (second arg, %rsi)
+ "lea 8(%rsi,%rdi,8),%rdx\n" // then a NULL then envp (third arg, %rdx)
+ "and $-16, %rsp\n" // x86 ABI : esp must be 16-byte aligned when
+ "sub $8, %rsp\n" // entering the callee
+ "call main\n" // main() returns the status code, we'll exit with it.
+ "movzb %al, %rdi\n" // retrieve exit code from 8 lower bits
+ "mov $60, %rax\n" // NR_exit == 60
+ "syscall\n" // really exit
+ "hlt\n" // ensure it does not return
+ "");
+
+/* fcntl / open */
+#define O_RDONLY 0
+#define O_WRONLY 1
+#define O_RDWR 2
+#define O_CREAT 0x40
+#define O_EXCL 0x80
+#define O_NOCTTY 0x100
+#define O_TRUNC 0x200
+#define O_APPEND 0x400
+#define O_NONBLOCK 0x800
+#define O_DIRECTORY 0x10000
+
+/* The struct returned by the stat() syscall, equivalent to stat64(). The
+ * syscall returns 116 bytes and stops in the middle of __unused.
+ */
+struct sys_stat_struct {
+ unsigned long st_dev;
+ unsigned long st_ino;
+ unsigned long st_nlink;
+ unsigned int st_mode;
+ unsigned int st_uid;
+
+ unsigned int st_gid;
+ unsigned int __pad0;
+ unsigned long st_rdev;
+ long st_size;
+ long st_blksize;
+
+ long st_blocks;
+ unsigned long st_atime;
+ unsigned long st_atime_nsec;
+ unsigned long st_mtime;
+
+ unsigned long st_mtime_nsec;
+ unsigned long st_ctime;
+ unsigned long st_ctime_nsec;
+ long __unused[3];
+};
+
+#elif defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__)
+/* Syscalls for i386 :
+ * - mostly similar to x86_64
+ * - registers are 32-bit
+ * - syscall number is passed in eax
+ * - arguments are in ebx, ecx, edx, esi, edi, ebp respectively
+ * - all registers are preserved (except eax of course)
+ * - the system call is performed by calling int $0x80
+ * - syscall return comes in eax
+ * - the arguments are cast to long and assigned into the target registers
+ * which are then simply passed as registers to the asm code, so that we
+ * don't have to experience issues with register constraints.
+ * - the syscall number is always specified last in order to allow to force
+ * some registers before (gcc refuses a %-register at the last position).
+ *
+ * Also, i386 supports the old_select syscall if newselect is not available
+ */
+#define __ARCH_WANT_SYS_OLD_SELECT
+
+#define my_syscall0(num) \
+({ \
+ long _ret; \
+ register long _num asm("eax") = (num); \
+ \
+ asm volatile ( \
+ "int $0x80\n" \
+ : "=a" (_ret) \
+ : "0"(_num) \
+ : "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#define my_syscall1(num, arg1) \
+({ \
+ long _ret; \
+ register long _num asm("eax") = (num); \
+ register long _arg1 asm("ebx") = (long)(arg1); \
+ \
+ asm volatile ( \
+ "int $0x80\n" \
+ : "=a" (_ret) \
+ : "r"(_arg1), \
+ "0"(_num) \
+ : "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#define my_syscall2(num, arg1, arg2) \
+({ \
+ long _ret; \
+ register long _num asm("eax") = (num); \
+ register long _arg1 asm("ebx") = (long)(arg1); \
+ register long _arg2 asm("ecx") = (long)(arg2); \
+ \
+ asm volatile ( \
+ "int $0x80\n" \
+ : "=a" (_ret) \
+ : "r"(_arg1), "r"(_arg2), \
+ "0"(_num) \
+ : "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#define my_syscall3(num, arg1, arg2, arg3) \
+({ \
+ long _ret; \
+ register long _num asm("eax") = (num); \
+ register long _arg1 asm("ebx") = (long)(arg1); \
+ register long _arg2 asm("ecx") = (long)(arg2); \
+ register long _arg3 asm("edx") = (long)(arg3); \
+ \
+ asm volatile ( \
+ "int $0x80\n" \
+ : "=a" (_ret) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), \
+ "0"(_num) \
+ : "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#define my_syscall4(num, arg1, arg2, arg3, arg4) \
+({ \
+ long _ret; \
+ register long _num asm("eax") = (num); \
+ register long _arg1 asm("ebx") = (long)(arg1); \
+ register long _arg2 asm("ecx") = (long)(arg2); \
+ register long _arg3 asm("edx") = (long)(arg3); \
+ register long _arg4 asm("esi") = (long)(arg4); \
+ \
+ asm volatile ( \
+ "int $0x80\n" \
+ : "=a" (_ret) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \
+ "0"(_num) \
+ : "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
+({ \
+ long _ret; \
+ register long _num asm("eax") = (num); \
+ register long _arg1 asm("ebx") = (long)(arg1); \
+ register long _arg2 asm("ecx") = (long)(arg2); \
+ register long _arg3 asm("edx") = (long)(arg3); \
+ register long _arg4 asm("esi") = (long)(arg4); \
+ register long _arg5 asm("edi") = (long)(arg5); \
+ \
+ asm volatile ( \
+ "int $0x80\n" \
+ : "=a" (_ret) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+ "0"(_num) \
+ : "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+/* startup code */
+asm(".section .text\n"
+ ".global _start\n"
+ "_start:\n"
+ "pop %eax\n" // argc (first arg, %eax)
+ "mov %esp, %ebx\n" // argv[] (second arg, %ebx)
+ "lea 4(%ebx,%eax,4),%ecx\n" // then a NULL then envp (third arg, %ecx)
+ "and $-16, %esp\n" // x86 ABI : esp must be 16-byte aligned when
+ "push %ecx\n" // push all registers on the stack so that we
+ "push %ebx\n" // support both regparm and plain stack modes
+ "push %eax\n"
+ "call main\n" // main() returns the status code in %eax
+ "movzbl %al, %ebx\n" // retrieve exit code from lower 8 bits
+ "movl $1, %eax\n" // NR_exit == 1
+ "int $0x80\n" // exit now
+ "hlt\n" // ensure it does not
+ "");
+
+/* fcntl / open */
+#define O_RDONLY 0
+#define O_WRONLY 1
+#define O_RDWR 2
+#define O_CREAT 0x40
+#define O_EXCL 0x80
+#define O_NOCTTY 0x100
+#define O_TRUNC 0x200
+#define O_APPEND 0x400
+#define O_NONBLOCK 0x800
+#define O_DIRECTORY 0x10000
+
+/* The struct returned by the stat() syscall, 32-bit only, the syscall returns
+ * exactly 56 bytes (stops before the unused array).
+ */
+struct sys_stat_struct {
+ unsigned long st_dev;
+ unsigned long st_ino;
+ unsigned short st_mode;
+ unsigned short st_nlink;
+ unsigned short st_uid;
+ unsigned short st_gid;
+
+ unsigned long st_rdev;
+ unsigned long st_size;
+ unsigned long st_blksize;
+ unsigned long st_blocks;
+
+ unsigned long st_atime;
+ unsigned long st_atime_nsec;
+ unsigned long st_mtime;
+ unsigned long st_mtime_nsec;
+
+ unsigned long st_ctime;
+ unsigned long st_ctime_nsec;
+ unsigned long __unused[2];
+};
+
+#elif defined(__ARM_EABI__)
+/* Syscalls for ARM in ARM or Thumb modes :
+ * - registers are 32-bit
+ * - stack is 8-byte aligned
+ * ( http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.faqs/ka4127.html)
+ * - syscall number is passed in r7
+ * - arguments are in r0, r1, r2, r3, r4, r5
+ * - the system call is performed by calling svc #0
+ * - syscall return comes in r0.
+ * - only lr is clobbered.
+ * - the arguments are cast to long and assigned into the target registers
+ * which are then simply passed as registers to the asm code, so that we
+ * don't have to experience issues with register constraints.
+ * - the syscall number is always specified last in order to allow to force
+ * some registers before (gcc refuses a %-register at the last position).
+ *
+ * Also, ARM supports the old_select syscall if newselect is not available
+ */
+#define __ARCH_WANT_SYS_OLD_SELECT
+
+#define my_syscall0(num) \
+({ \
+ register long _num asm("r7") = (num); \
+ register long _arg1 asm("r0"); \
+ \
+ asm volatile ( \
+ "svc #0\n" \
+ : "=r"(_arg1) \
+ : "r"(_num) \
+ : "memory", "cc", "lr" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall1(num, arg1) \
+({ \
+ register long _num asm("r7") = (num); \
+ register long _arg1 asm("r0") = (long)(arg1); \
+ \
+ asm volatile ( \
+ "svc #0\n" \
+ : "=r"(_arg1) \
+ : "r"(_arg1), \
+ "r"(_num) \
+ : "memory", "cc", "lr" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall2(num, arg1, arg2) \
+({ \
+ register long _num asm("r7") = (num); \
+ register long _arg1 asm("r0") = (long)(arg1); \
+ register long _arg2 asm("r1") = (long)(arg2); \
+ \
+ asm volatile ( \
+ "svc #0\n" \
+ : "=r"(_arg1) \
+ : "r"(_arg1), "r"(_arg2), \
+ "r"(_num) \
+ : "memory", "cc", "lr" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall3(num, arg1, arg2, arg3) \
+({ \
+ register long _num asm("r7") = (num); \
+ register long _arg1 asm("r0") = (long)(arg1); \
+ register long _arg2 asm("r1") = (long)(arg2); \
+ register long _arg3 asm("r2") = (long)(arg3); \
+ \
+ asm volatile ( \
+ "svc #0\n" \
+ : "=r"(_arg1) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), \
+ "r"(_num) \
+ : "memory", "cc", "lr" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall4(num, arg1, arg2, arg3, arg4) \
+({ \
+ register long _num asm("r7") = (num); \
+ register long _arg1 asm("r0") = (long)(arg1); \
+ register long _arg2 asm("r1") = (long)(arg2); \
+ register long _arg3 asm("r2") = (long)(arg3); \
+ register long _arg4 asm("r3") = (long)(arg4); \
+ \
+ asm volatile ( \
+ "svc #0\n" \
+ : "=r"(_arg1) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \
+ "r"(_num) \
+ : "memory", "cc", "lr" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
+({ \
+ register long _num asm("r7") = (num); \
+ register long _arg1 asm("r0") = (long)(arg1); \
+ register long _arg2 asm("r1") = (long)(arg2); \
+ register long _arg3 asm("r2") = (long)(arg3); \
+ register long _arg4 asm("r3") = (long)(arg4); \
+ register long _arg5 asm("r4") = (long)(arg5); \
+ \
+ asm volatile ( \
+ "svc #0\n" \
+ : "=r" (_arg1) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+ "r"(_num) \
+ : "memory", "cc", "lr" \
+ ); \
+ _arg1; \
+})
+
+/* startup code */
+asm(".section .text\n"
+ ".global _start\n"
+ "_start:\n"
+#if defined(__THUMBEB__) || defined(__THUMBEL__)
+ /* We enter here in 32-bit mode but if some previous functions were in
+ * 16-bit mode, the assembler cannot know, so we need to tell it we're in
+ * 32-bit now, then switch to 16-bit (is there a better way to do it than
+ * adding 1 by hand ?) and tell the asm we're now in 16-bit mode so that
+ * it generates correct instructions. Note that we do not support thumb1.
+ */
+ ".code 32\n"
+ "add r0, pc, #1\n"
+ "bx r0\n"
+ ".code 16\n"
+#endif
+ "pop {%r0}\n" // argc was in the stack
+ "mov %r1, %sp\n" // argv = sp
+ "add %r2, %r1, %r0, lsl #2\n" // envp = argv + 4*argc ...
+ "add %r2, %r2, $4\n" // ... + 4
+ "and %r3, %r1, $-8\n" // AAPCS : sp must be 8-byte aligned in the
+ "mov %sp, %r3\n" // callee, an bl doesn't push (lr=pc)
+ "bl main\n" // main() returns the status code, we'll exit with it.
+ "and %r0, %r0, $0xff\n" // limit exit code to 8 bits
+ "movs r7, $1\n" // NR_exit == 1
+ "svc $0x00\n"
+ "");
+
+/* fcntl / open */
+#define O_RDONLY 0
+#define O_WRONLY 1
+#define O_RDWR 2
+#define O_CREAT 0x40
+#define O_EXCL 0x80
+#define O_NOCTTY 0x100
+#define O_TRUNC 0x200
+#define O_APPEND 0x400
+#define O_NONBLOCK 0x800
+#define O_DIRECTORY 0x4000
+
+/* The struct returned by the stat() syscall, 32-bit only, the syscall returns
+ * exactly 56 bytes (stops before the unused array). In big endian, the format
+ * differs as devices are returned as short only.
+ */
+struct sys_stat_struct {
+#if defined(__ARMEB__)
+ unsigned short st_dev;
+ unsigned short __pad1;
+#else
+ unsigned long st_dev;
+#endif
+ unsigned long st_ino;
+ unsigned short st_mode;
+ unsigned short st_nlink;
+ unsigned short st_uid;
+ unsigned short st_gid;
+#if defined(__ARMEB__)
+ unsigned short st_rdev;
+ unsigned short __pad2;
+#else
+ unsigned long st_rdev;
+#endif
+ unsigned long st_size;
+ unsigned long st_blksize;
+ unsigned long st_blocks;
+ unsigned long st_atime;
+ unsigned long st_atime_nsec;
+ unsigned long st_mtime;
+ unsigned long st_mtime_nsec;
+ unsigned long st_ctime;
+ unsigned long st_ctime_nsec;
+ unsigned long __unused[2];
+};
+
+#elif defined(__aarch64__)
+/* Syscalls for AARCH64 :
+ * - registers are 64-bit
+ * - stack is 16-byte aligned
+ * - syscall number is passed in x8
+ * - arguments are in x0, x1, x2, x3, x4, x5
+ * - the system call is performed by calling svc 0
+ * - syscall return comes in x0.
+ * - the arguments are cast to long and assigned into the target registers
+ * which are then simply passed as registers to the asm code, so that we
+ * don't have to experience issues with register constraints.
+ *
+ * On aarch64, select() is not implemented so we have to use pselect6().
+ */
+#define __ARCH_WANT_SYS_PSELECT6
+
+#define my_syscall0(num) \
+({ \
+ register long _num asm("x8") = (num); \
+ register long _arg1 asm("x0"); \
+ \
+ asm volatile ( \
+ "svc #0\n" \
+ : "=r"(_arg1) \
+ : "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall1(num, arg1) \
+({ \
+ register long _num asm("x8") = (num); \
+ register long _arg1 asm("x0") = (long)(arg1); \
+ \
+ asm volatile ( \
+ "svc #0\n" \
+ : "=r"(_arg1) \
+ : "r"(_arg1), \
+ "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall2(num, arg1, arg2) \
+({ \
+ register long _num asm("x8") = (num); \
+ register long _arg1 asm("x0") = (long)(arg1); \
+ register long _arg2 asm("x1") = (long)(arg2); \
+ \
+ asm volatile ( \
+ "svc #0\n" \
+ : "=r"(_arg1) \
+ : "r"(_arg1), "r"(_arg2), \
+ "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall3(num, arg1, arg2, arg3) \
+({ \
+ register long _num asm("x8") = (num); \
+ register long _arg1 asm("x0") = (long)(arg1); \
+ register long _arg2 asm("x1") = (long)(arg2); \
+ register long _arg3 asm("x2") = (long)(arg3); \
+ \
+ asm volatile ( \
+ "svc #0\n" \
+ : "=r"(_arg1) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), \
+ "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall4(num, arg1, arg2, arg3, arg4) \
+({ \
+ register long _num asm("x8") = (num); \
+ register long _arg1 asm("x0") = (long)(arg1); \
+ register long _arg2 asm("x1") = (long)(arg2); \
+ register long _arg3 asm("x2") = (long)(arg3); \
+ register long _arg4 asm("x3") = (long)(arg4); \
+ \
+ asm volatile ( \
+ "svc #0\n" \
+ : "=r"(_arg1) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \
+ "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
+({ \
+ register long _num asm("x8") = (num); \
+ register long _arg1 asm("x0") = (long)(arg1); \
+ register long _arg2 asm("x1") = (long)(arg2); \
+ register long _arg3 asm("x2") = (long)(arg3); \
+ register long _arg4 asm("x3") = (long)(arg4); \
+ register long _arg5 asm("x4") = (long)(arg5); \
+ \
+ asm volatile ( \
+ "svc #0\n" \
+ : "=r" (_arg1) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+ "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \
+({ \
+ register long _num asm("x8") = (num); \
+ register long _arg1 asm("x0") = (long)(arg1); \
+ register long _arg2 asm("x1") = (long)(arg2); \
+ register long _arg3 asm("x2") = (long)(arg3); \
+ register long _arg4 asm("x3") = (long)(arg4); \
+ register long _arg5 asm("x4") = (long)(arg5); \
+ register long _arg6 asm("x5") = (long)(arg6); \
+ \
+ asm volatile ( \
+ "svc #0\n" \
+ : "=r" (_arg1) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+ "r"(_arg6), "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+/* startup code */
+asm(".section .text\n"
+ ".global _start\n"
+ "_start:\n"
+ "ldr x0, [sp]\n" // argc (x0) was in the stack
+ "add x1, sp, 8\n" // argv (x1) = sp
+ "lsl x2, x0, 3\n" // envp (x2) = 8*argc ...
+ "add x2, x2, 8\n" // + 8 (skip null)
+ "add x2, x2, x1\n" // + argv
+ "and sp, x1, -16\n" // sp must be 16-byte aligned in the callee
+ "bl main\n" // main() returns the status code, we'll exit with it.
+ "and x0, x0, 0xff\n" // limit exit code to 8 bits
+ "mov x8, 93\n" // NR_exit == 93
+ "svc #0\n"
+ "");
+
+/* fcntl / open */
+#define O_RDONLY 0
+#define O_WRONLY 1
+#define O_RDWR 2
+#define O_CREAT 0x40
+#define O_EXCL 0x80
+#define O_NOCTTY 0x100
+#define O_TRUNC 0x200
+#define O_APPEND 0x400
+#define O_NONBLOCK 0x800
+#define O_DIRECTORY 0x4000
+
+/* The struct returned by the newfstatat() syscall. Differs slightly from the
+ * x86_64's stat one by field ordering, so be careful.
+ */
+struct sys_stat_struct {
+ unsigned long st_dev;
+ unsigned long st_ino;
+ unsigned int st_mode;
+ unsigned int st_nlink;
+ unsigned int st_uid;
+ unsigned int st_gid;
+
+ unsigned long st_rdev;
+ unsigned long __pad1;
+ long st_size;
+ int st_blksize;
+ int __pad2;
+
+ long st_blocks;
+ long st_atime;
+ unsigned long st_atime_nsec;
+ long st_mtime;
+
+ unsigned long st_mtime_nsec;
+ long st_ctime;
+ unsigned long st_ctime_nsec;
+ unsigned int __unused[2];
+};
+
+#elif defined(__mips__) && defined(_ABIO32)
+/* Syscalls for MIPS ABI O32 :
+ * - WARNING! there's always a delayed slot!
+ * - WARNING again, the syntax is different, registers take a '$' and numbers
+ * do not.
+ * - registers are 32-bit
+ * - stack is 8-byte aligned
+ * - syscall number is passed in v0 (starts at 0xfa0).
+ * - arguments are in a0, a1, a2, a3, then the stack. The caller needs to
+ * leave some room in the stack for the callee to save a0..a3 if needed.
+ * - Many registers are clobbered, in fact only a0..a2 and s0..s8 are
+ * preserved. See: https://www.linux-mips.org/wiki/Syscall as well as
+ * scall32-o32.S in the kernel sources.
+ * - the system call is performed by calling "syscall"
+ * - syscall return comes in v0, and register a3 needs to be checked to know
+ * if an error occured, in which case errno is in v0.
+ * - the arguments are cast to long and assigned into the target registers
+ * which are then simply passed as registers to the asm code, so that we
+ * don't have to experience issues with register constraints.
+ */
+
+#define my_syscall0(num) \
+({ \
+ register long _num asm("v0") = (num); \
+ register long _arg4 asm("a3"); \
+ \
+ asm volatile ( \
+ "addiu $sp, $sp, -32\n" \
+ "syscall\n" \
+ "addiu $sp, $sp, 32\n" \
+ : "=r"(_num), "=r"(_arg4) \
+ : "r"(_num) \
+ : "memory", "cc", "at", "v1", "hi", "lo", \
+ "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \
+ ); \
+ _arg4 ? -_num : _num; \
+})
+
+#define my_syscall1(num, arg1) \
+({ \
+ register long _num asm("v0") = (num); \
+ register long _arg1 asm("a0") = (long)(arg1); \
+ register long _arg4 asm("a3"); \
+ \
+ asm volatile ( \
+ "addiu $sp, $sp, -32\n" \
+ "syscall\n" \
+ "addiu $sp, $sp, 32\n" \
+ : "=r"(_num), "=r"(_arg4) \
+ : "0"(_num), \
+ "r"(_arg1) \
+ : "memory", "cc", "at", "v1", "hi", "lo", \
+ "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \
+ ); \
+ _arg4 ? -_num : _num; \
+})
+
+#define my_syscall2(num, arg1, arg2) \
+({ \
+ register long _num asm("v0") = (num); \
+ register long _arg1 asm("a0") = (long)(arg1); \
+ register long _arg2 asm("a1") = (long)(arg2); \
+ register long _arg4 asm("a3"); \
+ \
+ asm volatile ( \
+ "addiu $sp, $sp, -32\n" \
+ "syscall\n" \
+ "addiu $sp, $sp, 32\n" \
+ : "=r"(_num), "=r"(_arg4) \
+ : "0"(_num), \
+ "r"(_arg1), "r"(_arg2) \
+ : "memory", "cc", "at", "v1", "hi", "lo", \
+ "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \
+ ); \
+ _arg4 ? -_num : _num; \
+})
+
+#define my_syscall3(num, arg1, arg2, arg3) \
+({ \
+ register long _num asm("v0") = (num); \
+ register long _arg1 asm("a0") = (long)(arg1); \
+ register long _arg2 asm("a1") = (long)(arg2); \
+ register long _arg3 asm("a2") = (long)(arg3); \
+ register long _arg4 asm("a3"); \
+ \
+ asm volatile ( \
+ "addiu $sp, $sp, -32\n" \
+ "syscall\n" \
+ "addiu $sp, $sp, 32\n" \
+ : "=r"(_num), "=r"(_arg4) \
+ : "0"(_num), \
+ "r"(_arg1), "r"(_arg2), "r"(_arg3) \
+ : "memory", "cc", "at", "v1", "hi", "lo", \
+ "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \
+ ); \
+ _arg4 ? -_num : _num; \
+})
+
+#define my_syscall4(num, arg1, arg2, arg3, arg4) \
+({ \
+ register long _num asm("v0") = (num); \
+ register long _arg1 asm("a0") = (long)(arg1); \
+ register long _arg2 asm("a1") = (long)(arg2); \
+ register long _arg3 asm("a2") = (long)(arg3); \
+ register long _arg4 asm("a3") = (long)(arg4); \
+ \
+ asm volatile ( \
+ "addiu $sp, $sp, -32\n" \
+ "syscall\n" \
+ "addiu $sp, $sp, 32\n" \
+ : "=r" (_num), "=r"(_arg4) \
+ : "0"(_num), \
+ "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4) \
+ : "memory", "cc", "at", "v1", "hi", "lo", \
+ "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \
+ ); \
+ _arg4 ? -_num : _num; \
+})
+
+#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
+({ \
+ register long _num asm("v0") = (num); \
+ register long _arg1 asm("a0") = (long)(arg1); \
+ register long _arg2 asm("a1") = (long)(arg2); \
+ register long _arg3 asm("a2") = (long)(arg3); \
+ register long _arg4 asm("a3") = (long)(arg4); \
+ register long _arg5 = (long)(arg5); \
+ \
+ asm volatile ( \
+ "addiu $sp, $sp, -32\n" \
+ "sw %7, 16($sp)\n" \
+ "syscall\n " \
+ "addiu $sp, $sp, 32\n" \
+ : "=r" (_num), "=r"(_arg4) \
+ : "0"(_num), \
+ "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5) \
+ : "memory", "cc", "at", "v1", "hi", "lo", \
+ "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \
+ ); \
+ _arg4 ? -_num : _num; \
+})
+
+/* startup code, note that it's called __start on MIPS */
+asm(".section .text\n"
+ ".set nomips16\n"
+ ".global __start\n"
+ ".set noreorder\n"
+ ".option pic0\n"
+ ".ent __start\n"
+ "__start:\n"
+ "lw $a0,($sp)\n" // argc was in the stack
+ "addiu $a1, $sp, 4\n" // argv = sp + 4
+ "sll $a2, $a0, 2\n" // a2 = argc * 4
+ "add $a2, $a2, $a1\n" // envp = argv + 4*argc ...
+ "addiu $a2, $a2, 4\n" // ... + 4
+ "li $t0, -8\n"
+ "and $sp, $sp, $t0\n" // sp must be 8-byte aligned
+ "addiu $sp,$sp,-16\n" // the callee expects to save a0..a3 there!
+ "jal main\n" // main() returns the status code, we'll exit with it.
+ "nop\n" // delayed slot
+ "and $a0, $v0, 0xff\n" // limit exit code to 8 bits
+ "li $v0, 4001\n" // NR_exit == 4001
+ "syscall\n"
+ ".end __start\n"
+ "");
+
+/* fcntl / open */
+#define O_RDONLY 0
+#define O_WRONLY 1
+#define O_RDWR 2
+#define O_APPEND 0x0008
+#define O_NONBLOCK 0x0080
+#define O_CREAT 0x0100
+#define O_TRUNC 0x0200
+#define O_EXCL 0x0400
+#define O_NOCTTY 0x0800
+#define O_DIRECTORY 0x10000
+
+/* The struct returned by the stat() syscall. 88 bytes are returned by the
+ * syscall.
+ */
+struct sys_stat_struct {
+ unsigned int st_dev;
+ long st_pad1[3];
+ unsigned long st_ino;
+ unsigned int st_mode;
+ unsigned int st_nlink;
+ unsigned int st_uid;
+ unsigned int st_gid;
+ unsigned int st_rdev;
+ long st_pad2[2];
+ long st_size;
+ long st_pad3;
+ long st_atime;
+ long st_atime_nsec;
+ long st_mtime;
+ long st_mtime_nsec;
+ long st_ctime;
+ long st_ctime_nsec;
+ long st_blksize;
+ long st_blocks;
+ long st_pad4[14];
+};
+
+#endif
+
+
+/* Below are the C functions used to declare the raw syscalls. They try to be
+ * architecture-agnostic, and return either a success or -errno. Declaring them
+ * static will lead to them being inlined in most cases, but it's still possible
+ * to reference them by a pointer if needed.
+ */
+static __attribute__((unused))
+void *sys_brk(void *addr)
+{
+ return (void *)my_syscall1(__NR_brk, addr);
+}
+
+static __attribute__((noreturn,unused))
+void sys_exit(int status)
+{
+ my_syscall1(__NR_exit, status & 255);
+ while(1); // shut the "noreturn" warnings.
+}
+
+static __attribute__((unused))
+int sys_chdir(const char *path)
+{
+ return my_syscall1(__NR_chdir, path);
+}
+
+static __attribute__((unused))
+int sys_chmod(const char *path, mode_t mode)
+{
+#ifdef __NR_fchmodat
+ return my_syscall4(__NR_fchmodat, AT_FDCWD, path, mode, 0);
+#else
+ return my_syscall2(__NR_chmod, path, mode);
+#endif
+}
+
+static __attribute__((unused))
+int sys_chown(const char *path, uid_t owner, gid_t group)
+{
+#ifdef __NR_fchownat
+ return my_syscall5(__NR_fchownat, AT_FDCWD, path, owner, group, 0);
+#else
+ return my_syscall3(__NR_chown, path, owner, group);
+#endif
+}
+
+static __attribute__((unused))
+int sys_chroot(const char *path)
+{
+ return my_syscall1(__NR_chroot, path);
+}
+
+static __attribute__((unused))
+int sys_close(int fd)
+{
+ return my_syscall1(__NR_close, fd);
+}
+
+static __attribute__((unused))
+int sys_dup(int fd)
+{
+ return my_syscall1(__NR_dup, fd);
+}
+
+static __attribute__((unused))
+int sys_dup2(int old, int new)
+{
+ return my_syscall2(__NR_dup2, old, new);
+}
+
+static __attribute__((unused))
+int sys_execve(const char *filename, char *const argv[], char *const envp[])
+{
+ return my_syscall3(__NR_execve, filename, argv, envp);
+}
+
+static __attribute__((unused))
+pid_t sys_fork(void)
+{
+ return my_syscall0(__NR_fork);
+}
+
+static __attribute__((unused))
+int sys_fsync(int fd)
+{
+ return my_syscall1(__NR_fsync, fd);
+}
+
+static __attribute__((unused))
+int sys_getdents64(int fd, struct linux_dirent64 *dirp, int count)
+{
+ return my_syscall3(__NR_getdents64, fd, dirp, count);
+}
+
+static __attribute__((unused))
+pid_t sys_getpgrp(void)
+{
+ return my_syscall0(__NR_getpgrp);
+}
+
+static __attribute__((unused))
+pid_t sys_getpid(void)
+{
+ return my_syscall0(__NR_getpid);
+}
+
+static __attribute__((unused))
+int sys_gettimeofday(struct timeval *tv, struct timezone *tz)
+{
+ return my_syscall2(__NR_gettimeofday, tv, tz);
+}
+
+static __attribute__((unused))
+int sys_ioctl(int fd, unsigned long req, void *value)
+{
+ return my_syscall3(__NR_ioctl, fd, req, value);
+}
+
+static __attribute__((unused))
+int sys_kill(pid_t pid, int signal)
+{
+ return my_syscall2(__NR_kill, pid, signal);
+}
+
+static __attribute__((unused))
+int sys_link(const char *old, const char *new)
+{
+#ifdef __NR_linkat
+ return my_syscall5(__NR_linkat, AT_FDCWD, old, AT_FDCWD, new, 0);
+#else
+ return my_syscall2(__NR_link, old, new);
+#endif
+}
+
+static __attribute__((unused))
+off_t sys_lseek(int fd, off_t offset, int whence)
+{
+ return my_syscall3(__NR_lseek, fd, offset, whence);
+}
+
+static __attribute__((unused))
+int sys_mkdir(const char *path, mode_t mode)
+{
+#ifdef __NR_mkdirat
+ return my_syscall3(__NR_mkdirat, AT_FDCWD, path, mode);
+#else
+ return my_syscall2(__NR_mkdir, path, mode);
+#endif
+}
+
+static __attribute__((unused))
+long sys_mknod(const char *path, mode_t mode, dev_t dev)
+{
+#ifdef __NR_mknodat
+ return my_syscall4(__NR_mknodat, AT_FDCWD, path, mode, dev);
+#else
+ return my_syscall3(__NR_mknod, path, mode, dev);
+#endif
+}
+
+static __attribute__((unused))
+int sys_mount(const char *src, const char *tgt, const char *fst,
+ unsigned long flags, const void *data)
+{
+ return my_syscall5(__NR_mount, src, tgt, fst, flags, data);
+}
+
+static __attribute__((unused))
+int sys_open(const char *path, int flags, mode_t mode)
+{
+#ifdef __NR_openat
+ return my_syscall4(__NR_openat, AT_FDCWD, path, flags, mode);
+#else
+ return my_syscall3(__NR_open, path, flags, mode);
+#endif
+}
+
+static __attribute__((unused))
+int sys_pivot_root(const char *new, const char *old)
+{
+ return my_syscall2(__NR_pivot_root, new, old);
+}
+
+static __attribute__((unused))
+int sys_poll(struct pollfd *fds, int nfds, int timeout)
+{
+ return my_syscall3(__NR_poll, fds, nfds, timeout);
+}
+
+static __attribute__((unused))
+ssize_t sys_read(int fd, void *buf, size_t count)
+{
+ return my_syscall3(__NR_read, fd, buf, count);
+}
+
+static __attribute__((unused))
+ssize_t sys_reboot(int magic1, int magic2, int cmd, void *arg)
+{
+ return my_syscall4(__NR_reboot, magic1, magic2, cmd, arg);
+}
+
+static __attribute__((unused))
+int sys_sched_yield(void)
+{
+ return my_syscall0(__NR_sched_yield);
+}
+
+static __attribute__((unused))
+int sys_select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout)
+{
+#if defined(__ARCH_WANT_SYS_OLD_SELECT) && !defined(__NR__newselect)
+ struct sel_arg_struct {
+ unsigned long n;
+ fd_set *r, *w, *e;
+ struct timeval *t;
+ } arg = { .n = nfds, .r = rfds, .w = wfds, .e = efds, .t = timeout };
+ return my_syscall1(__NR_select, &arg);
+#elif defined(__ARCH_WANT_SYS_PSELECT6) && defined(__NR_pselect6)
+ struct timespec t;
+
+ if (timeout) {
+ t.tv_sec = timeout->tv_sec;
+ t.tv_nsec = timeout->tv_usec * 1000;
+ }
+ return my_syscall6(__NR_pselect6, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL);
+#else
+#ifndef __NR__newselect
+#define __NR__newselect __NR_select
+#endif
+ return my_syscall5(__NR__newselect, nfds, rfds, wfds, efds, timeout);
+#endif
+}
+
+static __attribute__((unused))
+int sys_setpgid(pid_t pid, pid_t pgid)
+{
+ return my_syscall2(__NR_setpgid, pid, pgid);
+}
+
+static __attribute__((unused))
+pid_t sys_setsid(void)
+{
+ return my_syscall0(__NR_setsid);
+}
+
+static __attribute__((unused))
+int sys_stat(const char *path, struct stat *buf)
+{
+ struct sys_stat_struct stat;
+ long ret;
+
+#ifdef __NR_newfstatat
+ /* only solution for arm64 */
+ ret = my_syscall4(__NR_newfstatat, AT_FDCWD, path, &stat, 0);
+#else
+ ret = my_syscall2(__NR_stat, path, &stat);
+#endif
+ buf->st_dev = stat.st_dev;
+ buf->st_ino = stat.st_ino;
+ buf->st_mode = stat.st_mode;
+ buf->st_nlink = stat.st_nlink;
+ buf->st_uid = stat.st_uid;
+ buf->st_gid = stat.st_gid;
+ buf->st_rdev = stat.st_rdev;
+ buf->st_size = stat.st_size;
+ buf->st_blksize = stat.st_blksize;
+ buf->st_blocks = stat.st_blocks;
+ buf->st_atime = stat.st_atime;
+ buf->st_mtime = stat.st_mtime;
+ buf->st_ctime = stat.st_ctime;
+ return ret;
+}
+
+
+static __attribute__((unused))
+int sys_symlink(const char *old, const char *new)
+{
+#ifdef __NR_symlinkat
+ return my_syscall3(__NR_symlinkat, old, AT_FDCWD, new);
+#else
+ return my_syscall2(__NR_symlink, old, new);
+#endif
+}
+
+static __attribute__((unused))
+mode_t sys_umask(mode_t mode)
+{
+ return my_syscall1(__NR_umask, mode);
+}
+
+static __attribute__((unused))
+int sys_umount2(const char *path, int flags)
+{
+ return my_syscall2(__NR_umount2, path, flags);
+}
+
+static __attribute__((unused))
+int sys_unlink(const char *path)
+{
+#ifdef __NR_unlinkat
+ return my_syscall3(__NR_unlinkat, AT_FDCWD, path, 0);
+#else
+ return my_syscall1(__NR_unlink, path);
+#endif
+}
+
+static __attribute__((unused))
+pid_t sys_wait4(pid_t pid, int *status, int options, struct rusage *rusage)
+{
+ return my_syscall4(__NR_wait4, pid, status, options, rusage);
+}
+
+static __attribute__((unused))
+pid_t sys_waitpid(pid_t pid, int *status, int options)
+{
+ return sys_wait4(pid, status, options, 0);
+}
+
+static __attribute__((unused))
+pid_t sys_wait(int *status)
+{
+ return sys_waitpid(-1, status, 0);
+}
+
+static __attribute__((unused))
+ssize_t sys_write(int fd, const void *buf, size_t count)
+{
+ return my_syscall3(__NR_write, fd, buf, count);
+}
+
+
+/* Below are the libc-compatible syscalls which return x or -1 and set errno.
+ * They rely on the functions above. Similarly they're marked static so that it
+ * is possible to assign pointers to them if needed.
+ */
+
+static __attribute__((unused))
+int brk(void *addr)
+{
+ void *ret = sys_brk(addr);
+
+ if (!ret) {
+ SET_ERRNO(ENOMEM);
+ return -1;
+ }
+ return 0;
+}
+
+static __attribute__((noreturn,unused))
+void exit(int status)
+{
+ sys_exit(status);
+}
+
+static __attribute__((unused))
+int chdir(const char *path)
+{
+ int ret = sys_chdir(path);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+static __attribute__((unused))
+int chmod(const char *path, mode_t mode)
+{
+ int ret = sys_chmod(path, mode);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+static __attribute__((unused))
+int chown(const char *path, uid_t owner, gid_t group)
+{
+ int ret = sys_chown(path, owner, group);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+static __attribute__((unused))
+int chroot(const char *path)
+{
+ int ret = sys_chroot(path);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+static __attribute__((unused))
+int close(int fd)
+{
+ int ret = sys_close(fd);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+static __attribute__((unused))
+int dup2(int old, int new)
+{
+ int ret = sys_dup2(old, new);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+static __attribute__((unused))
+int execve(const char *filename, char *const argv[], char *const envp[])
+{
+ int ret = sys_execve(filename, argv, envp);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+static __attribute__((unused))
+pid_t fork(void)
+{
+ pid_t ret = sys_fork();
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+static __attribute__((unused))
+int fsync(int fd)
+{
+ int ret = sys_fsync(fd);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+static __attribute__((unused))
+int getdents64(int fd, struct linux_dirent64 *dirp, int count)
+{
+ int ret = sys_getdents64(fd, dirp, count);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+static __attribute__((unused))
+pid_t getpgrp(void)
+{
+ pid_t ret = sys_getpgrp();
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+static __attribute__((unused))
+pid_t getpid(void)
+{
+ pid_t ret = sys_getpid();
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+static __attribute__((unused))
+int gettimeofday(struct timeval *tv, struct timezone *tz)
+{
+ int ret = sys_gettimeofday(tv, tz);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+static __attribute__((unused))
+int ioctl(int fd, unsigned long req, void *value)
+{
+ int ret = sys_ioctl(fd, req, value);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+static __attribute__((unused))
+int kill(pid_t pid, int signal)
+{
+ int ret = sys_kill(pid, signal);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+static __attribute__((unused))
+int link(const char *old, const char *new)
+{
+ int ret = sys_link(old, new);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+static __attribute__((unused))
+off_t lseek(int fd, off_t offset, int whence)
+{
+ off_t ret = sys_lseek(fd, offset, whence);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+static __attribute__((unused))
+int mkdir(const char *path, mode_t mode)
+{
+ int ret = sys_mkdir(path, mode);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+static __attribute__((unused))
+int mknod(const char *path, mode_t mode, dev_t dev)
+{
+ int ret = sys_mknod(path, mode, dev);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+static __attribute__((unused))
+int mount(const char *src, const char *tgt,
+ const char *fst, unsigned long flags,
+ const void *data)
+{
+ int ret = sys_mount(src, tgt, fst, flags, data);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+static __attribute__((unused))
+int open(const char *path, int flags, mode_t mode)
+{
+ int ret = sys_open(path, flags, mode);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+static __attribute__((unused))
+int pivot_root(const char *new, const char *old)
+{
+ int ret = sys_pivot_root(new, old);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+static __attribute__((unused))
+int poll(struct pollfd *fds, int nfds, int timeout)
+{
+ int ret = sys_poll(fds, nfds, timeout);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+static __attribute__((unused))
+ssize_t read(int fd, void *buf, size_t count)
+{
+ ssize_t ret = sys_read(fd, buf, count);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+static __attribute__((unused))
+int reboot(int cmd)
+{
+ int ret = sys_reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, cmd, 0);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+static __attribute__((unused))
+void *sbrk(intptr_t inc)
+{
+ void *ret;
+
+ /* first call to find current end */
+ if ((ret = sys_brk(0)) && (sys_brk(ret + inc) == ret + inc))
+ return ret + inc;
+
+ SET_ERRNO(ENOMEM);
+ return (void *)-1;
+}
+
+static __attribute__((unused))
+int sched_yield(void)
+{
+ int ret = sys_sched_yield();
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+static __attribute__((unused))
+int select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout)
+{
+ int ret = sys_select(nfds, rfds, wfds, efds, timeout);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+static __attribute__((unused))
+int setpgid(pid_t pid, pid_t pgid)
+{
+ int ret = sys_setpgid(pid, pgid);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+static __attribute__((unused))
+pid_t setsid(void)
+{
+ pid_t ret = sys_setsid();
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+static __attribute__((unused))
+unsigned int sleep(unsigned int seconds)
+{
+ struct timeval my_timeval = { seconds, 0 };
+
+ if (sys_select(0, 0, 0, 0, &my_timeval) < 0)
+ return my_timeval.tv_sec + !!my_timeval.tv_usec;
+ else
+ return 0;
+}
+
+static __attribute__((unused))
+int stat(const char *path, struct stat *buf)
+{
+ int ret = sys_stat(path, buf);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+static __attribute__((unused))
+int symlink(const char *old, const char *new)
+{
+ int ret = sys_symlink(old, new);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+static __attribute__((unused))
+int tcsetpgrp(int fd, pid_t pid)
+{
+ return ioctl(fd, TIOCSPGRP, &pid);
+}
+
+static __attribute__((unused))
+mode_t umask(mode_t mode)
+{
+ return sys_umask(mode);
+}
+
+static __attribute__((unused))
+int umount2(const char *path, int flags)
+{
+ int ret = sys_umount2(path, flags);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+static __attribute__((unused))
+int unlink(const char *path)
+{
+ int ret = sys_unlink(path);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+static __attribute__((unused))
+pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage)
+{
+ pid_t ret = sys_wait4(pid, status, options, rusage);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+static __attribute__((unused))
+pid_t waitpid(pid_t pid, int *status, int options)
+{
+ pid_t ret = sys_waitpid(pid, status, options);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+static __attribute__((unused))
+pid_t wait(int *status)
+{
+ pid_t ret = sys_wait(status);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+static __attribute__((unused))
+ssize_t write(int fd, const void *buf, size_t count)
+{
+ ssize_t ret = sys_write(fd, buf, count);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+/* some size-optimized reimplementations of a few common str* and mem*
+ * functions. They're marked static, except memcpy() and raise() which are used
+ * by libgcc on ARM, so they are marked weak instead in order not to cause an
+ * error when building a program made of multiple files (not recommended).
+ */
+
+static __attribute__((unused))
+void *memmove(void *dst, const void *src, size_t len)
+{
+ ssize_t pos = (dst <= src) ? -1 : (long)len;
+ void *ret = dst;
+
+ while (len--) {
+ pos += (dst <= src) ? 1 : -1;
+ ((char *)dst)[pos] = ((char *)src)[pos];
+ }
+ return ret;
+}
+
+static __attribute__((unused))
+void *memset(void *dst, int b, size_t len)
+{
+ char *p = dst;
+
+ while (len--)
+ *(p++) = b;
+ return dst;
+}
+
+static __attribute__((unused))
+int memcmp(const void *s1, const void *s2, size_t n)
+{
+ size_t ofs = 0;
+ char c1 = 0;
+
+ while (ofs < n && !(c1 = ((char *)s1)[ofs] - ((char *)s2)[ofs])) {
+ ofs++;
+ }
+ return c1;
+}
+
+static __attribute__((unused))
+char *strcpy(char *dst, const char *src)
+{
+ char *ret = dst;
+
+ while ((*dst++ = *src++));
+ return ret;
+}
+
+static __attribute__((unused))
+char *strchr(const char *s, int c)
+{
+ while (*s) {
+ if (*s == (char)c)
+ return (char *)s;
+ s++;
+ }
+ return NULL;
+}
+
+static __attribute__((unused))
+char *strrchr(const char *s, int c)
+{
+ const char *ret = NULL;
+
+ while (*s) {
+ if (*s == (char)c)
+ ret = s;
+ s++;
+ }
+ return (char *)ret;
+}
+
+static __attribute__((unused))
+size_t nolibc_strlen(const char *str)
+{
+ size_t len;
+
+ for (len = 0; str[len]; len++);
+ return len;
+}
+
+#define strlen(str) ({ \
+ __builtin_constant_p((str)) ? \
+ __builtin_strlen((str)) : \
+ nolibc_strlen((str)); \
+})
+
+static __attribute__((unused))
+int isdigit(int c)
+{
+ return (unsigned int)(c - '0') <= 9;
+}
+
+static __attribute__((unused))
+long atol(const char *s)
+{
+ unsigned long ret = 0;
+ unsigned long d;
+ int neg = 0;
+
+ if (*s == '-') {
+ neg = 1;
+ s++;
+ }
+
+ while (1) {
+ d = (*s++) - '0';
+ if (d > 9)
+ break;
+ ret *= 10;
+ ret += d;
+ }
+
+ return neg ? -ret : ret;
+}
+
+static __attribute__((unused))
+int atoi(const char *s)
+{
+ return atol(s);
+}
+
+static __attribute__((unused))
+const char *ltoa(long in)
+{
+ /* large enough for -9223372036854775808 */
+ static char buffer[21];
+ char *pos = buffer + sizeof(buffer) - 1;
+ int neg = in < 0;
+ unsigned long n = neg ? -in : in;
+
+ *pos-- = '\0';
+ do {
+ *pos-- = '0' + n % 10;
+ n /= 10;
+ if (pos < buffer)
+ return pos + 1;
+ } while (n);
+
+ if (neg)
+ *pos-- = '-';
+ return pos + 1;
+}
+
+__attribute__((weak,unused))
+void *memcpy(void *dst, const void *src, size_t len)
+{
+ return memmove(dst, src, len);
+}
+
+/* needed by libgcc for divide by zero */
+__attribute__((weak,unused))
+int raise(int signal)
+{
+ return kill(getpid(), signal);
+}
+
+/* Here come a few helper functions */
+
+static __attribute__((unused))
+void FD_ZERO(fd_set *set)
+{
+ memset(set, 0, sizeof(*set));
+}
+
+static __attribute__((unused))
+void FD_SET(int fd, fd_set *set)
+{
+ if (fd < 0 || fd >= FD_SETSIZE)
+ return;
+ set->fd32[fd / 32] |= 1 << (fd & 31);
+}
+
+/* WARNING, it only deals with the 4096 first majors and 256 first minors */
+static __attribute__((unused))
+dev_t makedev(unsigned int major, unsigned int minor)
+{
+ return ((major & 0xfff) << 8) | (minor & 0xff);
+}
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index c7f3321fbe43..d90127298f12 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -738,9 +738,11 @@ __SYSCALL(__NR_statx, sys_statx)
__SC_COMP(__NR_io_pgetevents, sys_io_pgetevents, compat_sys_io_pgetevents)
#define __NR_rseq 293
__SYSCALL(__NR_rseq, sys_rseq)
+#define __NR_kexec_file_load 294
+__SYSCALL(__NR_kexec_file_load, sys_kexec_file_load)
#undef __NR_syscalls
-#define __NR_syscalls 294
+#define __NR_syscalls 295
/*
* 32 bit systems traditionally used different
diff --git a/tools/include/uapi/asm/bitsperlong.h b/tools/include/uapi/asm/bitsperlong.h
index 8dd6aefdafa4..57aaeaf8e192 100644
--- a/tools/include/uapi/asm/bitsperlong.h
+++ b/tools/include/uapi/asm/bitsperlong.h
@@ -13,6 +13,10 @@
#include "../../arch/mips/include/uapi/asm/bitsperlong.h"
#elif defined(__ia64__)
#include "../../arch/ia64/include/uapi/asm/bitsperlong.h"
+#elif defined(__riscv)
+#include "../../arch/riscv/include/uapi/asm/bitsperlong.h"
+#elif defined(__alpha__)
+#include "../../arch/alpha/include/uapi/asm/bitsperlong.h"
#else
#include <asm-generic/bitsperlong.h>
#endif
diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h
index a4446f452040..298b2e197744 100644
--- a/tools/include/uapi/drm/i915_drm.h
+++ b/tools/include/uapi/drm/i915_drm.h
@@ -412,6 +412,14 @@ typedef struct drm_i915_irq_wait {
int irq_seq;
} drm_i915_irq_wait_t;
+/*
+ * Different modes of per-process Graphics Translation Table,
+ * see I915_PARAM_HAS_ALIASING_PPGTT
+ */
+#define I915_GEM_PPGTT_NONE 0
+#define I915_GEM_PPGTT_ALIASING 1
+#define I915_GEM_PPGTT_FULL 2
+
/* Ioctl to query kernel params:
*/
#define I915_PARAM_IRQ_ACTIVE 1
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 91c43884f295..3c38ac9a92a7 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -14,6 +14,7 @@
/* Extended instruction set based on top of classic BPF */
/* instruction classes */
+#define BPF_JMP32 0x06 /* jmp mode in word width */
#define BPF_ALU64 0x07 /* alu mode in double word width */
/* ld/ldx fields */
@@ -266,6 +267,7 @@ enum bpf_attach_type {
#define BPF_ANY 0 /* create new element or update existing */
#define BPF_NOEXIST 1 /* create new element if it didn't exist */
#define BPF_EXIST 2 /* update existing element */
+#define BPF_F_LOCK 4 /* spin_lock-ed map_lookup/map_update */
/* flags for BPF_MAP_CREATE command */
#define BPF_F_NO_PREALLOC (1U << 0)
@@ -2014,6 +2016,19 @@ union bpf_attr {
* Only works if *skb* contains an IPv6 packet. Insert a
* Segment Routing Header (**struct ipv6_sr_hdr**) inside
* the IPv6 header.
+ * **BPF_LWT_ENCAP_IP**
+ * IP encapsulation (GRE/GUE/IPIP/etc). The outer header
+ * must be IPv4 or IPv6, followed by zero or more
+ * additional headers, up to LWT_BPF_MAX_HEADROOM total
+ * bytes in all prepended headers. Please note that
+ * if skb_is_gso(skb) is true, no more than two headers
+ * can be prepended, and the inner header, if present,
+ * should be either GRE or UDP/GUE.
+ *
+ * BPF_LWT_ENCAP_SEG6*** types can be called by bpf programs of
+ * type BPF_PROG_TYPE_LWT_IN; BPF_LWT_ENCAP_IP type can be called
+ * by bpf programs of types BPF_PROG_TYPE_LWT_IN and
+ * BPF_PROG_TYPE_LWT_XMIT.
*
* A call to this helper is susceptible to change the underlaying
* packet buffer. Therefore, at load time, all checks on pointers
@@ -2327,6 +2342,30 @@ union bpf_attr {
* "**y**".
* Return
* 0
+ *
+ * struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk)
+ * Description
+ * This helper gets a **struct bpf_sock** pointer such
+ * that all the fields in bpf_sock can be accessed.
+ * Return
+ * A **struct bpf_sock** pointer on success, or NULL in
+ * case of failure.
+ *
+ * struct bpf_tcp_sock *bpf_tcp_sock(struct bpf_sock *sk)
+ * Description
+ * This helper gets a **struct bpf_tcp_sock** pointer from a
+ * **struct bpf_sock** pointer.
+ *
+ * Return
+ * A **struct bpf_tcp_sock** pointer on success, or NULL in
+ * case of failure.
+ *
+ * int bpf_skb_ecn_set_ce(struct sk_buf *skb)
+ * Description
+ * Sets ECN of IP header to ce (congestion encountered) if
+ * current value is ect (ECN capable). Works with IPv6 and IPv4.
+ * Return
+ * 1 if set, 0 if not set.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -2421,7 +2460,12 @@ union bpf_attr {
FN(map_peek_elem), \
FN(msg_push_data), \
FN(msg_pop_data), \
- FN(rc_pointer_rel),
+ FN(rc_pointer_rel), \
+ FN(spin_lock), \
+ FN(spin_unlock), \
+ FN(sk_fullsock), \
+ FN(tcp_sock), \
+ FN(skb_ecn_set_ce),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@@ -2494,7 +2538,8 @@ enum bpf_hdr_start_off {
/* Encapsulation type for BPF_FUNC_lwt_push_encap helper. */
enum bpf_lwt_encap_mode {
BPF_LWT_ENCAP_SEG6,
- BPF_LWT_ENCAP_SEG6_INLINE
+ BPF_LWT_ENCAP_SEG6_INLINE,
+ BPF_LWT_ENCAP_IP,
};
#define __bpf_md_ptr(type, name) \
@@ -2540,6 +2585,8 @@ struct __sk_buff {
__bpf_md_ptr(struct bpf_flow_keys *, flow_keys);
__u64 tstamp;
__u32 wire_len;
+ __u32 gso_segs;
+ __bpf_md_ptr(struct bpf_sock *, sk);
};
struct bpf_tunnel_key {
@@ -2581,7 +2628,15 @@ enum bpf_ret_code {
BPF_DROP = 2,
/* 3-6 reserved */
BPF_REDIRECT = 7,
- /* >127 are reserved for prog type specific return codes */
+ /* >127 are reserved for prog type specific return codes.
+ *
+ * BPF_LWT_REROUTE: used by BPF_PROG_TYPE_LWT_IN and
+ * BPF_PROG_TYPE_LWT_XMIT to indicate that skb had been
+ * changed and should be routed based on its new L3 header.
+ * (This is an L3 redirect, as opposed to L2 redirect
+ * represented by BPF_REDIRECT above).
+ */
+ BPF_LWT_REROUTE = 128,
};
struct bpf_sock {
@@ -2591,14 +2646,52 @@ struct bpf_sock {
__u32 protocol;
__u32 mark;
__u32 priority;
- __u32 src_ip4; /* Allows 1,2,4-byte read.
- * Stored in network byte order.
+ /* IP address also allows 1 and 2 bytes access */
+ __u32 src_ip4;
+ __u32 src_ip6[4];
+ __u32 src_port; /* host byte order */
+ __u32 dst_port; /* network byte order */
+ __u32 dst_ip4;
+ __u32 dst_ip6[4];
+ __u32 state;
+};
+
+struct bpf_tcp_sock {
+ __u32 snd_cwnd; /* Sending congestion window */
+ __u32 srtt_us; /* smoothed round trip time << 3 in usecs */
+ __u32 rtt_min;
+ __u32 snd_ssthresh; /* Slow start size threshold */
+ __u32 rcv_nxt; /* What we want to receive next */
+ __u32 snd_nxt; /* Next sequence we send */
+ __u32 snd_una; /* First byte we want an ack for */
+ __u32 mss_cache; /* Cached effective mss, not including SACKS */
+ __u32 ecn_flags; /* ECN status bits. */
+ __u32 rate_delivered; /* saved rate sample: packets delivered */
+ __u32 rate_interval_us; /* saved rate sample: time elapsed */
+ __u32 packets_out; /* Packets which are "in flight" */
+ __u32 retrans_out; /* Retransmitted packets out */
+ __u32 total_retrans; /* Total retransmits for entire connection */
+ __u32 segs_in; /* RFC4898 tcpEStatsPerfSegsIn
+ * total number of segments in.
*/
- __u32 src_ip6[4]; /* Allows 1,2,4-byte read.
- * Stored in network byte order.
+ __u32 data_segs_in; /* RFC4898 tcpEStatsPerfDataSegsIn
+ * total number of data segments in.
*/
- __u32 src_port; /* Allows 4-byte read.
- * Stored in host byte order
+ __u32 segs_out; /* RFC4898 tcpEStatsPerfSegsOut
+ * The total number of segments sent.
+ */
+ __u32 data_segs_out; /* RFC4898 tcpEStatsPerfDataSegsOut
+ * total number of data segments sent.
+ */
+ __u32 lost_out; /* Lost packets */
+ __u32 sacked_out; /* SACK'd packets */
+ __u64 bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived
+ * sum(delta(rcv_nxt)), or how many bytes
+ * were acked.
+ */
+ __u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked
+ * sum(delta(snd_una)), or how many bytes
+ * were acked.
*/
};
@@ -2728,6 +2821,8 @@ struct bpf_prog_info {
__u32 jited_line_info_rec_size;
__u32 nr_prog_tags;
__aligned_u64 prog_tags;
+ __u64 run_time_ns;
+ __u64 run_cnt;
} __attribute__((aligned(8)));
struct bpf_map_info {
@@ -3054,4 +3149,7 @@ struct bpf_line_info {
__u32 line_col;
};
+struct bpf_spin_lock {
+ __u32 val;
+};
#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/tools/include/uapi/linux/ethtool.h b/tools/include/uapi/linux/ethtool.h
new file mode 100644
index 000000000000..c86c3e942df9
--- /dev/null
+++ b/tools/include/uapi/linux/ethtool.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * ethtool.h: Defines for Linux ethtool.
+ *
+ * Copyright (C) 1998 David S. Miller (davem@redhat.com)
+ * Copyright 2001 Jeff Garzik <jgarzik@pobox.com>
+ * Portions Copyright 2001 Sun Microsystems (thockin@sun.com)
+ * Portions Copyright 2002 Intel (eli.kupermann@intel.com,
+ * christopher.leech@intel.com,
+ * scott.feldman@intel.com)
+ * Portions Copyright (C) Sun Microsystems 2008
+ */
+
+#ifndef _UAPI_LINUX_ETHTOOL_H
+#define _UAPI_LINUX_ETHTOOL_H
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/if_ether.h>
+
+#define ETHTOOL_GCHANNELS 0x0000003c /* Get no of channels */
+
+/**
+ * struct ethtool_channels - configuring number of network channel
+ * @cmd: ETHTOOL_{G,S}CHANNELS
+ * @max_rx: Read only. Maximum number of receive channel the driver support.
+ * @max_tx: Read only. Maximum number of transmit channel the driver support.
+ * @max_other: Read only. Maximum number of other channel the driver support.
+ * @max_combined: Read only. Maximum number of combined channel the driver
+ * support. Set of queues RX, TX or other.
+ * @rx_count: Valid values are in the range 1 to the max_rx.
+ * @tx_count: Valid values are in the range 1 to the max_tx.
+ * @other_count: Valid values are in the range 1 to the max_other.
+ * @combined_count: Valid values are in the range 1 to the max_combined.
+ *
+ * This can be used to configure RX, TX and other channels.
+ */
+
+struct ethtool_channels {
+ __u32 cmd;
+ __u32 max_rx;
+ __u32 max_tx;
+ __u32 max_other;
+ __u32 max_combined;
+ __u32 rx_count;
+ __u32 tx_count;
+ __u32 other_count;
+ __u32 combined_count;
+};
+
+#endif /* _UAPI_LINUX_ETHTOOL_H */
diff --git a/tools/include/uapi/linux/fs.h b/tools/include/uapi/linux/fs.h
index a441ea1bfe6d..121e82ce296b 100644
--- a/tools/include/uapi/linux/fs.h
+++ b/tools/include/uapi/linux/fs.h
@@ -14,6 +14,11 @@
#include <linux/ioctl.h>
#include <linux/types.h>
+/* Use of MS_* flags within the kernel is restricted to core mount(2) code. */
+#if !defined(__KERNEL__)
+#include <linux/mount.h>
+#endif
+
/*
* It's silly to have NR_OPEN bigger than NR_FILE, but you can change
* the file limit at runtime and only root can increase the per-process
@@ -101,57 +106,6 @@ struct inodes_stat_t {
#define NR_FILE 8192 /* this can well be larger on a larger system */
-
-/*
- * These are the fs-independent mount-flags: up to 32 flags are supported
- */
-#define MS_RDONLY 1 /* Mount read-only */
-#define MS_NOSUID 2 /* Ignore suid and sgid bits */
-#define MS_NODEV 4 /* Disallow access to device special files */
-#define MS_NOEXEC 8 /* Disallow program execution */
-#define MS_SYNCHRONOUS 16 /* Writes are synced at once */
-#define MS_REMOUNT 32 /* Alter flags of a mounted FS */
-#define MS_MANDLOCK 64 /* Allow mandatory locks on an FS */
-#define MS_DIRSYNC 128 /* Directory modifications are synchronous */
-#define MS_NOATIME 1024 /* Do not update access times. */
-#define MS_NODIRATIME 2048 /* Do not update directory access times */
-#define MS_BIND 4096
-#define MS_MOVE 8192
-#define MS_REC 16384
-#define MS_VERBOSE 32768 /* War is peace. Verbosity is silence.
- MS_VERBOSE is deprecated. */
-#define MS_SILENT 32768
-#define MS_POSIXACL (1<<16) /* VFS does not apply the umask */
-#define MS_UNBINDABLE (1<<17) /* change to unbindable */
-#define MS_PRIVATE (1<<18) /* change to private */
-#define MS_SLAVE (1<<19) /* change to slave */
-#define MS_SHARED (1<<20) /* change to shared */
-#define MS_RELATIME (1<<21) /* Update atime relative to mtime/ctime. */
-#define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */
-#define MS_I_VERSION (1<<23) /* Update inode I_version field */
-#define MS_STRICTATIME (1<<24) /* Always perform atime updates */
-#define MS_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */
-
-/* These sb flags are internal to the kernel */
-#define MS_SUBMOUNT (1<<26)
-#define MS_NOREMOTELOCK (1<<27)
-#define MS_NOSEC (1<<28)
-#define MS_BORN (1<<29)
-#define MS_ACTIVE (1<<30)
-#define MS_NOUSER (1<<31)
-
-/*
- * Superblock flags that can be altered by MS_REMOUNT
- */
-#define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION|\
- MS_LAZYTIME)
-
-/*
- * Old magic mount flag and mask
- */
-#define MS_MGC_VAL 0xC0ED0000
-#define MS_MGC_MSK 0xffff0000
-
/*
* Structure for FS_IOC_FSGETXATTR[A] and FS_IOC_FSSETXATTR.
*/
@@ -269,7 +223,8 @@ struct fsxattr {
#define FS_POLICY_FLAGS_PAD_16 0x02
#define FS_POLICY_FLAGS_PAD_32 0x03
#define FS_POLICY_FLAGS_PAD_MASK 0x03
-#define FS_POLICY_FLAGS_VALID 0x03
+#define FS_POLICY_FLAG_DIRECT_KEY 0x04 /* use master key directly */
+#define FS_POLICY_FLAGS_VALID 0x07
/* Encryption algorithms */
#define FS_ENCRYPTION_MODE_INVALID 0
@@ -281,6 +236,7 @@ struct fsxattr {
#define FS_ENCRYPTION_MODE_AES_128_CTS 6
#define FS_ENCRYPTION_MODE_SPECK128_256_XTS 7 /* Removed, do not use. */
#define FS_ENCRYPTION_MODE_SPECK128_256_CTS 8 /* Removed, do not use. */
+#define FS_ENCRYPTION_MODE_ADIANTUM 9
struct fscrypt_policy {
__u8 version;
diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
index 1debfa42cba1..5b225ff63b48 100644
--- a/tools/include/uapi/linux/if_link.h
+++ b/tools/include/uapi/linux/if_link.h
@@ -288,6 +288,7 @@ enum {
IFLA_BR_MCAST_IGMP_VERSION,
IFLA_BR_MCAST_MLD_VERSION,
IFLA_BR_VLAN_STATS_PER_PORT,
+ IFLA_BR_MULTI_BOOLOPT,
__IFLA_BR_MAX,
};
@@ -533,6 +534,7 @@ enum {
IFLA_VXLAN_LABEL,
IFLA_VXLAN_GPE,
IFLA_VXLAN_TTL_INHERIT,
+ IFLA_VXLAN_DF,
__IFLA_VXLAN_MAX
};
#define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1)
@@ -542,6 +544,14 @@ struct ifla_vxlan_port_range {
__be16 high;
};
+enum ifla_vxlan_df {
+ VXLAN_DF_UNSET = 0,
+ VXLAN_DF_SET,
+ VXLAN_DF_INHERIT,
+ __VXLAN_DF_END,
+ VXLAN_DF_MAX = __VXLAN_DF_END - 1,
+};
+
/* GENEVE section */
enum {
IFLA_GENEVE_UNSPEC,
@@ -557,10 +567,19 @@ enum {
IFLA_GENEVE_UDP_ZERO_CSUM6_RX,
IFLA_GENEVE_LABEL,
IFLA_GENEVE_TTL_INHERIT,
+ IFLA_GENEVE_DF,
__IFLA_GENEVE_MAX
};
#define IFLA_GENEVE_MAX (__IFLA_GENEVE_MAX - 1)
+enum ifla_geneve_df {
+ GENEVE_DF_UNSET = 0,
+ GENEVE_DF_SET,
+ GENEVE_DF_INHERIT,
+ __GENEVE_DF_END,
+ GENEVE_DF_MAX = __GENEVE_DF_END - 1,
+};
+
/* PPP section */
enum {
IFLA_PPP_UNSPEC,
@@ -906,6 +925,7 @@ enum {
enum {
LINK_XSTATS_TYPE_UNSPEC,
LINK_XSTATS_TYPE_BRIDGE,
+ LINK_XSTATS_TYPE_BOND,
__LINK_XSTATS_TYPE_MAX
};
#define LINK_XSTATS_TYPE_MAX (__LINK_XSTATS_TYPE_MAX - 1)
diff --git a/tools/include/uapi/linux/if_xdp.h b/tools/include/uapi/linux/if_xdp.h
new file mode 100644
index 000000000000..caed8b1614ff
--- /dev/null
+++ b/tools/include/uapi/linux/if_xdp.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * if_xdp: XDP socket user-space interface
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ * Author(s): Björn Töpel <bjorn.topel@intel.com>
+ * Magnus Karlsson <magnus.karlsson@intel.com>
+ */
+
+#ifndef _LINUX_IF_XDP_H
+#define _LINUX_IF_XDP_H
+
+#include <linux/types.h>
+
+/* Options for the sxdp_flags field */
+#define XDP_SHARED_UMEM (1 << 0)
+#define XDP_COPY (1 << 1) /* Force copy-mode */
+#define XDP_ZEROCOPY (1 << 2) /* Force zero-copy mode */
+
+struct sockaddr_xdp {
+ __u16 sxdp_family;
+ __u16 sxdp_flags;
+ __u32 sxdp_ifindex;
+ __u32 sxdp_queue_id;
+ __u32 sxdp_shared_umem_fd;
+};
+
+struct xdp_ring_offset {
+ __u64 producer;
+ __u64 consumer;
+ __u64 desc;
+};
+
+struct xdp_mmap_offsets {
+ struct xdp_ring_offset rx;
+ struct xdp_ring_offset tx;
+ struct xdp_ring_offset fr; /* Fill */
+ struct xdp_ring_offset cr; /* Completion */
+};
+
+/* XDP socket options */
+#define XDP_MMAP_OFFSETS 1
+#define XDP_RX_RING 2
+#define XDP_TX_RING 3
+#define XDP_UMEM_REG 4
+#define XDP_UMEM_FILL_RING 5
+#define XDP_UMEM_COMPLETION_RING 6
+#define XDP_STATISTICS 7
+
+struct xdp_umem_reg {
+ __u64 addr; /* Start of packet data area */
+ __u64 len; /* Length of packet data area */
+ __u32 chunk_size;
+ __u32 headroom;
+};
+
+struct xdp_statistics {
+ __u64 rx_dropped; /* Dropped for reasons other than invalid desc */
+ __u64 rx_invalid_descs; /* Dropped due to invalid descriptor */
+ __u64 tx_invalid_descs; /* Dropped due to invalid descriptor */
+};
+
+/* Pgoff for mmaping the rings */
+#define XDP_PGOFF_RX_RING 0
+#define XDP_PGOFF_TX_RING 0x80000000
+#define XDP_UMEM_PGOFF_FILL_RING 0x100000000ULL
+#define XDP_UMEM_PGOFF_COMPLETION_RING 0x180000000ULL
+
+/* Rx/Tx descriptor */
+struct xdp_desc {
+ __u64 addr;
+ __u32 len;
+ __u32 options;
+};
+
+/* UMEM descriptor is __u64 */
+
+#endif /* _LINUX_IF_XDP_H */
diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h
index 48e8a225b985..a55cb8b10165 100644
--- a/tools/include/uapi/linux/in.h
+++ b/tools/include/uapi/linux/in.h
@@ -266,10 +266,14 @@ struct sockaddr_in {
#define IN_CLASSD(a) ((((long int) (a)) & 0xf0000000) == 0xe0000000)
#define IN_MULTICAST(a) IN_CLASSD(a)
-#define IN_MULTICAST_NET 0xF0000000
+#define IN_MULTICAST_NET 0xe0000000
-#define IN_EXPERIMENTAL(a) ((((long int) (a)) & 0xf0000000) == 0xf0000000)
-#define IN_BADCLASS(a) IN_EXPERIMENTAL((a))
+#define IN_BADCLASS(a) (((long int) (a) ) == (long int)0xffffffff)
+#define IN_EXPERIMENTAL(a) IN_BADCLASS((a))
+
+#define IN_CLASSE(a) ((((long int) (a)) & 0xf0000000) == 0xf0000000)
+#define IN_CLASSE_NET 0xffffffff
+#define IN_CLASSE_NSHIFT 0
/* Address to accept any incoming messages. */
#define INADDR_ANY ((unsigned long int) 0x00000000)
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 2b7a652c9fa4..6d4ea4b6c922 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -492,6 +492,17 @@ struct kvm_dirty_log {
};
};
+/* for KVM_CLEAR_DIRTY_LOG */
+struct kvm_clear_dirty_log {
+ __u32 slot;
+ __u32 num_pages;
+ __u64 first_page;
+ union {
+ void __user *dirty_bitmap; /* one bit per page */
+ __u64 padding2;
+ };
+};
+
/* for KVM_SET_SIGNAL_MASK */
struct kvm_signal_mask {
__u32 len;
@@ -975,6 +986,8 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_HYPERV_ENLIGHTENED_VMCS 163
#define KVM_CAP_EXCEPTION_PAYLOAD 164
#define KVM_CAP_ARM_VM_IPA_SIZE 165
+#define KVM_CAP_MANUAL_DIRTY_LOG_PROTECT 166
+#define KVM_CAP_HYPERV_CPUID 167
#ifdef KVM_CAP_IRQ_ROUTING
@@ -1421,6 +1434,12 @@ struct kvm_enc_region {
#define KVM_GET_NESTED_STATE _IOWR(KVMIO, 0xbe, struct kvm_nested_state)
#define KVM_SET_NESTED_STATE _IOW(KVMIO, 0xbf, struct kvm_nested_state)
+/* Available with KVM_CAP_MANUAL_DIRTY_LOG_PROTECT */
+#define KVM_CLEAR_DIRTY_LOG _IOWR(KVMIO, 0xc0, struct kvm_clear_dirty_log)
+
+/* Available with KVM_CAP_HYPERV_CPUID */
+#define KVM_GET_SUPPORTED_HV_CPUID _IOWR(KVMIO, 0xc1, struct kvm_cpuid2)
+
/* Secure Encrypted Virtualization command */
enum sev_cmd_id {
/* Guest initialization commands */
diff --git a/tools/include/uapi/linux/mount.h b/tools/include/uapi/linux/mount.h
new file mode 100644
index 000000000000..3f9ec42510b0
--- /dev/null
+++ b/tools/include/uapi/linux/mount.h
@@ -0,0 +1,58 @@
+#ifndef _UAPI_LINUX_MOUNT_H
+#define _UAPI_LINUX_MOUNT_H
+
+/*
+ * These are the fs-independent mount-flags: up to 32 flags are supported
+ *
+ * Usage of these is restricted within the kernel to core mount(2) code and
+ * callers of sys_mount() only. Filesystems should be using the SB_*
+ * equivalent instead.
+ */
+#define MS_RDONLY 1 /* Mount read-only */
+#define MS_NOSUID 2 /* Ignore suid and sgid bits */
+#define MS_NODEV 4 /* Disallow access to device special files */
+#define MS_NOEXEC 8 /* Disallow program execution */
+#define MS_SYNCHRONOUS 16 /* Writes are synced at once */
+#define MS_REMOUNT 32 /* Alter flags of a mounted FS */
+#define MS_MANDLOCK 64 /* Allow mandatory locks on an FS */
+#define MS_DIRSYNC 128 /* Directory modifications are synchronous */
+#define MS_NOATIME 1024 /* Do not update access times. */
+#define MS_NODIRATIME 2048 /* Do not update directory access times */
+#define MS_BIND 4096
+#define MS_MOVE 8192
+#define MS_REC 16384
+#define MS_VERBOSE 32768 /* War is peace. Verbosity is silence.
+ MS_VERBOSE is deprecated. */
+#define MS_SILENT 32768
+#define MS_POSIXACL (1<<16) /* VFS does not apply the umask */
+#define MS_UNBINDABLE (1<<17) /* change to unbindable */
+#define MS_PRIVATE (1<<18) /* change to private */
+#define MS_SLAVE (1<<19) /* change to slave */
+#define MS_SHARED (1<<20) /* change to shared */
+#define MS_RELATIME (1<<21) /* Update atime relative to mtime/ctime. */
+#define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */
+#define MS_I_VERSION (1<<23) /* Update inode I_version field */
+#define MS_STRICTATIME (1<<24) /* Always perform atime updates */
+#define MS_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */
+
+/* These sb flags are internal to the kernel */
+#define MS_SUBMOUNT (1<<26)
+#define MS_NOREMOTELOCK (1<<27)
+#define MS_NOSEC (1<<28)
+#define MS_BORN (1<<29)
+#define MS_ACTIVE (1<<30)
+#define MS_NOUSER (1<<31)
+
+/*
+ * Superblock flags that can be altered by MS_REMOUNT
+ */
+#define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION|\
+ MS_LAZYTIME)
+
+/*
+ * Old magic mount flag and mask
+ */
+#define MS_MGC_VAL 0xC0ED0000
+#define MS_MGC_MSK 0xffff0000
+
+#endif /* _UAPI_LINUX_MOUNT_H */
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index 9de8780ac8d9..7198ddd0c6b1 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -372,7 +372,9 @@ struct perf_event_attr {
context_switch : 1, /* context switch data */
write_backward : 1, /* Write ring buffer from end to beginning */
namespaces : 1, /* include namespaces data */
- __reserved_1 : 35;
+ ksymbol : 1, /* include ksymbol events */
+ bpf_event : 1, /* include bpf events */
+ __reserved_1 : 33;
union {
__u32 wakeup_events; /* wakeup every n events */
@@ -445,8 +447,6 @@ struct perf_event_query_bpf {
__u32 ids[0];
};
-#define perf_flags(attr) (*(&(attr)->read_format + 1))
-
/*
* Ioctls that can be done on a perf event fd:
*/
@@ -965,9 +965,58 @@ enum perf_event_type {
*/
PERF_RECORD_NAMESPACES = 16,
+ /*
+ * Record ksymbol register/unregister events:
+ *
+ * struct {
+ * struct perf_event_header header;
+ * u64 addr;
+ * u32 len;
+ * u16 ksym_type;
+ * u16 flags;
+ * char name[];
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_KSYMBOL = 17,
+
+ /*
+ * Record bpf events:
+ * enum perf_bpf_event_type {
+ * PERF_BPF_EVENT_UNKNOWN = 0,
+ * PERF_BPF_EVENT_PROG_LOAD = 1,
+ * PERF_BPF_EVENT_PROG_UNLOAD = 2,
+ * };
+ *
+ * struct {
+ * struct perf_event_header header;
+ * u16 type;
+ * u16 flags;
+ * u32 id;
+ * u8 tag[BPF_TAG_SIZE];
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_BPF_EVENT = 18,
+
PERF_RECORD_MAX, /* non-ABI */
};
+enum perf_record_ksymbol_type {
+ PERF_RECORD_KSYMBOL_TYPE_UNKNOWN = 0,
+ PERF_RECORD_KSYMBOL_TYPE_BPF = 1,
+ PERF_RECORD_KSYMBOL_TYPE_MAX /* non-ABI */
+};
+
+#define PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER (1 << 0)
+
+enum perf_bpf_event_type {
+ PERF_BPF_EVENT_UNKNOWN = 0,
+ PERF_BPF_EVENT_PROG_LOAD = 1,
+ PERF_BPF_EVENT_PROG_UNLOAD = 2,
+ PERF_BPF_EVENT_MAX, /* non-ABI */
+};
+
#define PERF_MAX_STACK_DEPTH 127
#define PERF_MAX_CONTEXTS_PER_STACK 8
diff --git a/tools/include/uapi/linux/pkt_sched.h b/tools/include/uapi/linux/pkt_sched.h
new file mode 100644
index 000000000000..0d18b1d1fbbc
--- /dev/null
+++ b/tools/include/uapi/linux/pkt_sched.h
@@ -0,0 +1,1163 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __LINUX_PKT_SCHED_H
+#define __LINUX_PKT_SCHED_H
+
+#include <linux/types.h>
+
+/* Logical priority bands not depending on specific packet scheduler.
+ Every scheduler will map them to real traffic classes, if it has
+ no more precise mechanism to classify packets.
+
+ These numbers have no special meaning, though their coincidence
+ with obsolete IPv6 values is not occasional :-). New IPv6 drafts
+ preferred full anarchy inspired by diffserv group.
+
+ Note: TC_PRIO_BESTEFFORT does not mean that it is the most unhappy
+ class, actually, as rule it will be handled with more care than
+ filler or even bulk.
+ */
+
+#define TC_PRIO_BESTEFFORT 0
+#define TC_PRIO_FILLER 1
+#define TC_PRIO_BULK 2
+#define TC_PRIO_INTERACTIVE_BULK 4
+#define TC_PRIO_INTERACTIVE 6
+#define TC_PRIO_CONTROL 7
+
+#define TC_PRIO_MAX 15
+
+/* Generic queue statistics, available for all the elements.
+ Particular schedulers may have also their private records.
+ */
+
+struct tc_stats {
+ __u64 bytes; /* Number of enqueued bytes */
+ __u32 packets; /* Number of enqueued packets */
+ __u32 drops; /* Packets dropped because of lack of resources */
+ __u32 overlimits; /* Number of throttle events when this
+ * flow goes out of allocated bandwidth */
+ __u32 bps; /* Current flow byte rate */
+ __u32 pps; /* Current flow packet rate */
+ __u32 qlen;
+ __u32 backlog;
+};
+
+struct tc_estimator {
+ signed char interval;
+ unsigned char ewma_log;
+};
+
+/* "Handles"
+ ---------
+
+ All the traffic control objects have 32bit identifiers, or "handles".
+
+ They can be considered as opaque numbers from user API viewpoint,
+ but actually they always consist of two fields: major and
+ minor numbers, which are interpreted by kernel specially,
+ that may be used by applications, though not recommended.
+
+ F.e. qdisc handles always have minor number equal to zero,
+ classes (or flows) have major equal to parent qdisc major, and
+ minor uniquely identifying class inside qdisc.
+
+ Macros to manipulate handles:
+ */
+
+#define TC_H_MAJ_MASK (0xFFFF0000U)
+#define TC_H_MIN_MASK (0x0000FFFFU)
+#define TC_H_MAJ(h) ((h)&TC_H_MAJ_MASK)
+#define TC_H_MIN(h) ((h)&TC_H_MIN_MASK)
+#define TC_H_MAKE(maj,min) (((maj)&TC_H_MAJ_MASK)|((min)&TC_H_MIN_MASK))
+
+#define TC_H_UNSPEC (0U)
+#define TC_H_ROOT (0xFFFFFFFFU)
+#define TC_H_INGRESS (0xFFFFFFF1U)
+#define TC_H_CLSACT TC_H_INGRESS
+
+#define TC_H_MIN_PRIORITY 0xFFE0U
+#define TC_H_MIN_INGRESS 0xFFF2U
+#define TC_H_MIN_EGRESS 0xFFF3U
+
+/* Need to corrospond to iproute2 tc/tc_core.h "enum link_layer" */
+enum tc_link_layer {
+ TC_LINKLAYER_UNAWARE, /* Indicate unaware old iproute2 util */
+ TC_LINKLAYER_ETHERNET,
+ TC_LINKLAYER_ATM,
+};
+#define TC_LINKLAYER_MASK 0x0F /* limit use to lower 4 bits */
+
+struct tc_ratespec {
+ unsigned char cell_log;
+ __u8 linklayer; /* lower 4 bits */
+ unsigned short overhead;
+ short cell_align;
+ unsigned short mpu;
+ __u32 rate;
+};
+
+#define TC_RTAB_SIZE 1024
+
+struct tc_sizespec {
+ unsigned char cell_log;
+ unsigned char size_log;
+ short cell_align;
+ int overhead;
+ unsigned int linklayer;
+ unsigned int mpu;
+ unsigned int mtu;
+ unsigned int tsize;
+};
+
+enum {
+ TCA_STAB_UNSPEC,
+ TCA_STAB_BASE,
+ TCA_STAB_DATA,
+ __TCA_STAB_MAX
+};
+
+#define TCA_STAB_MAX (__TCA_STAB_MAX - 1)
+
+/* FIFO section */
+
+struct tc_fifo_qopt {
+ __u32 limit; /* Queue length: bytes for bfifo, packets for pfifo */
+};
+
+/* SKBPRIO section */
+
+/*
+ * Priorities go from zero to (SKBPRIO_MAX_PRIORITY - 1).
+ * SKBPRIO_MAX_PRIORITY should be at least 64 in order for skbprio to be able
+ * to map one to one the DS field of IPV4 and IPV6 headers.
+ * Memory allocation grows linearly with SKBPRIO_MAX_PRIORITY.
+ */
+
+#define SKBPRIO_MAX_PRIORITY 64
+
+struct tc_skbprio_qopt {
+ __u32 limit; /* Queue length in packets. */
+};
+
+/* PRIO section */
+
+#define TCQ_PRIO_BANDS 16
+#define TCQ_MIN_PRIO_BANDS 2
+
+struct tc_prio_qopt {
+ int bands; /* Number of bands */
+ __u8 priomap[TC_PRIO_MAX+1]; /* Map: logical priority -> PRIO band */
+};
+
+/* MULTIQ section */
+
+struct tc_multiq_qopt {
+ __u16 bands; /* Number of bands */
+ __u16 max_bands; /* Maximum number of queues */
+};
+
+/* PLUG section */
+
+#define TCQ_PLUG_BUFFER 0
+#define TCQ_PLUG_RELEASE_ONE 1
+#define TCQ_PLUG_RELEASE_INDEFINITE 2
+#define TCQ_PLUG_LIMIT 3
+
+struct tc_plug_qopt {
+ /* TCQ_PLUG_BUFFER: Inset a plug into the queue and
+ * buffer any incoming packets
+ * TCQ_PLUG_RELEASE_ONE: Dequeue packets from queue head
+ * to beginning of the next plug.
+ * TCQ_PLUG_RELEASE_INDEFINITE: Dequeue all packets from queue.
+ * Stop buffering packets until the next TCQ_PLUG_BUFFER
+ * command is received (just act as a pass-thru queue).
+ * TCQ_PLUG_LIMIT: Increase/decrease queue size
+ */
+ int action;
+ __u32 limit;
+};
+
+/* TBF section */
+
+struct tc_tbf_qopt {
+ struct tc_ratespec rate;
+ struct tc_ratespec peakrate;
+ __u32 limit;
+ __u32 buffer;
+ __u32 mtu;
+};
+
+enum {
+ TCA_TBF_UNSPEC,
+ TCA_TBF_PARMS,
+ TCA_TBF_RTAB,
+ TCA_TBF_PTAB,
+ TCA_TBF_RATE64,
+ TCA_TBF_PRATE64,
+ TCA_TBF_BURST,
+ TCA_TBF_PBURST,
+ TCA_TBF_PAD,
+ __TCA_TBF_MAX,
+};
+
+#define TCA_TBF_MAX (__TCA_TBF_MAX - 1)
+
+
+/* TEQL section */
+
+/* TEQL does not require any parameters */
+
+/* SFQ section */
+
+struct tc_sfq_qopt {
+ unsigned quantum; /* Bytes per round allocated to flow */
+ int perturb_period; /* Period of hash perturbation */
+ __u32 limit; /* Maximal packets in queue */
+ unsigned divisor; /* Hash divisor */
+ unsigned flows; /* Maximal number of flows */
+};
+
+struct tc_sfqred_stats {
+ __u32 prob_drop; /* Early drops, below max threshold */
+ __u32 forced_drop; /* Early drops, after max threshold */
+ __u32 prob_mark; /* Marked packets, below max threshold */
+ __u32 forced_mark; /* Marked packets, after max threshold */
+ __u32 prob_mark_head; /* Marked packets, below max threshold */
+ __u32 forced_mark_head;/* Marked packets, after max threshold */
+};
+
+struct tc_sfq_qopt_v1 {
+ struct tc_sfq_qopt v0;
+ unsigned int depth; /* max number of packets per flow */
+ unsigned int headdrop;
+/* SFQRED parameters */
+ __u32 limit; /* HARD maximal flow queue length (bytes) */
+ __u32 qth_min; /* Min average length threshold (bytes) */
+ __u32 qth_max; /* Max average length threshold (bytes) */
+ unsigned char Wlog; /* log(W) */
+ unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */
+ unsigned char Scell_log; /* cell size for idle damping */
+ unsigned char flags;
+ __u32 max_P; /* probability, high resolution */
+/* SFQRED stats */
+ struct tc_sfqred_stats stats;
+};
+
+
+struct tc_sfq_xstats {
+ __s32 allot;
+};
+
+/* RED section */
+
+enum {
+ TCA_RED_UNSPEC,
+ TCA_RED_PARMS,
+ TCA_RED_STAB,
+ TCA_RED_MAX_P,
+ __TCA_RED_MAX,
+};
+
+#define TCA_RED_MAX (__TCA_RED_MAX - 1)
+
+struct tc_red_qopt {
+ __u32 limit; /* HARD maximal queue length (bytes) */
+ __u32 qth_min; /* Min average length threshold (bytes) */
+ __u32 qth_max; /* Max average length threshold (bytes) */
+ unsigned char Wlog; /* log(W) */
+ unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */
+ unsigned char Scell_log; /* cell size for idle damping */
+ unsigned char flags;
+#define TC_RED_ECN 1
+#define TC_RED_HARDDROP 2
+#define TC_RED_ADAPTATIVE 4
+};
+
+struct tc_red_xstats {
+ __u32 early; /* Early drops */
+ __u32 pdrop; /* Drops due to queue limits */
+ __u32 other; /* Drops due to drop() calls */
+ __u32 marked; /* Marked packets */
+};
+
+/* GRED section */
+
+#define MAX_DPs 16
+
+enum {
+ TCA_GRED_UNSPEC,
+ TCA_GRED_PARMS,
+ TCA_GRED_STAB,
+ TCA_GRED_DPS,
+ TCA_GRED_MAX_P,
+ TCA_GRED_LIMIT,
+ TCA_GRED_VQ_LIST, /* nested TCA_GRED_VQ_ENTRY */
+ __TCA_GRED_MAX,
+};
+
+#define TCA_GRED_MAX (__TCA_GRED_MAX - 1)
+
+enum {
+ TCA_GRED_VQ_ENTRY_UNSPEC,
+ TCA_GRED_VQ_ENTRY, /* nested TCA_GRED_VQ_* */
+ __TCA_GRED_VQ_ENTRY_MAX,
+};
+#define TCA_GRED_VQ_ENTRY_MAX (__TCA_GRED_VQ_ENTRY_MAX - 1)
+
+enum {
+ TCA_GRED_VQ_UNSPEC,
+ TCA_GRED_VQ_PAD,
+ TCA_GRED_VQ_DP, /* u32 */
+ TCA_GRED_VQ_STAT_BYTES, /* u64 */
+ TCA_GRED_VQ_STAT_PACKETS, /* u32 */
+ TCA_GRED_VQ_STAT_BACKLOG, /* u32 */
+ TCA_GRED_VQ_STAT_PROB_DROP, /* u32 */
+ TCA_GRED_VQ_STAT_PROB_MARK, /* u32 */
+ TCA_GRED_VQ_STAT_FORCED_DROP, /* u32 */
+ TCA_GRED_VQ_STAT_FORCED_MARK, /* u32 */
+ TCA_GRED_VQ_STAT_PDROP, /* u32 */
+ TCA_GRED_VQ_STAT_OTHER, /* u32 */
+ TCA_GRED_VQ_FLAGS, /* u32 */
+ __TCA_GRED_VQ_MAX
+};
+
+#define TCA_GRED_VQ_MAX (__TCA_GRED_VQ_MAX - 1)
+
+struct tc_gred_qopt {
+ __u32 limit; /* HARD maximal queue length (bytes) */
+ __u32 qth_min; /* Min average length threshold (bytes) */
+ __u32 qth_max; /* Max average length threshold (bytes) */
+ __u32 DP; /* up to 2^32 DPs */
+ __u32 backlog;
+ __u32 qave;
+ __u32 forced;
+ __u32 early;
+ __u32 other;
+ __u32 pdrop;
+ __u8 Wlog; /* log(W) */
+ __u8 Plog; /* log(P_max/(qth_max-qth_min)) */
+ __u8 Scell_log; /* cell size for idle damping */
+ __u8 prio; /* prio of this VQ */
+ __u32 packets;
+ __u32 bytesin;
+};
+
+/* gred setup */
+struct tc_gred_sopt {
+ __u32 DPs;
+ __u32 def_DP;
+ __u8 grio;
+ __u8 flags;
+ __u16 pad1;
+};
+
+/* CHOKe section */
+
+enum {
+ TCA_CHOKE_UNSPEC,
+ TCA_CHOKE_PARMS,
+ TCA_CHOKE_STAB,
+ TCA_CHOKE_MAX_P,
+ __TCA_CHOKE_MAX,
+};
+
+#define TCA_CHOKE_MAX (__TCA_CHOKE_MAX - 1)
+
+struct tc_choke_qopt {
+ __u32 limit; /* Hard queue length (packets) */
+ __u32 qth_min; /* Min average threshold (packets) */
+ __u32 qth_max; /* Max average threshold (packets) */
+ unsigned char Wlog; /* log(W) */
+ unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */
+ unsigned char Scell_log; /* cell size for idle damping */
+ unsigned char flags; /* see RED flags */
+};
+
+struct tc_choke_xstats {
+ __u32 early; /* Early drops */
+ __u32 pdrop; /* Drops due to queue limits */
+ __u32 other; /* Drops due to drop() calls */
+ __u32 marked; /* Marked packets */
+ __u32 matched; /* Drops due to flow match */
+};
+
+/* HTB section */
+#define TC_HTB_NUMPRIO 8
+#define TC_HTB_MAXDEPTH 8
+#define TC_HTB_PROTOVER 3 /* the same as HTB and TC's major */
+
+struct tc_htb_opt {
+ struct tc_ratespec rate;
+ struct tc_ratespec ceil;
+ __u32 buffer;
+ __u32 cbuffer;
+ __u32 quantum;
+ __u32 level; /* out only */
+ __u32 prio;
+};
+struct tc_htb_glob {
+ __u32 version; /* to match HTB/TC */
+ __u32 rate2quantum; /* bps->quantum divisor */
+ __u32 defcls; /* default class number */
+ __u32 debug; /* debug flags */
+
+ /* stats */
+ __u32 direct_pkts; /* count of non shaped packets */
+};
+enum {
+ TCA_HTB_UNSPEC,
+ TCA_HTB_PARMS,
+ TCA_HTB_INIT,
+ TCA_HTB_CTAB,
+ TCA_HTB_RTAB,
+ TCA_HTB_DIRECT_QLEN,
+ TCA_HTB_RATE64,
+ TCA_HTB_CEIL64,
+ TCA_HTB_PAD,
+ __TCA_HTB_MAX,
+};
+
+#define TCA_HTB_MAX (__TCA_HTB_MAX - 1)
+
+struct tc_htb_xstats {
+ __u32 lends;
+ __u32 borrows;
+ __u32 giants; /* unused since 'Make HTB scheduler work with TSO.' */
+ __s32 tokens;
+ __s32 ctokens;
+};
+
+/* HFSC section */
+
+struct tc_hfsc_qopt {
+ __u16 defcls; /* default class */
+};
+
+struct tc_service_curve {
+ __u32 m1; /* slope of the first segment in bps */
+ __u32 d; /* x-projection of the first segment in us */
+ __u32 m2; /* slope of the second segment in bps */
+};
+
+struct tc_hfsc_stats {
+ __u64 work; /* total work done */
+ __u64 rtwork; /* work done by real-time criteria */
+ __u32 period; /* current period */
+ __u32 level; /* class level in hierarchy */
+};
+
+enum {
+ TCA_HFSC_UNSPEC,
+ TCA_HFSC_RSC,
+ TCA_HFSC_FSC,
+ TCA_HFSC_USC,
+ __TCA_HFSC_MAX,
+};
+
+#define TCA_HFSC_MAX (__TCA_HFSC_MAX - 1)
+
+
+/* CBQ section */
+
+#define TC_CBQ_MAXPRIO 8
+#define TC_CBQ_MAXLEVEL 8
+#define TC_CBQ_DEF_EWMA 5
+
+struct tc_cbq_lssopt {
+ unsigned char change;
+ unsigned char flags;
+#define TCF_CBQ_LSS_BOUNDED 1
+#define TCF_CBQ_LSS_ISOLATED 2
+ unsigned char ewma_log;
+ unsigned char level;
+#define TCF_CBQ_LSS_FLAGS 1
+#define TCF_CBQ_LSS_EWMA 2
+#define TCF_CBQ_LSS_MAXIDLE 4
+#define TCF_CBQ_LSS_MINIDLE 8
+#define TCF_CBQ_LSS_OFFTIME 0x10
+#define TCF_CBQ_LSS_AVPKT 0x20
+ __u32 maxidle;
+ __u32 minidle;
+ __u32 offtime;
+ __u32 avpkt;
+};
+
+struct tc_cbq_wrropt {
+ unsigned char flags;
+ unsigned char priority;
+ unsigned char cpriority;
+ unsigned char __reserved;
+ __u32 allot;
+ __u32 weight;
+};
+
+struct tc_cbq_ovl {
+ unsigned char strategy;
+#define TC_CBQ_OVL_CLASSIC 0
+#define TC_CBQ_OVL_DELAY 1
+#define TC_CBQ_OVL_LOWPRIO 2
+#define TC_CBQ_OVL_DROP 3
+#define TC_CBQ_OVL_RCLASSIC 4
+ unsigned char priority2;
+ __u16 pad;
+ __u32 penalty;
+};
+
+struct tc_cbq_police {
+ unsigned char police;
+ unsigned char __res1;
+ unsigned short __res2;
+};
+
+struct tc_cbq_fopt {
+ __u32 split;
+ __u32 defmap;
+ __u32 defchange;
+};
+
+struct tc_cbq_xstats {
+ __u32 borrows;
+ __u32 overactions;
+ __s32 avgidle;
+ __s32 undertime;
+};
+
+enum {
+ TCA_CBQ_UNSPEC,
+ TCA_CBQ_LSSOPT,
+ TCA_CBQ_WRROPT,
+ TCA_CBQ_FOPT,
+ TCA_CBQ_OVL_STRATEGY,
+ TCA_CBQ_RATE,
+ TCA_CBQ_RTAB,
+ TCA_CBQ_POLICE,
+ __TCA_CBQ_MAX,
+};
+
+#define TCA_CBQ_MAX (__TCA_CBQ_MAX - 1)
+
+/* dsmark section */
+
+enum {
+ TCA_DSMARK_UNSPEC,
+ TCA_DSMARK_INDICES,
+ TCA_DSMARK_DEFAULT_INDEX,
+ TCA_DSMARK_SET_TC_INDEX,
+ TCA_DSMARK_MASK,
+ TCA_DSMARK_VALUE,
+ __TCA_DSMARK_MAX,
+};
+
+#define TCA_DSMARK_MAX (__TCA_DSMARK_MAX - 1)
+
+/* ATM section */
+
+enum {
+ TCA_ATM_UNSPEC,
+ TCA_ATM_FD, /* file/socket descriptor */
+ TCA_ATM_PTR, /* pointer to descriptor - later */
+ TCA_ATM_HDR, /* LL header */
+ TCA_ATM_EXCESS, /* excess traffic class (0 for CLP) */
+ TCA_ATM_ADDR, /* PVC address (for output only) */
+ TCA_ATM_STATE, /* VC state (ATM_VS_*; for output only) */
+ __TCA_ATM_MAX,
+};
+
+#define TCA_ATM_MAX (__TCA_ATM_MAX - 1)
+
+/* Network emulator */
+
+enum {
+ TCA_NETEM_UNSPEC,
+ TCA_NETEM_CORR,
+ TCA_NETEM_DELAY_DIST,
+ TCA_NETEM_REORDER,
+ TCA_NETEM_CORRUPT,
+ TCA_NETEM_LOSS,
+ TCA_NETEM_RATE,
+ TCA_NETEM_ECN,
+ TCA_NETEM_RATE64,
+ TCA_NETEM_PAD,
+ TCA_NETEM_LATENCY64,
+ TCA_NETEM_JITTER64,
+ TCA_NETEM_SLOT,
+ TCA_NETEM_SLOT_DIST,
+ __TCA_NETEM_MAX,
+};
+
+#define TCA_NETEM_MAX (__TCA_NETEM_MAX - 1)
+
+struct tc_netem_qopt {
+ __u32 latency; /* added delay (us) */
+ __u32 limit; /* fifo limit (packets) */
+ __u32 loss; /* random packet loss (0=none ~0=100%) */
+ __u32 gap; /* re-ordering gap (0 for none) */
+ __u32 duplicate; /* random packet dup (0=none ~0=100%) */
+ __u32 jitter; /* random jitter in latency (us) */
+};
+
+struct tc_netem_corr {
+ __u32 delay_corr; /* delay correlation */
+ __u32 loss_corr; /* packet loss correlation */
+ __u32 dup_corr; /* duplicate correlation */
+};
+
+struct tc_netem_reorder {
+ __u32 probability;
+ __u32 correlation;
+};
+
+struct tc_netem_corrupt {
+ __u32 probability;
+ __u32 correlation;
+};
+
+struct tc_netem_rate {
+ __u32 rate; /* byte/s */
+ __s32 packet_overhead;
+ __u32 cell_size;
+ __s32 cell_overhead;
+};
+
+struct tc_netem_slot {
+ __s64 min_delay; /* nsec */
+ __s64 max_delay;
+ __s32 max_packets;
+ __s32 max_bytes;
+ __s64 dist_delay; /* nsec */
+ __s64 dist_jitter; /* nsec */
+};
+
+enum {
+ NETEM_LOSS_UNSPEC,
+ NETEM_LOSS_GI, /* General Intuitive - 4 state model */
+ NETEM_LOSS_GE, /* Gilbert Elliot models */
+ __NETEM_LOSS_MAX
+};
+#define NETEM_LOSS_MAX (__NETEM_LOSS_MAX - 1)
+
+/* State transition probabilities for 4 state model */
+struct tc_netem_gimodel {
+ __u32 p13;
+ __u32 p31;
+ __u32 p32;
+ __u32 p14;
+ __u32 p23;
+};
+
+/* Gilbert-Elliot models */
+struct tc_netem_gemodel {
+ __u32 p;
+ __u32 r;
+ __u32 h;
+ __u32 k1;
+};
+
+#define NETEM_DIST_SCALE 8192
+#define NETEM_DIST_MAX 16384
+
+/* DRR */
+
+enum {
+ TCA_DRR_UNSPEC,
+ TCA_DRR_QUANTUM,
+ __TCA_DRR_MAX
+};
+
+#define TCA_DRR_MAX (__TCA_DRR_MAX - 1)
+
+struct tc_drr_stats {
+ __u32 deficit;
+};
+
+/* MQPRIO */
+#define TC_QOPT_BITMASK 15
+#define TC_QOPT_MAX_QUEUE 16
+
+enum {
+ TC_MQPRIO_HW_OFFLOAD_NONE, /* no offload requested */
+ TC_MQPRIO_HW_OFFLOAD_TCS, /* offload TCs, no queue counts */
+ __TC_MQPRIO_HW_OFFLOAD_MAX
+};
+
+#define TC_MQPRIO_HW_OFFLOAD_MAX (__TC_MQPRIO_HW_OFFLOAD_MAX - 1)
+
+enum {
+ TC_MQPRIO_MODE_DCB,
+ TC_MQPRIO_MODE_CHANNEL,
+ __TC_MQPRIO_MODE_MAX
+};
+
+#define __TC_MQPRIO_MODE_MAX (__TC_MQPRIO_MODE_MAX - 1)
+
+enum {
+ TC_MQPRIO_SHAPER_DCB,
+ TC_MQPRIO_SHAPER_BW_RATE, /* Add new shapers below */
+ __TC_MQPRIO_SHAPER_MAX
+};
+
+#define __TC_MQPRIO_SHAPER_MAX (__TC_MQPRIO_SHAPER_MAX - 1)
+
+struct tc_mqprio_qopt {
+ __u8 num_tc;
+ __u8 prio_tc_map[TC_QOPT_BITMASK + 1];
+ __u8 hw;
+ __u16 count[TC_QOPT_MAX_QUEUE];
+ __u16 offset[TC_QOPT_MAX_QUEUE];
+};
+
+#define TC_MQPRIO_F_MODE 0x1
+#define TC_MQPRIO_F_SHAPER 0x2
+#define TC_MQPRIO_F_MIN_RATE 0x4
+#define TC_MQPRIO_F_MAX_RATE 0x8
+
+enum {
+ TCA_MQPRIO_UNSPEC,
+ TCA_MQPRIO_MODE,
+ TCA_MQPRIO_SHAPER,
+ TCA_MQPRIO_MIN_RATE64,
+ TCA_MQPRIO_MAX_RATE64,
+ __TCA_MQPRIO_MAX,
+};
+
+#define TCA_MQPRIO_MAX (__TCA_MQPRIO_MAX - 1)
+
+/* SFB */
+
+enum {
+ TCA_SFB_UNSPEC,
+ TCA_SFB_PARMS,
+ __TCA_SFB_MAX,
+};
+
+#define TCA_SFB_MAX (__TCA_SFB_MAX - 1)
+
+/*
+ * Note: increment, decrement are Q0.16 fixed-point values.
+ */
+struct tc_sfb_qopt {
+ __u32 rehash_interval; /* delay between hash move, in ms */
+ __u32 warmup_time; /* double buffering warmup time in ms (warmup_time < rehash_interval) */
+ __u32 max; /* max len of qlen_min */
+ __u32 bin_size; /* maximum queue length per bin */
+ __u32 increment; /* probability increment, (d1 in Blue) */
+ __u32 decrement; /* probability decrement, (d2 in Blue) */
+ __u32 limit; /* max SFB queue length */
+ __u32 penalty_rate; /* inelastic flows are rate limited to 'rate' pps */
+ __u32 penalty_burst;
+};
+
+struct tc_sfb_xstats {
+ __u32 earlydrop;
+ __u32 penaltydrop;
+ __u32 bucketdrop;
+ __u32 queuedrop;
+ __u32 childdrop; /* drops in child qdisc */
+ __u32 marked;
+ __u32 maxqlen;
+ __u32 maxprob;
+ __u32 avgprob;
+};
+
+#define SFB_MAX_PROB 0xFFFF
+
+/* QFQ */
+enum {
+ TCA_QFQ_UNSPEC,
+ TCA_QFQ_WEIGHT,
+ TCA_QFQ_LMAX,
+ __TCA_QFQ_MAX
+};
+
+#define TCA_QFQ_MAX (__TCA_QFQ_MAX - 1)
+
+struct tc_qfq_stats {
+ __u32 weight;
+ __u32 lmax;
+};
+
+/* CODEL */
+
+enum {
+ TCA_CODEL_UNSPEC,
+ TCA_CODEL_TARGET,
+ TCA_CODEL_LIMIT,
+ TCA_CODEL_INTERVAL,
+ TCA_CODEL_ECN,
+ TCA_CODEL_CE_THRESHOLD,
+ __TCA_CODEL_MAX
+};
+
+#define TCA_CODEL_MAX (__TCA_CODEL_MAX - 1)
+
+struct tc_codel_xstats {
+ __u32 maxpacket; /* largest packet we've seen so far */
+ __u32 count; /* how many drops we've done since the last time we
+ * entered dropping state
+ */
+ __u32 lastcount; /* count at entry to dropping state */
+ __u32 ldelay; /* in-queue delay seen by most recently dequeued packet */
+ __s32 drop_next; /* time to drop next packet */
+ __u32 drop_overlimit; /* number of time max qdisc packet limit was hit */
+ __u32 ecn_mark; /* number of packets we ECN marked instead of dropped */
+ __u32 dropping; /* are we in dropping state ? */
+ __u32 ce_mark; /* number of CE marked packets because of ce_threshold */
+};
+
+/* FQ_CODEL */
+
+enum {
+ TCA_FQ_CODEL_UNSPEC,
+ TCA_FQ_CODEL_TARGET,
+ TCA_FQ_CODEL_LIMIT,
+ TCA_FQ_CODEL_INTERVAL,
+ TCA_FQ_CODEL_ECN,
+ TCA_FQ_CODEL_FLOWS,
+ TCA_FQ_CODEL_QUANTUM,
+ TCA_FQ_CODEL_CE_THRESHOLD,
+ TCA_FQ_CODEL_DROP_BATCH_SIZE,
+ TCA_FQ_CODEL_MEMORY_LIMIT,
+ __TCA_FQ_CODEL_MAX
+};
+
+#define TCA_FQ_CODEL_MAX (__TCA_FQ_CODEL_MAX - 1)
+
+enum {
+ TCA_FQ_CODEL_XSTATS_QDISC,
+ TCA_FQ_CODEL_XSTATS_CLASS,
+};
+
+struct tc_fq_codel_qd_stats {
+ __u32 maxpacket; /* largest packet we've seen so far */
+ __u32 drop_overlimit; /* number of time max qdisc
+ * packet limit was hit
+ */
+ __u32 ecn_mark; /* number of packets we ECN marked
+ * instead of being dropped
+ */
+ __u32 new_flow_count; /* number of time packets
+ * created a 'new flow'
+ */
+ __u32 new_flows_len; /* count of flows in new list */
+ __u32 old_flows_len; /* count of flows in old list */
+ __u32 ce_mark; /* packets above ce_threshold */
+ __u32 memory_usage; /* in bytes */
+ __u32 drop_overmemory;
+};
+
+struct tc_fq_codel_cl_stats {
+ __s32 deficit;
+ __u32 ldelay; /* in-queue delay seen by most recently
+ * dequeued packet
+ */
+ __u32 count;
+ __u32 lastcount;
+ __u32 dropping;
+ __s32 drop_next;
+};
+
+struct tc_fq_codel_xstats {
+ __u32 type;
+ union {
+ struct tc_fq_codel_qd_stats qdisc_stats;
+ struct tc_fq_codel_cl_stats class_stats;
+ };
+};
+
+/* FQ */
+
+enum {
+ TCA_FQ_UNSPEC,
+
+ TCA_FQ_PLIMIT, /* limit of total number of packets in queue */
+
+ TCA_FQ_FLOW_PLIMIT, /* limit of packets per flow */
+
+ TCA_FQ_QUANTUM, /* RR quantum */
+
+ TCA_FQ_INITIAL_QUANTUM, /* RR quantum for new flow */
+
+ TCA_FQ_RATE_ENABLE, /* enable/disable rate limiting */
+
+ TCA_FQ_FLOW_DEFAULT_RATE,/* obsolete, do not use */
+
+ TCA_FQ_FLOW_MAX_RATE, /* per flow max rate */
+
+ TCA_FQ_BUCKETS_LOG, /* log2(number of buckets) */
+
+ TCA_FQ_FLOW_REFILL_DELAY, /* flow credit refill delay in usec */
+
+ TCA_FQ_ORPHAN_MASK, /* mask applied to orphaned skb hashes */
+
+ TCA_FQ_LOW_RATE_THRESHOLD, /* per packet delay under this rate */
+
+ TCA_FQ_CE_THRESHOLD, /* DCTCP-like CE-marking threshold */
+
+ __TCA_FQ_MAX
+};
+
+#define TCA_FQ_MAX (__TCA_FQ_MAX - 1)
+
+struct tc_fq_qd_stats {
+ __u64 gc_flows;
+ __u64 highprio_packets;
+ __u64 tcp_retrans;
+ __u64 throttled;
+ __u64 flows_plimit;
+ __u64 pkts_too_long;
+ __u64 allocation_errors;
+ __s64 time_next_delayed_flow;
+ __u32 flows;
+ __u32 inactive_flows;
+ __u32 throttled_flows;
+ __u32 unthrottle_latency_ns;
+ __u64 ce_mark; /* packets above ce_threshold */
+};
+
+/* Heavy-Hitter Filter */
+
+enum {
+ TCA_HHF_UNSPEC,
+ TCA_HHF_BACKLOG_LIMIT,
+ TCA_HHF_QUANTUM,
+ TCA_HHF_HH_FLOWS_LIMIT,
+ TCA_HHF_RESET_TIMEOUT,
+ TCA_HHF_ADMIT_BYTES,
+ TCA_HHF_EVICT_TIMEOUT,
+ TCA_HHF_NON_HH_WEIGHT,
+ __TCA_HHF_MAX
+};
+
+#define TCA_HHF_MAX (__TCA_HHF_MAX - 1)
+
+struct tc_hhf_xstats {
+ __u32 drop_overlimit; /* number of times max qdisc packet limit
+ * was hit
+ */
+ __u32 hh_overlimit; /* number of times max heavy-hitters was hit */
+ __u32 hh_tot_count; /* number of captured heavy-hitters so far */
+ __u32 hh_cur_count; /* number of current heavy-hitters */
+};
+
+/* PIE */
+enum {
+ TCA_PIE_UNSPEC,
+ TCA_PIE_TARGET,
+ TCA_PIE_LIMIT,
+ TCA_PIE_TUPDATE,
+ TCA_PIE_ALPHA,
+ TCA_PIE_BETA,
+ TCA_PIE_ECN,
+ TCA_PIE_BYTEMODE,
+ __TCA_PIE_MAX
+};
+#define TCA_PIE_MAX (__TCA_PIE_MAX - 1)
+
+struct tc_pie_xstats {
+ __u32 prob; /* current probability */
+ __u32 delay; /* current delay in ms */
+ __u32 avg_dq_rate; /* current average dq_rate in bits/pie_time */
+ __u32 packets_in; /* total number of packets enqueued */
+ __u32 dropped; /* packets dropped due to pie_action */
+ __u32 overlimit; /* dropped due to lack of space in queue */
+ __u32 maxq; /* maximum queue size */
+ __u32 ecn_mark; /* packets marked with ecn*/
+};
+
+/* CBS */
+struct tc_cbs_qopt {
+ __u8 offload;
+ __u8 _pad[3];
+ __s32 hicredit;
+ __s32 locredit;
+ __s32 idleslope;
+ __s32 sendslope;
+};
+
+enum {
+ TCA_CBS_UNSPEC,
+ TCA_CBS_PARMS,
+ __TCA_CBS_MAX,
+};
+
+#define TCA_CBS_MAX (__TCA_CBS_MAX - 1)
+
+
+/* ETF */
+struct tc_etf_qopt {
+ __s32 delta;
+ __s32 clockid;
+ __u32 flags;
+#define TC_ETF_DEADLINE_MODE_ON BIT(0)
+#define TC_ETF_OFFLOAD_ON BIT(1)
+};
+
+enum {
+ TCA_ETF_UNSPEC,
+ TCA_ETF_PARMS,
+ __TCA_ETF_MAX,
+};
+
+#define TCA_ETF_MAX (__TCA_ETF_MAX - 1)
+
+
+/* CAKE */
+enum {
+ TCA_CAKE_UNSPEC,
+ TCA_CAKE_PAD,
+ TCA_CAKE_BASE_RATE64,
+ TCA_CAKE_DIFFSERV_MODE,
+ TCA_CAKE_ATM,
+ TCA_CAKE_FLOW_MODE,
+ TCA_CAKE_OVERHEAD,
+ TCA_CAKE_RTT,
+ TCA_CAKE_TARGET,
+ TCA_CAKE_AUTORATE,
+ TCA_CAKE_MEMORY,
+ TCA_CAKE_NAT,
+ TCA_CAKE_RAW,
+ TCA_CAKE_WASH,
+ TCA_CAKE_MPU,
+ TCA_CAKE_INGRESS,
+ TCA_CAKE_ACK_FILTER,
+ TCA_CAKE_SPLIT_GSO,
+ __TCA_CAKE_MAX
+};
+#define TCA_CAKE_MAX (__TCA_CAKE_MAX - 1)
+
+enum {
+ __TCA_CAKE_STATS_INVALID,
+ TCA_CAKE_STATS_PAD,
+ TCA_CAKE_STATS_CAPACITY_ESTIMATE64,
+ TCA_CAKE_STATS_MEMORY_LIMIT,
+ TCA_CAKE_STATS_MEMORY_USED,
+ TCA_CAKE_STATS_AVG_NETOFF,
+ TCA_CAKE_STATS_MIN_NETLEN,
+ TCA_CAKE_STATS_MAX_NETLEN,
+ TCA_CAKE_STATS_MIN_ADJLEN,
+ TCA_CAKE_STATS_MAX_ADJLEN,
+ TCA_CAKE_STATS_TIN_STATS,
+ TCA_CAKE_STATS_DEFICIT,
+ TCA_CAKE_STATS_COBALT_COUNT,
+ TCA_CAKE_STATS_DROPPING,
+ TCA_CAKE_STATS_DROP_NEXT_US,
+ TCA_CAKE_STATS_P_DROP,
+ TCA_CAKE_STATS_BLUE_TIMER_US,
+ __TCA_CAKE_STATS_MAX
+};
+#define TCA_CAKE_STATS_MAX (__TCA_CAKE_STATS_MAX - 1)
+
+enum {
+ __TCA_CAKE_TIN_STATS_INVALID,
+ TCA_CAKE_TIN_STATS_PAD,
+ TCA_CAKE_TIN_STATS_SENT_PACKETS,
+ TCA_CAKE_TIN_STATS_SENT_BYTES64,
+ TCA_CAKE_TIN_STATS_DROPPED_PACKETS,
+ TCA_CAKE_TIN_STATS_DROPPED_BYTES64,
+ TCA_CAKE_TIN_STATS_ACKS_DROPPED_PACKETS,
+ TCA_CAKE_TIN_STATS_ACKS_DROPPED_BYTES64,
+ TCA_CAKE_TIN_STATS_ECN_MARKED_PACKETS,
+ TCA_CAKE_TIN_STATS_ECN_MARKED_BYTES64,
+ TCA_CAKE_TIN_STATS_BACKLOG_PACKETS,
+ TCA_CAKE_TIN_STATS_BACKLOG_BYTES,
+ TCA_CAKE_TIN_STATS_THRESHOLD_RATE64,
+ TCA_CAKE_TIN_STATS_TARGET_US,
+ TCA_CAKE_TIN_STATS_INTERVAL_US,
+ TCA_CAKE_TIN_STATS_WAY_INDIRECT_HITS,
+ TCA_CAKE_TIN_STATS_WAY_MISSES,
+ TCA_CAKE_TIN_STATS_WAY_COLLISIONS,
+ TCA_CAKE_TIN_STATS_PEAK_DELAY_US,
+ TCA_CAKE_TIN_STATS_AVG_DELAY_US,
+ TCA_CAKE_TIN_STATS_BASE_DELAY_US,
+ TCA_CAKE_TIN_STATS_SPARSE_FLOWS,
+ TCA_CAKE_TIN_STATS_BULK_FLOWS,
+ TCA_CAKE_TIN_STATS_UNRESPONSIVE_FLOWS,
+ TCA_CAKE_TIN_STATS_MAX_SKBLEN,
+ TCA_CAKE_TIN_STATS_FLOW_QUANTUM,
+ __TCA_CAKE_TIN_STATS_MAX
+};
+#define TCA_CAKE_TIN_STATS_MAX (__TCA_CAKE_TIN_STATS_MAX - 1)
+#define TC_CAKE_MAX_TINS (8)
+
+enum {
+ CAKE_FLOW_NONE = 0,
+ CAKE_FLOW_SRC_IP,
+ CAKE_FLOW_DST_IP,
+ CAKE_FLOW_HOSTS, /* = CAKE_FLOW_SRC_IP | CAKE_FLOW_DST_IP */
+ CAKE_FLOW_FLOWS,
+ CAKE_FLOW_DUAL_SRC, /* = CAKE_FLOW_SRC_IP | CAKE_FLOW_FLOWS */
+ CAKE_FLOW_DUAL_DST, /* = CAKE_FLOW_DST_IP | CAKE_FLOW_FLOWS */
+ CAKE_FLOW_TRIPLE, /* = CAKE_FLOW_HOSTS | CAKE_FLOW_FLOWS */
+ CAKE_FLOW_MAX,
+};
+
+enum {
+ CAKE_DIFFSERV_DIFFSERV3 = 0,
+ CAKE_DIFFSERV_DIFFSERV4,
+ CAKE_DIFFSERV_DIFFSERV8,
+ CAKE_DIFFSERV_BESTEFFORT,
+ CAKE_DIFFSERV_PRECEDENCE,
+ CAKE_DIFFSERV_MAX
+};
+
+enum {
+ CAKE_ACK_NONE = 0,
+ CAKE_ACK_FILTER,
+ CAKE_ACK_AGGRESSIVE,
+ CAKE_ACK_MAX
+};
+
+enum {
+ CAKE_ATM_NONE = 0,
+ CAKE_ATM_ATM,
+ CAKE_ATM_PTM,
+ CAKE_ATM_MAX
+};
+
+
+/* TAPRIO */
+enum {
+ TC_TAPRIO_CMD_SET_GATES = 0x00,
+ TC_TAPRIO_CMD_SET_AND_HOLD = 0x01,
+ TC_TAPRIO_CMD_SET_AND_RELEASE = 0x02,
+};
+
+enum {
+ TCA_TAPRIO_SCHED_ENTRY_UNSPEC,
+ TCA_TAPRIO_SCHED_ENTRY_INDEX, /* u32 */
+ TCA_TAPRIO_SCHED_ENTRY_CMD, /* u8 */
+ TCA_TAPRIO_SCHED_ENTRY_GATE_MASK, /* u32 */
+ TCA_TAPRIO_SCHED_ENTRY_INTERVAL, /* u32 */
+ __TCA_TAPRIO_SCHED_ENTRY_MAX,
+};
+#define TCA_TAPRIO_SCHED_ENTRY_MAX (__TCA_TAPRIO_SCHED_ENTRY_MAX - 1)
+
+/* The format for schedule entry list is:
+ * [TCA_TAPRIO_SCHED_ENTRY_LIST]
+ * [TCA_TAPRIO_SCHED_ENTRY]
+ * [TCA_TAPRIO_SCHED_ENTRY_CMD]
+ * [TCA_TAPRIO_SCHED_ENTRY_GATES]
+ * [TCA_TAPRIO_SCHED_ENTRY_INTERVAL]
+ */
+enum {
+ TCA_TAPRIO_SCHED_UNSPEC,
+ TCA_TAPRIO_SCHED_ENTRY,
+ __TCA_TAPRIO_SCHED_MAX,
+};
+
+#define TCA_TAPRIO_SCHED_MAX (__TCA_TAPRIO_SCHED_MAX - 1)
+
+enum {
+ TCA_TAPRIO_ATTR_UNSPEC,
+ TCA_TAPRIO_ATTR_PRIOMAP, /* struct tc_mqprio_qopt */
+ TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST, /* nested of entry */
+ TCA_TAPRIO_ATTR_SCHED_BASE_TIME, /* s64 */
+ TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY, /* single entry */
+ TCA_TAPRIO_ATTR_SCHED_CLOCKID, /* s32 */
+ TCA_TAPRIO_PAD,
+ __TCA_TAPRIO_ATTR_MAX,
+};
+
+#define TCA_TAPRIO_ATTR_MAX (__TCA_TAPRIO_ATTR_MAX - 1)
+
+#endif
diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h
index b17201edfa09..094bb03b9cc2 100644
--- a/tools/include/uapi/linux/prctl.h
+++ b/tools/include/uapi/linux/prctl.h
@@ -219,5 +219,14 @@ struct prctl_mm_map {
# define PR_SPEC_ENABLE (1UL << 1)
# define PR_SPEC_DISABLE (1UL << 2)
# define PR_SPEC_FORCE_DISABLE (1UL << 3)
+# define PR_SPEC_DISABLE_NOEXEC (1UL << 4)
+
+/* Reset arm64 pointer authentication keys */
+#define PR_PAC_RESET_KEYS 54
+# define PR_PAC_APIAKEY (1UL << 0)
+# define PR_PAC_APIBKEY (1UL << 1)
+# define PR_PAC_APDAKEY (1UL << 2)
+# define PR_PAC_APDBKEY (1UL << 3)
+# define PR_PAC_APGAKEY (1UL << 4)
#endif /* _LINUX_PRCTL_H */
diff --git a/tools/include/uapi/linux/tc_act/tc_bpf.h b/tools/include/uapi/linux/tc_act/tc_bpf.h
index 6e89a5df49a4..653c4f94f76e 100644
--- a/tools/include/uapi/linux/tc_act/tc_bpf.h
+++ b/tools/include/uapi/linux/tc_act/tc_bpf.h
@@ -13,8 +13,6 @@
#include <linux/pkt_cls.h>
-#define TCA_ACT_BPF 13
-
struct tc_act_bpf {
tc_gen;
};
diff --git a/tools/include/uapi/linux/vhost.h b/tools/include/uapi/linux/vhost.h
index 84c3de89696a..40d028eed645 100644
--- a/tools/include/uapi/linux/vhost.h
+++ b/tools/include/uapi/linux/vhost.h
@@ -11,94 +11,9 @@
* device configuration.
*/
+#include <linux/vhost_types.h>
#include <linux/types.h>
-#include <linux/compiler.h>
#include <linux/ioctl.h>
-#include <linux/virtio_config.h>
-#include <linux/virtio_ring.h>
-
-struct vhost_vring_state {
- unsigned int index;
- unsigned int num;
-};
-
-struct vhost_vring_file {
- unsigned int index;
- int fd; /* Pass -1 to unbind from file. */
-
-};
-
-struct vhost_vring_addr {
- unsigned int index;
- /* Option flags. */
- unsigned int flags;
- /* Flag values: */
- /* Whether log address is valid. If set enables logging. */
-#define VHOST_VRING_F_LOG 0
-
- /* Start of array of descriptors (virtually contiguous) */
- __u64 desc_user_addr;
- /* Used structure address. Must be 32 bit aligned */
- __u64 used_user_addr;
- /* Available structure address. Must be 16 bit aligned */
- __u64 avail_user_addr;
- /* Logging support. */
- /* Log writes to used structure, at offset calculated from specified
- * address. Address must be 32 bit aligned. */
- __u64 log_guest_addr;
-};
-
-/* no alignment requirement */
-struct vhost_iotlb_msg {
- __u64 iova;
- __u64 size;
- __u64 uaddr;
-#define VHOST_ACCESS_RO 0x1
-#define VHOST_ACCESS_WO 0x2
-#define VHOST_ACCESS_RW 0x3
- __u8 perm;
-#define VHOST_IOTLB_MISS 1
-#define VHOST_IOTLB_UPDATE 2
-#define VHOST_IOTLB_INVALIDATE 3
-#define VHOST_IOTLB_ACCESS_FAIL 4
- __u8 type;
-};
-
-#define VHOST_IOTLB_MSG 0x1
-#define VHOST_IOTLB_MSG_V2 0x2
-
-struct vhost_msg {
- int type;
- union {
- struct vhost_iotlb_msg iotlb;
- __u8 padding[64];
- };
-};
-
-struct vhost_msg_v2 {
- __u32 type;
- __u32 reserved;
- union {
- struct vhost_iotlb_msg iotlb;
- __u8 padding[64];
- };
-};
-
-struct vhost_memory_region {
- __u64 guest_phys_addr;
- __u64 memory_size; /* bytes */
- __u64 userspace_addr;
- __u64 flags_padding; /* No flags are currently specified. */
-};
-
-/* All region addresses and sizes must be 4K aligned. */
-#define VHOST_PAGE_SIZE 0x1000
-
-struct vhost_memory {
- __u32 nregions;
- __u32 padding;
- struct vhost_memory_region regions[0];
-};
/* ioctls */
@@ -186,31 +101,7 @@ struct vhost_memory {
* device. This can be used to stop the ring (e.g. for migration). */
#define VHOST_NET_SET_BACKEND _IOW(VHOST_VIRTIO, 0x30, struct vhost_vring_file)
-/* Feature bits */
-/* Log all write descriptors. Can be changed while device is active. */
-#define VHOST_F_LOG_ALL 26
-/* vhost-net should add virtio_net_hdr for RX, and strip for TX packets. */
-#define VHOST_NET_F_VIRTIO_NET_HDR 27
-
-/* VHOST_SCSI specific definitions */
-
-/*
- * Used by QEMU userspace to ensure a consistent vhost-scsi ABI.
- *
- * ABI Rev 0: July 2012 version starting point for v3.6-rc merge candidate +
- * RFC-v2 vhost-scsi userspace. Add GET_ABI_VERSION ioctl usage
- * ABI Rev 1: January 2013. Ignore vhost_tpgt filed in struct vhost_scsi_target.
- * All the targets under vhost_wwpn can be seen and used by guset.
- */
-
-#define VHOST_SCSI_ABI_VERSION 1
-
-struct vhost_scsi_target {
- int abi_version;
- char vhost_wwpn[224]; /* TRANSPORT_IQN_LEN */
- unsigned short vhost_tpgt;
- unsigned short reserved;
-};
+/* VHOST_SCSI specific defines */
#define VHOST_SCSI_SET_ENDPOINT _IOW(VHOST_VIRTIO, 0x40, struct vhost_scsi_target)
#define VHOST_SCSI_CLEAR_ENDPOINT _IOW(VHOST_VIRTIO, 0x41, struct vhost_scsi_target)