Diffstat (limited to 'arch/s390/net/bpf_jit_comp.c')
-rw-r--r--	arch/s390/net/bpf_jit_comp.c	| 715 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 678 insertions(+), 37 deletions(-)
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index af35052d06ed..d0846ba818ee 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -30,6 +30,7 @@  #include <asm/facility.h>  #include <asm/nospec-branch.h>  #include <asm/set_memory.h> +#include <asm/text-patching.h>  #include "bpf_jit.h"  struct bpf_jit { @@ -50,12 +51,13 @@ struct bpf_jit {  	int r14_thunk_ip;	/* Address of expoline thunk for 'br %r14' */  	int tail_call_start;	/* Tail call start offset */  	int excnt;		/* Number of exception table entries */ +	int prologue_plt_ret;	/* Return address for prologue hotpatch PLT */ +	int prologue_plt;	/* Start of prologue hotpatch PLT */  };  #define SEEN_MEM	BIT(0)		/* use mem[] for temporary storage */  #define SEEN_LITERAL	BIT(1)		/* code uses literals */  #define SEEN_FUNC	BIT(2)		/* calls C functions */ -#define SEEN_TAIL_CALL	BIT(3)		/* code uses tail calls */  #define SEEN_STACK	(SEEN_FUNC | SEEN_MEM)  /* @@ -68,6 +70,10 @@ struct bpf_jit {  #define REG_0		REG_W0			/* Register 0 */  #define REG_1		REG_W1			/* Register 1 */  #define REG_2		BPF_REG_1		/* Register 2 */ +#define REG_3		BPF_REG_2		/* Register 3 */ +#define REG_4		BPF_REG_3		/* Register 4 */ +#define REG_7		BPF_REG_6		/* Register 7 */ +#define REG_8		BPF_REG_7		/* Register 8 */  #define REG_14		BPF_REG_0		/* Register 14 */  /* @@ -507,20 +513,58 @@ static void bpf_skip(struct bpf_jit *jit, int size)  }  /* + * PLT for hotpatchable calls. The calling convention is the same as for the + * ftrace hotpatch trampolines: %r0 is return address, %r1 is clobbered. + */ +extern const char bpf_plt[]; +extern const char bpf_plt_ret[]; +extern const char bpf_plt_target[]; +extern const char bpf_plt_end[]; +#define BPF_PLT_SIZE 32 +asm( +	".pushsection .rodata\n" +	"	.align 8\n" +	"bpf_plt:\n" +	"	lgrl %r0,bpf_plt_ret\n" +	"	lgrl %r1,bpf_plt_target\n" +	"	br %r1\n" +	"	.align 8\n" +	"bpf_plt_ret: .quad 0\n" +	"bpf_plt_target: .quad 0\n" +	"bpf_plt_end:\n" +	"	.popsection\n" +); + +static void bpf_jit_plt(void *plt, void *ret, void *target) +{ +	memcpy(plt, bpf_plt, BPF_PLT_SIZE); +	*(void **)((char *)plt + (bpf_plt_ret - bpf_plt)) = ret; +	*(void **)((char *)plt + (bpf_plt_target - bpf_plt)) = target; +} + +/*   * Emit function prologue   *   * Save registers and create stack frame if necessary. - * See stack frame layout desription in "bpf_jit.h"! + * See stack frame layout description in "bpf_jit.h"!   */ -static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth) +static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp, +			     u32 stack_depth)  { -	if (jit->seen & SEEN_TAIL_CALL) { +	/* No-op for hotpatching */ +	/* brcl 0,prologue_plt */ +	EMIT6_PCREL_RILC(0xc0040000, 0, jit->prologue_plt); +	jit->prologue_plt_ret = jit->prg; + +	if (fp->aux->func_idx == 0) { +		/* Initialize the tail call counter in the main program. */  		/* xc STK_OFF_TCCNT(4,%r15),STK_OFF_TCCNT(%r15) */  		_EMIT6(0xd703f000 | STK_OFF_TCCNT, 0xf000 | STK_OFF_TCCNT);  	} else {  		/* -		 * There are no tail calls. Insert nops in order to have -		 * tail_call_start at a predictable offset. +		 * Skip the tail call counter initialization in subprograms. +		 * Insert nops in order to have tail_call_start at a +		 * predictable offset.  		 
*/  		bpf_skip(jit, 6);  	} @@ -558,6 +602,43 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth)  }  /* + * Emit an expoline for a jump that follows + */ +static void emit_expoline(struct bpf_jit *jit) +{ +	/* exrl %r0,.+10 */ +	EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10); +	/* j . */ +	EMIT4_PCREL(0xa7f40000, 0); +} + +/* + * Emit __s390_indirect_jump_r1 thunk if necessary + */ +static void emit_r1_thunk(struct bpf_jit *jit) +{ +	if (nospec_uses_trampoline()) { +		jit->r1_thunk_ip = jit->prg; +		emit_expoline(jit); +		/* br %r1 */ +		_EMIT2(0x07f1); +	} +} + +/* + * Call r1 either directly or via __s390_indirect_jump_r1 thunk + */ +static void call_r1(struct bpf_jit *jit) +{ +	if (nospec_uses_trampoline()) +		/* brasl %r14,__s390_indirect_jump_r1 */ +		EMIT6_PCREL_RILB(0xc0050000, REG_14, jit->r1_thunk_ip); +	else +		/* basr %r14,%r1 */ +		EMIT2(0x0d00, REG_14, REG_1); +} + +/*   * Function epilogue   */  static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth) @@ -570,25 +651,20 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)  	if (nospec_uses_trampoline()) {  		jit->r14_thunk_ip = jit->prg;  		/* Generate __s390_indirect_jump_r14 thunk */ -		/* exrl %r0,.+10 */ -		EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10); -		/* j . */ -		EMIT4_PCREL(0xa7f40000, 0); +		emit_expoline(jit);  	}  	/* br %r14 */  	_EMIT2(0x07fe); -	if ((nospec_uses_trampoline()) && -	    (is_first_pass(jit) || (jit->seen & SEEN_FUNC))) { -		jit->r1_thunk_ip = jit->prg; -		/* Generate __s390_indirect_jump_r1 thunk */ -		/* exrl %r0,.+10 */ -		EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10); -		/* j . */ -		EMIT4_PCREL(0xa7f40000, 0); -		/* br %r1 */ -		_EMIT2(0x07f1); -	} +	if (is_first_pass(jit) || (jit->seen & SEEN_FUNC)) +		emit_r1_thunk(jit); + +	jit->prg = ALIGN(jit->prg, 8); +	jit->prologue_plt = jit->prg; +	if (jit->prg_buf) +		bpf_jit_plt(jit->prg_buf + jit->prg, +			    jit->prg_buf + jit->prologue_plt_ret, NULL); +	jit->prg += BPF_PLT_SIZE;  }  static int get_probe_mem_regno(const u8 *insn) @@ -663,6 +739,34 @@ static int bpf_jit_probe_mem(struct bpf_jit *jit, struct bpf_prog *fp,  }  /* + * Sign-extend the register if necessary + */ +static int sign_extend(struct bpf_jit *jit, int r, u8 size, u8 flags) +{ +	if (!(flags & BTF_FMODEL_SIGNED_ARG)) +		return 0; + +	switch (size) { +	case 1: +		/* lgbr %r,%r */ +		EMIT4(0xb9060000, r, r); +		return 0; +	case 2: +		/* lghr %r,%r */ +		EMIT4(0xb9070000, r, r); +		return 0; +	case 4: +		/* lgfr %r,%r */ +		EMIT4(0xb9140000, r, r); +		return 0; +	case 8: +		return 0; +	default: +		return -1; +	} +} + +/*   * Compile one eBPF instruction into s390x code   *   * NOTE: Use noinline because for gcov (-fprofile-arcs) gcc allocates a lot of @@ -1297,9 +1401,10 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,  	 */  	case BPF_JMP | BPF_CALL:  	{ -		u64 func; +		const struct btf_func_model *m;  		bool func_addr_fixed; -		int ret; +		int j, ret; +		u64 func;  		ret = bpf_jit_get_func_addr(fp, insn, extra_pass,  					    &func, &func_addr_fixed); @@ -1308,15 +1413,38 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,  		REG_SET_SEEN(BPF_REG_5);  		jit->seen |= SEEN_FUNC; +		/* +		 * Copy the tail call counter to where the callee expects it. +		 * +		 * Note 1: The callee can increment the tail call counter, but +		 * we do not load it back, since the x86 JIT does not do this +		 * either. 
+		 * +		 * Note 2: We assume that the verifier does not let us call the +		 * main program, which clears the tail call counter on entry. +		 */ +		/* mvc STK_OFF_TCCNT(4,%r15),N(%r15) */ +		_EMIT6(0xd203f000 | STK_OFF_TCCNT, +		       0xf000 | (STK_OFF_TCCNT + STK_OFF + stack_depth)); + +		/* Sign-extend the kfunc arguments. */ +		if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) { +			m = bpf_jit_find_kfunc_model(fp, insn); +			if (!m) +				return -1; + +			for (j = 0; j < m->nr_args; j++) { +				if (sign_extend(jit, BPF_REG_1 + j, +						m->arg_size[j], +						m->arg_flags[j])) +					return -1; +			} +		} +  		/* lgrl %w1,func */  		EMIT6_PCREL_RILB(0xc4080000, REG_W1, _EMIT_CONST_U64(func)); -		if (nospec_uses_trampoline()) { -			/* brasl %r14,__s390_indirect_jump_r1 */ -			EMIT6_PCREL_RILB(0xc0050000, REG_14, jit->r1_thunk_ip); -		} else { -			/* basr %r14,%w1 */ -			EMIT2(0x0d00, REG_14, REG_W1); -		} +		/* %r1() */ +		call_r1(jit);  		/* lgr %b0,%r2: load return value into %b0 */  		EMIT4(0xb9040000, BPF_REG_0, REG_2);  		break; @@ -1329,10 +1457,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,  		 *  B1: pointer to ctx  		 *  B2: pointer to bpf_array  		 *  B3: index in bpf_array -		 */ -		jit->seen |= SEEN_TAIL_CALL; - -		/* +		 *  		 * if (index >= array->map.max_entries)  		 *         goto out;  		 */ @@ -1393,8 +1518,16 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,  		/* lg %r1,bpf_func(%r1) */  		EMIT6_DISP_LH(0xe3000000, 0x0004, REG_1, REG_1, REG_0,  			      offsetof(struct bpf_prog, bpf_func)); -		/* bc 0xf,tail_call_start(%r1) */ -		_EMIT4(0x47f01000 + jit->tail_call_start); +		if (nospec_uses_trampoline()) { +			jit->seen |= SEEN_FUNC; +			/* aghi %r1,tail_call_start */ +			EMIT4_IMM(0xa70b0000, REG_1, jit->tail_call_start); +			/* brcl 0xf,__s390_indirect_jump_r1 */ +			EMIT6_PCREL_RILC(0xc0040000, 0xf, jit->r1_thunk_ip); +		} else { +			/* bc 0xf,tail_call_start(%r1) */ +			_EMIT4(0x47f01000 + jit->tail_call_start); +		}  		/* out: */  		if (jit->prg_buf) {  			*(u16 *)(jit->prg_buf + patch_1_clrj + 2) = @@ -1688,7 +1821,7 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp,  	jit->prg = 0;  	jit->excnt = 0; -	bpf_jit_prologue(jit, stack_depth); +	bpf_jit_prologue(jit, fp, stack_depth);  	if (bpf_set_addr(jit, 0) < 0)  		return -1;  	for (i = 0; i < fp->len; i += insn_count) { @@ -1768,6 +1901,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)  	struct bpf_jit jit;  	int pass; +	if (WARN_ON_ONCE(bpf_plt_end - bpf_plt != BPF_PLT_SIZE)) +		return orig_fp; +  	if (!fp->jit_requested)  		return orig_fp; @@ -1859,3 +1995,508 @@ out:  					   tmp : orig_fp);  	return fp;  } + +bool bpf_jit_supports_kfunc_call(void) +{ +	return true; +} + +int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, +		       void *old_addr, void *new_addr) +{ +	struct { +		u16 opc; +		s32 disp; +	} __packed insn; +	char expected_plt[BPF_PLT_SIZE]; +	char current_plt[BPF_PLT_SIZE]; +	char *plt; +	int err; + +	/* Verify the branch to be patched. */ +	err = copy_from_kernel_nofault(&insn, ip, sizeof(insn)); +	if (err < 0) +		return err; +	if (insn.opc != (0xc004 | (old_addr ? 0xf0 : 0))) +		return -EINVAL; + +	if (t == BPF_MOD_JUMP && +	    insn.disp == ((char *)new_addr - (char *)ip) >> 1) { +		/* +		 * The branch already points to the destination, +		 * there is no PLT. +		 */ +	} else { +		/* Verify the PLT. 
*/ +		plt = (char *)ip + (insn.disp << 1); +		err = copy_from_kernel_nofault(current_plt, plt, BPF_PLT_SIZE); +		if (err < 0) +			return err; +		bpf_jit_plt(expected_plt, (char *)ip + 6, old_addr); +		if (memcmp(current_plt, expected_plt, BPF_PLT_SIZE)) +			return -EINVAL; +		/* Adjust the call address. */ +		s390_kernel_write(plt + (bpf_plt_target - bpf_plt), +				  &new_addr, sizeof(void *)); +	} + +	/* Adjust the mask of the branch. */ +	insn.opc = 0xc004 | (new_addr ? 0xf0 : 0); +	s390_kernel_write((char *)ip + 1, (char *)&insn.opc + 1, 1); + +	/* Make the new code visible to the other CPUs. */ +	text_poke_sync_lock(); + +	return 0; +} + +struct bpf_tramp_jit { +	struct bpf_jit common; +	int orig_stack_args_off;/* Offset of arguments placed on stack by the +				 * func_addr's original caller +				 */ +	int stack_size;		/* Trampoline stack size */ +	int stack_args_off;	/* Offset of stack arguments for calling +				 * func_addr, has to be at the top +				 */ +	int reg_args_off;	/* Offset of register arguments for calling +				 * func_addr +				 */ +	int ip_off;		/* For bpf_get_func_ip(), has to be at +				 * (ctx - 16) +				 */ +	int arg_cnt_off;	/* For bpf_get_func_arg_cnt(), has to be at +				 * (ctx - 8) +				 */ +	int bpf_args_off;	/* Offset of BPF_PROG context, which consists +				 * of BPF arguments followed by return value +				 */ +	int retval_off;		/* Offset of return value (see above) */ +	int r7_r8_off;		/* Offset of saved %r7 and %r8, which are used +				 * for __bpf_prog_enter() return value and +				 * func_addr respectively +				 */ +	int r14_off;		/* Offset of saved %r14 */ +	int run_ctx_off;	/* Offset of struct bpf_tramp_run_ctx */ +	int do_fexit;		/* do_fexit: label */ +}; + +static void load_imm64(struct bpf_jit *jit, int dst_reg, u64 val) +{ +	/* llihf %dst_reg,val_hi */ +	EMIT6_IMM(0xc00e0000, dst_reg, (val >> 32)); +	/* oilf %rdst_reg,val_lo */ +	EMIT6_IMM(0xc00d0000, dst_reg, val); +} + +static int invoke_bpf_prog(struct bpf_tramp_jit *tjit, +			   const struct btf_func_model *m, +			   struct bpf_tramp_link *tlink, bool save_ret) +{ +	struct bpf_jit *jit = &tjit->common; +	int cookie_off = tjit->run_ctx_off + +			 offsetof(struct bpf_tramp_run_ctx, bpf_cookie); +	struct bpf_prog *p = tlink->link.prog; +	int patch; + +	/* +	 * run_ctx.cookie = tlink->cookie; +	 */ + +	/* %r0 = tlink->cookie */ +	load_imm64(jit, REG_W0, tlink->cookie); +	/* stg %r0,cookie_off(%r15) */ +	EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W0, REG_0, REG_15, cookie_off); + +	/* +	 * if ((start = __bpf_prog_enter(p, &run_ctx)) == 0) +	 *         goto skip; +	 */ + +	/* %r1 = __bpf_prog_enter */ +	load_imm64(jit, REG_1, (u64)bpf_trampoline_enter(p)); +	/* %r2 = p */ +	load_imm64(jit, REG_2, (u64)p); +	/* la %r3,run_ctx_off(%r15) */ +	EMIT4_DISP(0x41000000, REG_3, REG_15, tjit->run_ctx_off); +	/* %r1() */ +	call_r1(jit); +	/* ltgr %r7,%r2 */ +	EMIT4(0xb9020000, REG_7, REG_2); +	/* brcl 8,skip */ +	patch = jit->prg; +	EMIT6_PCREL_RILC(0xc0040000, 8, 0); + +	/* +	 * retval = bpf_func(args, p->insnsi); +	 */ + +	/* %r1 = p->bpf_func */ +	load_imm64(jit, REG_1, (u64)p->bpf_func); +	/* la %r2,bpf_args_off(%r15) */ +	EMIT4_DISP(0x41000000, REG_2, REG_15, tjit->bpf_args_off); +	/* %r3 = p->insnsi */ +	if (!p->jited) +		load_imm64(jit, REG_3, (u64)p->insnsi); +	/* %r1() */ +	call_r1(jit); +	/* stg %r2,retval_off(%r15) */ +	if (save_ret) { +		if (sign_extend(jit, REG_2, m->ret_size, m->ret_flags)) +			return -1; +		EMIT6_DISP_LH(0xe3000000, 0x0024, REG_2, REG_0, REG_15, +			      tjit->retval_off); +	} + +	/* 
skip: */ +	if (jit->prg_buf) +		*(u32 *)&jit->prg_buf[patch + 2] = (jit->prg - patch) >> 1; + +	/* +	 * __bpf_prog_exit(p, start, &run_ctx); +	 */ + +	/* %r1 = __bpf_prog_exit */ +	load_imm64(jit, REG_1, (u64)bpf_trampoline_exit(p)); +	/* %r2 = p */ +	load_imm64(jit, REG_2, (u64)p); +	/* lgr %r3,%r7 */ +	EMIT4(0xb9040000, REG_3, REG_7); +	/* la %r4,run_ctx_off(%r15) */ +	EMIT4_DISP(0x41000000, REG_4, REG_15, tjit->run_ctx_off); +	/* %r1() */ +	call_r1(jit); + +	return 0; +} + +static int alloc_stack(struct bpf_tramp_jit *tjit, size_t size) +{ +	int stack_offset = tjit->stack_size; + +	tjit->stack_size += size; +	return stack_offset; +} + +/* ABI uses %r2 - %r6 for parameter passing. */ +#define MAX_NR_REG_ARGS 5 + +/* The "L" field of the "mvc" instruction is 8 bits. */ +#define MAX_MVC_SIZE 256 +#define MAX_NR_STACK_ARGS (MAX_MVC_SIZE / sizeof(u64)) + +/* -mfentry generates a 6-byte nop on s390x. */ +#define S390X_PATCH_SIZE 6 + +static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, +					 struct bpf_tramp_jit *tjit, +					 const struct btf_func_model *m, +					 u32 flags, +					 struct bpf_tramp_links *tlinks, +					 void *func_addr) +{ +	struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN]; +	struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY]; +	struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT]; +	int nr_bpf_args, nr_reg_args, nr_stack_args; +	struct bpf_jit *jit = &tjit->common; +	int arg, bpf_arg_off; +	int i, j; + +	/* Support as many stack arguments as "mvc" instruction can handle. */ +	nr_reg_args = min_t(int, m->nr_args, MAX_NR_REG_ARGS); +	nr_stack_args = m->nr_args - nr_reg_args; +	if (nr_stack_args > MAX_NR_STACK_ARGS) +		return -ENOTSUPP; + +	/* Return to %r14, since func_addr and %r0 are not available. */ +	if (!func_addr && !(flags & BPF_TRAMP_F_ORIG_STACK)) +		flags |= BPF_TRAMP_F_SKIP_FRAME; + +	/* +	 * Compute how many arguments we need to pass to BPF programs. +	 * BPF ABI mirrors that of x86_64: arguments that are 16 bytes or +	 * smaller are packed into 1 or 2 registers; larger arguments are +	 * passed via pointers. +	 * In s390x ABI, arguments that are 8 bytes or smaller are packed into +	 * a register; larger arguments are passed via pointers. +	 * We need to deal with this difference. +	 */ +	nr_bpf_args = 0; +	for (i = 0; i < m->nr_args; i++) { +		if (m->arg_size[i] <= 8) +			nr_bpf_args += 1; +		else if (m->arg_size[i] <= 16) +			nr_bpf_args += 2; +		else +			return -ENOTSUPP; +	} + +	/* +	 * Calculate the stack layout. +	 */ + +	/* Reserve STACK_FRAME_OVERHEAD bytes for the callees. */ +	tjit->stack_size = STACK_FRAME_OVERHEAD; +	tjit->stack_args_off = alloc_stack(tjit, nr_stack_args * sizeof(u64)); +	tjit->reg_args_off = alloc_stack(tjit, nr_reg_args * sizeof(u64)); +	tjit->ip_off = alloc_stack(tjit, sizeof(u64)); +	tjit->arg_cnt_off = alloc_stack(tjit, sizeof(u64)); +	tjit->bpf_args_off = alloc_stack(tjit, nr_bpf_args * sizeof(u64)); +	tjit->retval_off = alloc_stack(tjit, sizeof(u64)); +	tjit->r7_r8_off = alloc_stack(tjit, 2 * sizeof(u64)); +	tjit->r14_off = alloc_stack(tjit, sizeof(u64)); +	tjit->run_ctx_off = alloc_stack(tjit, +					sizeof(struct bpf_tramp_run_ctx)); +	/* The caller has already reserved STACK_FRAME_OVERHEAD bytes. 
*/ +	tjit->stack_size -= STACK_FRAME_OVERHEAD; +	tjit->orig_stack_args_off = tjit->stack_size + STACK_FRAME_OVERHEAD; + +	/* aghi %r15,-stack_size */ +	EMIT4_IMM(0xa70b0000, REG_15, -tjit->stack_size); +	/* stmg %r2,%rN,fwd_reg_args_off(%r15) */ +	if (nr_reg_args) +		EMIT6_DISP_LH(0xeb000000, 0x0024, REG_2, +			      REG_2 + (nr_reg_args - 1), REG_15, +			      tjit->reg_args_off); +	for (i = 0, j = 0; i < m->nr_args; i++) { +		if (i < MAX_NR_REG_ARGS) +			arg = REG_2 + i; +		else +			arg = tjit->orig_stack_args_off + +			      (i - MAX_NR_REG_ARGS) * sizeof(u64); +		bpf_arg_off = tjit->bpf_args_off + j * sizeof(u64); +		if (m->arg_size[i] <= 8) { +			if (i < MAX_NR_REG_ARGS) +				/* stg %arg,bpf_arg_off(%r15) */ +				EMIT6_DISP_LH(0xe3000000, 0x0024, arg, +					      REG_0, REG_15, bpf_arg_off); +			else +				/* mvc bpf_arg_off(8,%r15),arg(%r15) */ +				_EMIT6(0xd207f000 | bpf_arg_off, +				       0xf000 | arg); +			j += 1; +		} else { +			if (i < MAX_NR_REG_ARGS) { +				/* mvc bpf_arg_off(16,%r15),0(%arg) */ +				_EMIT6(0xd20ff000 | bpf_arg_off, +				       reg2hex[arg] << 12); +			} else { +				/* lg %r1,arg(%r15) */ +				EMIT6_DISP_LH(0xe3000000, 0x0004, REG_1, REG_0, +					      REG_15, arg); +				/* mvc bpf_arg_off(16,%r15),0(%r1) */ +				_EMIT6(0xd20ff000 | bpf_arg_off, 0x1000); +			} +			j += 2; +		} +	} +	/* stmg %r7,%r8,r7_r8_off(%r15) */ +	EMIT6_DISP_LH(0xeb000000, 0x0024, REG_7, REG_8, REG_15, +		      tjit->r7_r8_off); +	/* stg %r14,r14_off(%r15) */ +	EMIT6_DISP_LH(0xe3000000, 0x0024, REG_14, REG_0, REG_15, tjit->r14_off); + +	if (flags & BPF_TRAMP_F_ORIG_STACK) { +		/* +		 * The ftrace trampoline puts the return address (which is the +		 * address of the original function + S390X_PATCH_SIZE) into +		 * %r0; see ftrace_shared_hotpatch_trampoline_br and +		 * ftrace_init_nop() for details. 
+		 */ + +		/* lgr %r8,%r0 */ +		EMIT4(0xb9040000, REG_8, REG_0); +	} else { +		/* %r8 = func_addr + S390X_PATCH_SIZE */ +		load_imm64(jit, REG_8, (u64)func_addr + S390X_PATCH_SIZE); +	} + +	/* +	 * ip = func_addr; +	 * arg_cnt = m->nr_args; +	 */ + +	if (flags & BPF_TRAMP_F_IP_ARG) { +		/* %r0 = func_addr */ +		load_imm64(jit, REG_0, (u64)func_addr); +		/* stg %r0,ip_off(%r15) */ +		EMIT6_DISP_LH(0xe3000000, 0x0024, REG_0, REG_0, REG_15, +			      tjit->ip_off); +	} +	/* lghi %r0,nr_bpf_args */ +	EMIT4_IMM(0xa7090000, REG_0, nr_bpf_args); +	/* stg %r0,arg_cnt_off(%r15) */ +	EMIT6_DISP_LH(0xe3000000, 0x0024, REG_0, REG_0, REG_15, +		      tjit->arg_cnt_off); + +	if (flags & BPF_TRAMP_F_CALL_ORIG) { +		/* +		 * __bpf_tramp_enter(im); +		 */ + +		/* %r1 = __bpf_tramp_enter */ +		load_imm64(jit, REG_1, (u64)__bpf_tramp_enter); +		/* %r2 = im */ +		load_imm64(jit, REG_2, (u64)im); +		/* %r1() */ +		call_r1(jit); +	} + +	for (i = 0; i < fentry->nr_links; i++) +		if (invoke_bpf_prog(tjit, m, fentry->links[i], +				    flags & BPF_TRAMP_F_RET_FENTRY_RET)) +			return -EINVAL; + +	if (fmod_ret->nr_links) { +		/* +		 * retval = 0; +		 */ + +		/* xc retval_off(8,%r15),retval_off(%r15) */ +		_EMIT6(0xd707f000 | tjit->retval_off, +		       0xf000 | tjit->retval_off); + +		for (i = 0; i < fmod_ret->nr_links; i++) { +			if (invoke_bpf_prog(tjit, m, fmod_ret->links[i], true)) +				return -EINVAL; + +			/* +			 * if (retval) +			 *         goto do_fexit; +			 */ + +			/* ltg %r0,retval_off(%r15) */ +			EMIT6_DISP_LH(0xe3000000, 0x0002, REG_0, REG_0, REG_15, +				      tjit->retval_off); +			/* brcl 7,do_fexit */ +			EMIT6_PCREL_RILC(0xc0040000, 7, tjit->do_fexit); +		} +	} + +	if (flags & BPF_TRAMP_F_CALL_ORIG) { +		/* +		 * retval = func_addr(args); +		 */ + +		/* lmg %r2,%rN,reg_args_off(%r15) */ +		if (nr_reg_args) +			EMIT6_DISP_LH(0xeb000000, 0x0004, REG_2, +				      REG_2 + (nr_reg_args - 1), REG_15, +				      tjit->reg_args_off); +		/* mvc stack_args_off(N,%r15),orig_stack_args_off(%r15) */ +		if (nr_stack_args) +			_EMIT6(0xd200f000 | +				       (nr_stack_args * sizeof(u64) - 1) << 16 | +				       tjit->stack_args_off, +			       0xf000 | tjit->orig_stack_args_off); +		/* lgr %r1,%r8 */ +		EMIT4(0xb9040000, REG_1, REG_8); +		/* %r1() */ +		call_r1(jit); +		/* stg %r2,retval_off(%r15) */ +		EMIT6_DISP_LH(0xe3000000, 0x0024, REG_2, REG_0, REG_15, +			      tjit->retval_off); + +		im->ip_after_call = jit->prg_buf + jit->prg; + +		/* +		 * The following nop will be patched by bpf_tramp_image_put(). 
+		 */ + +		/* brcl 0,im->ip_epilogue */ +		EMIT6_PCREL_RILC(0xc0040000, 0, (u64)im->ip_epilogue); +	} + +	/* do_fexit: */ +	tjit->do_fexit = jit->prg; +	for (i = 0; i < fexit->nr_links; i++) +		if (invoke_bpf_prog(tjit, m, fexit->links[i], false)) +			return -EINVAL; + +	if (flags & BPF_TRAMP_F_CALL_ORIG) { +		im->ip_epilogue = jit->prg_buf + jit->prg; + +		/* +		 * __bpf_tramp_exit(im); +		 */ + +		/* %r1 = __bpf_tramp_exit */ +		load_imm64(jit, REG_1, (u64)__bpf_tramp_exit); +		/* %r2 = im */ +		load_imm64(jit, REG_2, (u64)im); +		/* %r1() */ +		call_r1(jit); +	} + +	/* lmg %r2,%rN,reg_args_off(%r15) */ +	if ((flags & BPF_TRAMP_F_RESTORE_REGS) && nr_reg_args) +		EMIT6_DISP_LH(0xeb000000, 0x0004, REG_2, +			      REG_2 + (nr_reg_args - 1), REG_15, +			      tjit->reg_args_off); +	/* lgr %r1,%r8 */ +	if (!(flags & BPF_TRAMP_F_SKIP_FRAME)) +		EMIT4(0xb9040000, REG_1, REG_8); +	/* lmg %r7,%r8,r7_r8_off(%r15) */ +	EMIT6_DISP_LH(0xeb000000, 0x0004, REG_7, REG_8, REG_15, +		      tjit->r7_r8_off); +	/* lg %r14,r14_off(%r15) */ +	EMIT6_DISP_LH(0xe3000000, 0x0004, REG_14, REG_0, REG_15, tjit->r14_off); +	/* lg %r2,retval_off(%r15) */ +	if (flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET)) +		EMIT6_DISP_LH(0xe3000000, 0x0004, REG_2, REG_0, REG_15, +			      tjit->retval_off); +	/* aghi %r15,stack_size */ +	EMIT4_IMM(0xa70b0000, REG_15, tjit->stack_size); +	/* Emit an expoline for the following indirect jump. */ +	if (nospec_uses_trampoline()) +		emit_expoline(jit); +	if (flags & BPF_TRAMP_F_SKIP_FRAME) +		/* br %r14 */ +		_EMIT2(0x07fe); +	else +		/* br %r1 */ +		_EMIT2(0x07f1); + +	emit_r1_thunk(jit); + +	return 0; +} + +int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, +				void *image_end, const struct btf_func_model *m, +				u32 flags, struct bpf_tramp_links *tlinks, +				void *func_addr) +{ +	struct bpf_tramp_jit tjit; +	int ret; +	int i; + +	for (i = 0; i < 2; i++) { +		if (i == 0) { +			/* Compute offsets, check whether the code fits. */ +			memset(&tjit, 0, sizeof(tjit)); +		} else { +			/* Generate the code. */ +			tjit.common.prg = 0; +			tjit.common.prg_buf = image; +		} +		ret = __arch_prepare_bpf_trampoline(im, &tjit, m, flags, +						    tlinks, func_addr); +		if (ret < 0) +			return ret; +		if (tjit.common.prg > (char *)image_end - (char *)image) +			/* +			 * Use the same error code as for exceeding +			 * BPF_MAX_TRAMP_LINKS. +			 */ +			return -E2BIG; +	} + +	return ret; +} + +bool bpf_jit_supports_subprog_tailcalls(void) +{ +	return true; +}  |
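
The 32-byte PLT emitted by bpf_jit_plt() can be pictured as a small structure. The sketch below is illustrative only: the struct and field names are hypothetical, and the JIT itself works with raw offsets (bpf_plt_ret - bpf_plt, bpf_plt_target - bpf_plt) into a byte copy of the bpf_plt template.

	/*
	 * Illustrative layout of the hotpatch PLT (hypothetical struct;
	 * not part of the patch above).
	 */
	struct bpf_plt_entry {
		char code[16];	/* lgrl %r0,ret (6 bytes) +
				 * lgrl %r1,target (6 bytes) +
				 * br %r1 (2 bytes) +
				 * 2 bytes of ".align 8" padding */
		void *ret;	/* loaded into %r0: return address */
		void *target;	/* loaded into %r1: branch destination */
	};
	/* sizeof(struct bpf_plt_entry) == BPF_PLT_SIZE == 32 */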
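For context, a hedged sketch of how generic code such as kernel/bpf/trampoline.c drives the new bpf_arch_text_poke() hook. While detached, the prologue's brcl has mask 0 (never taken); attaching rewrites the target quadword in the PLT and flips the mask nibble at ip + 1 to 0xf (always taken). prog_entry_ip and tramp_image below are placeholder names, not identifiers from this patch.

	/* Attach: the patch site is currently a no-op (old_addr == NULL). */
	err = bpf_arch_text_poke(prog_entry_ip, BPF_MOD_CALL,
				 NULL, tramp_image);
	/* Detach: new_addr == NULL turns the brcl back into a no-op. */
	err = bpf_arch_text_poke(prog_entry_ip, BPF_MOD_CALL,
				 tramp_image, NULL);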
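Why sign_extend() matters for kfunc calls: the s390x C ABI expects sub-64-bit signed integer arguments to arrive sign-extended in 64-bit registers, whereas BPF zero-extends 32-bit values. A worked example for a kfunc taking a signed int (values are illustrative):

	/*
	 * BPF passes -100 as a zero-extended 32-bit value:
	 *	%r2 = 0x00000000ffffff9c
	 * lgfr %r2,%r2 (emitted by sign_extend() for size 4) yields
	 *	%r2 = 0xffffffffffffff9c
	 * which is what an s390x C callee taking "int" expects.
	 */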
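Finally, the trampoline stack frame, reconstructed from the alloc_stack() calls in __arch_prepare_bpf_trampoline() (a sketch; offsets grow upward from the new %r15):

	/*
	 * 0			STACK_FRAME_OVERHEAD save area for callees
	 * stack_args_off	stack arguments for calling func_addr
	 * reg_args_off		saved register arguments %r2-%r6
	 * ip_off		func_addr, at (ctx - 16) for bpf_get_func_ip()
	 * arg_cnt_off		nr_bpf_args, at (ctx - 8)
	 * bpf_args_off		ctx for BPF programs: args + return value
	 * retval_off		return value
	 * r7_r8_off		saved %r7 and %r8
	 * r14_off		saved %r14
	 * run_ctx_off		struct bpf_tramp_run_ctx
	 *
	 * The final STACK_FRAME_OVERHEAD bytes come out of the caller's
	 * frame, hence the subtraction before orig_stack_args_off.
	 */

Note that arch_prepare_bpf_trampoline() runs the generator twice: a sizing pass with prg_buf == NULL that only computes offsets and checks that the code fits, then an emission pass into the image.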