diff options
Diffstat (limited to 'arch/s390')
36 files changed, 646 insertions, 416 deletions
diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h index d68e11e0df5e..7ffd0b19135c 100644 --- a/arch/s390/include/asm/barrier.h +++ b/arch/s390/include/asm/barrier.h @@ -36,7 +36,7 @@ #define smp_mb__before_atomic() smp_mb() #define smp_mb__after_atomic() smp_mb() -#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0) +#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0) #define smp_store_release(p, v) \ do { \ diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h index 0c5d8ee657f0..d1e7b0a0feeb 100644 --- a/arch/s390/include/asm/cio.h +++ b/arch/s390/include/asm/cio.h @@ -312,6 +312,7 @@ extern void css_schedule_reprobe(void); extern void reipl_ccw_dev(struct ccw_dev_id *id); struct cio_iplinfo { + u8 ssid; u16 devno; int is_qdio; }; diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index 3ad48f22de78..08e34a5dc909 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -104,6 +104,9 @@ #define HWCAP_S390_TE 1024 #define HWCAP_S390_VXRS 2048 +/* Internal bits, not exposed via elf */ +#define HWCAP_INT_SIE 1UL + /* * These are used to set parameters in the core dumps. */ @@ -169,6 +172,10 @@ extern unsigned int vdso_enabled; extern unsigned long elf_hwcap; #define ELF_HWCAP (elf_hwcap) +/* Internal hardware capabilities, not exposed via elf */ + +extern unsigned long int_hwcap; + /* This yields a string that ld.so will use to load implementation specific libraries for optimization. This is more specific in intent than poking at uname or /proc/cpuinfo. @@ -206,9 +213,16 @@ do { \ } while (0) #endif /* CONFIG_COMPAT */ -extern unsigned long mmap_rnd_mask; - -#define STACK_RND_MASK (test_thread_flag(TIF_31BIT) ? 0x7ff : mmap_rnd_mask) +/* + * Cache aliasing on the latest machines calls for a mapping granularity + * of 512KB. For 64-bit processes use a 512KB alignment and a randomization + * of up to 1GB. For 31-bit processes the virtual address space is limited, + * use no alignment and limit the randomization to 8MB. + */ +#define BRK_RND_MASK (is_32bit_task() ? 0x7ffUL : 0x3ffffUL) +#define MMAP_RND_MASK (is_32bit_task() ? 0x7ffUL : 0x3ff80UL) +#define MMAP_ALIGN_MASK (is_32bit_task() ? 0 : 0x7fUL) +#define STACK_RND_MASK MMAP_RND_MASK #define ARCH_DLINFO \ do { \ diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h index 39ae6a359747..86634e71b69f 100644 --- a/arch/s390/include/asm/ipl.h +++ b/arch/s390/include/asm/ipl.h @@ -64,7 +64,8 @@ struct ipl_block_fcp { struct ipl_block_ccw { u8 reserved1[84]; - u8 reserved2[2]; + u16 reserved2 : 13; + u8 ssid : 3; u16 devno; u8 vm_flags; u8 reserved3[3]; diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index efaac2c3bb77..6742414dbd6f 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -25,7 +25,9 @@ #include <asm/fpu/api.h> #include <asm/isc.h> -#define KVM_MAX_VCPUS 64 +#define KVM_S390_BSCA_CPU_SLOTS 64 +#define KVM_S390_ESCA_CPU_SLOTS 248 +#define KVM_MAX_VCPUS KVM_S390_ESCA_CPU_SLOTS #define KVM_USER_MEM_SLOTS 32 /* @@ -37,12 +39,41 @@ #define KVM_IRQCHIP_NUM_PINS 4096 #define KVM_HALT_POLL_NS_DEFAULT 0 +/* s390-specific vcpu->requests bit members */ +#define KVM_REQ_ENABLE_IBS 8 +#define KVM_REQ_DISABLE_IBS 9 + #define SIGP_CTRL_C 0x80 #define SIGP_CTRL_SCN_MASK 0x3f -struct sca_entry { +union bsca_sigp_ctrl { + __u8 value; + struct { + __u8 c : 1; + __u8 r : 1; + __u8 scn : 6; + }; +} __packed; + +union esca_sigp_ctrl { + __u16 value; + struct { + __u8 c : 1; + __u8 reserved: 7; + __u8 scn; + }; +} __packed; + +struct esca_entry { + union esca_sigp_ctrl sigp_ctrl; + __u16 reserved1[3]; + __u64 sda; + __u64 reserved2[6]; +} __packed; + +struct bsca_entry { __u8 reserved0; - __u8 sigp_ctrl; + union bsca_sigp_ctrl sigp_ctrl; __u16 reserved[3]; __u64 sda; __u64 reserved2[2]; @@ -57,14 +88,22 @@ union ipte_control { }; }; -struct sca_block { +struct bsca_block { union ipte_control ipte_control; __u64 reserved[5]; __u64 mcn; __u64 reserved2; - struct sca_entry cpu[64]; + struct bsca_entry cpu[KVM_S390_BSCA_CPU_SLOTS]; } __attribute__((packed)); +struct esca_block { + union ipte_control ipte_control; + __u64 reserved1[7]; + __u64 mcn[4]; + __u64 reserved2[20]; + struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS]; +} __packed; + #define CPUSTAT_STOPPED 0x80000000 #define CPUSTAT_WAIT 0x10000000 #define CPUSTAT_ECALL_PEND 0x08000000 @@ -182,7 +221,8 @@ struct kvm_s390_sie_block { __u64 pp; /* 0x01de */ __u8 reserved1e6[2]; /* 0x01e6 */ __u64 itdba; /* 0x01e8 */ - __u8 reserved1f0[16]; /* 0x01f0 */ + __u64 riccbd; /* 0x01f0 */ + __u8 reserved1f8[8]; /* 0x01f8 */ } __attribute__((packed)); struct kvm_s390_itdb { @@ -585,11 +625,14 @@ struct kvm_s390_crypto_cb { }; struct kvm_arch{ - struct sca_block *sca; + void *sca; + int use_esca; + rwlock_t sca_lock; debug_info_t *dbf; struct kvm_s390_float_interrupt float_int; struct kvm_device *flic; struct gmap *gmap; + unsigned long mem_limit; int css_support; int use_irqchip; int use_cmma; diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h index 7a7abf1a5537..1aac41e83ea1 100644 --- a/arch/s390/include/asm/pci_dma.h +++ b/arch/s390/include/asm/pci_dma.h @@ -195,5 +195,7 @@ void zpci_dma_exit_device(struct zpci_dev *); void dma_free_seg_table(unsigned long); unsigned long *dma_alloc_cpu_table(void); void dma_cleanup_tables(unsigned long *); -void dma_update_cpu_trans(unsigned long *, void *, dma_addr_t, int); +unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr); +void dma_update_cpu_trans(unsigned long *entry, void *page_addr, int flags); + #endif diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index 821dde5f425d..dea883f85d66 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -29,7 +29,10 @@ struct sclp_ipl_info { struct sclp_core_entry { u8 core_id; - u8 reserved0[2]; + u8 reserved0; + u8 : 4; + u8 sief2 : 1; + u8 : 3; u8 : 3; u8 siif : 1; u8 sigpif : 1; @@ -53,6 +56,9 @@ struct sclp_info { unsigned char has_sigpif : 1; unsigned char has_core_type : 1; unsigned char has_sprp : 1; + unsigned char has_hvs : 1; + unsigned char has_esca : 1; + unsigned char has_sief2 : 1; unsigned int ibc; unsigned int mtid; unsigned int mtid_cp; diff --git a/arch/s390/include/asm/trace/diag.h b/arch/s390/include/asm/trace/diag.h index 776f307960cc..cc6cfe7889da 100644 --- a/arch/s390/include/asm/trace/diag.h +++ b/arch/s390/include/asm/trace/diag.h @@ -19,7 +19,7 @@ #define TRACE_INCLUDE_PATH asm/trace #define TRACE_INCLUDE_FILE diag -TRACE_EVENT(diagnose, +TRACE_EVENT(s390_diagnose, TP_PROTO(unsigned short nr), TP_ARGS(nr), TP_STRUCT__entry( @@ -32,9 +32,9 @@ TRACE_EVENT(diagnose, ); #ifdef CONFIG_TRACEPOINTS -void trace_diagnose_norecursion(int diag_nr); +void trace_s390_diagnose_norecursion(int diag_nr); #else -static inline void trace_diagnose_norecursion(int diag_nr) { } +static inline void trace_s390_diagnose_norecursion(int diag_nr) { } #endif #endif /* _TRACE_S390_DIAG_H */ diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index ef1a5fcc6c66..fe84bd5fe7ce 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h @@ -66,6 +66,8 @@ struct kvm_s390_io_adapter_req { #define KVM_S390_VM_MEM_CLR_CMMA 1 #define KVM_S390_VM_MEM_LIMIT_SIZE 2 +#define KVM_S390_NO_MEM_LIMIT U64_MAX + /* kvm attributes for KVM_S390_VM_TOD */ #define KVM_S390_VM_TOD_LOW 0 #define KVM_S390_VM_TOD_HIGH 1 @@ -151,6 +153,7 @@ struct kvm_guest_debug_arch { #define KVM_SYNC_ARCH0 (1UL << 4) #define KVM_SYNC_PFAULT (1UL << 5) #define KVM_SYNC_VRS (1UL << 6) +#define KVM_SYNC_RICCB (1UL << 7) /* definition of registers in kvm_run */ struct kvm_sync_regs { __u64 prefix; /* prefix register */ @@ -168,6 +171,8 @@ struct kvm_sync_regs { __u64 vrs[32][2]; /* vector registers */ __u8 reserved[512]; /* for future vector expansion */ __u32 fpc; /* only valid with vector registers */ + __u8 padding[52]; /* riccb needs to be 64byte aligned */ + __u8 riccb[64]; /* runtime instrumentation controls block */ }; #define KVM_REG_S390_TODPR (KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1) diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h index 296942d56e6a..d02e89d14fef 100644 --- a/arch/s390/include/uapi/asm/socket.h +++ b/arch/s390/include/uapi/asm/socket.h @@ -91,4 +91,7 @@ #define SO_ATTACH_BPF 50 #define SO_DETACH_BPF SO_DETACH_FILTER +#define SO_ATTACH_REUSEPORT_CBPF 51 +#define SO_ATTACH_REUSEPORT_EBPF 52 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h index a848adba1504..34ec202472c6 100644 --- a/arch/s390/include/uapi/asm/unistd.h +++ b/arch/s390/include/uapi/asm/unistd.h @@ -192,14 +192,14 @@ #define __NR_set_tid_address 252 #define __NR_fadvise64 253 #define __NR_timer_create 254 -#define __NR_timer_settime (__NR_timer_create+1) -#define __NR_timer_gettime (__NR_timer_create+2) -#define __NR_timer_getoverrun (__NR_timer_create+3) -#define __NR_timer_delete (__NR_timer_create+4) -#define __NR_clock_settime (__NR_timer_create+5) -#define __NR_clock_gettime (__NR_timer_create+6) -#define __NR_clock_getres (__NR_timer_create+7) -#define __NR_clock_nanosleep (__NR_timer_create+8) +#define __NR_timer_settime 255 +#define __NR_timer_gettime 256 +#define __NR_timer_getoverrun 257 +#define __NR_timer_delete 258 +#define __NR_clock_settime 259 +#define __NR_clock_gettime 260 +#define __NR_clock_getres 261 +#define __NR_clock_nanosleep 262 /* Number 263 is reserved for vserver */ #define __NR_statfs64 265 #define __NR_fstatfs64 266 @@ -309,7 +309,8 @@ #define __NR_recvfrom 371 #define __NR_recvmsg 372 #define __NR_shutdown 373 -#define NR_syscalls 374 +#define __NR_mlock2 374 +#define NR_syscalls 375 /* * There are some system calls that are not present on 64 bit, some diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c index 09f194052df3..fac4eeddef91 100644 --- a/arch/s390/kernel/compat_wrapper.c +++ b/arch/s390/kernel/compat_wrapper.c @@ -176,3 +176,4 @@ COMPAT_SYSCALL_WRAP4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr, COMPAT_SYSCALL_WRAP3(getsockname, int, fd, struct sockaddr __user *, usockaddr, int __user *, usockaddr_len); COMPAT_SYSCALL_WRAP3(getpeername, int, fd, struct sockaddr __user *, usockaddr, int __user *, usockaddr_len); COMPAT_SYSCALL_WRAP6(sendto, int, fd, void __user *, buff, size_t, len, unsigned int, flags, struct sockaddr __user *, addr, int, addr_len); +COMPAT_SYSCALL_WRAP3(mlock2, unsigned long, start, size_t, len, int, flags); diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c index f98766ede4e1..48b37b8357e6 100644 --- a/arch/s390/kernel/diag.c +++ b/arch/s390/kernel/diag.c @@ -121,14 +121,14 @@ device_initcall(show_diag_stat_init); void diag_stat_inc(enum diag_stat_enum nr) { this_cpu_inc(diag_stat.counter[nr]); - trace_diagnose(diag_map[nr].code); + trace_s390_diagnose(diag_map[nr].code); } EXPORT_SYMBOL(diag_stat_inc); void diag_stat_inc_norecursion(enum diag_stat_enum nr) { this_cpu_inc(diag_stat.counter[nr]); - trace_diagnose_norecursion(diag_map[nr].code); + trace_s390_diagnose_norecursion(diag_map[nr].code); } EXPORT_SYMBOL(diag_stat_inc_norecursion); diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index 8140d10c6785..6e72961608f0 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -1920,16 +1920,23 @@ static int print_insn(char *buffer, unsigned char *code, unsigned long addr) } if (separator) ptr += sprintf(ptr, "%c", separator); + /* + * Use four '%' characters below because of the + * following two conversions: + * + * 1) sprintf: %%%%r -> %%r + * 2) printk : %%r -> %r + */ if (operand->flags & OPERAND_GPR) - ptr += sprintf(ptr, "%%r%i", value); + ptr += sprintf(ptr, "%%%%r%i", value); else if (operand->flags & OPERAND_FPR) - ptr += sprintf(ptr, "%%f%i", value); + ptr += sprintf(ptr, "%%%%f%i", value); else if (operand->flags & OPERAND_AR) - ptr += sprintf(ptr, "%%a%i", value); + ptr += sprintf(ptr, "%%%%a%i", value); else if (operand->flags & OPERAND_CR) - ptr += sprintf(ptr, "%%c%i", value); + ptr += sprintf(ptr, "%%%%c%i", value); else if (operand->flags & OPERAND_VR) - ptr += sprintf(ptr, "%%v%i", value); + ptr += sprintf(ptr, "%%%%v%i", value); else if (operand->flags & OPERAND_PCREL) ptr += sprintf(ptr, "%lx", (signed int) value + addr); diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S index 1255c6c5353e..301ee9c70688 100644 --- a/arch/s390/kernel/head.S +++ b/arch/s390/kernel/head.S @@ -26,6 +26,7 @@ #include <asm/asm-offsets.h> #include <asm/thread_info.h> #include <asm/page.h> +#include <asm/ptrace.h> #define ARCH_OFFSET 4 @@ -59,19 +60,6 @@ __HEAD .long 0x020006e0,0x20000050 .org 0x200 -# -# subroutine to set architecture mode -# -.Lsetmode: - mvi __LC_AR_MODE_ID,1 # set esame flag - slr %r0,%r0 # set cpuid to zero - lhi %r1,2 # mode 2 = esame (dump) - sigp %r1,%r0,0x12 # switch to esame mode - bras %r13,0f - .fill 16,4,0x0 -0: lmh %r0,%r15,0(%r13) # clear high-order half of gprs - sam31 # switch to 31 bit addressing mode - br %r14 # # subroutine to wait for end I/O @@ -159,7 +147,14 @@ __HEAD .long 0x02200050,0x00000000 iplstart: - bas %r14,.Lsetmode # Immediately switch to 64 bit mode + mvi __LC_AR_MODE_ID,1 # set esame flag + slr %r0,%r0 # set cpuid to zero + lhi %r1,2 # mode 2 = esame (dump) + sigp %r1,%r0,0x12 # switch to esame mode + bras %r13,0f + .fill 16,4,0x0 +0: lmh %r0,%r15,0(%r13) # clear high-order half of gprs + sam31 # switch to 31 bit addressing mode lh %r1,0xb8 # test if subchannel number bct %r1,.Lnoload # is valid l %r1,0xb8 # load ipl subchannel number @@ -269,71 +264,6 @@ iplstart: .Lcpuid:.fill 8,1,0 # -# SALIPL loader support. Based on a patch by Rob van der Heij. -# This entry point is called directly from the SALIPL loader and -# doesn't need a builtin ipl record. -# - .org 0x800 -ENTRY(start) - stm %r0,%r15,0x07b0 # store registers - bas %r14,.Lsetmode # Immediately switch to 64 bit mode - basr %r12,%r0 -.base: - l %r11,.parm - l %r8,.cmd # pointer to command buffer - - ltr %r9,%r9 # do we have SALIPL parameters? - bp .sk8x8 - - mvc 0(64,%r8),0x00b0 # copy saved registers - xc 64(240-64,%r8),0(%r8) # remainder of buffer - tr 0(64,%r8),.lowcase - b .gotr -.sk8x8: - mvc 0(240,%r8),0(%r9) # copy iplparms into buffer -.gotr: - slr %r0,%r0 - st %r0,INITRD_SIZE+ARCH_OFFSET-PARMAREA(%r11) - st %r0,INITRD_START+ARCH_OFFSET-PARMAREA(%r11) - j startup # continue with startup -.cmd: .long COMMAND_LINE # address of command line buffer -.parm: .long PARMAREA -.lowcase: - .byte 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07 - .byte 0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f - .byte 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17 - .byte 0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f - .byte 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27 - .byte 0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f - .byte 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37 - .byte 0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f - .byte 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47 - .byte 0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f - .byte 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57 - .byte 0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f - .byte 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67 - .byte 0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f - .byte 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77 - .byte 0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f - - .byte 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87 - .byte 0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f - .byte 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97 - .byte 0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f - .byte 0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7 - .byte 0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf - .byte 0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7 - .byte 0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf - .byte 0xc0,0x81,0x82,0x83,0x84,0x85,0x86,0x87 # .abcdefg - .byte 0x88,0x89,0xca,0xcb,0xcc,0xcd,0xce,0xcf # hi - .byte 0xd0,0x91,0x92,0x93,0x94,0x95,0x96,0x97 # .jklmnop - .byte 0x98,0x99,0xda,0xdb,0xdc,0xdd,0xde,0xdf # qr - .byte 0xe0,0xe1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7 # ..stuvwx - .byte 0xa8,0xa9,0xea,0xeb,0xec,0xed,0xee,0xef # yz - .byte 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7 - .byte 0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff - -# # startup-code at 0x10000, running in absolute addressing mode # this is called either by the ipl loader or directly by PSW restart # or linload or SALIPL @@ -364,7 +294,7 @@ ENTRY(startup_kdump) bras %r13,0f .fill 16,4,0x0 0: lmh %r0,%r15,0(%r13) # clear high-order half of gprs - sam31 # switch to 31 bit addressing mode + sam64 # switch to 64 bit addressing mode basr %r13,0 # get base .LPG0: xc 0x200(256),0x200 # partially clear lowcore @@ -395,7 +325,7 @@ ENTRY(startup_kdump) jnz 1b j 4f 2: l %r15,.Lstack-.LPG0(%r13) - ahi %r15,-96 + ahi %r15,-STACK_FRAME_OVERHEAD la %r2,.Lals_string-.LPG0(%r13) l %r3,.Lsclp_print-.LPG0(%r13) basr %r14,%r3 @@ -429,8 +359,7 @@ ENTRY(startup_kdump) .long 1, 0xc0000000 #endif 4: - /* Continue with 64bit startup code in head64.S */ - sam64 # switch to 64 bit mode + /* Continue with startup code in head64.S */ jg startup_continue .align 8 diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index f6d8acd7e136..b1f0a90f933b 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -121,6 +121,7 @@ static char *dump_type_str(enum dump_type type) * Must be in data section since the bss section * is not cleared when these are accessed. */ +static u8 ipl_ssid __attribute__((__section__(".data"))) = 0; static u16 ipl_devno __attribute__((__section__(".data"))) = 0; u32 ipl_flags __attribute__((__section__(".data"))) = 0; @@ -197,6 +198,33 @@ static ssize_t sys_##_prefix##_##_name##_show(struct kobject *kobj, \ return snprintf(page, PAGE_SIZE, _format, ##args); \ } +#define IPL_ATTR_CCW_STORE_FN(_prefix, _name, _ipl_blk) \ +static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \ + struct kobj_attribute *attr, \ + const char *buf, size_t len) \ +{ \ + unsigned long long ssid, devno; \ + \ + if (sscanf(buf, "0.%llx.%llx\n", &ssid, &devno) != 2) \ + return -EINVAL; \ + \ + if (ssid > __MAX_SSID || devno > __MAX_SUBCHANNEL) \ + return -EINVAL; \ + \ + _ipl_blk.ssid = ssid; \ + _ipl_blk.devno = devno; \ + return len; \ +} + +#define DEFINE_IPL_CCW_ATTR_RW(_prefix, _name, _ipl_blk) \ +IPL_ATTR_SHOW_FN(_prefix, _name, "0.%x.%04x\n", \ + _ipl_blk.ssid, _ipl_blk.devno); \ +IPL_ATTR_CCW_STORE_FN(_prefix, _name, _ipl_blk); \ +static struct kobj_attribute sys_##_prefix##_##_name##_attr = \ + __ATTR(_name, (S_IRUGO | S_IWUSR), \ + sys_##_prefix##_##_name##_show, \ + sys_##_prefix##_##_name##_store) \ + #define DEFINE_IPL_ATTR_RO(_prefix, _name, _format, _value) \ IPL_ATTR_SHOW_FN(_prefix, _name, _format, _value) \ static struct kobj_attribute sys_##_prefix##_##_name##_attr = \ @@ -395,7 +423,7 @@ static ssize_t sys_ipl_device_show(struct kobject *kobj, switch (ipl_info.type) { case IPL_TYPE_CCW: - return sprintf(page, "0.0.%04x\n", ipl_devno); + return sprintf(page, "0.%x.%04x\n", ipl_ssid, ipl_devno); case IPL_TYPE_FCP: case IPL_TYPE_FCP_DUMP: return sprintf(page, "0.0.%04x\n", ipl->ipl_info.fcp.devno); @@ -687,21 +715,14 @@ static ssize_t reipl_fcp_scpdata_write(struct file *filp, struct kobject *kobj, struct bin_attribute *attr, char *buf, loff_t off, size_t count) { + size_t scpdata_len = count; size_t padding; - size_t scpdata_len; - - if (off < 0) - return -EINVAL; - if (off >= DIAG308_SCPDATA_SIZE) - return -ENOSPC; - if (count > DIAG308_SCPDATA_SIZE - off) - count = DIAG308_SCPDATA_SIZE - off; - - memcpy(reipl_block_fcp->ipl_info.fcp.scp_data, buf + off, count); - scpdata_len = off + count; + if (off) + return -EINVAL; + memcpy(reipl_block_fcp->ipl_info.fcp.scp_data, buf, count); if (scpdata_len % 8) { padding = 8 - (scpdata_len % 8); memset(reipl_block_fcp->ipl_info.fcp.scp_data + scpdata_len, @@ -717,7 +738,7 @@ static ssize_t reipl_fcp_scpdata_write(struct file *filp, struct kobject *kobj, } static struct bin_attribute sys_reipl_fcp_scp_data_attr = __BIN_ATTR(scp_data, (S_IRUGO | S_IWUSR), reipl_fcp_scpdata_read, - reipl_fcp_scpdata_write, PAGE_SIZE); + reipl_fcp_scpdata_write, DIAG308_SCPDATA_SIZE); static struct bin_attribute *reipl_fcp_bin_attrs[] = { &sys_reipl_fcp_scp_data_attr, @@ -814,9 +835,7 @@ static struct attribute_group reipl_fcp_attr_group = { }; /* CCW reipl device attributes */ - -DEFINE_IPL_ATTR_RW(reipl_ccw, device, "0.0.%04llx\n", "0.0.%llx\n", - reipl_block_ccw->ipl_info.ccw.devno); +DEFINE_IPL_CCW_ATTR_RW(reipl_ccw, device, reipl_block_ccw->ipl_info.ccw); /* NSS wrapper */ static ssize_t reipl_nss_loadparm_show(struct kobject *kobj, @@ -1056,8 +1075,8 @@ static void __reipl_run(void *unused) switch (reipl_method) { case REIPL_METHOD_CCW_CIO: + devid.ssid = reipl_block_ccw->ipl_info.ccw.ssid; devid.devno = reipl_block_ccw->ipl_info.ccw.devno; - devid.ssid = 0; reipl_ccw_dev(&devid); break; case REIPL_METHOD_CCW_VM: @@ -1192,6 +1211,7 @@ static int __init reipl_ccw_init(void) reipl_block_ccw_init(reipl_block_ccw); if (ipl_info.type == IPL_TYPE_CCW) { + reipl_block_ccw->ipl_info.ccw.ssid = ipl_ssid; reipl_block_ccw->ipl_info.ccw.devno = ipl_devno; reipl_block_ccw_fill_parms(reipl_block_ccw); } @@ -1336,9 +1356,7 @@ static struct attribute_group dump_fcp_attr_group = { }; /* CCW dump device attributes */ - -DEFINE_IPL_ATTR_RW(dump_ccw, device, "0.0.%04llx\n", "0.0.%llx\n", - dump_block_ccw->ipl_info.ccw.devno); +DEFINE_IPL_CCW_ATTR_RW(dump_ccw, device, dump_block_ccw->ipl_info.ccw); static struct attribute *dump_ccw_attrs[] = { &sys_dump_ccw_device_attr.attr, @@ -1418,8 +1436,8 @@ static void __dump_run(void *unused) switch (dump_method) { case DUMP_METHOD_CCW_CIO: + devid.ssid = dump_block_ccw->ipl_info.ccw.ssid; devid.devno = dump_block_ccw->ipl_info.ccw.devno; - devid.ssid = 0; reipl_ccw_dev(&devid); break; case DUMP_METHOD_CCW_VM: @@ -1939,14 +1957,14 @@ void __init setup_ipl(void) ipl_info.type = get_ipl_type(); switch (ipl_info.type) { case IPL_TYPE_CCW: + ipl_info.data.ccw.dev_id.ssid = ipl_ssid; ipl_info.data.ccw.dev_id.devno = ipl_devno; - ipl_info.data.ccw.dev_id.ssid = 0; break; case IPL_TYPE_FCP: case IPL_TYPE_FCP_DUMP: + ipl_info.data.fcp.dev_id.ssid = 0; ipl_info.data.fcp.dev_id.devno = IPL_PARMBLOCK_START->ipl_info.fcp.devno; - ipl_info.data.fcp.dev_id.ssid = 0; ipl_info.data.fcp.wwpn = IPL_PARMBLOCK_START->ipl_info.fcp.wwpn; ipl_info.data.fcp.lun = IPL_PARMBLOCK_START->ipl_info.fcp.lun; break; @@ -1978,6 +1996,7 @@ void __init ipl_save_parameters(void) if (cio_get_iplinfo(&iplinfo)) return; + ipl_ssid = iplinfo.ssid; ipl_devno = iplinfo.devno; ipl_flags |= IPL_DEVNO_VALID; if (!iplinfo.is_qdio) diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 688a3aad9c79..114ee8b96f17 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -243,11 +243,7 @@ unsigned long arch_align_stack(unsigned long sp) static inline unsigned long brk_rnd(void) { - /* 8MB for 32bit, 1GB for 64bit */ - if (is_32bit_task()) - return (get_random_int() & 0x7ffUL) << PAGE_SHIFT; - else - return (get_random_int() & 0x3ffffUL) << PAGE_SHIFT; + return (get_random_int() & BRK_RND_MASK) << PAGE_SHIFT; } unsigned long arch_randomize_brk(struct mm_struct *mm) diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 7ce00e7a709a..647128d5b983 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -61,6 +61,9 @@ static int show_cpuinfo(struct seq_file *m, void *v) "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp", "edat", "etf3eh", "highgprs", "te", "vx" }; + static const char * const int_hwcap_str[] = { + "sie" + }; unsigned long n = (unsigned long) v - 1; int i; @@ -75,6 +78,9 @@ static int show_cpuinfo(struct seq_file *m, void *v) for (i = 0; i < ARRAY_SIZE(hwcap_str); i++) if (hwcap_str[i] && (elf_hwcap & (1UL << i))) seq_printf(m, "%s ", hwcap_str[i]); + for (i = 0; i < ARRAY_SIZE(int_hwcap_str); i++) + if (int_hwcap_str[i] && (int_hwcap & (1UL << i))) + seq_printf(m, "%s ", int_hwcap_str[i]); seq_puts(m, "\n"); show_cacheinfo(m); } diff --git a/arch/s390/kernel/sclp.c b/arch/s390/kernel/sclp.c index fa0bdff1d413..9fe7781a45cd 100644 --- a/arch/s390/kernel/sclp.c +++ b/arch/s390/kernel/sclp.c @@ -21,7 +21,7 @@ static void _sclp_wait_int(void) __ctl_load(cr0_new, 0, 0); psw_ext_save = S390_lowcore.external_new_psw; - psw_mask = __extract_psw() & (PSW_MASK_EA | PSW_MASK_BA); + psw_mask = __extract_psw(); S390_lowcore.external_new_psw.mask = psw_mask; psw_wait.mask = psw_mask | PSW_MASK_EXT | PSW_MASK_WAIT; S390_lowcore.ext_int_code = 0; diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index ce0cbd6ba7ca..dc83ae66a730 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -80,6 +80,8 @@ EXPORT_SYMBOL(console_irq); unsigned long elf_hwcap __read_mostly = 0; char elf_platform[ELF_PLATFORM_SIZE]; +unsigned long int_hwcap = 0; + int __initdata memory_end_set; unsigned long __initdata memory_end; unsigned long __initdata max_physmem_end; @@ -764,9 +766,6 @@ static int __init setup_hwcaps(void) get_cpu_id(&cpu_id); add_device_randomness(&cpu_id, sizeof(cpu_id)); switch (cpu_id.machine) { - case 0x9672: - strcpy(elf_platform, "g5"); - break; case 0x2064: case 0x2066: default: /* Use "z900" as default for 64 bit kernels. */ @@ -796,6 +795,13 @@ static int __init setup_hwcaps(void) strcpy(elf_platform, "z13"); break; } + + /* + * Virtualization support HWCAP_INT_SIE is bit 0. + */ + if (sclp.has_sief2) + int_hwcap |= HWCAP_INT_SIE; + return 0; } arch_initcall(setup_hwcaps); diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 8c56929c8d82..5378c3ea1b98 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -382,3 +382,4 @@ SYSCALL(sys_sendmsg,compat_sys_sendmsg) /* 370 */ SYSCALL(sys_recvfrom,compat_sys_recvfrom) SYSCALL(sys_recvmsg,compat_sys_recvmsg) SYSCALL(sys_shutdown,sys_shutdown) +SYSCALL(sys_mlock2,compat_sys_mlock2) diff --git a/arch/s390/kernel/trace.c b/arch/s390/kernel/trace.c index 73239bb576c4..21a5df99552b 100644 --- a/arch/s390/kernel/trace.c +++ b/arch/s390/kernel/trace.c @@ -9,11 +9,11 @@ #define CREATE_TRACE_POINTS #include <asm/trace/diag.h> -EXPORT_TRACEPOINT_SYMBOL(diagnose); +EXPORT_TRACEPOINT_SYMBOL(s390_diagnose); static DEFINE_PER_CPU(unsigned int, diagnose_trace_depth); -void trace_diagnose_norecursion(int diag_nr) +void trace_s390_diagnose_norecursion(int diag_nr) { unsigned long flags; unsigned int *depth; @@ -22,7 +22,7 @@ void trace_diagnose_norecursion(int diag_nr) depth = this_cpu_ptr(&diagnose_trace_depth); if (*depth == 0) { (*depth)++; - trace_diagnose(diag_nr); + trace_s390_diagnose(diag_nr); (*depth)--; } local_irq_restore(flags); diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 5fbfb88f8477..05f7de9869a9 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -155,10 +155,8 @@ static int __diag_time_slice_end(struct kvm_vcpu *vcpu) static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu) { - struct kvm *kvm = vcpu->kvm; struct kvm_vcpu *tcpu; int tid; - int i; tid = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4]; vcpu->stat.diagnose_9c++; @@ -167,12 +165,9 @@ static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu) if (tid == vcpu->vcpu_id) return 0; - kvm_for_each_vcpu(i, tcpu, kvm) - if (tcpu->vcpu_id == tid) { - kvm_vcpu_yield_to(tcpu); - break; - } - + tcpu = kvm_get_vcpu_by_id(vcpu->kvm, tid); + if (tcpu) + kvm_vcpu_yield_to(tcpu); return 0; } diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index a7559f7207df..d30db40437dc 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -259,10 +259,14 @@ struct aste { int ipte_lock_held(struct kvm_vcpu *vcpu) { - union ipte_control *ic = &vcpu->kvm->arch.sca->ipte_control; + if (vcpu->arch.sie_block->eca & 1) { + int rc; - if (vcpu->arch.sie_block->eca & 1) - return ic->kh != 0; + read_lock(&vcpu->kvm->arch.sca_lock); + rc = kvm_s390_get_ipte_control(vcpu->kvm)->kh != 0; + read_unlock(&vcpu->kvm->arch.sca_lock); + return rc; + } return vcpu->kvm->arch.ipte_lock_count != 0; } @@ -274,16 +278,20 @@ static void ipte_lock_simple(struct kvm_vcpu *vcpu) vcpu->kvm->arch.ipte_lock_count++; if (vcpu->kvm->arch.ipte_lock_count > 1) goto out; - ic = &vcpu->kvm->arch.sca->ipte_control; +retry: + read_lock(&vcpu->kvm->arch.sca_lock); + ic = kvm_s390_get_ipte_control(vcpu->kvm); do { old = READ_ONCE(*ic); - while (old.k) { + if (old.k) { + read_unlock(&vcpu->kvm->arch.sca_lock); cond_resched(); - old = READ_ONCE(*ic); + goto retry; } new = old; new.k = 1; } while (cmpxchg(&ic->val, old.val, new.val) != old.val); + read_unlock(&vcpu->kvm->arch.sca_lock); out: mutex_unlock(&vcpu->kvm->arch.ipte_mutex); } @@ -296,12 +304,14 @@ static void ipte_unlock_simple(struct kvm_vcpu *vcpu) vcpu->kvm->arch.ipte_lock_count--; if (vcpu->kvm->arch.ipte_lock_count) goto out; - ic = &vcpu->kvm->arch.sca->ipte_control; + read_lock(&vcpu->kvm->arch.sca_lock); + ic = kvm_s390_get_ipte_control(vcpu->kvm); do { old = READ_ONCE(*ic); new = old; new.k = 0; } while (cmpxchg(&ic->val, old.val, new.val) != old.val); + read_unlock(&vcpu->kvm->arch.sca_lock); wake_up(&vcpu->kvm->arch.ipte_wq); out: mutex_unlock(&vcpu->kvm->arch.ipte_mutex); @@ -311,24 +321,29 @@ static void ipte_lock_siif(struct kvm_vcpu *vcpu) { union ipte_control old, new, *ic; - ic = &vcpu->kvm->arch.sca->ipte_control; +retry: + read_lock(&vcpu->kvm->arch.sca_lock); + ic = kvm_s390_get_ipte_control(vcpu->kvm); do { old = READ_ONCE(*ic); - while (old.kg) { + if (old.kg) { + read_unlock(&vcpu->kvm->arch.sca_lock); cond_resched(); - old = READ_ONCE(*ic); + goto retry; } new = old; new.k = 1; new.kh++; } while (cmpxchg(&ic->val, old.val, new.val) != old.val); + read_unlock(&vcpu->kvm->arch.sca_lock); } static void ipte_unlock_siif(struct kvm_vcpu *vcpu) { union ipte_control old, new, *ic; - ic = &vcpu->kvm->arch.sca->ipte_control; + read_lock(&vcpu->kvm->arch.sca_lock); + ic = kvm_s390_get_ipte_control(vcpu->kvm); do { old = READ_ONCE(*ic); new = old; @@ -336,6 +351,7 @@ static void ipte_unlock_siif(struct kvm_vcpu *vcpu) if (!new.kh) new.k = 0; } while (cmpxchg(&ic->val, old.val, new.val) != old.val); + read_unlock(&vcpu->kvm->arch.sca_lock); if (!new.kh) wake_up(&vcpu->kvm->arch.ipte_wq); } diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index b4a5aa110cec..d53c10753c46 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -54,9 +54,6 @@ void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilc) static int handle_noop(struct kvm_vcpu *vcpu) { switch (vcpu->arch.sie_block->icptcode) { - case 0x0: - vcpu->stat.exit_null++; - break; case 0x10: vcpu->stat.exit_external_request++; break; @@ -338,8 +335,10 @@ static int handle_partial_execution(struct kvm_vcpu *vcpu) int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu) { + if (kvm_is_ucontrol(vcpu->kvm)) + return -EOPNOTSUPP; + switch (vcpu->arch.sie_block->icptcode) { - case 0x00: case 0x10: case 0x18: return handle_noop(vcpu); diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 373e32346d68..62ec925aa196 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -34,6 +34,106 @@ #define PFAULT_DONE 0x0680 #define VIRTIO_PARAM 0x0d00 +/* handle external calls via sigp interpretation facility */ +static int sca_ext_call_pending(struct kvm_vcpu *vcpu, int *src_id) +{ + int c, scn; + + if (!(atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_ECALL_PEND)) + return 0; + + read_lock(&vcpu->kvm->arch.sca_lock); + if (vcpu->kvm->arch.use_esca) { + struct esca_block *sca = vcpu->kvm->arch.sca; + union esca_sigp_ctrl sigp_ctrl = + sca->cpu[vcpu->vcpu_id].sigp_ctrl; + + c = sigp_ctrl.c; + scn = sigp_ctrl.scn; + } else { + struct bsca_block *sca = vcpu->kvm->arch.sca; + union bsca_sigp_ctrl sigp_ctrl = + sca->cpu[vcpu->vcpu_id].sigp_ctrl; + + c = sigp_ctrl.c; + scn = sigp_ctrl.scn; + } + read_unlock(&vcpu->kvm->arch.sca_lock); + + if (src_id) + *src_id = scn; + + return c; +} + +static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id) +{ + int expect, rc; + + read_lock(&vcpu->kvm->arch.sca_lock); + if (vcpu->kvm->arch.use_esca) { + struct esca_block *sca = vcpu->kvm->arch.sca; + union esca_sigp_ctrl *sigp_ctrl = + &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); + union esca_sigp_ctrl new_val = {0}, old_val = *sigp_ctrl; + + new_val.scn = src_id; + new_val.c = 1; + old_val.c = 0; + + expect = old_val.value; + rc = cmpxchg(&sigp_ctrl->value, old_val.value, new_val.value); + } else { + struct bsca_block *sca = vcpu->kvm->arch.sca; + union bsca_sigp_ctrl *sigp_ctrl = + &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); + union bsca_sigp_ctrl new_val = {0}, old_val = *sigp_ctrl; + + new_val.scn = src_id; + new_val.c = 1; + old_val.c = 0; + + expect = old_val.value; + rc = cmpxchg(&sigp_ctrl->value, old_val.value, new_val.value); + } + read_unlock(&vcpu->kvm->arch.sca_lock); + + if (rc != expect) { + /* another external call is pending */ + return -EBUSY; + } + atomic_or(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags); + return 0; +} + +static void sca_clear_ext_call(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + int rc, expect; + + atomic_andnot(CPUSTAT_ECALL_PEND, li->cpuflags); + read_lock(&vcpu->kvm->arch.sca_lock); + if (vcpu->kvm->arch.use_esca) { + struct esca_block *sca = vcpu->kvm->arch.sca; + union esca_sigp_ctrl *sigp_ctrl = + &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); + union esca_sigp_ctrl old = *sigp_ctrl; + + expect = old.value; + rc = cmpxchg(&sigp_ctrl->value, old.value, 0); + } else { + struct bsca_block *sca = vcpu->kvm->arch.sca; + union bsca_sigp_ctrl *sigp_ctrl = + &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); + union bsca_sigp_ctrl old = *sigp_ctrl; + + expect = old.value; + rc = cmpxchg(&sigp_ctrl->value, old.value, 0); + } + read_unlock(&vcpu->kvm->arch.sca_lock); + WARN_ON(rc != expect); /* cannot clear? */ +} + int psw_extint_disabled(struct kvm_vcpu *vcpu) { return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT); @@ -792,13 +892,11 @@ static const deliver_irq_t deliver_irq_funcs[] = { int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; - uint8_t sigp_ctrl = vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl; if (!sclp.has_sigpif) return test_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs); - return (sigp_ctrl & SIGP_CTRL_C) && - (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_ECALL_PEND); + return sca_ext_call_pending(vcpu, NULL); } int kvm_s390_vcpu_has_irq(struct kvm_vcpu *vcpu, int exclude_stop) @@ -909,9 +1007,7 @@ void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu) memset(&li->irq, 0, sizeof(li->irq)); spin_unlock(&li->lock); - /* clear pending external calls set by sigp interpretation facility */ - atomic_andnot(CPUSTAT_ECALL_PEND, li->cpuflags); - vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl = 0; + sca_clear_ext_call(vcpu); } int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) @@ -1003,21 +1099,6 @@ static int __inject_pfault_init(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) return 0; } -static int __inject_extcall_sigpif(struct kvm_vcpu *vcpu, uint16_t src_id) -{ - unsigned char new_val, old_val; - uint8_t *sigp_ctrl = &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl; - - new_val = SIGP_CTRL_C | (src_id & SIGP_CTRL_SCN_MASK); - old_val = *sigp_ctrl & ~SIGP_CTRL_C; - if (cmpxchg(sigp_ctrl, old_val, new_val) != old_val) { - /* another external call is pending */ - return -EBUSY; - } - atomic_or(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags); - return 0; -} - static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; @@ -1030,12 +1111,11 @@ static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) src_id, 0); /* sending vcpu invalid */ - if (src_id >= KVM_MAX_VCPUS || - kvm_get_vcpu(vcpu->kvm, src_id) == NULL) + if (kvm_get_vcpu_by_id(vcpu->kvm, src_id) == NULL) return -EINVAL; if (sclp.has_sigpif) - return __inject_extcall_sigpif(vcpu, src_id); + return sca_inject_ext_call(vcpu, src_id); if (test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs)) return -EBUSY; @@ -1110,6 +1190,10 @@ static int __inject_sigp_emergency(struct kvm_vcpu *vcpu, trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY, irq->u.emerg.code, 0); + /* sending vcpu invalid */ + if (kvm_get_vcpu_by_id(vcpu->kvm, irq->u.emerg.code) == NULL) + return -EINVAL; + set_bit(irq->u.emerg.code, li->sigp_emerg_pending); set_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs); atomic_or(CPUSTAT_EXT_INT, li->cpuflags); @@ -2200,7 +2284,7 @@ static void store_local_irq(struct kvm_s390_local_interrupt *li, int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len) { - uint8_t sigp_ctrl = vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl; + int scn; unsigned long sigp_emerg_pending[BITS_TO_LONGS(KVM_MAX_VCPUS)]; struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; unsigned long pending_irqs; @@ -2240,14 +2324,12 @@ int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len) } } - if ((sigp_ctrl & SIGP_CTRL_C) && - (atomic_read(&vcpu->arch.sie_block->cpuflags) & - CPUSTAT_ECALL_PEND)) { + if (sca_ext_call_pending(vcpu, &scn)) { if (n + sizeof(irq) > len) return -ENOBUFS; memset(&irq, 0, sizeof(irq)); irq.type = KVM_S390_INT_EXTERNAL_CALL; - irq.u.extcall.code = sigp_ctrl & SIGP_CTRL_SCN_MASK; + irq.u.extcall.code = scn; if (copy_to_user(&buf[n], &irq, sizeof(irq))) return -EFAULT; n += sizeof(irq); diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 8fe2f1c722dc..5927c61d322a 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -246,7 +246,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) break; case KVM_CAP_NR_VCPUS: case KVM_CAP_MAX_VCPUS: - r = KVM_MAX_VCPUS; + r = sclp.has_esca ? KVM_S390_ESCA_CPU_SLOTS + : KVM_S390_BSCA_CPU_SLOTS; break; case KVM_CAP_NR_MEMSLOTS: r = KVM_USER_MEM_SLOTS; @@ -257,6 +258,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_VECTOR_REGISTERS: r = MACHINE_HAS_VX; break; + case KVM_CAP_S390_RI: + r = test_facility(64); + break; default: r = 0; } @@ -283,6 +287,8 @@ static void kvm_s390_sync_dirty_log(struct kvm *kvm, } /* Section: vm related */ +static void sca_del_vcpu(struct kvm_vcpu *vcpu); + /* * Get (and clear) the dirty memory log for a memory slot. */ @@ -342,15 +348,33 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) r = 0; break; case KVM_CAP_S390_VECTOR_REGISTERS: - if (MACHINE_HAS_VX) { + mutex_lock(&kvm->lock); + if (atomic_read(&kvm->online_vcpus)) { + r = -EBUSY; + } else if (MACHINE_HAS_VX) { set_kvm_facility(kvm->arch.model.fac->mask, 129); set_kvm_facility(kvm->arch.model.fac->list, 129); r = 0; } else r = -EINVAL; + mutex_unlock(&kvm->lock); VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s", r ? "(not available)" : "(success)"); break; + case KVM_CAP_S390_RI: + r = -EINVAL; + mutex_lock(&kvm->lock); + if (atomic_read(&kvm->online_vcpus)) { + r = -EBUSY; + } else if (test_facility(64)) { + set_kvm_facility(kvm->arch.model.fac->mask, 64); + set_kvm_facility(kvm->arch.model.fac->list, 64); + r = 0; + } + mutex_unlock(&kvm->lock); + VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s", + r ? "(not available)" : "(success)"); + break; case KVM_CAP_S390_USER_STSI: VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI"); kvm->arch.user_stsi = 1; @@ -371,8 +395,8 @@ static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *att case KVM_S390_VM_MEM_LIMIT_SIZE: ret = 0; VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes", - kvm->arch.gmap->asce_end); - if (put_user(kvm->arch.gmap->asce_end, (u64 __user *)attr->addr)) + kvm->arch.mem_limit); + if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr)) ret = -EFAULT; break; default: @@ -424,9 +448,17 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att if (get_user(new_limit, (u64 __user *)attr->addr)) return -EFAULT; - if (new_limit > kvm->arch.gmap->asce_end) + if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT && + new_limit > kvm->arch.mem_limit) return -E2BIG; + if (!new_limit) + return -EINVAL; + + /* gmap_alloc takes last usable address */ + if (new_limit != KVM_S390_NO_MEM_LIMIT) + new_limit -= 1; + ret = -EBUSY; mutex_lock(&kvm->lock); if (atomic_read(&kvm->online_vcpus) == 0) { @@ -443,7 +475,9 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att } } mutex_unlock(&kvm->lock); - VM_EVENT(kvm, 3, "SET: max guest memory: %lu bytes", new_limit); + VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit); + VM_EVENT(kvm, 3, "New guest asce: 0x%pK", + (void *) kvm->arch.gmap->asce); break; } default: @@ -1020,7 +1054,7 @@ static int kvm_s390_apxa_installed(void) u8 config[128]; int cc; - if (test_facility(2) && test_facility(12)) { + if (test_facility(12)) { cc = kvm_s390_query_ap_config(config); if (cc) @@ -1071,6 +1105,15 @@ static int kvm_s390_crypto_init(struct kvm *kvm) return 0; } +static void sca_dispose(struct kvm *kvm) +{ + if (kvm->arch.use_esca) + free_pages_exact(kvm->arch.sca, sizeof(struct esca_block)); + else + free_page((unsigned long)(kvm->arch.sca)); + kvm->arch.sca = NULL; +} + int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { int i, rc; @@ -1094,14 +1137,17 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) rc = -ENOMEM; - kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL); + kvm->arch.use_esca = 0; /* start with basic SCA */ + rwlock_init(&kvm->arch.sca_lock); + kvm->arch.sca = (struct bsca_block *) get_zeroed_page(GFP_KERNEL); if (!kvm->arch.sca) goto out_err; spin_lock(&kvm_lock); sca_offset += 16; - if (sca_offset + sizeof(struct sca_block) > PAGE_SIZE) + if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE) sca_offset = 0; - kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset); + kvm->arch.sca = (struct bsca_block *) + ((char *) kvm->arch.sca + sca_offset); spin_unlock(&kvm_lock); sprintf(debug_name, "kvm-%u", current->pid); @@ -1153,8 +1199,14 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (type & KVM_VM_S390_UCONTROL) { kvm->arch.gmap = NULL; + kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT; } else { - kvm->arch.gmap = gmap_alloc(current->mm, (1UL << 44) - 1); + if (sclp.hamax == U64_MAX) + kvm->arch.mem_limit = TASK_MAX_SIZE; + else + kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE, + sclp.hamax + 1); + kvm->arch.gmap = gmap_alloc(current->mm, kvm->arch.mem_limit - 1); if (!kvm->arch.gmap) goto out_err; kvm->arch.gmap->private = kvm; @@ -1166,14 +1218,14 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm->arch.epoch = 0; spin_lock_init(&kvm->arch.start_stop_lock); - KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid); + KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid); return 0; out_err: kfree(kvm->arch.crypto.crycb); free_page((unsigned long)kvm->arch.model.fac); debug_unregister(kvm->arch.dbf); - free_page((unsigned long)(kvm->arch.sca)); + sca_dispose(kvm); KVM_EVENT(3, "creation of vm failed: %d", rc); return rc; } @@ -1184,14 +1236,8 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id); kvm_s390_clear_local_irqs(vcpu); kvm_clear_async_pf_completion_queue(vcpu); - if (!kvm_is_ucontrol(vcpu->kvm)) { - clear_bit(63 - vcpu->vcpu_id, - (unsigned long *) &vcpu->kvm->arch.sca->mcn); - if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda == - (__u64) vcpu->arch.sie_block) - vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0; - } - smp_mb(); + if (!kvm_is_ucontrol(vcpu->kvm)) + sca_del_vcpu(vcpu); if (kvm_is_ucontrol(vcpu->kvm)) gmap_free(vcpu->arch.gmap); @@ -1224,14 +1270,14 @@ void kvm_arch_destroy_vm(struct kvm *kvm) { kvm_free_vcpus(kvm); free_page((unsigned long)kvm->arch.model.fac); - free_page((unsigned long)(kvm->arch.sca)); + sca_dispose(kvm); debug_unregister(kvm->arch.dbf); kfree(kvm->arch.crypto.crycb); if (!kvm_is_ucontrol(kvm)) gmap_free(kvm->arch.gmap); kvm_s390_destroy_adapters(kvm); kvm_s390_clear_float_irqs(kvm); - KVM_EVENT(3, "vm 0x%p destroyed", kvm); + KVM_EVENT(3, "vm 0x%pK destroyed", kvm); } /* Section: vcpu related */ @@ -1245,6 +1291,117 @@ static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu) return 0; } +static void sca_del_vcpu(struct kvm_vcpu *vcpu) +{ + read_lock(&vcpu->kvm->arch.sca_lock); + if (vcpu->kvm->arch.use_esca) { + struct esca_block *sca = vcpu->kvm->arch.sca; + + clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); + sca->cpu[vcpu->vcpu_id].sda = 0; + } else { + struct bsca_block *sca = vcpu->kvm->arch.sca; + + clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); + sca->cpu[vcpu->vcpu_id].sda = 0; + } + read_unlock(&vcpu->kvm->arch.sca_lock); +} + +static void sca_add_vcpu(struct kvm_vcpu *vcpu) +{ + read_lock(&vcpu->kvm->arch.sca_lock); + if (vcpu->kvm->arch.use_esca) { + struct esca_block *sca = vcpu->kvm->arch.sca; + + sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; + vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); + vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU; + vcpu->arch.sie_block->ecb2 |= 0x04U; + set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); + } else { + struct bsca_block *sca = vcpu->kvm->arch.sca; + + sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; + vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); + vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; + set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); + } + read_unlock(&vcpu->kvm->arch.sca_lock); +} + +/* Basic SCA to Extended SCA data copy routines */ +static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s) +{ + d->sda = s->sda; + d->sigp_ctrl.c = s->sigp_ctrl.c; + d->sigp_ctrl.scn = s->sigp_ctrl.scn; +} + +static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s) +{ + int i; + + d->ipte_control = s->ipte_control; + d->mcn[0] = s->mcn; + for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++) + sca_copy_entry(&d->cpu[i], &s->cpu[i]); +} + +static int sca_switch_to_extended(struct kvm *kvm) +{ + struct bsca_block *old_sca = kvm->arch.sca; + struct esca_block *new_sca; + struct kvm_vcpu *vcpu; + unsigned int vcpu_idx; + u32 scaol, scaoh; + + new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO); + if (!new_sca) + return -ENOMEM; + + scaoh = (u32)((u64)(new_sca) >> 32); + scaol = (u32)(u64)(new_sca) & ~0x3fU; + + kvm_s390_vcpu_block_all(kvm); + write_lock(&kvm->arch.sca_lock); + + sca_copy_b_to_e(new_sca, old_sca); + + kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) { + vcpu->arch.sie_block->scaoh = scaoh; + vcpu->arch.sie_block->scaol = scaol; + vcpu->arch.sie_block->ecb2 |= 0x04U; + } + kvm->arch.sca = new_sca; + kvm->arch.use_esca = 1; + + write_unlock(&kvm->arch.sca_lock); + kvm_s390_vcpu_unblock_all(kvm); + + free_page((unsigned long)old_sca); + + VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)", + old_sca, kvm->arch.sca); + return 0; +} + +static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id) +{ + int rc; + + if (id < KVM_S390_BSCA_CPU_SLOTS) + return true; + if (!sclp.has_esca) + return false; + + mutex_lock(&kvm->lock); + rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm); + mutex_unlock(&kvm->lock); + + return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS; +} + int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) { vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; @@ -1255,6 +1412,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) KVM_SYNC_CRS | KVM_SYNC_ARCH0 | KVM_SYNC_PFAULT; + if (test_kvm_facility(vcpu->kvm, 64)) + vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB; if (test_kvm_facility(vcpu->kvm, 129)) vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS; @@ -1365,8 +1524,11 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch; preempt_enable(); mutex_unlock(&vcpu->kvm->lock); - if (!kvm_is_ucontrol(vcpu->kvm)) + if (!kvm_is_ucontrol(vcpu->kvm)) { vcpu->arch.gmap = vcpu->kvm->arch.gmap; + sca_add_vcpu(vcpu); + } + } static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu) @@ -1435,10 +1597,13 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->eca |= 1; if (sclp.has_sigpif) vcpu->arch.sie_block->eca |= 0x10000000U; + if (test_kvm_facility(vcpu->kvm, 64)) + vcpu->arch.sie_block->ecb3 |= 0x01; if (test_kvm_facility(vcpu->kvm, 129)) { vcpu->arch.sie_block->eca |= 0x00020000; vcpu->arch.sie_block->ecd |= 0x20000000; } + vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb; vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; if (vcpu->kvm->arch.use_cmma) { @@ -1461,7 +1626,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, struct sie_page *sie_page; int rc = -EINVAL; - if (id >= KVM_MAX_VCPUS) + if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id)) goto out; rc = -ENOMEM; @@ -1478,20 +1643,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb; vcpu->arch.sie_block->icpua = id; - if (!kvm_is_ucontrol(kvm)) { - if (!kvm->arch.sca) { - WARN_ON_ONCE(1); - goto out_free_cpu; - } - if (!kvm->arch.sca->cpu[id].sda) - kvm->arch.sca->cpu[id].sda = - (__u64) vcpu->arch.sie_block; - vcpu->arch.sie_block->scaoh = - (__u32)(((__u64)kvm->arch.sca) >> 32); - vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca; - set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn); - } - spin_lock_init(&vcpu->arch.local_int.lock); vcpu->arch.local_int.float_int = &kvm->arch.float_int; vcpu->arch.local_int.wq = &vcpu->wq; @@ -1505,15 +1656,13 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, */ vcpu->arch.guest_fpregs.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS, GFP_KERNEL); - if (!vcpu->arch.guest_fpregs.fprs) { - rc = -ENOMEM; + if (!vcpu->arch.guest_fpregs.fprs) goto out_free_sie_block; - } rc = kvm_vcpu_init(vcpu, kvm, id); if (rc) goto out_free_sie_block; - VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu, + VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu, vcpu->arch.sie_block); trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block); @@ -2009,7 +2158,8 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu) */ kvm_check_async_pf_completion(vcpu); - memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16); + vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14]; + vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15]; if (need_resched()) schedule(); @@ -2067,8 +2217,6 @@ static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu) static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) { - int rc = -1; - VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", vcpu->arch.sie_block->icptcode); trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode); @@ -2076,40 +2224,36 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) if (guestdbg_enabled(vcpu)) kvm_s390_restore_guest_per_regs(vcpu); - if (exit_reason >= 0) { - rc = 0; + vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14; + vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15; + + if (vcpu->arch.sie_block->icptcode > 0) { + int rc = kvm_handle_sie_intercept(vcpu); + + if (rc != -EOPNOTSUPP) + return rc; + vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC; + vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode; + vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa; + vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb; + return -EREMOTE; + } else if (exit_reason != -EFAULT) { + vcpu->stat.exit_null++; + return 0; } else if (kvm_is_ucontrol(vcpu->kvm)) { vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL; vcpu->run->s390_ucontrol.trans_exc_code = current->thread.gmap_addr; vcpu->run->s390_ucontrol.pgm_code = 0x10; - rc = -EREMOTE; - + return -EREMOTE; } else if (current->thread.gmap_pfault) { trace_kvm_s390_major_guest_pfault(vcpu); current->thread.gmap_pfault = 0; - if (kvm_arch_setup_async_pf(vcpu)) { - rc = 0; - } else { - gpa_t gpa = current->thread.gmap_addr; - rc = kvm_arch_fault_in_page(vcpu, gpa, 1); - } + if (kvm_arch_setup_async_pf(vcpu)) + return 0; + return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1); } - - if (rc == -1) - rc = vcpu_post_run_fault_in_sie(vcpu); - - memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16); - - if (rc == 0) { - if (kvm_is_ucontrol(vcpu->kvm)) - /* Don't exit for host interrupts. */ - rc = vcpu->arch.sie_block->icptcode ? -EOPNOTSUPP : 0; - else - rc = kvm_handle_sie_intercept(vcpu); - } - - return rc; + return vcpu_post_run_fault_in_sie(vcpu); } static int __vcpu_run(struct kvm_vcpu *vcpu) @@ -2229,18 +2373,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) rc = 0; } - if (rc == -EOPNOTSUPP) { - /* intercept cannot be handled in-kernel, prepare kvm-run */ - kvm_run->exit_reason = KVM_EXIT_S390_SIEIC; - kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode; - kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa; - kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb; - rc = 0; - } - if (rc == -EREMOTE) { - /* intercept was handled, but userspace support is needed - * kvm_run has been prepared by the handler */ + /* userspace support is needed, kvm_run has been prepared */ rc = 0; } @@ -2732,6 +2866,9 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, if (mem->memory_size & 0xffffful) return -EINVAL; + if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit) + return -EINVAL; + return 0; } @@ -2763,6 +2900,11 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, static int __init kvm_s390_init(void) { + if (!sclp.has_sief2) { + pr_info("SIE not available\n"); + return -ENODEV; + } + return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); } diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 1e70e00d3c5e..df1abada1f36 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -340,4 +340,11 @@ void kvm_s390_clear_bp_data(struct kvm_vcpu *vcpu); void kvm_s390_prepare_debug_exit(struct kvm_vcpu *vcpu); void kvm_s390_handle_per_event(struct kvm_vcpu *vcpu); +/* support for Basic/Extended SCA handling */ +static inline union ipte_control *kvm_s390_get_ipte_control(struct kvm *kvm) +{ + struct bsca_block *sca = kvm->arch.sca; /* SCA version doesn't matter */ + + return &sca->ipte_control; +} #endif diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 77191b85ea7a..d76b51cb4b62 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -660,7 +660,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) kvm_s390_get_regs_rre(vcpu, ®1, ®2); - if (!MACHINE_HAS_PFMF) + if (!test_kvm_facility(vcpu->kvm, 8)) return kvm_s390_inject_program_int(vcpu, PGM_OPERATION); if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index da690b69f9fe..77c22d685c7a 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -291,12 +291,8 @@ static int handle_sigp_dst(struct kvm_vcpu *vcpu, u8 order_code, u16 cpu_addr, u32 parameter, u64 *status_reg) { int rc; - struct kvm_vcpu *dst_vcpu; + struct kvm_vcpu *dst_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, cpu_addr); - if (cpu_addr >= KVM_MAX_VCPUS) - return SIGP_CC_NOT_OPERATIONAL; - - dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); if (!dst_vcpu) return SIGP_CC_NOT_OPERATIONAL; @@ -478,7 +474,7 @@ int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu) trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr); if (order_code == SIGP_EXTERNAL_CALL) { - dest_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); + dest_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, cpu_addr); BUG_ON(dest_vcpu == NULL); kvm_s390_vcpu_wakeup(dest_vcpu); diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h index cc1d6c68356f..396485bca191 100644 --- a/arch/s390/kvm/trace-s390.h +++ b/arch/s390/kvm/trace-s390.h @@ -55,8 +55,8 @@ TRACE_EVENT(kvm_s390_create_vcpu, __entry->sie_block = sie_block; ), - TP_printk("create cpu %d at %p, sie block at %p", __entry->id, - __entry->vcpu, __entry->sie_block) + TP_printk("create cpu %d at 0x%pK, sie block at 0x%pK", + __entry->id, __entry->vcpu, __entry->sie_block) ); TRACE_EVENT(kvm_s390_destroy_vcpu, @@ -254,7 +254,7 @@ TRACE_EVENT(kvm_s390_enable_css, __entry->kvm = kvm; ), - TP_printk("enabling channel I/O support (kvm @ %p)\n", + TP_printk("enabling channel I/O support (kvm @ %pK)\n", __entry->kvm) ); diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index c3c07d3505ba..c722400c7697 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -48,37 +48,13 @@ EXPORT_SYMBOL(zero_page_mask); static void __init setup_zero_pages(void) { - struct cpuid cpu_id; unsigned int order; struct page *page; int i; - get_cpu_id(&cpu_id); - switch (cpu_id.machine) { - case 0x9672: /* g5 */ - case 0x2064: /* z900 */ - case 0x2066: /* z900 */ - case 0x2084: /* z990 */ - case 0x2086: /* z990 */ - case 0x2094: /* z9-109 */ - case 0x2096: /* z9-109 */ - order = 0; - break; - case 0x2097: /* z10 */ - case 0x2098: /* z10 */ - case 0x2817: /* z196 */ - case 0x2818: /* z196 */ - order = 2; - break; - case 0x2827: /* zEC12 */ - case 0x2828: /* zEC12 */ - order = 5; - break; - case 0x2964: /* z13 */ - default: - order = 7; - break; - } + /* Latest machines require a mapping granularity of 512KB */ + order = 7; + /* Limit number of empty zero pages for small memory sizes */ while (order > 2 && (totalram_pages >> 10) < (1UL << order)) order--; diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index 6e552af08c76..ea01477b4aa6 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -31,9 +31,6 @@ #include <linux/security.h> #include <asm/pgalloc.h> -unsigned long mmap_rnd_mask; -static unsigned long mmap_align_mask; - static unsigned long stack_maxrandom_size(void) { if (!(current->flags & PF_RANDOMIZE)) @@ -62,10 +59,7 @@ static inline int mmap_is_legacy(void) unsigned long arch_mmap_rnd(void) { - if (is_32bit_task()) - return (get_random_int() & 0x7ff) << PAGE_SHIFT; - else - return (get_random_int() & mmap_rnd_mask) << PAGE_SHIFT; + return (get_random_int() & MMAP_RND_MASK) << PAGE_SHIFT; } static unsigned long mmap_base_legacy(unsigned long rnd) @@ -92,7 +86,6 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, struct mm_struct *mm = current->mm; struct vm_area_struct *vma; struct vm_unmapped_area_info info; - int do_color_align; if (len > TASK_SIZE - mmap_min_addr) return -ENOMEM; @@ -108,15 +101,14 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, return addr; } - do_color_align = 0; - if (filp || (flags & MAP_SHARED)) - do_color_align = !is_32bit_task(); - info.flags = 0; info.length = len; info.low_limit = mm->mmap_base; info.high_limit = TASK_SIZE; - info.align_mask = do_color_align ? (mmap_align_mask << PAGE_SHIFT) : 0; + if (filp || (flags & MAP_SHARED)) + info.align_mask = MMAP_ALIGN_MASK << PAGE_SHIFT; + else + info.align_mask = 0; info.align_offset = pgoff << PAGE_SHIFT; return vm_unmapped_area(&info); } @@ -130,7 +122,6 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, struct mm_struct *mm = current->mm; unsigned long addr = addr0; struct vm_unmapped_area_info info; - int do_color_align; /* requested length too big for entire address space */ if (len > TASK_SIZE - mmap_min_addr) @@ -148,15 +139,14 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, return addr; } - do_color_align = 0; - if (filp || (flags & MAP_SHARED)) - do_color_align = !is_32bit_task(); - info.flags = VM_UNMAPPED_AREA_TOPDOWN; info.length = len; info.low_limit = max(PAGE_SIZE, mmap_min_addr); info.high_limit = mm->mmap_base; - info.align_mask = do_color_align ? (mmap_align_mask << PAGE_SHIFT) : 0; + if (filp || (flags & MAP_SHARED)) + info.align_mask = MMAP_ALIGN_MASK << PAGE_SHIFT; + else + info.align_mask = 0; info.align_offset = pgoff << PAGE_SHIFT; addr = vm_unmapped_area(&info); @@ -254,35 +244,3 @@ void arch_pick_mmap_layout(struct mm_struct *mm) mm->get_unmapped_area = s390_get_unmapped_area_topdown; } } - -static int __init setup_mmap_rnd(void) -{ - struct cpuid cpu_id; - - get_cpu_id(&cpu_id); - switch (cpu_id.machine) { - case 0x9672: - case 0x2064: - case 0x2066: - case 0x2084: - case 0x2086: - case 0x2094: - case 0x2096: - case 0x2097: - case 0x2098: - case 0x2817: - case 0x2818: - case 0x2827: - case 0x2828: - mmap_rnd_mask = 0x7ffUL; - mmap_align_mask = 0UL; - break; - case 0x2964: /* z13 */ - default: - mmap_rnd_mask = 0x3ff80UL; - mmap_align_mask = 0x7fUL; - break; - } - return 0; -} -early_initcall(setup_mmap_rnd); diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 54ef3bc01b43..63b039899a5e 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -133,7 +133,7 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit) /** * gmap_alloc - allocate a guest address space * @mm: pointer to the parent mm_struct - * @limit: maximum size of the gmap address space + * @limit: maximum address of the gmap address space * * Returns a guest address space structure. */ @@ -402,7 +402,7 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from, if ((from | to | len) & (PMD_SIZE - 1)) return -EINVAL; if (len == 0 || from + len < from || to + len < to || - from + len > TASK_MAX_SIZE || to + len > gmap->asce_end) + from + len - 1 > TASK_MAX_SIZE || to + len - 1 > gmap->asce_end) return -EINVAL; flush = 0; diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 9a0c4c22e536..3c0bfc1f2694 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -408,7 +408,7 @@ static void emit_load_skb_data_hlen(struct bpf_jit *jit) * Save registers and create stack frame if necessary. * See stack frame layout desription in "bpf_jit.h"! */ -static void bpf_jit_prologue(struct bpf_jit *jit, bool is_classic) +static void bpf_jit_prologue(struct bpf_jit *jit) { if (jit->seen & SEEN_TAIL_CALL) { /* xc STK_OFF_TCCNT(4,%r15),STK_OFF_TCCNT(%r15) */ @@ -448,15 +448,6 @@ static void bpf_jit_prologue(struct bpf_jit *jit, bool is_classic) /* stg %b1,ST_OFF_SKBP(%r0,%r15) */ EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, REG_15, STK_OFF_SKBP); - /* Clear A (%b0) and X (%b7) registers for converted BPF programs */ - if (is_classic) { - if (REG_SEEN(BPF_REG_A)) - /* lghi %ba,0 */ - EMIT4_IMM(0xa7090000, BPF_REG_A, 0); - if (REG_SEEN(BPF_REG_X)) - /* lghi %bx,0 */ - EMIT4_IMM(0xa7090000, BPF_REG_X, 0); - } } /* @@ -1245,7 +1236,7 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp) jit->lit = jit->lit_start; jit->prg = 0; - bpf_jit_prologue(jit, bpf_prog_was_classic(fp)); + bpf_jit_prologue(jit); for (i = 0; i < fp->len; i += insn_count) { insn_count = bpf_jit_insn(jit, fp, i); if (insn_count < 0) diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index 37d10f74425a..32da0a6ecec2 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -33,7 +33,7 @@ unsigned long *dma_alloc_cpu_table(void) return NULL; for (entry = table; entry < table + ZPCI_TABLE_ENTRIES; entry++) - *entry = ZPCI_TABLE_INVALID | ZPCI_TABLE_PROTECTED; + *entry = ZPCI_TABLE_INVALID; return table; } @@ -51,7 +51,7 @@ static unsigned long *dma_alloc_page_table(void) return NULL; for (entry = table; entry < table + ZPCI_PT_ENTRIES; entry++) - *entry = ZPCI_PTE_INVALID | ZPCI_TABLE_PROTECTED; + *entry = ZPCI_PTE_INVALID; return table; } @@ -95,7 +95,7 @@ static unsigned long *dma_get_page_table_origin(unsigned long *entry) return pto; } -static unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr) +unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr) { unsigned long *sto, *pto; unsigned int rtx, sx, px; @@ -114,20 +114,10 @@ static unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr return &pto[px]; } -void dma_update_cpu_trans(unsigned long *dma_table, void *page_addr, - dma_addr_t dma_addr, int flags) +void dma_update_cpu_trans(unsigned long *entry, void *page_addr, int flags) { - unsigned long *entry; - - entry = dma_walk_cpu_trans(dma_table, dma_addr); - if (!entry) { - WARN_ON_ONCE(1); - return; - } - if (flags & ZPCI_PTE_INVALID) { invalidate_pt_entry(entry); - return; } else { set_pt_pfaa(entry, page_addr); validate_pt_entry(entry); @@ -146,18 +136,25 @@ static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa, u8 *page_addr = (u8 *) (pa & PAGE_MASK); dma_addr_t start_dma_addr = dma_addr; unsigned long irq_flags; + unsigned long *entry; int i, rc = 0; if (!nr_pages) return -EINVAL; spin_lock_irqsave(&zdev->dma_table_lock, irq_flags); - if (!zdev->dma_table) + if (!zdev->dma_table) { + rc = -EINVAL; goto no_refresh; + } for (i = 0; i < nr_pages; i++) { - dma_update_cpu_trans(zdev->dma_table, page_addr, dma_addr, - flags); + entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr); + if (!entry) { + rc = -ENOMEM; + goto undo_cpu_trans; + } + dma_update_cpu_trans(entry, page_addr, flags); page_addr += PAGE_SIZE; dma_addr += PAGE_SIZE; } @@ -176,6 +173,18 @@ static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa, rc = zpci_refresh_trans((u64) zdev->fh << 32, start_dma_addr, nr_pages * PAGE_SIZE); +undo_cpu_trans: + if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) { + flags = ZPCI_PTE_INVALID; + while (i-- > 0) { + page_addr -= PAGE_SIZE; + dma_addr -= PAGE_SIZE; + entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr); + if (!entry) + break; + dma_update_cpu_trans(entry, page_addr, flags); + } + } no_refresh: spin_unlock_irqrestore(&zdev->dma_table_lock, irq_flags); @@ -260,6 +269,16 @@ out: spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags); } +static inline void zpci_err_dma(unsigned long rc, unsigned long addr) +{ + struct { + unsigned long rc; + unsigned long addr; + } __packed data = {rc, addr}; + + zpci_err_hex(&data, sizeof(data)); +} + static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction direction, @@ -270,33 +289,40 @@ static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page, unsigned long pa = page_to_phys(page) + offset; int flags = ZPCI_PTE_VALID; dma_addr_t dma_addr; + int ret; /* This rounds up number of pages based on size and offset */ nr_pages = iommu_num_pages(pa, size, PAGE_SIZE); iommu_page_index = dma_alloc_iommu(zdev, nr_pages); - if (iommu_page_index == -1) + if (iommu_page_index == -1) { + ret = -ENOSPC; goto out_err; + } /* Use rounded up size */ size = nr_pages * PAGE_SIZE; dma_addr = zdev->start_dma + iommu_page_index * PAGE_SIZE; - if (dma_addr + size > zdev->end_dma) + if (dma_addr + size > zdev->end_dma) { + ret = -ERANGE; goto out_free; + } if (direction == DMA_NONE || direction == DMA_TO_DEVICE) flags |= ZPCI_TABLE_PROTECTED; - if (!dma_update_trans(zdev, pa, dma_addr, size, flags)) { - atomic64_add(nr_pages, &zdev->mapped_pages); - return dma_addr + (offset & ~PAGE_MASK); - } + ret = dma_update_trans(zdev, pa, dma_addr, size, flags); + if (ret) + goto out_free; + + atomic64_add(nr_pages, &zdev->mapped_pages); + return dma_addr + (offset & ~PAGE_MASK); out_free: dma_free_iommu(zdev, iommu_page_index, nr_pages); out_err: zpci_err("map error:\n"); - zpci_err_hex(&pa, sizeof(pa)); + zpci_err_dma(ret, pa); return DMA_ERROR_CODE; } @@ -306,14 +332,16 @@ static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr, { struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); unsigned long iommu_page_index; - int npages; + int npages, ret; npages = iommu_num_pages(dma_addr, size, PAGE_SIZE); dma_addr = dma_addr & PAGE_MASK; - if (dma_update_trans(zdev, 0, dma_addr, npages * PAGE_SIZE, - ZPCI_TABLE_PROTECTED | ZPCI_PTE_INVALID)) { + ret = dma_update_trans(zdev, 0, dma_addr, npages * PAGE_SIZE, + ZPCI_PTE_INVALID); + if (ret) { zpci_err("unmap error:\n"); - zpci_err_hex(&dma_addr, sizeof(dma_addr)); + zpci_err_dma(ret, dma_addr); + return; } atomic64_add(npages, &zdev->unmapped_pages); @@ -338,8 +366,7 @@ static void *s390_dma_alloc(struct device *dev, size_t size, pa = page_to_phys(page); memset((void *) pa, 0, size); - map = s390_dma_map_pages(dev, page, pa % PAGE_SIZE, - size, DMA_BIDIRECTIONAL, NULL); + map = s390_dma_map_pages(dev, page, 0, size, DMA_BIDIRECTIONAL, NULL); if (dma_mapping_error(dev, map)) { free_pages(pa, get_order(size)); return NULL; |