author     Linus Torvalds <torvalds@linux-foundation.org>  2022-08-04 14:59:54 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>  2022-08-04 14:59:54 -0700
commit     7c5c3a6177fa9646884114fc7f2e970b0bc50dc9 (patch)
tree       956857522574ae7cb07d2227dc16e53d7e9e00e7 /include
parent     f0a892f599c46af673e47418c47c15e69a7b67f4 (diff)
parent     281106f938d3daaea6f8b6723a8217a2a1ef6936 (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm updates from Paolo Bonzini:
"Quite a large pull request due to a selftest API overhaul and some
patches that had come in too late for 5.19.
ARM:
- Unwinder implementations for both nVHE modes (classic and
protected), complete with an overflow stack
- Rework of the sysreg access from userspace, with a complete rewrite
of the vgic-v3 view to align with the rest of the infrastructure
- Disaggregation of the vcpu flags into separate sets to better track
their usage model
- A fix for the GICv2-on-v3 selftest
- A small set of cosmetic fixes
RISC-V:
- Track ISA extensions used by Guest using bitmap
- Added system instruction emulation framework
- Added CSR emulation framework
- Added gfp_custom flag in struct kvm_mmu_memory_cache
- Added G-stage ioremap() and iounmap() functions
- Added support for Svpbmt inside Guest
s390:
- add an interface to provide a hypervisor dump for secure guests
- improve selftests to use TAP interface
- enable interpretive execution of zPCI instructions (for PCI
passthrough)
- First part of deferred teardown
- CPU Topology
- PV attestation
- Minor fixes
x86:
- Permit guests to ignore single-bit ECC errors
- Intel IPI virtualization
- Allow getting/setting pending triple fault with
KVM_GET/SET_VCPU_EVENTS
- PEBS virtualization
- Simplify PMU emulation by just using PERF_TYPE_RAW events
- More accurate event reinjection on SVM (avoid retrying
instructions)
- Allow getting/setting the state of the speaker port data bit
- Refuse starting the kvm-intel module if VM-Entry/VM-Exit controls
are inconsistent
- "Notify" VM exit (detect microarchitectural hangs) for Intel
- Use try_cmpxchg64 instead of cmpxchg64
- Ignore benign host accesses to PMU MSRs when PMU is disabled
- Allow disabling KVM's "MONITOR/MWAIT are NOPs!" behavior
- Allow NX huge page mitigation to be disabled on a per-vm basis
- Port eager page splitting to shadow MMU as well
- Enable CMCI capability by default and handle injected UCNA errors
- Expose pid of vcpu threads in debugfs
- x2AVIC support for AMD
- cleanup PIO emulation
- Fixes for LLDT/LTR emulation
- Don't require refcounted "struct page" to create huge SPTEs
- Miscellaneous cleanups:
- MCE MSR emulation
- Use separate namespaces for guest PTEs and shadow PTEs bitmasks
- PIO emulation
- Reorganize rmap API, mostly around rmap destruction
- Do not work around very old KVM bugs for L0 that runs with nesting enabled
- new selftests API for CPUID
Generic:
- Fix races in gfn->pfn cache refresh; do not pin pages tracked by
the cache
- new selftests API using struct kvm_vcpu instead of a (vm, id)
tuple"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (606 commits)
selftests: kvm: set rax before vmcall
selftests: KVM: Add exponent check for boolean stats
selftests: KVM: Provide descriptive assertions in kvm_binary_stats_test
selftests: KVM: Check stat name before other fields
KVM: x86/mmu: remove unused variable
RISC-V: KVM: Add support for Svpbmt inside Guest/VM
RISC-V: KVM: Use PAGE_KERNEL_IO in kvm_riscv_gstage_ioremap()
RISC-V: KVM: Add G-stage ioremap() and iounmap() functions
KVM: Add gfp_custom flag in struct kvm_mmu_memory_cache
RISC-V: KVM: Add extensible CSR emulation framework
RISC-V: KVM: Add extensible system instruction emulation framework
RISC-V: KVM: Factor-out instruction emulation into separate sources
RISC-V: KVM: move preempt_disable() call in kvm_arch_vcpu_ioctl_run
RISC-V: KVM: Make kvm_riscv_guest_timer_init a void function
RISC-V: KVM: Fix variable spelling mistake
RISC-V: KVM: Improve ISA extension by using a bitmap
KVM, x86/mmu: Fix the comment around kvm_tdp_mmu_zap_leafs()
KVM: SVM: Dump Virtual Machine Save Area (VMSA) to klog
KVM: x86/mmu: Treat NX as a valid SPTE bit for NPT
KVM: x86: Do not block APIC write for non ICR registers
...
Diffstat (limited to 'include')
-rw-r--r--  include/kvm/arm_vgic.h          |   2
-rw-r--r--  include/linux/kvm_host.h        |  20
-rw-r--r--  include/linux/kvm_types.h       |   9
-rw-r--r--  include/linux/sched/user.h      |   3
-rw-r--r--  include/linux/vfio_pci_core.h   |  12
-rw-r--r--  include/uapi/linux/kvm.h        | 108
-rw-r--r--  include/uapi/linux/vfio_zdev.h  |   7
7 files changed, 142 insertions, 19 deletions
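
One theme visible in this diffstat is the rework of struct kvm_mmu_memory_cache:
the kvm_types.h hunk below replaces the fixed objects[] array with a dynamically
sized one plus a capacity field and a gfp_custom flag, and kvm_host.h gains
__kvm_mmu_topup_memory_cache() so callers that need more than the per-arch
default can size a cache explicitly (the eager page splitting work mentioned in
the log builds on this). A hedged kernel-side sketch follows; the cache name,
the capacity of 512 and the wrapper functions are illustrative assumptions, not
code from this merge.

/*
 * Arch code sizing a cache beyond KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE.
 * The objects array is now allocated on the first top-up.
 */
static struct kvm_mmu_memory_cache split_cache = {
	.gfp_zero = __GFP_ZERO,
};

static int topup_split_cache(void)
{
	return __kvm_mmu_topup_memory_cache(&split_cache, 512, 512);
}

static void *alloc_from_split_cache(void)
{
	/* Only valid after a successful top-up. */
	return kvm_mmu_memory_cache_alloc(&split_cache);
}
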
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 2d8f2e90edc2..4df9e73a8bb5 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -364,7 +364,7 @@ struct vgic_cpu {
 extern struct static_key_false vgic_v2_cpuif_trap;
 extern struct static_key_false vgic_v3_cpuif_trap;
 
-int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write);
+int kvm_set_legacy_vgic_v2_addr(struct kvm *kvm, struct kvm_arm_device_addr *dev_addr);
 void kvm_vgic_early_init(struct kvm *kvm);
 int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu);
 int kvm_vgic_create(struct kvm *kvm, u32 type);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 90a45ef7203b..1c480b1821e1 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -907,11 +907,6 @@ static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id)
 	return NULL;
 }
 
-static inline int kvm_vcpu_get_idx(struct kvm_vcpu *vcpu)
-{
-	return vcpu->vcpu_idx;
-}
-
 void kvm_destroy_vcpus(struct kvm *kvm);
 
 void vcpu_load(struct kvm_vcpu *vcpu);
@@ -1139,7 +1134,6 @@ unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot, gfn_t gfn,
 				      bool *writable);
 void kvm_release_page_clean(struct page *page);
 void kvm_release_page_dirty(struct page *page);
-void kvm_set_page_accessed(struct page *page);
 
 kvm_pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
 kvm_pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
@@ -1231,7 +1225,6 @@ struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn
 kvm_pfn_t kvm_vcpu_gfn_to_pfn_atomic(struct kvm_vcpu *vcpu, gfn_t gfn);
 kvm_pfn_t kvm_vcpu_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn);
 int kvm_vcpu_map(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvm_host_map *map);
-struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn);
 void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty);
 unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn);
 unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *writable);
@@ -1358,6 +1351,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm);
 #ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE
 int kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min);
+int __kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int capacity, int min);
 int kvm_mmu_memory_cache_nr_free_objects(struct kvm_mmu_memory_cache *mc);
 void kvm_mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc);
 void *kvm_mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc);
@@ -1436,6 +1430,8 @@ int kvm_arch_pm_notifier(struct kvm *kvm, unsigned long state);
 #ifdef __KVM_HAVE_ARCH_VCPU_DEBUGFS
 void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu,
 				  struct dentry *debugfs_dentry);
+#else
+static inline void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu) {}
 #endif
 
 int kvm_arch_hardware_enable(void);
@@ -1572,8 +1568,8 @@ void kvm_arch_sync_events(struct kvm *kvm);
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
 
-bool kvm_is_reserved_pfn(kvm_pfn_t pfn);
-bool kvm_is_zone_device_pfn(kvm_pfn_t pfn);
+struct page *kvm_pfn_to_refcounted_page(kvm_pfn_t pfn);
+bool kvm_is_zone_device_page(struct page *page);
 
 struct kvm_irq_ack_notifier {
 	struct hlist_node link;
@@ -1719,12 +1715,6 @@ static inline hpa_t pfn_to_hpa(kvm_pfn_t pfn)
 	return (hpa_t)pfn << PAGE_SHIFT;
 }
 
-static inline struct page *kvm_vcpu_gpa_to_page(struct kvm_vcpu *vcpu,
-						gpa_t gpa)
-{
-	return kvm_vcpu_gfn_to_page(vcpu, gpa_to_gfn(gpa));
-}
-
 static inline bool kvm_is_error_gpa(struct kvm *kvm, gpa_t gpa)
 {
 	unsigned long hva = gfn_to_hva(kvm, gpa_to_gfn(gpa));
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index ac1ebb37a0ff..3ca3db020e0e 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -19,6 +19,7 @@ struct kvm_memslots;
 enum kvm_mr_change;
 
 #include <linux/bits.h>
+#include <linux/mutex.h>
 #include <linux/types.h>
 #include <linux/spinlock_types.h>
 
@@ -69,6 +70,7 @@ struct gfn_to_pfn_cache {
 	struct kvm_vcpu *vcpu;
 	struct list_head list;
 	rwlock_t lock;
+	struct mutex refresh_lock;
 	void *khva;
 	kvm_pfn_t pfn;
 	enum pfn_cache_usage usage;
@@ -83,12 +85,17 @@ struct gfn_to_pfn_cache {
  * MMU flows is problematic, as is triggering reclaim, I/O, etc... while
  * holding MMU locks.  Note, these caches act more like prefetch buffers than
  * classical caches, i.e. objects are not returned to the cache on being freed.
+ *
+ * The @capacity field and @objects array are lazily initialized when the cache
+ * is topped up (__kvm_mmu_topup_memory_cache()).
  */
 struct kvm_mmu_memory_cache {
 	int nobjs;
 	gfp_t gfp_zero;
+	gfp_t gfp_custom;
 	struct kmem_cache *kmem_cache;
-	void *objects[KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE];
+	int capacity;
+	void **objects;
 };
 
 #endif
diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h
index 00ed419dd464..f054d0360a75 100644
--- a/include/linux/sched/user.h
+++ b/include/linux/sched/user.h
@@ -24,7 +24,8 @@ struct user_struct {
 	kuid_t uid;
 
 #if defined(CONFIG_PERF_EVENTS) || defined(CONFIG_BPF_SYSCALL) || \
-    defined(CONFIG_NET) || defined(CONFIG_IO_URING)
+    defined(CONFIG_NET) || defined(CONFIG_IO_URING) || \
+    defined(CONFIG_VFIO_PCI_ZDEV_KVM)
 	atomic_long_t locked_vm;
 #endif
 #ifdef CONFIG_WATCH_QUEUE
diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
index 23c176d4b073..d5d9e17f0156 100644
--- a/include/linux/vfio_pci_core.h
+++ b/include/linux/vfio_pci_core.h
@@ -206,15 +206,25 @@ static inline int vfio_pci_igd_init(struct vfio_pci_core_device *vdev)
 }
 #endif
 
-#ifdef CONFIG_S390
+#ifdef CONFIG_VFIO_PCI_ZDEV_KVM
 extern int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev,
 				       struct vfio_info_cap *caps);
+int vfio_pci_zdev_open_device(struct vfio_pci_core_device *vdev);
+void vfio_pci_zdev_close_device(struct vfio_pci_core_device *vdev);
 #else
 static inline int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev,
 					      struct vfio_info_cap *caps)
 {
 	return -ENODEV;
 }
+
+static inline int vfio_pci_zdev_open_device(struct vfio_pci_core_device *vdev)
+{
+	return 0;
+}
+
+static inline void vfio_pci_zdev_close_device(struct vfio_pci_core_device *vdev)
+{}
 #endif
 
 /* Will be exported for vfio pci drivers usage */
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index cb6e3846d27b..eed0315a77a6 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -270,6 +270,8 @@ struct kvm_xen_exit {
 #define KVM_EXIT_X86_BUS_LOCK     33
 #define KVM_EXIT_XEN              34
 #define KVM_EXIT_RISCV_SBI        35
+#define KVM_EXIT_RISCV_CSR        36
+#define KVM_EXIT_NOTIFY           37
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 /* Emulate instruction failed. */
@@ -496,6 +498,18 @@ struct kvm_run {
 			unsigned long args[6];
 			unsigned long ret[2];
 		} riscv_sbi;
+		/* KVM_EXIT_RISCV_CSR */
+		struct {
+			unsigned long csr_num;
+			unsigned long new_value;
+			unsigned long write_mask;
+			unsigned long ret_value;
+		} riscv_csr;
+		/* KVM_EXIT_NOTIFY */
+		struct {
+#define KVM_NOTIFY_CONTEXT_INVALID	(1 << 0)
+			__u32 flags;
+		} notify;
 		/* Fix the size of the union. */
 		char padding[256];
 	};
@@ -1157,6 +1171,12 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_VM_TSC_CONTROL 214
 #define KVM_CAP_SYSTEM_EVENT_DATA 215
 #define KVM_CAP_ARM_SYSTEM_SUSPEND 216
+#define KVM_CAP_S390_PROTECTED_DUMP 217
+#define KVM_CAP_X86_TRIPLE_FAULT_EVENT 218
+#define KVM_CAP_X86_NOTIFY_VMEXIT 219
+#define KVM_CAP_VM_DISABLE_NX_HUGE_PAGES 220
+#define KVM_CAP_S390_ZPCI_OP 221
+#define KVM_CAP_S390_CPU_TOPOLOGY 222
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1660,6 +1680,55 @@ struct kvm_s390_pv_unp {
 	__u64 tweak;
 };
 
+enum pv_cmd_dmp_id {
+	KVM_PV_DUMP_INIT,
+	KVM_PV_DUMP_CONFIG_STOR_STATE,
+	KVM_PV_DUMP_COMPLETE,
+	KVM_PV_DUMP_CPU,
+};
+
+struct kvm_s390_pv_dmp {
+	__u64 subcmd;
+	__u64 buff_addr;
+	__u64 buff_len;
+	__u64 gaddr;		/* For dump storage state */
+	__u64 reserved[4];
+};
+
+enum pv_cmd_info_id {
+	KVM_PV_INFO_VM,
+	KVM_PV_INFO_DUMP,
+};
+
+struct kvm_s390_pv_info_dump {
+	__u64 dump_cpu_buffer_len;
+	__u64 dump_config_mem_buffer_per_1m;
+	__u64 dump_config_finalize_len;
+};
+
+struct kvm_s390_pv_info_vm {
+	__u64 inst_calls_list[4];
+	__u64 max_cpus;
+	__u64 max_guests;
+	__u64 max_guest_addr;
+	__u64 feature_indication;
+};
+
+struct kvm_s390_pv_info_header {
+	__u32 id;
+	__u32 len_max;
+	__u32 len_written;
+	__u32 reserved;
+};
+
+struct kvm_s390_pv_info {
+	struct kvm_s390_pv_info_header header;
+	union {
+		struct kvm_s390_pv_info_dump dump;
+		struct kvm_s390_pv_info_vm vm;
+	};
+};
+
 enum pv_cmd_id {
 	KVM_PV_ENABLE,
 	KVM_PV_DISABLE,
@@ -1668,6 +1737,8 @@ enum pv_cmd_id {
 	KVM_PV_VERIFY,
 	KVM_PV_PREP_RESET,
 	KVM_PV_UNSHARE_ALL,
+	KVM_PV_INFO,
+	KVM_PV_DUMP,
 };
 
 struct kvm_pv_cmd {
@@ -2119,4 +2190,41 @@ struct kvm_stats_desc {
 /* Available with KVM_CAP_XSAVE2 */
 #define KVM_GET_XSAVE2		_IOR(KVMIO, 0xcf, struct kvm_xsave)
 
+/* Available with KVM_CAP_S390_PROTECTED_DUMP */
+#define KVM_S390_PV_CPU_COMMAND	_IOWR(KVMIO, 0xd0, struct kvm_pv_cmd)
+
+/* Available with KVM_CAP_X86_NOTIFY_VMEXIT */
+#define KVM_X86_NOTIFY_VMEXIT_ENABLED	(1ULL << 0)
+#define KVM_X86_NOTIFY_VMEXIT_USER	(1ULL << 1)
+
+/* Available with KVM_CAP_S390_ZPCI_OP */
+#define KVM_S390_ZPCI_OP	_IOW(KVMIO, 0xd1, struct kvm_s390_zpci_op)
+
+struct kvm_s390_zpci_op {
+	/* in */
+	__u32 fh;		/* target device */
+	__u8  op;		/* operation to perform */
+	__u8  pad[3];
+	union {
+		/* for KVM_S390_ZPCIOP_REG_AEN */
+		struct {
+			__u64 ibv;	/* Guest addr of interrupt bit vector */
+			__u64 sb;	/* Guest addr of summary bit */
+			__u32 flags;
+			__u32 noi;	/* Number of interrupts */
+			__u8 isc;	/* Guest interrupt subclass */
+			__u8 sbo;	/* Offset of guest summary bit vector */
+			__u16 pad;
+		} reg_aen;
+		__u64 reserved[8];
+	} u;
+};
+
+/* types for kvm_s390_zpci_op->op */
+#define KVM_S390_ZPCIOP_REG_AEN		0
+#define KVM_S390_ZPCIOP_DEREG_AEN	1
+
+/* flags for kvm_s390_zpci_op->u.reg_aen.flags */
+#define KVM_S390_ZPCIOP_REGAEN_HOST	(1 << 0)
+
 #endif /* __LINUX_KVM_H */
diff --git a/include/uapi/linux/vfio_zdev.h b/include/uapi/linux/vfio_zdev.h
index b4309397b6b2..77f2aff1f27e 100644
--- a/include/uapi/linux/vfio_zdev.h
+++ b/include/uapi/linux/vfio_zdev.h
@@ -29,6 +29,9 @@ struct vfio_device_info_cap_zpci_base {
 	__u16 fmb_length;	/* Measurement Block Length (in bytes) */
 	__u8 pft;		/* PCI Function Type */
 	__u8 gid;		/* PCI function group ID */
+	/* End of version 1 */
+	__u32 fh;		/* PCI function handle */
+	/* End of version 2 */
 };
 
 /**
@@ -47,6 +50,10 @@ struct vfio_device_info_cap_zpci_group {
 	__u16 noi;		/* Maximum number of MSIs */
 	__u16 maxstbl;		/* Maximum Store Block Length */
 	__u8 version;		/* Supported PCI Version */
+	/* End of version 1 */
+	__u8 reserved;
+	__u16 imaxstbl;		/* Maximum Interpreted Store Block Length */
+	/* End of version 2 */
 };
 
 /**
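
The KVM_S390_ZPCI_OP ioctl and struct kvm_s390_zpci_op added to
include/uapi/linux/kvm.h above give userspace a way to register a passthrough
zPCI device's adapter event notification (AEN) structures with KVM. A minimal
sketch of a caller follows; issuing the ioctl on the VM file descriptor, the
helper name and all field values are assumptions made for illustration.

#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <string.h>

static int zpci_register_aen(int vm_fd, __u32 fh, __u64 ibv_gaddr, __u64 sb_gaddr)
{
	struct kvm_s390_zpci_op op;

	memset(&op, 0, sizeof(op));
	op.fh = fh;				/* target device */
	op.op = KVM_S390_ZPCIOP_REG_AEN;	/* register event notification */
	op.u.reg_aen.ibv = ibv_gaddr;		/* guest interrupt bit vector */
	op.u.reg_aen.sb = sb_gaddr;		/* guest summary bit */
	op.u.reg_aen.noi = 32;			/* number of interrupts */
	op.u.reg_aen.isc = 3;			/* guest interrupt subclass */
	op.u.reg_aen.sbo = 0;			/* summary bit offset */
	op.u.reg_aen.flags = 0;			/* or KVM_S390_ZPCIOP_REGAEN_HOST */

	/* Assumed to be issued on the VM fd, gated by KVM_CAP_S390_ZPCI_OP. */
	return ioctl(vm_fd, KVM_S390_ZPCI_OP, &op);
}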