aboutsummaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-08-04 14:59:54 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2022-08-04 14:59:54 -0700
commit7c5c3a6177fa9646884114fc7f2e970b0bc50dc9 (patch)
tree956857522574ae7cb07d2227dc16e53d7e9e00e7 /include
parentf0a892f599c46af673e47418c47c15e69a7b67f4 (diff)
parent281106f938d3daaea6f8b6723a8217a2a1ef6936 (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm updates from Paolo Bonzini: "Quite a large pull request due to a selftest API overhaul and some patches that had come in too late for 5.19. ARM: - Unwinder implementations for both nVHE modes (classic and protected), complete with an overflow stack - Rework of the sysreg access from userspace, with a complete rewrite of the vgic-v3 view to allign with the rest of the infrastructure - Disagregation of the vcpu flags in separate sets to better track their use model. - A fix for the GICv2-on-v3 selftest - A small set of cosmetic fixes RISC-V: - Track ISA extensions used by Guest using bitmap - Added system instruction emulation framework - Added CSR emulation framework - Added gfp_custom flag in struct kvm_mmu_memory_cache - Added G-stage ioremap() and iounmap() functions - Added support for Svpbmt inside Guest s390: - add an interface to provide a hypervisor dump for secure guests - improve selftests to use TAP interface - enable interpretive execution of zPCI instructions (for PCI passthrough) - First part of deferred teardown - CPU Topology - PV attestation - Minor fixes x86: - Permit guests to ignore single-bit ECC errors - Intel IPI virtualization - Allow getting/setting pending triple fault with KVM_GET/SET_VCPU_EVENTS - PEBS virtualization - Simplify PMU emulation by just using PERF_TYPE_RAW events - More accurate event reinjection on SVM (avoid retrying instructions) - Allow getting/setting the state of the speaker port data bit - Refuse starting the kvm-intel module if VM-Entry/VM-Exit controls are inconsistent - "Notify" VM exit (detect microarchitectural hangs) for Intel - Use try_cmpxchg64 instead of cmpxchg64 - Ignore benign host accesses to PMU MSRs when PMU is disabled - Allow disabling KVM's "MONITOR/MWAIT are NOPs!" behavior - Allow NX huge page mitigation to be disabled on a per-vm basis - Port eager page splitting to shadow MMU as well - Enable CMCI capability by default and handle injected UCNA errors - Expose pid of vcpu threads in debugfs - x2AVIC support for AMD - cleanup PIO emulation - Fixes for LLDT/LTR emulation - Don't require refcounted "struct page" to create huge SPTEs - Miscellaneous cleanups: - MCE MSR emulation - Use separate namespaces for guest PTEs and shadow PTEs bitmasks - PIO emulation - Reorganize rmap API, mostly around rmap destruction - Do not workaround very old KVM bugs for L0 that runs with nesting enabled - new selftests API for CPUID Generic: - Fix races in gfn->pfn cache refresh; do not pin pages tracked by the cache - new selftests API using struct kvm_vcpu instead of a (vm, id) tuple" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (606 commits) selftests: kvm: set rax before vmcall selftests: KVM: Add exponent check for boolean stats selftests: KVM: Provide descriptive assertions in kvm_binary_stats_test selftests: KVM: Check stat name before other fields KVM: x86/mmu: remove unused variable RISC-V: KVM: Add support for Svpbmt inside Guest/VM RISC-V: KVM: Use PAGE_KERNEL_IO in kvm_riscv_gstage_ioremap() RISC-V: KVM: Add G-stage ioremap() and iounmap() functions KVM: Add gfp_custom flag in struct kvm_mmu_memory_cache RISC-V: KVM: Add extensible CSR emulation framework RISC-V: KVM: Add extensible system instruction emulation framework RISC-V: KVM: Factor-out instruction emulation into separate sources RISC-V: KVM: move preempt_disable() call in kvm_arch_vcpu_ioctl_run RISC-V: KVM: Make kvm_riscv_guest_timer_init a void function RISC-V: KVM: Fix variable spelling mistake RISC-V: KVM: Improve ISA extension by using a bitmap KVM, x86/mmu: Fix the comment around kvm_tdp_mmu_zap_leafs() KVM: SVM: Dump Virtual Machine Save Area (VMSA) to klog KVM: x86/mmu: Treat NX as a valid SPTE bit for NPT KVM: x86: Do not block APIC write for non ICR registers ...
Diffstat (limited to 'include')
-rw-r--r--include/kvm/arm_vgic.h2
-rw-r--r--include/linux/kvm_host.h20
-rw-r--r--include/linux/kvm_types.h9
-rw-r--r--include/linux/sched/user.h3
-rw-r--r--include/linux/vfio_pci_core.h12
-rw-r--r--include/uapi/linux/kvm.h108
-rw-r--r--include/uapi/linux/vfio_zdev.h7
7 files changed, 142 insertions, 19 deletions
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 2d8f2e90edc2..4df9e73a8bb5 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -364,7 +364,7 @@ struct vgic_cpu {
extern struct static_key_false vgic_v2_cpuif_trap;
extern struct static_key_false vgic_v3_cpuif_trap;
-int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write);
+int kvm_set_legacy_vgic_v2_addr(struct kvm *kvm, struct kvm_arm_device_addr *dev_addr);
void kvm_vgic_early_init(struct kvm *kvm);
int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu);
int kvm_vgic_create(struct kvm *kvm, u32 type);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 90a45ef7203b..1c480b1821e1 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -907,11 +907,6 @@ static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id)
return NULL;
}
-static inline int kvm_vcpu_get_idx(struct kvm_vcpu *vcpu)
-{
- return vcpu->vcpu_idx;
-}
-
void kvm_destroy_vcpus(struct kvm *kvm);
void vcpu_load(struct kvm_vcpu *vcpu);
@@ -1139,7 +1134,6 @@ unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot, gfn_t gfn,
bool *writable);
void kvm_release_page_clean(struct page *page);
void kvm_release_page_dirty(struct page *page);
-void kvm_set_page_accessed(struct page *page);
kvm_pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
kvm_pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
@@ -1231,7 +1225,6 @@ struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn
kvm_pfn_t kvm_vcpu_gfn_to_pfn_atomic(struct kvm_vcpu *vcpu, gfn_t gfn);
kvm_pfn_t kvm_vcpu_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn);
int kvm_vcpu_map(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvm_host_map *map);
-struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn);
void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty);
unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn);
unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *writable);
@@ -1358,6 +1351,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm);
#ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE
int kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min);
+int __kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int capacity, int min);
int kvm_mmu_memory_cache_nr_free_objects(struct kvm_mmu_memory_cache *mc);
void kvm_mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc);
void *kvm_mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc);
@@ -1436,6 +1430,8 @@ int kvm_arch_pm_notifier(struct kvm *kvm, unsigned long state);
#ifdef __KVM_HAVE_ARCH_VCPU_DEBUGFS
void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry);
+#else
+static inline void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu) {}
#endif
int kvm_arch_hardware_enable(void);
@@ -1572,8 +1568,8 @@ void kvm_arch_sync_events(struct kvm *kvm);
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
-bool kvm_is_reserved_pfn(kvm_pfn_t pfn);
-bool kvm_is_zone_device_pfn(kvm_pfn_t pfn);
+struct page *kvm_pfn_to_refcounted_page(kvm_pfn_t pfn);
+bool kvm_is_zone_device_page(struct page *page);
struct kvm_irq_ack_notifier {
struct hlist_node link;
@@ -1719,12 +1715,6 @@ static inline hpa_t pfn_to_hpa(kvm_pfn_t pfn)
return (hpa_t)pfn << PAGE_SHIFT;
}
-static inline struct page *kvm_vcpu_gpa_to_page(struct kvm_vcpu *vcpu,
- gpa_t gpa)
-{
- return kvm_vcpu_gfn_to_page(vcpu, gpa_to_gfn(gpa));
-}
-
static inline bool kvm_is_error_gpa(struct kvm *kvm, gpa_t gpa)
{
unsigned long hva = gfn_to_hva(kvm, gpa_to_gfn(gpa));
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index ac1ebb37a0ff..3ca3db020e0e 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -19,6 +19,7 @@ struct kvm_memslots;
enum kvm_mr_change;
#include <linux/bits.h>
+#include <linux/mutex.h>
#include <linux/types.h>
#include <linux/spinlock_types.h>
@@ -69,6 +70,7 @@ struct gfn_to_pfn_cache {
struct kvm_vcpu *vcpu;
struct list_head list;
rwlock_t lock;
+ struct mutex refresh_lock;
void *khva;
kvm_pfn_t pfn;
enum pfn_cache_usage usage;
@@ -83,12 +85,17 @@ struct gfn_to_pfn_cache {
* MMU flows is problematic, as is triggering reclaim, I/O, etc... while
* holding MMU locks. Note, these caches act more like prefetch buffers than
* classical caches, i.e. objects are not returned to the cache on being freed.
+ *
+ * The @capacity field and @objects array are lazily initialized when the cache
+ * is topped up (__kvm_mmu_topup_memory_cache()).
*/
struct kvm_mmu_memory_cache {
int nobjs;
gfp_t gfp_zero;
+ gfp_t gfp_custom;
struct kmem_cache *kmem_cache;
- void *objects[KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE];
+ int capacity;
+ void **objects;
};
#endif
diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h
index 00ed419dd464..f054d0360a75 100644
--- a/include/linux/sched/user.h
+++ b/include/linux/sched/user.h
@@ -24,7 +24,8 @@ struct user_struct {
kuid_t uid;
#if defined(CONFIG_PERF_EVENTS) || defined(CONFIG_BPF_SYSCALL) || \
- defined(CONFIG_NET) || defined(CONFIG_IO_URING)
+ defined(CONFIG_NET) || defined(CONFIG_IO_URING) || \
+ defined(CONFIG_VFIO_PCI_ZDEV_KVM)
atomic_long_t locked_vm;
#endif
#ifdef CONFIG_WATCH_QUEUE
diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
index 23c176d4b073..d5d9e17f0156 100644
--- a/include/linux/vfio_pci_core.h
+++ b/include/linux/vfio_pci_core.h
@@ -206,15 +206,25 @@ static inline int vfio_pci_igd_init(struct vfio_pci_core_device *vdev)
}
#endif
-#ifdef CONFIG_S390
+#ifdef CONFIG_VFIO_PCI_ZDEV_KVM
extern int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev,
struct vfio_info_cap *caps);
+int vfio_pci_zdev_open_device(struct vfio_pci_core_device *vdev);
+void vfio_pci_zdev_close_device(struct vfio_pci_core_device *vdev);
#else
static inline int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev,
struct vfio_info_cap *caps)
{
return -ENODEV;
}
+
+static inline int vfio_pci_zdev_open_device(struct vfio_pci_core_device *vdev)
+{
+ return 0;
+}
+
+static inline void vfio_pci_zdev_close_device(struct vfio_pci_core_device *vdev)
+{}
#endif
/* Will be exported for vfio pci drivers usage */
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index cb6e3846d27b..eed0315a77a6 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -270,6 +270,8 @@ struct kvm_xen_exit {
#define KVM_EXIT_X86_BUS_LOCK 33
#define KVM_EXIT_XEN 34
#define KVM_EXIT_RISCV_SBI 35
+#define KVM_EXIT_RISCV_CSR 36
+#define KVM_EXIT_NOTIFY 37
/* For KVM_EXIT_INTERNAL_ERROR */
/* Emulate instruction failed. */
@@ -496,6 +498,18 @@ struct kvm_run {
unsigned long args[6];
unsigned long ret[2];
} riscv_sbi;
+ /* KVM_EXIT_RISCV_CSR */
+ struct {
+ unsigned long csr_num;
+ unsigned long new_value;
+ unsigned long write_mask;
+ unsigned long ret_value;
+ } riscv_csr;
+ /* KVM_EXIT_NOTIFY */
+ struct {
+#define KVM_NOTIFY_CONTEXT_INVALID (1 << 0)
+ __u32 flags;
+ } notify;
/* Fix the size of the union. */
char padding[256];
};
@@ -1157,6 +1171,12 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_VM_TSC_CONTROL 214
#define KVM_CAP_SYSTEM_EVENT_DATA 215
#define KVM_CAP_ARM_SYSTEM_SUSPEND 216
+#define KVM_CAP_S390_PROTECTED_DUMP 217
+#define KVM_CAP_X86_TRIPLE_FAULT_EVENT 218
+#define KVM_CAP_X86_NOTIFY_VMEXIT 219
+#define KVM_CAP_VM_DISABLE_NX_HUGE_PAGES 220
+#define KVM_CAP_S390_ZPCI_OP 221
+#define KVM_CAP_S390_CPU_TOPOLOGY 222
#ifdef KVM_CAP_IRQ_ROUTING
@@ -1660,6 +1680,55 @@ struct kvm_s390_pv_unp {
__u64 tweak;
};
+enum pv_cmd_dmp_id {
+ KVM_PV_DUMP_INIT,
+ KVM_PV_DUMP_CONFIG_STOR_STATE,
+ KVM_PV_DUMP_COMPLETE,
+ KVM_PV_DUMP_CPU,
+};
+
+struct kvm_s390_pv_dmp {
+ __u64 subcmd;
+ __u64 buff_addr;
+ __u64 buff_len;
+ __u64 gaddr; /* For dump storage state */
+ __u64 reserved[4];
+};
+
+enum pv_cmd_info_id {
+ KVM_PV_INFO_VM,
+ KVM_PV_INFO_DUMP,
+};
+
+struct kvm_s390_pv_info_dump {
+ __u64 dump_cpu_buffer_len;
+ __u64 dump_config_mem_buffer_per_1m;
+ __u64 dump_config_finalize_len;
+};
+
+struct kvm_s390_pv_info_vm {
+ __u64 inst_calls_list[4];
+ __u64 max_cpus;
+ __u64 max_guests;
+ __u64 max_guest_addr;
+ __u64 feature_indication;
+};
+
+struct kvm_s390_pv_info_header {
+ __u32 id;
+ __u32 len_max;
+ __u32 len_written;
+ __u32 reserved;
+};
+
+struct kvm_s390_pv_info {
+ struct kvm_s390_pv_info_header header;
+ union {
+ struct kvm_s390_pv_info_dump dump;
+ struct kvm_s390_pv_info_vm vm;
+ };
+};
+
enum pv_cmd_id {
KVM_PV_ENABLE,
KVM_PV_DISABLE,
@@ -1668,6 +1737,8 @@ enum pv_cmd_id {
KVM_PV_VERIFY,
KVM_PV_PREP_RESET,
KVM_PV_UNSHARE_ALL,
+ KVM_PV_INFO,
+ KVM_PV_DUMP,
};
struct kvm_pv_cmd {
@@ -2119,4 +2190,41 @@ struct kvm_stats_desc {
/* Available with KVM_CAP_XSAVE2 */
#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave)
+/* Available with KVM_CAP_S390_PROTECTED_DUMP */
+#define KVM_S390_PV_CPU_COMMAND _IOWR(KVMIO, 0xd0, struct kvm_pv_cmd)
+
+/* Available with KVM_CAP_X86_NOTIFY_VMEXIT */
+#define KVM_X86_NOTIFY_VMEXIT_ENABLED (1ULL << 0)
+#define KVM_X86_NOTIFY_VMEXIT_USER (1ULL << 1)
+
+/* Available with KVM_CAP_S390_ZPCI_OP */
+#define KVM_S390_ZPCI_OP _IOW(KVMIO, 0xd1, struct kvm_s390_zpci_op)
+
+struct kvm_s390_zpci_op {
+ /* in */
+ __u32 fh; /* target device */
+ __u8 op; /* operation to perform */
+ __u8 pad[3];
+ union {
+ /* for KVM_S390_ZPCIOP_REG_AEN */
+ struct {
+ __u64 ibv; /* Guest addr of interrupt bit vector */
+ __u64 sb; /* Guest addr of summary bit */
+ __u32 flags;
+ __u32 noi; /* Number of interrupts */
+ __u8 isc; /* Guest interrupt subclass */
+ __u8 sbo; /* Offset of guest summary bit vector */
+ __u16 pad;
+ } reg_aen;
+ __u64 reserved[8];
+ } u;
+};
+
+/* types for kvm_s390_zpci_op->op */
+#define KVM_S390_ZPCIOP_REG_AEN 0
+#define KVM_S390_ZPCIOP_DEREG_AEN 1
+
+/* flags for kvm_s390_zpci_op->u.reg_aen.flags */
+#define KVM_S390_ZPCIOP_REGAEN_HOST (1 << 0)
+
#endif /* __LINUX_KVM_H */
diff --git a/include/uapi/linux/vfio_zdev.h b/include/uapi/linux/vfio_zdev.h
index b4309397b6b2..77f2aff1f27e 100644
--- a/include/uapi/linux/vfio_zdev.h
+++ b/include/uapi/linux/vfio_zdev.h
@@ -29,6 +29,9 @@ struct vfio_device_info_cap_zpci_base {
__u16 fmb_length; /* Measurement Block Length (in bytes) */
__u8 pft; /* PCI Function Type */
__u8 gid; /* PCI function group ID */
+ /* End of version 1 */
+ __u32 fh; /* PCI function handle */
+ /* End of version 2 */
};
/**
@@ -47,6 +50,10 @@ struct vfio_device_info_cap_zpci_group {
__u16 noi; /* Maximum number of MSIs */
__u16 maxstbl; /* Maximum Store Block Length */
__u8 version; /* Supported PCI Version */
+ /* End of version 1 */
+ __u8 reserved;
+ __u16 imaxstbl; /* Maximum Interpreted Store Block Length */
+ /* End of version 2 */
};
/**