diff options
41 files changed, 1171 insertions, 383 deletions
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 25ed956f9af1..6844a7550392 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -187,10 +187,8 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu) return (vcpu_read_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101; } -static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size) +static inline void __clean_dcache_guest_page(void *va, size_t size) { - void *va = page_address(pfn_to_page(pfn)); - /* * With FWB, we ensure that the guest always accesses memory using * cacheable attributes, and we don't have to clean to PoC when @@ -203,16 +201,13 @@ static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size) kvm_flush_dcache_to_poc(va, size); } -static inline void __invalidate_icache_guest_page(kvm_pfn_t pfn, - unsigned long size) +static inline void __invalidate_icache_guest_page(void *va, size_t size) { if (icache_is_aliasing()) { /* any kind of VIPT cache */ __flush_icache_all(); } else if (is_kernel_in_hyp_mode() || !icache_is_vpipt()) { /* PIPT or VPIPT at EL2 (see comment in __kvm_tlb_flush_vmid_ipa) */ - void *va = page_address(pfn_to_page(pfn)); - invalidate_icache_range((unsigned long)va, (unsigned long)va + size); } diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index c3674c47d48c..f004c0115d89 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -27,23 +27,29 @@ typedef u64 kvm_pte_t; /** * struct kvm_pgtable_mm_ops - Memory management callbacks. - * @zalloc_page: Allocate a single zeroed memory page. The @arg parameter - * can be used by the walker to pass a memcache. The - * initial refcount of the page is 1. - * @zalloc_pages_exact: Allocate an exact number of zeroed memory pages. The - * @size parameter is in bytes, and is rounded-up to the - * next page boundary. The resulting allocation is - * physically contiguous. - * @free_pages_exact: Free an exact number of memory pages previously - * allocated by zalloc_pages_exact. - * @get_page: Increment the refcount on a page. - * @put_page: Decrement the refcount on a page. When the refcount - * reaches 0 the page is automatically freed. - * @page_count: Return the refcount of a page. - * @phys_to_virt: Convert a physical address into a virtual address mapped - * in the current context. - * @virt_to_phys: Convert a virtual address mapped in the current context - * into a physical address. + * @zalloc_page: Allocate a single zeroed memory page. + * The @arg parameter can be used by the walker + * to pass a memcache. The initial refcount of + * the page is 1. + * @zalloc_pages_exact: Allocate an exact number of zeroed memory pages. + * The @size parameter is in bytes, and is rounded + * up to the next page boundary. The resulting + * allocation is physically contiguous. + * @free_pages_exact: Free an exact number of memory pages previously + * allocated by zalloc_pages_exact. + * @get_page: Increment the refcount on a page. + * @put_page: Decrement the refcount on a page. When the + * refcount reaches 0 the page is automatically + * freed. + * @page_count: Return the refcount of a page. + * @phys_to_virt: Convert a physical address into a virtual + * address mapped in the current context. + * @virt_to_phys: Convert a virtual address mapped in the current + * context into a physical address. + * @dcache_clean_inval_poc: Clean and invalidate the data cache to the PoC + * for the specified memory address range. + * @icache_inval_pou: Invalidate the instruction cache to the PoU + * for the specified memory address range. */ struct kvm_pgtable_mm_ops { void* (*zalloc_page)(void *arg); @@ -54,6 +60,8 @@ struct kvm_pgtable_mm_ops { int (*page_count)(void *addr); void* (*phys_to_virt)(phys_addr_t phys); phys_addr_t (*virt_to_phys)(void *addr); + void (*dcache_clean_inval_poc)(void *addr, size_t size); + void (*icache_inval_pou)(void *addr, size_t size); }; /** diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index 74e0699661e9..3df67c127489 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -9,6 +9,7 @@ #include <linux/kvm_host.h> #include <linux/interrupt.h> #include <linux/irq.h> +#include <linux/irqdomain.h> #include <linux/uaccess.h> #include <clocksource/arm_arch_timer.h> @@ -973,36 +974,154 @@ static int kvm_timer_dying_cpu(unsigned int cpu) return 0; } -int kvm_timer_hyp_init(bool has_gic) +static int timer_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu) { - struct arch_timer_kvm_info *info; - int err; + if (vcpu) + irqd_set_forwarded_to_vcpu(d); + else + irqd_clr_forwarded_to_vcpu(d); - info = arch_timer_get_kvm_info(); - timecounter = &info->timecounter; + return 0; +} - if (!timecounter->cc) { - kvm_err("kvm_arch_timer: uninitialized timecounter\n"); - return -ENODEV; +static int timer_irq_set_irqchip_state(struct irq_data *d, + enum irqchip_irq_state which, bool val) +{ + if (which != IRQCHIP_STATE_ACTIVE || !irqd_is_forwarded_to_vcpu(d)) + return irq_chip_set_parent_state(d, which, val); + + if (val) + irq_chip_mask_parent(d); + else + irq_chip_unmask_parent(d); + + return 0; +} + +static void timer_irq_eoi(struct irq_data *d) +{ + if (!irqd_is_forwarded_to_vcpu(d)) + irq_chip_eoi_parent(d); +} + +static void timer_irq_ack(struct irq_data *d) +{ + d = d->parent_data; + if (d->chip->irq_ack) + d->chip->irq_ack(d); +} + +static struct irq_chip timer_chip = { + .name = "KVM", + .irq_ack = timer_irq_ack, + .irq_mask = irq_chip_mask_parent, + .irq_unmask = irq_chip_unmask_parent, + .irq_eoi = timer_irq_eoi, + .irq_set_type = irq_chip_set_type_parent, + .irq_set_vcpu_affinity = timer_irq_set_vcpu_affinity, + .irq_set_irqchip_state = timer_irq_set_irqchip_state, +}; + +static int timer_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) +{ + irq_hw_number_t hwirq = (uintptr_t)arg; + + return irq_domain_set_hwirq_and_chip(domain, virq, hwirq, + &timer_chip, NULL); +} + +static void timer_irq_domain_free(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs) +{ +} + +static const struct irq_domain_ops timer_domain_ops = { + .alloc = timer_irq_domain_alloc, + .free = timer_irq_domain_free, +}; + +static struct irq_ops arch_timer_irq_ops = { + .get_input_level = kvm_arch_timer_get_input_level, +}; + +static void kvm_irq_fixup_flags(unsigned int virq, u32 *flags) +{ + *flags = irq_get_trigger_type(virq); + if (*flags != IRQF_TRIGGER_HIGH && *flags != IRQF_TRIGGER_LOW) { + kvm_err("Invalid trigger for timer IRQ%d, assuming level low\n", + virq); + *flags = IRQF_TRIGGER_LOW; } +} - /* First, do the virtual EL1 timer irq */ +static int kvm_irq_init(struct arch_timer_kvm_info *info) +{ + struct irq_domain *domain = NULL; if (info->virtual_irq <= 0) { kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n", info->virtual_irq); return -ENODEV; } + host_vtimer_irq = info->virtual_irq; + kvm_irq_fixup_flags(host_vtimer_irq, &host_vtimer_irq_flags); + + if (kvm_vgic_global_state.no_hw_deactivation) { + struct fwnode_handle *fwnode; + struct irq_data *data; + + fwnode = irq_domain_alloc_named_fwnode("kvm-timer"); + if (!fwnode) + return -ENOMEM; + + /* Assume both vtimer and ptimer in the same parent */ + data = irq_get_irq_data(host_vtimer_irq); + domain = irq_domain_create_hierarchy(data->domain, 0, + NR_KVM_TIMERS, fwnode, + &timer_domain_ops, NULL); + if (!domain) { + irq_domain_free_fwnode(fwnode); + return -ENOMEM; + } + + arch_timer_irq_ops.flags |= VGIC_IRQ_SW_RESAMPLE; + WARN_ON(irq_domain_push_irq(domain, host_vtimer_irq, + (void *)TIMER_VTIMER)); + } - host_vtimer_irq_flags = irq_get_trigger_type(host_vtimer_irq); - if (host_vtimer_irq_flags != IRQF_TRIGGER_HIGH && - host_vtimer_irq_flags != IRQF_TRIGGER_LOW) { - kvm_err("Invalid trigger for vtimer IRQ%d, assuming level low\n", - host_vtimer_irq); - host_vtimer_irq_flags = IRQF_TRIGGER_LOW; + if (info->physical_irq > 0) { + host_ptimer_irq = info->physical_irq; + kvm_irq_fixup_flags(host_ptimer_irq, &host_ptimer_irq_flags); + + if (domain) + WARN_ON(irq_domain_push_irq(domain, host_ptimer_irq, + (void *)TIMER_PTIMER)); } + return 0; +} + +int kvm_timer_hyp_init(bool has_gic) +{ + struct arch_timer_kvm_info *info; + int err; + + info = arch_timer_get_kvm_info(); + timecounter = &info->timecounter; + + if (!timecounter->cc) { + kvm_err("kvm_arch_timer: uninitialized timecounter\n"); + return -ENODEV; + } + + err = kvm_irq_init(info); + if (err) + return err; + + /* First, do the virtual EL1 timer irq */ + err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler, "kvm guest vtimer", kvm_get_running_vcpus()); if (err) { @@ -1027,15 +1146,6 @@ int kvm_timer_hyp_init(bool has_gic) /* Now let's do the physical EL1 timer irq */ if (info->physical_irq > 0) { - host_ptimer_irq = info->physical_irq; - host_ptimer_irq_flags = irq_get_trigger_type(host_ptimer_irq); - if (host_ptimer_irq_flags != IRQF_TRIGGER_HIGH && - host_ptimer_irq_flags != IRQF_TRIGGER_LOW) { - kvm_err("Invalid trigger for ptimer IRQ%d, assuming level low\n", - host_ptimer_irq); - host_ptimer_irq_flags = IRQF_TRIGGER_LOW; - } - err = request_percpu_irq(host_ptimer_irq, kvm_arch_timer_handler, "kvm guest ptimer", kvm_get_running_vcpus()); if (err) { @@ -1143,7 +1253,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) ret = kvm_vgic_map_phys_irq(vcpu, map.direct_vtimer->host_timer_irq, map.direct_vtimer->irq.irq, - kvm_arch_timer_get_input_level); + &arch_timer_irq_ops); if (ret) return ret; @@ -1151,7 +1261,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) ret = kvm_vgic_map_phys_irq(vcpu, map.direct_ptimer->host_timer_irq, map.direct_ptimer->irq.irq, - kvm_arch_timer_get_input_level); + &arch_timer_irq_ops); } if (ret) diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index 5f49df4ffdd8..9aa9b73475c9 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -76,6 +76,7 @@ el1_trap: b __guest_exit el1_irq: +el1_fiq: get_vcpu_ptr x1, x0 mov x0, #ARM_EXCEPTION_IRQ b __guest_exit @@ -131,7 +132,6 @@ SYM_CODE_END(\label) invalid_vector el2t_error_invalid invalid_vector el2h_irq_invalid invalid_vector el2h_fiq_invalid - invalid_vector el1_fiq_invalid .ltorg @@ -179,12 +179,12 @@ SYM_CODE_START(__kvm_hyp_vector) valid_vect el1_sync // Synchronous 64-bit EL1 valid_vect el1_irq // IRQ 64-bit EL1 - invalid_vect el1_fiq_invalid // FIQ 64-bit EL1 + valid_vect el1_fiq // FIQ 64-bit EL1 valid_vect el1_error // Error 64-bit EL1 valid_vect el1_sync // Synchronous 32-bit EL1 valid_vect el1_irq // IRQ 32-bit EL1 - invalid_vect el1_fiq_invalid // FIQ 32-bit EL1 + valid_vect el1_fiq // FIQ 32-bit EL1 valid_vect el1_error // Error 32-bit EL1 SYM_CODE_END(__kvm_hyp_vector) diff --git a/arch/arm64/kvm/hyp/include/nvhe/gfp.h b/arch/arm64/kvm/hyp/include/nvhe/gfp.h index 18a4494337bd..fb0f523d1492 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/gfp.h +++ b/arch/arm64/kvm/hyp/include/nvhe/gfp.h @@ -7,7 +7,7 @@ #include <nvhe/memory.h> #include <nvhe/spinlock.h> -#define HYP_NO_ORDER UINT_MAX +#define HYP_NO_ORDER USHRT_MAX struct hyp_pool { /* @@ -19,48 +19,13 @@ struct hyp_pool { struct list_head free_area[MAX_ORDER]; phys_addr_t range_start; phys_addr_t range_end; - unsigned int max_order; + unsigned short max_order; }; -static inline void hyp_page_ref_inc(struct hyp_page *p) -{ - struct hyp_pool *pool = hyp_page_to_pool(p); - - hyp_spin_lock(&pool->lock); - p->refcount++; - hyp_spin_unlock(&pool->lock); -} - -static inline int hyp_page_ref_dec_and_test(struct hyp_page *p) -{ - struct hyp_pool *pool = hyp_page_to_pool(p); - int ret; - - hyp_spin_lock(&pool->lock); - p->refcount--; - ret = (p->refcount == 0); - hyp_spin_unlock(&pool->lock); - - return ret; -} - -static inline void hyp_set_page_refcounted(struct hyp_page *p) -{ - struct hyp_pool *pool = hyp_page_to_pool(p); - - hyp_spin_lock(&pool->lock); - if (p->refcount) { - hyp_spin_unlock(&pool->lock); - BUG(); - } - p->refcount = 1; - hyp_spin_unlock(&pool->lock); -} - /* Allocation */ -void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order); -void hyp_get_page(void *addr); -void hyp_put_page(void *addr); +void *hyp_alloc_pages(struct hyp_pool *pool, unsigned short order); +void hyp_get_page(struct hyp_pool *pool, void *addr); +void hyp_put_page(struct hyp_pool *pool, void *addr); /* Used pages cannot be freed */ int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages, diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index 42d81ec739fa..9c227d87c36d 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -23,7 +23,7 @@ extern struct host_kvm host_kvm; int __pkvm_prot_finalize(void); int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end); -int kvm_host_prepare_stage2(void *mem_pgt_pool, void *dev_pgt_pool); +int kvm_host_prepare_stage2(void *pgt_pool_base); void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt); static __always_inline void __load_host_stage2(void) diff --git a/arch/arm64/kvm/hyp/include/nvhe/memory.h b/arch/arm64/kvm/hyp/include/nvhe/memory.h index fd78bde939ee..592b7edb3edb 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/memory.h +++ b/arch/arm64/kvm/hyp/include/nvhe/memory.h @@ -7,12 +7,9 @@ #include <linux/types.h> -struct hyp_pool; struct hyp_page { - unsigned int refcount; - unsigned int order; - struct hyp_pool *pool; - struct list_head node; + unsigned short refcount; + unsigned short order; }; extern u64 __hyp_vmemmap; diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h index 0095f6289742..8ec3a5a7744b 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mm.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h @@ -78,19 +78,20 @@ static inline unsigned long hyp_s1_pgtable_pages(void) return res; } -static inline unsigned long host_s2_mem_pgtable_pages(void) +static inline unsigned long host_s2_pgtable_pages(void) { + unsigned long res; + /* * Include an extra 16 pages to safely upper-bound the worst case of * concatenated pgds. */ - return __hyp_pgtable_total_pages() + 16; -} + res = __hyp_pgtable_total_pages() + 16; -static inline unsigned long host_s2_dev_pgtable_pages(void) -{ /* Allow 1 GiB for MMIO mappings */ - return __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT); + res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT); + + return res; } #endif /* __KVM_HYP_MM_H */ diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 4b60c0056c04..d938ce95d3bd 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -23,8 +23,7 @@ extern unsigned long hyp_nr_cpus; struct host_kvm host_kvm; -static struct hyp_pool host_s2_mem; -static struct hyp_pool host_s2_dev; +static struct hyp_pool host_s2_pool; /* * Copies of the host's CPU features registers holding sanitized values. @@ -36,7 +35,7 @@ static const u8 pkvm_hyp_id = 1; static void *host_s2_zalloc_pages_exact(size_t size) { - return hyp_alloc_pages(&host_s2_mem, get_order(size)); + return hyp_alloc_pages(&host_s2_pool, get_order(size)); } static void *host_s2_zalloc_page(void *pool) @@ -44,20 +43,24 @@ static void *host_s2_zalloc_page(void *pool) return hyp_alloc_pages(pool, 0); } -static int prepare_s2_pools(void *mem_pgt_pool, void *dev_pgt_pool) +static void host_s2_get_page(void *addr) +{ + hyp_get_page(&host_s2_pool, addr); +} + +static void host_s2_put_page(void *addr) +{ + hyp_put_page(&host_s2_pool, addr); +} + +static int prepare_s2_pool(void *pgt_pool_base) { unsigned long nr_pages, pfn; int ret; - pfn = hyp_virt_to_pfn(mem_pgt_pool); - nr_pages = host_s2_mem_pgtable_pages(); - ret = hyp_pool_init(&host_s2_mem, pfn, nr_pages, 0); - if (ret) - return ret; - - pfn = hyp_virt_to_pfn(dev_pgt_pool); - nr_pages = host_s2_dev_pgtable_pages(); - ret = hyp_pool_init(&host_s2_dev, pfn, nr_pages, 0); + pfn = hyp_virt_to_pfn(pgt_pool_base); + nr_pages = host_s2_pgtable_pages(); + ret = hyp_pool_init(&host_s2_pool, pfn, nr_pages, 0); if (ret) return ret; @@ -67,8 +70,8 @@ static int prepare_s2_pools(void *mem_pgt_pool, void *dev_pgt_pool) .phys_to_virt = hyp_phys_to_virt, .virt_to_phys = hyp_virt_to_phys, .page_count = hyp_page_count, - .get_page = hyp_get_page, - .put_page = hyp_put_page, + .get_page = host_s2_get_page, + .put_page = host_s2_put_page, }; return 0; @@ -86,7 +89,7 @@ static void prepare_host_vtcr(void) id_aa64mmfr1_el1_sys_val, phys_shift); } -int kvm_host_prepare_stage2(void *mem_pgt_pool, void *dev_pgt_pool) +int kvm_host_prepare_stage2(void *pgt_pool_base) { struct kvm_s2_mmu *mmu = &host_kvm.arch.mmu; int ret; @@ -94,7 +97,7 @@ int kvm_host_prepare_stage2(void *mem_pgt_pool, void *dev_pgt_pool) prepare_host_vtcr(); hyp_spin_lock_init(&host_kvm.lock); - ret = prepare_s2_pools(mem_pgt_pool, dev_pgt_pool); + ret = prepare_s2_pool(pgt_pool_base); if (ret) return ret; @@ -199,11 +202,10 @@ static bool range_is_memory(u64 start, u64 end) } static inline int __host_stage2_idmap(u64 start, u64 end, - enum kvm_pgtable_prot prot, - struct hyp_pool *pool) + enum kvm_pgtable_prot prot) { return kvm_pgtable_stage2_map(&host_kvm.pgt, start, end - start, start, - prot, pool); + prot, &host_s2_pool); } static int host_stage2_idmap(u64 addr) @@ -211,7 +213,6 @@ static int host_stage2_idmap(u64 addr) enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W; struct kvm_mem_range range; bool is_memory = find_mem_range(addr, &range); - struct hyp_pool *pool = is_memory ? &host_s2_mem : &host_s2_dev; int ret; if (is_memory) @@ -222,22 +223,21 @@ static int host_stage2_idmap(u64 addr) if (ret) goto unlock; - ret = __host_stage2_idmap(range.start, range.end, prot, pool); - if (is_memory || ret != -ENOMEM) + ret = __host_stage2_idmap(range.start, range.end, prot); + if (ret != -ENOMEM) goto unlock; /* - * host_s2_mem has been provided with enough pages to cover all of - * memory with page granularity, so we should never hit the ENOMEM case. - * However, it is difficult to know how much of the MMIO range we will - * need to cover upfront, so we may need to 'recycle' the pages if we - * run out. + * The pool has been provided with enough pages to cover all of memory + * with page granularity, but it is difficult to know how much of the + * MMIO range we will need to cover upfront, so we may need to 'recycle' + * the pages if we run out. */ ret = host_stage2_unmap_dev_all(); if (ret) goto unlock; - ret = __host_stage2_idmap(range.start, range.end, prot, pool); + ret = __host_stage2_idmap(range.start, range.end, prot); unlock: hyp_spin_unlock(&host_kvm.lock); @@ -258,7 +258,7 @@ int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end) hyp_spin_lock(&host_kvm.lock); ret = kvm_pgtable_stage2_set_owner(&host_kvm.pgt, start, end - start, - &host_s2_mem, pkvm_hyp_id); + &host_s2_pool, pkvm_hyp_id); hyp_spin_unlock(&host_kvm.lock); return ret != -EAGAIN ? ret : 0; diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c index 237e03bf0cb1..41fc25bdfb34 100644 --- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c +++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c @@ -32,7 +32,7 @@ u64 __hyp_vmemmap; */ static struct hyp_page *__find_buddy_nocheck(struct hyp_pool *pool, struct hyp_page *p, - unsigned int order) + unsigned short order) { phys_addr_t addr = hyp_page_to_phys(p); @@ -51,21 +51,49 @@ static struct hyp_page *__find_buddy_nocheck(struct hyp_pool *pool, /* Find a buddy page currently available for allocation */ static struct hyp_page *__find_buddy_avail(struct hyp_pool *pool, struct hyp_page *p, - unsigned int order) + unsigned short order) { struct hyp_page *buddy = __find_buddy_nocheck(pool, p, order); - if (!buddy || buddy->order != order || list_empty(&buddy->node)) + if (!buddy || buddy->order != order || buddy->refcount) return NULL; return buddy; } +/* + * Pages that are available for allocation are tracked in free-lists, so we use + * the pages themselves to store the list nodes to avoid wasting space. As the + * allocator always returns zeroed pages (which are zeroed on the hyp_put_page() + * path to optimize allocation speed), we also need to clean-up the list node in + * each page when we take it out of the list. + */ +static inline void page_remove_from_list(struct hyp_page *p) +{ + struct list_head *node = hyp_page_to_virt(p); + + __list_del_entry(node); + memset(node, 0, sizeof(*node)); +} + +static inline void page_add_to_list(struct hyp_page *p, struct list_head *head) +{ + struct list_head *node = hyp_page_to_virt(p); + + INIT_LIST_HEAD(node); + list_add_tail(node, head); +} + +static inline struct hyp_page *node_to_page(struct list_head *node) +{ + return hyp_virt_to_page(node); +} + static void __hyp_attach_page(struct hyp_pool *pool, struct hyp_page *p) { - unsigned int order = p->order; + unsigned short order = p->order; struct hyp_page *buddy; memset(hyp_page_to_virt(p), 0, PAGE_SIZE << p->order); @@ -83,32 +111,23 @@ static void __hyp_attach_page(struct hyp_pool *pool, break; /* Take the buddy out of its list, and coallesce with @p */ - list_del_init(&buddy->node); + page_remove_from_list(buddy); buddy->order = HYP_NO_ORDER; p = min(p, buddy); } /* Mark the new head, and insert it */ p->order = order; - list_add_tail(&p->node, &pool->free_area[order]); -} - -static void hyp_attach_page(struct hyp_page *p) -{ - struct hyp_pool *pool = hyp_page_to_pool(p); - - hyp_spin_lock(&pool->lock); - __hyp_attach_page(pool, p); - hyp_spin_unlock(&pool->lock); + page_add_to_list(p, &pool->free_area[order]); } static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool, struct hyp_page *p, - unsigned int order) + unsigned short order) { struct hyp_page *buddy; - list_del_init(&p->node); + page_remove_from_list(p); while (p->order > order) { /* * The buddy of order n - 1 currently has HYP_NO_ORDER as it @@ -119,30 +138,64 @@ static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool, p->order--; buddy = __find_buddy_nocheck(pool, p, p->order); buddy->order = p->order; - list_add_tail(&buddy->node, &pool->free_area[buddy->order]); + page_add_to_list(buddy, &pool->free_area[buddy->order]); } return p; } -void hyp_put_page(void *addr) +static inline void hyp_page_ref_inc(struct hyp_page *p) { - struct hyp_page *p = hyp_virt_to_page(addr); + BUG_ON(p->refcount == USHRT_MAX); + p->refcount++; +} +static inline int hyp_page_ref_dec_and_test(struct hyp_page *p) +{ + p->refcount--; + return (p->refcount == 0); +} + +static inline void hyp_set_page_refcounted(struct hyp_page *p) +{ + BUG_ON(p->refcount); + p->refcount = 1; +} + +static void __hyp_put_page(struct hyp_pool *pool, struct hyp_page *p) +{ if (hyp_page_ref_dec_and_test(p)) - hyp_attach_page(p); + __hyp_attach_page(pool, p); +} + +/* + * Changes to the buddy tree and page refcounts must be done with the hyp_pool + * lock held. If a refcount change requires an update to the buddy tree (e.g. + * hyp_put_page()), both operations must be done within the same critical + * section to guarantee transient states (e.g. a page with null refcount but + * not yet attached to a free list) can't be observed by well-behaved readers. + */ +void hyp_put_page(struct hyp_pool *pool, void *addr) +{ + struct hyp_page *p = hyp_virt_to_page(addr); + + hyp_spin_lock(&pool->lock); + __hyp_put_page(pool, p); + hyp_spin_unlock(&pool->lock); } -void hyp_get_page(void *addr) +void hyp_get_page(struct hyp_pool *pool, void *addr) { struct hyp_page *p = hyp_virt_to_page(addr); + hyp_spin_lock(&pool->lock); hyp_page_ref_inc(p); + hyp_spin_unlock(&pool->lock); } -void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order) +void *hyp_alloc_pages(struct hyp_pool *pool, unsigned short order) { - unsigned int i = order; + unsigned short i = order; struct hyp_page *p; hyp_spin_lock(&pool->lock); @@ -156,11 +209,11 @@ void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order) } /* Extract it from the tree at the right order */ - p = list_first_entry(&pool->free_area[i], struct hyp_page, node); + p = node_to_page(pool->free_area[i].next); p = __hyp_extract_page(pool, p, order); - hyp_spin_unlock(&pool->lock); hyp_set_page_refcounted(p); + hyp_spin_unlock(&pool->lock); return hyp_page_to_virt(p); } @@ -181,15 +234,14 @@ int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages, /* Init the vmemmap portion */ p = hyp_phys_to_page(phys); - memset(p, 0, sizeof(*p) * nr_pages); for (i = 0; i < nr_pages; i++) { - p[i].pool = pool; - INIT_LIST_HEAD(&p[i].node); + p[i].order = 0; + hyp_set_page_refcounted(&p[i]); } /* Attach the unused pages to the buddy tree */ for (i = reserved_pages; i < nr_pages; i++) - __hyp_attach_page(pool, &p[i]); + __hyp_put_page(pool, &p[i]); return 0; } diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index a3d3a275344e..f834833ac921 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -24,8 +24,7 @@ unsigned long hyp_nr_cpus; static void *vmemmap_base; static void *hyp_pgt_base; -static void *host_s2_mem_pgt_base; -static void *host_s2_dev_pgt_base; +static void *host_s2_pgt_base; static struct kvm_pgtable_mm_ops pkvm_pgtable_mm_ops; static int divide_memory_pool(void *virt, unsigned long size) @@ -45,14 +44,9 @@ static int divide_memory_pool(void *virt, unsigned long size) if (!hyp_pgt_base) return -ENOMEM; - nr_pages = host_s2_mem_pgtable_pages(); - host_s2_mem_pgt_base = hyp_early_alloc_contig(nr_pages); - if (!host_s2_mem_pgt_base) - return -ENOMEM; - - nr_pages = host_s2_dev_pgtable_pages(); - host_s2_dev_pgt_base = hyp_early_alloc_contig(nr_pages); - if (!host_s2_dev_pgt_base) + nr_pages = host_s2_pgtable_pages(); + host_s2_pgt_base = hyp_early_alloc_contig(nr_pages); + if (!host_s2_pgt_base) return -ENOMEM; return 0; @@ -143,6 +137,16 @@ static void *hyp_zalloc_hyp_page(void *arg) return hyp_alloc_pages(&hpool, 0); } +static void hpool_get_page(void *addr) +{ + hyp_get_page(&hpool, addr); +} + +static void hpool_put_page(void *addr) +{ + hyp_put_page(&hpool, addr); +} + void __noreturn __pkvm_init_finalise(void) { struct kvm_host_data *host_data = this_cpu_ptr(&kvm_host_data); @@ -158,7 +162,7 @@ void __noreturn __pkvm_init_finalise(void) if (ret) goto out; - ret = kvm_host_prepare_stage2(host_s2_mem_pgt_base, host_s2_dev_pgt_base); + ret = kvm_host_prepare_stage2(host_s2_pgt_base); if (ret) goto out; @@ -166,8 +170,8 @@ void __noreturn __pkvm_init_finalise(void) .zalloc_page = hyp_zalloc_hyp_page, .phys_to_virt = hyp_phys_to_virt, .virt_to_phys = hyp_virt_to_phys, - .get_page = hyp_get_page, - .put_page = hyp_put_page, + .get_page = hpool_get_page, + .put_page = hpool_put_page, }; pkvm_pgtable.mm_ops = &pkvm_pgtable_mm_ops; diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index c37c1dc4feaf..72f1d8f50094 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -577,12 +577,24 @@ static void stage2_put_pte(kvm_pte_t *ptep, struct kvm_s2_mmu *mmu, u64 addr, mm_ops->put_page(ptep); } +static bool stage2_pte_cacheable(struct kvm_pgtable *pgt, kvm_pte_t pte) +{ + u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR; + return memattr == KVM_S2_MEMATTR(pgt, NORMAL); +} + +static bool stage2_pte_executable(kvm_pte_t pte) +{ + return !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN); +} + static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, struct stage2_map_data *data) { kvm_pte_t new, old = *ptep; u64 granule = kvm_granule_size(level), phys = data->phys; + struct kvm_pgtable *pgt = data->mmu->pgt; struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; if (!kvm_block_mapping_supported(addr, end, phys, level)) @@ -606,6 +618,14 @@ static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level, stage2_put_pte(ptep, data->mmu, addr, level, mm_ops); } + /* Perform CMOs before installation of the guest stage-2 PTE */ + if (mm_ops->dcache_clean_inval_poc && stage2_pte_cacheable(pgt, new)) + mm_ops->dcache_clean_inval_poc(kvm_pte_follow(new, mm_ops), + granule); + + if (mm_ops->icache_inval_pou && stage2_pte_executable(new)) + mm_ops->icache_inval_pou(kvm_pte_follow(new, mm_ops), granule); + smp_store_release(ptep, new); if (stage2_pte_is_counted(new)) mm_ops->get_page(ptep); @@ -798,12 +818,6 @@ int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size, return ret; } -static bool stage2_pte_cacheable(struct kvm_pgtable *pgt, kvm_pte_t pte) -{ - u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR; - return memattr == KVM_S2_MEMATTR(pgt, NORMAL); -} - static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, enum kvm_pgtable_walk_flags flag, void * const arg) @@ -861,10 +875,11 @@ int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size) } struct stage2_attr_data { - kvm_pte_t attr_set; - kvm_pte_t attr_clr; - kvm_pte_t pte; - u32 level; + kvm_pte_t attr_set; + kvm_pte_t attr_clr; + kvm_pte_t pte; + u32 level; + struct kvm_pgtable_mm_ops *mm_ops; }; static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, @@ -873,6 +888,7 @@ static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, { kvm_pte_t pte = *ptep; struct stage2_attr_data *data = arg; + struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; if (!kvm_pte_valid(pte)) return 0; @@ -887,8 +903,17 @@ static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, * but worst-case the access flag update gets lost and will be * set on the next access instead. */ - if (data->pte != pte) + if (data->pte != pte) { + /* + * Invalidate instruction cache before updating the guest + * stage-2 PTE if we are going to add executable permission. + */ + if (mm_ops->icache_inval_pou && + stage2_pte_executable(pte) && !stage2_pte_executable(*ptep)) + mm_ops->icache_inval_pou(kvm_pte_follow(pte, mm_ops), + kvm_granule_size(level)); WRITE_ONCE(*ptep, pte); + } return 0; } @@ -903,6 +928,7 @@ static int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr, struct stage2_attr_data data = { .attr_set = attr_set & attr_mask, .attr_clr = attr_clr & attr_mask, + .mm_ops = pgt->mm_ops, }; struct kvm_pgtable_walker walker = { .cb = stage2_attr_walker, diff --git a/arch/arm64/kvm/hyp/reserved_mem.c b/arch/arm64/kvm/hyp/reserved_mem.c index 83ca23ac259b..d654921dd09b 100644 --- a/arch/arm64/kvm/hyp/reserved_mem.c +++ b/arch/arm64/kvm/hyp/reserved_mem.c @@ -71,8 +71,7 @@ void __init kvm_hyp_reserve(void) } hyp_mem_pages += hyp_s1_pgtable_pages(); - hyp_mem_pages += host_s2_mem_pgtable_pages(); - hyp_mem_pages += host_s2_dev_pgtable_pages(); + hyp_mem_pages += host_s2_pgtable_pages(); /* * The hyp_vmemmap needs to be backed by pages, but these pages diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index c10207fed2f3..bf389dfc885d 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -126,6 +126,16 @@ static void *kvm_host_va(phys_addr_t phys) return __va(phys); } +static void clean_dcache_guest_page(void *va, size_t size) +{ + __clean_dcache_guest_page(va, size); +} + +static void invalidate_icache_guest_page(void *va, size_t size) +{ + __invalidate_icache_guest_page(va, size); +} + /* * Unmapping vs dcache management: * @@ -432,6 +442,8 @@ static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = { .page_count = kvm_host_page_count, .phys_to_virt = kvm_host_va, .virt_to_phys = kvm_host_pa, + .dcache_clean_inval_poc = clean_dcache_guest_page, + .icache_inval_pou = invalidate_icache_guest_page, }; /** @@ -693,16 +705,6 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask); } -static void clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size) -{ - __clean_dcache_guest_page(pfn, size); -} - -static void invalidate_icache_guest_page(kvm_pfn_t pfn, unsigned long size) -{ - __invalidate_icache_guest_page(pfn, size); -} - static void kvm_send_hwpoison_signal(unsigned long address, short lsb) { send_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb, current); @@ -822,6 +824,35 @@ transparent_hugepage_adjust(struct kvm_memory_slot *memslot, return PAGE_SIZE; } +static int get_vma_page_shift(struct vm_area_struct *vma, unsigned long hva) +{ + unsigned long pa; + + if (is_vm_hugetlb_page(vma) && !(vma->vm_flags & VM_PFNMAP)) + return huge_page_shift(hstate_vma(vma)); + + if (!(vma->vm_flags & VM_PFNMAP)) + return PAGE_SHIFT; + + VM_BUG_ON(is_vm_hugetlb_page(vma)); + + pa = (vma->vm_pgoff << PAGE_SHIFT) + (hva - vma->vm_start); + +#ifndef __PAGETABLE_PMD_FOLDED + if ((hva & (PUD_SIZE - 1)) == (pa & (PUD_SIZE - 1)) && + ALIGN_DOWN(hva, PUD_SIZE) >= vma->vm_start && + ALIGN(hva, PUD_SIZE) <= vma->vm_end) + return PUD_SHIFT; +#endif + + if ((hva & (PMD_SIZE - 1)) == (pa & (PMD_SIZE - 1)) && + ALIGN_DOWN(hva, PMD_SIZE) >= vma->vm_start && + ALIGN(hva, PMD_SIZE) <= vma->vm_end) + return PMD_SHIFT; + + return PAGE_SHIFT; +} + static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, struct kvm_memory_slot *memslot, unsigned long hva, unsigned long fault_status) @@ -853,7 +884,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, return -EFAULT; } - /* Let's check if we will get back a huge page backed by hugetlbfs */ + /* + * Let's check if we will get back a huge page backed by hugetlbfs, or + * get block mapping for device MMIO region. + */ mmap_read_lock(current->mm); vma = find_vma_intersection(current->mm, hva, hva + 1); if (unlikely(!vma)) { @@ -862,15 +896,15 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, return -EFAULT; } - if (is_vm_hugetlb_page(vma)) - vma_shift = huge_page_shift(hstate_vma(vma)); - else - vma_shift = PAGE_SHIFT; - - if (logging_active || - (vma->vm_flags & VM_PFNMAP)) { + /* + * logging_active is guaranteed to never be true for VM_PFNMAP + * memslots. + */ + if (logging_active) { force_pte = true; vma_shift = PAGE_SHIFT; + } else { + vma_shift = get_vma_page_shift(vma, hva); } switch (vma_shift) { @@ -943,8 +977,17 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, return -EFAULT; if (kvm_is_device_pfn(pfn)) { + /* + * If the page was identified as device early by looking at + * the VMA flags, vma_pagesize is already representing the + * largest quantity we can map. If instead it was mapped + * via gfn_to_pfn_prot(), vma_pagesize is set to PAGE_SIZE + * and must not be upgraded. + * + * In both cases, we don't let transparent_hugepage_adjust() + * change things at the last minute. + */ device = true; - force_pte = true; } else if (logging_active && !write_fault) { /* * Only actually map the page as writable if this was a write @@ -965,19 +1008,14 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, * If we are not forced to use page mapping, check if we are * backed by a THP and thus use block mapping if possible. */ - if (vma_pagesize == PAGE_SIZE && !force_pte) + if (vma_pagesize == PAGE_SIZE && !(force_pte || device)) vma_pagesize = transparent_hugepage_adjust(memslot, hva, &pfn, &fault_ipa); if (writable) prot |= KVM_PGTABLE_PROT_W; - if (fault_status != FSC_PERM && !device) - clean_dcache_guest_page(pfn, vma_pagesize); - - if (exec_fault) { + if (exec_fault) prot |= KVM_PGTABLE_PROT_X; - invalidate_icache_guest_page(pfn, vma_pagesize); - } if (device) prot |= KVM_PGTABLE_PROT_DEVICE; @@ -1175,12 +1213,10 @@ bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) WARN_ON(range->end - range->start != 1); /* - * We've moved a page around, probably through CoW, so let's treat it - * just like a translation fault and clean the cache to the PoC. - */ - clean_dcache_guest_page(pfn, PAGE_SIZE); - - /* + * We've moved a page around, probably through CoW, so let's treat + * it just like a translation fault and the map handler will clean + * the cache to the PoC. + * * The MMU notifiers will have unmapped a huge PMD before calling * ->change_pte() (which in turn calls kvm_set_spte_gfn()) and * therefore we never need to clear out a huge PMD through this @@ -1346,7 +1382,6 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, { hva_t hva = mem->userspace_addr; hva_t reg_end = hva + mem->memory_size; - bool writable = !(mem->flags & KVM_MEM_READONLY); int ret = 0; if (change != KVM_MR_CREATE && change != KVM_MR_MOVE && @@ -1363,8 +1398,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, mmap_read_lock(current->mm); /* * A memory region could potentially cover multiple VMAs, and any holes - * between them, so iterate over all of them to find out if we can map - * any of them right now. + * between them, so iterate over all of them. * * +--------------------------------------------+ * +---------------+----------------+ +----------------+ @@ -1375,51 +1409,21 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, */ do { struct vm_area_struct *vma; - hva_t vm_start, vm_end; vma = find_vma_intersection(current->mm, hva, reg_end); if (!vma) break; - /* - * Take the intersection of this VMA with the memory region - */ - vm_start = max(hva, vma->vm_start); - vm_end = min(reg_end, vma->vm_end); - if (vma->vm_flags & VM_PFNMAP) { - gpa_t gpa = mem->guest_phys_addr + - (vm_start - mem->userspace_addr); - phys_addr_t pa; - - pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT; - pa += vm_start - vma->vm_start; - /* IO region dirty page logging not allowed */ if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) { ret = -EINVAL; - goto out; - } - - ret = kvm_phys_addr_ioremap(kvm, gpa, pa, - vm_end - vm_start, - writable); - if (ret) break; + } } - hva = vm_end; + hva = min(reg_end, vma->vm_end); } while (hva < reg_end); - if (change == KVM_MR_FLAGS_ONLY) - goto out; - - spin_lock(&kvm->mmu_lock); - if (ret) - unmap_stage2_range(&kvm->arch.mmu, mem->guest_phys_addr, mem->memory_size); - else if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB)) - stage2_flush_memslot(kvm, memslot); - spin_unlock(&kvm->mmu_lock); -out: mmap_read_unlock(current->mm); return ret; } diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index 58cbda00e56d..340c51d87677 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -482,6 +482,16 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data) return IRQ_HANDLED; } +static struct gic_kvm_info *gic_kvm_info; + +void __init vgic_set_kvm_info(const struct gic_kvm_info *info) +{ + BUG_ON(gic_kvm_info != NULL); + gic_kvm_info = kmalloc(sizeof(*info), GFP_KERNEL); + if (gic_kvm_info) + *gic_kvm_info = *info; +} + /** * kvm_vgic_init_cpu_hardware - initialize the GIC VE hardware * @@ -509,18 +519,29 @@ void kvm_vgic_init_cpu_hardware(void) */ int kvm_vgic_hyp_init(void) { - const struct gic_kvm_info *gic_kvm_info; + bool has_mask; int ret; - gic_kvm_info = gic_get_kvm_info(); if (!gic_kvm_info) return -ENODEV; - if (!gic_kvm_info->maint_irq) { + has_mask = !gic_kvm_info->no_maint_irq_mask; + + if (has_mask && !gic_kvm_info->maint_irq) { kvm_err("No vgic maintenance irq\n"); return -ENXIO; } + /* + * If we get one of these oddball non-GICs, taint the kernel, + * as we have no idea of how they *really* behave. + */ + if (gic_kvm_info->no_hw_deactivation) { + kvm_info("Non-architectural vgic, tainting kernel\n"); + add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK); + kvm_vgic_global_state.no_hw_deactivation = true; + } + switch (gic_kvm_info->type) { case GIC_V2: ret = vgic_v2_probe(gic_kvm_info); @@ -536,10 +557,17 @@ int kvm_vgic_hyp_init(void) ret = -ENODEV; } + kvm_vgic_global_state.maint_irq = gic_kvm_info->maint_irq; + + kfree(gic_kvm_info); + gic_kvm_info = NULL; + if (ret) return ret; - kvm_vgic_global_state.maint_irq = gic_kvm_info->maint_irq; + if (!has_mask) + return 0; + ret = request_percpu_irq(kvm_vgic_global_state.maint_irq, vgic_maintenance_handler, "vgic", kvm_get_running_vcpus()); diff --git a/arch/arm64/kvm/vgic/vgic-v2.c b/arch/arm64/kvm/vgic/vgic-v2.c index 11934c2af2f4..2c580204f1dc 100644 --- a/arch/arm64/kvm/vgic/vgic-v2.c +++ b/arch/arm64/kvm/vgic/vgic-v2.c @@ -108,11 +108,22 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) * If this causes us to lower the level, we have to also clear * the physical active state, since we will otherwise never be * told when the interrupt becomes asserted again. + * + * Another case is when the interrupt requires a helping hand + * on deactivation (no HW deactivation, for example). */ - if (vgic_irq_is_mapped_level(irq) && (val & GICH_LR_PENDING_BIT)) { - irq->line_level = vgic_get_phys_line_level(irq); + if (vgic_irq_is_mapped_level(irq)) { + bool resample = false; + + if (val & GICH_LR_PENDING_BIT) { + irq->line_level = vgic_get_phys_line_level(irq); + resample = !irq->line_level; + } else if (vgic_irq_needs_resampling(irq) && + !(irq->active || irq->pending_latch)) { + resample = true; + } - if (!irq->line_level) + if (resample) vgic_irq_set_phys_active(irq, false); } @@ -152,7 +163,7 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) if (irq->group) val |= GICH_LR_GROUP1; - if (irq->hw) { + if (irq->hw && !vgic_irq_needs_resampling(irq)) { val |= GICH_LR_HW; val |= irq->hwintid << GICH_LR_PHYSID_CPUID_SHIFT; /* diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index 41ecf219c333..66004f61cd83 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -101,11 +101,22 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) * If this causes us to lower the level, we have to also clear * the physical active state, since we will otherwise never be * told when the interrupt becomes asserted again. + * + * Another case is when the interrupt requires a helping hand + * on deactivation (no HW deactivation, for example). */ - if (vgic_irq_is_mapped_level(irq) && (val & ICH_LR_PENDING_BIT)) { - irq->line_level = vgic_get_phys_line_level(irq); + if (vgic_irq_is_mapped_level(irq)) { + bool resample = false; + + if (val & ICH_LR_PENDING_BIT) { + irq->line_level = vgic_get_phys_line_level(irq); + resample = !irq->line_level; + } else if (vgic_irq_needs_resampling(irq) && + !(irq->active || irq->pending_latch)) { + resample = true; + } - if (!irq->line_level) + if (resample) vgic_irq_set_phys_active(irq, false); } @@ -136,7 +147,7 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) } } - if (irq->hw) { + if (irq->hw && !vgic_irq_needs_resampling(irq)) { val |= ICH_LR_HW; val |= ((u64)irq->hwintid) << ICH_LR_PHYS_ID_SHIFT; /* diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index 15b666200f0b..111bff47e471 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -182,8 +182,8 @@ bool vgic_get_phys_line_level(struct vgic_irq *irq) BUG_ON(!irq->hw); - if (irq->get_input_level) - return irq->get_input_level(irq->intid); + if (irq->ops && irq->ops->get_input_level) + return irq->ops->get_input_level(irq->intid); WARN_ON(irq_get_irqchip_state(irq->host_irq, IRQCHIP_STATE_PENDING, @@ -480,7 +480,7 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, /* @irq->irq_lock must be held */ static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq, unsigned int host_irq, - bool (*get_input_level)(int vindid)) + struct irq_ops *ops) { struct irq_desc *desc; struct irq_data *data; @@ -500,7 +500,7 @@ static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq, irq->hw = true; irq->host_irq = host_irq; irq->hwintid = data->hwirq; - irq->get_input_level = get_input_level; + irq->ops = ops; return 0; } @@ -509,11 +509,11 @@ static inline void kvm_vgic_unmap_irq(struct vgic_irq *irq) { irq->hw = false; irq->hwintid = 0; - irq->get_input_level = NULL; + irq->ops = NULL; } int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq, - u32 vintid, bool (*get_input_level)(int vindid)) + u32 vintid, struct irq_ops *ops) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid); unsigned long flags; @@ -522,7 +522,7 @@ int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq, BUG_ON(!irq); raw_spin_lock_irqsave(&irq->irq_lock, flags); - ret = kvm_vgic_map_irq(vcpu, irq, host_irq, get_input_level); + ret = kvm_vgic_map_irq(vcpu, irq, host_irq, ops); raw_spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); diff --git a/drivers/irqchip/irq-apple-aic.c b/drivers/irqchip/irq-apple-aic.c index c179e27062fd..b8c06bd8659e 100644 --- a/drivers/irqchip/irq-apple-aic.c +++ b/drivers/irqchip/irq-apple-aic.c @@ -50,6 +50,7 @@ #include <linux/cpuhotplug.h> #include <linux/io.h> #include <linux/irqchip.h> +#include <linux/irqchip/arm-vgic-info.h> #include <linux/irqdomain.h> #include <linux/limits.h> #include <linux/of_address.h> @@ -787,6 +788,12 @@ static int aic_init_cpu(unsigned int cpu) return 0; } +static struct gic_kvm_info vgic_info __initdata = { + .type = GIC_V3, + .no_maint_irq_mask = true, + .no_hw_deactivation = true, +}; + static int __init aic_of_ic_init(struct device_node *node, struct device_node *parent) { int i; @@ -843,6 +850,8 @@ static int __init aic_of_ic_init(struct device_node *node, struct device_node *p "irqchip/apple-aic/ipi:starting", aic_init_cpu, NULL); + vgic_set_kvm_info(&vgic_info); + pr_info("Initialized with %d IRQs, %d FIQs, %d vIPIs\n", irqc->nr_hw, AIC_NR_FIQ, AIC_NR_SWIPI); diff --git a/drivers/irqchip/irq-gic-common.c b/drivers/irqchip/irq-gic-common.c index f47b41dfd023..a610821c8ff2 100644 --- a/drivers/irqchip/irq-gic-common.c +++ b/drivers/irqchip/irq-gic-common.c @@ -12,19 +12,6 @@ static DEFINE_RAW_SPINLOCK(irq_controller_lock); -static const struct gic_kvm_info *gic_kvm_info; - -const struct gic_kvm_info *gic_get_kvm_info(void) -{ - return gic_kvm_info; -} - -void gic_set_kvm_info(const struct gic_kvm_info *info) -{ - BUG_ON(gic_kvm_info != NULL); - gic_kvm_info = info; -} - void gic_enable_of_quirks(const struct device_node *np, const struct gic_quirk *quirks, void *data) { diff --git a/drivers/irqchip/irq-gic-common.h b/drivers/irqchip/irq-gic-common.h index ccba8b0fe0f5..27e3d4ed4f32 100644 --- a/drivers/irqchip/irq-gic-common.h +++ b/drivers/irqchip/irq-gic-common.h @@ -28,6 +28,4 @@ void gic_enable_quirks(u32 iidr, const struct gic_quirk *quirks, void gic_enable_of_quirks(const struct device_node *np, const struct gic_quirk *quirks, void *data); -void gic_set_kvm_info(const struct gic_kvm_info *info); - #endif /* _IRQ_GIC_COMMON_H */ diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 37a23aa6de37..453fc425eede 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -103,7 +103,7 @@ EXPORT_SYMBOL(gic_nonsecure_priorities); /* ppi_nmi_refs[n] == number of cpus having ppi[n + 16] set as NMI */ static refcount_t *ppi_nmi_refs; -static struct gic_kvm_info gic_v3_kvm_info; +static struct gic_kvm_info gic_v3_kvm_info __initdata; static DEFINE_PER_CPU(bool, has_rss); #define MPIDR_RS(mpidr) (((mpidr) & 0xF0UL) >> 4) @@ -1852,7 +1852,7 @@ static void __init gic_of_setup_kvm_info(struct device_node *node) gic_v3_kvm_info.has_v4 = gic_data.rdists.has_vlpis; gic_v3_kvm_info.has_v4_1 = gic_data.rdists.has_rvpeid; - gic_set_kvm_info(&gic_v3_kvm_info); + vgic_set_kvm_info(&gic_v3_kvm_info); } static int __init gic_of_init(struct device_node *node, struct device_node *parent) @@ -2168,7 +2168,7 @@ static void __init gic_acpi_setup_kvm_info(void) gic_v3_kvm_info.has_v4 = gic_data.rdists.has_vlpis; gic_v3_kvm_info.has_v4_1 = gic_data.rdists.has_rvpeid; - gic_set_kvm_info(&gic_v3_kvm_info); + vgic_set_kvm_info(&gic_v3_kvm_info); } static int __init diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index b1d9c22caf2e..2de9ec8ece0c 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -119,7 +119,7 @@ static DEFINE_STATIC_KEY_TRUE(supports_deactivate_key); static struct gic_chip_data gic_data[CONFIG_ARM_GIC_MAX_NR] __read_mostly; -static struct gic_kvm_info gic_v2_kvm_info; +static struct gic_kvm_info gic_v2_kvm_info __initdata; static DEFINE_PER_CPU(u32, sgi_intid); @@ -1451,7 +1451,7 @@ static void __init gic_of_setup_kvm_info(struct device_node *node) return; if (static_branch_likely(&supports_deactivate_key)) - gic_set_kvm_info(&gic_v2_kvm_info); + vgic_set_kvm_info(&gic_v2_kvm_info); } int __init @@ -1618,7 +1618,7 @@ static void __init gic_acpi_setup_kvm_info(void) gic_v2_kvm_info.maint_irq = irq; - gic_set_kvm_info(&gic_v2_kvm_info); + vgic_set_kvm_info(&gic_v2_kvm_info); } static int __init gic_v2_acpi_init(union acpi_subtable_headers *header, diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index ec621180ef09..e602d848fc1a 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -72,6 +72,9 @@ struct vgic_global { bool has_gicv4; bool has_gicv4_1; + /* Pseudo GICv3 from outer space */ + bool no_hw_deactivation; + /* GIC system register CPU interface */ struct static_key_false gicv3_cpuif; @@ -89,6 +92,26 @@ enum vgic_irq_config { VGIC_CONFIG_LEVEL }; +/* + * Per-irq ops overriding some common behavious. + * + * Always called in non-preemptible section and the functions can use + * kvm_arm_get_running_vcpu() to get the vcpu pointer for private IRQs. + */ +struct irq_ops { + /* Per interrupt flags for special-cased interrupts */ + unsigned long flags; + +#define VGIC_IRQ_SW_RESAMPLE BIT(0) /* Clear the active state for resampling */ + + /* + * Callback function pointer to in-kernel devices that can tell us the + * state of the input level of mapped level-triggered IRQ faster than + * peaking into the physical GIC. + */ + bool (*get_input_level)(int vintid); +}; + struct vgic_irq { raw_spinlock_t irq_lock; /* Protects the content of the struct */ struct list_head lpi_list; /* Used to link all LPIs together */ @@ -126,21 +149,17 @@ struct vgic_irq { u8 group; /* 0 == group 0, 1 == group 1 */ enum vgic_irq_config config; /* Level or edge */ - /* - * Callback function pointer to in-kernel devices that can tell us the - * state of the input level of mapped level-triggered IRQ faster than - * peaking into the physical GIC. - * - * Always called in non-preemptible section and the functions can use - * kvm_arm_get_running_vcpu() to get the vcpu pointer for private - * IRQs. - */ - bool (*get_input_level)(int vintid); + struct irq_ops *ops; void *owner; /* Opaque pointer to reserve an interrupt for in-kernel devices. */ }; +static inline bool vgic_irq_needs_resampling(struct vgic_irq *irq) +{ + return irq->ops && (irq->ops->flags & VGIC_IRQ_SW_RESAMPLE); +} + struct vgic_register_region; struct vgic_its; @@ -352,7 +371,7 @@ void kvm_vgic_init_cpu_hardware(void); int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, bool level, void *owner); int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq, - u32 vintid, bool (*get_input_level)(int vindid)); + u32 vintid, struct irq_ops *ops); int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid); bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid); diff --git a/include/linux/irqchip/arm-gic-common.h b/include/linux/irqchip/arm-gic-common.h index fa8c0455c352..1177f3a1aed5 100644 --- a/include/linux/irqchip/arm-gic-common.h +++ b/include/linux/irqchip/arm-gic-common.h @@ -7,8 +7,7 @@ #ifndef __LINUX_IRQCHIP_ARM_GIC_COMMON_H #define __LINUX_IRQCHIP_ARM_GIC_COMMON_H -#include <linux/types.h> -#include <linux/ioport.h> +#include <linux/irqchip/arm-vgic-info.h> #define GICD_INT_DEF_PRI 0xa0 #define GICD_INT_DEF_PRI_X4 ((GICD_INT_DEF_PRI << 24) |\ @@ -16,28 +15,6 @@ (GICD_INT_DEF_PRI << 8) |\ GICD_INT_DEF_PRI) -enum gic_type { - GIC_V2, - GIC_V3, -}; - -struct gic_kvm_info { - /* GIC type */ - enum gic_type type; - /* Virtual CPU interface */ - struct resource vcpu; - /* Interrupt number */ - unsigned int maint_irq; - /* Virtual control interface */ - struct resource vctrl; - /* vlpi support */ - bool has_v4; - /* rvpeid support */ - bool has_v4_1; -}; - -const struct gic_kvm_info *gic_get_kvm_info(void); - struct irq_domain; struct fwnode_handle; int gicv2m_init(struct fwnode_handle *parent_handle, diff --git a/include/linux/irqchip/arm-vgic-info.h b/include/linux/irqchip/arm-vgic-info.h new file mode 100644 index 000000000000..a75b2c7de69d --- /dev/null +++ b/include/linux/irqchip/arm-vgic-info.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * include/linux/irqchip/arm-vgic-info.h + * + * Copyright (C) 2016 ARM Limited, All Rights Reserved. + */ +#ifndef __LINUX_IRQCHIP_ARM_VGIC_INFO_H +#define __LINUX_IRQCHIP_ARM_VGIC_INFO_H + +#include <linux/types.h> +#include <linux/ioport.h> + +enum gic_type { + /* Full GICv2 */ + GIC_V2, + /* Full GICv3, optionally with v2 compat */ + GIC_V3, +}; + +struct gic_kvm_info { + /* GIC type */ + enum gic_type type; + /* Virtual CPU interface */ + struct resource vcpu; + /* Interrupt number */ + unsigned int maint_irq; + /* No interrupt mask, no need to use the above field */ + bool no_maint_irq_mask; + /* Virtual control interface */ + struct resource vctrl; + /* vlpi support */ + bool has_v4; + /* rvpeid support */ + bool has_v4_1; + /* Deactivation impared, subpar stuff */ + bool no_hw_deactivation; +}; + +#ifdef CONFIG_KVM +void vgic_set_kvm_info(const struct gic_kvm_info *info); +#else +static inline void vgic_set_kvm_info(const struct gic_kvm_info *info) {} +#endif + +#endif diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 524c857a049c..7e2c66155b06 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -1,4 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only +/aarch64/debug-exceptions /aarch64/get-reg-list /aarch64/get-reg-list-sve /aarch64/vgic_init diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index daaee1888b12..36e4ebcc82f0 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -35,7 +35,7 @@ endif LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/rbtree.c lib/sparsebit.c lib/test_util.c lib/guest_modes.c lib/perf_test_util.c LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S -LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c +LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c lib/aarch64/handlers.S LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test @@ -78,6 +78,7 @@ TEST_GEN_PROGS_x86_64 += memslot_perf_test TEST_GEN_PROGS_x86_64 += set_memory_region_test TEST_GEN_PROGS_x86_64 += steal_time +TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list-sve TEST_GEN_PROGS_aarch64 += aarch64/vgic_init diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c new file mode 100644 index 000000000000..e5e6c92b60da --- /dev/null +++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c @@ -0,0 +1,250 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_util.h> +#include <kvm_util.h> +#include <processor.h> + +#define VCPU_ID 0 + +#define MDSCR_KDE (1 << 13) +#define MDSCR_MDE (1 << 15) +#define MDSCR_SS (1 << 0) + +#define DBGBCR_LEN8 (0xff << 5) +#define DBGBCR_EXEC (0x0 << 3) +#define DBGBCR_EL1 (0x1 << 1) +#define DBGBCR_E (0x1 << 0) + +#define DBGWCR_LEN8 (0xff << 5) +#define DBGWCR_RD (0x1 << 3) +#define DBGWCR_WR (0x2 << 3) +#define DBGWCR_EL1 (0x1 << 1) +#define DBGWCR_E (0x1 << 0) + +#define SPSR_D (1 << 9) +#define SPSR_SS (1 << 21) + +extern unsigned char sw_bp, hw_bp, bp_svc, bp_brk, hw_wp, ss_start; +static volatile uint64_t sw_bp_addr, hw_bp_addr; +static volatile uint64_t wp_addr, wp_data_addr; +static volatile uint64_t svc_addr; +static volatile uint64_t ss_addr[4], ss_idx; +#define PC(v) ((uint64_t)&(v)) + +static void reset_debug_state(void) +{ + asm volatile("msr daifset, #8"); + + write_sysreg(osdlr_el1, 0); + write_sysreg(oslar_el1, 0); + isb(); + + write_sysreg(mdscr_el1, 0); + /* This test only uses the first bp and wp slot. */ + write_sysreg(dbgbvr0_el1, 0); + write_sysreg(dbgbcr0_el1, 0); + write_sysreg(dbgwcr0_el1, 0); + write_sysreg(dbgwvr0_el1, 0); + isb(); +} + +static void install_wp(uint64_t addr) +{ + uint32_t wcr; + uint32_t mdscr; + + wcr = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E; + write_sysreg(dbgwcr0_el1, wcr); + write_sysreg(dbgwvr0_el1, addr); + isb(); + + asm volatile("msr daifclr, #8"); + + mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_MDE; + write_sysreg(mdscr_el1, mdscr); + isb(); +} + +static void install_hw_bp(uint64_t addr) +{ + uint32_t bcr; + uint32_t mdscr; + + bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E; + write_sysreg(dbgbcr0_el1, bcr); + write_sysreg(dbgbvr0_el1, addr); + isb(); + + asm volatile("msr daifclr, #8"); + + mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_MDE; + write_sysreg(mdscr_el1, mdscr); + isb(); +} + +static void install_ss(void) +{ + uint32_t mdscr; + + asm volatile("msr daifclr, #8"); + + mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_SS; + write_sysreg(mdscr_el1, mdscr); + isb(); +} + +static volatile char write_data; + +static void guest_code(void) +{ + GUEST_SYNC(0); + + /* Software-breakpoint */ + asm volatile("sw_bp: brk #0"); + GUEST_ASSERT_EQ(sw_bp_addr, PC(sw_bp)); + + GUEST_SYNC(1); + + /* Hardware-breakpoint */ + reset_debug_state(); + install_hw_bp(PC(hw_bp)); + asm volatile("hw_bp: nop"); + GUEST_ASSERT_EQ(hw_bp_addr, PC(hw_bp)); + + GUEST_SYNC(2); + + /* Hardware-breakpoint + svc */ + reset_debug_state(); + install_hw_bp(PC(bp_svc)); + asm volatile("bp_svc: svc #0"); + GUEST_ASSERT_EQ(hw_bp_addr, PC(bp_svc)); + GUEST_ASSERT_EQ(svc_addr, PC(bp_svc) + 4); + + GUEST_SYNC(3); + + /* Hardware-breakpoint + software-breakpoint */ + reset_debug_state(); + install_hw_bp(PC(bp_brk)); + asm volatile("bp_brk: brk #0"); + GUEST_ASSERT_EQ(sw_bp_addr, PC(bp_brk)); + GUEST_ASSERT_EQ(hw_bp_addr, PC(bp_brk)); + + GUEST_SYNC(4); + + /* Watchpoint */ + reset_debug_state(); + install_wp(PC(write_data)); + write_data = 'x'; + GUEST_ASSERT_EQ(write_data, 'x'); + GUEST_ASSERT_EQ(wp_data_addr, PC(write_data)); + + GUEST_SYNC(5); + + /* Single-step */ + reset_debug_state(); + install_ss(); + ss_idx = 0; + asm volatile("ss_start:\n" + "mrs x0, esr_el1\n" + "add x0, x0, #1\n" + "msr daifset, #8\n" + : : : "x0"); + GUEST_ASSERT_EQ(ss_addr[0], PC(ss_start)); + GUEST_ASSERT_EQ(ss_addr[1], PC(ss_start) + 4); + GUEST_ASSERT_EQ(ss_addr[2], PC(ss_start) + 8); + + GUEST_DONE(); +} + +static void guest_sw_bp_handler(struct ex_regs *regs) +{ + sw_bp_addr = regs->pc; + regs->pc += 4; +} + +static void guest_hw_bp_handler(struct ex_regs *regs) +{ + hw_bp_addr = regs->pc; + regs->pstate |= SPSR_D; +} + +static void guest_wp_handler(struct ex_regs *regs) +{ + wp_data_addr = read_sysreg(far_el1); + wp_addr = regs->pc; + regs->pstate |= SPSR_D; +} + +static void guest_ss_handler(struct ex_regs *regs) +{ + GUEST_ASSERT_1(ss_idx < 4, ss_idx); + ss_addr[ss_idx++] = regs->pc; + regs->pstate |= SPSR_SS; +} + +static void guest_svc_handler(struct ex_regs *regs) +{ + svc_addr = regs->pc; +} + +static int debug_version(struct kvm_vm *vm) +{ + uint64_t id_aa64dfr0; + + get_reg(vm, VCPU_ID, ARM64_SYS_REG(ID_AA64DFR0_EL1), &id_aa64dfr0); + return id_aa64dfr0 & 0xf; +} + +int main(int argc, char *argv[]) +{ + struct kvm_vm *vm; + struct ucall uc; + int stage; + + vm = vm_create_default(VCPU_ID, 0, guest_code); + ucall_init(vm, NULL); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vm, VCPU_ID); + + if (debug_version(vm) < 6) { + print_skip("Armv8 debug architecture not supported."); + kvm_vm_free(vm); + exit(KSFT_SKIP); + } + + vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, + ESR_EC_BRK_INS, guest_sw_bp_handler); + vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, + ESR_EC_HW_BP_CURRENT, guest_hw_bp_handler); + vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, + ESR_EC_WP_CURRENT, guest_wp_handler); + vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, + ESR_EC_SSTEP_CURRENT, guest_ss_handler); + vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, + ESR_EC_SVC64, guest_svc_handler); + + for (stage = 0; stage < 7; stage++) { + vcpu_run(vm, VCPU_ID); + + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_SYNC: + TEST_ASSERT(uc.args[1] == stage, + "Stage %d: Unexpected sync ucall, got %lx", + stage, (ulong)uc.args[1]); + break; + case UCALL_ABORT: + TEST_FAIL("%s at %s:%ld\n\tvalues: %#lx, %#lx", + (const char *)uc.args[0], + __FILE__, uc.args[1], uc.args[2], uc.args[3]); + break; + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + } + +done: + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h index b7fa0c8551db..27dc5c2e56b9 100644 --- a/tools/testing/selftests/kvm/include/aarch64/processor.h +++ b/tools/testing/selftests/kvm/include/aarch64/processor.h @@ -8,16 +8,20 @@ #define SELFTEST_KVM_PROCESSOR_H #include "kvm_util.h" +#include <linux/stringify.h> #define ARM64_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \ KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x)) -#define CPACR_EL1 3, 0, 1, 0, 2 -#define TCR_EL1 3, 0, 2, 0, 2 -#define MAIR_EL1 3, 0, 10, 2, 0 -#define TTBR0_EL1 3, 0, 2, 0, 0 -#define SCTLR_EL1 3, 0, 1, 0, 0 +#define CPACR_EL1 3, 0, 1, 0, 2 +#define TCR_EL1 3, 0, 2, 0, 2 +#define MAIR_EL1 3, 0, 10, 2, 0 +#define TTBR0_EL1 3, 0, 2, 0, 0 +#define SCTLR_EL1 3, 0, 1, 0, 0 +#define VBAR_EL1 3, 0, 12, 0, 0 + +#define ID_AA64DFR0_EL1 3, 0, 0, 5, 0 /* * Default MAIR @@ -56,4 +60,73 @@ void aarch64_vcpu_setup(struct kvm_vm *vm, int vcpuid, struct kvm_vcpu_init *ini void aarch64_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_vcpu_init *init, void *guest_code); +struct ex_regs { + u64 regs[31]; + u64 sp; + u64 pc; + u64 pstate; +}; + +#define VECTOR_NUM 16 + +enum { + VECTOR_SYNC_CURRENT_SP0, + VECTOR_IRQ_CURRENT_SP0, + VECTOR_FIQ_CURRENT_SP0, + VECTOR_ERROR_CURRENT_SP0, + + VECTOR_SYNC_CURRENT, + VECTOR_IRQ_CURRENT, + VECTOR_FIQ_CURRENT, + VECTOR_ERROR_CURRENT, + + VECTOR_SYNC_LOWER_64, + VECTOR_IRQ_LOWER_64, + VECTOR_FIQ_LOWER_64, + VECTOR_ERROR_LOWER_64, + + VECTOR_SYNC_LOWER_32, + VECTOR_IRQ_LOWER_32, + VECTOR_FIQ_LOWER_32, + VECTOR_ERROR_LOWER_32, +}; + +#define VECTOR_IS_SYNC(v) ((v) == VECTOR_SYNC_CURRENT_SP0 || \ + (v) == VECTOR_SYNC_CURRENT || \ + (v) == VECTOR_SYNC_LOWER_64 || \ + (v) == VECTOR_SYNC_LOWER_32) + +#define ESR_EC_NUM 64 +#define ESR_EC_SHIFT 26 +#define ESR_EC_MASK (ESR_EC_NUM - 1) + +#define ESR_EC_SVC64 0x15 +#define ESR_EC_HW_BP_CURRENT 0x31 +#define ESR_EC_SSTEP_CURRENT 0x33 +#define ESR_EC_WP_CURRENT 0x35 +#define ESR_EC_BRK_INS 0x3c + +void vm_init_descriptor_tables(struct kvm_vm *vm); +void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid); + +typedef void(*handler_fn)(struct ex_regs *); +void vm_install_exception_handler(struct kvm_vm *vm, + int vector, handler_fn handler); +void vm_install_sync_handler(struct kvm_vm *vm, + int vector, int ec, handler_fn handler); + +#define write_sysreg(reg, val) \ +({ \ + u64 __val = (u64)(val); \ + asm volatile("msr " __stringify(reg) ", %x0" : : "rZ" (__val)); \ +}) + +#define read_sysreg(reg) \ +({ u64 val; \ + asm volatile("mrs %0, "__stringify(reg) : "=r"(val) : : "memory");\ + val; \ +}) + +#define isb() asm volatile("isb" : : : "memory") + #endif /* SELFTEST_KVM_PROCESSOR_H */ diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index fcd8e3855111..ce49e22843d8 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -349,6 +349,7 @@ enum { UCALL_SYNC, UCALL_ABORT, UCALL_DONE, + UCALL_UNHANDLED, }; #define UCALL_MAX_ARGS 6 @@ -367,26 +368,28 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc); ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4) #define GUEST_SYNC(stage) ucall(UCALL_SYNC, 2, "hello", stage) #define GUEST_DONE() ucall(UCALL_DONE, 0) -#define __GUEST_ASSERT(_condition, _nargs, _args...) do { \ - if (!(_condition)) \ - ucall(UCALL_ABORT, 2 + _nargs, \ - "Failed guest assert: " \ - #_condition, __LINE__, _args); \ +#define __GUEST_ASSERT(_condition, _condstr, _nargs, _args...) do { \ + if (!(_condition)) \ + ucall(UCALL_ABORT, 2 + _nargs, \ + "Failed guest assert: " \ + _condstr, __LINE__, _args); \ } while (0) #define GUEST_ASSERT(_condition) \ - __GUEST_ASSERT((_condition), 0, 0) + __GUEST_ASSERT(_condition, #_condition, 0, 0) #define GUEST_ASSERT_1(_condition, arg1) \ - __GUEST_ASSERT((_condition), 1, (arg1)) + __GUEST_ASSERT(_condition, #_condition, 1, (arg1)) #define GUEST_ASSERT_2(_condition, arg1, arg2) \ - __GUEST_ASSERT((_condition), 2, (arg1), (arg2)) + __GUEST_ASSERT(_condition, #_condition, 2, (arg1), (arg2)) #define GUEST_ASSERT_3(_condition, arg1, arg2, arg3) \ - __GUEST_ASSERT((_condition), 3, (arg1), (arg2), (arg3)) + __GUEST_ASSERT(_condition, #_condition, 3, (arg1), (arg2), (arg3)) #define GUEST_ASSERT_4(_condition, arg1, arg2, arg3, arg4) \ - __GUEST_ASSERT((_condition), 4, (arg1), (arg2), (arg3), (arg4)) + __GUEST_ASSERT(_condition, #_condition, 4, (arg1), (arg2), (arg3), (arg4)) + +#define GUEST_ASSERT_EQ(a, b) __GUEST_ASSERT((a) == (b), #a " == " #b, 2, a, b) #endif /* SELFTEST_KVM_UTIL_H */ diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 0b30b4e15c38..92a62c6999bc 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -53,8 +53,6 @@ #define CPUID_PKU (1ul << 3) #define CPUID_LA57 (1ul << 16) -#define UNEXPECTED_VECTOR_PORT 0xfff0u - /* General Registers in 64-Bit Mode */ struct gpr64_regs { u64 rax; @@ -391,7 +389,7 @@ struct ex_regs { void vm_init_descriptor_tables(struct kvm_vm *vm); void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid); -void vm_handle_exception(struct kvm_vm *vm, int vector, +void vm_install_exception_handler(struct kvm_vm *vm, int vector, void (*handler)(struct ex_regs *)); /* diff --git a/tools/testing/selftests/kvm/lib/aarch64/handlers.S b/tools/testing/selftests/kvm/lib/aarch64/handlers.S new file mode 100644 index 000000000000..0e443eadfac6 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/aarch64/handlers.S @@ -0,0 +1,126 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +.macro save_registers + add sp, sp, #-16 * 17 + + stp x0, x1, [sp, #16 * 0] + stp x2, x3, [sp, #16 * 1] + stp x4, x5, [sp, #16 * 2] + stp x6, x7, [sp, #16 * 3] + stp x8, x9, [sp, #16 * 4] + stp x10, x11, [sp, #16 * 5] + stp x12, x13, [sp, #16 * 6] + stp x14, x15, [sp, #16 * 7] + stp x16, x17, [sp, #16 * 8] + stp x18, x19, [sp, #16 * 9] + stp x20, x21, [sp, #16 * 10] + stp x22, x23, [sp, #16 * 11] + stp x24, x25, [sp, #16 * 12] + stp x26, x27, [sp, #16 * 13] + stp x28, x29, [sp, #16 * 14] + + /* + * This stores sp_el1 into ex_regs.sp so exception handlers can "look" + * at it. It will _not_ be used to restore the sp on return from the + * exception so handlers can not update it. + */ + add x1, sp, #16 * 17 + stp x30, x1, [sp, #16 * 15] /* x30, SP */ + + mrs x1, elr_el1 + mrs x2, spsr_el1 + stp x1, x2, [sp, #16 * 16] /* PC, PSTATE */ +.endm + +.macro restore_registers + ldp x1, x2, [sp, #16 * 16] /* PC, PSTATE */ + msr elr_el1, x1 + msr spsr_el1, x2 + + /* sp is not restored */ + ldp x30, xzr, [sp, #16 * 15] /* x30, SP */ + + ldp x28, x29, [sp, #16 * 14] + ldp x26, x27, [sp, #16 * 13] + ldp x24, x25, [sp, #16 * 12] + ldp x22, x23, [sp, #16 * 11] + ldp x20, x21, [sp, #16 * 10] + ldp x18, x19, [sp, #16 * 9] + ldp x16, x17, [sp, #16 * 8] + ldp x14, x15, [sp, #16 * 7] + ldp x12, x13, [sp, #16 * 6] + ldp x10, x11, [sp, #16 * 5] + ldp x8, x9, [sp, #16 * 4] + ldp x6, x7, [sp, #16 * 3] + ldp x4, x5, [sp, #16 * 2] + ldp x2, x3, [sp, #16 * 1] + ldp x0, x1, [sp, #16 * 0] + + add sp, sp, #16 * 17 + + eret +.endm + +.pushsection ".entry.text", "ax" +.balign 0x800 +.global vectors +vectors: +.popsection + +.set vector, 0 + +/* + * Build an exception handler for vector and append a jump to it into + * vectors (while making sure that it's 0x80 aligned). + */ +.macro HANDLER, label +handler_\label: + save_registers + mov x0, sp + mov x1, #vector + bl route_exception + restore_registers + +.pushsection ".entry.text", "ax" +.balign 0x80 + b handler_\label +.popsection + +.set vector, vector + 1 +.endm + +.macro HANDLER_INVALID +.pushsection ".entry.text", "ax" +.balign 0x80 +/* This will abort so no need to save and restore registers. */ + mov x0, #vector + mov x1, #0 /* ec */ + mov x2, #0 /* valid_ec */ + b kvm_exit_unexpected_exception +.popsection + +.set vector, vector + 1 +.endm + +/* + * Caution: be sure to not add anything between the declaration of vectors + * above and these macro calls that will build the vectors table below it. + */ + HANDLER_INVALID // Synchronous EL1t + HANDLER_INVALID // IRQ EL1t + HANDLER_INVALID // FIQ EL1t + HANDLER_INVALID // Error EL1t + + HANDLER el1h_sync // Synchronous EL1h + HANDLER el1h_irq // IRQ EL1h + HANDLER el1h_fiq // FIQ EL1h + HANDLER el1h_error // Error EL1h + + HANDLER el0_sync_64 // Synchronous 64-bit EL0 + HANDLER el0_irq_64 // IRQ 64-bit EL0 + HANDLER el0_fiq_64 // FIQ 64-bit EL0 + HANDLER el0_error_64 // Error 64-bit EL0 + + HANDLER el0_sync_32 // Synchronous 32-bit EL0 + HANDLER el0_irq_32 // IRQ 32-bit EL0 + HANDLER el0_fiq_32 // FIQ 32-bit EL0 + HANDLER el0_error_32 // Error 32-bit EL0 diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index cee92d477dc0..48b55c93f858 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -6,6 +6,7 @@ */ #include <linux/compiler.h> +#include <assert.h> #include "kvm_util.h" #include "../kvm_util_internal.h" @@ -14,6 +15,8 @@ #define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000 #define DEFAULT_ARM64_GUEST_STACK_VADDR_MIN 0xac0000 +static vm_vaddr_t exception_handlers; + static uint64_t page_align(struct kvm_vm *vm, uint64_t v) { return (v + vm->page_size) & ~(vm->page_size - 1); @@ -334,6 +337,100 @@ void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...) va_end(ap); } +void kvm_exit_unexpected_exception(int vector, uint64_t ec, bool valid_ec) +{ + ucall(UCALL_UNHANDLED, 3, vector, ec, valid_ec); + while (1) + ; +} + void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid) { + struct ucall uc; + + if (get_ucall(vm, vcpuid, &uc) != UCALL_UNHANDLED) + return; + + if (uc.args[2]) /* valid_ec */ { + assert(VECTOR_IS_SYNC(uc.args[0])); + TEST_FAIL("Unexpected exception (vector:0x%lx, ec:0x%lx)", + uc.args[0], uc.args[1]); + } else { + assert(!VECTOR_IS_SYNC(uc.args[0])); + TEST_FAIL("Unexpected exception (vector:0x%lx)", + uc.args[0]); + } +} + +struct handlers { + handler_fn exception_handlers[VECTOR_NUM][ESR_EC_NUM]; +}; + +void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid) +{ + extern char vectors; + + set_reg(vm, vcpuid, ARM64_SYS_REG(VBAR_EL1), (uint64_t)&vectors); +} + +void route_exception(struct ex_regs *regs, int vector) +{ + struct handlers *handlers = (struct handlers *)exception_handlers; + bool valid_ec; + int ec = 0; + + switch (vector) { + case VECTOR_SYNC_CURRENT: + case VECTOR_SYNC_LOWER_64: + ec = (read_sysreg(esr_el1) >> ESR_EC_SHIFT) & ESR_EC_MASK; + valid_ec = true; + break; + case VECTOR_IRQ_CURRENT: + case VECTOR_IRQ_LOWER_64: + case VECTOR_FIQ_CURRENT: + case VECTOR_FIQ_LOWER_64: + case VECTOR_ERROR_CURRENT: + case VECTOR_ERROR_LOWER_64: + ec = 0; + valid_ec = false; + break; + default: + valid_ec = false; + goto unexpected_exception; + } + + if (handlers && handlers->exception_handlers[vector][ec]) + return handlers->exception_handlers[vector][ec](regs); + +unexpected_exception: + kvm_exit_unexpected_exception(vector, ec, valid_ec); +} + +void vm_init_descriptor_tables(struct kvm_vm *vm) +{ + vm->handlers = vm_vaddr_alloc(vm, sizeof(struct handlers), + vm->page_size, 0, 0); + + *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers; +} + +void vm_install_sync_handler(struct kvm_vm *vm, int vector, int ec, + void (*handler)(struct ex_regs *)) +{ + struct handlers *handlers = addr_gva2hva(vm, vm->handlers); + + assert(VECTOR_IS_SYNC(vector)); + assert(vector < VECTOR_NUM); + assert(ec < ESR_EC_NUM); + handlers->exception_handlers[vector][ec] = handler; +} + +void vm_install_exception_handler(struct kvm_vm *vm, int vector, + void (*handler)(struct ex_regs *)) +{ + struct handlers *handlers = addr_gva2hva(vm, vm->handlers); + + assert(!VECTOR_IS_SYNC(vector)); + assert(vector < VECTOR_NUM); + handlers->exception_handlers[vector][0] = handler; } diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index efe235044421..a217515a9bc2 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -1201,7 +1201,7 @@ static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr, void kvm_exit_unexpected_vector(uint32_t value) { - outl(UNEXPECTED_VECTOR_PORT, value); + ucall(UCALL_UNHANDLED, 1, value); } void route_exception(struct ex_regs *regs) @@ -1244,8 +1244,8 @@ void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid) *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers; } -void vm_handle_exception(struct kvm_vm *vm, int vector, - void (*handler)(struct ex_regs *)) +void vm_install_exception_handler(struct kvm_vm *vm, int vector, + void (*handler)(struct ex_regs *)) { vm_vaddr_t *handlers = (vm_vaddr_t *)addr_gva2hva(vm, vm->handlers); @@ -1254,16 +1254,13 @@ void vm_handle_exception(struct kvm_vm *vm, int vector, void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid) { - if (vcpu_state(vm, vcpuid)->exit_reason == KVM_EXIT_IO - && vcpu_state(vm, vcpuid)->io.port == UNEXPECTED_VECTOR_PORT - && vcpu_state(vm, vcpuid)->io.size == 4) { - /* Grab pointer to io data */ - uint32_t *data = (void *)vcpu_state(vm, vcpuid) - + vcpu_state(vm, vcpuid)->io.data_offset; - - TEST_ASSERT(false, - "Unexpected vectored event in guest (vector:0x%x)", - *data); + struct ucall uc; + + if (get_ucall(vm, vcpuid, &uc) == UCALL_UNHANDLED) { + uint64_t vector = uc.args[0]; + + TEST_FAIL("Unexpected vectored event in guest (vector:0x%lx)", + vector); } } diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c index 63096cea26c6..0864b2e3fd9e 100644 --- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c +++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c @@ -154,8 +154,8 @@ int main(int argc, char *argv[]) vm_init_descriptor_tables(vm); vcpu_init_descriptor_tables(vm, VCPU_ID); - vm_handle_exception(vm, UD_VECTOR, guest_ud_handler); - vm_handle_exception(vm, NMI_VECTOR, guest_nmi_handler); + vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler); + vm_install_exception_handler(vm, NMI_VECTOR, guest_nmi_handler); pr_info("Running L1 which uses EVMCS to run L2\n"); diff --git a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c index 732b244d6956..04ed975662c9 100644 --- a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c +++ b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c @@ -227,7 +227,7 @@ int main(void) vm_init_descriptor_tables(vm); vcpu_init_descriptor_tables(vm, VCPU_ID); - vm_handle_exception(vm, GP_VECTOR, guest_gp_handler); + vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler); enter_guest(vm); kvm_vm_free(vm); diff --git a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c index d672f0a473f8..fc03a150278d 100644 --- a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c +++ b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c @@ -24,6 +24,10 @@ #define UCALL_PIO_PORT ((uint16_t)0x1000) +struct ucall uc_none = { + .cmd = UCALL_NONE, +}; + /* * ucall is embedded here to protect against compiler reshuffling registers * before calling a function. In this test we only need to get KVM_EXIT_IO @@ -34,7 +38,8 @@ void guest_code(void) asm volatile("1: in %[port], %%al\n" "add $0x1, %%rbx\n" "jmp 1b" - : : [port] "d" (UCALL_PIO_PORT) : "rax", "rbx"); + : : [port] "d" (UCALL_PIO_PORT), "D" (&uc_none) + : "rax", "rbx"); } static void compare_regs(struct kvm_regs *left, struct kvm_regs *right) diff --git a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c index e357d8e222d4..5a6a662f2e59 100644 --- a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c +++ b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c @@ -18,15 +18,6 @@ #define rounded_rdmsr(x) ROUND(rdmsr(x)) #define rounded_host_rdmsr(x) ROUND(vcpu_get_msr(vm, 0, x)) -#define GUEST_ASSERT_EQ(a, b) do { \ - __typeof(a) _a = (a); \ - __typeof(b) _b = (b); \ - if (_a != _b) \ - ucall(UCALL_ABORT, 4, \ - "Failed guest assert: " \ - #a " == " #b, __LINE__, _a, _b); \ - } while(0) - static void guest_code(void) { u64 val = 0; diff --git a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c index 72c0d0797522..e3e20e8848d0 100644 --- a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c +++ b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c @@ -574,7 +574,7 @@ static void test_msr_filter_allow(void) { vm_init_descriptor_tables(vm); vcpu_init_descriptor_tables(vm, VCPU_ID); - vm_handle_exception(vm, GP_VECTOR, guest_gp_handler); + vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler); /* Process guest code userspace exits. */ run_guest_then_process_rdmsr(vm, MSR_IA32_XSS); @@ -588,12 +588,12 @@ static void test_msr_filter_allow(void) { run_guest_then_process_wrmsr(vm, MSR_NON_EXISTENT); run_guest_then_process_rdmsr(vm, MSR_NON_EXISTENT); - vm_handle_exception(vm, UD_VECTOR, guest_ud_handler); + vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler); run_guest(vm); - vm_handle_exception(vm, UD_VECTOR, NULL); + vm_install_exception_handler(vm, UD_VECTOR, NULL); if (process_ucall(vm) != UCALL_DONE) { - vm_handle_exception(vm, GP_VECTOR, guest_fep_gp_handler); + vm_install_exception_handler(vm, GP_VECTOR, guest_fep_gp_handler); /* Process emulated rdmsr and wrmsr instructions. */ run_guest_then_process_rdmsr(vm, MSR_IA32_XSS); diff --git a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c index 2f964cdc273c..ed27269a01bb 100644 --- a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c +++ b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c @@ -462,7 +462,7 @@ int main(int argc, char *argv[]) vm_init_descriptor_tables(vm); vcpu_init_descriptor_tables(vm, HALTER_VCPU_ID); - vm_handle_exception(vm, IPI_VECTOR, guest_ipi_handler); + vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler); virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA, 0); |