aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/arm64/kvm/arch_timer.c162
-rw-r--r--arch/arm64/kvm/hyp/hyp-entry.S6
-rw-r--r--arch/arm64/kvm/hyp/include/nvhe/gfp.h45
-rw-r--r--arch/arm64/kvm/hyp/include/nvhe/mem_protect.h2
-rw-r--r--arch/arm64/kvm/hyp/include/nvhe/memory.h7
-rw-r--r--arch/arm64/kvm/hyp/include/nvhe/mm.h13
-rw-r--r--arch/arm64/kvm/hyp/nvhe/mem_protect.c60
-rw-r--r--arch/arm64/kvm/hyp/nvhe/page_alloc.c112
-rw-r--r--arch/arm64/kvm/hyp/nvhe/setup.c30
-rw-r--r--arch/arm64/kvm/hyp/reserved_mem.c3
-rw-r--r--arch/arm64/kvm/mmu.c99
-rw-r--r--arch/arm64/kvm/vgic/vgic-init.c36
-rw-r--r--arch/arm64/kvm/vgic/vgic-v2.c19
-rw-r--r--arch/arm64/kvm/vgic/vgic-v3.c19
-rw-r--r--arch/arm64/kvm/vgic/vgic.c14
-rw-r--r--drivers/irqchip/irq-apple-aic.c9
-rw-r--r--drivers/irqchip/irq-gic-common.c13
-rw-r--r--drivers/irqchip/irq-gic-common.h2
-rw-r--r--drivers/irqchip/irq-gic-v3.c6
-rw-r--r--drivers/irqchip/irq-gic.c6
-rw-r--r--include/kvm/arm_vgic.h41
-rw-r--r--include/linux/irqchip/arm-gic-common.h25
-rw-r--r--include/linux/irqchip/arm-vgic-info.h45
23 files changed, 498 insertions, 276 deletions
diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index 74e0699661e9..3df67c127489 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -9,6 +9,7 @@
#include <linux/kvm_host.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
+#include <linux/irqdomain.h>
#include <linux/uaccess.h>
#include <clocksource/arm_arch_timer.h>
@@ -973,36 +974,154 @@ static int kvm_timer_dying_cpu(unsigned int cpu)
return 0;
}
-int kvm_timer_hyp_init(bool has_gic)
+static int timer_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu)
{
- struct arch_timer_kvm_info *info;
- int err;
+ if (vcpu)
+ irqd_set_forwarded_to_vcpu(d);
+ else
+ irqd_clr_forwarded_to_vcpu(d);
- info = arch_timer_get_kvm_info();
- timecounter = &info->timecounter;
+ return 0;
+}
- if (!timecounter->cc) {
- kvm_err("kvm_arch_timer: uninitialized timecounter\n");
- return -ENODEV;
+static int timer_irq_set_irqchip_state(struct irq_data *d,
+ enum irqchip_irq_state which, bool val)
+{
+ if (which != IRQCHIP_STATE_ACTIVE || !irqd_is_forwarded_to_vcpu(d))
+ return irq_chip_set_parent_state(d, which, val);
+
+ if (val)
+ irq_chip_mask_parent(d);
+ else
+ irq_chip_unmask_parent(d);
+
+ return 0;
+}
+
+static void timer_irq_eoi(struct irq_data *d)
+{
+ if (!irqd_is_forwarded_to_vcpu(d))
+ irq_chip_eoi_parent(d);
+}
+
+static void timer_irq_ack(struct irq_data *d)
+{
+ d = d->parent_data;
+ if (d->chip->irq_ack)
+ d->chip->irq_ack(d);
+}
+
+static struct irq_chip timer_chip = {
+ .name = "KVM",
+ .irq_ack = timer_irq_ack,
+ .irq_mask = irq_chip_mask_parent,
+ .irq_unmask = irq_chip_unmask_parent,
+ .irq_eoi = timer_irq_eoi,
+ .irq_set_type = irq_chip_set_type_parent,
+ .irq_set_vcpu_affinity = timer_irq_set_vcpu_affinity,
+ .irq_set_irqchip_state = timer_irq_set_irqchip_state,
+};
+
+static int timer_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs, void *arg)
+{
+ irq_hw_number_t hwirq = (uintptr_t)arg;
+
+ return irq_domain_set_hwirq_and_chip(domain, virq, hwirq,
+ &timer_chip, NULL);
+}
+
+static void timer_irq_domain_free(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs)
+{
+}
+
+static const struct irq_domain_ops timer_domain_ops = {
+ .alloc = timer_irq_domain_alloc,
+ .free = timer_irq_domain_free,
+};
+
+static struct irq_ops arch_timer_irq_ops = {
+ .get_input_level = kvm_arch_timer_get_input_level,
+};
+
+static void kvm_irq_fixup_flags(unsigned int virq, u32 *flags)
+{
+ *flags = irq_get_trigger_type(virq);
+ if (*flags != IRQF_TRIGGER_HIGH && *flags != IRQF_TRIGGER_LOW) {
+ kvm_err("Invalid trigger for timer IRQ%d, assuming level low\n",
+ virq);
+ *flags = IRQF_TRIGGER_LOW;
}
+}
- /* First, do the virtual EL1 timer irq */
+static int kvm_irq_init(struct arch_timer_kvm_info *info)
+{
+ struct irq_domain *domain = NULL;
if (info->virtual_irq <= 0) {
kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n",
info->virtual_irq);
return -ENODEV;
}
+
host_vtimer_irq = info->virtual_irq;
+ kvm_irq_fixup_flags(host_vtimer_irq, &host_vtimer_irq_flags);
+
+ if (kvm_vgic_global_state.no_hw_deactivation) {
+ struct fwnode_handle *fwnode;
+ struct irq_data *data;
+
+ fwnode = irq_domain_alloc_named_fwnode("kvm-timer");
+ if (!fwnode)
+ return -ENOMEM;
+
+ /* Assume both vtimer and ptimer in the same parent */
+ data = irq_get_irq_data(host_vtimer_irq);
+ domain = irq_domain_create_hierarchy(data->domain, 0,
+ NR_KVM_TIMERS, fwnode,
+ &timer_domain_ops, NULL);
+ if (!domain) {
+ irq_domain_free_fwnode(fwnode);
+ return -ENOMEM;
+ }
+
+ arch_timer_irq_ops.flags |= VGIC_IRQ_SW_RESAMPLE;
+ WARN_ON(irq_domain_push_irq(domain, host_vtimer_irq,
+ (void *)TIMER_VTIMER));
+ }
- host_vtimer_irq_flags = irq_get_trigger_type(host_vtimer_irq);
- if (host_vtimer_irq_flags != IRQF_TRIGGER_HIGH &&
- host_vtimer_irq_flags != IRQF_TRIGGER_LOW) {
- kvm_err("Invalid trigger for vtimer IRQ%d, assuming level low\n",
- host_vtimer_irq);
- host_vtimer_irq_flags = IRQF_TRIGGER_LOW;
+ if (info->physical_irq > 0) {
+ host_ptimer_irq = info->physical_irq;
+ kvm_irq_fixup_flags(host_ptimer_irq, &host_ptimer_irq_flags);
+
+ if (domain)
+ WARN_ON(irq_domain_push_irq(domain, host_ptimer_irq,
+ (void *)TIMER_PTIMER));
}
+ return 0;
+}
+
+int kvm_timer_hyp_init(bool has_gic)
+{
+ struct arch_timer_kvm_info *info;
+ int err;
+
+ info = arch_timer_get_kvm_info();
+ timecounter = &info->timecounter;
+
+ if (!timecounter->cc) {
+ kvm_err("kvm_arch_timer: uninitialized timecounter\n");
+ return -ENODEV;
+ }
+
+ err = kvm_irq_init(info);
+ if (err)
+ return err;
+
+ /* First, do the virtual EL1 timer irq */
+
err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler,
"kvm guest vtimer", kvm_get_running_vcpus());
if (err) {
@@ -1027,15 +1146,6 @@ int kvm_timer_hyp_init(bool has_gic)
/* Now let's do the physical EL1 timer irq */
if (info->physical_irq > 0) {
- host_ptimer_irq = info->physical_irq;
- host_ptimer_irq_flags = irq_get_trigger_type(host_ptimer_irq);
- if (host_ptimer_irq_flags != IRQF_TRIGGER_HIGH &&
- host_ptimer_irq_flags != IRQF_TRIGGER_LOW) {
- kvm_err("Invalid trigger for ptimer IRQ%d, assuming level low\n",
- host_ptimer_irq);
- host_ptimer_irq_flags = IRQF_TRIGGER_LOW;
- }
-
err = request_percpu_irq(host_ptimer_irq, kvm_arch_timer_handler,
"kvm guest ptimer", kvm_get_running_vcpus());
if (err) {
@@ -1143,7 +1253,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
ret = kvm_vgic_map_phys_irq(vcpu,
map.direct_vtimer->host_timer_irq,
map.direct_vtimer->irq.irq,
- kvm_arch_timer_get_input_level);
+ &arch_timer_irq_ops);
if (ret)
return ret;
@@ -1151,7 +1261,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
ret = kvm_vgic_map_phys_irq(vcpu,
map.direct_ptimer->host_timer_irq,
map.direct_ptimer->irq.irq,
- kvm_arch_timer_get_input_level);
+ &arch_timer_irq_ops);
}
if (ret)
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index 5f49df4ffdd8..9aa9b73475c9 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -76,6 +76,7 @@ el1_trap:
b __guest_exit
el1_irq:
+el1_fiq:
get_vcpu_ptr x1, x0
mov x0, #ARM_EXCEPTION_IRQ
b __guest_exit
@@ -131,7 +132,6 @@ SYM_CODE_END(\label)
invalid_vector el2t_error_invalid
invalid_vector el2h_irq_invalid
invalid_vector el2h_fiq_invalid
- invalid_vector el1_fiq_invalid
.ltorg
@@ -179,12 +179,12 @@ SYM_CODE_START(__kvm_hyp_vector)
valid_vect el1_sync // Synchronous 64-bit EL1
valid_vect el1_irq // IRQ 64-bit EL1
- invalid_vect el1_fiq_invalid // FIQ 64-bit EL1
+ valid_vect el1_fiq // FIQ 64-bit EL1
valid_vect el1_error // Error 64-bit EL1
valid_vect el1_sync // Synchronous 32-bit EL1
valid_vect el1_irq // IRQ 32-bit EL1
- invalid_vect el1_fiq_invalid // FIQ 32-bit EL1
+ valid_vect el1_fiq // FIQ 32-bit EL1
valid_vect el1_error // Error 32-bit EL1
SYM_CODE_END(__kvm_hyp_vector)
diff --git a/arch/arm64/kvm/hyp/include/nvhe/gfp.h b/arch/arm64/kvm/hyp/include/nvhe/gfp.h
index 18a4494337bd..fb0f523d1492 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/gfp.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/gfp.h
@@ -7,7 +7,7 @@
#include <nvhe/memory.h>
#include <nvhe/spinlock.h>
-#define HYP_NO_ORDER UINT_MAX
+#define HYP_NO_ORDER USHRT_MAX
struct hyp_pool {
/*
@@ -19,48 +19,13 @@ struct hyp_pool {
struct list_head free_area[MAX_ORDER];
phys_addr_t range_start;
phys_addr_t range_end;
- unsigned int max_order;
+ unsigned short max_order;
};
-static inline void hyp_page_ref_inc(struct hyp_page *p)
-{
- struct hyp_pool *pool = hyp_page_to_pool(p);
-
- hyp_spin_lock(&pool->lock);
- p->refcount++;
- hyp_spin_unlock(&pool->lock);
-}
-
-static inline int hyp_page_ref_dec_and_test(struct hyp_page *p)
-{
- struct hyp_pool *pool = hyp_page_to_pool(p);
- int ret;
-
- hyp_spin_lock(&pool->lock);
- p->refcount--;
- ret = (p->refcount == 0);
- hyp_spin_unlock(&pool->lock);
-
- return ret;
-}
-
-static inline void hyp_set_page_refcounted(struct hyp_page *p)
-{
- struct hyp_pool *pool = hyp_page_to_pool(p);
-
- hyp_spin_lock(&pool->lock);
- if (p->refcount) {
- hyp_spin_unlock(&pool->lock);
- BUG();
- }
- p->refcount = 1;
- hyp_spin_unlock(&pool->lock);
-}
-
/* Allocation */
-void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order);
-void hyp_get_page(void *addr);
-void hyp_put_page(void *addr);
+void *hyp_alloc_pages(struct hyp_pool *pool, unsigned short order);
+void hyp_get_page(struct hyp_pool *pool, void *addr);
+void hyp_put_page(struct hyp_pool *pool, void *addr);
/* Used pages cannot be freed */
int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages,
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
index 42d81ec739fa..9c227d87c36d 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
@@ -23,7 +23,7 @@ extern struct host_kvm host_kvm;
int __pkvm_prot_finalize(void);
int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end);
-int kvm_host_prepare_stage2(void *mem_pgt_pool, void *dev_pgt_pool);
+int kvm_host_prepare_stage2(void *pgt_pool_base);
void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt);
static __always_inline void __load_host_stage2(void)
diff --git a/arch/arm64/kvm/hyp/include/nvhe/memory.h b/arch/arm64/kvm/hyp/include/nvhe/memory.h
index fd78bde939ee..592b7edb3edb 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/memory.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/memory.h
@@ -7,12 +7,9 @@
#include <linux/types.h>
-struct hyp_pool;
struct hyp_page {
- unsigned int refcount;
- unsigned int order;
- struct hyp_pool *pool;
- struct list_head node;
+ unsigned short refcount;
+ unsigned short order;
};
extern u64 __hyp_vmemmap;
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h
index 0095f6289742..8ec3a5a7744b 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mm.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h
@@ -78,19 +78,20 @@ static inline unsigned long hyp_s1_pgtable_pages(void)
return res;
}
-static inline unsigned long host_s2_mem_pgtable_pages(void)
+static inline unsigned long host_s2_pgtable_pages(void)
{
+ unsigned long res;
+
/*
* Include an extra 16 pages to safely upper-bound the worst case of
* concatenated pgds.
*/
- return __hyp_pgtable_total_pages() + 16;
-}
+ res = __hyp_pgtable_total_pages() + 16;
-static inline unsigned long host_s2_dev_pgtable_pages(void)
-{
/* Allow 1 GiB for MMIO mappings */
- return __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT);
+ res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT);
+
+ return res;
}
#endif /* __KVM_HYP_MM_H */
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 4b60c0056c04..d938ce95d3bd 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -23,8 +23,7 @@
extern unsigned long hyp_nr_cpus;
struct host_kvm host_kvm;
-static struct hyp_pool host_s2_mem;
-static struct hyp_pool host_s2_dev;
+static struct hyp_pool host_s2_pool;
/*
* Copies of the host's CPU features registers holding sanitized values.
@@ -36,7 +35,7 @@ static const u8 pkvm_hyp_id = 1;
static void *host_s2_zalloc_pages_exact(size_t size)
{
- return hyp_alloc_pages(&host_s2_mem, get_order(size));
+ return hyp_alloc_pages(&host_s2_pool, get_order(size));
}
static void *host_s2_zalloc_page(void *pool)
@@ -44,20 +43,24 @@ static void *host_s2_zalloc_page(void *pool)
return hyp_alloc_pages(pool, 0);
}
-static int prepare_s2_pools(void *mem_pgt_pool, void *dev_pgt_pool)
+static void host_s2_get_page(void *addr)
+{
+ hyp_get_page(&host_s2_pool, addr);
+}
+
+static void host_s2_put_page(void *addr)
+{
+ hyp_put_page(&host_s2_pool, addr);
+}
+
+static int prepare_s2_pool(void *pgt_pool_base)
{
unsigned long nr_pages, pfn;
int ret;
- pfn = hyp_virt_to_pfn(mem_pgt_pool);
- nr_pages = host_s2_mem_pgtable_pages();
- ret = hyp_pool_init(&host_s2_mem, pfn, nr_pages, 0);
- if (ret)
- return ret;
-
- pfn = hyp_virt_to_pfn(dev_pgt_pool);
- nr_pages = host_s2_dev_pgtable_pages();
- ret = hyp_pool_init(&host_s2_dev, pfn, nr_pages, 0);
+ pfn = hyp_virt_to_pfn(pgt_pool_base);
+ nr_pages = host_s2_pgtable_pages();
+ ret = hyp_pool_init(&host_s2_pool, pfn, nr_pages, 0);
if (ret)
return ret;
@@ -67,8 +70,8 @@ static int prepare_s2_pools(void *mem_pgt_pool, void *dev_pgt_pool)
.phys_to_virt = hyp_phys_to_virt,
.virt_to_phys = hyp_virt_to_phys,
.page_count = hyp_page_count,
- .get_page = hyp_get_page,
- .put_page = hyp_put_page,
+ .get_page = host_s2_get_page,
+ .put_page = host_s2_put_page,
};
return 0;
@@ -86,7 +89,7 @@ static void prepare_host_vtcr(void)
id_aa64mmfr1_el1_sys_val, phys_shift);
}
-int kvm_host_prepare_stage2(void *mem_pgt_pool, void *dev_pgt_pool)
+int kvm_host_prepare_stage2(void *pgt_pool_base)
{
struct kvm_s2_mmu *mmu = &host_kvm.arch.mmu;
int ret;
@@ -94,7 +97,7 @@ int kvm_host_prepare_stage2(void *mem_pgt_pool, void *dev_pgt_pool)
prepare_host_vtcr();
hyp_spin_lock_init(&host_kvm.lock);
- ret = prepare_s2_pools(mem_pgt_pool, dev_pgt_pool);
+ ret = prepare_s2_pool(pgt_pool_base);
if (ret)
return ret;
@@ -199,11 +202,10 @@ static bool range_is_memory(u64 start, u64 end)
}
static inline int __host_stage2_idmap(u64 start, u64 end,
- enum kvm_pgtable_prot prot,
- struct hyp_pool *pool)
+ enum kvm_pgtable_prot prot)
{
return kvm_pgtable_stage2_map(&host_kvm.pgt, start, end - start, start,
- prot, pool);
+ prot, &host_s2_pool);
}
static int host_stage2_idmap(u64 addr)
@@ -211,7 +213,6 @@ static int host_stage2_idmap(u64 addr)
enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W;
struct kvm_mem_range range;
bool is_memory = find_mem_range(addr, &range);
- struct hyp_pool *pool = is_memory ? &host_s2_mem : &host_s2_dev;
int ret;
if (is_memory)
@@ -222,22 +223,21 @@ static int host_stage2_idmap(u64 addr)
if (ret)
goto unlock;
- ret = __host_stage2_idmap(range.start, range.end, prot, pool);
- if (is_memory || ret != -ENOMEM)
+ ret = __host_stage2_idmap(range.start, range.end, prot);
+ if (ret != -ENOMEM)
goto unlock;
/*
- * host_s2_mem has been provided with enough pages to cover all of
- * memory with page granularity, so we should never hit the ENOMEM case.
- * However, it is difficult to know how much of the MMIO range we will
- * need to cover upfront, so we may need to 'recycle' the pages if we
- * run out.
+ * The pool has been provided with enough pages to cover all of memory
+ * with page granularity, but it is difficult to know how much of the
+ * MMIO range we will need to cover upfront, so we may need to 'recycle'
+ * the pages if we run out.
*/
ret = host_stage2_unmap_dev_all();
if (ret)
goto unlock;
- ret = __host_stage2_idmap(range.start, range.end, prot, pool);
+ ret = __host_stage2_idmap(range.start, range.end, prot);
unlock:
hyp_spin_unlock(&host_kvm.lock);
@@ -258,7 +258,7 @@ int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end)
hyp_spin_lock(&host_kvm.lock);
ret = kvm_pgtable_stage2_set_owner(&host_kvm.pgt, start, end - start,
- &host_s2_mem, pkvm_hyp_id);
+ &host_s2_pool, pkvm_hyp_id);
hyp_spin_unlock(&host_kvm.lock);
return ret != -EAGAIN ? ret : 0;
diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
index 237e03bf0cb1..41fc25bdfb34 100644
--- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c
+++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
@@ -32,7 +32,7 @@ u64 __hyp_vmemmap;
*/
static struct hyp_page *__find_buddy_nocheck(struct hyp_pool *pool,
struct hyp_page *p,
- unsigned int order)
+ unsigned short order)
{
phys_addr_t addr = hyp_page_to_phys(p);
@@ -51,21 +51,49 @@ static struct hyp_page *__find_buddy_nocheck(struct hyp_pool *pool,
/* Find a buddy page currently available for allocation */
static struct hyp_page *__find_buddy_avail(struct hyp_pool *pool,
struct hyp_page *p,
- unsigned int order)
+ unsigned short order)
{
struct hyp_page *buddy = __find_buddy_nocheck(pool, p, order);
- if (!buddy || buddy->order != order || list_empty(&buddy->node))
+ if (!buddy || buddy->order != order || buddy->refcount)
return NULL;
return buddy;
}
+/*
+ * Pages that are available for allocation are tracked in free-lists, so we use
+ * the pages themselves to store the list nodes to avoid wasting space. As the
+ * allocator always returns zeroed pages (which are zeroed on the hyp_put_page()
+ * path to optimize allocation speed), we also need to clean-up the list node in
+ * each page when we take it out of the list.
+ */
+static inline void page_remove_from_list(struct hyp_page *p)
+{
+ struct list_head *node = hyp_page_to_virt(p);
+
+ __list_del_entry(node);
+ memset(node, 0, sizeof(*node));
+}
+
+static inline void page_add_to_list(struct hyp_page *p, struct list_head *head)
+{
+ struct list_head *node = hyp_page_to_virt(p);
+
+ INIT_LIST_HEAD(node);
+ list_add_tail(node, head);
+}
+
+static inline struct hyp_page *node_to_page(struct list_head *node)
+{
+ return hyp_virt_to_page(node);
+}
+
static void __hyp_attach_page(struct hyp_pool *pool,
struct hyp_page *p)
{
- unsigned int order = p->order;
+ unsigned short order = p->order;
struct hyp_page *buddy;
memset(hyp_page_to_virt(p), 0, PAGE_SIZE << p->order);
@@ -83,32 +111,23 @@ static void __hyp_attach_page(struct hyp_pool *pool,
break;
/* Take the buddy out of its list, and coallesce with @p */
- list_del_init(&buddy->node);
+ page_remove_from_list(buddy);
buddy->order = HYP_NO_ORDER;
p = min(p, buddy);
}
/* Mark the new head, and insert it */
p->order = order;
- list_add_tail(&p->node, &pool->free_area[order]);
-}
-
-static void hyp_attach_page(struct hyp_page *p)
-{
- struct hyp_pool *pool = hyp_page_to_pool(p);
-
- hyp_spin_lock(&pool->lock);
- __hyp_attach_page(pool, p);
- hyp_spin_unlock(&pool->lock);
+ page_add_to_list(p, &pool->free_area[order]);
}
static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool,
struct hyp_page *p,
- unsigned int order)
+ unsigned short order)
{
struct hyp_page *buddy;
- list_del_init(&p->node);
+ page_remove_from_list(p);
while (p->order > order) {
/*
* The buddy of order n - 1 currently has HYP_NO_ORDER as it
@@ -119,30 +138,64 @@ static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool,
p->order--;
buddy = __find_buddy_nocheck(pool, p, p->order);
buddy->order = p->order;
- list_add_tail(&buddy->node, &pool->free_area[buddy->order]);
+ page_add_to_list(buddy, &pool->free_area[buddy->order]);
}
return p;
}
-void hyp_put_page(void *addr)
+static inline void hyp_page_ref_inc(struct hyp_page *p)
{
- struct hyp_page *p = hyp_virt_to_page(addr);
+ BUG_ON(p->refcount == USHRT_MAX);
+ p->refcount++;
+}
+static inline int hyp_page_ref_dec_and_test(struct hyp_page *p)
+{
+ p->refcount--;
+ return (p->refcount == 0);
+}
+
+static inline void hyp_set_page_refcounted(struct hyp_page *p)
+{
+ BUG_ON(p->refcount);
+ p->refcount = 1;
+}
+
+static void __hyp_put_page(struct hyp_pool *pool, struct hyp_page *p)
+{
if (hyp_page_ref_dec_and_test(p))
- hyp_attach_page(p);
+ __hyp_attach_page(pool, p);
+}
+
+/*
+ * Changes to the buddy tree and page refcounts must be done with the hyp_pool
+ * lock held. If a refcount change requires an update to the buddy tree (e.g.
+ * hyp_put_page()), both operations must be done within the same critical
+ * section to guarantee transient states (e.g. a page with null refcount but
+ * not yet attached to a free list) can't be observed by well-behaved readers.
+ */
+void hyp_put_page(struct hyp_pool *pool, void *addr)
+{
+ struct hyp_page *p = hyp_virt_to_page(addr);
+
+ hyp_spin_lock(&pool->lock);
+ __hyp_put_page(pool, p);
+ hyp_spin_unlock(&pool->lock);
}
-void hyp_get_page(void *addr)
+void hyp_get_page(struct hyp_pool *pool, void *addr)
{
struct hyp_page *p = hyp_virt_to_page(addr);
+ hyp_spin_lock(&pool->lock);
hyp_page_ref_inc(p);
+ hyp_spin_unlock(&pool->lock);
}
-void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order)
+void *hyp_alloc_pages(struct hyp_pool *pool, unsigned short order)
{
- unsigned int i = order;
+ unsigned short i = order;
struct hyp_page *p;
hyp_spin_lock(&pool->lock);
@@ -156,11 +209,11 @@ void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order)
}
/* Extract it from the tree at the right order */
- p = list_first_entry(&pool->free_area[i], struct hyp_page, node);
+ p = node_to_page(pool->free_area[i].next);
p = __hyp_extract_page(pool, p, order);
- hyp_spin_unlock(&pool->lock);
hyp_set_page_refcounted(p);
+ hyp_spin_unlock(&pool->lock);
return hyp_page_to_virt(p);
}
@@ -181,15 +234,14 @@ int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages,
/* Init the vmemmap portion */
p = hyp_phys_to_page(phys);
- memset(p, 0, sizeof(*p) * nr_pages);
for (i = 0; i < nr_pages; i++) {
- p[i].pool = pool;
- INIT_LIST_HEAD(&p[i].node);
+ p[i].order = 0;
+ hyp_set_page_refcounted(&p[i]);
}
/* Attach the unused pages to the buddy tree */
for (i = reserved_pages; i < nr_pages; i++)
- __hyp_attach_page(pool, &p[i]);
+ __hyp_put_page(pool, &p[i]);
return 0;
}
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index a3d3a275344e..f834833ac921 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -24,8 +24,7 @@ unsigned long hyp_nr_cpus;
static void *vmemmap_base;
static void *hyp_pgt_base;
-static void *host_s2_mem_pgt_base;
-static void *host_s2_dev_pgt_base;
+static void *host_s2_pgt_base;
static struct kvm_pgtable_mm_ops pkvm_pgtable_mm_ops;
static int divide_memory_pool(void *virt, unsigned long size)
@@ -45,14 +44,9 @@ static int divide_memory_pool(void *virt, unsigned long size)
if (!hyp_pgt_base)
return -ENOMEM;
- nr_pages = host_s2_mem_pgtable_pages();
- host_s2_mem_pgt_base = hyp_early_alloc_contig(nr_pages);
- if (!host_s2_mem_pgt_base)
- return -ENOMEM;
-
- nr_pages = host_s2_dev_pgtable_pages();
- host_s2_dev_pgt_base = hyp_early_alloc_contig(nr_pages);
- if (!host_s2_dev_pgt_base)
+ nr_pages = host_s2_pgtable_pages();
+ host_s2_pgt_base = hyp_early_alloc_contig(nr_pages);
+ if (!host_s2_pgt_base)
return -ENOMEM;
return 0;
@@ -143,6 +137,16 @@ static void *hyp_zalloc_hyp_page(void *arg)
return hyp_alloc_pages(&hpool, 0);
}
+static void hpool_get_page(void *addr)
+{
+ hyp_get_page(&hpool, addr);
+}
+
+static void hpool_put_page(void *addr)
+{
+ hyp_put_page(&hpool, addr);
+}
+
void __noreturn __pkvm_init_finalise(void)
{
struct kvm_host_data *host_data = this_cpu_ptr(&kvm_host_data);
@@ -158,7 +162,7 @@ void __noreturn __pkvm_init_finalise(void)
if (ret)
goto out;
- ret = kvm_host_prepare_stage2(host_s2_mem_pgt_base, host_s2_dev_pgt_base);
+ ret = kvm_host_prepare_stage2(host_s2_pgt_base);
if (ret)
goto out;
@@ -166,8 +170,8 @@ void __noreturn __pkvm_init_finalise(void)
.zalloc_page = hyp_zalloc_hyp_page,
.phys_to_virt = hyp_phys_to_virt,
.virt_to_phys = hyp_virt_to_phys,
- .get_page = hyp_get_page,
- .put_page = hyp_put_page,
+ .get_page = hpool_get_page,
+ .put_page = hpool_put_page,
};
pkvm_pgtable.mm_ops = &pkvm_pgtable_mm_ops;
diff --git a/arch/arm64/kvm/hyp/reserved_mem.c b/arch/arm64/kvm/hyp/reserved_mem.c
index 83ca23ac259b..d654921dd09b 100644
--- a/arch/arm64/kvm/hyp/reserved_mem.c
+++ b/arch/arm64/kvm/hyp/reserved_mem.c
@@ -71,8 +71,7 @@ void __init kvm_hyp_reserve(void)
}
hyp_mem_pages += hyp_s1_pgtable_pages();
- hyp_mem_pages += host_s2_mem_pgtable_pages();
- hyp_mem_pages += host_s2_dev_pgtable_pages();
+ hyp_mem_pages += host_s2_pgtable_pages();
/*
* The hyp_vmemmap needs to be backed by pages, but these pages
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index c10207fed2f3..5742ba765ff9 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -822,6 +822,35 @@ transparent_hugepage_adjust(struct kvm_memory_slot *memslot,
return PAGE_SIZE;
}
+static int get_vma_page_shift(struct vm_area_struct *vma, unsigned long hva)
+{
+ unsigned long pa;
+
+ if (is_vm_hugetlb_page(vma) && !(vma->vm_flags & VM_PFNMAP))
+ return huge_page_shift(hstate_vma(vma));
+
+ if (!(vma->vm_flags & VM_PFNMAP))
+ return PAGE_SHIFT;
+
+ VM_BUG_ON(is_vm_hugetlb_page(vma));
+
+ pa = (vma->vm_pgoff << PAGE_SHIFT) + (hva - vma->vm_start);
+
+#ifndef __PAGETABLE_PMD_FOLDED
+ if ((hva & (PUD_SIZE - 1)) == (pa & (PUD_SIZE - 1)) &&
+ ALIGN_DOWN(hva, PUD_SIZE) >= vma->vm_start &&
+ ALIGN(hva, PUD_SIZE) <= vma->vm_end)
+ return PUD_SHIFT;
+#endif
+
+ if ((hva & (PMD_SIZE - 1)) == (pa & (PMD_SIZE - 1)) &&
+ ALIGN_DOWN(hva, PMD_SIZE) >= vma->vm_start &&
+ ALIGN(hva, PMD_SIZE) <= vma->vm_end)
+ return PMD_SHIFT;
+
+ return PAGE_SHIFT;
+}
+
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
struct kvm_memory_slot *memslot, unsigned long hva,
unsigned long fault_status)
@@ -853,7 +882,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
return -EFAULT;
}
- /* Let's check if we will get back a huge page backed by hugetlbfs */
+ /*
+ * Let's check if we will get back a huge page backed by hugetlbfs, or
+ * get block mapping for device MMIO region.
+ */
mmap_read_lock(current->mm);
vma = find_vma_intersection(current->mm, hva, hva + 1);
if (unlikely(!vma)) {
@@ -862,15 +894,15 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
return -EFAULT;
}
- if (is_vm_hugetlb_page(vma))
- vma_shift = huge_page_shift(hstate_vma(vma));
- else
- vma_shift = PAGE_SHIFT;
-
- if (logging_active ||
- (vma->vm_flags & VM_PFNMAP)) {
+ /*
+ * logging_active is guaranteed to never be true for VM_PFNMAP
+ * memslots.
+ */
+ if (logging_active) {
force_pte = true;
vma_shift = PAGE_SHIFT;
+ } else {
+ vma_shift = get_vma_page_shift(vma, hva);
}
switch (vma_shift) {
@@ -943,8 +975,17 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
return -EFAULT;
if (kvm_is_device_pfn(pfn)) {
+ /*
+ * If the page was identified as device early by looking at
+ * the VMA flags, vma_pagesize is already representing the
+ * largest quantity we can map. If instead it was mapped
+ * via gfn_to_pfn_prot(), vma_pagesize is set to PAGE_SIZE
+ * and must not be upgraded.
+ *
+ * In both cases, we don't let transparent_hugepage_adjust()
+ * change things at the last minute.
+ */
device = true;
- force_pte = true;
} else if (logging_active && !write_fault) {
/*
* Only actually map the page as writable if this was a write
@@ -965,7 +1006,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
* If we are not forced to use page mapping, check if we are
* backed by a THP and thus use block mapping if possible.
*/
- if (vma_pagesize == PAGE_SIZE && !force_pte)
+ if (vma_pagesize == PAGE_SIZE && !(force_pte || device))
vma_pagesize = transparent_hugepage_adjust(memslot, hva,
&pfn, &fault_ipa);
if (writable)
@@ -1346,7 +1387,6 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
{
hva_t hva = mem->userspace_addr;
hva_t reg_end = hva + mem->memory_size;
- bool writable = !(mem->flags & KVM_MEM_READONLY);
int ret = 0;
if (change != KVM_MR_CREATE && change != KVM_MR_MOVE &&
@@ -1363,8 +1403,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
mmap_read_lock(current->mm);
/*
* A memory region could potentially cover multiple VMAs, and any holes
- * between them, so iterate over all of them to find out if we can map
- * any of them right now.
+ * between them, so iterate over all of them.
*
* +--------------------------------------------+
* +---------------+----------------+ +----------------+
@@ -1375,51 +1414,21 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
*/
do {
struct vm_area_struct *vma;
- hva_t vm_start, vm_end;
vma = find_vma_intersection(current->mm, hva, reg_end);
if (!vma)
break;
- /*
- * Take the intersection of this VMA with the memory region
- */
- vm_start = max(hva, vma->vm_start);
- vm_end = min(reg_end, vma->vm_end);
-
if (vma->vm_flags & VM_PFNMAP) {
- gpa_t gpa = mem->guest_phys_addr +
- (vm_start - mem->userspace_addr);
- phys_addr_t pa;
-
- pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT;
- pa += vm_start - vma->vm_start;
-
/* IO region dirty page logging not allowed */
if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) {
ret = -EINVAL;
- goto out;
- }
-
- ret = kvm_phys_addr_ioremap(kvm, gpa, pa,
- vm_end - vm_start,
- writable);
- if (ret)
break;
+ }
}
- hva = vm_end;
+ hva = min(reg_end, vma->vm_end);
} while (hva < reg_end);
- if (change == KVM_MR_FLAGS_ONLY)
- goto out;
-
- spin_lock(&kvm->mmu_lock);
- if (ret)
- unmap_stage2_range(&kvm->arch.mmu, mem->guest_phys_addr, mem->memory_size);
- else if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
- stage2_flush_memslot(kvm, memslot);
- spin_unlock(&kvm->mmu_lock);
-out:
mmap_read_unlock(current->mm);
return ret;
}
diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
index 58cbda00e56d..340c51d87677 100644
--- a/arch/arm64/kvm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -482,6 +482,16 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data)
return IRQ_HANDLED;
}
+static struct gic_kvm_info *gic_kvm_info;
+
+void __init vgic_set_kvm_info(const struct gic_kvm_info *info)
+{
+ BUG_ON(gic_kvm_info != NULL);
+ gic_kvm_info = kmalloc(sizeof(*info), GFP_KERNEL);
+ if (gic_kvm_info)
+ *gic_kvm_info = *info;
+}
+
/**
* kvm_vgic_init_cpu_hardware - initialize the GIC VE hardware
*
@@ -509,18 +519,29 @@ void kvm_vgic_init_cpu_hardware(void)
*/
int kvm_vgic_hyp_init(void)
{
- const struct gic_kvm_info *gic_kvm_info;
+ bool has_mask;
int ret;
- gic_kvm_info = gic_get_kvm_info();
if (!gic_kvm_info)
return -ENODEV;
- if (!gic_kvm_info->maint_irq) {
+ has_mask = !gic_kvm_info->no_maint_irq_mask;
+
+ if (has_mask && !gic_kvm_info->maint_irq) {
kvm_err("No vgic maintenance irq\n");
return -ENXIO;
}
+ /*
+ * If we get one of these oddball non-GICs, taint the kernel,
+ * as we have no idea of how they *really* behave.
+ */
+ if (gic_kvm_info->no_hw_deactivation) {
+ kvm_info("Non-architectural vgic, tainting kernel\n");
+ add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
+ kvm_vgic_global_state.no_hw_deactivation = true;
+ }
+
switch (gic_kvm_info->type) {
case GIC_V2:
ret = vgic_v2_probe(gic_kvm_info);
@@ -536,10 +557,17 @@ int kvm_vgic_hyp_init(void)
ret = -ENODEV;
}
+ kvm_vgic_global_state.maint_irq = gic_kvm_info->maint_irq;
+
+ kfree(gic_kvm_info);
+ gic_kvm_info = NULL;
+
if (ret)
return ret;
- kvm_vgic_global_state.maint_irq = gic_kvm_info->maint_irq;
+ if (!has_mask)
+ return 0;
+
ret = request_percpu_irq(kvm_vgic_global_state.maint_irq,
vgic_maintenance_handler,
"vgic", kvm_get_running_vcpus());
diff --git a/arch/arm64/kvm/vgic/vgic-v2.c b/arch/arm64/kvm/vgic/vgic-v2.c
index 11934c2af2f4..2c580204f1dc 100644
--- a/arch/arm64/kvm/vgic/vgic-v2.c
+++ b/arch/arm64/kvm/vgic/vgic-v2.c
@@ -108,11 +108,22 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
* If this causes us to lower the level, we have to also clear
* the physical active state, since we will otherwise never be
* told when the interrupt becomes asserted again.
+ *
+ * Another case is when the interrupt requires a helping hand
+ * on deactivation (no HW deactivation, for example).
*/
- if (vgic_irq_is_mapped_level(irq) && (val & GICH_LR_PENDING_BIT)) {
- irq->line_level = vgic_get_phys_line_level(irq);
+ if (vgic_irq_is_mapped_level(irq)) {
+ bool resample = false;
+
+ if (val & GICH_LR_PENDING_BIT) {
+ irq->line_level = vgic_get_phys_line_level(irq);
+ resample = !irq->line_level;
+ } else if (vgic_irq_needs_resampling(irq) &&
+ !(irq->active || irq->pending_latch)) {
+ resample = true;
+ }
- if (!irq->line_level)
+ if (resample)
vgic_irq_set_phys_active(irq, false);
}
@@ -152,7 +163,7 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
if (irq->group)
val |= GICH_LR_GROUP1;
- if (irq->hw) {
+ if (irq->hw && !vgic_irq_needs_resampling(irq)) {
val |= GICH_LR_HW;
val |= irq->hwintid << GICH_LR_PHYSID_CPUID_SHIFT;
/*
diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c
index 41ecf219c333..66004f61cd83 100644
--- a/arch/arm64/kvm/vgic/vgic-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-v3.c
@@ -101,11 +101,22 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
* If this causes us to lower the level, we have to also clear
* the physical active state, since we will otherwise never be
* told when the interrupt becomes asserted again.
+ *
+ * Another case is when the interrupt requires a helping hand
+ * on deactivation (no HW deactivation, for example).
*/
- if (vgic_irq_is_mapped_level(irq) && (val & ICH_LR_PENDING_BIT)) {
- irq->line_level = vgic_get_phys_line_level(irq);
+ if (vgic_irq_is_mapped_level(irq)) {
+ bool resample = false;
+
+ if (val & ICH_LR_PENDING_BIT) {
+ irq->line_level = vgic_get_phys_line_level(irq);
+ resample = !irq->line_level;
+ } else if (vgic_irq_needs_resampling(irq) &&
+ !(irq->active || irq->pending_latch)) {
+ resample = true;
+ }
- if (!irq->line_level)
+ if (resample)
vgic_irq_set_phys_active(irq, false);
}
@@ -136,7 +147,7 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
}
}
- if (irq->hw) {
+ if (irq->hw && !vgic_irq_needs_resampling(irq)) {
val |= ICH_LR_HW;
val |= ((u64)irq->hwintid) << ICH_LR_PHYS_ID_SHIFT;
/*
diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c
index 15b666200f0b..111bff47e471 100644
--- a/arch/arm64/kvm/vgic/vgic.c
+++ b/arch/arm64/kvm/vgic/vgic.c
@@ -182,8 +182,8 @@ bool vgic_get_phys_line_level(struct vgic_irq *irq)
BUG_ON(!irq->hw);
- if (irq->get_input_level)
- return irq->get_input_level(irq->intid);
+ if (irq->ops && irq->ops->get_input_level)
+ return irq->ops->get_input_level(irq->intid);
WARN_ON(irq_get_irqchip_state(irq->host_irq,
IRQCHIP_STATE_PENDING,
@@ -480,7 +480,7 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
/* @irq->irq_lock must be held */
static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
unsigned int host_irq,
- bool (*get_input_level)(int vindid))
+ struct irq_ops *ops)
{
struct irq_desc *desc;
struct irq_data *data;
@@ -500,7 +500,7 @@ static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
irq->hw = true;
irq->host_irq = host_irq;
irq->hwintid = data->hwirq;
- irq->get_input_level = get_input_level;
+ irq->ops = ops;
return 0;
}
@@ -509,11 +509,11 @@ static inline void kvm_vgic_unmap_irq(struct vgic_irq *irq)
{
irq->hw = false;
irq->hwintid = 0;
- irq->get_input_level = NULL;
+ irq->ops = NULL;
}
int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
- u32 vintid, bool (*get_input_level)(int vindid))
+ u32 vintid, struct irq_ops *ops)
{
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
unsigned long flags;
@@ -522,7 +522,7 @@ int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
BUG_ON(!irq);
raw_spin_lock_irqsave(&irq->irq_lock, flags);
- ret = kvm_vgic_map_irq(vcpu, irq, host_irq, get_input_level);
+ ret = kvm_vgic_map_irq(vcpu, irq, host_irq, ops);
raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
vgic_put_irq(vcpu->kvm, irq);
diff --git a/drivers/irqchip/irq-apple-aic.c b/drivers/irqchip/irq-apple-aic.c
index c179e27062fd..b8c06bd8659e 100644
--- a/drivers/irqchip/irq-apple-aic.c
+++ b/drivers/irqchip/irq-apple-aic.c
@@ -50,6 +50,7 @@
#include <linux/cpuhotplug.h>
#include <linux/io.h>
#include <linux/irqchip.h>
+#include <linux/irqchip/arm-vgic-info.h>
#include <linux/irqdomain.h>
#include <linux/limits.h>
#include <linux/of_address.h>
@@ -787,6 +788,12 @@ static int aic_init_cpu(unsigned int cpu)
return 0;
}
+static struct gic_kvm_info vgic_info __initdata = {
+ .type = GIC_V3,
+ .no_maint_irq_mask = true,
+ .no_hw_deactivation = true,
+};
+
static int __init aic_of_ic_init(struct device_node *node, struct device_node *parent)
{
int i;
@@ -843,6 +850,8 @@ static int __init aic_of_ic_init(struct device_node *node, struct device_node *p
"irqchip/apple-aic/ipi:starting",
aic_init_cpu, NULL);
+ vgic_set_kvm_info(&vgic_info);
+
pr_info("Initialized with %d IRQs, %d FIQs, %d vIPIs\n",
irqc->nr_hw, AIC_NR_FIQ, AIC_NR_SWIPI);
diff --git a/drivers/irqchip/irq-gic-common.c b/drivers/irqchip/irq-gic-common.c
index f47b41dfd023..a610821c8ff2 100644
--- a/drivers/irqchip/irq-gic-common.c
+++ b/drivers/irqchip/irq-gic-common.c
@@ -12,19 +12,6 @@
static DEFINE_RAW_SPINLOCK(irq_controller_lock);
-static const struct gic_kvm_info *gic_kvm_info;
-
-const struct gic_kvm_info *gic_get_kvm_info(void)
-{
- return gic_kvm_info;
-}
-
-void gic_set_kvm_info(const struct gic_kvm_info *info)
-{
- BUG_ON(gic_kvm_info != NULL);
- gic_kvm_info = info;
-}
-
void gic_enable_of_quirks(const struct device_node *np,
const struct gic_quirk *quirks, void *data)
{
diff --git a/drivers/irqchip/irq-gic-common.h b/drivers/irqchip/irq-gic-common.h
index ccba8b0fe0f5..27e3d4ed4f32 100644
--- a/drivers/irqchip/irq-gic-common.h
+++ b/drivers/irqchip/irq-gic-common.h
@@ -28,6 +28,4 @@ void gic_enable_quirks(u32 iidr, const struct gic_quirk *quirks,
void gic_enable_of_quirks(const struct device_node *np,
const struct gic_quirk *quirks, void *data);
-void gic_set_kvm_info(const struct gic_kvm_info *info);
-
#endif /* _IRQ_GIC_COMMON_H */
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 37a23aa6de37..453fc425eede 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -103,7 +103,7 @@ EXPORT_SYMBOL(gic_nonsecure_priorities);
/* ppi_nmi_refs[n] == number of cpus having ppi[n + 16] set as NMI */
static refcount_t *ppi_nmi_refs;
-static struct gic_kvm_info gic_v3_kvm_info;
+static struct gic_kvm_info gic_v3_kvm_info __initdata;
static DEFINE_PER_CPU(bool, has_rss);
#define MPIDR_RS(mpidr) (((mpidr) & 0xF0UL) >> 4)
@@ -1852,7 +1852,7 @@ static void __init gic_of_setup_kvm_info(struct device_node *node)
gic_v3_kvm_info.has_v4 = gic_data.rdists.has_vlpis;
gic_v3_kvm_info.has_v4_1 = gic_data.rdists.has_rvpeid;
- gic_set_kvm_info(&gic_v3_kvm_info);
+ vgic_set_kvm_info(&gic_v3_kvm_info);
}
static int __init gic_of_init(struct device_node *node, struct device_node *parent)
@@ -2168,7 +2168,7 @@ static void __init gic_acpi_setup_kvm_info(void)
gic_v3_kvm_info.has_v4 = gic_data.rdists.has_vlpis;
gic_v3_kvm_info.has_v4_1 = gic_data.rdists.has_rvpeid;
- gic_set_kvm_info(&gic_v3_kvm_info);
+ vgic_set_kvm_info(&gic_v3_kvm_info);
}
static int __init
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index b1d9c22caf2e..2de9ec8ece0c 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -119,7 +119,7 @@ static DEFINE_STATIC_KEY_TRUE(supports_deactivate_key);
static struct gic_chip_data gic_data[CONFIG_ARM_GIC_MAX_NR] __read_mostly;
-static struct gic_kvm_info gic_v2_kvm_info;
+static struct gic_kvm_info gic_v2_kvm_info __initdata;
static DEFINE_PER_CPU(u32, sgi_intid);
@@ -1451,7 +1451,7 @@ static void __init gic_of_setup_kvm_info(struct device_node *node)
return;
if (static_branch_likely(&supports_deactivate_key))
- gic_set_kvm_info(&gic_v2_kvm_info);
+ vgic_set_kvm_info(&gic_v2_kvm_info);
}
int __init
@@ -1618,7 +1618,7 @@ static void __init gic_acpi_setup_kvm_info(void)
gic_v2_kvm_info.maint_irq = irq;
- gic_set_kvm_info(&gic_v2_kvm_info);
+ vgic_set_kvm_info(&gic_v2_kvm_info);
}
static int __init gic_v2_acpi_init(union acpi_subtable_headers *header,
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index ec621180ef09..e602d848fc1a 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -72,6 +72,9 @@ struct vgic_global {
bool has_gicv4;
bool has_gicv4_1;
+ /* Pseudo GICv3 from outer space */
+ bool no_hw_deactivation;
+
/* GIC system register CPU interface */
struct static_key_false gicv3_cpuif;
@@ -89,6 +92,26 @@ enum vgic_irq_config {
VGIC_CONFIG_LEVEL
};
+/*
+ * Per-irq ops overriding some common behavious.
+ *
+ * Always called in non-preemptible section and the functions can use
+ * kvm_arm_get_running_vcpu() to get the vcpu pointer for private IRQs.
+ */
+struct irq_ops {
+ /* Per interrupt flags for special-cased interrupts */
+ unsigned long flags;
+
+#define VGIC_IRQ_SW_RESAMPLE BIT(0) /* Clear the active state for resampling */
+
+ /*
+ * Callback function pointer to in-kernel devices that can tell us the
+ * state of the input level of mapped level-triggered IRQ faster than
+ * peaking into the physical GIC.
+ */
+ bool (*get_input_level)(int vintid);
+};
+
struct vgic_irq {
raw_spinlock_t irq_lock; /* Protects the content of the struct */
struct list_head lpi_list; /* Used to link all LPIs together */
@@ -126,21 +149,17 @@ struct vgic_irq {
u8 group; /* 0 == group 0, 1 == group 1 */
enum vgic_irq_config config; /* Level or edge */
- /*
- * Callback function pointer to in-kernel devices that can tell us the
- * state of the input level of mapped level-triggered IRQ faster than
- * peaking into the physical GIC.
- *
- * Always called in non-preemptible section and the functions can use
- * kvm_arm_get_running_vcpu() to get the vcpu pointer for private
- * IRQs.
- */
- bool (*get_input_level)(int vintid);
+ struct irq_ops *ops;
void *owner; /* Opaque pointer to reserve an interrupt
for in-kernel devices. */
};
+static inline bool vgic_irq_needs_resampling(struct vgic_irq *irq)
+{
+ return irq->ops && (irq->ops->flags & VGIC_IRQ_SW_RESAMPLE);
+}
+
struct vgic_register_region;
struct vgic_its;
@@ -352,7 +371,7 @@ void kvm_vgic_init_cpu_hardware(void);
int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
bool level, void *owner);
int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
- u32 vintid, bool (*get_input_level)(int vindid));
+ u32 vintid, struct irq_ops *ops);
int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid);
bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid);
diff --git a/include/linux/irqchip/arm-gic-common.h b/include/linux/irqchip/arm-gic-common.h
index fa8c0455c352..1177f3a1aed5 100644
--- a/include/linux/irqchip/arm-gic-common.h
+++ b/include/linux/irqchip/arm-gic-common.h
@@ -7,8 +7,7 @@
#ifndef __LINUX_IRQCHIP_ARM_GIC_COMMON_H
#define __LINUX_IRQCHIP_ARM_GIC_COMMON_H
-#include <linux/types.h>
-#include <linux/ioport.h>
+#include <linux/irqchip/arm-vgic-info.h>
#define GICD_INT_DEF_PRI 0xa0
#define GICD_INT_DEF_PRI_X4 ((GICD_INT_DEF_PRI << 24) |\
@@ -16,28 +15,6 @@
(GICD_INT_DEF_PRI << 8) |\
GICD_INT_DEF_PRI)
-enum gic_type {
- GIC_V2,
- GIC_V3,
-};
-
-struct gic_kvm_info {
- /* GIC type */
- enum gic_type type;
- /* Virtual CPU interface */
- struct resource vcpu;
- /* Interrupt number */
- unsigned int maint_irq;
- /* Virtual control interface */
- struct resource vctrl;
- /* vlpi support */
- bool has_v4;
- /* rvpeid support */
- bool has_v4_1;
-};
-
-const struct gic_kvm_info *gic_get_kvm_info(void);
-
struct irq_domain;
struct fwnode_handle;
int gicv2m_init(struct fwnode_handle *parent_handle,
diff --git a/include/linux/irqchip/arm-vgic-info.h b/include/linux/irqchip/arm-vgic-info.h
new file mode 100644
index 000000000000..a75b2c7de69d
--- /dev/null
+++ b/include/linux/irqchip/arm-vgic-info.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * include/linux/irqchip/arm-vgic-info.h
+ *
+ * Copyright (C) 2016 ARM Limited, All Rights Reserved.
+ */
+#ifndef __LINUX_IRQCHIP_ARM_VGIC_INFO_H
+#define __LINUX_IRQCHIP_ARM_VGIC_INFO_H
+
+#include <linux/types.h>
+#include <linux/ioport.h>
+
+enum gic_type {
+ /* Full GICv2 */
+ GIC_V2,
+ /* Full GICv3, optionally with v2 compat */
+ GIC_V3,
+};
+
+struct gic_kvm_info {
+ /* GIC type */
+ enum gic_type type;
+ /* Virtual CPU interface */
+ struct resource vcpu;
+ /* Interrupt number */
+ unsigned int maint_irq;
+ /* No interrupt mask, no need to use the above field */
+ bool no_maint_irq_mask;
+ /* Virtual control interface */
+ struct resource vctrl;
+ /* vlpi support */
+ bool has_v4;
+ /* rvpeid support */
+ bool has_v4_1;
+ /* Deactivation impared, subpar stuff */
+ bool no_hw_deactivation;
+};
+
+#ifdef CONFIG_KVM
+void vgic_set_kvm_info(const struct gic_kvm_info *info);
+#else
+static inline void vgic_set_kvm_info(const struct gic_kvm_info *info) {}
+#endif
+
+#endif