From 43934a38d7cb39ff33baedc7f2c40a2a891116fa Mon Sep 17 00:00:00 2001 From: Jeremy Katz Date: Mon, 19 Feb 2007 14:37:46 +0200 Subject: KVM: Move virtualization deactivation from CPU_DEAD state to CPU_DOWN_PREPARE This gives it more chances of surviving suspend. Signed-off-by: Jeremy Katz Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers/kvm/kvm_main.c') diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index af866147ff25..0a96c2c7e191 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -2080,13 +2080,17 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, int cpu = (long)v; switch (val) { - case CPU_DEAD: + case CPU_DOWN_PREPARE: case CPU_UP_CANCELED: + printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n", + cpu); decache_vcpus_on_cpu(cpu); smp_call_function_single(cpu, kvm_arch_ops->hardware_disable, NULL, 0, 1); break; - case CPU_UP_PREPARE: + case CPU_ONLINE: + printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n", + cpu); smp_call_function_single(cpu, kvm_arch_ops->hardware_enable, NULL, 0, 1); break; -- cgit From d27d4aca184ac0ca6b7e32caf79e1c2b91959be9 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 19 Feb 2007 14:37:46 +0200 Subject: KVM: Cosmetics Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 21 ++++++++------------- drivers/kvm/paging_tmpl.h | 3 +-- drivers/kvm/svm.c | 10 +++++----- drivers/kvm/vmx.c | 4 +--- 4 files changed, 15 insertions(+), 23 deletions(-) (limited to 'drivers/kvm/kvm_main.c') diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 0a96c2c7e191..13a99cac3679 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -126,10 +126,8 @@ static inline int valid_vcpu(int n) return likely(n >= 0 && n < KVM_MAX_VCPUS); } -int kvm_read_guest(struct kvm_vcpu *vcpu, - gva_t addr, - unsigned long size, - void *dest) +int kvm_read_guest(struct kvm_vcpu *vcpu, gva_t addr, unsigned long size, + void *dest) { unsigned char *host_buf = dest; unsigned long req_size = size; @@ -161,10 +159,8 @@ int kvm_read_guest(struct kvm_vcpu *vcpu, } EXPORT_SYMBOL_GPL(kvm_read_guest); -int kvm_write_guest(struct kvm_vcpu *vcpu, - gva_t addr, - unsigned long size, - void *data) +int kvm_write_guest(struct kvm_vcpu *vcpu, gva_t addr, unsigned long size, + void *data) { unsigned char *host_buf = data; unsigned long req_size = size; @@ -457,7 +453,7 @@ EXPORT_SYMBOL_GPL(set_cr4); void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) { if (is_long_mode(vcpu)) { - if ( cr3 & CR3_L_MODE_RESEVED_BITS) { + if (cr3 & CR3_L_MODE_RESEVED_BITS) { printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n"); inject_gp(vcpu); return; @@ -774,7 +770,6 @@ static int kvm_dev_ioctl_get_dirty_log(struct kvm *kvm, if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n)) goto out; - if (any) { cleared = 0; for (i = 0; i < KVM_MAX_VCPUS; ++i) { @@ -903,8 +898,9 @@ static int emulator_read_emulated(unsigned long addr, return X86EMUL_CONTINUE; else { gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr); + if (gpa == UNMAPPED_GVA) - return vcpu_printf(vcpu, "not present\n"), X86EMUL_PROPAGATE_FAULT; + return X86EMUL_PROPAGATE_FAULT; vcpu->mmio_needed = 1; vcpu->mmio_phys_addr = gpa; vcpu->mmio_size = bytes; @@ -1801,12 +1797,11 @@ static long kvm_dev_ioctl(struct file *filp, case KVM_GET_API_VERSION: r = KVM_API_VERSION; break; - case KVM_CREATE_VCPU: { + case KVM_CREATE_VCPU: r = kvm_dev_ioctl_create_vcpu(kvm, arg); if (r) goto out; break; - 
} case KVM_RUN: { struct kvm_run kvm_run; diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h index 6507ccb1ea2a..f3bcee904651 100644 --- a/drivers/kvm/paging_tmpl.h +++ b/drivers/kvm/paging_tmpl.h @@ -441,9 +441,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, /* * mmio: emulate if accessible, otherwise its a guest fault. */ - if (is_io_pte(*shadow_pte)) { + if (is_io_pte(*shadow_pte)) return 1; - } ++kvm_stat.pf_fixed; kvm_mmu_audit(vcpu, "post page fault (fixed)"); diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index 83da4ea150a3..31836444bc62 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -1042,22 +1042,22 @@ static int io_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) addr_mask = io_adress(vcpu, _in, &kvm_run->io.address); if (!addr_mask) { - printk(KERN_DEBUG "%s: get io address failed\n", __FUNCTION__); + printk(KERN_DEBUG "%s: get io address failed\n", + __FUNCTION__); return 1; } if (kvm_run->io.rep) { - kvm_run->io.count = vcpu->regs[VCPU_REGS_RCX] & addr_mask; + kvm_run->io.count + = vcpu->regs[VCPU_REGS_RCX] & addr_mask; kvm_run->io.string_down = (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_DF) != 0; } - } else { + } else kvm_run->io.value = vcpu->svm->vmcb->save.rax; - } return 0; } - static int nop_on_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { return 1; diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index fd4e91734388..ff3bfc5c390a 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -27,7 +27,6 @@ #include "segment_descriptor.h" - MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); @@ -418,10 +417,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) case MSR_IA32_SYSENTER_ESP: vmcs_write32(GUEST_SYSENTER_ESP, data); break; - case MSR_IA32_TIME_STAMP_COUNTER: { + case MSR_IA32_TIME_STAMP_COUNTER: guest_write_tsc(data); break; - } default: msr = find_msr_entry(vcpu, msr_index); if (msr) { -- cgit From 5972e9535e94bf875eb8eab8a667ba04c7583874 Mon Sep 17 00:00:00 2001 From: Markus Rechberger Date: Mon, 19 Feb 2007 14:37:47 +0200 Subject: KVM: Use page_private()/set_page_private() apis Besides using an established api, this allows using kvm in older kernels. 
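In the kernel these accessors are essentially one-line macros over struct page's private word, so the conversion below is mechanical; going through page_private()/set_page_private() instead of poking page->private directly is what lets the field keep working across kernel versions (and pick up debug checks) without touching every caller. A standalone mock of the pattern, with a stand-in struct page so it can be compiled and run on its own (the values are made up, mirroring how the mmu rmap code tags bit 0 of the word):

#include <stdio.h>

/* Stand-in for the kernel's struct page; only the private word matters here. */
struct page {
	unsigned long private;
};

/* Accessor pattern used by this patch (the kernel's versions look much alike). */
#define page_private(page)		((page)->private)
#define set_page_private(page, v)	((page)->private = (v))

int main(void)
{
	struct page pg = { 0 };
	unsigned long desc = 0x1000;	/* pretend rmap descriptor address */

	/* rmap_add()-style: stash a pointer-sized value and tag it with bit 0 */
	set_page_private(&pg, desc | 1);

	if (page_private(&pg) & 1)
		printf("many-mapping descriptor at %#lx\n",
		       page_private(&pg) & ~1ul);

	set_page_private(&pg, 0);	/* rmap_remove()-style clear */
	return 0;
}
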
Signed-off-by: Markus Rechberger Signed-off-by: Avi Kivity --- drivers/kvm/kvm.h | 2 +- drivers/kvm/kvm_main.c | 2 +- drivers/kvm/mmu.c | 36 ++++++++++++++++++------------------ 3 files changed, 20 insertions(+), 20 deletions(-) (limited to 'drivers/kvm/kvm_main.c') diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index 04574a9d4430..9a49b2ed2a1e 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -523,7 +523,7 @@ static inline struct kvm_mmu_page *page_header(hpa_t shadow_page) { struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT); - return (struct kvm_mmu_page *)page->private; + return (struct kvm_mmu_page *)page_private(page); } static inline u16 read_fs(void) diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 13a99cac3679..122c05f283e1 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -670,7 +670,7 @@ raced: | __GFP_ZERO); if (!new.phys_mem[i]) goto out_free; - new.phys_mem[i]->private = 0; + set_page_private(new.phys_mem[i],0); } } diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index be793770f31b..a1a93368f314 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -298,18 +298,18 @@ static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte) if (!is_rmap_pte(*spte)) return; page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT); - if (!page->private) { + if (!page_private(page)) { rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte); - page->private = (unsigned long)spte; - } else if (!(page->private & 1)) { + set_page_private(page,(unsigned long)spte); + } else if (!(page_private(page) & 1)) { rmap_printk("rmap_add: %p %llx 1->many\n", spte, *spte); desc = mmu_alloc_rmap_desc(vcpu); - desc->shadow_ptes[0] = (u64 *)page->private; + desc->shadow_ptes[0] = (u64 *)page_private(page); desc->shadow_ptes[1] = spte; - page->private = (unsigned long)desc | 1; + set_page_private(page,(unsigned long)desc | 1); } else { rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte); - desc = (struct kvm_rmap_desc *)(page->private & ~1ul); + desc = (struct kvm_rmap_desc *)(page_private(page) & ~1ul); while (desc->shadow_ptes[RMAP_EXT-1] && desc->more) desc = desc->more; if (desc->shadow_ptes[RMAP_EXT-1]) { @@ -337,12 +337,12 @@ static void rmap_desc_remove_entry(struct kvm_vcpu *vcpu, if (j != 0) return; if (!prev_desc && !desc->more) - page->private = (unsigned long)desc->shadow_ptes[0]; + set_page_private(page,(unsigned long)desc->shadow_ptes[0]); else if (prev_desc) prev_desc->more = desc->more; else - page->private = (unsigned long)desc->more | 1; + set_page_private(page,(unsigned long)desc->more | 1); mmu_free_rmap_desc(vcpu, desc); } @@ -356,20 +356,20 @@ static void rmap_remove(struct kvm_vcpu *vcpu, u64 *spte) if (!is_rmap_pte(*spte)) return; page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT); - if (!page->private) { + if (!page_private(page)) { printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte); BUG(); - } else if (!(page->private & 1)) { + } else if (!(page_private(page) & 1)) { rmap_printk("rmap_remove: %p %llx 1->0\n", spte, *spte); - if ((u64 *)page->private != spte) { + if ((u64 *)page_private(page) != spte) { printk(KERN_ERR "rmap_remove: %p %llx 1->BUG\n", spte, *spte); BUG(); } - page->private = 0; + set_page_private(page,0); } else { rmap_printk("rmap_remove: %p %llx many->many\n", spte, *spte); - desc = (struct kvm_rmap_desc *)(page->private & ~1ul); + desc = (struct kvm_rmap_desc *)(page_private(page) & ~1ul); prev_desc = NULL; while (desc) { for (i = 0; i < RMAP_EXT && desc->shadow_ptes[i]; ++i) 
@@ -398,11 +398,11 @@ static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn) BUG_ON(!slot); page = gfn_to_page(slot, gfn); - while (page->private) { - if (!(page->private & 1)) - spte = (u64 *)page->private; + while (page_private(page)) { + if (!(page_private(page) & 1)) + spte = (u64 *)page_private(page); else { - desc = (struct kvm_rmap_desc *)(page->private & ~1ul); + desc = (struct kvm_rmap_desc *)(page_private(page) & ~1ul); spte = desc->shadow_ptes[0]; } BUG_ON(!spte); @@ -1218,7 +1218,7 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu) INIT_LIST_HEAD(&page_header->link); if ((page = alloc_page(GFP_KERNEL)) == NULL) goto error_1; - page->private = (unsigned long)page_header; + set_page_private(page, (unsigned long)page_header); page_header->page_hpa = (hpa_t)page_to_pfn(page) << PAGE_SHIFT; memset(__va(page_header->page_hpa), 0, PAGE_SIZE); list_add(&page_header->link, &vcpu->free_pages); -- cgit From 102d8325a1d2f266d3d0a03fdde948544e72c12d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 19 Feb 2007 14:37:47 +0200 Subject: KVM: add MSR based hypercall API This adds a special MSR based hypercall API to KVM. This is to be used by paravirtual kernels and virtual drivers. Signed-off-by: Ingo Molnar Signed-off-by: Avi Kivity --- drivers/kvm/kvm.h | 6 ++++ drivers/kvm/kvm_main.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++ drivers/kvm/svm.c | 13 +++++++++ drivers/kvm/vmx.c | 13 +++++++++ include/linux/kvm_para.h | 55 ++++++++++++++++++++++++++++++++++++ 5 files changed, 160 insertions(+) create mode 100644 include/linux/kvm_para.h (limited to 'drivers/kvm/kvm_main.c') diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index 9a49b2ed2a1e..fd7746a2bc3e 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -14,6 +14,7 @@ #include "vmx.h" #include +#include #define CR0_PE_MASK (1ULL << 0) #define CR0_TS_MASK (1ULL << 3) @@ -237,6 +238,9 @@ struct kvm_vcpu { unsigned long cr0; unsigned long cr2; unsigned long cr3; + gpa_t para_state_gpa; + struct page *para_state_page; + gpa_t hypercall_gpa; unsigned long cr4; unsigned long cr8; u64 pdptrs[4]; /* pae */ @@ -382,6 +386,8 @@ struct kvm_arch_ops { int (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run); int (*vcpu_setup)(struct kvm_vcpu *vcpu); void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); + void (*patch_hypercall)(struct kvm_vcpu *vcpu, + unsigned char *hypercall_addr); }; extern struct kvm_stat kvm_stat; diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 122c05f283e1..757a41f1db84 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -1204,6 +1204,73 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val, } } +/* + * Register the para guest with the host: + */ +static int vcpu_register_para(struct kvm_vcpu *vcpu, gpa_t para_state_gpa) +{ + struct kvm_vcpu_para_state *para_state; + hpa_t para_state_hpa, hypercall_hpa; + struct page *para_state_page; + unsigned char *hypercall; + gpa_t hypercall_gpa; + + printk(KERN_DEBUG "kvm: guest trying to enter paravirtual mode\n"); + printk(KERN_DEBUG ".... para_state_gpa: %08Lx\n", para_state_gpa); + + /* + * Needs to be page aligned: + */ + if (para_state_gpa != PAGE_ALIGN(para_state_gpa)) + goto err_gp; + + para_state_hpa = gpa_to_hpa(vcpu, para_state_gpa); + printk(KERN_DEBUG ".... 
para_state_hpa: %08Lx\n", para_state_hpa); + if (is_error_hpa(para_state_hpa)) + goto err_gp; + + para_state_page = pfn_to_page(para_state_hpa >> PAGE_SHIFT); + para_state = kmap_atomic(para_state_page, KM_USER0); + + printk(KERN_DEBUG ".... guest version: %d\n", para_state->guest_version); + printk(KERN_DEBUG ".... size: %d\n", para_state->size); + + para_state->host_version = KVM_PARA_API_VERSION; + /* + * We cannot support guests that try to register themselves + * with a newer API version than the host supports: + */ + if (para_state->guest_version > KVM_PARA_API_VERSION) { + para_state->ret = -KVM_EINVAL; + goto err_kunmap_skip; + } + + hypercall_gpa = para_state->hypercall_gpa; + hypercall_hpa = gpa_to_hpa(vcpu, hypercall_gpa); + printk(KERN_DEBUG ".... hypercall_hpa: %08Lx\n", hypercall_hpa); + if (is_error_hpa(hypercall_hpa)) { + para_state->ret = -KVM_EINVAL; + goto err_kunmap_skip; + } + + printk(KERN_DEBUG "kvm: para guest successfully registered.\n"); + vcpu->para_state_page = para_state_page; + vcpu->para_state_gpa = para_state_gpa; + vcpu->hypercall_gpa = hypercall_gpa; + + hypercall = kmap_atomic(pfn_to_page(hypercall_hpa >> PAGE_SHIFT), + KM_USER1) + (hypercall_hpa & ~PAGE_MASK); + kvm_arch_ops->patch_hypercall(vcpu, hypercall); + kunmap_atomic(hypercall, KM_USER1); + + para_state->ret = 0; +err_kunmap_skip: + kunmap_atomic(para_state, KM_USER0); + return 0; +err_gp: + return 1; +} + int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) { u64 data; @@ -1312,6 +1379,12 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) case MSR_IA32_MISC_ENABLE: vcpu->ia32_misc_enable_msr = data; break; + /* + * This is the 'probe whether the host is KVM' logic: + */ + case MSR_KVM_API_MAGIC: + return vcpu_register_para(vcpu, data); + default: printk(KERN_ERR "kvm: unhandled wrmsr: 0x%x\n", msr); return 1; diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index 72cac0488b31..f6e86528f031 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -1669,6 +1669,18 @@ static int is_disabled(void) return 0; } +static void +svm_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) +{ + /* + * Patch in the VMMCALL instruction: + */ + hypercall[0] = 0x0f; + hypercall[1] = 0x01; + hypercall[2] = 0xd9; + hypercall[3] = 0xc3; +} + static struct kvm_arch_ops svm_arch_ops = { .cpu_has_kvm_support = has_svm, .disabled_by_bios = is_disabled, @@ -1717,6 +1729,7 @@ static struct kvm_arch_ops svm_arch_ops = { .run = svm_vcpu_run, .skip_emulated_instruction = skip_emulated_instruction, .vcpu_setup = svm_vcpu_setup, + .patch_hypercall = svm_patch_hypercall, }; static int __init svm_init(void) diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index d1198e2b2b5d..0198d400037f 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1469,6 +1469,18 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return 0; } +static void +vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) +{ + /* + * Patch in the VMCALL instruction: + */ + hypercall[0] = 0x0f; + hypercall[1] = 0x01; + hypercall[2] = 0xc1; + hypercall[3] = 0xc3; +} + static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { u64 exit_qualification; @@ -2064,6 +2076,7 @@ static struct kvm_arch_ops vmx_arch_ops = { .run = vmx_vcpu_run, .skip_emulated_instruction = skip_emulated_instruction, .vcpu_setup = vmx_vcpu_setup, + .patch_hypercall = vmx_patch_hypercall, }; static int __init vmx_init(void) diff --git a/include/linux/kvm_para.h b/include/linux/kvm_para.h new file 
mode 100644 index 000000000000..74be5c1002ec --- /dev/null +++ b/include/linux/kvm_para.h @@ -0,0 +1,55 @@ +#ifndef __LINUX_KVM_PARA_H +#define __LINUX_KVM_PARA_H + +/* + * Guest OS interface for KVM paravirtualization + * + * Note: this interface is totally experimental, and is certain to change + * as we make progress. + */ + +/* + * Per-VCPU descriptor area shared between guest and host. Writable to + * both guest and host. Registered with the host by the guest when + * a guest acknowledges paravirtual mode. + * + * NOTE: all addresses are guest-physical addresses (gpa), to make it + * easier for the hypervisor to map between the various addresses. + */ +struct kvm_vcpu_para_state { + /* + * API version information for compatibility. If there's any support + * mismatch (too old host trying to execute too new guest) then + * the host will deny entry into paravirtual mode. Any other + * combination (new host + old guest and new host + new guest) + * is supposed to work - new host versions will support all old + * guest API versions. + */ + u32 guest_version; + u32 host_version; + u32 size; + u32 ret; + + /* + * The address of the vm exit instruction (VMCALL or VMMCALL), + * which the host will patch according to the CPU model the + * VM runs on: + */ + u64 hypercall_gpa; + +} __attribute__ ((aligned(PAGE_SIZE))); + +#define KVM_PARA_API_VERSION 1 + +/* + * This is used for an RDMSR's ECX parameter to probe for a KVM host. + * Hopefully no CPU vendor will use up this number. This is placed well + * out of way of the typical space occupied by CPU vendors' MSR indices, + * and we think (or at least hope) it wont be occupied in the future + * either. + */ +#define MSR_KVM_API_MAGIC 0x87655678 + +#define KVM_EINVAL 1 + +#endif -- cgit From 270fd9b96f5fcb7df15d3ca6166545d4aa0f3ee9 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 19 Feb 2007 14:37:47 +0200 Subject: KVM: Wire up hypercall handlers to a central arch-independent location Signed-off-by: Avi Kivity --- drivers/kvm/kvm.h | 2 ++ drivers/kvm/kvm_main.c | 36 ++++++++++++++++++++++++++++++++++++ drivers/kvm/svm.c | 10 +--------- drivers/kvm/vmx.c | 10 +--------- 4 files changed, 40 insertions(+), 18 deletions(-) (limited to 'drivers/kvm/kvm_main.c') diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index fd7746a2bc3e..41cc27de4d66 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -482,6 +482,8 @@ void kvm_mmu_post_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes); int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva); void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); +int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run); + static inline int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code) { diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 757a41f1db84..2be9738dfd78 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -1138,6 +1138,42 @@ int emulate_instruction(struct kvm_vcpu *vcpu, } EXPORT_SYMBOL_GPL(emulate_instruction); +int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + unsigned long nr, a0, a1, a2, a3, a4, a5, ret; + + kvm_arch_ops->decache_regs(vcpu); + ret = -KVM_EINVAL; +#ifdef CONFIG_X86_64 + if (is_long_mode(vcpu)) { + nr = vcpu->regs[VCPU_REGS_RAX]; + a0 = vcpu->regs[VCPU_REGS_RDI]; + a1 = vcpu->regs[VCPU_REGS_RSI]; + a2 = vcpu->regs[VCPU_REGS_RDX]; + a3 = vcpu->regs[VCPU_REGS_RCX]; + a4 = vcpu->regs[VCPU_REGS_R8]; + a5 = vcpu->regs[VCPU_REGS_R9]; + } else +#endif + { + nr = vcpu->regs[VCPU_REGS_RBX] & -1u; + a0 = 
vcpu->regs[VCPU_REGS_RAX] & -1u; + a1 = vcpu->regs[VCPU_REGS_RCX] & -1u; + a2 = vcpu->regs[VCPU_REGS_RDX] & -1u; + a3 = vcpu->regs[VCPU_REGS_RSI] & -1u; + a4 = vcpu->regs[VCPU_REGS_RDI] & -1u; + a5 = vcpu->regs[VCPU_REGS_RBP] & -1u; + } + switch (nr) { + default: + ; + } + vcpu->regs[VCPU_REGS_RAX] = ret; + kvm_arch_ops->cache_regs(vcpu); + return 1; +} +EXPORT_SYMBOL_GPL(kvm_hypercall); + static u64 mk_cr_64(u64 curr_cr, u32 new_val) { return (curr_cr & ~((1ULL << 32) - 1)) | new_val; diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index aaa6742089e5..711ea42370a8 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -1078,16 +1078,8 @@ static int halt_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) static int vmmcall_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { - printk(KERN_DEBUG "got vmmcall at RIP %08llx\n", - vcpu->svm->vmcb->save.rip); - printk(KERN_DEBUG "vmmcall params: %08llx, %08lx, %08lx, %08lx\n", - vcpu->svm->vmcb->save.rax, - vcpu->regs[VCPU_REGS_RCX], - vcpu->regs[VCPU_REGS_RDX], - vcpu->regs[VCPU_REGS_RBP]); - vcpu->svm->vmcb->save.rax = 0; vcpu->svm->vmcb->save.rip += 3; - return 1; + return kvm_hypercall(vcpu, kvm_run); } static int invalid_op_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index ca79e594ea6e..ff956a6302ec 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1659,16 +1659,8 @@ static int handle_halt(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) static int handle_vmcall(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { - kvm_run->exit_reason = KVM_EXIT_DEBUG; - printk(KERN_DEBUG "got vmcall at RIP %08lx\n", vmcs_readl(GUEST_RIP)); - printk(KERN_DEBUG "vmcall params: %08lx, %08lx, %08lx, %08lx\n", - vcpu->regs[VCPU_REGS_RAX], - vcpu->regs[VCPU_REGS_RCX], - vcpu->regs[VCPU_REGS_RDX], - vcpu->regs[VCPU_REGS_RBP]); - vcpu->regs[VCPU_REGS_RAX] = 0; vmcs_writel(GUEST_RIP, vmcs_readl(GUEST_RIP)+3); - return 1; + return kvm_hypercall(vcpu, kvm_run); } /* -- cgit From 19d1408dfd683daf1c158bb8fbf54324eb4bf568 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 19 Feb 2007 14:37:48 +0200 Subject: KVM: More 0 -> NULL conversions Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/kvm/kvm_main.c') diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 2be9738dfd78..0df0eebc54a9 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -2229,13 +2229,13 @@ static void kvm_exit_debug(void) static int kvm_suspend(struct sys_device *dev, pm_message_t state) { decache_vcpus_on_cpu(raw_smp_processor_id()); - on_each_cpu(kvm_arch_ops->hardware_disable, 0, 0, 1); + on_each_cpu(kvm_arch_ops->hardware_disable, NULL, 0, 1); return 0; } static int kvm_resume(struct sys_device *dev) { - on_each_cpu(kvm_arch_ops->hardware_enable, 0, 0, 1); + on_each_cpu(kvm_arch_ops->hardware_enable, NULL, 0, 1); return 0; } -- cgit From 37e29d906c6eb1ece907e509160518b2edc2c083 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 20 Feb 2007 14:07:37 +0200 Subject: KVM: Add internal filesystem for generating inodes The kvmfs inodes will represent virtual machines and vcpus, as necessary, reducing cacheline bouncing due to inodes and filps being shared. 
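Concretely, the pattern this mount enables, and which the following patches apply first to whole VMs and then to individual vcpus, is: mint an inode from the kvmfs superblock, attach the right file_operations, wrap it in a struct file whose private_data points at the kvm or vcpu object, and install a file descriptor. Below is a condensed sketch of that flow using the same 2.6.20-era VFS calls that appear in the diffs (get_sb_pseudo, get_empty_filp, d_alloc_anon); the helper name kvmfs_fd_for is invented for the sketch (the patches split this work across kvmfs_inode(), kvmfs_file() and the create-VM/vcpu paths), error unwinding is trimmed, and later kernels moved this job to the shared anon_inodes helper:

/*
 * Condensed sketch only -- 2.6.20-era interfaces, no error unwinding,
 * not buildable as-is.
 */
#define KVMFS_MAGIC 0x19700426

static struct vfsmount *kvmfs_mnt;	/* kern_mount()ed at module init */

static int kvmfs_get_sb(struct file_system_type *fs_type, int flags,
			const char *dev_name, void *data, struct vfsmount *mnt)
{
	/* Pseudo filesystem: never user-mountable, exists only to mint inodes. */
	return get_sb_pseudo(fs_type, "kvm:", NULL, KVMFS_MAGIC, mnt);
}

static struct file_system_type kvm_fs_type = {
	.name		= "kvmfs",
	.get_sb		= kvmfs_get_sb,
	.kill_sb	= kill_anon_super,
};

/* What the later patches do: turn a VM or vcpu object into an fd. */
static int kvmfs_fd_for(struct file_operations *fops, void *object)
{
	struct inode *inode = new_inode(kvmfs_mnt->mnt_sb);
	struct file *file = get_empty_filp();
	int fd = get_unused_fd();

	inode->i_fop = fops;
	inode->i_state = I_DIRTY;		/* keep it off the dirty list */

	file->f_path.mnt = mntget(kvmfs_mnt);
	file->f_path.dentry = d_alloc_anon(inode);
	file->f_mapping = inode->i_mapping;
	file->f_op = inode->i_fop;
	file->f_mode = FMODE_READ | FMODE_WRITE;
	file->private_data = object;		/* what the fd's ioctls dereference */

	fd_install(fd, file);			/* fd is visible to userspace from here */
	return fd;
}
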
Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) (limited to 'drivers/kvm/kvm_main.c') diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 0df0eebc54a9..c01252e84377 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -36,6 +36,8 @@ #include #include #include +#include +#include #include "x86_emulate.h" #include "segment_descriptor.h" @@ -72,6 +74,9 @@ static struct kvm_stats_debugfs_item { static struct dentry *debugfs_dir; +#define KVMFS_MAGIC 0x19700426 +struct vfsmount *kvmfs_mnt; + #define MAX_IO_MSRS 256 #define CR0_RESEVED_BITS 0xffffffff1ffaffc0ULL @@ -2252,6 +2257,18 @@ static struct sys_device kvm_sysdev = { hpa_t bad_page_address; +static int kvmfs_get_sb(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data, struct vfsmount *mnt) +{ + return get_sb_pseudo(fs_type, "kvm:", NULL, KVMFS_MAGIC, mnt); +} + +static struct file_system_type kvm_fs_type = { + .name = "kvmfs", + .get_sb = kvmfs_get_sb, + .kill_sb = kill_anon_super, +}; + int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module) { int r; @@ -2328,8 +2345,16 @@ void kvm_exit_arch(void) static __init int kvm_init(void) { static struct page *bad_page; - int r = 0; + int r; + + r = register_filesystem(&kvm_fs_type); + if (r) + goto out3; + kvmfs_mnt = kern_mount(&kvm_fs_type); + r = PTR_ERR(kvmfs_mnt); + if (IS_ERR(kvmfs_mnt)) + goto out2; kvm_init_debug(); kvm_init_msr_list(); @@ -2346,6 +2371,10 @@ static __init int kvm_init(void) out: kvm_exit_debug(); + mntput(kvmfs_mnt); +out2: + unregister_filesystem(&kvm_fs_type); +out3: return r; } @@ -2353,6 +2382,8 @@ static __exit void kvm_exit(void) { kvm_exit_debug(); __free_page(pfn_to_page(bad_page_address >> PAGE_SHIFT)); + mntput(kvmfs_mnt); + unregister_filesystem(&kvm_fs_type); } module_init(kvm_init) -- cgit From f17abe9a44425ff9c9858bc1806cc09d6b5dad1c Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 21 Feb 2007 19:28:04 +0200 Subject: KVM: Create an inode per virtual machine This avoids having filp->f_op and the corresponding inode->i_fop different, which is a little unorthodox. The ioctl list is split into two: global kvm ioctls and per-vm ioctls. A new ioctl, KVM_CREATE_VM, is used to create VMs and return the VM fd. Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 212 +++++++++++++++++++++++++++++++++++++++---------- include/linux/kvm.h | 10 ++- 2 files changed, 180 insertions(+), 42 deletions(-) (limited to 'drivers/kvm/kvm_main.c') diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index c01252e84377..aa07d9c9d20d 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -95,6 +96,55 @@ struct segment_descriptor_64 { #endif +static struct inode *kvmfs_inode(struct file_operations *fops) +{ + int error = -ENOMEM; + struct inode *inode = new_inode(kvmfs_mnt->mnt_sb); + + if (!inode) + goto eexit_1; + + inode->i_fop = fops; + + /* + * Mark the inode dirty from the very beginning, + * that way it will never be moved to the dirty + * list because mark_inode_dirty() will think + * that it already _is_ on the dirty list. 
+ */ + inode->i_state = I_DIRTY; + inode->i_mode = S_IRUSR | S_IWUSR; + inode->i_uid = current->fsuid; + inode->i_gid = current->fsgid; + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + return inode; + +eexit_1: + return ERR_PTR(error); +} + +static struct file *kvmfs_file(struct inode *inode, void *private_data) +{ + struct file *file = get_empty_filp(); + + if (!file) + return ERR_PTR(-ENFILE); + + file->f_path.mnt = mntget(kvmfs_mnt); + file->f_path.dentry = d_alloc_anon(inode); + if (!file->f_path.dentry) + return ERR_PTR(-ENOMEM); + file->f_mapping = inode->i_mapping; + + file->f_pos = 0; + file->f_flags = O_RDWR; + file->f_op = inode->i_fop; + file->f_mode = FMODE_READ | FMODE_WRITE; + file->f_version = 0; + file->private_data = private_data; + return file; +} + unsigned long segment_base(u16 selector) { struct descriptor_table gdt; @@ -222,13 +272,13 @@ static void vcpu_put(struct kvm_vcpu *vcpu) mutex_unlock(&vcpu->mutex); } -static int kvm_dev_open(struct inode *inode, struct file *filp) +static struct kvm *kvm_create_vm(void) { struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL); int i; if (!kvm) - return -ENOMEM; + return ERR_PTR(-ENOMEM); spin_lock_init(&kvm->lock); INIT_LIST_HEAD(&kvm->active_mmu_pages); @@ -244,7 +294,11 @@ static int kvm_dev_open(struct inode *inode, struct file *filp) list_add(&kvm->vm_list, &vm_list); spin_unlock(&kvm_lock); } - filp->private_data = kvm; + return kvm; +} + +static int kvm_dev_open(struct inode *inode, struct file *filp) +{ return 0; } @@ -300,14 +354,24 @@ static void kvm_free_vcpus(struct kvm *kvm) static int kvm_dev_release(struct inode *inode, struct file *filp) { - struct kvm *kvm = filp->private_data; + return 0; +} +static void kvm_destroy_vm(struct kvm *kvm) +{ spin_lock(&kvm_lock); list_del(&kvm->vm_list); spin_unlock(&kvm_lock); kvm_free_vcpus(kvm); kvm_free_physmem(kvm); kfree(kvm); +} + +static int kvm_vm_release(struct inode *inode, struct file *filp) +{ + struct kvm *kvm = filp->private_data; + + kvm_destroy_vm(kvm); return 0; } @@ -1900,17 +1964,14 @@ static int kvm_dev_ioctl_debug_guest(struct kvm *kvm, return r; } -static long kvm_dev_ioctl(struct file *filp, - unsigned int ioctl, unsigned long arg) +static long kvm_vm_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) { struct kvm *kvm = filp->private_data; void __user *argp = (void __user *)arg; int r = -EINVAL; switch (ioctl) { - case KVM_GET_API_VERSION: - r = KVM_API_VERSION; - break; case KVM_CREATE_VCPU: r = kvm_dev_ioctl_create_vcpu(kvm, arg); if (r) @@ -2052,6 +2113,107 @@ static long kvm_dev_ioctl(struct file *filp, case KVM_SET_MSRS: r = msr_io(kvm, argp, do_set_msr, 0); break; + default: + ; + } +out: + return r; +} + +static struct page *kvm_vm_nopage(struct vm_area_struct *vma, + unsigned long address, + int *type) +{ + struct kvm *kvm = vma->vm_file->private_data; + unsigned long pgoff; + struct kvm_memory_slot *slot; + struct page *page; + + *type = VM_FAULT_MINOR; + pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; + slot = gfn_to_memslot(kvm, pgoff); + if (!slot) + return NOPAGE_SIGBUS; + page = gfn_to_page(slot, pgoff); + if (!page) + return NOPAGE_SIGBUS; + get_page(page); + return page; +} + +static struct vm_operations_struct kvm_vm_vm_ops = { + .nopage = kvm_vm_nopage, +}; + +static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma) +{ + vma->vm_ops = &kvm_vm_vm_ops; + return 0; +} + +static struct file_operations kvm_vm_fops = { + .release = kvm_vm_release, + .unlocked_ioctl = 
kvm_vm_ioctl, + .compat_ioctl = kvm_vm_ioctl, + .mmap = kvm_vm_mmap, +}; + +static int kvm_dev_ioctl_create_vm(void) +{ + int fd, r; + struct inode *inode; + struct file *file; + struct kvm *kvm; + + inode = kvmfs_inode(&kvm_vm_fops); + if (IS_ERR(inode)) { + r = PTR_ERR(inode); + goto out1; + } + + kvm = kvm_create_vm(); + if (IS_ERR(kvm)) { + r = PTR_ERR(kvm); + goto out2; + } + + file = kvmfs_file(inode, kvm); + if (IS_ERR(file)) { + r = PTR_ERR(file); + goto out3; + } + + r = get_unused_fd(); + if (r < 0) + goto out4; + fd = r; + fd_install(fd, file); + + return fd; + +out4: + fput(file); +out3: + kvm_destroy_vm(kvm); +out2: + iput(inode); +out1: + return r; +} + +static long kvm_dev_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + void __user *argp = (void __user *)arg; + int r = -EINVAL; + + switch (ioctl) { + case KVM_GET_API_VERSION: + r = KVM_API_VERSION; + break; + case KVM_CREATE_VM: + r = kvm_dev_ioctl_create_vm(); + break; case KVM_GET_MSR_INDEX_LIST: { struct kvm_msr_list __user *user_msr_list = argp; struct kvm_msr_list msr_list; @@ -2086,43 +2248,11 @@ out: return r; } -static struct page *kvm_dev_nopage(struct vm_area_struct *vma, - unsigned long address, - int *type) -{ - struct kvm *kvm = vma->vm_file->private_data; - unsigned long pgoff; - struct kvm_memory_slot *slot; - struct page *page; - - *type = VM_FAULT_MINOR; - pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; - slot = gfn_to_memslot(kvm, pgoff); - if (!slot) - return NOPAGE_SIGBUS; - page = gfn_to_page(slot, pgoff); - if (!page) - return NOPAGE_SIGBUS; - get_page(page); - return page; -} - -static struct vm_operations_struct kvm_dev_vm_ops = { - .nopage = kvm_dev_nopage, -}; - -static int kvm_dev_mmap(struct file *file, struct vm_area_struct *vma) -{ - vma->vm_ops = &kvm_dev_vm_ops; - return 0; -} - static struct file_operations kvm_chardev_ops = { .open = kvm_dev_open, .release = kvm_dev_release, .unlocked_ioctl = kvm_dev_ioctl, .compat_ioctl = kvm_dev_ioctl, - .mmap = kvm_dev_mmap, }; static struct miscdevice kvm_dev = { diff --git a/include/linux/kvm.h b/include/linux/kvm.h index f3604593fb76..d6e6635dbec1 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -224,7 +224,16 @@ struct kvm_dirty_log { #define KVMIO 0xAE +/* + * ioctls for /dev/kvm fds: + */ #define KVM_GET_API_VERSION _IO(KVMIO, 1) +#define KVM_CREATE_VM _IO(KVMIO, 2) /* returns a VM fd */ +#define KVM_GET_MSR_INDEX_LIST _IOWR(KVMIO, 15, struct kvm_msr_list) + +/* + * ioctls for VM fds + */ #define KVM_RUN _IOWR(KVMIO, 2, struct kvm_run) #define KVM_GET_REGS _IOWR(KVMIO, 3, struct kvm_regs) #define KVM_SET_REGS _IOW(KVMIO, 4, struct kvm_regs) @@ -238,6 +247,5 @@ struct kvm_dirty_log { #define KVM_GET_DIRTY_LOG _IOW(KVMIO, 12, struct kvm_dirty_log) #define KVM_GET_MSRS _IOWR(KVMIO, 13, struct kvm_msrs) #define KVM_SET_MSRS _IOWR(KVMIO, 14, struct kvm_msrs) -#define KVM_GET_MSR_INDEX_LIST _IOWR(KVMIO, 15, struct kvm_msr_list) #endif -- cgit From 2c6f5df9793e6f928fc763af3fb535a5e28a1f8a Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 20 Feb 2007 18:27:58 +0200 Subject: KVM: Rename some kvm_dev_ioctl_*() functions to kvm_vm_ioctl_*() This reflects the changed scope, from device-wide to single vm (previously every device open created a virtual machine). 
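From userspace, the split introduced by the previous patch looks like this: global queries go to /dev/kvm, and everything that operates on one virtual machine goes to the fd returned by KVM_CREATE_VM. The minimal sketch below only assumes that <linux/kvm.h> supplies the ioctl numbers listed above and that the caller may open /dev/kvm; error handling is abbreviated and no memory or KVM_RUN setup is shown:

/* cc -o kvm_probe kvm_probe.c */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm, vmfd, vcpufd, version;

	kvm = open("/dev/kvm", O_RDWR);
	if (kvm < 0) {
		perror("open /dev/kvm");
		return 1;
	}

	/* /dev/kvm ioctl: global, not tied to any VM */
	version = ioctl(kvm, KVM_GET_API_VERSION, 0);
	printf("KVM API version: %d\n", version);

	/* /dev/kvm ioctl: create a VM, get a VM fd back */
	vmfd = ioctl(kvm, KVM_CREATE_VM, 0);
	if (vmfd < 0) {
		perror("KVM_CREATE_VM");
		return 1;
	}

	/*
	 * VM fd ioctl: create vcpu slot 0.  At this point in the series it
	 * returns 0 on success; the per-vcpu inode patch further down makes
	 * it return a vcpu fd instead (which is what modern kernels do).
	 */
	vcpufd = ioctl(vmfd, KVM_CREATE_VCPU, 0);
	if (vcpufd < 0)
		perror("KVM_CREATE_VCPU");
	else
		close(vcpufd);

	/* A real client would now set up memory regions and issue KVM_RUN. */
	close(vmfd);
	close(kvm);
	return 0;
}
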
Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 48 ++++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 24 deletions(-) (limited to 'drivers/kvm/kvm_main.c') diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index aa07d9c9d20d..981f5d3cfd94 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -601,7 +601,7 @@ EXPORT_SYMBOL_GPL(fx_init); /* * Creates some virtual cpus. Good luck creating more than one. */ -static int kvm_dev_ioctl_create_vcpu(struct kvm *kvm, int n) +static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) { int r; struct kvm_vcpu *vcpu; @@ -655,8 +655,8 @@ out: * * Discontiguous memory is allowed, mostly for framebuffers. */ -static int kvm_dev_ioctl_set_memory_region(struct kvm *kvm, - struct kvm_memory_region *mem) +static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, + struct kvm_memory_region *mem) { int r; gfn_t base_gfn; @@ -804,8 +804,8 @@ static void do_remove_write_access(struct kvm_vcpu *vcpu, int slot) /* * Get (and clear) the dirty memory log for a memory slot. */ -static int kvm_dev_ioctl_get_dirty_log(struct kvm *kvm, - struct kvm_dirty_log *log) +static int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, + struct kvm_dirty_log *log) { struct kvm_memory_slot *memslot; int r, i; @@ -1535,7 +1535,7 @@ void save_msrs(struct vmx_msr_entry *e, int n) } EXPORT_SYMBOL_GPL(save_msrs); -static int kvm_dev_ioctl_run(struct kvm *kvm, struct kvm_run *kvm_run) +static int kvm_vm_ioctl_run(struct kvm *kvm, struct kvm_run *kvm_run) { struct kvm_vcpu *vcpu; int r; @@ -1568,7 +1568,7 @@ static int kvm_dev_ioctl_run(struct kvm *kvm, struct kvm_run *kvm_run) return r; } -static int kvm_dev_ioctl_get_regs(struct kvm *kvm, struct kvm_regs *regs) +static int kvm_vm_ioctl_get_regs(struct kvm *kvm, struct kvm_regs *regs) { struct kvm_vcpu *vcpu; @@ -1614,7 +1614,7 @@ static int kvm_dev_ioctl_get_regs(struct kvm *kvm, struct kvm_regs *regs) return 0; } -static int kvm_dev_ioctl_set_regs(struct kvm *kvm, struct kvm_regs *regs) +static int kvm_vm_ioctl_set_regs(struct kvm *kvm, struct kvm_regs *regs) { struct kvm_vcpu *vcpu; @@ -1660,7 +1660,7 @@ static void get_segment(struct kvm_vcpu *vcpu, return kvm_arch_ops->get_segment(vcpu, var, seg); } -static int kvm_dev_ioctl_get_sregs(struct kvm *kvm, struct kvm_sregs *sregs) +static int kvm_vm_ioctl_get_sregs(struct kvm *kvm, struct kvm_sregs *sregs) { struct kvm_vcpu *vcpu; struct descriptor_table dt; @@ -1711,7 +1711,7 @@ static void set_segment(struct kvm_vcpu *vcpu, return kvm_arch_ops->set_segment(vcpu, var, seg); } -static int kvm_dev_ioctl_set_sregs(struct kvm *kvm, struct kvm_sregs *sregs) +static int kvm_vm_ioctl_set_sregs(struct kvm *kvm, struct kvm_sregs *sregs) { struct kvm_vcpu *vcpu; int mmu_reset_needed = 0; @@ -1904,7 +1904,7 @@ out: /* * Translate a guest virtual address to a guest physical address. 
*/ -static int kvm_dev_ioctl_translate(struct kvm *kvm, struct kvm_translation *tr) +static int kvm_vm_ioctl_translate(struct kvm *kvm, struct kvm_translation *tr) { unsigned long vaddr = tr->linear_address; struct kvm_vcpu *vcpu; @@ -1925,7 +1925,7 @@ static int kvm_dev_ioctl_translate(struct kvm *kvm, struct kvm_translation *tr) return 0; } -static int kvm_dev_ioctl_interrupt(struct kvm *kvm, struct kvm_interrupt *irq) +static int kvm_vm_ioctl_interrupt(struct kvm *kvm, struct kvm_interrupt *irq) { struct kvm_vcpu *vcpu; @@ -1945,7 +1945,7 @@ static int kvm_dev_ioctl_interrupt(struct kvm *kvm, struct kvm_interrupt *irq) return 0; } -static int kvm_dev_ioctl_debug_guest(struct kvm *kvm, +static int kvm_vm_ioctl_debug_guest(struct kvm *kvm, struct kvm_debug_guest *dbg) { struct kvm_vcpu *vcpu; @@ -1973,7 +1973,7 @@ static long kvm_vm_ioctl(struct file *filp, switch (ioctl) { case KVM_CREATE_VCPU: - r = kvm_dev_ioctl_create_vcpu(kvm, arg); + r = kvm_vm_ioctl_create_vcpu(kvm, arg); if (r) goto out; break; @@ -1983,7 +1983,7 @@ static long kvm_vm_ioctl(struct file *filp, r = -EFAULT; if (copy_from_user(&kvm_run, argp, sizeof kvm_run)) goto out; - r = kvm_dev_ioctl_run(kvm, &kvm_run); + r = kvm_vm_ioctl_run(kvm, &kvm_run); if (r < 0 && r != -EINTR) goto out; if (copy_to_user(argp, &kvm_run, sizeof kvm_run)) { @@ -1998,7 +1998,7 @@ static long kvm_vm_ioctl(struct file *filp, r = -EFAULT; if (copy_from_user(&kvm_regs, argp, sizeof kvm_regs)) goto out; - r = kvm_dev_ioctl_get_regs(kvm, &kvm_regs); + r = kvm_vm_ioctl_get_regs(kvm, &kvm_regs); if (r) goto out; r = -EFAULT; @@ -2013,7 +2013,7 @@ static long kvm_vm_ioctl(struct file *filp, r = -EFAULT; if (copy_from_user(&kvm_regs, argp, sizeof kvm_regs)) goto out; - r = kvm_dev_ioctl_set_regs(kvm, &kvm_regs); + r = kvm_vm_ioctl_set_regs(kvm, &kvm_regs); if (r) goto out; r = 0; @@ -2025,7 +2025,7 @@ static long kvm_vm_ioctl(struct file *filp, r = -EFAULT; if (copy_from_user(&kvm_sregs, argp, sizeof kvm_sregs)) goto out; - r = kvm_dev_ioctl_get_sregs(kvm, &kvm_sregs); + r = kvm_vm_ioctl_get_sregs(kvm, &kvm_sregs); if (r) goto out; r = -EFAULT; @@ -2040,7 +2040,7 @@ static long kvm_vm_ioctl(struct file *filp, r = -EFAULT; if (copy_from_user(&kvm_sregs, argp, sizeof kvm_sregs)) goto out; - r = kvm_dev_ioctl_set_sregs(kvm, &kvm_sregs); + r = kvm_vm_ioctl_set_sregs(kvm, &kvm_sregs); if (r) goto out; r = 0; @@ -2052,7 +2052,7 @@ static long kvm_vm_ioctl(struct file *filp, r = -EFAULT; if (copy_from_user(&tr, argp, sizeof tr)) goto out; - r = kvm_dev_ioctl_translate(kvm, &tr); + r = kvm_vm_ioctl_translate(kvm, &tr); if (r) goto out; r = -EFAULT; @@ -2067,7 +2067,7 @@ static long kvm_vm_ioctl(struct file *filp, r = -EFAULT; if (copy_from_user(&irq, argp, sizeof irq)) goto out; - r = kvm_dev_ioctl_interrupt(kvm, &irq); + r = kvm_vm_ioctl_interrupt(kvm, &irq); if (r) goto out; r = 0; @@ -2079,7 +2079,7 @@ static long kvm_vm_ioctl(struct file *filp, r = -EFAULT; if (copy_from_user(&dbg, argp, sizeof dbg)) goto out; - r = kvm_dev_ioctl_debug_guest(kvm, &dbg); + r = kvm_vm_ioctl_debug_guest(kvm, &dbg); if (r) goto out; r = 0; @@ -2091,7 +2091,7 @@ static long kvm_vm_ioctl(struct file *filp, r = -EFAULT; if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem)) goto out; - r = kvm_dev_ioctl_set_memory_region(kvm, &kvm_mem); + r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_mem); if (r) goto out; break; @@ -2102,7 +2102,7 @@ static long kvm_vm_ioctl(struct file *filp, r = -EFAULT; if (copy_from_user(&log, argp, sizeof log)) goto out; - r = kvm_dev_ioctl_get_dirty_log(kvm, 
&log); + r = kvm_vm_ioctl_get_dirty_log(kvm, &log); if (r) goto out; break; -- cgit From c5ea76600653b1a242321734435cb1c54778941a Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 20 Feb 2007 18:41:05 +0200 Subject: KVM: Move kvm_vm_ioctl_create_vcpu() around In preparation of some hacking. Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 102 ++++++++++++++++++++++++------------------------- 1 file changed, 51 insertions(+), 51 deletions(-) (limited to 'drivers/kvm/kvm_main.c') diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 981f5d3cfd94..6fb36c80e3e8 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -598,57 +598,6 @@ void fx_init(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(fx_init); -/* - * Creates some virtual cpus. Good luck creating more than one. - */ -static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) -{ - int r; - struct kvm_vcpu *vcpu; - - r = -EINVAL; - if (!valid_vcpu(n)) - goto out; - - vcpu = &kvm->vcpus[n]; - - mutex_lock(&vcpu->mutex); - - if (vcpu->vmcs) { - mutex_unlock(&vcpu->mutex); - return -EEXIST; - } - - vcpu->host_fx_image = (char*)ALIGN((hva_t)vcpu->fx_buf, - FX_IMAGE_ALIGN); - vcpu->guest_fx_image = vcpu->host_fx_image + FX_IMAGE_SIZE; - - r = kvm_arch_ops->vcpu_create(vcpu); - if (r < 0) - goto out_free_vcpus; - - r = kvm_mmu_create(vcpu); - if (r < 0) - goto out_free_vcpus; - - kvm_arch_ops->vcpu_load(vcpu); - r = kvm_mmu_setup(vcpu); - if (r >= 0) - r = kvm_arch_ops->vcpu_setup(vcpu); - vcpu_put(vcpu); - - if (r < 0) - goto out_free_vcpus; - - return 0; - -out_free_vcpus: - kvm_free_vcpu(vcpu); - mutex_unlock(&vcpu->mutex); -out: - return r; -} - /* * Allocate some memory and give it an address in the guest physical address * space. @@ -1964,6 +1913,57 @@ static int kvm_vm_ioctl_debug_guest(struct kvm *kvm, return r; } +/* + * Creates some virtual cpus. Good luck creating more than one. + */ +static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) +{ + int r; + struct kvm_vcpu *vcpu; + + r = -EINVAL; + if (!valid_vcpu(n)) + goto out; + + vcpu = &kvm->vcpus[n]; + + mutex_lock(&vcpu->mutex); + + if (vcpu->vmcs) { + mutex_unlock(&vcpu->mutex); + return -EEXIST; + } + + vcpu->host_fx_image = (char*)ALIGN((hva_t)vcpu->fx_buf, + FX_IMAGE_ALIGN); + vcpu->guest_fx_image = vcpu->host_fx_image + FX_IMAGE_SIZE; + + r = kvm_arch_ops->vcpu_create(vcpu); + if (r < 0) + goto out_free_vcpus; + + r = kvm_mmu_create(vcpu); + if (r < 0) + goto out_free_vcpus; + + kvm_arch_ops->vcpu_load(vcpu); + r = kvm_mmu_setup(vcpu); + if (r >= 0) + r = kvm_arch_ops->vcpu_setup(vcpu); + vcpu_put(vcpu); + + if (r < 0) + goto out_free_vcpus; + + return 0; + +out_free_vcpus: + kvm_free_vcpu(vcpu); + mutex_unlock(&vcpu->mutex); +out: + return r; +} + static long kvm_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { -- cgit From bccf2150fe62dda5fb09efa2f64d2a234694eb48 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 21 Feb 2007 18:04:26 +0200 Subject: KVM: Per-vcpu inodes Allocate a distinct inode for every vcpu in a VM. 
This has the following benefits: - the filp cachelines are no longer bounced when f_count is incremented on every ioctl() - the API and internal code are distinctly clearer; for example, on the KVM_GET_REGS ioctl, there is no need to copy the vcpu number from userspace and then copy the registers back; the vcpu identity is derived from the fd used to make the call Right now the performance benefits are completely theoretical since (a) we don't support more than one vcpu per VM and (b) virtualization hardware inefficiencies completely everwhelm any cacheline bouncing effects. But both of these will change, and we need to prepare the API today. Signed-off-by: Avi Kivity --- drivers/kvm/kvm.h | 3 +- drivers/kvm/kvm_main.c | 263 ++++++++++++++++++++++++++++--------------------- drivers/kvm/svm.c | 3 +- drivers/kvm/vmx.c | 3 +- include/linux/kvm.h | 38 ++++--- 5 files changed, 170 insertions(+), 140 deletions(-) (limited to 'drivers/kvm/kvm_main.c') diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index 41cc27de4d66..0d122bf889db 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -309,6 +309,7 @@ struct kvm { int busy; unsigned long rmap_overflow; struct list_head vm_list; + struct file *filp; }; struct kvm_stat { @@ -343,7 +344,7 @@ struct kvm_arch_ops { int (*vcpu_create)(struct kvm_vcpu *vcpu); void (*vcpu_free)(struct kvm_vcpu *vcpu); - struct kvm_vcpu *(*vcpu_load)(struct kvm_vcpu *vcpu); + void (*vcpu_load)(struct kvm_vcpu *vcpu); void (*vcpu_put)(struct kvm_vcpu *vcpu); void (*vcpu_decache)(struct kvm_vcpu *vcpu); diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 6fb36c80e3e8..a593d092d85b 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -96,6 +96,9 @@ struct segment_descriptor_64 { #endif +static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, + unsigned long arg); + static struct inode *kvmfs_inode(struct file_operations *fops) { int error = -ENOMEM; @@ -246,24 +249,30 @@ int kvm_write_guest(struct kvm_vcpu *vcpu, gva_t addr, unsigned long size, } EXPORT_SYMBOL_GPL(kvm_write_guest); -static int vcpu_slot(struct kvm_vcpu *vcpu) +/* + * Switches to specified vcpu, until a matching vcpu_put() + */ +static void vcpu_load(struct kvm_vcpu *vcpu) { - return vcpu - vcpu->kvm->vcpus; + mutex_lock(&vcpu->mutex); + kvm_arch_ops->vcpu_load(vcpu); } /* - * Switches to specified vcpu, until a matching vcpu_put() + * Switches to specified vcpu, until a matching vcpu_put(). Will return NULL + * if the slot is not populated. 
*/ -static struct kvm_vcpu *vcpu_load(struct kvm *kvm, int vcpu_slot) +static struct kvm_vcpu *vcpu_load_slot(struct kvm *kvm, int slot) { - struct kvm_vcpu *vcpu = &kvm->vcpus[vcpu_slot]; + struct kvm_vcpu *vcpu = &kvm->vcpus[slot]; mutex_lock(&vcpu->mutex); - if (unlikely(!vcpu->vmcs)) { + if (!vcpu->vmcs) { mutex_unlock(&vcpu->mutex); return NULL; } - return kvm_arch_ops->vcpu_load(vcpu); + kvm_arch_ops->vcpu_load(vcpu); + return vcpu; } static void vcpu_put(struct kvm_vcpu *vcpu) @@ -336,9 +345,10 @@ static void kvm_free_physmem(struct kvm *kvm) static void kvm_free_vcpu(struct kvm_vcpu *vcpu) { - if (!vcpu_load(vcpu->kvm, vcpu_slot(vcpu))) + if (!vcpu->vmcs) return; + vcpu_load(vcpu); kvm_mmu_destroy(vcpu); vcpu_put(vcpu); kvm_arch_ops->vcpu_free(vcpu); @@ -725,7 +735,7 @@ raced: for (i = 0; i < KVM_MAX_VCPUS; ++i) { struct kvm_vcpu *vcpu; - vcpu = vcpu_load(kvm, i); + vcpu = vcpu_load_slot(kvm, i); if (!vcpu) continue; kvm_mmu_reset_context(vcpu); @@ -791,8 +801,9 @@ static int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, if (any) { cleared = 0; for (i = 0; i < KVM_MAX_VCPUS; ++i) { - struct kvm_vcpu *vcpu = vcpu_load(kvm, i); + struct kvm_vcpu *vcpu; + vcpu = vcpu_load_slot(kvm, i); if (!vcpu) continue; if (!cleared) { @@ -1461,8 +1472,7 @@ void kvm_resched(struct kvm_vcpu *vcpu) { vcpu_put(vcpu); cond_resched(); - /* Cannot fail - no vcpu unplug yet. */ - vcpu_load(vcpu->kvm, vcpu_slot(vcpu)); + vcpu_load(vcpu); } EXPORT_SYMBOL_GPL(kvm_resched); @@ -1484,17 +1494,11 @@ void save_msrs(struct vmx_msr_entry *e, int n) } EXPORT_SYMBOL_GPL(save_msrs); -static int kvm_vm_ioctl_run(struct kvm *kvm, struct kvm_run *kvm_run) +static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { - struct kvm_vcpu *vcpu; int r; - if (!valid_vcpu(kvm_run->vcpu)) - return -EINVAL; - - vcpu = vcpu_load(kvm, kvm_run->vcpu); - if (!vcpu) - return -ENOENT; + vcpu_load(vcpu); /* re-sync apic's tpr */ vcpu->cr8 = kvm_run->cr8; @@ -1517,16 +1521,10 @@ static int kvm_vm_ioctl_run(struct kvm *kvm, struct kvm_run *kvm_run) return r; } -static int kvm_vm_ioctl_get_regs(struct kvm *kvm, struct kvm_regs *regs) +static int kvm_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, + struct kvm_regs *regs) { - struct kvm_vcpu *vcpu; - - if (!valid_vcpu(regs->vcpu)) - return -EINVAL; - - vcpu = vcpu_load(kvm, regs->vcpu); - if (!vcpu) - return -ENOENT; + vcpu_load(vcpu); kvm_arch_ops->cache_regs(vcpu); @@ -1563,16 +1561,10 @@ static int kvm_vm_ioctl_get_regs(struct kvm *kvm, struct kvm_regs *regs) return 0; } -static int kvm_vm_ioctl_set_regs(struct kvm *kvm, struct kvm_regs *regs) +static int kvm_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, + struct kvm_regs *regs) { - struct kvm_vcpu *vcpu; - - if (!valid_vcpu(regs->vcpu)) - return -EINVAL; - - vcpu = vcpu_load(kvm, regs->vcpu); - if (!vcpu) - return -ENOENT; + vcpu_load(vcpu); vcpu->regs[VCPU_REGS_RAX] = regs->rax; vcpu->regs[VCPU_REGS_RBX] = regs->rbx; @@ -1609,16 +1601,12 @@ static void get_segment(struct kvm_vcpu *vcpu, return kvm_arch_ops->get_segment(vcpu, var, seg); } -static int kvm_vm_ioctl_get_sregs(struct kvm *kvm, struct kvm_sregs *sregs) +static int kvm_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) { - struct kvm_vcpu *vcpu; struct descriptor_table dt; - if (!valid_vcpu(sregs->vcpu)) - return -EINVAL; - vcpu = vcpu_load(kvm, sregs->vcpu); - if (!vcpu) - return -ENOENT; + vcpu_load(vcpu); get_segment(vcpu, &sregs->cs, VCPU_SREG_CS); get_segment(vcpu, &sregs->ds, VCPU_SREG_DS); @@ -1660,18 +1648,14 @@ static void 
set_segment(struct kvm_vcpu *vcpu, return kvm_arch_ops->set_segment(vcpu, var, seg); } -static int kvm_vm_ioctl_set_sregs(struct kvm *kvm, struct kvm_sregs *sregs) +static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) { - struct kvm_vcpu *vcpu; int mmu_reset_needed = 0; int i; struct descriptor_table dt; - if (!valid_vcpu(sregs->vcpu)) - return -EINVAL; - vcpu = vcpu_load(kvm, sregs->vcpu); - if (!vcpu) - return -ENOENT; + vcpu_load(vcpu); set_segment(vcpu, &sregs->cs, VCPU_SREG_CS); set_segment(vcpu, &sregs->ds, VCPU_SREG_DS); @@ -1777,20 +1761,14 @@ static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data) * * @return number of msrs set successfully. */ -static int __msr_io(struct kvm *kvm, struct kvm_msrs *msrs, +static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs, struct kvm_msr_entry *entries, int (*do_msr)(struct kvm_vcpu *vcpu, unsigned index, u64 *data)) { - struct kvm_vcpu *vcpu; int i; - if (!valid_vcpu(msrs->vcpu)) - return -EINVAL; - - vcpu = vcpu_load(kvm, msrs->vcpu); - if (!vcpu) - return -ENOENT; + vcpu_load(vcpu); for (i = 0; i < msrs->nmsrs; ++i) if (do_msr(vcpu, entries[i].index, &entries[i].data)) @@ -1806,7 +1784,7 @@ static int __msr_io(struct kvm *kvm, struct kvm_msrs *msrs, * * @return number of msrs set successfully. */ -static int msr_io(struct kvm *kvm, struct kvm_msrs __user *user_msrs, +static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs, int (*do_msr)(struct kvm_vcpu *vcpu, unsigned index, u64 *data), int writeback) @@ -1834,7 +1812,7 @@ static int msr_io(struct kvm *kvm, struct kvm_msrs __user *user_msrs, if (copy_from_user(entries, user_msrs->entries, size)) goto out_free; - r = n = __msr_io(kvm, &msrs, entries, do_msr); + r = n = __msr_io(vcpu, &msrs, entries, do_msr); if (r < 0) goto out_free; @@ -1853,38 +1831,31 @@ out: /* * Translate a guest virtual address to a guest physical address. 
*/ -static int kvm_vm_ioctl_translate(struct kvm *kvm, struct kvm_translation *tr) +static int kvm_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, + struct kvm_translation *tr) { unsigned long vaddr = tr->linear_address; - struct kvm_vcpu *vcpu; gpa_t gpa; - vcpu = vcpu_load(kvm, tr->vcpu); - if (!vcpu) - return -ENOENT; - spin_lock(&kvm->lock); + vcpu_load(vcpu); + spin_lock(&vcpu->kvm->lock); gpa = vcpu->mmu.gva_to_gpa(vcpu, vaddr); tr->physical_address = gpa; tr->valid = gpa != UNMAPPED_GVA; tr->writeable = 1; tr->usermode = 0; - spin_unlock(&kvm->lock); + spin_unlock(&vcpu->kvm->lock); vcpu_put(vcpu); return 0; } -static int kvm_vm_ioctl_interrupt(struct kvm *kvm, struct kvm_interrupt *irq) +static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, + struct kvm_interrupt *irq) { - struct kvm_vcpu *vcpu; - - if (!valid_vcpu(irq->vcpu)) - return -EINVAL; if (irq->irq < 0 || irq->irq >= 256) return -EINVAL; - vcpu = vcpu_load(kvm, irq->vcpu); - if (!vcpu) - return -ENOENT; + vcpu_load(vcpu); set_bit(irq->irq, vcpu->irq_pending); set_bit(irq->irq / BITS_PER_LONG, &vcpu->irq_summary); @@ -1894,17 +1865,12 @@ static int kvm_vm_ioctl_interrupt(struct kvm *kvm, struct kvm_interrupt *irq) return 0; } -static int kvm_vm_ioctl_debug_guest(struct kvm *kvm, - struct kvm_debug_guest *dbg) +static int kvm_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, + struct kvm_debug_guest *dbg) { - struct kvm_vcpu *vcpu; int r; - if (!valid_vcpu(dbg->vcpu)) - return -EINVAL; - vcpu = vcpu_load(kvm, dbg->vcpu); - if (!vcpu) - return -ENOENT; + vcpu_load(vcpu); r = kvm_arch_ops->set_guest_debug(vcpu, dbg); @@ -1913,6 +1879,59 @@ static int kvm_vm_ioctl_debug_guest(struct kvm *kvm, return r; } +static int kvm_vcpu_release(struct inode *inode, struct file *filp) +{ + struct kvm_vcpu *vcpu = filp->private_data; + + fput(vcpu->kvm->filp); + return 0; +} + +static struct file_operations kvm_vcpu_fops = { + .release = kvm_vcpu_release, + .unlocked_ioctl = kvm_vcpu_ioctl, + .compat_ioctl = kvm_vcpu_ioctl, +}; + +/* + * Allocates an inode for the vcpu. + */ +static int create_vcpu_fd(struct kvm_vcpu *vcpu) +{ + int fd, r; + struct inode *inode; + struct file *file; + + atomic_inc(&vcpu->kvm->filp->f_count); + inode = kvmfs_inode(&kvm_vcpu_fops); + if (IS_ERR(inode)) { + r = PTR_ERR(inode); + goto out1; + } + + file = kvmfs_file(inode, vcpu); + if (IS_ERR(file)) { + r = PTR_ERR(file); + goto out2; + } + + r = get_unused_fd(); + if (r < 0) + goto out3; + fd = r; + fd_install(fd, file); + + return fd; + +out3: + fput(file); +out2: + iput(inode); +out1: + fput(vcpu->kvm->filp); + return r; +} + /* * Creates some virtual cpus. Good luck creating more than one. 
*/ @@ -1955,7 +1974,11 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) if (r < 0) goto out_free_vcpus; - return 0; + r = create_vcpu_fd(vcpu); + if (r < 0) + goto out_free_vcpus; + + return r; out_free_vcpus: kvm_free_vcpu(vcpu); @@ -1964,26 +1987,21 @@ out: return r; } -static long kvm_vm_ioctl(struct file *filp, - unsigned int ioctl, unsigned long arg) +static long kvm_vcpu_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) { - struct kvm *kvm = filp->private_data; + struct kvm_vcpu *vcpu = filp->private_data; void __user *argp = (void __user *)arg; int r = -EINVAL; switch (ioctl) { - case KVM_CREATE_VCPU: - r = kvm_vm_ioctl_create_vcpu(kvm, arg); - if (r) - goto out; - break; case KVM_RUN: { struct kvm_run kvm_run; r = -EFAULT; if (copy_from_user(&kvm_run, argp, sizeof kvm_run)) goto out; - r = kvm_vm_ioctl_run(kvm, &kvm_run); + r = kvm_vcpu_ioctl_run(vcpu, &kvm_run); if (r < 0 && r != -EINTR) goto out; if (copy_to_user(argp, &kvm_run, sizeof kvm_run)) { @@ -1995,10 +2013,8 @@ static long kvm_vm_ioctl(struct file *filp, case KVM_GET_REGS: { struct kvm_regs kvm_regs; - r = -EFAULT; - if (copy_from_user(&kvm_regs, argp, sizeof kvm_regs)) - goto out; - r = kvm_vm_ioctl_get_regs(kvm, &kvm_regs); + memset(&kvm_regs, 0, sizeof kvm_regs); + r = kvm_vcpu_ioctl_get_regs(vcpu, &kvm_regs); if (r) goto out; r = -EFAULT; @@ -2013,7 +2029,7 @@ static long kvm_vm_ioctl(struct file *filp, r = -EFAULT; if (copy_from_user(&kvm_regs, argp, sizeof kvm_regs)) goto out; - r = kvm_vm_ioctl_set_regs(kvm, &kvm_regs); + r = kvm_vcpu_ioctl_set_regs(vcpu, &kvm_regs); if (r) goto out; r = 0; @@ -2022,10 +2038,8 @@ static long kvm_vm_ioctl(struct file *filp, case KVM_GET_SREGS: { struct kvm_sregs kvm_sregs; - r = -EFAULT; - if (copy_from_user(&kvm_sregs, argp, sizeof kvm_sregs)) - goto out; - r = kvm_vm_ioctl_get_sregs(kvm, &kvm_sregs); + memset(&kvm_sregs, 0, sizeof kvm_sregs); + r = kvm_vcpu_ioctl_get_sregs(vcpu, &kvm_sregs); if (r) goto out; r = -EFAULT; @@ -2040,7 +2054,7 @@ static long kvm_vm_ioctl(struct file *filp, r = -EFAULT; if (copy_from_user(&kvm_sregs, argp, sizeof kvm_sregs)) goto out; - r = kvm_vm_ioctl_set_sregs(kvm, &kvm_sregs); + r = kvm_vcpu_ioctl_set_sregs(vcpu, &kvm_sregs); if (r) goto out; r = 0; @@ -2052,7 +2066,7 @@ static long kvm_vm_ioctl(struct file *filp, r = -EFAULT; if (copy_from_user(&tr, argp, sizeof tr)) goto out; - r = kvm_vm_ioctl_translate(kvm, &tr); + r = kvm_vcpu_ioctl_translate(vcpu, &tr); if (r) goto out; r = -EFAULT; @@ -2067,7 +2081,7 @@ static long kvm_vm_ioctl(struct file *filp, r = -EFAULT; if (copy_from_user(&irq, argp, sizeof irq)) goto out; - r = kvm_vm_ioctl_interrupt(kvm, &irq); + r = kvm_vcpu_ioctl_interrupt(vcpu, &irq); if (r) goto out; r = 0; @@ -2079,12 +2093,38 @@ static long kvm_vm_ioctl(struct file *filp, r = -EFAULT; if (copy_from_user(&dbg, argp, sizeof dbg)) goto out; - r = kvm_vm_ioctl_debug_guest(kvm, &dbg); + r = kvm_vcpu_ioctl_debug_guest(vcpu, &dbg); if (r) goto out; r = 0; break; } + case KVM_GET_MSRS: + r = msr_io(vcpu, argp, get_msr, 1); + break; + case KVM_SET_MSRS: + r = msr_io(vcpu, argp, do_set_msr, 0); + break; + default: + ; + } +out: + return r; +} + +static long kvm_vm_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + struct kvm *kvm = filp->private_data; + void __user *argp = (void __user *)arg; + int r = -EINVAL; + + switch (ioctl) { + case KVM_CREATE_VCPU: + r = kvm_vm_ioctl_create_vcpu(kvm, arg); + if (r < 0) + goto out; + break; case KVM_SET_MEMORY_REGION: { struct kvm_memory_region 
kvm_mem; @@ -2107,12 +2147,6 @@ static long kvm_vm_ioctl(struct file *filp, goto out; break; } - case KVM_GET_MSRS: - r = msr_io(kvm, argp, get_msr, 1); - break; - case KVM_SET_MSRS: - r = msr_io(kvm, argp, do_set_msr, 0); - break; default: ; } @@ -2182,6 +2216,7 @@ static int kvm_dev_ioctl_create_vm(void) r = PTR_ERR(file); goto out3; } + kvm->filp = file; r = get_unused_fd(); if (r < 0) diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index 5a200c0b4b48..3d8ea7ac2ecc 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -600,10 +600,9 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu) kfree(vcpu->svm); } -static struct kvm_vcpu *svm_vcpu_load(struct kvm_vcpu *vcpu) +static void svm_vcpu_load(struct kvm_vcpu *vcpu) { get_cpu(); - return vcpu; } static void svm_vcpu_put(struct kvm_vcpu *vcpu) diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index ff956a6302ec..c07178e61122 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -204,7 +204,7 @@ static void vmcs_write64(unsigned long field, u64 value) * Switches to specified vcpu, until a matching vcpu_put(), but assumes * vcpu mutex is already taken. */ -static struct kvm_vcpu *vmx_vcpu_load(struct kvm_vcpu *vcpu) +static void vmx_vcpu_load(struct kvm_vcpu *vcpu) { u64 phys_addr = __pa(vcpu->vmcs); int cpu; @@ -242,7 +242,6 @@ static struct kvm_vcpu *vmx_vcpu_load(struct kvm_vcpu *vcpu) rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp); vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */ } - return vcpu; } static void vmx_vcpu_put(struct kvm_vcpu *vcpu) diff --git a/include/linux/kvm.h b/include/linux/kvm.h index d6e6635dbec1..7c9a4004af44 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -52,11 +52,10 @@ enum kvm_exit_reason { /* for KVM_RUN */ struct kvm_run { /* in */ - __u32 vcpu; __u32 emulated; /* skip current instruction */ __u32 mmio_completed; /* mmio request completed */ __u8 request_interrupt_window; - __u8 padding1[3]; + __u8 padding1[7]; /* out */ __u32 exit_type; @@ -111,10 +110,6 @@ struct kvm_run { /* for KVM_GET_REGS and KVM_SET_REGS */ struct kvm_regs { - /* in */ - __u32 vcpu; - __u32 padding; - /* out (KVM_GET_REGS) / in (KVM_SET_REGS) */ __u64 rax, rbx, rcx, rdx; __u64 rsi, rdi, rsp, rbp; @@ -141,10 +136,6 @@ struct kvm_dtable { /* for KVM_GET_SREGS and KVM_SET_SREGS */ struct kvm_sregs { - /* in */ - __u32 vcpu; - __u32 padding; - /* out (KVM_GET_SREGS) / in (KVM_SET_SREGS) */ struct kvm_segment cs, ds, es, fs, gs, ss; struct kvm_segment tr, ldt; @@ -163,8 +154,8 @@ struct kvm_msr_entry { /* for KVM_GET_MSRS and KVM_SET_MSRS */ struct kvm_msrs { - __u32 vcpu; __u32 nmsrs; /* number of msrs in entries */ + __u32 pad; struct kvm_msr_entry entries[0]; }; @@ -179,8 +170,6 @@ struct kvm_msr_list { struct kvm_translation { /* in */ __u64 linear_address; - __u32 vcpu; - __u32 padding; /* out */ __u64 physical_address; @@ -193,7 +182,6 @@ struct kvm_translation { /* for KVM_INTERRUPT */ struct kvm_interrupt { /* in */ - __u32 vcpu; __u32 irq; }; @@ -206,8 +194,8 @@ struct kvm_breakpoint { /* for KVM_DEBUG_GUEST */ struct kvm_debug_guest { /* int */ - __u32 vcpu; __u32 enabled; + __u32 pad; struct kvm_breakpoint breakpoints[4]; __u32 singlestep; }; @@ -234,18 +222,26 @@ struct kvm_dirty_log { /* * ioctls for VM fds */ +#define KVM_SET_MEMORY_REGION _IOW(KVMIO, 10, struct kvm_memory_region) +/* + * KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns + * a vcpu fd. 
+ */ +#define KVM_CREATE_VCPU _IOW(KVMIO, 11, int) +#define KVM_GET_DIRTY_LOG _IOW(KVMIO, 12, struct kvm_dirty_log) + +/* + * ioctls for vcpu fds + */ #define KVM_RUN _IOWR(KVMIO, 2, struct kvm_run) -#define KVM_GET_REGS _IOWR(KVMIO, 3, struct kvm_regs) +#define KVM_GET_REGS _IOR(KVMIO, 3, struct kvm_regs) #define KVM_SET_REGS _IOW(KVMIO, 4, struct kvm_regs) -#define KVM_GET_SREGS _IOWR(KVMIO, 5, struct kvm_sregs) +#define KVM_GET_SREGS _IOR(KVMIO, 5, struct kvm_sregs) #define KVM_SET_SREGS _IOW(KVMIO, 6, struct kvm_sregs) #define KVM_TRANSLATE _IOWR(KVMIO, 7, struct kvm_translation) #define KVM_INTERRUPT _IOW(KVMIO, 8, struct kvm_interrupt) #define KVM_DEBUG_GUEST _IOW(KVMIO, 9, struct kvm_debug_guest) -#define KVM_SET_MEMORY_REGION _IOW(KVMIO, 10, struct kvm_memory_region) -#define KVM_CREATE_VCPU _IOW(KVMIO, 11, int /* vcpu_slot */) -#define KVM_GET_DIRTY_LOG _IOW(KVMIO, 12, struct kvm_dirty_log) #define KVM_GET_MSRS _IOWR(KVMIO, 13, struct kvm_msrs) -#define KVM_SET_MSRS _IOWR(KVMIO, 14, struct kvm_msrs) +#define KVM_SET_MSRS _IOW(KVMIO, 14, struct kvm_msrs) #endif -- cgit From ab51a434c5816e1ca3f033791c1cc5c6594998ec Mon Sep 17 00:00:00 2001 From: Uri Lublin Date: Wed, 21 Feb 2007 18:25:21 +0200 Subject: KVM: Add missing calls to mark_page_dirty() A few places where we modify guest memory fail to call mark_page_dirty(), causing live migration to fail. This adds the missing calls. Signed-off-by: Uri Lublin Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/kvm/kvm_main.c') diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index a593d092d85b..edff4055b32b 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -228,12 +228,15 @@ int kvm_write_guest(struct kvm_vcpu *vcpu, gva_t addr, unsigned long size, unsigned now; unsigned offset; hva_t guest_buf; + gfn_t gfn; paddr = gva_to_hpa(vcpu, addr); if (is_error_hpa(paddr)) break; + gfn = vcpu->mmu.gva_to_gpa(vcpu, addr) >> PAGE_SHIFT; + mark_page_dirty(vcpu->kvm, gfn); guest_buf = (hva_t)kmap_atomic( pfn_to_page(paddr >> PAGE_SHIFT), KM_USER0); offset = addr & ~PAGE_MASK; @@ -953,6 +956,7 @@ static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, return 0; page = gfn_to_page(m, gpa >> PAGE_SHIFT); kvm_mmu_pre_write(vcpu, gpa, bytes); + mark_page_dirty(vcpu->kvm, gpa >> PAGE_SHIFT); virt = kmap_atomic(page, KM_USER0); memcpy(virt + offset_in_page(gpa), &val, bytes); kunmap_atomic(virt, KM_USER0); @@ -1294,6 +1298,7 @@ static int vcpu_register_para(struct kvm_vcpu *vcpu, gpa_t para_state_gpa) if (is_error_hpa(para_state_hpa)) goto err_gp; + mark_page_dirty(vcpu->kvm, para_state_gpa >> PAGE_SHIFT); para_state_page = pfn_to_page(para_state_hpa >> PAGE_SHIFT); para_state = kmap_atomic(para_state_page, KM_USER0); @@ -1323,6 +1328,7 @@ static int vcpu_register_para(struct kvm_vcpu *vcpu, gpa_t para_state_gpa) vcpu->para_state_gpa = para_state_gpa; vcpu->hypercall_gpa = hypercall_gpa; + mark_page_dirty(vcpu->kvm, hypercall_gpa >> PAGE_SHIFT); hypercall = kmap_atomic(pfn_to_page(hypercall_hpa >> PAGE_SHIFT), KM_USER1) + (hypercall_hpa & ~PAGE_MASK); kvm_arch_ops->patch_hypercall(vcpu, hypercall); -- cgit From cd1a4a982a78e793125db2f386e91dc7c89c9ed1 Mon Sep 17 00:00:00 2001 From: Uri Lublin Date: Thu, 22 Feb 2007 16:43:09 +0200 Subject: KVM: Fix dirty page log bitmap size/access calculation Since dirty_bitmap is an unsigned long array, the alignment and size need to take that into account. 
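The sizing issue corrected in the diff below is easiest to see with concrete numbers. The following standalone program is only an illustration: the ALIGN() and BITS_PER_LONG definitions mimic the kernel's but are local to the example, and the page count is arbitrary. The old byte-granular calculation yields a buffer that is not a whole number of unsigned longs, while the corrected one rounds up to long granularity.

/*
 * Illustration only; not kernel code.  ALIGN() and BITS_PER_LONG are
 * redefined here to mirror the kernel macros.
 */
#include <stdio.h>

#define BITS_PER_LONG  (8 * sizeof(unsigned long))
#define ALIGN(x, a)    ((((x) + (a) - 1) / (a)) * (a))

int main(void)
{
    unsigned long npages = 1000;  /* arbitrary example slot size, in pages */

    /* Old sizing: byte-granular, not necessarily a whole number of longs. */
    unsigned long bytes_old = ALIGN(npages, 8) / 8;
    /* New sizing: rounded up to unsigned long granularity. */
    unsigned long bytes_new = ALIGN(npages, BITS_PER_LONG) / 8;

    printf("old: %lu bytes, new: %lu bytes (%lu unsigned longs)\n",
           bytes_old, bytes_new,
           (unsigned long)(bytes_new / sizeof(unsigned long)));
    return 0;
}

On a 64-bit build this prints old: 125 bytes, new: 128 bytes (16 unsigned longs), which is why the scan loop can now safely step through the bitmap one unsigned long at a time.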
Signed-off-by: Uri Lublin Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/kvm/kvm_main.c') diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index edff4055b32b..e7108105c50b 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -792,9 +792,9 @@ static int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, if (!memslot->dirty_bitmap) goto out; - n = ALIGN(memslot->npages, 8) / 8; + n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; - for (i = 0; !any && i < n; ++i) + for (i = 0; !any && i < n/sizeof(long); ++i) any = memslot->dirty_bitmap[i]; r = -EFAULT; -- cgit From 02b27c1f802bfb60cc2cb5b763dde1b6b3479a7e Mon Sep 17 00:00:00 2001 From: Uri Lublin Date: Thu, 22 Feb 2007 17:15:33 +0200 Subject: kvm: move do_remove_write_access() up To be called from kvm_vm_ioctl_set_memory_region() Signed-off-by: Uri Lublin Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers/kvm/kvm_main.c') diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index e7108105c50b..be7694db285d 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -611,6 +611,13 @@ void fx_init(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(fx_init); +static void do_remove_write_access(struct kvm_vcpu *vcpu, int slot) +{ + spin_lock(&vcpu->kvm->lock); + kvm_mmu_slot_remove_write_access(vcpu, slot); + spin_unlock(&vcpu->kvm->lock); +} + /* * Allocate some memory and give it an address in the guest physical address * space. @@ -756,13 +763,6 @@ out: return r; } -static void do_remove_write_access(struct kvm_vcpu *vcpu, int slot) -{ - spin_lock(&vcpu->kvm->lock); - kvm_mmu_slot_remove_write_access(vcpu, slot); - spin_unlock(&vcpu->kvm->lock); -} - /* * Get (and clear) the dirty memory log for a memory slot. */ -- cgit From ff990d5952712c2e163b355946c39278da8407a8 Mon Sep 17 00:00:00 2001 From: Uri Lublin Date: Thu, 22 Feb 2007 17:37:32 +0200 Subject: KVM: Remove write access permissions when dirty-page-logging is enabled Enabling dirty page logging is done using KVM_SET_MEMORY_REGION ioctl. If the memory region already exists, we need to remove write accesses, so writes will be caught, and dirty pages will be logged. Signed-off-by: Uri Lublin Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/kvm/kvm_main.c') diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index be7694db285d..e48b4d7a350e 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -748,6 +748,8 @@ raced: vcpu = vcpu_load_slot(kvm, i); if (!vcpu) continue; + if (new.flags & KVM_MEM_LOG_DIRTY_PAGES) + do_remove_write_access(vcpu, mem->slot); kvm_mmu_reset_context(vcpu); vcpu_put(vcpu); } -- cgit From 58e690e6fd47a682b49aed3510443d6797a03021 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 26 Feb 2007 16:29:43 +0200 Subject: KVM: Fix bogus failure in kvm.ko module initialization A bogus 'return r' can cause an otherwise successful module load to fail. This both denies users the use of kvm, and it also denies them the use of their machine, as it leaves a filesystem registered with its callbacks pointing into now-freed module memory. Fix by returning a zero like a good module. Thanks to Richard Lucassen (?) for reporting the problem and for providing access to a machine which exhibited it. 
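As an aside before that fix's diff, here is roughly how the dirty-logging ioctls touched by the preceding patches are meant to be driven from userspace. This is a sketch only, written against the VM-fd interface as it stands at this point in the series: vm_fd, the slot number and geometry, and the 4 KiB page size are assumptions, and error handling is omitted.

/*
 * Sketch only: enable dirty logging on an already-registered slot and
 * fetch the log.  'vm_fd' and the slot geometry are assumed to exist;
 * error handling omitted.
 */
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

#define GUEST_PAGE_SIZE 4096UL   /* assumption: 4 KiB guest pages */

static unsigned long *fetch_dirty_log(int vm_fd, __u32 slot,
                                      __u64 guest_phys_addr, __u64 memory_size)
{
    struct kvm_memory_region region = {
        .slot            = slot,
        .flags           = KVM_MEM_LOG_DIRTY_PAGES, /* triggers write-access removal */
        .guest_phys_addr = guest_phys_addr,
        .memory_size     = memory_size,
    };
    struct kvm_dirty_log log = { .slot = slot };
    unsigned long npages = memory_size / GUEST_PAGE_SIZE;
    unsigned long nlongs = (npages + 8 * sizeof(unsigned long) - 1) /
                           (8 * sizeof(unsigned long));

    /* Bitmap sized in whole unsigned longs, matching the kernel-side fix. */
    log.dirty_bitmap = calloc(nlongs, sizeof(unsigned long));

    /* Re-registering the slot with the flag set enables logging... */
    ioctl(vm_fd, KVM_SET_MEMORY_REGION, &region);
    /* ...after which the log can be pulled periodically. */
    ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log);

    return log.dirty_bitmap;   /* one bit per guest page in the slot */
}

Re-registering an existing slot with KVM_MEM_LOG_DIRTY_PAGES set is what triggers the new do_remove_write_access() call, so subsequent guest writes fault and end up recorded in the bitmap returned by KVM_GET_DIRTY_LOG.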
Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/kvm/kvm_main.c') diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index e48b4d7a350e..ca82ba359e1a 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -2540,7 +2540,7 @@ static __init int kvm_init(void) bad_page_address = page_to_pfn(bad_page) << PAGE_SHIFT; memset(__va(bad_page_address), 0, PAGE_SIZE); - return r; + return 0; out: kvm_exit_debug(); -- cgit From e9cdb1e330d805f4453c1359cebe2bd6a06ce692 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 1 Mar 2007 11:28:13 +0200 Subject: KVM: Move kvmfs magic number to <linux/magic.h> Use the standard magic.h for kvmfs. Cc: Avi Kivity Signed-off-by: Andrew Morton Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 4 ++-- include/linux/magic.h | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/kvm/kvm_main.c') diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index ca82ba359e1a..a163bca38973 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -20,6 +20,7 @@ #include #include #include +#include <linux/magic.h> #include #include #include @@ -75,7 +76,6 @@ static struct kvm_stats_debugfs_item { static struct dentry *debugfs_dir; -#define KVMFS_MAGIC 0x19700426 struct vfsmount *kvmfs_mnt; #define MAX_IO_MSRS 256 @@ -2433,7 +2433,7 @@ hpa_t bad_page_address; static int kvmfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_pseudo(fs_type, "kvm:", NULL, KVMFS_MAGIC, mnt); + return get_sb_pseudo(fs_type, "kvm:", NULL, KVMFS_SUPER_MAGIC, mnt); } static struct file_system_type kvm_fs_type = { diff --git a/include/linux/magic.h b/include/linux/magic.h index b32c8a97fcec..a9c6567fe70c 100644 --- a/include/linux/magic.h +++ b/include/linux/magic.h @@ -13,6 +13,7 @@ #define HPFS_SUPER_MAGIC 0xf995e849 #define ISOFS_SUPER_MAGIC 0x9660 #define JFFS2_SUPER_MAGIC 0x72b6 +#define KVMFS_SUPER_MAGIC 0x19700426 #define MINIX_SUPER_MAGIC 0x137F /* original minix fs */ #define MINIX_SUPER_MAGIC2 0x138F /* minix fs, 30 char names */ -- cgit From ca45aaae1ef98890ac4e3ee48d65aa22401fd1dc Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 1 Mar 2007 19:21:03 +0200 Subject: KVM: Unset kvm_arch_ops if arch module loading failed Otherwise, the core module thinks the arch module is loaded, and won't let you reload it after you've fixed the bug. Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/kvm/kvm_main.c') diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index a163bca38973..dc7a8c78cbf9 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -2464,7 +2464,7 @@ int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module) r = kvm_arch_ops->hardware_setup(); if (r < 0) - return r; + goto out; on_each_cpu(kvm_arch_ops->hardware_enable, NULL, 0, 1); r = register_cpu_notifier(&kvm_cpu_notifier); @@ -2500,6 +2500,8 @@ out_free_2: out_free_1: on_each_cpu(kvm_arch_ops->hardware_disable, NULL, 0, 1); kvm_arch_ops->hardware_unsetup(); +out: + kvm_arch_ops = NULL; return r; } -- cgit
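Taken together, these patches change the userspace calling convention: the per-vcpu structures no longer carry a vcpu slot, and the per-vcpu ioctls move from the VM fd to the file descriptor returned by KVM_CREATE_VCPU. The sketch below shows the intended flow against the header as modified in this series; it assumes a VM fd has already been obtained (the KVM_CREATE_VM / kvmfs path is not shown in this excerpt) and abbreviates error handling.

#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/*
 * Sketch only: drive one vcpu through the new per-vcpu fd.  'vm_fd' is
 * assumed to exist already; error handling is abbreviated.
 */
static int run_first_vcpu(int vm_fd)
{
    struct kvm_regs regs;
    struct kvm_run run;
    int vcpu_fd;

    /* KVM_CREATE_VCPU takes the vcpu slot and returns a vcpu fd. */
    vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);
    if (vcpu_fd < 0)
        return -1;

    /* Per-vcpu ioctls now target the vcpu fd; no __u32 vcpu field to fill in. */
    if (ioctl(vcpu_fd, KVM_GET_REGS, &regs) < 0)
        return -1;
    printf("initial rip = 0x%llx\n", (unsigned long long)regs.rip);

    memset(&run, 0, sizeof(run));
    return ioctl(vcpu_fd, KVM_RUN, &run);   /* returns with run.exit_* filled in */
}

Note that KVM_GET_REGS becomes a pure-read ioctl (_IOR) precisely because there is no longer any input field for userspace to fill in before the call.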