diff options
Diffstat (limited to 'arch/x86/kvm/x86.c')
-rw-r--r-- | arch/x86/kvm/x86.c | 165 |
1 files changed, 108 insertions, 57 deletions
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 884e5b3838c7..2a20ce60152e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2957,6 +2957,11 @@ static void record_steal_time(struct kvm_vcpu *vcpu) struct kvm_host_map map; struct kvm_steal_time *st; + if (kvm_xen_msr_enabled(vcpu->kvm)) { + kvm_xen_runstate_set_running(vcpu); + return; + } + if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) return; @@ -3756,11 +3761,15 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_ENFORCE_PV_FEATURE_CPUID: r = 1; break; +#ifdef CONFIG_KVM_XEN case KVM_CAP_XEN_HVM: r = KVM_XEN_HVM_CONFIG_HYPERCALL_MSR | KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL | KVM_XEN_HVM_CONFIG_SHARED_INFO; + if (sched_info_on()) + r |= KVM_XEN_HVM_CONFIG_RUNSTATE; break; +#endif case KVM_CAP_SYNC_REGS: r = KVM_SYNC_X86_VALID_FIELDS; break; @@ -4038,7 +4047,11 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) if (vcpu->preempted && !vcpu->arch.guest_state_protected) vcpu->arch.preempted_in_kernel = !static_call(kvm_x86_get_cpl)(vcpu); - kvm_steal_time_set_preempted(vcpu); + if (kvm_xen_msr_enabled(vcpu->kvm)) + kvm_xen_runstate_set_preempted(vcpu); + else + kvm_steal_time_set_preempted(vcpu); + static_call(kvm_x86_vcpu_put)(vcpu); vcpu->arch.last_host_tsc = rdtsc(); /* @@ -5013,6 +5026,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, case KVM_GET_SUPPORTED_HV_CPUID: r = kvm_ioctl_get_supported_hv_cpuid(vcpu, argp); break; +#ifdef CONFIG_KVM_XEN case KVM_XEN_VCPU_GET_ATTR: { struct kvm_xen_vcpu_attr xva; @@ -5033,6 +5047,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = kvm_xen_vcpu_set_attr(vcpu, &xva); break; } +#endif default: r = -EINVAL; } @@ -5215,10 +5230,18 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm, void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot) { + /* - * Flush potentially hardware-cached dirty pages to dirty_bitmap. + * Flush all CPUs' dirty log buffers to the dirty_bitmap. Called + * before reporting dirty_bitmap to userspace. KVM flushes the buffers + * on all VM-Exits, thus we only need to kick running vCPUs to force a + * VM-Exit. */ - static_call_cond(kvm_x86_flush_log_dirty)(kvm); + struct kvm_vcpu *vcpu; + int i; + + kvm_for_each_vcpu(i, vcpu, kvm) + kvm_vcpu_kick(vcpu); } int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event, @@ -5646,6 +5669,7 @@ set_pit2_out: kvm->arch.bsp_vcpu_id = arg; mutex_unlock(&kvm->lock); break; +#ifdef CONFIG_KVM_XEN case KVM_XEN_HVM_CONFIG: { struct kvm_xen_hvm_config xhc; r = -EFAULT; @@ -5674,6 +5698,7 @@ set_pit2_out: r = kvm_xen_hvm_set_attr(kvm, &xha); break; } +#endif case KVM_SET_CLOCK: { struct kvm_clock_data user_ns; u64 now_ns; @@ -8032,7 +8057,10 @@ void kvm_arch_exit(void) kvm_mmu_module_exit(); free_percpu(user_return_msrs); kmem_cache_destroy(x86_fpu_cache); +#ifdef CONFIG_KVM_XEN + static_key_deferred_flush(&kvm_xen_enabled); WARN_ON(static_branch_unlikely(&kvm_xen_enabled.key)); +#endif } static int __kvm_vcpu_halt(struct kvm_vcpu *vcpu, int state, int reason) @@ -8980,6 +9008,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) kvm_check_async_pf_completion(vcpu); if (kvm_check_request(KVM_REQ_MSR_FILTER_CHANGED, vcpu)) static_call(kvm_x86_msr_filter_changed)(vcpu); + + if (kvm_check_request(KVM_REQ_UPDATE_CPU_DIRTY_LOGGING, vcpu)) + static_call(kvm_x86_update_cpu_dirty_logging)(vcpu); } if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win || @@ -10748,75 +10779,96 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, return 0; } + +static void kvm_mmu_update_cpu_dirty_logging(struct kvm *kvm, bool enable) +{ + struct kvm_arch *ka = &kvm->arch; + + if (!kvm_x86_ops.cpu_dirty_log_size) + return; + + if ((enable && ++ka->cpu_dirty_logging_count == 1) || + (!enable && --ka->cpu_dirty_logging_count == 0)) + kvm_make_all_cpus_request(kvm, KVM_REQ_UPDATE_CPU_DIRTY_LOGGING); + + WARN_ON_ONCE(ka->cpu_dirty_logging_count < 0); +} + static void kvm_mmu_slot_apply_flags(struct kvm *kvm, struct kvm_memory_slot *old, struct kvm_memory_slot *new, enum kvm_mr_change change) { + bool log_dirty_pages = new->flags & KVM_MEM_LOG_DIRTY_PAGES; + /* - * Nothing to do for RO slots or CREATE/MOVE/DELETE of a slot. - * See comments below. + * Update CPU dirty logging if dirty logging is being toggled. This + * applies to all operations. */ - if ((change != KVM_MR_FLAGS_ONLY) || (new->flags & KVM_MEM_READONLY)) - return; + if ((old->flags ^ new->flags) & KVM_MEM_LOG_DIRTY_PAGES) + kvm_mmu_update_cpu_dirty_logging(kvm, log_dirty_pages); /* - * Dirty logging tracks sptes in 4k granularity, meaning that large - * sptes have to be split. If live migration is successful, the guest - * in the source machine will be destroyed and large sptes will be - * created in the destination. However, if the guest continues to run - * in the source machine (for example if live migration fails), small - * sptes will remain around and cause bad performance. + * Nothing more to do for RO slots (which can't be dirtied and can't be + * made writable) or CREATE/MOVE/DELETE of a slot. * - * Scan sptes if dirty logging has been stopped, dropping those - * which can be collapsed into a single large-page spte. Later - * page faults will create the large-page sptes. - * - * There is no need to do this in any of the following cases: + * For a memslot with dirty logging disabled: * CREATE: No dirty mappings will already exist. * MOVE/DELETE: The old mappings will already have been cleaned up by * kvm_arch_flush_shadow_memslot() + * + * For a memslot with dirty logging enabled: + * CREATE: No shadow pages exist, thus nothing to write-protect + * and no dirty bits to clear. + * MOVE/DELETE: The old mappings will already have been cleaned up by + * kvm_arch_flush_shadow_memslot(). */ - if ((old->flags & KVM_MEM_LOG_DIRTY_PAGES) && - !(new->flags & KVM_MEM_LOG_DIRTY_PAGES)) - kvm_mmu_zap_collapsible_sptes(kvm, new); + if ((change != KVM_MR_FLAGS_ONLY) || (new->flags & KVM_MEM_READONLY)) + return; /* - * Enable or disable dirty logging for the slot. - * - * For KVM_MR_DELETE and KVM_MR_MOVE, the shadow pages of the old - * slot have been zapped so no dirty logging updates are needed for - * the old slot. - * For KVM_MR_CREATE and KVM_MR_MOVE, once the new slot is visible - * any mappings that might be created in it will consume the - * properties of the new slot and do not need to be updated here. - * - * When PML is enabled, the kvm_x86_ops dirty logging hooks are - * called to enable/disable dirty logging. - * - * When disabling dirty logging with PML enabled, the D-bit is set - * for sptes in the slot in order to prevent unnecessary GPA - * logging in the PML buffer (and potential PML buffer full VMEXIT). - * This guarantees leaving PML enabled for the guest's lifetime - * won't have any additional overhead from PML when the guest is - * running with dirty logging disabled. - * - * When enabling dirty logging, large sptes are write-protected - * so they can be split on first write. New large sptes cannot - * be created for this slot until the end of the logging. - * See the comments in fast_page_fault(). - * For small sptes, nothing is done if the dirty log is in the - * initial-all-set state. Otherwise, depending on whether pml - * is enabled the D-bit or the W-bit will be cleared. + * READONLY and non-flags changes were filtered out above, and the only + * other flag is LOG_DIRTY_PAGES, i.e. something is wrong if dirty + * logging isn't being toggled on or off. */ - if (new->flags & KVM_MEM_LOG_DIRTY_PAGES) { - if (kvm_x86_ops.slot_enable_log_dirty) { - static_call(kvm_x86_slot_enable_log_dirty)(kvm, new); - } else { - int level = - kvm_dirty_log_manual_protect_and_init_set(kvm) ? - PG_LEVEL_2M : PG_LEVEL_4K; + if (WARN_ON_ONCE(!((old->flags ^ new->flags) & KVM_MEM_LOG_DIRTY_PAGES))) + return; + + if (!log_dirty_pages) { + /* + * Dirty logging tracks sptes in 4k granularity, meaning that + * large sptes have to be split. If live migration succeeds, + * the guest in the source machine will be destroyed and large + * sptes will be created in the destination. However, if the + * guest continues to run in the source machine (for example if + * live migration fails), small sptes will remain around and + * cause bad performance. + * + * Scan sptes if dirty logging has been stopped, dropping those + * which can be collapsed into a single large-page spte. Later + * page faults will create the large-page sptes. + */ + kvm_mmu_zap_collapsible_sptes(kvm, new); + } else { + /* By default, write-protect everything to log writes. */ + int level = PG_LEVEL_4K; + if (kvm_x86_ops.cpu_dirty_log_size) { + /* + * Clear all dirty bits, unless pages are treated as + * dirty from the get-go. + */ + if (!kvm_dirty_log_manual_protect_and_init_set(kvm)) + kvm_mmu_slot_leaf_clear_dirty(kvm, new); + + /* + * Write-protect large pages on write so that dirty + * logging happens at 4k granularity. No need to + * write-protect small SPTEs since write accesses are + * logged by the CPU via dirty bits. + */ + level = PG_LEVEL_2M; + } else if (kvm_dirty_log_manual_protect_and_init_set(kvm)) { /* * If we're with initial-all-set, we don't need * to write protect any small page because @@ -10825,10 +10877,9 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm, * so that the page split can happen lazily on * the first write to the huge page. */ - kvm_mmu_slot_remove_write_access(kvm, new, level); + level = PG_LEVEL_2M; } - } else { - static_call_cond(kvm_x86_slot_disable_log_dirty)(kvm, new); + kvm_mmu_slot_remove_write_access(kvm, new, level); } } |