Diffstat (limited to 'tools/testing/selftests/kvm/dirty_log_test.c')
-rw-r--r--	tools/testing/selftests/kvm/dirty_log_test.c	344
1 file changed, 324 insertions(+), 20 deletions(-)
diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
index 54da9cc20db4..471baecb7772 100644
--- a/tools/testing/selftests/kvm/dirty_log_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -12,8 +12,13 @@
 #include <unistd.h>
 #include <time.h>
 #include <pthread.h>
+#include <semaphore.h>
+#include <sys/types.h>
+#include <signal.h>
+#include <errno.h>
 #include <linux/bitmap.h>
 #include <linux/bitops.h>
+#include <asm/barrier.h>
 
 #include "test_util.h"
 #include "kvm_util.h"
@@ -57,6 +62,10 @@
 # define test_and_clear_bit_le	test_and_clear_bit
 #endif
 
+#define TEST_DIRTY_RING_COUNT		65536
+
+#define SIG_IPI SIGUSR1
+
 /*
  * Guest/Host shared variables. Ensure addr_gva2hva() and/or
  * sync_global_to/from_guest() are used when accessing from
@@ -128,6 +137,31 @@ static uint64_t host_dirty_count;
 static uint64_t host_clear_count;
 static uint64_t host_track_next_count;
 
+/* Whether dirty ring reset is requested, or finished */
+static sem_t dirty_ring_vcpu_stop;
+static sem_t dirty_ring_vcpu_cont;
+/*
+ * This is updated by the vcpu thread to tell the host whether it's a
+ * ring-full event.  It should only be read after a sem_wait() of
+ * dirty_ring_vcpu_stop and before the vcpu continues to run.
+ */
+static bool dirty_ring_vcpu_ring_full;
+/*
+ * This is only used for verifying the dirty pages.  Dirty ring has a very
+ * tricky case when the ring just got full: kvm will do a userspace exit
+ * due to ring full.  When that happens, the very last PFN is set but the
+ * data is not actually changed (the guest WRITE is not really applied
+ * yet), because we found that the dirty ring was full, refused to
+ * continue the vcpu, and recorded the dirty gfn with the old contents.
+ *
+ * For this specific case, it's safe to skip checking this pfn for this
+ * bit: it's a redundant bit, and when the write happens later the bit
+ * will be set again.  We use this variable to always keep track of the
+ * latest dirty gfn we've collected, so that if a data mismatch is found
+ * later in the verifying process, we let it pass.
+ */
+static uint64_t dirty_ring_last_page;
+
 enum log_mode_t {
 	/* Only use KVM_GET_DIRTY_LOG for logging */
 	LOG_MODE_DIRTY_LOG = 0,
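
Later in the patch, dirty_ring_collect_one() consumes these entries and updates dirty_ring_last_page as each gfn is harvested. As a minimal, self-contained sketch of that harvest protocol, assuming the kvm_dirty_gfn layout and flag values from the KVM dirty ring ABI (the ring below is a locally allocated mock standing in for the real mmap()ed per-vcpu ring, and all mock_* names are illustrative):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define MOCK_RING_COUNT	8
#define GFN_F_DIRTY	(1u << 0)	/* mirrors KVM_DIRTY_GFN_F_DIRTY */
#define GFN_F_RESET	(1u << 1)	/* mirrors KVM_DIRTY_GFN_F_RESET */

struct mock_dirty_gfn {
	uint32_t flags;
	uint32_t slot;
	uint64_t offset;
};

/* Harvest entries published by the producer and mark them collected */
static uint32_t harvest(struct mock_dirty_gfn *ring, uint32_t *fetch_index)
{
	uint32_t count = 0;

	for (;;) {
		struct mock_dirty_gfn *cur =
			&ring[*fetch_index % MOCK_RING_COUNT];

		if (cur->flags != GFN_F_DIRTY)
			break;			/* not published yet: stop */
		printf("collected slot %u offset %lu\n",
		       cur->slot, (unsigned long)cur->offset);
		cur->flags = GFN_F_RESET;	/* hand the slot back */
		(*fetch_index)++;
		count++;
	}
	return count;
}

int main(void)
{
	struct mock_dirty_gfn ring[MOCK_RING_COUNT];
	uint32_t fetch_index = 0;
	int i;

	memset(ring, 0, sizeof(ring));
	/* Pretend kvm dirtied three gfns in slot 1 */
	for (i = 0; i < 3; i++) {
		ring[i].slot = 1;
		ring[i].offset = 100 + i;
		ring[i].flags = GFN_F_DIRTY;
	}
	printf("harvested %u entries\n", harvest(ring, &fetch_index));
	return 0;
}

A slot is only consumed while its flags read exactly DIRTY; writing RESET hands it back so a later ring reset can recycle it.
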
@@ -135,6 +169,9 @@ enum log_mode_t {
 	/* Use both KVM_[GET|CLEAR]_DIRTY_LOG for logging */
 	LOG_MODE_CLEAR_LOG = 1,
 
+	/* Use dirty ring for logging */
+	LOG_MODE_DIRTY_RING = 2,
+
 	LOG_MODE_NUM,
 
 	/* Run all supported modes */
@@ -145,6 +182,26 @@ enum log_mode_t {
 static enum log_mode_t host_log_mode_option = LOG_MODE_ALL;
 /* Logging mode for current run */
 static enum log_mode_t host_log_mode;
+static pthread_t vcpu_thread;
+static uint32_t test_dirty_ring_count = TEST_DIRTY_RING_COUNT;
+
+static void vcpu_kick(void)
+{
+	pthread_kill(vcpu_thread, SIG_IPI);
+}
+
+/*
+ * In our test we do signal tricks; use a version of sem_wait that
+ * retries when interrupted by a signal.
+ */
+static void sem_wait_until(sem_t *sem)
+{
+	int ret;
+
+	do
+		ret = sem_wait(sem);
+	while (ret == -1 && errno == EINTR);
+}
 
 static bool clear_log_supported(void)
 {
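
A standalone sketch, not part of the patch, of why sem_wait_until() needs the retry loop: a plain sem_wait() fails with EINTR whenever a signal such as the SIG_IPI kick lands on the waiting thread, so a kick could otherwise abort an unrelated wait. All names below are illustrative:

#include <errno.h>
#include <pthread.h>
#include <semaphore.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

static sem_t sem;

static void noop_handler(int sig) { (void)sig; }

static void sem_wait_until(sem_t *s)
{
	int ret;

	do
		ret = sem_wait(s);
	while (ret == -1 && errno == EINTR);
}

static void *waiter(void *arg)
{
	(void)arg;
	sem_wait_until(&sem);	/* survives the SIGUSR1 below */
	printf("woken by sem_post, not by the signal\n");
	return NULL;
}

int main(void)
{
	pthread_t thr;
	struct sigaction sa = { .sa_handler = noop_handler };

	sigaction(SIGUSR1, &sa, NULL);	/* handler: interrupt, don't kill */
	sem_init(&sem, 0, 0);
	pthread_create(&thr, NULL, waiter, NULL);
	sleep(1);
	pthread_kill(thr, SIGUSR1);	/* interrupts sem_wait with EINTR */
	sleep(1);
	sem_post(&sem);			/* the real wakeup */
	pthread_join(thr, NULL);
	return 0;
}
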
@@ -178,6 +235,152 @@ static void clear_log_collect_dirty_pages(struct kvm_vm *vm, int slot,
 	kvm_vm_clear_dirty_log(vm, slot, bitmap, 0, num_pages);
 }
 
+static void default_after_vcpu_run(struct kvm_vm *vm, int ret, int err)
+{
+	struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+
+	TEST_ASSERT(ret == 0 || (ret == -1 && err == EINTR),
+		    "vcpu run failed: errno=%d", err);
+
+	TEST_ASSERT(get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC,
+		    "Invalid guest sync status: exit_reason=%s\n",
+		    exit_reason_str(run->exit_reason));
+}
+
+static bool dirty_ring_supported(void)
+{
+	return kvm_check_cap(KVM_CAP_DIRTY_LOG_RING);
+}
+
+static void dirty_ring_create_vm_done(struct kvm_vm *vm)
+{
+	/*
+	 * Switch to dirty ring mode after VM creation but before any
+	 * of the vcpu creation.
+	 */
+	vm_enable_dirty_ring(vm, test_dirty_ring_count *
+			     sizeof(struct kvm_dirty_gfn));
+}
+
+static inline bool dirty_gfn_is_dirtied(struct kvm_dirty_gfn *gfn)
+{
+	return gfn->flags == KVM_DIRTY_GFN_F_DIRTY;
+}
+
+static inline void dirty_gfn_set_collected(struct kvm_dirty_gfn *gfn)
+{
+	gfn->flags = KVM_DIRTY_GFN_F_RESET;
+}
+
+static uint32_t dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns,
+				       int slot, void *bitmap,
+				       uint32_t num_pages, uint32_t *fetch_index)
+{
+	struct kvm_dirty_gfn *cur;
+	uint32_t count = 0;
+
+	while (true) {
+		cur = &dirty_gfns[*fetch_index % test_dirty_ring_count];
+		if (!dirty_gfn_is_dirtied(cur))
+			break;
+		TEST_ASSERT(cur->slot == slot, "Slot number didn't match: "
+			    "%u != %u", cur->slot, slot);
+		TEST_ASSERT(cur->offset < num_pages, "Offset overflow: "
+			    "0x%llx >= 0x%x", cur->offset, num_pages);
+		//pr_info("fetch 0x%x page %llu\n", *fetch_index, cur->offset);
+		set_bit_le(cur->offset, bitmap);
+		dirty_ring_last_page = cur->offset;
+		dirty_gfn_set_collected(cur);
+		(*fetch_index)++;
+		count++;
+	}
+
+	return count;
+}
+
+static void dirty_ring_wait_vcpu(void)
+{
+	/* This makes sure that the hardware PML cache is flushed */
+	vcpu_kick();
+	sem_wait_until(&dirty_ring_vcpu_stop);
+}
+
+static void dirty_ring_continue_vcpu(void)
+{
+	pr_info("Notifying vcpu to continue\n");
+	sem_post(&dirty_ring_vcpu_cont);
+}
+
+static void dirty_ring_collect_dirty_pages(struct kvm_vm *vm, int slot,
+					   void *bitmap, uint32_t num_pages)
+{
+	/* We only have one vcpu */
+	static uint32_t fetch_index = 0;
+	uint32_t count = 0, cleared;
+	bool continued_vcpu = false;
+
+	dirty_ring_wait_vcpu();
+
+	if (!dirty_ring_vcpu_ring_full) {
+		/*
+		 * This is not a ring-full event, it's safe to allow
+		 * vcpu to continue
+		 */
+		dirty_ring_continue_vcpu();
+		continued_vcpu = true;
+	}
+
+	/* Only have one vcpu */
+	count = dirty_ring_collect_one(vcpu_map_dirty_ring(vm, VCPU_ID),
+				       slot, bitmap, num_pages, &fetch_index);
+
+	cleared = kvm_vm_reset_dirty_ring(vm);
+
+	/* Cleared pages should be the same as collected */
+	TEST_ASSERT(cleared == count, "Reset dirty pages (%u) mismatch "
+		    "with collected (%u)", cleared, count);
+
+	if (!continued_vcpu) {
+		TEST_ASSERT(dirty_ring_vcpu_ring_full,
+			    "Didn't continue vcpu even without ring full");
+		dirty_ring_continue_vcpu();
+	}
+
+	pr_info("Iteration %ld collected %u pages\n", iteration, count);
+}
+
+static void dirty_ring_after_vcpu_run(struct kvm_vm *vm, int ret, int err)
+{
+	struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+
+	/* A ucall-sync or ring-full event is allowed */
+	if (get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC) {
+		/* We should allow this to continue */
+		;
+	} else if (run->exit_reason == KVM_EXIT_DIRTY_RING_FULL ||
+		   (ret == -1 && err == EINTR)) {
+		/* Update the flag first before pause */
+		WRITE_ONCE(dirty_ring_vcpu_ring_full,
+			   run->exit_reason == KVM_EXIT_DIRTY_RING_FULL);
+		sem_post(&dirty_ring_vcpu_stop);
+		pr_info("vcpu stops because %s...\n",
+			dirty_ring_vcpu_ring_full ?
			"dirty ring is full" : "vcpu is kicked out");
+		sem_wait_until(&dirty_ring_vcpu_cont);
+		pr_info("vcpu continues now.\n");
+	} else {
+		TEST_ASSERT(false, "Invalid guest sync status: "
+			    "exit_reason=%s\n",
+			    exit_reason_str(run->exit_reason));
+	}
+}
+
+static void dirty_ring_before_vcpu_join(void)
+{
+	/* Kick another round of vcpu just to make sure it will quit */
+	sem_post(&dirty_ring_vcpu_cont);
+}
+
 struct log_mode {
 	const char *name;
 	/* Return true if this mode is supported, otherwise false */
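
The functions above form a stop/continue handshake: the vcpu thread publishes dirty_ring_vcpu_ring_full, posts dirty_ring_vcpu_stop, and blocks on dirty_ring_vcpu_cont while the main thread harvests and resets the ring. A minimal standalone sketch of the same two-semaphore pattern (worker/collector naming is illustrative, not from the patch):

#include <pthread.h>
#include <semaphore.h>
#include <stdio.h>

static sem_t stop_sem;	/* worker -> collector: "I have stopped" */
static sem_t cont_sem;	/* collector -> worker: "safe to go on"  */

static void *worker(void *arg)
{
	int round;

	(void)arg;
	for (round = 0; round < 3; round++) {
		printf("worker: produced round %d, pausing\n", round);
		sem_post(&stop_sem);	/* publish state before pausing */
		sem_wait(&cont_sem);	/* block until collector is done */
	}
	return NULL;
}

int main(void)
{
	pthread_t thr;
	int round;

	sem_init(&stop_sem, 0, 0);
	sem_init(&cont_sem, 0, 0);
	pthread_create(&thr, NULL, worker, NULL);
	for (round = 0; round < 3; round++) {
		sem_wait(&stop_sem);	/* wait for the worker to stop */
		printf("collector: harvesting round %d\n", round);
		sem_post(&cont_sem);	/* let the worker continue */
	}
	pthread_join(thr, NULL);
	return 0;
}

Publishing state before posting the stop semaphore is what makes the flag safe to read on the other side; the patch does the same with WRITE_ONCE() before sem_post() in dirty_ring_after_vcpu_run().
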
@@ -187,16 +390,29 @@ struct log_mode {
 	/* Hook to collect the dirty pages into the bitmap provided */
 	void (*collect_dirty_pages) (struct kvm_vm *vm, int slot,
 				     void *bitmap, uint32_t num_pages);
+	/* Hook to call after each vcpu run */
+	void (*after_vcpu_run)(struct kvm_vm *vm, int ret, int err);
+	void (*before_vcpu_join) (void);
 } log_modes[LOG_MODE_NUM] = {
 	{
 		.name = "dirty-log",
 		.collect_dirty_pages = dirty_log_collect_dirty_pages,
+		.after_vcpu_run = default_after_vcpu_run,
 	},
 	{
 		.name = "clear-log",
 		.supported = clear_log_supported,
 		.create_vm_done = clear_log_create_vm_done,
 		.collect_dirty_pages = clear_log_collect_dirty_pages,
+		.after_vcpu_run = default_after_vcpu_run,
+	},
+	{
+		.name = "dirty-ring",
+		.supported = dirty_ring_supported,
+		.create_vm_done = dirty_ring_create_vm_done,
+		.collect_dirty_pages = dirty_ring_collect_dirty_pages,
+		.before_vcpu_join = dirty_ring_before_vcpu_join,
+		.after_vcpu_run = dirty_ring_after_vcpu_run,
 	},
 };
 
@@ -247,6 +463,22 @@ static void log_mode_collect_dirty_pages(struct kvm_vm *vm, int slot,
 	mode->collect_dirty_pages(vm, slot, bitmap, num_pages);
 }
 
+static void log_mode_after_vcpu_run(struct kvm_vm *vm, int ret, int err)
+{
+	struct log_mode *mode = &log_modes[host_log_mode];
+
+	if (mode->after_vcpu_run)
+		mode->after_vcpu_run(vm, ret, err);
+}
+
+static void log_mode_before_vcpu_join(void)
+{
+	struct log_mode *mode = &log_modes[host_log_mode];
+
+	if (mode->before_vcpu_join)
+		mode->before_vcpu_join();
+}
+
 static void generate_random_array(uint64_t *guest_array, uint64_t size)
 {
 	uint64_t i;
@@ -257,29 +489,44 @@ static void generate_random_array(uint64_t *guest_array, uint64_t size)
 
 static void *vcpu_worker(void *data)
 {
-	int ret;
+	int ret, vcpu_fd;
 	struct kvm_vm *vm = data;
 	uint64_t *guest_array;
 	uint64_t pages_count = 0;
-	struct kvm_run *run;
+	struct kvm_signal_mask *sigmask = alloca(offsetof(struct kvm_signal_mask, sigset)
+						 + sizeof(sigset_t));
+	sigset_t *sigset = (sigset_t *) &sigmask->sigset;
+
+	vcpu_fd = vcpu_get_fd(vm, VCPU_ID);
+
+	/*
+	 * SIG_IPI is unblocked atomically while in KVM_RUN.  It causes the
+	 * ioctl to return with -EINTR, but it is still pending and we need
+	 * to accept it with the sigwait.
+	 */
+	sigmask->len = 8;
+	pthread_sigmask(0, NULL, sigset);
+	vcpu_ioctl(vm, VCPU_ID, KVM_SET_SIGNAL_MASK, sigmask);
+	sigaddset(sigset, SIG_IPI);
+	pthread_sigmask(SIG_BLOCK, sigset, NULL);
 
-	run = vcpu_state(vm, VCPU_ID);
+	sigemptyset(sigset);
+	sigaddset(sigset, SIG_IPI);
 
 	guest_array = addr_gva2hva(vm, (vm_vaddr_t)random_array);
-	generate_random_array(guest_array, TEST_PAGES_PER_LOOP);
 
 	while (!READ_ONCE(host_quit)) {
+		/* Clear any existing kick signals */
+		generate_random_array(guest_array, TEST_PAGES_PER_LOOP);
+		pages_count += TEST_PAGES_PER_LOOP;
 		/* Let the guest dirty the random pages */
-		ret = _vcpu_run(vm, VCPU_ID);
-		TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret);
-		if (get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC) {
-			pages_count += TEST_PAGES_PER_LOOP;
-			generate_random_array(guest_array, TEST_PAGES_PER_LOOP);
-		} else {
-			TEST_FAIL("Invalid guest sync status: "
-				  "exit_reason=%s\n",
-				  exit_reason_str(run->exit_reason));
+		ret = ioctl(vcpu_fd, KVM_RUN, NULL);
+		if (ret == -1 && errno == EINTR) {
+			int sig = -1;
+			sigwait(sigset, &sig);
+			assert(sig == SIG_IPI);
 		}
+		log_mode_after_vcpu_run(vm, ret, errno);
 	}
 
 	pr_info("Dirtied %"PRIu64" pages\n", pages_count);
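
A standalone sketch of the blocked-signal-plus-sigwait() pattern that vcpu_worker() relies on (illustrative names, not from the patch): while a signal is blocked it is queued as pending rather than delivered to a handler, and sigwait() dequeues it synchronously. In the test proper, KVM_SET_SIGNAL_MASK additionally makes KVM_RUN unblock SIG_IPI atomically while the guest runs, which is why the kick can interrupt the ioctl with EINTR yet still be pending for the sigwait() afterwards.

#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

static void *receiver(void *arg)
{
	sigset_t *set = arg;
	int sig = -1;

	/*
	 * The signal is blocked, so it stays pending instead of running
	 * a handler; sigwait() dequeues it synchronously.
	 */
	sigwait(set, &sig);
	printf("received signal %d synchronously\n", sig);
	return NULL;
}

int main(void)
{
	pthread_t thr;
	sigset_t set;

	sigemptyset(&set);
	sigaddset(&set, SIGUSR1);
	/* Block SIGUSR1 here; the new thread inherits the mask */
	pthread_sigmask(SIG_BLOCK, &set, NULL);

	pthread_create(&thr, NULL, receiver, &set);
	sleep(1);
	pthread_kill(thr, SIGUSR1);	/* stays pending until sigwait() */
	pthread_join(thr, NULL);
	return 0;
}
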
@@ -292,6 +539,7 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap)
 	uint64_t step = vm_num_host_pages(mode, 1);
 	uint64_t page;
 	uint64_t *value_ptr;
+	uint64_t min_iter = 0;
 
 	for (page = 0; page < host_num_pages; page += step) {
 		value_ptr = host_test_mem + page * host_page_size;
@@ -306,14 +554,64 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap)
 		}
 
 		if (test_and_clear_bit_le(page, bmap)) {
+			bool matched;
+
 			host_dirty_count++;
+
 			/*
 			 * If the bit is set, the value written onto
 			 * the corresponding page should be either the
 			 * previous iteration number or the current one.
 			 */
-			TEST_ASSERT(*value_ptr == iteration ||
-				    *value_ptr == iteration - 1,
+			matched = (*value_ptr == iteration ||
+				   *value_ptr == iteration - 1);
+
+			if (host_log_mode == LOG_MODE_DIRTY_RING && !matched) {
+				if (*value_ptr == iteration - 2 && min_iter <= iteration - 2) {
+					/*
+					 * Short answer: this case is special
+					 * only for dirty ring test where the
+					 * page is the last page before a kvm
+					 * dirty ring full in iteration N-2.
+					 *
+					 * Long answer: Assuming ring size R,
+					 * one possible condition is:
+					 *
+					 *      main thr       vcpu thr
+					 *      --------       --------
+					 *    iter=1
+					 *                   write 1 to page 0~(R-1)
+					 *                   full, vmexit
+					 *    collect 0~(R-1)
+					 *    kick vcpu
+					 *                   write 1 to (R-1)~(2R-2)
+					 *                   full, vmexit
+					 *    iter=2
+					 *    collect (R-1)~(2R-2)
+					 *    kick vcpu
+					 *                   write 1 to (2R-2)
+					 *                   (NOTE!!! "1" cached in cpu reg)
+					 *                   write 2 to (2R-1)~(3R-3)
+					 *                   full, vmexit
+					 *    iter=3
+					 *    collect (2R-2)~(3R-3)
+					 *    (here if we read value on page
+					 *     "2R-2" is 1, while iter=3!!!)
+					 *
+					 * This however can only happen once
+					 * per iteration.
+					 */
+					min_iter = iteration - 1;
+					continue;
+				} else if (page == dirty_ring_last_page) {
+					/*
+					 * Please refer to comments in
+					 * dirty_ring_last_page.
+					 */
+					continue;
+				}
+			}
+
+			TEST_ASSERT(matched, "Set page %"PRIu64" value %"PRIu64
 				    " incorrect (iteration=%"PRIu64")",
 				    page, *value_ptr, iteration);
@@ -378,7 +676,6 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid,
 static void run_test(enum vm_guest_mode mode, unsigned long iterations,
 		     unsigned long interval, uint64_t phys_offset)
 {
-	pthread_t vcpu_thread;
 	struct kvm_vm *vm;
 	unsigned long *bmap;
 
@@ -443,9 +740,6 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
 	/* Cache the HVA pointer of the region */
 	host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_phys_mem);
 
-#ifdef __x86_64__
-	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
-#endif
 	ucall_init(vm, NULL);
 
 	/* Export the shared variables to the guest */
@@ -476,6 +770,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
 
 	/* Tell the vcpu thread to quit */
 	host_quit = true;
+	log_mode_before_vcpu_join();
 	pthread_join(vcpu_thread, NULL);
 
 	pr_info("Total bits checked: dirty (%"PRIu64"), clear (%"PRIu64"), "
@@ -506,6 +801,9 @@ static void help(char *name)
 	printf("usage: %s [-h] [-i iterations] [-I interval] "
 	       "[-p offset] [-m mode]\n", name);
 	puts("");
+	printf(" -c: specify dirty ring size, in number of entries\n");
+	printf("     (only useful for dirty-ring test; default: %"PRIu32")\n",
+	       TEST_DIRTY_RING_COUNT);
 	printf(" -i: specify iteration counts (default: %"PRIu64")\n",
 	       TEST_HOST_LOOP_N);
 	printf(" -I: specify interval in ms (default: %"PRIu64" ms)\n",
@@ -536,6 +834,9 @@ int main(int argc, char *argv[])
 	unsigned int mode;
 	int opt, i, j;
 
+	sem_init(&dirty_ring_vcpu_stop, 0, 0);
+	sem_init(&dirty_ring_vcpu_cont, 0, 0);
+
 #ifdef __x86_64__
 	guest_mode_init(VM_MODE_PXXV48_4K, true, true);
 #endif
@@ -558,8 +859,11 @@ int main(int argc, char *argv[])
 	guest_mode_init(VM_MODE_P40V48_4K, true, true);
 #endif
 
-	while ((opt = getopt(argc, argv, "hi:I:p:m:M:")) != -1) {
+	while ((opt = getopt(argc, argv, "c:hi:I:p:m:M:")) != -1) {
 		switch (opt) {
+		case 'c':
+			test_dirty_ring_count = strtol(optarg, NULL, 10);
+			break;
 		case 'i':
 			iterations = strtol(optarg, NULL, 10);
 			break;
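
For context on what vm_enable_dirty_ring() wraps: enabling the ring is a KVM_ENABLE_CAP on the VM fd, done before any vcpu is created. The sketch below is a hedged approximation against a bare VM, assuming the KVM_CAP_DIRTY_LOG_RING ABI in which KVM_CHECK_EXTENSION reports the maximum per-vcpu ring size in bytes and args[0] carries the requested size in bytes (a power of two); it is not code from the patch:

#include <fcntl.h>
#include <linux/kvm.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
	int kvm_fd, vm_fd, ring_bytes;
	struct kvm_enable_cap cap;

	kvm_fd = open("/dev/kvm", O_RDWR);
	if (kvm_fd < 0) {
		perror("open /dev/kvm");
		return 1;
	}
	vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, 0);

	/* Assumption: returns max ring size in bytes, 0 if unsupported */
	ring_bytes = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_DIRTY_LOG_RING);
	if (ring_bytes <= 0) {
		fprintf(stderr, "dirty ring not supported\n");
		return 1;
	}

	/* Enable the ring before creating any vcpu, as in the test */
	memset(&cap, 0, sizeof(cap));
	cap.cap = KVM_CAP_DIRTY_LOG_RING;
	cap.args[0] = ring_bytes;	/* per-vcpu ring size, in bytes */
	if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap))
		perror("KVM_ENABLE_CAP(KVM_CAP_DIRTY_LOG_RING)");
	else
		printf("dirty ring enabled: %d bytes per vcpu\n", ring_bytes);

	close(vm_fd);
	close(kvm_fd);
	return 0;
}

Note the unit difference the test hides: the -c option counts ring entries, and dirty_ring_create_vm_done() converts to bytes with test_dirty_ring_count * sizeof(struct kvm_dirty_gfn).
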